diff --git a/src/xenia/apu/xma_context.cc b/src/xenia/apu/xma_context.cc index a26f0ab9d..e4b19a7de 100644 --- a/src/xenia/apu/xma_context.cc +++ b/src/xenia/apu/xma_context.cc @@ -122,7 +122,7 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel, auto in = reinterpret_cast(samples[j]); // Raw samples sometimes aren't within [-1, 1] - float scaled_sample = xe::saturate_signed(in[i]) * scale; + float scaled_sample = xe::clamp_float(in[i], -1.0f, 1.0f) * scale; // Convert the sample and output it in big endian. auto sample = static_cast(scaled_sample); diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index 5d81bee9a..25aa89ede 100644 --- a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -72,20 +72,22 @@ constexpr T round_up(T value, V multiple, bool force_non_zero = true) { return (value + multiple - 1) / multiple * multiple; } -// Using the same conventions as in shading languages, returning 0 for NaN. -// std::max is `a < b ? b : a`, thus in case of NaN, the first argument is -// always returned. Also -0 is not < +0, so +0 is also chosen for it. +// For NaN, returns min_value (or, if it's NaN too, max_value). +// If either of the boundaries is zero, and if the value is at that boundary or +// exceeds it, the result will have the sign of that boundary. If both +// boundaries are zero, which sign is selected among the argument signs is not +// explicitly defined. template -constexpr T saturate_unsigned(T value) { - return std::min(static_cast(1.0f), std::max(static_cast(0.0f), value)); +T clamp_float(T value, T min_value, T max_value) { + float clamped_to_min = std::isgreater(value, min_value) ? value : min_value; + return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value; } -// This diverges from the GPU NaN rules for signed normalized formats (NaN -// should be converted to 0, not to -1), but this expectation is not needed most -// of time, and cannot be met for free (unlike for 0...1 clamping). 
+// Using the same conventions as in shading languages, returning 0 for NaN. +// 0 is always returned as positive. template -constexpr T saturate_signed(T value) { - return std::min(static_cast(1.0f), std::max(static_cast(-1.0f), value)); +T saturate(T value) { + return clamp_float(value, static_cast(0.0f), static_cast(1.0f)); } // Gets the next power of two value that is greater than or equal to the given @@ -365,12 +367,6 @@ inline uint64_t rotate_right(uint64_t v, uint8_t sh) { } #endif // XE_PLATFORM_WIN32 -template -T clamp(T value, T min_value, T max_value) { - const T t = value < min_value ? min_value : value; - return t > max_value ? max_value : t; -} - #if XE_ARCH_AMD64 // Utilities for SSE values. template diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index f7b1398c5..bf26ce220 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -16,12 +16,37 @@ #include #include #include +#include #include "xenia/base/byte_order.h" namespace xe { namespace memory { +// For variable declarations (not return values or `this` pointer). +// Not propagated. +#define XE_RESTRICT_VAR __restrict + +// Aliasing-safe bit reinterpretation. +// For more complex cases such as non-trivially-copyable types, write copying +// code respecting the requirements for them externally instead of using these +// functions. 
+ +template +void Reinterpret(Dst& XE_RESTRICT_VAR dst, const Src& XE_RESTRICT_VAR src) { + static_assert(sizeof(Dst) == sizeof(Src)); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + std::memcpy(&dst, &src, sizeof(Dst)); +} + +template +Dst Reinterpret(const Src& XE_RESTRICT_VAR src) { + Dst dst; + Reinterpret(dst, src); + return dst; +} + #if XE_PLATFORM_ANDROID void AndroidInitialize(); void AndroidShutdown(); diff --git a/src/xenia/base/testing/chrono_test.cc b/src/xenia/base/testing/chrono_test.cc index a63aac53c..f35f17ed8 100644 --- a/src/xenia/base/testing/chrono_test.cc +++ b/src/xenia/base/testing/chrono_test.cc @@ -107,10 +107,11 @@ TEST_CASE("WinSystemClock <-> XSystemClock", "[clock_cast]") { auto error2 = xsys.time_since_epoch() - wxsys.time_since_epoch(); auto error3 = wsys - wxsys; - REQUIRE(error1 < 10ms); - REQUIRE(error1 > -10ms); - REQUIRE(error2 < 10ms); - REQUIRE(error2 > -10ms); + // In AppVeyor, the difference often can be as large as roughly 16ms. 
+ REQUIRE(error1 < 20ms); + REQUIRE(error1 > -20ms); + REQUIRE(error2 < 20ms); + REQUIRE(error2 > -20ms); REQUIRE(error3 < duration); REQUIRE(error3 > -duration); } diff --git a/src/xenia/debug/ui/debug_window.cc b/src/xenia/debug/ui/debug_window.cc index 89c606769..eb10a5fa7 100644 --- a/src/xenia/debug/ui/debug_window.cc +++ b/src/xenia/debug/ui/debug_window.cc @@ -182,7 +182,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, top_panes_height)); if (ImGui::IsItemActive()) { function_pane_width += io.MouseDelta.x; - function_pane_width = xe::clamp(function_pane_width, 30.0f, FLT_MAX); + function_pane_width = xe::clamp_float(function_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##source_pane", @@ -194,7 +194,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, top_panes_height)); if (ImGui::IsItemActive()) { source_pane_width += io.MouseDelta.x; - source_pane_width = xe::clamp(source_pane_width, 30.0f, FLT_MAX); + source_pane_width = xe::clamp_float(source_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##registers_pane", @@ -206,7 +206,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, top_panes_height)); if (ImGui::IsItemActive()) { registers_pane_width += io.MouseDelta.x; - registers_pane_width = xe::clamp(registers_pane_width, 30.0f, FLT_MAX); + registers_pane_width = + xe::clamp_float(registers_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##right_pane", ImVec2(0, top_panes_height), true); @@ -234,7 +235,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImGui::InvisibleButton("##hsplitter0", ImVec2(-1, kSplitterWidth)); if (ImGui::IsItemActive()) { bottom_panes_height -= io.MouseDelta.y; - bottom_panes_height = xe::clamp(bottom_panes_height, 30.0f, FLT_MAX); + bottom_panes_height = xe::clamp_float(bottom_panes_height, 30.0f, FLT_MAX); } ImGui::BeginChild("##log_pane", ImVec2(log_pane_width, bottom_panes_height), true); @@ -245,7 +246,8 @@ 
void DebugWindow::DrawFrame(ImGuiIO& io) { ImVec2(kSplitterWidth, bottom_panes_height)); if (ImGui::IsItemActive()) { breakpoints_pane_width -= io.MouseDelta.x; - breakpoints_pane_width = xe::clamp(breakpoints_pane_width, 30.0f, FLT_MAX); + breakpoints_pane_width = + xe::clamp_float(breakpoints_pane_width, 30.0f, FLT_MAX); } ImGui::SameLine(); ImGui::BeginChild("##breakpoints_pane", ImVec2(0, 0), true); diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 3aee2ba87..8338d0dd2 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -455,9 +455,9 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, // Scratch register writeback. if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; - if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) { + if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) { // Enabled - write to address. - uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32; + uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR]; uint32_t mem_addr = scratch_addr + (scratch_reg * 4); xe::store_and_swap(memory_->TranslatePhysical(mem_addr), value); } @@ -467,7 +467,7 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, // This will block the command processor the next time it WAIT_MEM_REGs // and allow us to synchronize the memory. case XE_GPU_REG_COHER_STATUS_HOST: { - regs.values[index].u32 |= UINT32_C(0x80000000); + regs.values[index] |= UINT32_C(0x80000000); } break; case XE_GPU_REG_DC_LUT_RW_INDEX: { @@ -478,12 +478,12 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, case XE_GPU_REG_DC_LUT_SEQ_COLOR: { // Should be in the 256-entry table writing mode. 
- assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); - auto& gamma_ramp_rw_index = regs.Get(); + assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); + auto gamma_ramp_rw_index = regs.Get(); // DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write // enable mask is blue, green, red. bool write_gamma_ramp_component = - (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & + (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; if (write_gamma_ramp_component) { reg::DC_LUT_30_COLOR& gamma_ramp_entry = @@ -505,7 +505,11 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, } if (++gamma_ramp_rw_component_ >= 3) { gamma_ramp_rw_component_ = 0; - ++gamma_ramp_rw_index.rw_index; + reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index; + ++new_gamma_ramp_rw_index.rw_index; + WriteRegister( + XE_GPU_REG_DC_LUT_RW_INDEX, + xe::memory::Reinterpret(new_gamma_ramp_rw_index)); } if (write_gamma_ramp_component) { OnGammaRamp256EntryTableValueWritten(); @@ -514,14 +518,14 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, case XE_GPU_REG_DC_LUT_PWL_DATA: { // Should be in the PWL writing mode. - assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); - auto& gamma_ramp_rw_index = regs.Get(); + assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); + auto gamma_ramp_rw_index = regs.Get(); // Bit 7 of the index is ignored for PWL. uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F; // DC_LUT_PWL_DATA is likely in the red, green, blue order because // DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red. 
bool write_gamma_ramp_component = - (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & + (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & (UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0; if (write_gamma_ramp_component) { reg::DC_LUT_PWL_DATA& gamma_ramp_entry = @@ -534,13 +538,17 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, } if (++gamma_ramp_rw_component_ >= 3) { gamma_ramp_rw_component_ = 0; + reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index; // TODO(Triang3l): Should this increase beyond 7 bits for PWL? // Direct3D 9 explicitly sets rw_index to 0x80 after writing the last // PWL entry. However, the DC_LUT_RW_INDEX documentation says that for // PWL, the bit 7 is ignored. - gamma_ramp_rw_index.rw_index = + new_gamma_ramp_rw_index.rw_index = (gamma_ramp_rw_index.rw_index & ~UINT32_C(0x7F)) | ((gamma_ramp_rw_index_pwl + 1) & 0x7F); + WriteRegister( + XE_GPU_REG_DC_LUT_RW_INDEX, + xe::memory::Reinterpret(new_gamma_ramp_rw_index)); } if (write_gamma_ramp_component) { OnGammaRampPWLValueWritten(); @@ -549,10 +557,10 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, case XE_GPU_REG_DC_LUT_30_COLOR: { // Should be in the 256-entry table writing mode. - assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1); - auto& gamma_ramp_rw_index = regs.Get(); + assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1); + auto gamma_ramp_rw_index = regs.Get(); uint32_t gamma_ramp_write_enable_mask = - regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111; + regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111; if (gamma_ramp_write_enable_mask) { reg::DC_LUT_30_COLOR& gamma_ramp_entry = gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index]; @@ -567,11 +575,16 @@ void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, gamma_ramp_entry.color_10_red = gamma_ramp_value.color_10_red; } } - ++gamma_ramp_rw_index.rw_index; // TODO(Triang3l): Should this reset the component write index? 
If this // increase is assumed to behave like a full DC_LUT_RW_INDEX write, it - // probably should. + // probably should. Currently this also calls WriteRegister for + // DC_LUT_RW_INDEX, which resets gamma_ramp_rw_component_ as well. gamma_ramp_rw_component_ = 0; + reg::DC_LUT_RW_INDEX new_gamma_ramp_rw_index = gamma_ramp_rw_index; + ++new_gamma_ramp_rw_index.rw_index; + WriteRegister( + XE_GPU_REG_DC_LUT_RW_INDEX, + xe::memory::Reinterpret(new_gamma_ramp_rw_index)); if (gamma_ramp_write_enable_mask) { OnGammaRamp256EntryTableValueWritten(); } @@ -583,7 +596,7 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { // chrispy: rearrange check order, place set after checks if (XE_LIKELY(index < RegisterFile::kRegisterCount)) { - register_file_->values[index].u32 = value; + register_file_->values[index] = value; // quick pre-test // todo: figure out just how unlikely this is. if very (it ought to be, @@ -708,10 +721,11 @@ void CommandProcessor::MakeCoherent() { // https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 - RegisterFile* regs = register_file_; - auto& status_host = regs->Get(); - auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; - auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; + volatile uint32_t* regs_volatile = register_file_->values; + auto status_host = xe::memory::Reinterpret( + uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST])); + uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST]; + uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST]; if (!status_host.status) { return; @@ -731,7 +745,7 @@ void CommandProcessor::MakeCoherent() { base_host + size_host, size_host, action); // Mark coherent. 
- status_host.status = 0; + regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0; } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } @@ -752,4 +766,4 @@ void CommandProcessor::InitializeTrace() { #define COMMAND_PROCESSOR CommandProcessor #include "pm4_command_processor_implement.h" } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 1830cd0c1..5c2787bf3 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1768,7 +1768,7 @@ void D3D12CommandProcessor::WriteRegisterForceinline(uint32_t index, __m128i is_above_lower = _mm_cmpgt_epi16(to_rangecheck, lower_bounds); __m128i is_below_upper = _mm_cmplt_epi16(to_rangecheck, upper_bounds); __m128i is_within_range = _mm_and_si128(is_above_lower, is_below_upper); - register_file_->values[index].u32 = value; + register_file_->values[index] = value; uint32_t movmask = static_cast(_mm_movemask_epi8(is_within_range)); @@ -2047,7 +2047,7 @@ void D3D12CommandProcessor::WritePossiblySpecialRegistersFromMem( for (uint32_t index = start_index; index < end; ++index, ++base) { uint32_t value = xe::load_and_swap(base); - register_file_->values[index].u32 = value; + register_file_->values[index] = value; unsigned expr = 0; @@ -2780,8 +2780,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) { vfetch_bits_remaining = xe::clear_lowest_bit(vfetch_bits_remaining); uint32_t vfetch_index = i * 32 + j; - const auto& vfetch_constant = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + xenos::xe_gpu_vertex_fetch_t vfetch_constant = + regs.GetVertexFetch(vfetch_index); switch (vfetch_constant.type) { case xenos::FetchConstantType::kVertex: break; @@ -3554,10 +3554,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState( 
// Blend factor. float blend_factor[] = { - regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA), }; // std::memcmp instead of != so in case of NaN, every draw won't be // invalidating it. @@ -3599,7 +3599,7 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + auto rb_alpha_ref = regs.Get(XE_GPU_REG_RB_ALPHA_REF); auto rb_colorcontrol = regs.Get(); auto rb_depth_info = regs.Get(); auto rb_stencilrefmask = regs.Get(); @@ -3753,10 +3753,10 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( // Tessellation factor range, plus 1.0 according to the images in // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 - float tessellation_factor_min = - regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f; - float tessellation_factor_max = - regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f; + auto tessellation_factor_min = + regs.Get(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f; + auto tessellation_factor_max = + regs.Get(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f; update_dirty_floatmask(system_constants_.tessellation_factor_range_min, tessellation_factor_min); @@ -3804,12 +3804,12 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( &user_clip_plane_index)) { user_clip_planes_remaining = xe::clear_lowest_bit(user_clip_planes_remaining); - const float* user_clip_plane = - ®s[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32; - if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane, + const void* user_clip_plane_regs = + ®s[XE_GPU_REG_PA_CL_UCP_0_X + 
user_clip_plane_index * 4]; + if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs, 4 * sizeof(float))) { dirty = true; - std::memcpy(user_clip_plane_write_ptr, user_clip_plane, + std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs, 4 * sizeof(float)); } user_clip_plane_write_ptr += 4; @@ -3974,9 +3974,8 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( color_exp_bias -= 5; } } - float color_exp_bias_scale; - *reinterpret_cast(&color_exp_bias_scale) = - 0x3F800000 + (color_exp_bias << 23); + auto color_exp_bias_scale = xe::memory::Reinterpret( + int32_t(0x3F800000 + (color_exp_bias << 23))); update_dirty_floatmask(system_constants_.color_exp_bias[i], color_exp_bias_scale); @@ -4028,7 +4027,7 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( #endif uint32_t blend_factors_ops = - regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF; update_dirty_uint32_cmp(system_constants_.edram_rt_blend_factors_ops[i], blend_factors_ops); @@ -4060,22 +4059,22 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( if (primitive_polygonal) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); poly_offset_back_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + 
regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_offset = poly_offset_front_offset; } @@ -4153,26 +4152,26 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl( } } update_dirty_floatmask(system_constants_.edram_blend_constant[0], - regs[XE_GPU_REG_RB_BLEND_RED].f32); + regs.Get(XE_GPU_REG_RB_BLEND_RED)); system_constants_.edram_blend_constant[0] = - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); update_dirty_floatmask(system_constants_.edram_blend_constant[1], - regs[XE_GPU_REG_RB_BLEND_GREEN].f32); + regs.Get(XE_GPU_REG_RB_BLEND_GREEN)); system_constants_.edram_blend_constant[1] = - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); update_dirty_floatmask(system_constants_.edram_blend_constant[2], - regs[XE_GPU_REG_RB_BLEND_BLUE].f32); + regs.Get(XE_GPU_REG_RB_BLEND_BLUE)); system_constants_.edram_blend_constant[2] = - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); update_dirty_floatmask(system_constants_.edram_blend_constant[3], - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA)); system_constants_.edram_blend_constant[3] = - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); } dirty |= ArchFloatMaskSignbit(dirty_float_mask); @@ -4266,10 +4265,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). 
- assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || - regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || - regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 || + regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 || + regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000); // Check if the float constant layout is still the same and get the counts. const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); @@ -4344,8 +4343,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, xe::clear_lowest_bit(float_constant_map_entry); std::memcpy(float_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], 4 * sizeof(float)); float_constants += 4 * sizeof(float); } @@ -4376,8 +4374,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, xe::clear_lowest_bit(float_constant_map_entry); std::memcpy(float_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], 4 * sizeof(float)); float_constants += 4 * sizeof(float); } @@ -4397,8 +4394,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, return false; } xe::smallcpy_const( - bool_loop_constants, - ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32); + bool_loop_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031]); cbuffer_binding_bool_loop_.up_to_date = true; current_graphics_root_up_to_date_ &= @@ -4414,7 +4410,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, return false; } xe::smallcpy_const( - fetch_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32); + fetch_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); cbuffer_binding_fetch_.up_to_date = 
true; current_graphics_root_up_to_date_ &= @@ -5152,4 +5148,4 @@ void D3D12CommandProcessor::WriteGammaRampSRV( #undef COMMAND_PROCESSOR } // namespace d3d12 } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index f4770e9fa..ac63881a7 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -679,8 +679,8 @@ void D3D12TextureCache::PrefetchSamplerParameters( D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters( const D3D12Shader::SamplerBinding& binding) const { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(binding.fetch_constant); SamplerParameters parameters; @@ -1160,8 +1160,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture( D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out, xenos::TextureFormat& format_out) { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0); TextureKey key; BindingInfoFromFetchConstant(fetch, key, nullptr); if (!key.is_valid || key.base_page == 0 || diff --git a/src/xenia/gpu/draw_extent_estimator.cc b/src/xenia/gpu/draw_extent_estimator.cc index 31e94dcbb..86e528639 100644 --- a/src/xenia/gpu/draw_extent_estimator.cc +++ b/src/xenia/gpu/draw_extent_estimator.cc @@ -15,6 +15,7 @@ #include "xenia/base/assert.h" #include "xenia/base/cvar.h" +#include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/gpu/registers.h" #include "xenia/gpu/ucode.h" @@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export( point_size_ = value[0]; } if (value_mask & 0b0100) { - vertex_kill_ = *reinterpret_cast(&value[2]); + vertex_kill_ = 
xe::memory::Reinterpret(value[2]); } } } @@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { xenos::Endian index_endian = vgt_dma_size.swap_mode; if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) { xenos::IndexFormat index_format = vgt_draw_initiator.index_size; - uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32; + uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE]; uint32_t index_buffer_read_count = std::min(uint32_t(vgt_draw_initiator.num_indices), uint32_t(vgt_dma_size.num_words)); @@ -145,21 +146,22 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { auto pa_cl_vte_cntl = regs.Get(); float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE) : 1.0f; - float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : 0.0f; + float viewport_y_offset = + pa_cl_vte_cntl.vport_y_offset_ena + ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET) + : 0.0f; int32_t point_vertex_min_diameter_float = 0; int32_t point_vertex_max_diameter_float = 0; float point_constant_radius_y = 0.0f; if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { auto pa_su_point_minmax = regs.Get(); - *reinterpret_cast(&point_vertex_min_diameter_float) = - float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); - *reinterpret_cast(&point_vertex_max_diameter_float) = - float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); + point_vertex_min_diameter_float = xe::memory::Reinterpret( + float(pa_su_point_minmax.min_size) * (2.0f / 16.0f)); + point_vertex_max_diameter_float = xe::memory::Reinterpret( + float(pa_su_point_minmax.max_size) * (2.0f / 16.0f)); point_constant_radius_y = float(regs.Get().height) * (1.0f / 16.0f); } @@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { // Vertex-specified diameter. 
Clamped effectively as a signed integer in // the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN // to the maximum. - point_radius_y = position_y_export_sink.point_size().value(); - *reinterpret_cast(&point_radius_y) = std::min( - point_vertex_max_diameter_float, - std::max(point_vertex_min_diameter_float, - *reinterpret_cast(&point_radius_y))); - point_radius_y *= 0.5f; + point_radius_y = + 0.5f * + xe::memory::Reinterpret(std::min( + point_vertex_max_diameter_float, + std::max(point_vertex_min_diameter_float, + xe::memory::Reinterpret( + position_y_export_sink.point_size().value())))); } else { // Constant radius. point_radius_y = point_constant_radius_y; @@ -329,7 +332,7 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, float window_y_offset_f = float(window_y_offset); - float yoffset = regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; + float yoffset = regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET); // First calculate all the integer.0 or integer.5 offsetting exactly at full // precision. @@ -347,11 +350,10 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, sm3 = yoffset; } sm4 = pa_cl_vte_cntl.vport_y_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) + ? 
std::abs(regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE)) : 1.0f; viewport_bottom = sm1 + sm2 + sm3 + sm4; - // Using floor, or, rather, truncation (because maxing with zero anyway) // similar to how viewport scissoring behaves on real AMD, Intel and Nvidia // GPUs on Direct3D 12 (but not WARP), also like in @@ -366,4 +368,4 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, } } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index e3cd3acc9..97e6807b4 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -9,8 +9,6 @@ #include "xenia/gpu/draw_util.h" -#include - #include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" @@ -93,22 +91,21 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs, // ones that are rendered (except for shadow volumes). if (pa_su_sc_mode_cntl.poly_offset_front_enable && !pa_su_sc_mode_cntl.cull_front) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; - + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); scale = roundToNearestOrderOfMagnitude(scale); } if (pa_su_sc_mode_cntl.poly_offset_back_enable && !pa_su_sc_mode_cntl.cull_back && !scale && !offset) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { // Non-triangle primitives use the front offset, but it's toggled via // poly_offset_para_enable. 
if (pa_su_sc_mode_cntl.poly_offset_para_enable) { - scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + scale = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); + offset = regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } } scale_out = scale; @@ -143,7 +140,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader, } // Check if a color target is actually written. - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; uint32_t rts_remaining = shader.writes_color_targets(); uint32_t rt_index; while (xe::bit_scan_forward(rts_remaining, &rt_index)) { @@ -306,7 +303,6 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args, // Obtain the original viewport values in a normalized way. float scale_xy[] = { - pa_cl_vte_cntl.vport_x_scale_ena ? args->PA_CL_VPORT_XSCALE : 1.0f, pa_cl_vte_cntl.vport_y_scale_ena ? args->PA_CL_VPORT_YSCALE : 1.0f, }; @@ -392,16 +388,11 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args, float offset_axis = offset_base_xy[i] + offset_add_xy[i]; float scale_axis = scale_xy[i]; float scale_axis_abs = std::abs(scale_xy[i]); - float axis_0 = offset_axis - scale_axis_abs; - float axis_1 = offset_axis + scale_axis_abs; float axis_max_unscaled_float = float(xy_max_unscaled[i]); - // max(0.0f, xy) drops NaN and < 0 - max picks the first argument in the - // !(a < b) case (always for NaN), min as float (axis_max_unscaled_float - // is well below 2^24) to safely drop very large values. 
- uint32_t axis_0_int = - uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_0))); - uint32_t axis_1_int = - uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_1))); + uint32_t axis_0_int = uint32_t(xe::clamp_float( + offset_axis - scale_axis_abs, 0.0f, axis_max_unscaled_float)); + uint32_t axis_1_int = uint32_t(xe::clamp_float( + offset_axis + scale_axis_abs, 0.0f, axis_max_unscaled_float)); uint32_t axis_extent_int = axis_1_int - axis_0_int; viewport_info_out.xy_offset[i] = axis_0_int * axis_resolution_scale; viewport_info_out.xy_extent[i] = axis_extent_int * axis_resolution_scale; @@ -507,8 +498,8 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args, // extension. But cases when this really matters are yet to be found - // trying to fix this will result in more correct depth values, but // incorrect clipping. - z_min = xe::saturate_unsigned(host_clip_offset_z); - z_max = xe::saturate_unsigned(host_clip_offset_z + host_clip_scale_z); + z_min = xe::saturate(host_clip_offset_z); + z_max = xe::saturate(host_clip_offset_z + host_clip_scale_z); // Direct3D 12 doesn't allow reverse depth range - on some drivers it // works, on some drivers it doesn't, actually, but it was never // explicitly allowed by the specification. @@ -730,7 +721,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs, return 0; } uint32_t normalized_color_mask = 0; - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { // Exclude the render targets not statically written to by the pixel shader. // If the shader doesn't write to a render target, it shouldn't be written @@ -776,10 +767,16 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader, ? 
regs.Get().base : regs.Get().base; for (uint32_t constant_index : shader.memexport_stream_constants()) { - const auto& stream = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_000_X + - (float_constants_base + constant_index) * 4); - if (!stream.index_count) { + xenos::xe_gpu_memexport_stream_t stream = + regs.GetMemExportStream(float_constants_base + constant_index); + // Safety checks for stream constants potentially not set up if the export + // isn't done on the control flow path taken by the shader (not checking the + // Y component because the index is more likely to be constructed + // arbitrarily). + // The hardware validates the upper bits of eA according to the + // IPR2015-00325 sequencer specification. + if (stream.const_0x1 != 0x1 || stream.const_0x4b0 != 0x4B0 || + stream.const_0x96 != 0x96 || !stream.index_count) { continue; } const FormatInfo& format_info = @@ -821,7 +818,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader, } // Add a new range if haven't expanded an existing one. if (!range_reused) { - ranges_out.emplace_back(stream.base_address, stream_size_bytes); + ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes); } } } @@ -943,8 +940,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, // Get the extent of pixels covered by the resolve rectangle, according to the // top-left rasterization rule. // D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU. 
- auto fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0); if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) { XELOGE("Unsupported resolve vertex buffer format"); assert_always(); @@ -997,10 +993,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, GetScissor(regs, scissor, false); int32_t scissor_right = int32_t(scissor.offset[0] + scissor.extent[0]); int32_t scissor_bottom = int32_t(scissor.offset[1] + scissor.extent[1]); - x0 = xe::clamp(x0, int32_t(scissor.offset[0]), scissor_right); - y0 = xe::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom); - x1 = xe::clamp(x1, int32_t(scissor.offset[0]), scissor_right); - y1 = xe::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom); + x0 = std::clamp(x0, int32_t(scissor.offset[0]), scissor_right); + y0 = std::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom); + x1 = std::clamp(x1, int32_t(scissor.offset[0]), scissor_right); + y1 = std::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom); assert_true(x0 <= x1 && y0 <= y1); @@ -1114,7 +1110,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, } // Calculate the destination memory extent. 
- uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; + uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE]; uint32_t copy_dest_base_adjusted = rb_copy_dest_base; uint32_t copy_dest_extent_start, copy_dest_extent_end; auto rb_copy_dest_pitch = regs.Get(); @@ -1284,9 +1280,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, info_out.copy_dest_info.copy_dest_swap = false; } - info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; - info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; + info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR]; + info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR]; + info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO]; + #if 0 XELOGD( "Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially " @@ -1377,4 +1374,4 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader( } // namespace draw_util } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 08c710e6c..131d174e8 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -373,12 +373,12 @@ struct GetViewportInfoArgs { pa_cl_vte_cntl = regs.Get(); pa_su_sc_mode_cntl = regs.Get(); pa_su_vtx_cntl = regs.Get(); - PA_CL_VPORT_XSCALE = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; - PA_CL_VPORT_YSCALE = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; - PA_CL_VPORT_ZSCALE = regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; - PA_CL_VPORT_XOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; - PA_CL_VPORT_YOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; - PA_CL_VPORT_ZOFFSET = regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; + PA_CL_VPORT_XSCALE = regs.Get(XE_GPU_REG_PA_CL_VPORT_XSCALE); + PA_CL_VPORT_YSCALE = regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE); + PA_CL_VPORT_ZSCALE = regs.Get(XE_GPU_REG_PA_CL_VPORT_ZSCALE); + PA_CL_VPORT_XOFFSET = 
regs.Get(XE_GPU_REG_PA_CL_VPORT_XOFFSET); + PA_CL_VPORT_YOFFSET = regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET); + PA_CL_VPORT_ZOFFSET = regs.Get(XE_GPU_REG_PA_CL_VPORT_ZOFFSET); pa_sc_window_offset = regs.Get(); depth_format = regs.Get().depth_format; } @@ -767,4 +767,4 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, } // namespace gpu } // namespace xe -#endif // XENIA_GPU_DRAW_UTIL_H_ +#endif // XENIA_GPU_DRAW_UTIL_H_ \ No newline at end of file diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index e1587a7a5..ca3dfd1e3 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -17,6 +17,7 @@ #include "xenia/base/assert.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" namespace xe { namespace gpu { @@ -1103,10 +1104,10 @@ struct Src : OperandAddress { } static Src LI(int32_t x) { return LI(x, x, x, x); } static Src LF(float x, float y, float z, float w) { - return LU(*reinterpret_cast(&x), - *reinterpret_cast(&y), - *reinterpret_cast(&z), - *reinterpret_cast(&w)); + return LU(xe::memory::Reinterpret(x), + xe::memory::Reinterpret(y), + xe::memory::Reinterpret(z), + xe::memory::Reinterpret(w)); } static Src LF(float x) { return LF(x, x, x, x); } static Src LP(const uint32_t* xyzw) { @@ -1223,12 +1224,10 @@ struct Src : OperandAddress { bool negate) { if (is_integer) { if (absolute) { - *reinterpret_cast(&value) = - std::abs(*reinterpret_cast(&value)); + value = uint32_t(std::abs(int32_t(value))); } if (negate) { - *reinterpret_cast(&value) = - -*reinterpret_cast(&value); + value = uint32_t(-int32_t(value)); } } else { if (absolute) { diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index 5a9d8ac64..0f1ec6f8d 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -258,7 +258,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) { } assert_true(r < RegisterFile::kRegisterCount); - return register_file()->values[r].u32; + return 
register_file()->values[r]; } void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { @@ -276,7 +276,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { } assert_true(r < RegisterFile::kRegisterCount); - this->register_file()->values[r].u32 = value; + this->register_file()->values[r] = value; } void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) { @@ -379,4 +379,4 @@ bool GraphicsSystem::Restore(ByteStream* stream) { } } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/packet_disassembler.h b/src/xenia/gpu/packet_disassembler.h index 8aa60a4a2..26aa91371 100644 --- a/src/xenia/gpu/packet_disassembler.h +++ b/src/xenia/gpu/packet_disassembler.h @@ -67,7 +67,7 @@ struct PacketAction { union { struct { uint32_t index; - RegisterFile::RegisterValue value; + uint32_t value; } register_write; struct { uint64_t value; @@ -194,7 +194,7 @@ struct PacketAction { PacketAction action; action.type = Type::kRegisterWrite; action.register_write.index = index; - action.register_write.value.u32 = value; + action.register_write.value = value; return action; } diff --git a/src/xenia/gpu/pm4_command_processor_implement.h b/src/xenia/gpu/pm4_command_processor_implement.h index 3ec3e71b7..739599dde 100644 --- a/src/xenia/gpu/pm4_command_processor_implement.h +++ b/src/xenia/gpu/pm4_command_processor_implement.h @@ -706,23 +706,27 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_WAIT_REG_MEM( uint32_t ref = reader_.ReadAndSwap(); uint32_t mask = reader_.ReadAndSwap(); uint32_t wait = reader_.ReadAndSwap(); + + bool is_memory = (wait_info & 0x10) != 0; + assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount); + const volatile uint32_t& value_ref = + is_memory ? 
*reinterpret_cast(memory_->TranslatePhysical( + poll_reg_addr & ~uint32_t(0x3))) + : register_file_->values[poll_reg_addr]; + bool matched = false; + do { - uint32_t value; - if (wait_info & 0x10) { - // Memory. - auto endianness = static_cast(poll_reg_addr & 0x3); - poll_reg_addr &= ~0x3; - value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); - value = GpuSwap(value, endianness); - trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); + uint32_t value = value_ref; + if (is_memory) { + trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)), + sizeof(uint32_t)); + value = xenos::GpuSwap(value, + static_cast(poll_reg_addr & 0x3)); } else { - // Register. - assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { MakeCoherent(); - value = register_file_->values[poll_reg_addr].u32; + value = value_ref; } } matched = MatchValueAndRef(value & mask, ref, wait_info); @@ -758,17 +762,17 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_REG_RMW(uint32_t packet, uint32_t rmw_info = reader_.ReadAndSwap(); uint32_t and_mask = reader_.ReadAndSwap(); uint32_t or_mask = reader_.ReadAndSwap(); - uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; + uint32_t value = register_file_->values[rmw_info & 0x1FFF]; if ((rmw_info >> 31) & 0x1) { // & reg - value &= register_file_->values[and_mask & 0x1FFF].u32; + value &= register_file_->values[and_mask & 0x1FFF]; } else { // & imm value &= and_mask; } if ((rmw_info >> 30) & 0x1) { // | reg - value |= register_file_->values[or_mask & 0x1FFF].u32; + value |= register_file_->values[or_mask & 0x1FFF]; } else { // | imm value |= or_mask; @@ -788,7 +792,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_REG_TO_MEM( uint32_t reg_val; assert_true(reg_addr < RegisterFile::kRegisterCount); - reg_val = register_file_->values[reg_addr].u32; + reg_val = register_file_->values[reg_addr]; auto endianness = 
static_cast(mem_addr & 0x3); mem_addr &= ~0x3; @@ -836,7 +840,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_COND_WRITE( } else { // Register. assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; + value = register_file_->values[poll_reg_addr]; } bool matched = MatchValueAndRef(value & mask, ref, wait_info); @@ -858,7 +862,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_COND_WRITE( } XE_FORCEINLINE void COMMAND_PROCESSOR::WriteEventInitiator(uint32_t value) XE_RESTRICT { - register_file_->values[XE_GPU_REG_VGT_EVENT_INITIATOR].u32 = value; + register_file_->values[XE_GPU_REG_VGT_EVENT_INITIATOR] = value; } bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE( uint32_t packet, uint32_t count) XE_RESTRICT { @@ -898,10 +902,8 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_SHD( data_value = GpuSwap(data_value, endianness); uint8_t* write_destination = memory_->TranslatePhysical(address); if (address > 0x1FFFFFFF) { - uint32_t writeback_base = - register_file_->values[XE_GPU_REG_WRITEBACK_BASE].u32; - uint32_t writeback_size = - register_file_->values[XE_GPU_REG_WRITEBACK_SIZE].u32; + uint32_t writeback_base = register_file_->values[XE_GPU_REG_WRITEBACK_BASE]; + uint32_t writeback_size = register_file_->values[XE_GPU_REG_WRITEBACK_SIZE]; uint32_t writeback_offset = address - writeback_base; // check whether the guest has written writeback base. if they haven't, skip // the offset check @@ -967,7 +969,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_ZPD( if (fake_sample_count >= 0) { auto* pSampleCounts = memory_->TranslatePhysical( - register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32); + register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]); // 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END // and used to detect a finished query. 
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished && @@ -1003,7 +1005,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw( vgt_draw_initiator.value = reader_.ReadAndSwap(); --count_remaining; - register_file_->values[XE_GPU_REG_VGT_DRAW_INITIATOR].u32 = + register_file_->values[XE_GPU_REG_VGT_DRAW_INITIATOR] = vgt_draw_initiator.value; bool draw_succeeded = true; // TODO(Triang3l): Remove IndexBufferInfo and replace handling of all this @@ -1025,7 +1027,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw( } uint32_t vgt_dma_base = reader_.ReadAndSwap(); --count_remaining; - register_file_->values[XE_GPU_REG_VGT_DMA_BASE].u32 = vgt_dma_base; + register_file_->values[XE_GPU_REG_VGT_DMA_BASE] = vgt_dma_base; reg::VGT_DMA_SIZE vgt_dma_size; assert_not_zero(count_remaining); if (!count_remaining) { @@ -1034,7 +1036,7 @@ bool COMMAND_PROCESSOR::ExecutePacketType3Draw( } vgt_dma_size.value = reader_.ReadAndSwap(); --count_remaining; - register_file_->values[XE_GPU_REG_VGT_DMA_SIZE].u32 = vgt_dma_size.value; + register_file_->values[XE_GPU_REG_VGT_DMA_SIZE] = vgt_dma_size.value; uint32_t index_size_bytes = vgt_draw_initiator.index_size == xenos::IndexFormat::kInt16 @@ -1341,10 +1343,10 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_VIZ_QUERY( // The scan converter writes the internal result back to the register here. // We just fake it and say it was visible in case it is read back. 
if (id < 32) { - register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |= - uint32_t(1) << id; + register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1) + << id; } else { - register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |= + register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |= uint32_t(1) << (id - 32); } } @@ -1423,4 +1425,4 @@ void COMMAND_PROCESSOR::ExecutePacket(uint32_t ptr, uint32_t count) { } } while (reader_.read_count()); reader_ = old_reader; -} +} \ No newline at end of file diff --git a/src/xenia/gpu/primitive_processor.cc b/src/xenia/gpu/primitive_processor.cc index 827fb7b4e..9e20be2c4 100644 --- a/src/xenia/gpu/primitive_processor.cc +++ b/src/xenia/gpu/primitive_processor.cc @@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) { uint32_t index_size_log2 = guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; // The base should already be aligned, but aligning here too for safety. - guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & - ~uint32_t((1 << index_size_log2) - 1); + guest_index_base = + regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1); guest_index_buffer_needed_bytes = guest_draw_vertex_count << index_size_log2; if (guest_index_base > SharedMemory::kBufferSize || @@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) { uint32_t index_size_log2 = guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2; // The base should already be aligned, but aligning here too for safety. 
- guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 & - ~uint32_t((1 << index_size_log2) - 1); + guest_index_base = + regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1); guest_index_buffer_needed_bytes = guest_draw_vertex_count << index_size_log2; if (guest_index_base > SharedMemory::kBufferSize || diff --git a/src/xenia/gpu/register_file.h b/src/xenia/gpu/register_file.h index 11eebd8c5..8128bdcc9 100644 --- a/src/xenia/gpu/register_file.h +++ b/src/xenia/gpu/register_file.h @@ -12,8 +12,12 @@ #include #include +#include +#include "xenia/base/assert.h" +#include "xenia/base/memory.h" #include "xenia/gpu/registers.h" +#include "xenia/gpu/xenos.h" namespace xe { namespace gpu { @@ -34,39 +38,53 @@ class RegisterFile { static const RegisterInfo* GetRegisterInfo(uint32_t index); static bool IsValidRegister(uint32_t index); static constexpr size_t kRegisterCount = 0x5003; - union RegisterValue { - uint32_t u32; - float f32; - }; - RegisterValue values[kRegisterCount]; + uint32_t values[kRegisterCount]; + + const uint32_t& operator[](uint32_t reg) const { return values[reg]; } + uint32_t& operator[](uint32_t reg) { return values[reg]; } - const RegisterValue& operator[](uint32_t reg) const { return values[reg]; } - RegisterValue& operator[](uint32_t reg) { return values[reg]; } - const RegisterValue& operator[](Register reg) const { return values[reg]; } - RegisterValue& operator[](Register reg) { return values[reg]; } template - const T& Get(uint32_t reg) const { - return *reinterpret_cast(&values[reg]); + T Get(uint32_t reg) const { + return xe::memory::Reinterpret(values[reg]); } template - T& Get(uint32_t reg) { - return *reinterpret_cast(&values[reg]); + T Get(Register reg) const { + return Get(static_cast(reg)); } template - const T& Get(Register reg) const { - return *reinterpret_cast(&values[reg]); + T Get() const { + return Get(T::register_index); } - template - T& Get(Register reg) { - return *reinterpret_cast(&values[reg]); + + 
xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t index) const { + assert_true(index < 96); + xenos::xe_gpu_vertex_fetch_t fetch; + std::memcpy(&fetch, + &values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (sizeof(fetch) / sizeof(uint32_t)) * index], + sizeof(fetch)); + return fetch; } - template - const T& Get() const { - return *reinterpret_cast(&values[T::register_index]); + + xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const { + assert_true(index < 32); + xenos::xe_gpu_texture_fetch_t fetch; + std::memcpy(&fetch, + &values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (sizeof(fetch) / sizeof(uint32_t)) * index], + sizeof(fetch)); + return fetch; } - template - T& Get() { - return *reinterpret_cast(&values[T::register_index]); + + xenos::xe_gpu_memexport_stream_t GetMemExportStream( + uint32_t float_constant_index) const { + assert_true(float_constant_index < 512); + xenos::xe_gpu_memexport_stream_t stream; + std::memcpy( + &stream, + &values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index], + sizeof(stream)); + return stream; } }; diff --git a/src/xenia/gpu/shader_interpreter.cc b/src/xenia/gpu/shader_interpreter.cc index 6eda12f42..ec7cf9a02 100644 --- a/src/xenia/gpu/shader_interpreter.cc +++ b/src/xenia/gpu/shader_interpreter.cc @@ -21,10 +21,7 @@ void ShaderInterpreter::Execute() { state_.Reset(); const uint32_t* bool_constants = - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32; - const xenos::LoopConstant* loop_constants = - reinterpret_cast( - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32); + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031]; bool exec_ended = false; uint32_t cf_index_next = 1; @@ -133,8 +130,8 @@ void ShaderInterpreter::Execute() { cf_index_next = cf_loop_start.address(); continue; } - xenos::LoopConstant loop_constant = - loop_constants[cf_loop_start.loop_id()]; + auto loop_constant = register_file_.Get( + XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id()); 
state_.loop_constants[state_.loop_stack_depth] = loop_constant; uint32_t& loop_iterator_ref = state_.loop_iterators[state_.loop_stack_depth]; @@ -163,8 +160,11 @@ void ShaderInterpreter::Execute() { &cf_instr); xenos::LoopConstant loop_constant = state_.loop_constants[state_.loop_stack_depth - 1]; - assert_true(loop_constant.value == - loop_constants[cf_loop_end.loop_id()].value); + assert_zero( + std::memcmp(&loop_constant, + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + + cf_loop_end.loop_id()], + sizeof(loop_constant))); uint32_t loop_iterator = ++state_.loop_iterators[state_.loop_stack_depth - 1]; if (loop_iterator < loop_constant.count && @@ -250,28 +250,31 @@ void ShaderInterpreter::Execute() { } } -const float* ShaderInterpreter::GetFloatConstant( +const std::array ShaderInterpreter::GetFloatConstant( uint32_t address, bool is_relative, bool relative_address_is_a0) const { - static const float zero[4] = {}; int32_t index = int32_t(address); if (is_relative) { index += relative_address_is_a0 ? state_.address_register : state_.GetLoopAddress(); } if (index < 0) { - return zero; + return std::array(); } auto base_and_size_minus_1 = register_file_.Get( shader_type_ == xenos::ShaderType::kVertex ? 
XE_GPU_REG_SQ_VS_CONST : XE_GPU_REG_SQ_PS_CONST); if (uint32_t(index) > base_and_size_minus_1.size) { - return zero; + return std::array(); } index += base_and_size_minus_1.base; if (index >= 512) { - return zero; + return std::array(); } - return ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32; + std::array value; + std::memcpy(value.data(), + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index], + sizeof(float) * 4); + return value; } void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { @@ -290,6 +293,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { const float* vector_src_ptr; uint32_t vector_src_register = instr.src_reg(1 + i); bool vector_src_absolute = false; + std::array vector_src_float_constant; if (instr.src_is_temp(1 + i)) { vector_src_ptr = GetTempRegister( ucode::AluInstruction::src_temp_reg(vector_src_register), @@ -297,9 +301,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( vector_src_register); } else { - vector_src_ptr = GetFloatConstant( + vector_src_float_constant = GetFloatConstant( vector_src_register, instr.src_const_is_addressed(1 + i), instr.is_const_address_register_relative()); + vector_src_ptr = vector_src_float_constant.data(); } uint32_t vector_src_absolute_mask = ~(uint32_t(vector_src_absolute) << 31); @@ -334,16 +339,18 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluVectorOpcode::kMax: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] >= vector_operands[1][i] - ? vector_operands[0][i] - : vector_operands[1][i]; + vector_result[i] = + std::isgreaterequal(vector_operands[0][i], vector_operands[1][i]) + ? 
vector_operands[0][i] + : vector_operands[1][i]; } } break; case ucode::AluVectorOpcode::kMin: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] < vector_operands[1][i] - ? vector_operands[0][i] - : vector_operands[1][i]; + vector_result[i] = + std::isless(vector_operands[0][i], vector_operands[1][i]) + ? vector_operands[0][i] + : vector_operands[1][i]; } } break; case ucode::AluVectorOpcode::kSeq: { @@ -354,14 +361,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluVectorOpcode::kSgt: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = - float(vector_operands[0][i] > vector_operands[1][i]); + vector_result[i] = float( + std::isgreater(vector_operands[0][i], vector_operands[1][i])); } } break; case ucode::AluVectorOpcode::kSge: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = - float(vector_operands[0][i] >= vector_operands[1][i]); + vector_result[i] = float(std::isgreaterequal(vector_operands[0][i], + vector_operands[1][i])); } } break; case ucode::AluVectorOpcode::kSne: { @@ -407,14 +414,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluVectorOpcode::kCndGe: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] >= 0.0f + vector_result[i] = std::isgreaterequal(vector_operands[0][i], 0.0f) ? vector_operands[1][i] : vector_operands[2][i]; } } break; case ucode::AluVectorOpcode::kCndGt: { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] > 0.0f + vector_result[i] = std::isgreater(vector_operands[0][i], 0.0f) ? vector_operands[1][i] : vector_operands[2][i]; } @@ -466,32 +473,38 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { float x_abs = std::abs(x), y_abs = std::abs(y), z_abs = std::abs(z); // Result is T coordinate, S coordinate, 2 * major axis, face ID. 
if (z_abs >= x_abs && z_abs >= y_abs) { + bool z_negative = std::isless(z, 0.0f); vector_result[0] = -y; - vector_result[1] = z < 0.0f ? -x : x; + vector_result[1] = z_negative ? -x : x; vector_result[2] = z; - vector_result[3] = z < 0.0f ? 5.0f : 4.0f; + vector_result[3] = z_negative ? 5.0f : 4.0f; } else if (y_abs >= x_abs) { - vector_result[0] = y < 0.0f ? -z : z; + bool y_negative = std::isless(y, 0.0f); + vector_result[0] = y_negative ? -z : z; vector_result[1] = x; vector_result[2] = y; - vector_result[3] = y < 0.0f ? 3.0f : 2.0f; + vector_result[3] = y_negative ? 3.0f : 2.0f; } else { + bool x_negative = std::isless(x, 0.0f); vector_result[0] = -y; - vector_result[1] = x < 0.0f ? z : -z; + vector_result[1] = x_negative ? z : -z; vector_result[2] = x; - vector_result[3] = x < 0.0f ? 1.0f : 0.0f; + vector_result[3] = x_negative ? 1.0f : 0.0f; } vector_result[2] *= 2.0f; } break; case ucode::AluVectorOpcode::kMax4: { - if (vector_operands[0][0] >= vector_operands[0][1] && - vector_operands[0][0] >= vector_operands[0][2] && - vector_operands[0][0] >= vector_operands[0][3]) { + if (std::isgreaterequal(vector_operands[0][0], vector_operands[0][1]) && + std::isgreaterequal(vector_operands[0][0], vector_operands[0][2]) && + std::isgreaterequal(vector_operands[0][0], vector_operands[0][3])) { vector_result[0] = vector_operands[0][0]; - } else if (vector_operands[0][1] >= vector_operands[0][2] && - vector_operands[0][1] >= vector_operands[0][3]) { + } else if (std::isgreaterequal(vector_operands[0][1], + vector_operands[0][2]) && + std::isgreaterequal(vector_operands[0][1], + vector_operands[0][3])) { vector_result[0] = vector_operands[0][1]; - } else if (vector_operands[0][2] >= vector_operands[0][3]) { + } else if (std::isgreaterequal(vector_operands[0][2], + vector_operands[0][3])) { vector_result[0] = vector_operands[0][2]; } else { vector_result[0] = vector_operands[0][3]; @@ -517,21 +530,21 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction 
instr) { replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kSetpGtPush: { - state_.predicate = - vector_operands[0][3] == 0.0f && vector_operands[1][3] > 0.0f; - vector_result[0] = - (vector_operands[0][0] == 0.0f && vector_operands[1][0] > 0.0f) - ? 0.0f - : vector_operands[0][0] + 1.0f; + state_.predicate = vector_operands[0][3] == 0.0f && + std::isgreater(vector_operands[1][3], 0.0f); + vector_result[0] = (vector_operands[0][0] == 0.0f && + std::isgreater(vector_operands[1][0], 0.0f)) + ? 0.0f + : vector_operands[0][0] + 1.0f; replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kSetpGePush: { - state_.predicate = - vector_operands[0][3] == 0.0f && vector_operands[1][3] >= 0.0f; - vector_result[0] = - (vector_operands[0][0] == 0.0f && vector_operands[1][0] >= 0.0f) - ? 0.0f - : vector_operands[0][0] + 1.0f; + state_.predicate = vector_operands[0][3] == 0.0f && + std::isgreaterequal(vector_operands[1][3], 0.0f); + vector_result[0] = (vector_operands[0][0] == 0.0f && + std::isgreaterequal(vector_operands[1][0], 0.0f)) + ? 
0.0f + : vector_operands[0][0] + 1.0f; replicate_vector_result_x = true; } break; // Not implementing pixel kill currently, the interpreter is currently @@ -545,19 +558,19 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kKillGt: { - vector_result[0] = - float(vector_operands[0][0] > vector_operands[1][0] || - vector_operands[0][1] > vector_operands[1][1] || - vector_operands[0][2] > vector_operands[1][2] || - vector_operands[0][3] > vector_operands[1][3]); + vector_result[0] = float( + std::isgreater(vector_operands[0][0], vector_operands[1][0]) || + std::isgreater(vector_operands[0][1], vector_operands[1][1]) || + std::isgreater(vector_operands[0][2], vector_operands[1][2]) || + std::isgreater(vector_operands[0][3], vector_operands[1][3])); replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kKillGe: { - vector_result[0] = - float(vector_operands[0][0] >= vector_operands[1][0] || - vector_operands[0][1] >= vector_operands[1][1] || - vector_operands[0][2] >= vector_operands[1][2] || - vector_operands[0][3] >= vector_operands[1][3]); + vector_result[0] = float( + std::isgreaterequal(vector_operands[0][0], vector_operands[1][0]) || + std::isgreaterequal(vector_operands[0][1], vector_operands[1][1]) || + std::isgreaterequal(vector_operands[0][2], vector_operands[1][2]) || + std::isgreaterequal(vector_operands[0][3], vector_operands[1][3])); replicate_vector_result_x = true; } break; case ucode::AluVectorOpcode::kKillNe: { @@ -578,14 +591,13 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { vector_result[3] = vector_operands[1][3]; } break; case ucode::AluVectorOpcode::kMaxA: { - // std::max is `a < b ? b : a`, thus in case of NaN, the first argument - // (-256.0f) is always the result. 
state_.address_register = int32_t(std::floor( - std::min(255.0f, std::max(-256.0f, vector_operands[0][3])) + 0.5f)); + xe::clamp_float(vector_operands[0][3], -256.0f, 255.0f) + 0.5f)); for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = vector_operands[0][i] >= vector_operands[1][i] - ? vector_operands[0][i] - : vector_operands[1][i]; + vector_result[i] = + std::isgreaterequal(vector_operands[0][i], vector_operands[1][i]) + ? vector_operands[0][i] + : vector_operands[1][i]; } } break; default: { @@ -611,6 +623,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { // r#/c#.w or r#/c#.wx. const float* scalar_src_ptr; uint32_t scalar_src_register = instr.src_reg(3); + std::array scalar_src_float_constant; if (instr.src_is_temp(3)) { scalar_src_ptr = GetTempRegister( ucode::AluInstruction::src_temp_reg(scalar_src_register), @@ -618,9 +631,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute( scalar_src_register); } else { - scalar_src_ptr = GetFloatConstant( + scalar_src_float_constant = GetFloatConstant( scalar_src_register, instr.src_const_is_addressed(3), instr.is_const_address_register_relative()); + scalar_src_ptr = scalar_src_float_constant.data(); } uint32_t scalar_src_swizzle = instr.src_swizzle(3); scalar_operand_component_count = @@ -688,7 +702,8 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { case ucode::AluScalarOpcode::kMulsPrev2: { if (state_.previous_scalar == -FLT_MAX || !std::isfinite(state_.previous_scalar) || - !std::isfinite(scalar_operands[1]) || scalar_operands[1] <= 0.0f) { + !std::isfinite(scalar_operands[1]) || + std::islessequal(scalar_operands[1], 0.0f)) { state_.previous_scalar = -FLT_MAX; } else { // Direct3D 9 behavior (0 or denormal * anything = +0). 
@@ -699,23 +714,26 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } } break; case ucode::AluScalarOpcode::kMaxs: { - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + state_.previous_scalar = + std::isgreaterequal(scalar_operands[0], scalar_operands[1]) + ? scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kMins: { - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + state_.previous_scalar = + std::isless(scalar_operands[0], scalar_operands[1]) + ? scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kSeqs: { state_.previous_scalar = float(scalar_operands[0] == 0.0f); } break; case ucode::AluScalarOpcode::kSgts: { - state_.previous_scalar = float(scalar_operands[0] > 0.0f); + state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kSges: { - state_.previous_scalar = float(scalar_operands[0] >= 0.0f); + state_.previous_scalar = + float(std::isgreaterequal(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kSnes: { state_.previous_scalar = float(scalar_operands[0] != 0.0f); @@ -781,22 +799,20 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { state_.previous_scalar = 1.0f / std::sqrt(scalar_operands[0]); } break; case ucode::AluScalarOpcode::kMaxAs: { - // std::max is `a < b ? b : a`, thus in case of NaN, the first argument - // (-256.0f) is always the result. state_.address_register = int32_t(std::floor( - std::min(255.0f, std::max(-256.0f, scalar_operands[0])) + 0.5f)); - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + xe::clamp_float(scalar_operands[0], -256.0f, 255.0f) + 0.5f)); + state_.previous_scalar = + std::isgreaterequal(scalar_operands[0], scalar_operands[1]) + ? 
scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kMaxAsf: { - // std::max is `a < b ? b : a`, thus in case of NaN, the first argument - // (-256.0f) is always the result. state_.address_register = int32_t( - std::floor(std::min(255.0f, std::max(-256.0f, scalar_operands[0])))); - state_.previous_scalar = scalar_operands[0] >= scalar_operands[1] - ? scalar_operands[0] - : scalar_operands[1]; + std::floor(xe::clamp_float(scalar_operands[0], -256.0f, 255.0f))); + state_.previous_scalar = + std::isgreaterequal(scalar_operands[0], scalar_operands[1]) + ? scalar_operands[0] + : scalar_operands[1]; } break; case ucode::AluScalarOpcode::kSubs: case ucode::AluScalarOpcode::kSubsc0: @@ -815,11 +831,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { state_.previous_scalar = float(!state_.predicate); } break; case ucode::AluScalarOpcode::kSetpGt: { - state_.predicate = scalar_operands[0] > 0.0f; + state_.predicate = std::isgreater(scalar_operands[0], 0.0f); state_.previous_scalar = float(!state_.predicate); } break; case ucode::AluScalarOpcode::kSetpGe: { - state_.predicate = scalar_operands[0] >= 0.0f; + state_.predicate = std::isgreaterequal(scalar_operands[0], 0.0f); state_.previous_scalar = float(!state_.predicate); } break; case ucode::AluScalarOpcode::kSetpInv: { @@ -831,7 +847,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { } break; case ucode::AluScalarOpcode::kSetpPop: { float new_counter = scalar_operands[0] - 1.0f; - state_.predicate = new_counter <= 0.0f; + state_.predicate = std::islessequal(new_counter, 0.0f); state_.previous_scalar = state_.predicate ? 
0.0f : new_counter; } break; case ucode::AluScalarOpcode::kSetpClr: { @@ -848,10 +864,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { state_.previous_scalar = float(scalar_operands[0] == 0.0f); } break; case ucode::AluScalarOpcode::kKillsGt: { - state_.previous_scalar = float(scalar_operands[0] > 0.0f); + state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kKillsGe: { - state_.previous_scalar = float(scalar_operands[0] >= 0.0f); + state_.previous_scalar = + float(std::isgreaterequal(scalar_operands[0], 0.0f)); } break; case ucode::AluScalarOpcode::kKillsNe: { state_.previous_scalar = float(scalar_operands[0] != 0.0f); @@ -877,11 +894,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) { if (instr.vector_clamp()) { for (uint32_t i = 0; i < 4; ++i) { - vector_result[i] = xe::saturate_unsigned(vector_result[i]); + vector_result[i] = xe::saturate(vector_result[i]); } } float scalar_result = instr.scalar_clamp() - ? xe::saturate_unsigned(state_.previous_scalar) + ? xe::saturate(state_.previous_scalar) : state_.previous_scalar; uint32_t scalar_result_write_mask = instr.GetScalarOpResultWriteMask(); @@ -977,10 +994,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction( state_.vfetch_full_last = instr; } - xenos::xe_gpu_vertex_fetch_t fetch_constant = - *reinterpret_cast( - ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - state_.vfetch_full_last.fetch_constant_index()]); + xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch( + state_.vfetch_full_last.fetch_constant_index()); if (!instr.is_mini_fetch()) { // Get the part of the address that depends on vfetch_full data. 
diff --git a/src/xenia/gpu/shader_interpreter.h b/src/xenia/gpu/shader_interpreter.h index 759f606eb..46808562e 100644 --- a/src/xenia/gpu/shader_interpreter.h +++ b/src/xenia/gpu/shader_interpreter.h @@ -11,6 +11,7 @@ #define XENIA_GPU_SHADER_INTERPRETER_H_ #include +#include #include #include @@ -117,8 +118,8 @@ class ShaderInterpreter { float* GetTempRegister(uint32_t address, bool is_relative) { return temp_registers_[GetTempRegisterIndex(address, is_relative)]; } - const float* GetFloatConstant(uint32_t address, bool is_relative, - bool relative_address_is_a0) const; + const std::array<float, 4> GetFloatConstant( + uint32_t address, bool is_relative, bool relative_address_is_a0) const; void ExecuteAluInstruction(ucode::AluInstruction instr); void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle, diff --git a/src/xenia/gpu/spirv_builder.cc b/src/xenia/gpu/spirv_builder.cc index 2ba9446bc..fc2e92850 100644 --- a/src/xenia/gpu/spirv_builder.cc +++ b/src/xenia/gpu/spirv_builder.cc @@ -13,6 +13,8 @@ #include #include +#include "xenia/base/assert.h" namespace xe { namespace gpu { @@ -101,5 +103,195 @@ spv::Id SpirvBuilder::createTriBuiltinCall(spv::Id result_type, return result; } +SpirvBuilder::IfBuilder::IfBuilder(spv::Id condition, unsigned int control, + SpirvBuilder& builder, + unsigned int thenWeight, + unsigned int elseWeight) + : builder(builder), + condition(condition), + control(control), + thenWeight(thenWeight), + elseWeight(elseWeight), + function(builder.getBuildPoint()->getParent()) { + // Make the blocks, but only put the then-block into the function, the + // else-block and merge-block will be added later, in order, after earlier + // code is emitted. + thenBlock = new spv::Block(builder.getUniqueId(), function); + elseBlock = nullptr; + mergeBlock = new spv::Block(builder.getUniqueId(), function); + + // Save the current block, so that we can add in the flow control split when + // makeEndIf is called. 
+ headerBlock = builder.getBuildPoint(); + + spv::Id headerBlockId = headerBlock->getId(); + thenPhiParent = headerBlockId; + elsePhiParent = headerBlockId; + + function.addBlock(thenBlock); + builder.setBuildPoint(thenBlock); +} + +void SpirvBuilder::IfBuilder::makeBeginElse(bool branchToMerge) { +#ifndef NDEBUG + assert_true(currentBranch == Branch::kThen); +#endif + + if (branchToMerge) { + // Close out the "then" by having it jump to the mergeBlock. + thenPhiParent = builder.getBuildPoint()->getId(); + builder.createBranch(mergeBlock); + } + + // Make the first else block and add it to the function. + elseBlock = new spv::Block(builder.getUniqueId(), function); + function.addBlock(elseBlock); + + // Start building the else block. + builder.setBuildPoint(elseBlock); + +#ifndef NDEBUG + currentBranch = Branch::kElse; +#endif +} + +void SpirvBuilder::IfBuilder::makeEndIf(bool branchToMerge) { +#ifndef NDEBUG + assert_true(currentBranch == Branch::kThen || currentBranch == Branch::kElse); +#endif + + if (branchToMerge) { + // Jump to the merge block. + (elseBlock ? elsePhiParent : thenPhiParent) = + builder.getBuildPoint()->getId(); + builder.createBranch(mergeBlock); + } + + // Go back to the headerBlock and make the flow control split. + builder.setBuildPoint(headerBlock); + builder.createSelectionMerge(mergeBlock, control); + { + spv::Block* falseBlock = elseBlock ? elseBlock : mergeBlock; + std::unique_ptr branch = + std::make_unique(spv::OpBranchConditional); + branch->addIdOperand(condition); + branch->addIdOperand(thenBlock->getId()); + branch->addIdOperand(falseBlock->getId()); + if (thenWeight || elseWeight) { + branch->addImmediateOperand(thenWeight); + branch->addImmediateOperand(elseWeight); + } + builder.getBuildPoint()->addInstruction(std::move(branch)); + thenBlock->addPredecessor(builder.getBuildPoint()); + falseBlock->addPredecessor(builder.getBuildPoint()); + } + + // Add the merge block to the function. 
+ function.addBlock(mergeBlock); + builder.setBuildPoint(mergeBlock); + +#ifndef NDEBUG + currentBranch = Branch::kMerge; +#endif +} + +spv::Id SpirvBuilder::IfBuilder::createMergePhi(spv::Id then_variable, + spv::Id else_variable) const { + assert_true(builder.getBuildPoint() == mergeBlock); + return builder.createQuadOp(spv::OpPhi, builder.getTypeId(then_variable), + then_variable, getThenPhiParent(), else_variable, + getElsePhiParent()); +} + +SpirvBuilder::SwitchBuilder::SwitchBuilder(spv::Id selector, + unsigned int selection_control, + SpirvBuilder& builder) + : builder_(builder), + selector_(selector), + selection_control_(selection_control), + function_(builder.getBuildPoint()->getParent()), + header_block_(builder.getBuildPoint()), + default_phi_parent_(builder.getBuildPoint()->getId()) { + merge_block_ = new spv::Block(builder_.getUniqueId(), function_); +} + +void SpirvBuilder::SwitchBuilder::makeBeginDefault() { + assert_null(default_block_); + + endSegment(); + + default_block_ = new spv::Block(builder_.getUniqueId(), function_); + function_.addBlock(default_block_); + default_block_->addPredecessor(header_block_); + builder_.setBuildPoint(default_block_); + + current_branch_ = Branch::kDefault; +} + +void SpirvBuilder::SwitchBuilder::makeBeginCase(unsigned int literal) { + endSegment(); + + auto case_block = new spv::Block(builder_.getUniqueId(), function_); + function_.addBlock(case_block); + cases_.emplace_back(literal, case_block->getId()); + case_block->addPredecessor(header_block_); + builder_.setBuildPoint(case_block); + + current_branch_ = Branch::kCase; +} + +void SpirvBuilder::SwitchBuilder::addCurrentCaseLiteral(unsigned int literal) { + assert_true(current_branch_ == Branch::kCase); + + cases_.emplace_back(literal, cases_.back().second); +} + +void SpirvBuilder::SwitchBuilder::makeEndSwitch() { + endSegment(); + + builder_.setBuildPoint(header_block_); + + builder_.createSelectionMerge(merge_block_, selection_control_); + + std::unique_ptr 
switch_instruction = + std::make_unique(spv::OpSwitch); + switch_instruction->addIdOperand(selector_); + if (default_block_) { + switch_instruction->addIdOperand(default_block_->getId()); + } else { + switch_instruction->addIdOperand(merge_block_->getId()); + merge_block_->addPredecessor(header_block_); + } + for (const std::pair& case_pair : cases_) { + switch_instruction->addImmediateOperand(case_pair.first); + switch_instruction->addIdOperand(case_pair.second); + } + builder_.getBuildPoint()->addInstruction(std::move(switch_instruction)); + + function_.addBlock(merge_block_); + builder_.setBuildPoint(merge_block_); + + current_branch_ = Branch::kMerge; +} + +void SpirvBuilder::SwitchBuilder::endSegment() { + assert_true(current_branch_ == Branch::kSelection || + current_branch_ == Branch::kDefault || + current_branch_ == Branch::kCase); + + if (current_branch_ == Branch::kSelection) { + return; + } + + if (!builder_.getBuildPoint()->isTerminated()) { + builder_.createBranch(merge_block_); + if (current_branch_ == Branch::kDefault) { + default_phi_parent_ = builder_.getBuildPoint()->getId(); + } + } + + current_branch_ = Branch::kSelection; +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_builder.h b/src/xenia/gpu/spirv_builder.h index 0496aa7c4..7422d7c63 100644 --- a/src/xenia/gpu/spirv_builder.h +++ b/src/xenia/gpu/spirv_builder.h @@ -10,7 +10,13 @@ #ifndef XENIA_GPU_SPIRV_BUILDER_H_ #define XENIA_GPU_SPIRV_BUILDER_H_ +#include +#include +#include +#include + #include "third_party/glslang/SPIRV/SpvBuilder.h" +#include "xenia/base/assert.h" namespace xe { namespace gpu { @@ -42,6 +48,104 @@ class SpirvBuilder : public spv::Builder { spv::Id createTriBuiltinCall(spv::Id result_type, spv::Id builtins, int entry_point, spv::Id operand1, spv::Id operand2, spv::Id operand3); + + // Helper to use for building nested control flow with if-then-else with + // additions over SpvBuilder::If. 
+ class IfBuilder { + public: + IfBuilder(spv::Id condition, unsigned int control, SpirvBuilder& builder, + unsigned int thenWeight = 0, unsigned int elseWeight = 0); + + ~IfBuilder() { +#ifndef NDEBUG + assert_true(currentBranch == Branch::kMerge); +#endif + } + + void makeBeginElse(bool branchToMerge = true); + void makeEndIf(bool branchToMerge = true); + + // If there's no then/else block that branches to the merge block, the phi + // parent is the header block - this simplifies then-only usage. + spv::Id getThenPhiParent() const { return thenPhiParent; } + spv::Id getElsePhiParent() const { return elsePhiParent; } + + spv::Id createMergePhi(spv::Id then_variable, spv::Id else_variable) const; + + private: + enum class Branch { + kThen, + kElse, + kMerge, + }; + + IfBuilder(const IfBuilder& ifBuilder) = delete; + IfBuilder& operator=(const IfBuilder& ifBuilder) = delete; + + SpirvBuilder& builder; + spv::Id condition; + unsigned int control; + unsigned int thenWeight; + unsigned int elseWeight; + + spv::Function& function; + + spv::Block* headerBlock; + spv::Block* thenBlock; + spv::Block* elseBlock; + spv::Block* mergeBlock; + + spv::Id thenPhiParent; + spv::Id elsePhiParent; + +#ifndef NDEBUG + Branch currentBranch = Branch::kThen; +#endif + }; + + // Simpler and more flexible (such as multiple cases pointing to the same + // block) compared to makeSwitch. + class SwitchBuilder { + public: + SwitchBuilder(spv::Id selector, unsigned int selection_control, + SpirvBuilder& builder); + ~SwitchBuilder() { assert_true(current_branch_ == Branch::kMerge); } + + void makeBeginDefault(); + void makeBeginCase(unsigned int literal); + void addCurrentCaseLiteral(unsigned int literal); + void makeEndSwitch(); + + // If there's no default block that branches to the merge block, the phi + // parent is the header block - this simplifies case-only usage. 
+ spv::Id getDefaultPhiParent() const { return default_phi_parent_; } + + private: + enum class Branch { + kSelection, + kDefault, + kCase, + kMerge, + }; + + void endSegment(); + + SpirvBuilder& builder_; + spv::Id selector_; + unsigned int selection_control_; + + spv::Function& function_; + + spv::Block* header_block_; + spv::Block* merge_block_; + spv::Block* default_block_ = nullptr; + + std::vector<std::pair<unsigned int, spv::Id>> cases_; + + spv::Id default_phi_parent_; + + Branch current_branch_ = Branch::kSelection; + }; }; } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 8bcaa19fd..399b7079f 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -30,30 +30,35 @@ namespace gpu { SpirvShaderTranslator::Features::Features(bool all) : spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0), max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)), + full_draw_index_uint32(all), + vertex_pipeline_stores_and_atomics(all), + fragment_stores_and_atomics(all), clip_distance(all), cull_distance(all), - demote_to_helper_invocation(all), - fragment_shader_sample_interlock(all), - full_draw_index_uint32(all), image_view_format_swizzle(all), signed_zero_inf_nan_preserve_float32(all), denorm_flush_to_zero_float32(all), - rounding_mode_rte_float32(all) {} + rounding_mode_rte_float32(all), + fragment_shader_sample_interlock(all), + demote_to_helper_invocation(all) {} SpirvShaderTranslator::Features::Features( const ui::vulkan::VulkanProvider::DeviceInfo& device_info) : max_storage_buffer_range(device_info.maxStorageBufferRange), + full_draw_index_uint32(device_info.fullDrawIndexUint32), + vertex_pipeline_stores_and_atomics( + device_info.vertexPipelineStoresAndAtomics), + fragment_stores_and_atomics(device_info.fragmentStoresAndAtomics), clip_distance(device_info.shaderClipDistance), cull_distance(device_info.shaderCullDistance), - 
demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation), - fragment_shader_sample_interlock( - device_info.fragmentShaderSampleInterlock), - full_draw_index_uint32(device_info.fullDrawIndexUint32), image_view_format_swizzle(device_info.imageViewFormatSwizzle), signed_zero_inf_nan_preserve_float32( device_info.shaderSignedZeroInfNanPreserveFloat32), denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32), - rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32) { + rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32), + fragment_shader_sample_interlock( + device_info.fragmentShaderSampleInterlock), + demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation) { if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { spirv_version = spv::Spv_1_5; } else if (device_info.ext_1_2_VK_KHR_spirv_1_4) { @@ -117,6 +122,14 @@ void SpirvShaderTranslator::Reset() { main_interface_.clear(); var_main_registers_ = spv::NoResult; + var_main_memexport_address_ = spv::NoResult; + for (size_t memexport_eM_index = 0; + memexport_eM_index < xe::countof(var_main_memexport_data_); + ++memexport_eM_index) { + var_main_memexport_data_[memexport_eM_index] = spv::NoResult; + } + var_main_memexport_data_written_ = spv::NoResult; + main_memexport_allowed_ = spv::NoResult; var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult; var_main_kill_pixel_ = spv::NoResult; var_main_fsi_color_written_ = spv::NoResult; @@ -310,6 +323,8 @@ void SpirvShaderTranslator::StartTranslation() { main_interface_.push_back(uniform_system_constants_); } + bool memexport_used = IsMemoryExportUsed(); + if (!is_depth_only_fragment_shader_) { // Common uniform buffer - float constants. 
uint32_t float_constant_count = @@ -420,9 +435,10 @@ void SpirvShaderTranslator::StartTranslation() { builder_->addMemberName(type_shared_memory, 0, "shared_memory"); builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationRestrict); - // TODO(Triang3l): Make writable when memexport is implemented. - builder_->addMemberDecoration(type_shared_memory, 0, - spv::DecorationNonWritable); + if (!memexport_used) { + builder_->addMemberDecoration(type_shared_memory, 0, + spv::DecorationNonWritable); + } builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, 0); builder_->addDecoration(type_shared_memory, @@ -509,6 +525,24 @@ void SpirvShaderTranslator::StartTranslation() { builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, type_register_array, "xe_var_registers"); } + if (memexport_used) { + var_main_memexport_address_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float4_, + "xe_var_memexport_address", const_float4_0_); + uint8_t memexport_eM_remaining = current_shader().memexport_eM_written(); + uint32_t memexport_eM_index; + while ( + xe::bit_scan_forward(memexport_eM_remaining, &memexport_eM_index)) { + memexport_eM_remaining &= ~(uint8_t(1) << memexport_eM_index); + var_main_memexport_data_[memexport_eM_index] = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float4_, + fmt::format("xe_var_memexport_data_{}", memexport_eM_index).c_str(), + const_float4_0_); + } + var_main_memexport_data_written_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_uint_, + "xe_var_memexport_data_written", const_uint_0_); + } } // Write the execution model-specific prologue with access to variables in the @@ -647,6 +681,10 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { builder_->setBuildPoint(main_loop_merge_); } + // Write data for the last memexport. 
+ ExportToMemory( + current_shader().memexport_eM_potentially_written_before_end()); + if (is_vertex_shader()) { CompleteVertexOrTessEvalShaderInMain(); } else if (is_pixel_shader()) { @@ -1077,6 +1115,34 @@ void SpirvShaderTranslator::ProcessJumpInstruction( builder_->createBranch(main_loop_continue_); } +void SpirvShaderTranslator::ProcessAllocInstruction( + const ParsedAllocInstruction& instr, uint8_t export_eM) { + bool start_memexport = instr.type == ucode::AllocType::kMemory && + current_shader().memexport_eM_written(); + if (export_eM || start_memexport) { + CloseExecConditionals(); + } + + if (export_eM) { + ExportToMemory(export_eM); + // Reset which eM# elements have been written. + builder_->createStore(const_uint_0_, var_main_memexport_data_written_); + // Break dependencies from the previous memexport. + uint8_t export_eM_remaining = export_eM; + uint32_t eM_index; + while (xe::bit_scan_forward(export_eM_remaining, &eM_index)) { + export_eM_remaining &= ~(uint8_t(1) << eM_index); + builder_->createStore(const_float4_0_, + var_main_memexport_data_[eM_index]); + } + } + + if (start_memexport) { + // Initialize eA to an invalid address. + builder_->createStore(const_float4_0_, var_main_memexport_address_); + } +} + spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( spv::Id scalar, spv::Id vector_type) { bool is_constant = builder_->isConstant(scalar); @@ -1205,6 +1271,8 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { } void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { + Modification shader_modification = GetSpirvShaderModification(); + // The edge flag isn't used for any purpose by the translator. 
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) { id_vector_temp_.clear(); @@ -1244,11 +1312,40 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { } } - Modification shader_modification = GetSpirvShaderModification(); - // TODO(Triang3l): For HostVertexShaderType::kRectangeListAsTriangleStrip, // start the vertex loop, and load the index there. + // Check if memory export should be allowed for this host vertex of the guest + // primitive to make sure export is done only once for each guest vertex. + if (IsMemoryExportUsed()) { + spv::Id memexport_allowed_for_host_vertex_of_guest_primitive = + spv::NoResult; + if (shader_modification.vertex.host_vertex_shader_type == + Shader::HostVertexShaderType::kPointListAsTriangleStrip) { + // Only for one host vertex for the point. + memexport_allowed_for_host_vertex_of_guest_primitive = + builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + builder_->createUnaryOp( + spv::OpBitcast, type_uint_, + builder_->createLoad(input_vertex_index_, + spv::NoPrecision)), + builder_->makeUintConstant(3)), + const_uint_0_); + } + + if (memexport_allowed_for_host_vertex_of_guest_primitive != spv::NoResult) { + main_memexport_allowed_ = + main_memexport_allowed_ != spv::NoResult + ? builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, main_memexport_allowed_, + memexport_allowed_for_host_vertex_of_guest_primitive) + : memexport_allowed_for_host_vertex_of_guest_primitive; + } + } + // Load the vertex index or the tessellation parameters. if (register_count()) { // TODO(Triang3l): Barycentric coordinates and patch index. 
@@ -1272,89 +1369,70 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { builder_->makeUintConstant(static_cast( kSysFlag_ComputeOrPrimitiveVertexIndexLoad))), const_uint_0_); - spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint(); - spv::Block& block_load_vertex_index_start = builder_->makeNewBlock(); - spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_load_vertex_index_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(load_vertex_index, - &block_load_vertex_index_start, - &block_load_vertex_index_merge); - builder_->setBuildPoint(&block_load_vertex_index_start); - // Check if the index is 32-bit. - spv::Id vertex_index_is_32bit = builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, - builder_->makeUintConstant(static_cast( - kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))), - const_uint_0_); - // Calculate the vertex index address in the shared memory. - id_vector_temp_.clear(); - id_vector_temp_.push_back( - builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress)); - spv::Id vertex_index_address = builder_->createBinOp( - spv::OpIAdd, type_uint_, - builder_->createLoad( - builder_->createAccessChain(spv::StorageClassUniform, - uniform_system_constants_, - id_vector_temp_), - spv::NoPrecision), - builder_->createBinOp( - spv::OpShiftLeftLogical, type_uint_, vertex_index, - builder_->createTriOp(spv::OpSelect, type_uint_, - vertex_index_is_32bit, const_uint_2, - builder_->makeUintConstant(1)))); - // Load the 32 bits containing the whole vertex index or two 16-bit - // vertex indices. - // TODO(Triang3l): Bounds checking. 
- spv::Id loaded_vertex_index = - LoadUint32FromSharedMemory(builder_->createUnaryOp( - spv::OpBitcast, type_int_, - builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, - vertex_index_address, const_uint_2))); - // Extract the 16-bit index from the loaded 32 bits if needed. - loaded_vertex_index = builder_->createTriOp( - spv::OpSelect, type_uint_, vertex_index_is_32bit, - loaded_vertex_index, - builder_->createTriOp( - spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index, - builder_->createBinOp( - spv::OpShiftLeftLogical, type_uint_, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - vertex_index_address, const_uint_2), - builder_->makeUintConstant(4 - 1)), - builder_->makeUintConstant(16))); - // Endian-swap the loaded index. - id_vector_temp_.clear(); - id_vector_temp_.push_back( - builder_->makeIntConstant(kSystemConstantVertexIndexEndian)); - loaded_vertex_index = EndianSwap32Uint( - loaded_vertex_index, - builder_->createLoad( - builder_->createAccessChain(spv::StorageClassUniform, - uniform_system_constants_, - id_vector_temp_), - spv::NoPrecision)); - // Get the actual build point for phi. - spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_load_vertex_index_merge); - // Select between the loaded index and the original index from Vulkan. 
- builder_->setBuildPoint(&block_load_vertex_index_merge); + SpirvBuilder::IfBuilder load_vertex_index_if( + load_vertex_index, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id loaded_vertex_index; { - std::unique_ptr loaded_vertex_index_phi_op = - std::make_unique(builder_->getUniqueId(), - type_uint_, spv::OpPhi); - loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_end.getId()); - loaded_vertex_index_phi_op->addIdOperand(vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_pre.getId()); - vertex_index = loaded_vertex_index_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(loaded_vertex_index_phi_op)); + // Check if the index is 32-bit. + spv::Id vertex_index_is_32bit = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant(static_cast( + kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))), + const_uint_0_); + // Calculate the vertex index address in the shared memory. + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress)); + spv::Id vertex_index_address = builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, + id_vector_temp_), + spv::NoPrecision), + builder_->createBinOp( + spv::OpShiftLeftLogical, type_uint_, vertex_index, + builder_->createTriOp(spv::OpSelect, type_uint_, + vertex_index_is_32bit, const_uint_2, + builder_->makeUintConstant(1)))); + // Load the 32 bits containing the whole vertex index or two 16-bit + // vertex indices. + // TODO(Triang3l): Bounds checking. 
+ loaded_vertex_index = + LoadUint32FromSharedMemory(builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + vertex_index_address, const_uint_2))); + // Extract the 16-bit index from the loaded 32 bits if needed. + loaded_vertex_index = builder_->createTriOp( + spv::OpSelect, type_uint_, vertex_index_is_32bit, + loaded_vertex_index, + builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index, + builder_->createBinOp( + spv::OpShiftLeftLogical, type_uint_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + vertex_index_address, const_uint_2), + builder_->makeUintConstant(4 - 1)), + builder_->makeUintConstant(16))); + // Endian-swap the loaded index. + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantVertexIndexEndian)); + loaded_vertex_index = EndianSwap32Uint( + loaded_vertex_index, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, + id_vector_temp_), + spv::NoPrecision)); } + load_vertex_index_if.makeEndIf(); + // Select between the loaded index and the original index from Vulkan. + vertex_index = load_vertex_index_if.createMergePhi(loaded_vertex_index, + vertex_index); } else { // TODO(Triang3l): Close line loop primitive. 
// Load the unswapped index as uint for swapping, or for indirect @@ -1368,53 +1446,35 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { builder_->makeUintConstant( static_cast(kSysFlag_VertexIndexLoad))), const_uint_0_); - spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint(); - spv::Block& block_load_vertex_index_start = builder_->makeNewBlock(); - spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_load_vertex_index_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(load_vertex_index, - &block_load_vertex_index_start, - &block_load_vertex_index_merge); - builder_->setBuildPoint(&block_load_vertex_index_start); - // Load the 32-bit index. - // TODO(Triang3l): Bounds checking. - id_vector_temp_.clear(); - id_vector_temp_.push_back( - builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress)); - spv::Id loaded_vertex_index = - LoadUint32FromSharedMemory(builder_->createUnaryOp( - spv::OpBitcast, type_int_, - builder_->createBinOp( - spv::OpIAdd, type_uint_, - builder_->createBinOp( - spv::OpShiftRightLogical, type_uint_, - builder_->createLoad( - builder_->createAccessChain( - spv::StorageClassUniform, - uniform_system_constants_, id_vector_temp_), - spv::NoPrecision), - builder_->makeUintConstant(2)), - vertex_index))); - // Get the actual build point for phi. - spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_load_vertex_index_merge); - // Select between the loaded index and the original index from Vulkan. 
- builder_->setBuildPoint(&block_load_vertex_index_merge); + SpirvBuilder::IfBuilder load_vertex_index_if( + load_vertex_index, spv::SelectionControlDontFlattenMask, + *builder_); + spv::Id loaded_vertex_index; { - std::unique_ptr loaded_vertex_index_phi_op = - std::make_unique(builder_->getUniqueId(), - type_uint_, spv::OpPhi); - loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_end.getId()); - loaded_vertex_index_phi_op->addIdOperand(vertex_index); - loaded_vertex_index_phi_op->addIdOperand( - block_load_vertex_index_pre.getId()); - vertex_index = loaded_vertex_index_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(loaded_vertex_index_phi_op)); + // Load the 32-bit index. + // TODO(Triang3l): Bounds checking. + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant( + kSystemConstantVertexIndexLoadAddress)); + loaded_vertex_index = + LoadUint32FromSharedMemory(builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, + builder_->createLoad( + builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision), + builder_->makeUintConstant(2)), + vertex_index))); } + load_vertex_index_if.makeEndIf(); + // Select between the loaded index and the original index from Vulkan. + vertex_index = load_vertex_index_if.createMergePhi( + loaded_vertex_index, vertex_index); } // Endian-swap the index. 
id_vector_temp_.clear(); @@ -1864,6 +1924,13 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { } void SpirvShaderTranslator::StartFragmentShaderInMain() { + // TODO(Triang3l): Allow memory export with resolution scaling only for the + // center host pixel, with sample shading (for depth format conversion) only + // for the bottom-right sample (unlike in Direct3D, the sample mask input + // doesn't include covered samples of the primitive that correspond to other + // invocations, so use the sample that's the most friendly to the half-pixel + // offset). + // Set up pixel killing from within the translated shader without affecting // the control flow (unlike with OpKill), similarly to how pixel killing works // on the Xenos, and also keeping a single critical section exit and return @@ -2497,6 +2564,26 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, var_main_fsi_color_written_); } } break; + case InstructionStorageTarget::kExportAddress: { + // spv::NoResult if memory export usage is unsupported or invalid. + target_pointer = var_main_memexport_address_; + } break; + case InstructionStorageTarget::kExportData: { + // spv::NoResult if memory export usage is unsupported or invalid. + target_pointer = var_main_memexport_data_[result.storage_index]; + if (target_pointer != spv::NoResult) { + // Mark that the eM# has been written to and needs to be exported. + assert_true(var_main_memexport_data_written_ != spv::NoResult); + builder_->createStore( + builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createLoad(var_main_memexport_data_written_, + spv::NoPrecision), + builder_->makeUintConstant(uint32_t(1) + << result.storage_index)), + var_main_memexport_data_written_); + } + } break; default: // TODO(Triang3l): All storage targets. 
break; @@ -2808,40 +2895,25 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { static_cast(xenos::Endian::k8in32))); spv::Id is_8in16_or_8in32 = builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32); - spv::Block& block_pre_8in16 = *builder_->getBuildPoint(); - assert_false(block_pre_8in16.isTerminated()); - spv::Block& block_8in16 = builder_->makeNewBlock(); - spv::Block& block_8in16_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_8in16_merge, - spv::SelectionControlMaskNone); - builder_->createConditionalBranch(is_8in16_or_8in32, &block_8in16, - &block_8in16_merge); - builder_->setBuildPoint(&block_8in16); - spv::Id swapped_8in16 = builder_->createBinOp( - spv::OpBitwiseOr, type, - builder_->createBinOp( - spv::OpBitwiseAnd, type, - builder_->createBinOp(spv::OpShiftRightLogical, type, value, - const_uint_8_typed), - const_uint_00ff00ff_typed), - builder_->createBinOp( - spv::OpShiftLeftLogical, type, - builder_->createBinOp(spv::OpBitwiseAnd, type, value, - const_uint_00ff00ff_typed), - const_uint_8_typed)); - builder_->createBranch(&block_8in16_merge); - builder_->setBuildPoint(&block_8in16_merge); + SpirvBuilder::IfBuilder if_8in16(is_8in16_or_8in32, + spv::SelectionControlMaskNone, *builder_); + spv::Id swapped_8in16; { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), type, - spv::OpPhi); - phi_op->addIdOperand(swapped_8in16); - phi_op->addIdOperand(block_8in16.getId()); - phi_op->addIdOperand(value); - phi_op->addIdOperand(block_pre_8in16.getId()); - value = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + swapped_8in16 = builder_->createBinOp( + spv::OpBitwiseOr, type, + builder_->createBinOp( + spv::OpBitwiseAnd, type, + builder_->createBinOp(spv::OpShiftRightLogical, type, value, + const_uint_8_typed), + const_uint_00ff00ff_typed), + builder_->createBinOp( + spv::OpShiftLeftLogical, type, + 
builder_->createBinOp(spv::OpBitwiseAnd, type, value, + const_uint_00ff00ff_typed), + const_uint_8_typed)); } + if_8in16.makeEndIf(); + value = if_8in16.createMergePhi(swapped_8in16, value); // 16-in-32 or another half of 8-in-32 (doing 16-in-32 swap). spv::Id is_16in32 = builder_->createBinOp( @@ -2850,46 +2922,75 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { static_cast(xenos::Endian::k16in32))); spv::Id is_8in32_or_16in32 = builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32); - spv::Block& block_pre_16in32 = *builder_->getBuildPoint(); - spv::Block& block_16in32 = builder_->makeNewBlock(); - spv::Block& block_16in32_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_16in32_merge, - spv::SelectionControlMaskNone); - builder_->createConditionalBranch(is_8in32_or_16in32, &block_16in32, - &block_16in32_merge); - builder_->setBuildPoint(&block_16in32); - spv::Id swapped_16in32 = builder_->createQuadOp( - spv::OpBitFieldInsert, type, - builder_->createBinOp(spv::OpShiftRightLogical, type, value, - const_uint_16_typed), - value, builder_->makeIntConstant(16), builder_->makeIntConstant(16)); - builder_->createBranch(&block_16in32_merge); - builder_->setBuildPoint(&block_16in32_merge); + SpirvBuilder::IfBuilder if_16in32(is_8in32_or_16in32, + spv::SelectionControlMaskNone, *builder_); + spv::Id swapped_16in32; { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), type, - spv::OpPhi); - phi_op->addIdOperand(swapped_16in32); - phi_op->addIdOperand(block_16in32.getId()); - phi_op->addIdOperand(value); - phi_op->addIdOperand(block_pre_16in32.getId()); - value = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + swapped_16in32 = builder_->createQuadOp( + spv::OpBitFieldInsert, type, + builder_->createBinOp(spv::OpShiftRightLogical, type, value, + const_uint_16_typed), + value, builder_->makeIntConstant(16), builder_->makeIntConstant(16)); 
} + if_16in32.makeEndIf(); + value = if_16in32.createMergePhi(swapped_16in32, value); return value; } +spv::Id SpirvShaderTranslator::EndianSwap128Uint4(spv::Id value, + spv::Id endian) { + // Change 8-in-64 and 8-in-128 to 8-in-32, and then swap within 32 bits. + + spv::Id is_8in64 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian128::k8in64))); + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(1); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(3); + uint_vector_temp_.push_back(2); + value = builder_->createTriOp( + spv::OpSelect, type_uint4_, is_8in64, + builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value, + uint_vector_temp_), + value); + + spv::Id is_8in128 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian128::k8in128))); + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(3); + uint_vector_temp_.push_back(2); + uint_vector_temp_.push_back(1); + uint_vector_temp_.push_back(0); + value = builder_->createTriOp( + spv::OpSelect, type_uint4_, is_8in128, + builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value, + uint_vector_temp_), + value); + + endian = builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in64, is_8in128), + builder_->makeUintConstant( + static_cast(xenos::Endian128::k8in32)), + endian); + + return EndianSwap32Uint(value, endian); +} + spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( spv::Id address_dwords_int) { - spv::Block& head_block = *builder_->getBuildPoint(); - assert_false(head_block.isTerminated()); - spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 ? 
spv::StorageClassStorageBuffer : spv::StorageClassUniform; - uint32_t buffer_count_log2 = GetSharedMemoryStorageBufferCountLog2(); - if (!buffer_count_log2) { + + uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2(); + + if (!binding_count_log2) { // Single binding - load directly. id_vector_temp_.clear(); // The only SSBO struct member. @@ -2903,8 +3004,10 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( // The memory is split into multiple bindings - check which binding to load // from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the - // array with the variable itself because it needs VK_EXT_descriptor_indexing. - uint32_t binding_address_bits = (29 - 2) - buffer_count_log2; + // array with the variable itself because it needs non-uniform storage buffer + // indexing. + + uint32_t binding_address_bits = (29 - 2) - binding_count_log2; spv::Id binding_index = builder_->createBinOp( spv::OpShiftRightLogical, type_uint_, builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), @@ -2913,51 +3016,119 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( spv::OpBitwiseAnd, type_int_, address_dwords_int, builder_->makeIntConstant( int((uint32_t(1) << binding_address_bits) - 1))); - uint32_t buffer_count = 1 << buffer_count_log2; - spv::Block* switch_case_blocks[512 / 128]; - for (uint32_t i = 0; i < buffer_count; ++i) { - switch_case_blocks[i] = &builder_->makeNewBlock(); - } - spv::Block& switch_merge_block = builder_->makeNewBlock(); - spv::Id value_phi_result = builder_->getUniqueId(); - std::unique_ptr value_phi_op = - std::make_unique(value_phi_result, type_uint_, - spv::OpPhi); - builder_->createSelectionMerge(&switch_merge_block, - spv::SelectionControlDontFlattenMask); - { - std::unique_ptr switch_op = - std::make_unique(spv::OpSwitch); - switch_op->addIdOperand(binding_index); - // Highest binding index is the default case. 
- switch_op->addIdOperand(switch_case_blocks[buffer_count - 1]->getId()); - switch_case_blocks[buffer_count - 1]->addPredecessor(&head_block); - for (uint32_t i = 0; i < buffer_count - 1; ++i) { - switch_op->addImmediateOperand(int(i)); - switch_op->addIdOperand(switch_case_blocks[i]->getId()); - switch_case_blocks[i]->addPredecessor(&head_block); - } - builder_->getBuildPoint()->addInstruction(std::move(switch_op)); - } - for (uint32_t i = 0; i < buffer_count; ++i) { - builder_->setBuildPoint(switch_case_blocks[i]); - id_vector_temp_.clear(); - id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); - // The only SSBO struct member. - id_vector_temp_.push_back(const_int_0_); - id_vector_temp_.push_back(binding_address); + + auto value_phi_op = std::make_unique( + builder_->getUniqueId(), type_uint_, spv::OpPhi); + // Zero if out of bounds. + value_phi_op->addIdOperand(const_uint_0_); + value_phi_op->addIdOperand(builder_->getBuildPoint()->getId()); + + SpirvBuilder::SwitchBuilder binding_switch( + binding_index, spv::SelectionControlDontFlattenMask, *builder_); + uint32_t binding_count = uint32_t(1) << binding_count_log2; + + id_vector_temp_.clear(); + id_vector_temp_.push_back(spv::NoResult); + // The only SSBO struct member. 
+ id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(binding_address); + + for (uint32_t i = 0; i < binding_count; ++i) { + binding_switch.makeBeginCase(i); + id_vector_temp_[0] = builder_->makeIntConstant(int(i)); value_phi_op->addIdOperand(builder_->createLoad( builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_), spv::NoPrecision)); - value_phi_op->addIdOperand(switch_case_blocks[i]->getId()); - builder_->createBranch(&switch_merge_block); + value_phi_op->addIdOperand(builder_->getBuildPoint()->getId()); } - builder_->setBuildPoint(&switch_merge_block); + + binding_switch.makeEndSwitch(); + + spv::Id value_phi_result = value_phi_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(value_phi_op)); return value_phi_result; } +void SpirvShaderTranslator::StoreUint32ToSharedMemory( + spv::Id value, spv::Id address_dwords_int, spv::Id replace_mask) { + spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 + ? spv::StorageClassStorageBuffer + : spv::StorageClassUniform; + + spv::Id keep_mask = spv::NoResult; + if (replace_mask != spv::NoResult) { + keep_mask = builder_->createUnaryOp(spv::OpNot, type_uint_, replace_mask); + value = builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, value, + replace_mask); + } + + auto store = [&](spv::Id pointer) { + if (replace_mask != spv::NoResult) { + // Don't touch the other bits in the buffer, just modify the needed bits + // in the most up to date uint32 at the address. 
+ spv::Id const_scope_device = builder_->makeUintConstant( + static_cast(spv::ScopeDevice)); + spv::Id const_semantics_relaxed = const_uint_0_; + builder_->createQuadOp(spv::OpAtomicAnd, type_uint_, pointer, + const_scope_device, const_semantics_relaxed, + keep_mask); + builder_->createQuadOp(spv::OpAtomicOr, type_uint_, pointer, + const_scope_device, const_semantics_relaxed, + value); + } else { + builder_->createStore(value, pointer); + } + }; + + uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2(); + + if (!binding_count_log2) { + // Single binding - store directly. + id_vector_temp_.clear(); + // The only SSBO struct member. + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(address_dwords_int); + store(builder_->createAccessChain(storage_class, buffers_shared_memory_, + id_vector_temp_)); + return; + } + + // The memory is split into multiple bindings - check which binding to store + // to. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the + // array with the variable itself because it needs non-uniform storage buffer + // indexing. + + uint32_t binding_address_bits = (29 - 2) - binding_count_log2; + spv::Id binding_index = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), + builder_->makeUintConstant(binding_address_bits)); + spv::Id binding_address = builder_->createBinOp( + spv::OpBitwiseAnd, type_int_, address_dwords_int, + builder_->makeIntConstant( + int((uint32_t(1) << binding_address_bits) - 1))); + + SpirvBuilder::SwitchBuilder binding_switch( + binding_index, spv::SelectionControlDontFlattenMask, *builder_); + uint32_t binding_count = uint32_t(1) << binding_count_log2; + + id_vector_temp_.clear(); + id_vector_temp_.push_back(spv::NoResult); + // The only SSBO struct member. 
+ id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(binding_address); + + for (uint32_t i = 0; i < binding_count; ++i) { + binding_switch.makeBeginCase(i); + id_vector_temp_[0] = builder_->makeIntConstant(int(i)); + store(builder_->createAccessChain(storage_class, buffers_shared_memory_, + id_vector_temp_)); + } + + binding_switch.makeEndSwitch(); +} + spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated) { spv::Id value_type = builder_->getTypeId(gamma); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 0ed368ae4..aefb00bf6 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -323,17 +323,28 @@ class SpirvShaderTranslator : public ShaderTranslator { explicit Features( const ui::vulkan::VulkanProvider::DeviceInfo& device_info); explicit Features(bool all = false); + unsigned int spirv_version; + uint32_t max_storage_buffer_range; + + bool full_draw_index_uint32; + + bool vertex_pipeline_stores_and_atomics; + bool fragment_stores_and_atomics; + bool clip_distance; bool cull_distance; - bool demote_to_helper_invocation; - bool fragment_shader_sample_interlock; - bool full_draw_index_uint32; + bool image_view_format_swizzle; + bool signed_zero_inf_nan_preserve_float32; bool denorm_flush_to_zero_float32; bool rounding_mode_rte_float32; + + bool fragment_shader_sample_interlock; + + bool demote_to_helper_invocation; }; SpirvShaderTranslator(const Features& features, @@ -424,6 +435,8 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) override; void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; + void ProcessAllocInstruction(const ParsedAllocInstruction& instr, + uint8_t export_eM) override; void ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) override; @@ -470,6 +483,11 @@ class 
SpirvShaderTranslator : public ShaderTranslator { Shader::IsHostVertexShaderTypeDomain( GetSpirvShaderModification().vertex.host_vertex_shader_type); } + bool IsSpirvComputeShader() const { + return is_vertex_shader() && + GetSpirvShaderModification().vertex.host_vertex_shader_type == + Shader::HostVertexShaderType::kMemExportCompute; + } bool IsExecutionModeEarlyFragmentTests() const { return is_pixel_shader() && @@ -567,24 +585,48 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs, spv::Id operand_1_abs); // Conditionally discard the current fragment. Changes the build point. - void KillPixel(spv::Id condition); + void KillPixel(spv::Id condition, + uint8_t memexport_eM_potentially_written_before); // Return type is a xe::bit_count(result.GetUsedResultComponents())-component // float vector or a single float, depending on whether it's a reduction // instruction (check getTypeId of the result), or returns spv::NoResult if // nothing to store. - spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr, - bool& predicate_written); + spv::Id ProcessVectorAluOperation( + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written); // Returns a float value to write to the previous scalar register and to the // destination. If the return value is ps itself (in the retain_prev case), // returns spv::NoResult (handled as a special case, so if it's retain_prev, // but don't need to write to anywhere, no OpLoad(ps) will be done). - spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr, - bool& predicate_written); + spv::Id ProcessScalarAluOperation( + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written); // Perform endian swap of a uint scalar or vector. spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian); + // Perform endian swap of a uint4 vector. 
+ spv::Id EndianSwap128Uint4(spv::Id value, spv::Id endian); spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int); + // If `replace_mask` is provided, the bits specified in the mask will be + // replaced with those from the value via OpAtomicAnd/Or. + // Bits of `value` not in `replace_mask` will be ignored. + void StoreUint32ToSharedMemory(spv::Id value, spv::Id address_dwords_int, + spv::Id replace_mask = spv::NoResult); + + bool IsMemoryExportSupported() const { + if (is_pixel_shader()) { + return features_.fragment_stores_and_atomics; + } + return features_.vertex_pipeline_stores_and_atomics || + IsSpirvComputeShader(); + } + + bool IsMemoryExportUsed() const { + return current_shader().memexport_eM_written() && IsMemoryExportSupported(); + } + + void ExportToMemory(uint8_t export_eM); // The source may be a floating-point scalar or a vector. spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated); @@ -605,7 +647,7 @@ class SpirvShaderTranslator : public ShaderTranslator { void SampleTexture(spv::Builder::TextureParameters& texture_parameters, spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned, spv::Id image_signed, - spv::Id sampler, spv::Id is_all_signed, + spv::Id sampler, spv::Id is_any_unsigned, spv::Id is_any_signed, spv::Id& result_unsigned_out, spv::Id& result_signed_out, spv::Id lerp_factor = spv::NoResult, @@ -872,6 +914,21 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id var_main_tfetch_gradients_v_; // float4[register_count()]. spv::Id var_main_registers_; + // Memory export variables are created only when needed. + // float4. + spv::Id var_main_memexport_address_; + // Each is float4. + spv::Id var_main_memexport_data_[ucode::kMaxMemExportElementCount]; + // Bit field of which eM# elements have been written so far by the invocation + // since the last memory write - uint. 
+ spv::Id var_main_memexport_data_written_; + // If memory export is disabled in certain invocations or (if emulating some + // primitive types without a geometry shader) at specific guest vertex loop + // iterations because the translated shader is executed multiple times for the + // same guest vertex or pixel, this contains whether memory export is allowed + // in the current execution of the translated code. + // bool. + spv::Id main_memexport_allowed_; // VS only - float3 (special exports). spv::Id var_main_point_size_edge_flag_kill_vertex_; // PS, only when needed - bool. diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 05e41d5ab..1e7580e34 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -39,31 +39,23 @@ spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value, const_float_vectors_0_[num_components - 1], value); } -void SpirvShaderTranslator::KillPixel(spv::Id condition) { - // Same calls as in spv::Builder::If. - spv::Function& function = builder_->getBuildPoint()->getParent(); - spv::Block* kill_block = new spv::Block(builder_->getUniqueId(), function); - spv::Block* merge_block = new spv::Block(builder_->getUniqueId(), function); - spv::Block& header_block = *builder_->getBuildPoint(); - - function.addBlock(kill_block); - builder_->setBuildPoint(kill_block); - // Kill without influencing the control flow in the translated shader. - if (var_main_kill_pixel_ != spv::NoResult) { - builder_->createStore(builder_->makeBoolConstant(true), - var_main_kill_pixel_); +void SpirvShaderTranslator::KillPixel( + spv::Id condition, uint8_t memexport_eM_potentially_written_before) { + SpirvBuilder::IfBuilder kill_if(condition, spv::SelectionControlMaskNone, + *builder_); + { + // Perform outstanding memory exports before the invocation becomes inactive + // and storage writes are disabled. 
+ ExportToMemory(memexport_eM_potentially_written_before); + if (var_main_kill_pixel_ != spv::NoResult) { + builder_->createStore(builder_->makeBoolConstant(true), + var_main_kill_pixel_); + } + if (features_.demote_to_helper_invocation) { + builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT); + } } - if (features_.demote_to_helper_invocation) { - builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT); - } - builder_->createBranch(merge_block); - - builder_->setBuildPoint(&header_block); - builder_->createSelectionMerge(merge_block, spv::SelectionControlMaskNone); - builder_->createConditionalBranch(condition, kill_block, merge_block); - - function.addBlock(merge_block); - builder_->setBuildPoint(merge_block); + kill_if.makeEndIf(); } void SpirvShaderTranslator::ProcessAluInstruction( @@ -89,12 +81,12 @@ void SpirvShaderTranslator::ProcessAluInstruction( // Whether the instruction has changed the predicate, and it needs to be // checked again later. bool predicate_written_vector = false; - spv::Id vector_result = - ProcessVectorAluOperation(instr, predicate_written_vector); + spv::Id vector_result = ProcessVectorAluOperation( + instr, memexport_eM_potentially_written_before, predicate_written_vector); bool predicate_written_scalar = false; - spv::Id scalar_result = - ProcessScalarAluOperation(instr, predicate_written_scalar); + spv::Id scalar_result = ProcessScalarAluOperation( + instr, memexport_eM_potentially_written_before, predicate_written_scalar); if (scalar_result != spv::NoResult) { EnsureBuildPointAvailable(); builder_->createStore(scalar_result, var_main_previous_scalar_); @@ -118,7 +110,8 @@ void SpirvShaderTranslator::ProcessAluInstruction( } spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( - const ParsedAluInstruction& instr, bool& predicate_written) { + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written) { predicate_written = false; uint32_t used_result_components = 
@@ -564,7 +557,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( spv::Id ma_z_result[4] = {}, ma_yx_result[4] = {}; // Check if the major axis is Z (abs(z) >= abs(x) && abs(z) >= abs(y)). - spv::Builder::If ma_z_if( + SpirvBuilder::IfBuilder ma_z_if( builder_->createBinOp( spv::OpLogicalAnd, type_bool_, builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, @@ -596,14 +589,13 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } } } - spv::Block& ma_z_end_block = *builder_->getBuildPoint(); ma_z_if.makeBeginElse(); { spv::Id ma_y_result[4] = {}, ma_x_result[4] = {}; // The major axis is not Z - create an inner conditional to check if the // major axis is Y (abs(y) >= abs(x)). - spv::Builder::If ma_y_if( + SpirvBuilder::IfBuilder ma_y_if( builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, operand_abs[1], operand_abs[0]), spv::SelectionControlMaskNone, *builder_); @@ -629,7 +621,6 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } } } - spv::Block& ma_y_end_block = *builder_->getBuildPoint(); ma_y_if.makeBeginElse(); { // The major axis is X. 
@@ -654,7 +645,6 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } } } - spv::Block& ma_x_end_block = *builder_->getBuildPoint(); ma_y_if.makeEndIf(); // The major axis is Y or X - choose the options of the result from Y @@ -663,18 +653,10 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( if (!(used_result_components & (1 << i))) { continue; } - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - phi_op->addIdOperand(ma_y_result[i]); - phi_op->addIdOperand(ma_y_end_block.getId()); - phi_op->addIdOperand(ma_x_result[i]); - phi_op->addIdOperand(ma_x_end_block.getId()); - ma_yx_result[i] = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + ma_yx_result[i] = + ma_y_if.createMergePhi(ma_y_result[i], ma_x_result[i]); } } - spv::Block& ma_yx_end_block = *builder_->getBuildPoint(); ma_z_if.makeEndIf(); // Choose the result options from Z and YX cases. @@ -683,15 +665,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( if (!(used_result_components & (1 << i))) { continue; } - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - phi_op->addIdOperand(ma_z_result[i]); - phi_op->addIdOperand(ma_z_end_block.getId()); - phi_op->addIdOperand(ma_yx_result[i]); - phi_op->addIdOperand(ma_yx_end_block.getId()); - id_vector_temp_.push_back(phi_op->getResultId()); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + id_vector_temp_.push_back( + ma_z_if.createMergePhi(ma_z_result[i], ma_yx_result[i])); } assert_true(id_vector_temp_.size() == used_result_component_count); if (used_result_components & 0b0100) { @@ -799,14 +774,16 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( case ucode::AluVectorOpcode::kKillGt: case ucode::AluVectorOpcode::kKillGe: case ucode::AluVectorOpcode::kKillNe: { - KillPixel(builder_->createUnaryOp( - spv::OpAny, type_bool_, - builder_->createBinOp( - 
spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_, - GetOperandComponents(operand_storage[0], instr.vector_operands[0], - 0b1111), - GetOperandComponents(operand_storage[1], instr.vector_operands[1], - 0b1111)))); + KillPixel( + builder_->createUnaryOp( + spv::OpAny, type_bool_, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_, + GetOperandComponents(operand_storage[0], + instr.vector_operands[0], 0b1111), + GetOperandComponents(operand_storage[1], + instr.vector_operands[1], 0b1111))), + memexport_eM_potentially_written_before); return const_float_0_; } @@ -892,7 +869,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( - const ParsedAluInstruction& instr, bool& predicate_written) { + const ParsedAluInstruction& instr, + uint8_t memexport_eM_potentially_written_before, bool& predicate_written) { predicate_written = false; spv::Id operand_storage[2] = {}; @@ -1044,10 +1022,9 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( spv::OpLogicalAnd, type_bool_, condition, builder_->createBinOp(spv::OpFOrdGreaterThan, type_bool_, b, const_float_0_)); - spv::Block& pre_multiply_if_block = *builder_->getBuildPoint(); + SpirvBuilder::IfBuilder multiply_if( + condition, spv::SelectionControlMaskNone, *builder_); spv::Id product; - spv::Builder::If multiply_if(condition, spv::SelectionControlMaskNone, - *builder_); { // Multiplication case. spv::Id a = instr.scalar_operands[0].GetComponent(0) != @@ -1061,21 +1038,9 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( product = ZeroIfAnyOperandIsZero( product, GetAbsoluteOperand(a, instr.scalar_operands[0]), ps_abs); } - spv::Block& multiply_end_block = *builder_->getBuildPoint(); multiply_if.makeEndIf(); // Merge - choose between the product and -FLT_MAX. 
- { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - phi_op->addIdOperand(product); - phi_op->addIdOperand(multiply_end_block.getId()); - phi_op->addIdOperand(const_float_max_neg); - phi_op->addIdOperand(pre_multiply_if_block.getId()); - spv::Id phi_result = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); - return phi_result; - } + return multiply_if.createMergePhi(product, const_float_max_neg); } case ucode::AluScalarOpcode::kMaxs: @@ -1300,12 +1265,13 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( case ucode::AluScalarOpcode::kKillsNe: case ucode::AluScalarOpcode::kKillsOne: { KillPixel(builder_->createBinOp( - spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, - GetOperandComponents(operand_storage[0], instr.scalar_operands[0], - 0b0001), - instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne - ? const_float_1_ - : const_float_0_)); + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001), + instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne + ? 
const_float_1_ + : const_float_0_), + memexport_eM_potentially_written_before); return const_float_0_; } diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 265082ba1..8f5a74690 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -1145,31 +1145,18 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( z_coordinate_ref = builder_->createNoContractionBinOp( spv::OpFAdd, type_float_, z_coordinate_ref, z_offset); } - spv::Block& block_dimension_head = *builder_->getBuildPoint(); - spv::Block& block_dimension_merge = builder_->makeNewBlock(); - spv::Block& block_dimension_3d = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_dimension_merge, - spv::SelectionControlDontFlattenMask); assert_true(data_is_3d != spv::NoResult); - builder_->createConditionalBranch(data_is_3d, &block_dimension_3d, - &block_dimension_merge); - builder_->setBuildPoint(&block_dimension_3d); - assert_true(z_size != spv::NoResult); - spv::Id z_3d = builder_->createNoContractionBinOp( - spv::OpFDiv, type_float_, z_coordinate_ref, z_size); - builder_->createBranch(&block_dimension_merge); - builder_->setBuildPoint(&block_dimension_merge); + SpirvBuilder::IfBuilder if_data_is_3d( + data_is_3d, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id z_3d; { - std::unique_ptr z_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - z_phi_op->addIdOperand(z_3d); - z_phi_op->addIdOperand(block_dimension_3d.getId()); - z_phi_op->addIdOperand(z_coordinate_ref); - z_phi_op->addIdOperand(block_dimension_head.getId()); - z_coordinate_ref = z_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(z_phi_op)); + assert_true(z_size != spv::NoResult); + z_3d = builder_->createNoContractionBinOp(spv::OpFDiv, type_float_, + z_coordinate_ref, z_size); } + if_data_is_3d.makeEndIf(); + z_coordinate_ref = + 
if_data_is_3d.createMergePhi(z_3d, z_coordinate_ref); } else { // Denormalize the Z coordinate for a stacked texture, and apply the // offset. @@ -1394,63 +1381,39 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( // OpSampledImage must be in the same block as where its result is used. if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Check if the texture is 3D or stacked. - spv::Block& block_dimension_head = *builder_->getBuildPoint(); - spv::Block& block_dimension_3d_start = builder_->makeNewBlock(); - spv::Block& block_dimension_stacked_start = builder_->makeNewBlock(); - spv::Block& block_dimension_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_dimension_merge, - spv::SelectionControlDontFlattenMask); assert_true(data_is_3d != spv::NoResult); - builder_->createConditionalBranch(data_is_3d, - &block_dimension_3d_start, - &block_dimension_stacked_start); - - // 3D. - builder_->setBuildPoint(&block_dimension_3d_start); - id_vector_temp_.clear(); - for (uint32_t i = 0; i < 3; ++i) { - id_vector_temp_.push_back(coordinates[i]); - } - texture_parameters.coords = - builder_->createCompositeConstruct(type_float3_, id_vector_temp_); - spv::Id lod_3d = QueryTextureLod(texture_parameters, - image_3d_unsigned, image_3d_signed, - sampler, swizzled_signs_all_signed); - // Get the actual build point for phi. - spv::Block& block_dimension_3d_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); - - // 2D stacked. - builder_->setBuildPoint(&block_dimension_stacked_start); - id_vector_temp_.clear(); - for (uint32_t i = 0; i < 2; ++i) { - id_vector_temp_.push_back(coordinates[i]); - } - texture_parameters.coords = - builder_->createCompositeConstruct(type_float2_, id_vector_temp_); - spv::Id lod_stacked = QueryTextureLod( - texture_parameters, image_2d_array_or_cube_unsigned, - image_2d_array_or_cube_signed, sampler, - swizzled_signs_all_signed); - // Get the actual build point for phi. 
- spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); - - // Choose between the 3D and the stacked result based on the actual - // data dimensionality. - builder_->setBuildPoint(&block_dimension_merge); + SpirvBuilder::IfBuilder if_data_is_3d( + data_is_3d, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id lod_3d; { - std::unique_ptr dimension_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - dimension_phi_op->addIdOperand(lod_3d); - dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); - dimension_phi_op->addIdOperand(lod_stacked); - dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); - result[0] = dimension_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(dimension_phi_op)); + // 3D. + id_vector_temp_.clear(); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + lod_3d = QueryTextureLod(texture_parameters, image_3d_unsigned, + image_3d_signed, sampler, + swizzled_signs_all_signed); } + if_data_is_3d.makeBeginElse(); + spv::Id lod_stacked; + { + // 2D stacked. + id_vector_temp_.clear(); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = builder_->createCompositeConstruct( + type_float2_, id_vector_temp_); + lod_stacked = QueryTextureLod(texture_parameters, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, + sampler, swizzled_signs_all_signed); + } + if_data_is_3d.makeEndIf(); + result[0] = if_data_is_3d.createMergePhi(lod_3d, lod_stacked); } else { uint32_t lod_query_coordinate_component_count = instr.dimension == xenos::FetchOpDimension::kCube ? 
3 : 2; @@ -1512,6 +1475,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( } } } + spv::Id is_any_unsigned = builder_->createUnaryOp( + spv::OpLogicalNot, type_bool_, is_all_signed); // Load the fetch constant word 4, needed unconditionally for LOD // biasing, for result exponent biasing, and conditionally for stacked @@ -1765,273 +1730,247 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( // component, 2 gradient components, two fetches if the Z axis is // linear-filtered). - spv::Block& block_dimension_head = *builder_->getBuildPoint(); - spv::Block& block_dimension_3d_start = builder_->makeNewBlock(); - spv::Block& block_dimension_stacked_start = builder_->makeNewBlock(); - spv::Block& block_dimension_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_dimension_merge, - spv::SelectionControlDontFlattenMask); assert_true(data_is_3d != spv::NoResult); - builder_->createConditionalBranch(data_is_3d, - &block_dimension_3d_start, - &block_dimension_stacked_start); - - // 3D. - builder_->setBuildPoint(&block_dimension_3d_start); - if (use_computed_lod) { - texture_parameters.gradX = gradients_h; - texture_parameters.gradY = gradients_v; - } - id_vector_temp_.clear(); - for (uint32_t i = 0; i < 3; ++i) { - id_vector_temp_.push_back(coordinates[i]); - } - texture_parameters.coords = - builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + SpirvBuilder::IfBuilder if_data_is_3d( + data_is_3d, spv::SelectionControlDontFlattenMask, *builder_); spv::Id sample_result_unsigned_3d, sample_result_signed_3d; - SampleTexture(texture_parameters, image_operands_mask, - image_3d_unsigned, image_3d_signed, sampler, - is_all_signed, is_any_signed, sample_result_unsigned_3d, - sample_result_signed_3d); - // Get the actual build point after the SampleTexture call for phi. - spv::Block& block_dimension_3d_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); - - // 2D stacked. 
- builder_->setBuildPoint(&block_dimension_stacked_start); - if (use_computed_lod) { - // Extract 2D gradients for stacked textures which are 2D arrays. - uint_vector_temp_.clear(); - uint_vector_temp_.push_back(0); - uint_vector_temp_.push_back(1); - texture_parameters.gradX = builder_->createRvalueSwizzle( - spv::NoPrecision, type_float2_, gradients_h, uint_vector_temp_); - texture_parameters.gradY = builder_->createRvalueSwizzle( - spv::NoPrecision, type_float2_, gradients_v, uint_vector_temp_); - } - // Check if linear filtering is needed. - bool vol_mag_filter_is_fetch_const = - instr.attributes.vol_mag_filter == - xenos::TextureFilter::kUseFetchConst; - bool vol_min_filter_is_fetch_const = - instr.attributes.vol_min_filter == - xenos::TextureFilter::kUseFetchConst; - bool vol_mag_filter_is_linear = - instr.attributes.vol_mag_filter == xenos::TextureFilter::kLinear; - bool vol_min_filter_is_linear = - instr.attributes.vol_min_filter == xenos::TextureFilter::kLinear; - spv::Id vol_filter_is_linear = spv::NoResult; - if (use_computed_lod && - (vol_mag_filter_is_fetch_const || vol_min_filter_is_fetch_const || - vol_mag_filter_is_linear != vol_min_filter_is_linear)) { - // Check if minifying along layers (derivative > 1 along any axis). - spv::Id layer_max_gradient = builder_->createBinBuiltinCall( - type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax, - builder_->createCompositeExtract(gradients_h, type_float_, 2), - builder_->createCompositeExtract(gradients_v, type_float_, 2)); - if (!instr.attributes.unnormalized_coordinates) { - // Denormalize the gradient if provided as normalized. - assert_true(size[2] != spv::NoResult); - layer_max_gradient = builder_->createNoContractionBinOp( - spv::OpFMul, type_float_, layer_max_gradient, size[2]); + { + // 3D. + if (use_computed_lod) { + texture_parameters.gradX = gradients_h; + texture_parameters.gradY = gradients_v; } - // For NaN, considering that magnification is being done. 
- spv::Id is_minifying_z = builder_->createBinOp( - spv::OpFOrdLessThan, type_bool_, layer_max_gradient, - builder_->makeFloatConstant(1.0f)); - // Choose what filter is actually used, the minification or the - // magnification one. - spv::Id vol_mag_filter_is_linear_loaded = - vol_mag_filter_is_fetch_const - ? builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, - fetch_constant_word_4, - builder_->makeUintConstant(UINT32_C(1) << 0)), - const_uint_0_) - : builder_->makeBoolConstant(vol_mag_filter_is_linear); - spv::Id vol_min_filter_is_linear_loaded = - vol_min_filter_is_fetch_const - ? builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, - fetch_constant_word_4, - builder_->makeUintConstant(UINT32_C(1) << 1)), - const_uint_0_) - : builder_->makeBoolConstant(vol_min_filter_is_linear); - vol_filter_is_linear = - builder_->createTriOp(spv::OpSelect, type_bool_, is_minifying_z, - vol_min_filter_is_linear_loaded, - vol_mag_filter_is_linear_loaded); - } else { - // No gradients, or using the same filter overrides for magnifying - // and minifying. Assume always magnifying if no gradients (LOD 0, - // always <= 0). LOD is within 2D layers, not between them (unlike - // in 3D textures, which have mips with depth reduced), so it - // shouldn't have effect on filtering between layers. 
- if (vol_mag_filter_is_fetch_const) { - vol_filter_is_linear = builder_->createBinOp( - spv::OpINotEqual, type_bool_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4, - builder_->makeUintConstant(UINT32_C(1) << 0)), - const_uint_0_); + id_vector_temp_.clear(); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); } + texture_parameters.coords = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + SampleTexture(texture_parameters, image_operands_mask, + image_3d_unsigned, image_3d_signed, sampler, + is_any_unsigned, is_any_signed, + sample_result_unsigned_3d, sample_result_signed_3d); } - spv::Id layer_coordinate = coordinates[2]; - // Linear filtering may be needed either based on a dynamic condition - // (the filtering mode is taken from the fetch constant, or it's - // different for magnification and minification), or on a static one - // (with gradients - specified in the instruction for both - // magnification and minification as linear, without gradients - - // specified for magnification as linear). - // If the filter is linear, subtract 0.5 from the Z coordinate of the - // first layer in filtering because 0.5 is in the middle of it. - if (vol_filter_is_linear != spv::NoResult) { - layer_coordinate = builder_->createTriOp( - spv::OpSelect, type_float_, vol_filter_is_linear, - builder_->createNoContractionBinOp( - spv::OpFSub, type_float_, layer_coordinate, - builder_->makeFloatConstant(0.5f)), - layer_coordinate); - } else if (vol_mag_filter_is_linear) { - layer_coordinate = builder_->createNoContractionBinOp( - spv::OpFSub, type_float_, layer_coordinate, - builder_->makeFloatConstant(0.5f)); - } - // Sample the first layer, needed regardless of whether filtering is - // needed. - // Floor the array layer (Vulkan does rounding to nearest or + 0.5 and - // floor even for the layer index, but on the Xenos, addressing is - // similar to that of 3D textures). 
This is needed for both point and - // linear filtering (with linear, 0.5 was subtracted previously). - spv::Id layer_0_coordinate = builder_->createUnaryBuiltinCall( - type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, - layer_coordinate); - id_vector_temp_.clear(); - id_vector_temp_.push_back(coordinates[0]); - id_vector_temp_.push_back(coordinates[1]); - id_vector_temp_.push_back(layer_0_coordinate); - texture_parameters.coords = - builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + if_data_is_3d.makeBeginElse(); spv::Id sample_result_unsigned_stacked, sample_result_signed_stacked; - SampleTexture(texture_parameters, image_operands_mask, - image_2d_array_or_cube_unsigned, - image_2d_array_or_cube_signed, sampler, is_all_signed, - is_any_signed, sample_result_unsigned_stacked, - sample_result_signed_stacked); - // Sample the second layer if linear filtering is potentially needed - // (conditionally or unconditionally, depending on whether the filter - // needs to be chosen at runtime), and filter. - if (vol_filter_is_linear != spv::NoResult || - vol_mag_filter_is_linear) { - spv::Block& block_z_head = *builder_->getBuildPoint(); - spv::Block& block_z_linear = (vol_filter_is_linear != spv::NoResult) - ? builder_->makeNewBlock() - : block_z_head; - spv::Block& block_z_merge = (vol_filter_is_linear != spv::NoResult) - ? builder_->makeNewBlock() - : block_z_head; - if (vol_filter_is_linear != spv::NoResult) { - builder_->createSelectionMerge( - &block_z_merge, spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( - vol_filter_is_linear, &block_z_linear, &block_z_merge); - builder_->setBuildPoint(&block_z_linear); + { + // 2D stacked. + if (use_computed_lod) { + // Extract 2D gradients for stacked textures which are 2D arrays. 
+ uint_vector_temp_.clear(); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(1); + texture_parameters.gradX = + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + gradients_h, uint_vector_temp_); + texture_parameters.gradY = + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + gradients_v, uint_vector_temp_); } - spv::Id layer_1_coordinate = builder_->createBinOp( - spv::OpFAdd, type_float_, layer_0_coordinate, - builder_->makeFloatConstant(1.0f)); + // Check if linear filtering is needed. + bool vol_mag_filter_is_fetch_const = + instr.attributes.vol_mag_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_min_filter_is_fetch_const = + instr.attributes.vol_min_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_mag_filter_is_linear = instr.attributes.vol_mag_filter == + xenos::TextureFilter::kLinear; + bool vol_min_filter_is_linear = instr.attributes.vol_min_filter == + xenos::TextureFilter::kLinear; + spv::Id vol_filter_is_linear = spv::NoResult; + if (use_computed_lod && + (vol_mag_filter_is_fetch_const || + vol_min_filter_is_fetch_const || + vol_mag_filter_is_linear != vol_min_filter_is_linear)) { + // Check if minifying along layers (derivative > 1 along any + // axis). + spv::Id layer_max_gradient = builder_->createBinBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450NMax, + builder_->createCompositeExtract(gradients_h, type_float_, 2), + builder_->createCompositeExtract(gradients_v, type_float_, + 2)); + if (!instr.attributes.unnormalized_coordinates) { + // Denormalize the gradient if provided as normalized. + assert_true(size[2] != spv::NoResult); + layer_max_gradient = builder_->createNoContractionBinOp( + spv::OpFMul, type_float_, layer_max_gradient, size[2]); + } + // For NaN, considering that magnification is being done. 
+ spv::Id is_minifying_z = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, layer_max_gradient, + builder_->makeFloatConstant(1.0f)); + // Choose what filter is actually used, the minification or the + // magnification one. + spv::Id vol_mag_filter_is_linear_loaded = + vol_mag_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_) + : builder_->makeBoolConstant(vol_mag_filter_is_linear); + spv::Id vol_min_filter_is_linear_loaded = + vol_min_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 1)), + const_uint_0_) + : builder_->makeBoolConstant(vol_min_filter_is_linear); + vol_filter_is_linear = builder_->createTriOp( + spv::OpSelect, type_bool_, is_minifying_z, + vol_min_filter_is_linear_loaded, + vol_mag_filter_is_linear_loaded); + } else { + // No gradients, or using the same filter overrides for magnifying + // and minifying. Assume always magnifying if no gradients (LOD 0, + // always <= 0). LOD is within 2D layers, not between them (unlike + // in 3D textures, which have mips with depth reduced), so it + // shouldn't have effect on filtering between layers. 
+ if (vol_mag_filter_is_fetch_const) { + vol_filter_is_linear = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_); + } + } + spv::Id layer_coordinate = coordinates[2]; + // Linear filtering may be needed either based on a dynamic + // condition (the filtering mode is taken from the fetch constant, + // or it's different for magnification and minification), or on a + // static one (with gradients - specified in the instruction for + // both magnification and minification as linear, without + // gradients - specified for magnification as linear). + // If the filter is linear, subtract 0.5 from the Z coordinate of + // the first layer in filtering because 0.5 is in the middle of it. + if (vol_filter_is_linear != spv::NoResult) { + layer_coordinate = builder_->createTriOp( + spv::OpSelect, type_float_, vol_filter_is_linear, + builder_->createNoContractionBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)), + layer_coordinate); + } else if (vol_mag_filter_is_linear) { + layer_coordinate = builder_->createNoContractionBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)); + } + // Sample the first layer, needed regardless of whether filtering is + // needed. + // Floor the array layer (Vulkan does rounding to nearest or + 0.5 + // and floor even for the layer index, but on the Xenos, addressing + // is similar to that of 3D textures). This is needed for both point + // and linear filtering (with linear, 0.5 was subtracted + // previously). 
+ spv::Id layer_0_coordinate = builder_->createUnaryBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, + layer_coordinate); id_vector_temp_.clear(); id_vector_temp_.push_back(coordinates[0]); id_vector_temp_.push_back(coordinates[1]); - id_vector_temp_.push_back(layer_1_coordinate); + id_vector_temp_.push_back(layer_0_coordinate); texture_parameters.coords = builder_->createCompositeConstruct( type_float3_, id_vector_temp_); - spv::Id layer_lerp_factor = builder_->createUnaryBuiltinCall( - type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract, - layer_coordinate); - spv::Id sample_result_unsigned_stacked_filtered; - spv::Id sample_result_signed_stacked_filtered; SampleTexture( texture_parameters, image_operands_mask, image_2d_array_or_cube_unsigned, image_2d_array_or_cube_signed, - sampler, is_all_signed, is_any_signed, - sample_result_unsigned_stacked_filtered, - sample_result_signed_stacked_filtered, layer_lerp_factor, + sampler, is_any_unsigned, is_any_signed, sample_result_unsigned_stacked, sample_result_signed_stacked); - if (vol_filter_is_linear != spv::NoResult) { - // Get the actual build point after the SampleTexture call for - // phi. 
- spv::Block& block_z_linear_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_z_merge); - builder_->setBuildPoint(&block_z_merge); - { - std::unique_ptr filter_phi_op = - std::make_unique( - builder_->getUniqueId(), type_float4_, spv::OpPhi); - filter_phi_op->addIdOperand( - sample_result_unsigned_stacked_filtered); - filter_phi_op->addIdOperand(block_z_linear_end.getId()); - filter_phi_op->addIdOperand(sample_result_unsigned_stacked); - filter_phi_op->addIdOperand(block_z_head.getId()); - sample_result_unsigned_stacked = filter_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(filter_phi_op)); + // Sample the second layer if linear filtering is potentially needed + // (conditionally or unconditionally, depending on whether the + // filter needs to be chosen at runtime), and filter. + if (vol_filter_is_linear != spv::NoResult || + vol_mag_filter_is_linear) { + spv::Block& block_z_head = *builder_->getBuildPoint(); + spv::Block& block_z_linear = + (vol_filter_is_linear != spv::NoResult) + ? builder_->makeNewBlock() + : block_z_head; + spv::Block& block_z_merge = + (vol_filter_is_linear != spv::NoResult) + ? 
builder_->makeNewBlock() + : block_z_head; + if (vol_filter_is_linear != spv::NoResult) { + builder_->createSelectionMerge( + &block_z_merge, spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch( + vol_filter_is_linear, &block_z_linear, &block_z_merge); + builder_->setBuildPoint(&block_z_linear); } - { - std::unique_ptr filter_phi_op = - std::make_unique( - builder_->getUniqueId(), type_float4_, spv::OpPhi); - filter_phi_op->addIdOperand( - sample_result_signed_stacked_filtered); - filter_phi_op->addIdOperand(block_z_linear_end.getId()); - filter_phi_op->addIdOperand(sample_result_signed_stacked); - filter_phi_op->addIdOperand(block_z_head.getId()); - sample_result_signed_stacked = filter_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(filter_phi_op)); + spv::Id layer_1_coordinate = builder_->createBinOp( + spv::OpFAdd, type_float_, layer_0_coordinate, + builder_->makeFloatConstant(1.0f)); + id_vector_temp_.clear(); + id_vector_temp_.push_back(coordinates[0]); + id_vector_temp_.push_back(coordinates[1]); + id_vector_temp_.push_back(layer_1_coordinate); + texture_parameters.coords = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + spv::Id layer_lerp_factor = builder_->createUnaryBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Fract, + layer_coordinate); + spv::Id sample_result_unsigned_stacked_filtered; + spv::Id sample_result_signed_stacked_filtered; + SampleTexture( + texture_parameters, image_operands_mask, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, sampler, is_any_unsigned, + is_any_signed, sample_result_unsigned_stacked_filtered, + sample_result_signed_stacked_filtered, layer_lerp_factor, + sample_result_unsigned_stacked, sample_result_signed_stacked); + if (vol_filter_is_linear != spv::NoResult) { + // Get the actual build point after the SampleTexture call for + // phi. 
+ spv::Block& block_z_linear_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_z_merge); + builder_->setBuildPoint(&block_z_merge); + { + std::unique_ptr filter_phi_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpPhi); + filter_phi_op->addIdOperand( + sample_result_unsigned_stacked_filtered); + filter_phi_op->addIdOperand(block_z_linear_end.getId()); + filter_phi_op->addIdOperand(sample_result_unsigned_stacked); + filter_phi_op->addIdOperand(block_z_head.getId()); + sample_result_unsigned_stacked = filter_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(filter_phi_op)); + } + { + std::unique_ptr filter_phi_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpPhi); + filter_phi_op->addIdOperand( + sample_result_signed_stacked_filtered); + filter_phi_op->addIdOperand(block_z_linear_end.getId()); + filter_phi_op->addIdOperand(sample_result_signed_stacked); + filter_phi_op->addIdOperand(block_z_head.getId()); + sample_result_signed_stacked = filter_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(filter_phi_op)); + } + } else { + sample_result_unsigned_stacked = + sample_result_unsigned_stacked_filtered; + sample_result_signed_stacked = + sample_result_signed_stacked_filtered; } - } else { - sample_result_unsigned_stacked = - sample_result_unsigned_stacked_filtered; - sample_result_signed_stacked = - sample_result_signed_stacked_filtered; } } - // Get the actual build point for phi. - spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint(); - builder_->createBranch(&block_dimension_merge); + if_data_is_3d.makeEndIf(); - // Choose between the 3D and the stacked result based on the actual - // data dimensionality. 
- builder_->setBuildPoint(&block_dimension_merge); - { - std::unique_ptr dimension_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float4_, spv::OpPhi); - dimension_phi_op->addIdOperand(sample_result_unsigned_3d); - dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); - dimension_phi_op->addIdOperand(sample_result_unsigned_stacked); - dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); - sample_result_unsigned = dimension_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(dimension_phi_op)); - } - { - std::unique_ptr dimension_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float4_, spv::OpPhi); - dimension_phi_op->addIdOperand(sample_result_signed_3d); - dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); - dimension_phi_op->addIdOperand(sample_result_signed_stacked); - dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); - sample_result_signed = dimension_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(dimension_phi_op)); - } + sample_result_unsigned = if_data_is_3d.createMergePhi( + sample_result_unsigned_3d, sample_result_unsigned_stacked); + sample_result_signed = if_data_is_3d.createMergePhi( + sample_result_signed_3d, sample_result_signed_stacked); } else { if (use_computed_lod) { texture_parameters.gradX = gradients_h; @@ -2045,7 +1984,7 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( builder_->createCompositeConstruct(type_float3_, id_vector_temp_); SampleTexture(texture_parameters, image_operands_mask, image_2d_array_or_cube_unsigned, - image_2d_array_or_cube_signed, sampler, is_all_signed, + image_2d_array_or_cube_signed, sampler, is_any_unsigned, is_any_signed, sample_result_unsigned, sample_result_signed); } @@ -2095,26 +2034,20 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( spv::OpBitwiseAnd, type_uint_, swizzle_word, builder_->makeUintConstant(swizzle_bit_0_value << 2)), 
const_uint_0_); - spv::Block& block_swizzle_head = *builder_->getBuildPoint(); - spv::Block& block_swizzle_constant = builder_->makeNewBlock(); - spv::Block& block_swizzle_component = builder_->makeNewBlock(); - spv::Block& block_swizzle_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_swizzle_merge, spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(swizzle_bit_2, - &block_swizzle_constant, - &block_swizzle_component); - // Constant values. - builder_->setBuildPoint(&block_swizzle_constant); - // Bit 0 - 0 or 1. - spv::Id swizzle_result_constant = - builder_->createTriOp(spv::OpSelect, type_float_, swizzle_bit_0, - const_float_1, const_float_0_); - builder_->createBranch(&block_swizzle_merge); - // Fetched components. + SpirvBuilder::IfBuilder if_swizzle_constant( + swizzle_bit_2, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id swizzle_result_constant; + { + // Constant values. + // Bit 0 - 0 or 1. + swizzle_result_constant = builder_->createTriOp( + spv::OpSelect, type_float_, swizzle_bit_0, const_float_1, + const_float_0_); + } + if_swizzle_constant.makeBeginElse(); spv::Id swizzle_result_component; { - builder_->setBuildPoint(&block_swizzle_component); + // Fetched components. // Select whether the result is signed or unsigned (or biased or // gamma-corrected) based on the post-swizzle signedness. spv::Id swizzle_sample_result = builder_->createTriOp( @@ -2146,22 +2079,11 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( swizzle_result_component = builder_->createTriOp( spv::OpSelect, type_float_, swizzle_bit_1, swizzle_z_or_w, swizzle_x_or_y); - builder_->createBranch(&block_swizzle_merge); } + if_swizzle_constant.makeEndIf(); // Select between the constants and the fetched components. 
- builder_->setBuildPoint(&block_swizzle_merge); - { - std::unique_ptr swizzle_phi_op = - std::make_unique(builder_->getUniqueId(), - type_float_, spv::OpPhi); - swizzle_phi_op->addIdOperand(swizzle_result_constant); - swizzle_phi_op->addIdOperand(block_swizzle_constant.getId()); - swizzle_phi_op->addIdOperand(swizzle_result_component); - swizzle_phi_op->addIdOperand(block_swizzle_component.getId()); - result[result_component_index] = swizzle_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction( - std::move(swizzle_phi_op)); - } + result[result_component_index] = if_swizzle_constant.createMergePhi( + swizzle_result_constant, swizzle_result_component); } } @@ -2441,58 +2363,43 @@ size_t SpirvShaderTranslator::FindOrAddSamplerBinding( void SpirvShaderTranslator::SampleTexture( spv::Builder::TextureParameters& texture_parameters, spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned, - spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed, + spv::Id image_signed, spv::Id sampler, spv::Id is_any_unsigned, spv::Id is_any_signed, spv::Id& result_unsigned_out, spv::Id& result_signed_out, spv::Id lerp_factor, spv::Id lerp_first_unsigned, spv::Id lerp_first_signed) { for (uint32_t i = 0; i < 2; ++i) { - spv::Block& block_sign_head = *builder_->getBuildPoint(); - spv::Block& block_sign = builder_->makeNewBlock(); - spv::Block& block_sign_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_sign_merge, - spv::SelectionControlDontFlattenMask); - // Unsigned (i == 0) - if there are any non-signed components. - // Signed (i == 1) - if there are any signed components. - builder_->createConditionalBranch(i ? is_any_signed : is_all_signed, - i ? &block_sign : &block_sign_merge, - i ? &block_sign_merge : &block_sign); - builder_->setBuildPoint(&block_sign); - spv::Id image = i ? image_signed : image_unsigned; - // OpSampledImage must be in the same block as where its result is used. 
- texture_parameters.sampler = builder_->createBinOp( - spv::OpSampledImage, - builder_->makeSampledImageType(builder_->getTypeId(image)), image, - sampler); - spv::Id result = builder_->createTextureCall( - spv::NoPrecision, type_float4_, false, false, false, false, false, - texture_parameters, image_operands_mask); - if (lerp_factor != spv::NoResult) { - spv::Id lerp_first = i ? lerp_first_signed : lerp_first_unsigned; - if (lerp_first != spv::NoResult) { - spv::Id lerp_difference = builder_->createNoContractionBinOp( - spv::OpVectorTimesScalar, type_float4_, - builder_->createNoContractionBinOp(spv::OpFSub, type_float4_, - result, lerp_first), - lerp_factor); - result = builder_->createNoContractionBinOp(spv::OpFAdd, type_float4_, - result, lerp_difference); + SpirvBuilder::IfBuilder sign_if(i ? is_any_signed : is_any_unsigned, + spv::SelectionControlDontFlattenMask, + *builder_); + spv::Id sign_result; + { + spv::Id image = i ? image_signed : image_unsigned; + // OpSampledImage must be in the same block as where its result is used. + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image)), image, + sampler); + sign_result = builder_->createTextureCall( + spv::NoPrecision, type_float4_, false, false, false, false, false, + texture_parameters, image_operands_mask); + if (lerp_factor != spv::NoResult) { + spv::Id lerp_first = i ? 
lerp_first_signed : lerp_first_unsigned; + if (lerp_first != spv::NoResult) { + spv::Id lerp_difference = builder_->createNoContractionBinOp( + spv::OpVectorTimesScalar, type_float4_, + builder_->createNoContractionBinOp(spv::OpFSub, type_float4_, + sign_result, lerp_first), + lerp_factor); + sign_result = builder_->createNoContractionBinOp( + spv::OpFAdd, type_float4_, sign_result, lerp_difference); + } } } - builder_->createBranch(&block_sign_merge); - builder_->setBuildPoint(&block_sign_merge); - { - std::unique_ptr phi_op = - std::make_unique(builder_->getUniqueId(), - type_float4_, spv::OpPhi); - phi_op->addIdOperand(result); - phi_op->addIdOperand(block_sign.getId()); - phi_op->addIdOperand(const_float4_0_); - phi_op->addIdOperand(block_sign_head.getId()); - // This may overwrite the first lerp endpoint for the sign (such usage of - // this function is allowed). - (i ? result_signed_out : result_unsigned_out) = phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(phi_op)); - } + sign_if.makeEndIf(); + // This may overwrite the first lerp endpoint for the sign (such usage of + // this function is allowed). + (i ? result_signed_out : result_unsigned_out) = + sign_if.createMergePhi(sign_result, const_float4_0_); } } @@ -2500,48 +2407,33 @@ spv::Id SpirvShaderTranslator::QueryTextureLod( spv::Builder::TextureParameters& texture_parameters, spv::Id image_unsigned, spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed) { // OpSampledImage must be in the same block as where its result is used. 
- spv::Block& block_sign_head = *builder_->getBuildPoint(); - spv::Block& block_sign_signed = builder_->makeNewBlock(); - spv::Block& block_sign_unsigned = builder_->makeNewBlock(); - spv::Block& block_sign_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_sign_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(is_all_signed, &block_sign_signed, - &block_sign_unsigned); - builder_->setBuildPoint(&block_sign_signed); - texture_parameters.sampler = builder_->createBinOp( - spv::OpSampledImage, - builder_->makeSampledImageType(builder_->getTypeId(image_signed)), - image_signed, sampler); - spv::Id lod_signed = builder_->createCompositeExtract( - builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters, - false), - type_float_, 1); - builder_->createBranch(&block_sign_merge); - builder_->setBuildPoint(&block_sign_unsigned); - texture_parameters.sampler = builder_->createBinOp( - spv::OpSampledImage, - builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)), - image_unsigned, sampler); - spv::Id lod_unsigned = builder_->createCompositeExtract( - builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters, - false), - type_float_, 1); - builder_->createBranch(&block_sign_merge); - builder_->setBuildPoint(&block_sign_merge); - spv::Id result; + SpirvBuilder::IfBuilder if_signed( + is_all_signed, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id lod_signed; { - std::unique_ptr sign_phi_op = - std::make_unique(builder_->getUniqueId(), type_float_, - spv::OpPhi); - sign_phi_op->addIdOperand(lod_signed); - sign_phi_op->addIdOperand(block_sign_signed.getId()); - sign_phi_op->addIdOperand(lod_unsigned); - sign_phi_op->addIdOperand(block_sign_unsigned.getId()); - result = sign_phi_op->getResultId(); - builder_->getBuildPoint()->addInstruction(std::move(sign_phi_op)); + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + 
builder_->makeSampledImageType(builder_->getTypeId(image_signed)), + image_signed, sampler); + lod_signed = builder_->createCompositeExtract( + builder_->createTextureQueryCall(spv::OpImageQueryLod, + texture_parameters, false), + type_float_, 1); } - return result; + if_signed.makeBeginElse(); + spv::Id lod_unsigned; + { + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)), + image_unsigned, sampler); + lod_unsigned = builder_->createCompositeExtract( + builder_->createTextureQueryCall(spv::OpImageQueryLod, + texture_parameters, false), + type_float_, 1); + } + if_signed.makeEndIf(); + return if_signed.createMergePhi(lod_signed, lod_unsigned); } } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator_memexport.cc b/src/xenia/gpu/spirv_shader_translator_memexport.cc new file mode 100644 index 000000000..94c0adf54 --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator_memexport.cc @@ -0,0 +1,950 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2024 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader_translator.h" + +#include +#include +#include +#include +#include +#include + +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/gpu/ucode.h" + +namespace xe { +namespace gpu { + +void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) { + if (!export_eM) { + return; + } + + assert_zero(export_eM & ~current_shader().memexport_eM_written()); + + if (!IsMemoryExportSupported()) { + return; + } + + // Check if memory export is allowed in this guest shader invocation. + std::optional if_memexport_allowed; + if (main_memexport_allowed_ != spv::NoResult) { + if_memexport_allowed.emplace(main_memexport_allowed_, + spv::SelectionControlDontFlattenMask, + *builder_); + } + + // If the pixel was killed (but the actual killing on the SPIR-V side has not + // been performed yet because the device doesn't support demotion to helper + // invocation that doesn't interfere with control flow), the current + // invocation is not considered active anymore. + std::optional if_pixel_not_killed; + if (var_main_kill_pixel_ != spv::NoResult) { + if_pixel_not_killed.emplace( + builder_->createUnaryOp( + spv::OpLogicalNot, type_bool_, + builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision)), + spv::SelectionControlDontFlattenMask, *builder_); + } + + // Check if the address with the correct sign and exponent was written, and + // that the index doesn't overflow the mantissa bits. 
+ // all((eA_vector >> uvec4(30, 23, 23, 23)) == uvec4(0x1, 0x96, 0x96, 0x96)) + spv::Id eA_vector = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, + builder_->createLoad(var_main_memexport_address_, spv::NoPrecision)); + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeUintConstant(30)); + id_vector_temp_.push_back(builder_->makeUintConstant(23)); + id_vector_temp_.push_back(id_vector_temp_.back()); + id_vector_temp_.push_back(id_vector_temp_.back()); + spv::Id address_validation_shift = + builder_->makeCompositeConstant(type_uint4_, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeUintConstant(0x1)); + id_vector_temp_.push_back(builder_->makeUintConstant(0x96)); + id_vector_temp_.push_back(id_vector_temp_.back()); + id_vector_temp_.push_back(id_vector_temp_.back()); + spv::Id address_validation_value = + builder_->makeCompositeConstant(type_uint4_, id_vector_temp_); + SpirvBuilder::IfBuilder if_address_valid( + builder_->createUnaryOp( + spv::OpAll, type_bool_, + builder_->createBinOp( + spv::OpIEqual, type_bool4_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint4_, + eA_vector, address_validation_shift), + address_validation_value)), + spv::SelectionControlDontFlattenMask, *builder_, 2, 1); + + using EMIdArray = std::array; + + auto for_each_eM = [&](std::function fn) { + uint8_t eM_remaining = export_eM; + uint32_t eM_index; + while (xe::bit_scan_forward(eM_remaining, &eM_index)) { + eM_remaining &= ~(uint8_t(1) << eM_index); + fn(eM_index); + } + }; + + // Load the original eM. + EMIdArray eM_original; + for_each_eM([&](uint32_t eM_index) { + eM_original[eM_index] = builder_->createLoad( + var_main_memexport_data_[eM_index], spv::NoPrecision); + }); + + // Swap red and blue if needed. 
+ spv::Id format_info = + builder_->createCompositeExtract(eA_vector, type_uint_, 2); + spv::Id swap_red_blue = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info, + builder_->makeUintConstant(uint32_t(1) << 19)), + const_uint_0_); + EMIdArray eM_swapped; + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(2); + uint_vector_temp_.push_back(1); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(3); + for_each_eM([&](uint32_t eM_index) { + eM_swapped[eM_index] = builder_->createTriOp( + spv::OpSelect, type_float4_, swap_red_blue, + builder_->createRvalueSwizzle(spv::NoPrecision, type_float4_, + eM_original[eM_index], uint_vector_temp_), + eM_original[eM_index]); + }); + + // Extract the numeric format. + spv::Id is_signed = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info, + builder_->makeUintConstant(uint32_t(1) << 16)), + const_uint_0_); + spv::Id is_norm = builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info, + builder_->makeUintConstant(uint32_t(1) << 17)), + const_uint_0_); + + // Perform format packing. 
+ + auto flush_nan = [&](const EMIdArray& eM) -> EMIdArray { + EMIdArray eM_flushed; + for_each_eM([&](uint32_t eM_index) { + spv::Id element_unflushed = eM[eM_index]; + unsigned int component_count = + builder_->getNumComponents(element_unflushed); + eM_flushed[eM_index] = builder_->createTriOp( + spv::OpSelect, type_float_vectors_[component_count - 1], + builder_->createUnaryOp(spv::OpIsNan, + type_bool_vectors_[component_count - 1], + element_unflushed), + const_float_vectors_0_[component_count - 1], element_unflushed); + }); + return eM_flushed; + }; + + auto make_float_constant_vectors = + [&](float value) -> std::array { + std::array const_vectors; + const_vectors[0] = builder_->makeFloatConstant(value); + id_vector_temp_.clear(); + id_vector_temp_.push_back(const_vectors[0]); + for (unsigned int component_count_minus_1 = 1; component_count_minus_1 < 4; + ++component_count_minus_1) { + id_vector_temp_.push_back(const_vectors[0]); + const_vectors[component_count_minus_1] = builder_->makeCompositeConstant( + type_float_vectors_[component_count_minus_1], id_vector_temp_); + } + return const_vectors; + }; + std::array const_float_vectors_minus_1 = + make_float_constant_vectors(-1.0f); + std::array const_float_vectors_minus_0_5 = + make_float_constant_vectors(-0.5f); + std::array const_float_vectors_0_5 = + make_float_constant_vectors(0.5f); + + // The widths must be without holes (R, RG, RGB, RGBA), and expecting the + // widths to add up to the size of the stored texel (8, 16 or 32 bits), as the + // unused upper bits will contain junk from the sign extension of X if the + // number is signed. + auto pack_8_16_32 = [&](std::array widths) -> EMIdArray { + unsigned int component_count; + std::array offsets{}; + for (component_count = 0; component_count < widths.size(); + ++component_count) { + if (!widths[component_count]) { + break; + } + // Only formats for which max + 0.5 can be represented exactly. 
+ assert(widths[component_count] <= 23); + if (component_count) { + offsets[component_count] = + offsets[component_count - 1] + widths[component_count - 1]; + } + } + assert_not_zero(component_count); + + // Extract the needed components. + EMIdArray eM_unflushed = eM_swapped; + if (component_count < 4) { + if (component_count == 1) { + for_each_eM([&](uint32_t eM_index) { + eM_unflushed[eM_index] = builder_->createCompositeExtract( + eM_unflushed[eM_index], type_float_, 0); + }); + } else { + uint_vector_temp_.clear(); + for (unsigned int component_index = 0; + component_index < component_count; ++component_index) { + uint_vector_temp_.push_back(component_index); + } + for_each_eM([&](uint32_t eM_index) { + eM_unflushed[eM_index] = builder_->createRvalueSwizzle( + spv::NoPrecision, type_float_vectors_[component_count - 1], + eM_unflushed[eM_index], uint_vector_temp_); + }); + } + } + + // Flush NaNs. + EMIdArray eM_flushed = flush_nan(eM_unflushed); + + // Convert to integers. + SpirvBuilder::IfBuilder if_signed( + is_signed, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray eM_signed; + { + // Signed. + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray eM_norm; + { + // Signed normalized. + id_vector_temp_.clear(); + for (unsigned int component_index = 0; + component_index < component_count; ++component_index) { + id_vector_temp_.push_back(builder_->makeFloatConstant( + float((uint32_t(1) << (widths[component_index] - 1)) - 1))); + } + spv::Id const_max_value = + component_count > 1 + ? 
builder_->makeCompositeConstant( + type_float_vectors_[component_count - 1], id_vector_temp_) + : id_vector_temp_.front(); + for_each_eM([&](uint32_t eM_index) { + eM_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float_vectors_[component_count - 1], + builder_->createTriBuiltinCall( + type_float_vectors_[component_count - 1], + ext_inst_glsl_std_450_, GLSLstd450FClamp, + eM_flushed[eM_index], + const_float_vectors_minus_1[component_count - 1], + const_float_vectors_1_[component_count - 1]), + const_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + eM_signed[eM_index] = + if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]); + }); + // Convert to signed integer, adding plus/minus 0.5 before truncating + // according to the Direct3D format conversion rules. + for_each_eM([&](uint32_t eM_index) { + eM_signed[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint_vectors_[component_count - 1], + builder_->createUnaryOp( + spv::OpConvertFToS, type_int_vectors_[component_count - 1], + builder_->createNoContractionBinOp( + spv::OpFAdd, type_float_vectors_[component_count - 1], + eM_signed[eM_index], + builder_->createTriOp( + spv::OpSelect, type_float_vectors_[component_count - 1], + builder_->createBinOp( + spv::OpFOrdLessThan, + type_bool_vectors_[component_count - 1], + eM_signed[eM_index], + const_float_vectors_0_[component_count - 1]), + const_float_vectors_minus_0_5[component_count - 1], + const_float_vectors_0_5[component_count - 1])))); + }); + } + if_signed.makeBeginElse(); + EMIdArray eM_unsigned; + { + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray eM_norm; + { + // Unsigned normalized. 
+ id_vector_temp_.clear(); + for (unsigned int component_index = 0; + component_index < component_count; ++component_index) { + id_vector_temp_.push_back(builder_->makeFloatConstant( + float((uint32_t(1) << widths[component_index]) - 1))); + } + spv::Id const_max_value = + component_count > 1 + ? builder_->makeCompositeConstant( + type_float_vectors_[component_count - 1], id_vector_temp_) + : id_vector_temp_.front(); + for_each_eM([&](uint32_t eM_index) { + eM_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float_vectors_[component_count - 1], + builder_->createTriBuiltinCall( + type_float_vectors_[component_count - 1], + ext_inst_glsl_std_450_, GLSLstd450FClamp, + eM_flushed[eM_index], + const_float_vectors_0_[component_count - 1], + const_float_vectors_1_[component_count - 1]), + const_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + eM_unsigned[eM_index] = + if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]); + }); + // Convert to unsigned integer, adding 0.5 before truncating according to + // the Direct3D format conversion rules. + for_each_eM([&](uint32_t eM_index) { + eM_unsigned[eM_index] = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint_vectors_[component_count - 1], + builder_->createNoContractionBinOp( + spv::OpFAdd, type_float_vectors_[component_count - 1], + eM_unsigned[eM_index], + const_float_vectors_0_5[component_count - 1])); + }); + } + if_signed.makeEndIf(); + EMIdArray eM_unpacked; + for_each_eM([&](uint32_t eM_index) { + eM_unpacked[eM_index] = + if_signed.createMergePhi(eM_signed[eM_index], eM_unsigned[eM_index]); + }); + + // Pack into a 32-bit value, and pad to a 4-component vector for the phi. + EMIdArray eM_packed; + for_each_eM([&](uint32_t eM_index) { + spv::Id element_unpacked = eM_unpacked[eM_index]; + eM_packed[eM_index] = component_count > 1 + ? 
builder_->createCompositeExtract( + element_unpacked, type_uint_, 0) + : element_unpacked; + for (unsigned int component_index = 1; component_index < component_count; + ++component_index) { + eM_packed[eM_index] = builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, eM_packed[eM_index], + builder_->createCompositeExtract(element_unpacked, type_uint_, + component_index), + builder_->makeUintConstant(offsets[component_index]), + builder_->makeUintConstant(widths[component_index])); + } + id_vector_temp_.clear(); + id_vector_temp_.resize(4, const_uint_0_); + id_vector_temp_.front() = eM_packed[eM_index]; + eM_packed[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + + return eM_packed; + }; + + SpirvBuilder::SwitchBuilder format_switch( + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info, + builder_->makeUintConstant(8), + builder_->makeUintConstant(6)), + spv::SelectionControlDontFlattenMask, *builder_); + + struct FormatCase { + EMIdArray eM_packed; + uint32_t element_bytes_log2; + spv::Id phi_parent; + }; + std::vector format_cases; + // Must be called at the end of the switch case segment for the correct phi + // parent. + auto add_format_case = [&](const EMIdArray& eM_packed, + uint32_t element_bytes_log2) { + FormatCase& format_case = format_cases.emplace_back(); + format_case.eM_packed = eM_packed; + format_case.element_bytes_log2 = element_bytes_log2; + format_case.phi_parent = builder_->getBuildPoint()->getId(); + }; + + // k_8, k_8_A, k_8_B + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_8)); + // TODO(Triang3l): Investigate how input should be treated for k_8_A, k_8_B. 
+ format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_A)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_B)); + add_format_case(pack_8_16_32({8}), 0); + + // k_1_5_5_5 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_1_5_5_5)); + add_format_case(pack_8_16_32({5, 5, 5, 1}), 1); + + // k_5_6_5 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_5_6_5)); + add_format_case(pack_8_16_32({5, 6, 5}), 1); + + // k_6_5_5 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_6_5_5)); + add_format_case(pack_8_16_32({5, 5, 6}), 1); + + // k_8_8_8_8, k_8_8_8_8_A, k_8_8_8_8_AS_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_8_8_8_8)); + // TODO(Triang3l): Investigate how input should be treated for k_8_8_8_8_A. + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_8_8_8_A)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16)); + add_format_case(pack_8_16_32({8, 8, 8, 8}), 2); + + // k_2_10_10_10, k_2_10_10_10_AS_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_2_10_10_10)); + format_switch.addCurrentCaseLiteral(static_cast( + xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16)); + add_format_case(pack_8_16_32({10, 10, 10, 2}), 2); + + // k_8_8 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_8_8)); + add_format_case(pack_8_16_32({8, 8}), 1); + + // k_4_4_4_4 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_4_4_4_4)); + add_format_case(pack_8_16_32({4, 4, 4, 4}), 1); + + // k_10_11_11, k_10_11_11_AS_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_10_11_11)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16)); + add_format_case(pack_8_16_32({11, 11, 10}), 2); + + // k_11_11_10, k_11_11_10_AS_16_16_16_16 + format_switch.makeBeginCase( + 
static_cast(xenos::ColorFormat::k_11_11_10)); + format_switch.addCurrentCaseLiteral( + static_cast(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16)); + add_format_case(pack_8_16_32({10, 11, 11}), 2); + + // k_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16)); + add_format_case(pack_8_16_32({16}), 1); + + // k_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16)); + add_format_case(pack_8_16_32({16, 16}), 2); + + // k_16_16_16_16 + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16_16_16)); + { + // Flush NaNs. + EMIdArray fixed16_flushed = flush_nan(eM_swapped); + + // Convert to integers. + SpirvBuilder::IfBuilder if_signed( + is_signed, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray fixed16_signed; + { + // Signed. + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray fixed16_norm; + { + // Signed normalized. + id_vector_temp_.clear(); + id_vector_temp_.resize(4, builder_->makeFloatConstant( + float((uint32_t(1) << (16 - 1)) - 1))); + spv::Id const_snorm16_max_value = + builder_->makeCompositeConstant(type_float4_, id_vector_temp_); + for_each_eM([&](uint32_t eM_index) { + fixed16_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float4_, + builder_->createTriBuiltinCall( + type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp, + fixed16_flushed[eM_index], const_float_vectors_minus_1[3], + const_float4_1_), + const_snorm16_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + fixed16_signed[eM_index] = if_norm.createMergePhi( + fixed16_norm[eM_index], fixed16_flushed[eM_index]); + }); + // Convert to signed integer, adding plus/minus 0.5 before truncating + // according to the Direct3D format conversion rules. 
+ for_each_eM([&](uint32_t eM_index) { + fixed16_signed[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, + builder_->createUnaryOp( + spv::OpConvertFToS, type_int4_, + builder_->createNoContractionBinOp( + spv::OpFAdd, type_float4_, fixed16_signed[eM_index], + builder_->createTriOp( + spv::OpSelect, type_float4_, + builder_->createBinOp(spv::OpFOrdLessThan, type_bool4_, + fixed16_signed[eM_index], + const_float4_0_), + const_float_vectors_minus_0_5[3], + const_float_vectors_0_5[3])))); + }); + } + if_signed.makeBeginElse(); + EMIdArray fixed16_unsigned; + { + // Unsigned. + SpirvBuilder::IfBuilder if_norm( + is_norm, spv::SelectionControlDontFlattenMask, *builder_); + EMIdArray fixed16_norm; + { + // Unsigned normalized. + id_vector_temp_.clear(); + id_vector_temp_.resize( + 4, builder_->makeFloatConstant(float((uint32_t(1) << 16) - 1))); + spv::Id const_unorm16_max_value = + builder_->makeCompositeConstant(type_float4_, id_vector_temp_); + for_each_eM([&](uint32_t eM_index) { + fixed16_norm[eM_index] = builder_->createNoContractionBinOp( + spv::OpFMul, type_float4_, + builder_->createTriBuiltinCall( + type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp, + fixed16_flushed[eM_index], const_float4_0_, const_float4_1_), + const_unorm16_max_value); + }); + } + if_norm.makeEndIf(); + // All phi instructions must be in the beginning of the block. + for_each_eM([&](uint32_t eM_index) { + fixed16_unsigned[eM_index] = if_norm.createMergePhi( + fixed16_norm[eM_index], fixed16_flushed[eM_index]); + }); + // Convert to unsigned integer, adding 0.5 before truncating according to + // the Direct3D format conversion rules. 
+ for_each_eM([&](uint32_t eM_index) { + fixed16_unsigned[eM_index] = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint4_, + builder_->createNoContractionBinOp(spv::OpFAdd, type_float4_, + fixed16_unsigned[eM_index], + const_float_vectors_0_5[3])); + }); + } + if_signed.makeEndIf(); + EMIdArray fixed16_unpacked; + for_each_eM([&](uint32_t eM_index) { + fixed16_unpacked[eM_index] = if_signed.createMergePhi( + fixed16_signed[eM_index], fixed16_unsigned[eM_index]); + }); + + // Pack into two 32-bit values, and pad to a 4-component vector for the phi. + EMIdArray fixed16_packed; + spv::Id const_uint_16 = builder_->makeUintConstant(16); + for_each_eM([&](uint32_t eM_index) { + spv::Id fixed16_element_unpacked = fixed16_unpacked[eM_index]; + id_vector_temp_.clear(); + for (uint32_t component_index = 0; component_index < 2; + ++component_index) { + id_vector_temp_.push_back(builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, + builder_->createCompositeExtract(fixed16_element_unpacked, + type_uint_, 2 * component_index), + builder_->createCompositeExtract( + fixed16_element_unpacked, type_uint_, 2 * component_index + 1), + const_uint_16, const_uint_16)); + } + for (uint32_t component_index = 2; component_index < 4; + ++component_index) { + id_vector_temp_.push_back(const_uint_0_); + } + fixed16_packed[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + + add_format_case(fixed16_packed, 3); + } + + // TODO(Triang3l): Use the extended range float16 conversion. 
+ + // k_16_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_FLOAT)); + { + EMIdArray format_packed_16_float; + for_each_eM([&](uint32_t eM_index) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->createCompositeExtract( + eM_swapped[eM_index], type_float_, 0)); + id_vector_temp_.push_back(const_float_0_); + spv::Id format_packed_16_float_x = builder_->createUnaryBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, + builder_->createCompositeConstruct(type_float2_, id_vector_temp_)); + id_vector_temp_.clear(); + id_vector_temp_.resize(4, const_uint_0_); + id_vector_temp_.front() = format_packed_16_float_x; + format_packed_16_float[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + add_format_case(format_packed_16_float, 1); + } + + // k_16_16_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16_FLOAT)); + { + EMIdArray format_packed_16_16_float; + for_each_eM([&](uint32_t eM_index) { + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(1); + spv::Id format_packed_16_16_float_xy = builder_->createUnaryBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + eM_swapped[eM_index], + uint_vector_temp_)); + id_vector_temp_.clear(); + id_vector_temp_.resize(4, const_uint_0_); + id_vector_temp_.front() = format_packed_16_16_float_xy; + format_packed_16_16_float[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + add_format_case(format_packed_16_16_float, 2); + } + + // k_16_16_16_16_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_16_16_16_16_FLOAT)); + { + EMIdArray format_packed_16_16_16_16_float; + for_each_eM([&](uint32_t eM_index) { + spv::Id format_packed_16_16_16_16_float_xy_zw[2]; + for (uint32_t component_index = 0; component_index < 2; + 
++component_index) { + uint_vector_temp_.clear(); + uint_vector_temp_.push_back(2 * component_index); + uint_vector_temp_.push_back(2 * component_index + 1); + format_packed_16_16_16_16_float_xy_zw[component_index] = + builder_->createUnaryBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16, + builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, + eM_swapped[eM_index], + uint_vector_temp_)); + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[0]); + id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[1]); + id_vector_temp_.push_back(const_uint_0_); + id_vector_temp_.push_back(const_uint_0_); + format_packed_16_16_16_16_float[eM_index] = + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_); + }); + add_format_case(format_packed_16_16_16_16_float, 3); + } + + // k_32_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_32_FLOAT)); + { + EMIdArray format_packed_32_float; + for_each_eM([&](uint32_t eM_index) { + format_packed_32_float[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, eM_swapped[eM_index]); + }); + add_format_case(format_packed_32_float, 2); + } + + // k_32_32_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_32_32_FLOAT)); + { + EMIdArray format_packed_32_32_float; + for_each_eM([&](uint32_t eM_index) { + format_packed_32_32_float[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, eM_swapped[eM_index]); + }); + add_format_case(format_packed_32_32_float, 3); + } + + // k_32_32_32_32_FLOAT + format_switch.makeBeginCase( + static_cast(xenos::ColorFormat::k_32_32_32_32_FLOAT)); + { + EMIdArray format_packed_32_32_32_32_float; + for_each_eM([&](uint32_t eM_index) { + format_packed_32_32_32_32_float[eM_index] = builder_->createUnaryOp( + spv::OpBitcast, type_uint4_, eM_swapped[eM_index]); + }); + add_format_case(format_packed_32_32_32_32_float, 4); + } + + 
format_switch.makeEndSwitch(); + + // Select the result and the element size based on the format. + // Phi must be the first instructions in a block. + EMIdArray eM_packed; + for_each_eM([&](uint32_t eM_index) { + auto eM_packed_phi = std::make_unique( + builder_->getUniqueId(), type_uint4_, spv::OpPhi); + // Default case for an invalid format. + eM_packed_phi->addIdOperand(const_uint4_0_); + eM_packed_phi->addIdOperand(format_switch.getDefaultPhiParent()); + for (const FormatCase& format_case : format_cases) { + eM_packed_phi->addIdOperand(format_case.eM_packed[eM_index]); + eM_packed_phi->addIdOperand(format_case.phi_parent); + } + eM_packed[eM_index] = eM_packed_phi->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(eM_packed_phi)); + }); + spv::Id element_bytes_log2; + { + auto element_bytes_log2_phi = std::make_unique( + builder_->getUniqueId(), type_uint_, spv::OpPhi); + // Default case for an invalid format (doesn't enter any element size + // conditional, skipped). + element_bytes_log2_phi->addIdOperand(builder_->makeUintConstant(5)); + element_bytes_log2_phi->addIdOperand(format_switch.getDefaultPhiParent()); + for (const FormatCase& format_case : format_cases) { + element_bytes_log2_phi->addIdOperand( + builder_->makeUintConstant(format_case.element_bytes_log2)); + element_bytes_log2_phi->addIdOperand(format_case.phi_parent); + } + element_bytes_log2 = element_bytes_log2_phi->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(element_bytes_log2_phi)); + } + + // Endian-swap. + spv::Id endian = + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info, + const_uint_0_, builder_->makeUintConstant(3)); + for_each_eM([&](uint32_t eM_index) { + eM_packed[eM_index] = EndianSwap128Uint4(eM_packed[eM_index], endian); + }); + + // Load the index of eM0 in the stream. 
+ spv::Id eM0_index = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, + builder_->createCompositeExtract(eA_vector, type_uint_, 1), const_uint_0_, + builder_->makeUintConstant(23)); + + // Check how many elements starting from eM0 are within the bounds of the + // stream, and from the eM# that were written, exclude the out-of-bounds ones. + // The index can't be negative, and the index and the count are limited to 23 + // bits, so it's safe to use 32-bit signed subtraction and clamping to get the + // remaining eM# count. + spv::Id eM_indices_to_store = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, + builder_->createLoad(var_main_memexport_data_written_, spv::NoPrecision), + const_uint_0_, + builder_->createUnaryOp( + spv::OpBitcast, type_uint_, + builder_->createTriBuiltinCall( + type_int_, ext_inst_glsl_std_450_, GLSLstd450SClamp, + builder_->createBinOp( + spv::OpISub, type_int_, + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, + builder_->createCompositeExtract( + eA_vector, type_uint_, 3), + const_uint_0_, + builder_->makeUintConstant(23))), + builder_->createUnaryOp(spv::OpBitcast, type_int_, + eM0_index)), + const_int_0_, + builder_->makeIntConstant(ucode::kMaxMemExportElementCount)))); + + // Get the eM0 address in bytes. + // Left-shift the stream base address by 2 to both convert it from dwords to + // bytes and drop the upper bits. + spv::Id const_uint_2 = builder_->makeUintConstant(2); + spv::Id eM0_address_bytes = builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createBinOp( + spv::OpShiftLeftLogical, type_uint_, + builder_->createCompositeExtract(eA_vector, type_uint_, 0), + const_uint_2), + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, eM0_index, + element_bytes_log2)); + + // Store based on the element size. 
+ auto store_needed_eM = [&](std::function fn) { + for_each_eM([&](uint32_t eM_index) { + SpirvBuilder::IfBuilder if_eM_needed( + builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + eM_indices_to_store, + builder_->makeUintConstant(1u << eM_index)), + const_uint_0_), + spv::SelectionControlDontFlattenMask, *builder_, 2, 1); + fn(eM_index); + if_eM_needed.makeEndIf(); + }); + }; + SpirvBuilder::SwitchBuilder element_size_switch( + element_bytes_log2, spv::SelectionControlDontFlattenMask, *builder_); + element_size_switch.makeBeginCase(0); + { + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_address_bytes = + eM_index != 0 ? builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_bytes, + builder_->makeUintConstant(eM_index)) + : eM0_address_bytes; + // replace_shift = 8 * (element_address_bytes & 3) + spv::Id replace_shift = builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, const_uint_0_, + element_address_bytes, builder_->makeUintConstant(3), const_uint_2); + StoreUint32ToSharedMemory( + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->createCompositeExtract( + eM_packed[eM_index], type_uint_, 0), + replace_shift), + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + element_address_bytes, const_uint_2)), + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->makeUintConstant(0xFFu), + replace_shift)); + }); + } + element_size_switch.makeBeginCase(1); + { + spv::Id const_uint_1 = builder_->makeUintConstant(1); + spv::Id eM0_address_words = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_1); + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_address_words = + eM_index != 0 ? 
builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_words, + builder_->makeUintConstant(eM_index)) + : eM0_address_words; + // replace_shift = 16 * (element_address_words & 1) + spv::Id replace_shift = builder_->createQuadOp( + spv::OpBitFieldInsert, type_uint_, const_uint_0_, + element_address_words, builder_->makeUintConstant(4), const_uint_1); + StoreUint32ToSharedMemory( + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->createCompositeExtract( + eM_packed[eM_index], type_uint_, 0), + replace_shift), + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + element_address_words, const_uint_1)), + builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, + builder_->makeUintConstant(0xFFFFu), + replace_shift)); + }); + } + element_size_switch.makeBeginCase(2); + { + spv::Id eM0_address_dwords = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2); + store_needed_eM([&](uint32_t eM_index) { + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(eM_packed[eM_index], type_uint_, 0), + builder_->createUnaryOp( + spv::OpBitcast, type_int_, + eM_index != 0 ? builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_dwords, + builder_->makeUintConstant(eM_index)) + : eM0_address_dwords)); + }); + } + element_size_switch.makeBeginCase(3); + { + spv::Id eM0_address_dwords = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2); + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_value = eM_packed[eM_index]; + spv::Id element_address_dwords_int = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + eM_index != 0 ? 
builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_dwords, + builder_->makeUintConstant(2 * eM_index)) + : eM0_address_dwords); + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, 0), + element_address_dwords_int); + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, 1), + builder_->createBinOp(spv::OpIAdd, type_int_, + element_address_dwords_int, + builder_->makeIntConstant(1))); + }); + } + element_size_switch.makeBeginCase(4); + { + spv::Id eM0_address_dwords = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2); + store_needed_eM([&](uint32_t eM_index) { + spv::Id element_value = eM_packed[eM_index]; + spv::Id element_address_dwords_int = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + eM_index != 0 ? builder_->createBinOp( + spv::OpIAdd, type_uint_, eM0_address_dwords, + builder_->makeUintConstant(4 * eM_index)) + : eM0_address_dwords); + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, 0), + element_address_dwords_int); + for (uint32_t element_dword_index = 1; element_dword_index < 4; + ++element_dword_index) { + StoreUint32ToSharedMemory( + builder_->createCompositeExtract(element_value, type_uint_, + element_dword_index), + builder_->createBinOp(spv::OpIAdd, type_int_, + element_address_dwords_int, + builder_->makeIntConstant( + static_cast(element_dword_index)))); + } + }); + } + element_size_switch.makeEndSwitch(); + + // Close the conditionals for whether memory export is allowed in this + // invocation. 
+ if_address_valid.makeEndIf(); + if (if_pixel_not_killed.has_value()) { + if_pixel_not_killed->makeEndIf(); + } + if (if_memexport_allowed.has_value()) { + if_memexport_allowed->makeEndIf(); + } +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 65a01209d..e19fdd540 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -457,22 +457,14 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { // Kill the pixel once the guest control flow and derivatives are not // needed anymore. assert_true(var_main_kill_pixel_ != spv::NoResult); - // Load the condition before the OpSelectionMerge, which must be the - // penultimate instruction. - spv::Id kill_pixel = - builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision); - spv::Block& block_kill = builder_->makeNewBlock(); - spv::Block& block_kill_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_kill_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(kill_pixel, &block_kill, - &block_kill_merge); - builder_->setBuildPoint(&block_kill); + SpirvBuilder::IfBuilder kill_pixel_if( + builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision), + spv::SelectionControlMaskNone, *builder_); // TODO(Triang3l): Use OpTerminateInvocation when SPIR-V 1.6 is // targeted. builder_->createNoResultOp(spv::OpKill); // OpKill terminates the block. - builder_->setBuildPoint(&block_kill_merge); + kill_pixel_if.makeEndIf(false); } } } @@ -533,17 +525,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { builder_->makeUintConstant(3)); // Check if the comparison function is not "always" - that should pass even // for NaN likely, unlike "less, equal or greater". 
- spv::Id alpha_test_function_is_non_always = builder_->createBinOp( - spv::OpINotEqual, type_bool_, alpha_test_function, - builder_->makeUintConstant(uint32_t(xenos::CompareFunction::kAlways))); - spv::Block& block_alpha_test = builder_->makeNewBlock(); - spv::Block& block_alpha_test_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_alpha_test_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(alpha_test_function_is_non_always, - &block_alpha_test, - &block_alpha_test_merge); - builder_->setBuildPoint(&block_alpha_test); + SpirvBuilder::IfBuilder if_alpha_test_function_is_non_always( + builder_->createBinOp(spv::OpINotEqual, type_bool_, alpha_test_function, + builder_->makeUintConstant( + uint32_t(xenos::CompareFunction::kAlways))), + spv::SelectionControlDontFlattenMask, *builder_); { id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant(3)); @@ -564,28 +550,20 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { // The comparison function is not "always" - perform the alpha test. // Handle "not equal" specially (specifically as "not equal" so it's true // for NaN, not "less or greater" which is false for NaN). 
- spv::Id alpha_test_function_is_not_equal = builder_->createBinOp( - spv::OpIEqual, type_bool_, alpha_test_function, - builder_->makeUintConstant( - uint32_t(xenos::CompareFunction::kNotEqual))); - spv::Block& block_alpha_test_not_equal = builder_->makeNewBlock(); - spv::Block& block_alpha_test_non_not_equal = builder_->makeNewBlock(); - spv::Block& block_alpha_test_not_equal_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_alpha_test_not_equal_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(alpha_test_function_is_not_equal, - &block_alpha_test_not_equal, - &block_alpha_test_non_not_equal); - spv::Id alpha_test_result_not_equal, alpha_test_result_non_not_equal; - builder_->setBuildPoint(&block_alpha_test_not_equal); + SpirvBuilder::IfBuilder if_alpha_test_function_is_not_equal( + builder_->createBinOp(spv::OpIEqual, type_bool_, alpha_test_function, + builder_->makeUintConstant(uint32_t( + xenos::CompareFunction::kNotEqual))), + spv::SelectionControlDontFlattenMask, *builder_, 1, 2); + spv::Id alpha_test_result_not_equal; { // "Not equal" function. alpha_test_result_not_equal = builder_->createBinOp(spv::OpFUnordNotEqual, type_bool_, alpha_test_alpha, alpha_test_reference); - builder_->createBranch(&block_alpha_test_not_equal_merge); } - builder_->setBuildPoint(&block_alpha_test_non_not_equal); + if_alpha_test_function_is_not_equal.makeBeginElse(); + spv::Id alpha_test_result_non_not_equal; { // Function other than "not equal". 
static const spv::Op kAlphaTestOps[] = { @@ -609,16 +587,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { alpha_test_result_non_not_equal = alpha_test_comparison_result; } } - builder_->createBranch(&block_alpha_test_not_equal_merge); } - builder_->setBuildPoint(&block_alpha_test_not_equal_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(alpha_test_result_not_equal); - id_vector_temp_.push_back(block_alpha_test_not_equal.getId()); - id_vector_temp_.push_back(alpha_test_result_non_not_equal); - id_vector_temp_.push_back(block_alpha_test_non_not_equal.getId()); + if_alpha_test_function_is_not_equal.makeEndIf(); spv::Id alpha_test_result = - builder_->createOp(spv::OpPhi, type_bool_, id_vector_temp_); + if_alpha_test_function_is_not_equal.createMergePhi( + alpha_test_result_not_equal, alpha_test_result_non_not_equal); // Discard the pixel if the alpha test has failed. if (edram_fragment_shader_interlock_ && !features_.demote_to_helper_invocation) { @@ -627,16 +600,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::OpSelect, type_uint_, alpha_test_result, fsi_sample_mask_in_rt_0_alpha_tests, const_uint_0_); } else { - // Creating a merge block even though it will contain just one OpBranch - // since SPIR-V requires structured control flow in shaders. 
- spv::Block& block_alpha_test_kill = builder_->makeNewBlock(); - spv::Block& block_alpha_test_kill_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_alpha_test_kill_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(alpha_test_result, - &block_alpha_test_kill_merge, - &block_alpha_test_kill); - builder_->setBuildPoint(&block_alpha_test_kill); + SpirvBuilder::IfBuilder alpha_test_kill_if( + builder_->createUnaryOp(spv::OpLogicalNot, type_bool_, + alpha_test_result), + spv::SelectionControlDontFlattenMask, *builder_); + bool branch_to_alpha_test_kill_merge = true; if (edram_fragment_shader_interlock_) { assert_true(features_.demote_to_helper_invocation); fsi_pixel_potentially_killed = true; @@ -645,18 +613,17 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { builder_->addExtension("SPV_EXT_demote_to_helper_invocation"); builder_->addCapability(spv::CapabilityDemoteToHelperInvocationEXT); builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT); - builder_->createBranch(&block_alpha_test_kill_merge); } else { // TODO(Triang3l): Use OpTerminateInvocation when SPIR-V 1.6 is // targeted. builder_->createNoResultOp(spv::OpKill); // OpKill terminates the block. + branch_to_alpha_test_kill_merge = false; } - builder_->setBuildPoint(&block_alpha_test_kill_merge); - builder_->createBranch(&block_alpha_test_merge); + alpha_test_kill_if.makeEndIf(branch_to_alpha_test_kill_merge); } } - builder_->setBuildPoint(&block_alpha_test_merge); + if_alpha_test_function_is_non_always.makeEndIf(); // TODO(Triang3l): Alpha to coverage. 
@@ -725,18 +692,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::OpBitwiseAnd, type_uint_, main_fsi_sample_mask_, builder_->makeUintConstant(uint32_t(1) << (4 + i))), const_uint_0_); - spv::Block& block_sample_late_depth_stencil_write = - builder_->makeNewBlock(); - spv::Block& block_sample_late_depth_stencil_write_merge = - builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_late_depth_stencil_write_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( + SpirvBuilder::IfBuilder if_sample_late_depth_stencil_write_needed( sample_late_depth_stencil_write_needed, - &block_sample_late_depth_stencil_write, - &block_sample_late_depth_stencil_write_merge); - builder_->setBuildPoint(&block_sample_late_depth_stencil_write); + spv::SelectionControlDontFlattenMask, *builder_); spv::Id depth_stencil_sample_address = FSI_AddSampleOffset(main_fsi_address_depth_, i); id_vector_temp_.clear(); @@ -749,8 +707,7 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { ? 
spv::StorageClassStorageBuffer : spv::StorageClassUniform, buffer_edram_, id_vector_temp_)); - builder_->createBranch(&block_sample_late_depth_stencil_write_merge); - builder_->setBuildPoint(&block_sample_late_depth_stencil_write_merge); + if_sample_late_depth_stencil_write_needed.makeEndIf(); } if (color_targets_written) { // Only take the remaining coverage bits, not the late depth / stencil @@ -852,28 +809,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::OpBitwiseAnd, type_uint_, fsi_color_targets_written, builder_->makeUintConstant(uint32_t(1) << color_target_index)), const_uint_0_); - spv::Block& fsi_color_written_if_head = *builder_->getBuildPoint(); - spv::Block& fsi_color_written_if = builder_->makeNewBlock(); - spv::Block& fsi_color_written_if_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&fsi_color_written_if_merge, - spv::SelectionControlDontFlattenMask); - { - std::unique_ptr rt_written_branch_conditional_op = - std::make_unique(spv::OpBranchConditional); - rt_written_branch_conditional_op->addIdOperand(fsi_color_written); - rt_written_branch_conditional_op->addIdOperand( - fsi_color_written_if.getId()); - rt_written_branch_conditional_op->addIdOperand( - fsi_color_written_if_merge.getId()); - // More likely to write to the render target than not. - rt_written_branch_conditional_op->addImmediateOperand(2); - rt_written_branch_conditional_op->addImmediateOperand(1); - builder_->getBuildPoint()->addInstruction( - std::move(rt_written_branch_conditional_op)); - } - fsi_color_written_if.addPredecessor(&fsi_color_written_if_head); - fsi_color_written_if_merge.addPredecessor(&fsi_color_written_if_head); - builder_->setBuildPoint(&fsi_color_written_if); + // More likely to write to the render target than not. 
+ SpirvBuilder::IfBuilder if_fsi_color_written( + fsi_color_written, spv::SelectionControlDontFlattenMask, *builder_, + 2, 1); // For accessing uint2 arrays of per-render-target data which are passed // as uint4 arrays due to std140 array element alignment. @@ -914,14 +853,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { const_uint32_max), builder_->createBinOp(spv::OpINotEqual, type_bool_, rt_keep_mask[1], const_uint32_max)); - spv::Block& rt_write_mask_not_empty_if = builder_->makeNewBlock(); - spv::Block& rt_write_mask_not_empty_if_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&rt_write_mask_not_empty_if_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_write_mask_not_empty, - &rt_write_mask_not_empty_if, - &rt_write_mask_not_empty_if_merge); - builder_->setBuildPoint(&rt_write_mask_not_empty_if); + SpirvBuilder::IfBuilder if_rt_write_mask_not_empty( + rt_write_mask_not_empty, spv::SelectionControlDontFlattenMask, + *builder_); spv::Id const_int_rt_index = builder_->makeIntConstant(color_target_index); @@ -982,17 +916,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { spv::Id rt_blend_enabled = builder_->createBinOp( spv::OpINotEqual, type_bool_, rt_blend_factors_equations, builder_->makeUintConstant(0x00010001)); - spv::Block& rt_blend_enabled_if = builder_->makeNewBlock(); - spv::Block& rt_blend_enabled_else = builder_->makeNewBlock(); - spv::Block& rt_blend_enabled_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&rt_blend_enabled_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( - rt_blend_enabled, &rt_blend_enabled_if, &rt_blend_enabled_else); - - // Blending path. + SpirvBuilder::IfBuilder if_rt_blend_enabled( + rt_blend_enabled, spv::SelectionControlDontFlattenMask, *builder_); { - builder_->setBuildPoint(&rt_blend_enabled_if); + // Blending path. // Get various parameters used in blending. 
spv::Id rt_color_is_fixed_point = builder_->createBinOp( @@ -1097,15 +1024,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { // Blend and mask each sample. for (uint32_t i = 0; i < 4; ++i) { - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(fsi_samples_covered[i], - &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + fsi_samples_covered[i], spv::SelectionControlDontFlattenMask, + *builder_); spv::Id rt_sample_address = FSI_AddSampleOffset(rt_sample_0_address, i, rt_is_64bpp); @@ -1131,26 +1052,13 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { dest_packed[0] = builder_->createLoad(rt_access_chain_0, spv::NoPrecision); { - spv::Block& block_load_64bpp_head = *builder_->getBuildPoint(); - spv::Block& block_load_64bpp = builder_->makeNewBlock(); - spv::Block& block_load_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_load_64bpp_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_load_64bpp, - &block_load_64bpp_merge); - builder_->setBuildPoint(&block_load_64bpp); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); spv::Id dest_packed_64bpp_high = builder_->createLoad(rt_access_chain_1, spv::NoPrecision); - builder_->createBranch(&block_load_64bpp_merge); - builder_->setBuildPoint(&block_load_64bpp_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(dest_packed_64bpp_high); - id_vector_temp_.push_back(block_load_64bpp.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_load_64bpp_head.getId()); - dest_packed[1] = - 
builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_64bpp.makeEndIf(); + dest_packed[1] = if_64bpp.createMergePhi(dest_packed_64bpp_high, + const_uint_0_); } std::array dest_unpacked = FSI_UnpackColor(dest_packed, rt_format_with_flags); @@ -1203,35 +1111,27 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { result_packed[0], rt_replace_mask[0])), rt_access_chain_0); - spv::Block& block_store_64bpp = builder_->makeNewBlock(); - spv::Block& block_store_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_store_64bpp_merge, spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_store_64bpp, - &block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp); - builder_->createStore( - builder_->createBinOp( - spv::OpBitwiseOr, type_uint_, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - dest_packed[1], rt_keep_mask[1]), - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - result_packed[1], - rt_replace_mask[1])), - rt_access_chain_0); - builder_->createBranch(&block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp_merge); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); + { + builder_->createStore( + builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + dest_packed[1], rt_keep_mask[1]), + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + result_packed[1], + rt_replace_mask[1])), + rt_access_chain_1); + } + if_64bpp.makeEndIf(); - builder_->createBranch(&block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered_merge); + if_sample_covered.makeEndIf(); } - - builder_->createBranch(&rt_blend_enabled_merge); } - - // Non-blending paths. + if_rt_blend_enabled.makeBeginElse(); { - builder_->setBuildPoint(&rt_blend_enabled_else); + // Non-blending paths. // Pack the new color for all samples. 
std::array color_packed = @@ -1244,19 +1144,12 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { rt_keep_mask[0], const_uint_0_), builder_->createBinOp(spv::OpINotEqual, type_bool_, rt_keep_mask[1], const_uint_0_)); - spv::Block& rt_keep_mask_not_empty_if = builder_->makeNewBlock(); - spv::Block& rt_keep_mask_not_empty_if_else = builder_->makeNewBlock(); - spv::Block& rt_keep_mask_not_empty_if_merge = - builder_->makeNewBlock(); - builder_->createSelectionMerge(&rt_keep_mask_not_empty_if_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_keep_mask_not_empty, - &rt_keep_mask_not_empty_if, - &rt_keep_mask_not_empty_if_else); - // Loading and masking path. + SpirvBuilder::IfBuilder if_rt_keep_mask_not_empty( + rt_keep_mask_not_empty, spv::SelectionControlDontFlattenMask, + *builder_); { - builder_->setBuildPoint(&rt_keep_mask_not_empty_if); + // Loading and masking path. std::array color_packed_masked; for (uint32_t i = 0; i < 2; ++i) { color_packed_masked[i] = builder_->createBinOp( @@ -1265,15 +1158,9 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { rt_keep_mask[i])); } for (uint32_t i = 0; i < 4; ++i) { - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(fsi_samples_covered[i], - &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + fsi_samples_covered[i], spv::SelectionControlDontFlattenMask, + *builder_); spv::Id rt_sample_address = FSI_AddSampleOffset(rt_sample_0_address, i, rt_is_64bpp); id_vector_temp_.clear(); @@ -1295,52 +1182,38 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { rt_keep_mask[0]), color_packed_masked[0]), rt_access_chain_0); - spv::Block& 
block_store_64bpp = builder_->makeNewBlock(); - spv::Block& block_store_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_store_64bpp_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_store_64bpp, - &block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp); - id_vector_temp_.back() = builder_->createBinOp( - spv::OpIAdd, type_int_, rt_sample_address, fsi_const_int_1); - spv::Id rt_access_chain_1 = builder_->createAccessChain( - features_.spirv_version >= spv::Spv_1_3 - ? spv::StorageClassStorageBuffer - : spv::StorageClassUniform, - buffer_edram_, id_vector_temp_); - builder_->createStore( - builder_->createBinOp( - spv::OpBitwiseOr, type_uint_, - builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, - builder_->createLoad(rt_access_chain_1, - spv::NoPrecision), - rt_keep_mask[1]), - color_packed_masked[1]), - rt_access_chain_1); - builder_->createBranch(&block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp_merge); - builder_->createBranch(&block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered_merge); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); + { + id_vector_temp_.back() = builder_->createBinOp( + spv::OpIAdd, type_int_, rt_sample_address, fsi_const_int_1); + spv::Id rt_access_chain_1 = builder_->createAccessChain( + features_.spirv_version >= spv::Spv_1_3 + ? 
spv::StorageClassStorageBuffer + : spv::StorageClassUniform, + buffer_edram_, id_vector_temp_); + builder_->createStore( + builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + builder_->createLoad(rt_access_chain_1, + spv::NoPrecision), + rt_keep_mask[1]), + color_packed_masked[1]), + rt_access_chain_1); + } + if_64bpp.makeEndIf(); + if_sample_covered.makeEndIf(); } - builder_->createBranch(&rt_keep_mask_not_empty_if_merge); } - - // Fully overwriting path. + if_rt_keep_mask_not_empty.makeBeginElse(); { - builder_->setBuildPoint(&rt_keep_mask_not_empty_if_else); + // Fully overwriting path. for (uint32_t i = 0; i < 4; ++i) { - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(fsi_samples_covered[i], - &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + fsi_samples_covered[i], spv::SelectionControlDontFlattenMask, + *builder_); spv::Id rt_sample_address = FSI_AddSampleOffset(rt_sample_0_address, i, rt_is_64bpp); id_vector_temp_.clear(); @@ -1353,40 +1226,29 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { ? 
spv::StorageClassStorageBuffer : spv::StorageClassUniform, buffer_edram_, id_vector_temp_)); - spv::Block& block_store_64bpp = builder_->makeNewBlock(); - spv::Block& block_store_64bpp_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge( - &block_store_64bpp_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(rt_is_64bpp, &block_store_64bpp, - &block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp); - id_vector_temp_.back() = builder_->createBinOp( - spv::OpIAdd, type_int_, id_vector_temp_.back(), - fsi_const_int_1); - builder_->createStore(color_packed[1], - builder_->createAccessChain( - features_.spirv_version >= spv::Spv_1_3 - ? spv::StorageClassStorageBuffer - : spv::StorageClassUniform, - buffer_edram_, id_vector_temp_)); - builder_->createBranch(&block_store_64bpp_merge); - builder_->setBuildPoint(&block_store_64bpp_merge); - builder_->createBranch(&block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered_merge); + SpirvBuilder::IfBuilder if_64bpp( + rt_is_64bpp, spv::SelectionControlDontFlattenMask, *builder_); + { + id_vector_temp_.back() = builder_->createBinOp( + spv::OpIAdd, type_int_, id_vector_temp_.back(), + fsi_const_int_1); + builder_->createStore( + color_packed[1], builder_->createAccessChain( + features_.spirv_version >= spv::Spv_1_3 + ? 
spv::StorageClassStorageBuffer + : spv::StorageClassUniform, + buffer_edram_, id_vector_temp_)); + } + if_64bpp.makeEndIf(); + if_sample_covered.makeEndIf(); } - builder_->createBranch(&rt_keep_mask_not_empty_if_merge); } - - builder_->setBuildPoint(&rt_keep_mask_not_empty_if_merge); - builder_->createBranch(&rt_blend_enabled_merge); + if_rt_keep_mask_not_empty.makeEndIf(); } + if_rt_blend_enabled.makeEndIf(); - builder_->setBuildPoint(&rt_blend_enabled_merge); - builder_->createBranch(&rt_write_mask_not_empty_if_merge); - builder_->setBuildPoint(&rt_write_mask_not_empty_if_merge); - builder_->createBranch(&fsi_color_written_if_merge); - builder_->setBuildPoint(&fsi_color_written_if_merge); + if_rt_write_mask_not_empty.makeEndIf(); + if_fsi_color_written.makeEndIf(); } else { // Convert to gamma space - this is incorrect, since it must be done // after blending on the Xbox 360, but this is just one of many blending @@ -1405,24 +1267,11 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { builder_->makeUintConstant(kSysFlag_ConvertColor0ToGamma << color_target_index)), const_uint_0_); - spv::Block& block_gamma_head = *builder_->getBuildPoint(); - spv::Block& block_gamma = builder_->makeNewBlock(); - spv::Block& block_gamma_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_gamma_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(is_gamma, &block_gamma, - &block_gamma_merge); - builder_->setBuildPoint(&block_gamma); + SpirvBuilder::IfBuilder if_gamma( + is_gamma, spv::SelectionControlDontFlattenMask, *builder_); spv::Id color_rgb_gamma = LinearToPWLGamma(color_rgb, false); - builder_->createBranch(&block_gamma_merge); - builder_->setBuildPoint(&block_gamma_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(color_rgb_gamma); - id_vector_temp_.push_back(block_gamma.getId()); - id_vector_temp_.push_back(color_rgb); - id_vector_temp_.push_back(block_gamma_head.getId()); - color_rgb = - 
builder_->createOp(spv::OpPhi, type_float3_, id_vector_temp_); + if_gamma.makeEndIf(); + color_rgb = if_gamma.createMergePhi(color_rgb_gamma, color_rgb); { std::unique_ptr color_rgba_shuffle_op = std::make_unique( @@ -1752,15 +1601,8 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant(kSysFlag_FSIDepthStencil)), const_uint_0_); - spv::Block& block_depth_stencil_enabled_head = *builder_->getBuildPoint(); - spv::Block& block_depth_stencil_enabled = builder_->makeNewBlock(); - spv::Block& block_depth_stencil_enabled_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_depth_stencil_enabled_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(depth_stencil_enabled, - &block_depth_stencil_enabled, - &block_depth_stencil_enabled_merge); - builder_->setBuildPoint(&block_depth_stencil_enabled); + SpirvBuilder::IfBuilder if_depth_stencil_enabled( + depth_stencil_enabled, spv::SelectionControlDontFlattenMask, *builder_); // Load the depth in the center of the pixel and calculate the derivatives of // the depth outside non-uniform control flow. 
@@ -1976,14 +1818,8 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, new_sample_mask, builder_->makeUintConstant(uint32_t(1) << i)), const_uint_0_); - spv::Block& block_sample_covered_head = *builder_->getBuildPoint(); - spv::Block& block_sample_covered = builder_->makeNewBlock(); - spv::Block& block_sample_covered_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_sample_covered_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(sample_covered, &block_sample_covered, - &block_sample_covered_merge); - builder_->setBuildPoint(&block_sample_covered); + SpirvBuilder::IfBuilder if_sample_covered( + sample_covered, spv::SelectionControlDontFlattenMask, *builder_); // Load the original depth and stencil for the sample. spv::Id sample_address = FSI_AddSampleOffset(main_fsi_address_depth_, i); @@ -2074,21 +1910,11 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( const_float_0_, const_float_1_); // Convert the new depth to 24-bit. - spv::Block& block_depth_format_float = builder_->makeNewBlock(); - spv::Block& block_depth_format_unorm = builder_->makeNewBlock(); - spv::Block& block_depth_format_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_depth_format_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch( - depth_is_float24, &block_depth_format_float, &block_depth_format_unorm); - // Float24 case. - builder_->setBuildPoint(&block_depth_format_float); + SpirvBuilder::IfBuilder depth_format_if( + depth_is_float24, spv::SelectionControlDontFlattenMask, *builder_); spv::Id sample_depth_float24 = SpirvShaderTranslator::PreClampedDepthTo20e4( *builder_, sample_depth32, true, false, ext_inst_glsl_std_450_); - builder_->createBranch(&block_depth_format_merge); - spv::Block& block_depth_format_float_end = *builder_->getBuildPoint(); - // Unorm24 case. 
- builder_->setBuildPoint(&block_depth_format_unorm); + depth_format_if.makeBeginElse(); // Round to the nearest even integer. This seems to be the correct // conversion, adding +0.5 and rounding towards zero results in red instead // of black in the 4D5307E6 clear shader. @@ -2099,17 +1925,10 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( builder_->createNoContractionBinOp( spv::OpFMul, type_float_, sample_depth32, builder_->makeFloatConstant(float(0xFFFFFF))))); - builder_->createBranch(&block_depth_format_merge); - spv::Block& block_depth_format_unorm_end = *builder_->getBuildPoint(); + depth_format_if.makeEndIf(); // Merge between the two formats. - builder_->setBuildPoint(&block_depth_format_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(sample_depth_float24); - id_vector_temp_.push_back(block_depth_format_float_end.getId()); - id_vector_temp_.push_back(sample_depth_unorm24); - id_vector_temp_.push_back(block_depth_format_unorm_end.getId()); - spv::Id sample_depth24 = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + spv::Id sample_depth24 = depth_format_if.createMergePhi( + sample_depth_float24, sample_depth_unorm24); // Perform the depth test. spv::Id old_depth = builder_->createBinOp( @@ -2131,206 +1950,188 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( builder_->createBinOp(spv::OpUGreaterThan, type_bool_, sample_depth24, old_depth))); - // Begin the stencil test. - spv::Block& block_stencil_enabled_head = *builder_->getBuildPoint(); - spv::Block& block_stencil_enabled = builder_->makeNewBlock(); - spv::Block& block_stencil_enabled_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_stencil_enabled_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(stencil_enabled, &block_stencil_enabled, - &block_stencil_enabled_merge); - builder_->setBuildPoint(&block_stencil_enabled); - - // Perform the stencil test. 
- // The read mask has zeros in the upper bits, applying it to the combined - // stencil and depth will remove the depth part. - spv::Id old_stencil_read_masked = builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, old_depth_stencil, stencil_read_mask); - spv::Id stencil_passed_if_enabled = builder_->createBinOp( - spv::OpLogicalAnd, type_bool_, stencil_pass_if_less, - builder_->createBinOp(spv::OpULessThan, type_bool_, - stencil_reference_read_masked, - old_stencil_read_masked)); - stencil_passed_if_enabled = builder_->createBinOp( - spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, - builder_->createBinOp( - spv::OpLogicalAnd, type_bool_, stencil_pass_if_equal, - builder_->createBinOp(spv::OpIEqual, type_bool_, - stencil_reference_read_masked, - old_stencil_read_masked))); - stencil_passed_if_enabled = builder_->createBinOp( - spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, - builder_->createBinOp( - spv::OpLogicalAnd, type_bool_, stencil_pass_if_greater, - builder_->createBinOp(spv::OpUGreaterThan, type_bool_, - stencil_reference_read_masked, - old_stencil_read_masked))); - spv::Id stencil_op = builder_->createTriOp( - spv::OpBitFieldUExtract, type_uint_, stencil_func_ops, - builder_->createTriOp( - spv::OpSelect, type_uint_, stencil_passed_if_enabled, - builder_->createTriOp(spv::OpSelect, type_uint_, depth_passed, - builder_->makeUintConstant(6), - builder_->makeUintConstant(9)), - builder_->makeUintConstant(3)), - builder_->makeUintConstant(3)); - spv::Block& block_stencil_op_head = *builder_->getBuildPoint(); - spv::Block& block_stencil_op_keep = builder_->makeNewBlock(); - spv::Block& block_stencil_op_zero = builder_->makeNewBlock(); - spv::Block& block_stencil_op_replace = builder_->makeNewBlock(); - spv::Block& block_stencil_op_increment_clamp = builder_->makeNewBlock(); - spv::Block& block_stencil_op_decrement_clamp = builder_->makeNewBlock(); - spv::Block& block_stencil_op_invert = builder_->makeNewBlock(); - spv::Block& 
block_stencil_op_increment_wrap = builder_->makeNewBlock(); - spv::Block& block_stencil_op_decrement_wrap = builder_->makeNewBlock(); - spv::Block& block_stencil_op_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_stencil_op_merge, - spv::SelectionControlDontFlattenMask); + // Perform the stencil test if enabled. + SpirvBuilder::IfBuilder stencil_if( + stencil_enabled, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id stencil_passed_if_enabled; + spv::Id new_stencil_and_old_depth_if_stencil_enabled; { - std::unique_ptr stencil_op_switch_op = - std::make_unique(spv::OpSwitch); - stencil_op_switch_op->addIdOperand(stencil_op); - // Make keep the default. - stencil_op_switch_op->addIdOperand(block_stencil_op_keep.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kZero)); - stencil_op_switch_op->addIdOperand(block_stencil_op_zero.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kReplace)); - stencil_op_switch_op->addIdOperand(block_stencil_op_replace.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kIncrementClamp)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_increment_clamp.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kDecrementClamp)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_decrement_clamp.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kInvert)); - stencil_op_switch_op->addIdOperand(block_stencil_op_invert.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kIncrementWrap)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_increment_wrap.getId()); - stencil_op_switch_op->addImmediateOperand( - int32_t(xenos::StencilOp::kDecrementWrap)); - stencil_op_switch_op->addIdOperand( - block_stencil_op_decrement_wrap.getId()); - builder_->getBuildPoint()->addInstruction( - 
std::move(stencil_op_switch_op)); + // The read mask has zeros in the upper bits, applying it to the combined + // stencil and depth will remove the depth part. + spv::Id old_stencil_read_masked = builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, old_depth_stencil, stencil_read_mask); + stencil_passed_if_enabled = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, stencil_pass_if_less, + builder_->createBinOp(spv::OpULessThan, type_bool_, + stencil_reference_read_masked, + old_stencil_read_masked)); + stencil_passed_if_enabled = builder_->createBinOp( + spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, + builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, stencil_pass_if_equal, + builder_->createBinOp(spv::OpIEqual, type_bool_, + stencil_reference_read_masked, + old_stencil_read_masked))); + stencil_passed_if_enabled = builder_->createBinOp( + spv::OpLogicalOr, type_bool_, stencil_passed_if_enabled, + builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, stencil_pass_if_greater, + builder_->createBinOp(spv::OpUGreaterThan, type_bool_, + stencil_reference_read_masked, + old_stencil_read_masked))); + spv::Id stencil_op = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, stencil_func_ops, + builder_->createTriOp( + spv::OpSelect, type_uint_, stencil_passed_if_enabled, + builder_->createTriOp(spv::OpSelect, type_uint_, depth_passed, + builder_->makeUintConstant(6), + builder_->makeUintConstant(9)), + builder_->makeUintConstant(3)), + builder_->makeUintConstant(3)); + spv::Block& block_stencil_op_head = *builder_->getBuildPoint(); + spv::Block& block_stencil_op_keep = builder_->makeNewBlock(); + spv::Block& block_stencil_op_zero = builder_->makeNewBlock(); + spv::Block& block_stencil_op_replace = builder_->makeNewBlock(); + spv::Block& block_stencil_op_increment_clamp = builder_->makeNewBlock(); + spv::Block& block_stencil_op_decrement_clamp = builder_->makeNewBlock(); + spv::Block& block_stencil_op_invert = builder_->makeNewBlock(); 
+ spv::Block& block_stencil_op_increment_wrap = builder_->makeNewBlock(); + spv::Block& block_stencil_op_decrement_wrap = builder_->makeNewBlock(); + spv::Block& block_stencil_op_merge = builder_->makeNewBlock(); + builder_->createSelectionMerge(&block_stencil_op_merge, + spv::SelectionControlDontFlattenMask); + { + std::unique_ptr stencil_op_switch_op = + std::make_unique(spv::OpSwitch); + stencil_op_switch_op->addIdOperand(stencil_op); + // Make keep the default. + stencil_op_switch_op->addIdOperand(block_stencil_op_keep.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kZero)); + stencil_op_switch_op->addIdOperand(block_stencil_op_zero.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kReplace)); + stencil_op_switch_op->addIdOperand(block_stencil_op_replace.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kIncrementClamp)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_increment_clamp.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kDecrementClamp)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_decrement_clamp.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kInvert)); + stencil_op_switch_op->addIdOperand(block_stencil_op_invert.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kIncrementWrap)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_increment_wrap.getId()); + stencil_op_switch_op->addImmediateOperand( + int32_t(xenos::StencilOp::kDecrementWrap)); + stencil_op_switch_op->addIdOperand( + block_stencil_op_decrement_wrap.getId()); + builder_->getBuildPoint()->addInstruction( + std::move(stencil_op_switch_op)); + } + block_stencil_op_keep.addPredecessor(&block_stencil_op_head); + block_stencil_op_zero.addPredecessor(&block_stencil_op_head); + block_stencil_op_replace.addPredecessor(&block_stencil_op_head); + 
block_stencil_op_increment_clamp.addPredecessor(&block_stencil_op_head); + block_stencil_op_decrement_clamp.addPredecessor(&block_stencil_op_head); + block_stencil_op_invert.addPredecessor(&block_stencil_op_head); + block_stencil_op_increment_wrap.addPredecessor(&block_stencil_op_head); + block_stencil_op_decrement_wrap.addPredecessor(&block_stencil_op_head); + // Keep - will use the old stencil in the phi. + builder_->setBuildPoint(&block_stencil_op_keep); + builder_->createBranch(&block_stencil_op_merge); + // Zero - will use the zero constant in the phi. + builder_->setBuildPoint(&block_stencil_op_zero); + builder_->createBranch(&block_stencil_op_merge); + // Replace - will use the stencil reference in the phi. + builder_->setBuildPoint(&block_stencil_op_replace); + builder_->createBranch(&block_stencil_op_merge); + // Increment and clamp. + builder_->setBuildPoint(&block_stencil_op_increment_clamp); + spv::Id new_stencil_in_low_bits_increment_clamp = builder_->createBinOp( + spv::OpIAdd, type_uint_, + builder_->createBinBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMin, + builder_->makeUintConstant(UINT8_MAX - 1), + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + old_depth_stencil, + builder_->makeUintConstant(UINT8_MAX))), + const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Decrement and clamp. + builder_->setBuildPoint(&block_stencil_op_decrement_clamp); + spv::Id new_stencil_in_low_bits_decrement_clamp = builder_->createBinOp( + spv::OpISub, type_uint_, + builder_->createBinBuiltinCall( + type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMax, const_uint_1, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + old_depth_stencil, + builder_->makeUintConstant(UINT8_MAX))), + const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Invert. 
+ builder_->setBuildPoint(&block_stencil_op_invert); + spv::Id new_stencil_in_low_bits_invert = + builder_->createUnaryOp(spv::OpNot, type_uint_, old_depth_stencil); + builder_->createBranch(&block_stencil_op_merge); + // Increment and wrap. + // The upper bits containing the old depth have no effect on the behavior. + builder_->setBuildPoint(&block_stencil_op_increment_wrap); + spv::Id new_stencil_in_low_bits_increment_wrap = builder_->createBinOp( + spv::OpIAdd, type_uint_, old_depth_stencil, const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Decrement and wrap. + // The upper bits containing the old depth have no effect on the behavior. + builder_->setBuildPoint(&block_stencil_op_decrement_wrap); + spv::Id new_stencil_in_low_bits_decrement_wrap = builder_->createBinOp( + spv::OpISub, type_uint_, old_depth_stencil, const_uint_1); + builder_->createBranch(&block_stencil_op_merge); + // Select the new stencil (with undefined data in bits starting from 8) + // based on the stencil operation. 
+ builder_->setBuildPoint(&block_stencil_op_merge); + id_vector_temp_.clear(); + id_vector_temp_.reserve(2 * 8); + id_vector_temp_.push_back(old_depth_stencil); + id_vector_temp_.push_back(block_stencil_op_keep.getId()); + id_vector_temp_.push_back(const_uint_0_); + id_vector_temp_.push_back(block_stencil_op_zero.getId()); + id_vector_temp_.push_back(stencil_reference); + id_vector_temp_.push_back(block_stencil_op_replace.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_increment_clamp); + id_vector_temp_.push_back(block_stencil_op_increment_clamp.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_clamp); + id_vector_temp_.push_back(block_stencil_op_decrement_clamp.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_invert); + id_vector_temp_.push_back(block_stencil_op_invert.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_increment_wrap); + id_vector_temp_.push_back(block_stencil_op_increment_wrap.getId()); + id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_wrap); + id_vector_temp_.push_back(block_stencil_op_decrement_wrap.getId()); + spv::Id new_stencil_in_low_bits_if_enabled = + builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + // Merge the old depth / stencil (old depth kept from the old depth / + // stencil so the separate old depth register is not needed anymore after + // the depth test) and the new stencil based on the write mask. 
+ new_stencil_and_old_depth_if_stencil_enabled = builder_->createBinOp( + spv::OpBitwiseOr, type_uint_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + old_depth_stencil, stencil_write_keep_mask), + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + new_stencil_in_low_bits_if_enabled, + stencil_write_mask)); } - block_stencil_op_keep.addPredecessor(&block_stencil_op_head); - block_stencil_op_zero.addPredecessor(&block_stencil_op_head); - block_stencil_op_replace.addPredecessor(&block_stencil_op_head); - block_stencil_op_increment_clamp.addPredecessor(&block_stencil_op_head); - block_stencil_op_decrement_clamp.addPredecessor(&block_stencil_op_head); - block_stencil_op_invert.addPredecessor(&block_stencil_op_head); - block_stencil_op_increment_wrap.addPredecessor(&block_stencil_op_head); - block_stencil_op_decrement_wrap.addPredecessor(&block_stencil_op_head); - // Keep - will use the old stencil in the phi. - builder_->setBuildPoint(&block_stencil_op_keep); - builder_->createBranch(&block_stencil_op_merge); - // Zero - will use the zero constant in the phi. - builder_->setBuildPoint(&block_stencil_op_zero); - builder_->createBranch(&block_stencil_op_merge); - // Replace - will use the stencil reference in the phi. - builder_->setBuildPoint(&block_stencil_op_replace); - builder_->createBranch(&block_stencil_op_merge); - // Increment and clamp. - builder_->setBuildPoint(&block_stencil_op_increment_clamp); - spv::Id new_stencil_in_low_bits_increment_clamp = builder_->createBinOp( - spv::OpIAdd, type_uint_, - builder_->createBinBuiltinCall( - type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMin, - builder_->makeUintConstant(UINT8_MAX - 1), - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - old_depth_stencil, - builder_->makeUintConstant(UINT8_MAX))), - const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Decrement and clamp. 
- builder_->setBuildPoint(&block_stencil_op_decrement_clamp); - spv::Id new_stencil_in_low_bits_decrement_clamp = builder_->createBinOp( - spv::OpISub, type_uint_, - builder_->createBinBuiltinCall( - type_uint_, ext_inst_glsl_std_450_, GLSLstd450UMax, const_uint_1, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - old_depth_stencil, - builder_->makeUintConstant(UINT8_MAX))), - const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Invert. - builder_->setBuildPoint(&block_stencil_op_invert); - spv::Id new_stencil_in_low_bits_invert = - builder_->createUnaryOp(spv::OpNot, type_uint_, old_depth_stencil); - builder_->createBranch(&block_stencil_op_merge); - // Increment and wrap. - // The upper bits containing the old depth have no effect on the behavior. - builder_->setBuildPoint(&block_stencil_op_increment_wrap); - spv::Id new_stencil_in_low_bits_increment_wrap = builder_->createBinOp( - spv::OpIAdd, type_uint_, old_depth_stencil, const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Decrement and wrap. - // The upper bits containing the old depth have no effect on the behavior. - builder_->setBuildPoint(&block_stencil_op_decrement_wrap); - spv::Id new_stencil_in_low_bits_decrement_wrap = builder_->createBinOp( - spv::OpISub, type_uint_, old_depth_stencil, const_uint_1); - builder_->createBranch(&block_stencil_op_merge); - // Select the new stencil (with undefined data in bits starting from 8) - // based on the stencil operation. 
- builder_->setBuildPoint(&block_stencil_op_merge); - id_vector_temp_.clear(); - id_vector_temp_.reserve(2 * 8); - id_vector_temp_.push_back(old_depth_stencil); - id_vector_temp_.push_back(block_stencil_op_keep.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_stencil_op_zero.getId()); - id_vector_temp_.push_back(stencil_reference); - id_vector_temp_.push_back(block_stencil_op_replace.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_increment_clamp); - id_vector_temp_.push_back(block_stencil_op_increment_clamp.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_clamp); - id_vector_temp_.push_back(block_stencil_op_decrement_clamp.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_invert); - id_vector_temp_.push_back(block_stencil_op_invert.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_increment_wrap); - id_vector_temp_.push_back(block_stencil_op_increment_wrap.getId()); - id_vector_temp_.push_back(new_stencil_in_low_bits_decrement_wrap); - id_vector_temp_.push_back(block_stencil_op_decrement_wrap.getId()); - spv::Id new_stencil_in_low_bits_if_enabled = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); - // Merge the old depth / stencil (old depth kept from the old depth / - // stencil so the separate old depth register is not needed anymore after - // the depth test) and the new stencil based on the write mask. - spv::Id new_stencil_and_old_depth_if_stencil_enabled = - builder_->createBinOp( - spv::OpBitwiseOr, type_uint_, - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - old_depth_stencil, stencil_write_keep_mask), - builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, - new_stencil_in_low_bits_if_enabled, - stencil_write_mask)); - + stencil_if.makeEndIf(); // Choose the result based on whether the stencil test was done. // All phi operations must be the first in the block. 
- builder_->createBranch(&block_stencil_enabled_merge); - spv::Block& block_stencil_enabled_end = *builder_->getBuildPoint(); - builder_->setBuildPoint(&block_stencil_enabled_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(stencil_passed_if_enabled); - id_vector_temp_.push_back(block_stencil_enabled_end.getId()); - id_vector_temp_.push_back(builder_->makeBoolConstant(true)); - id_vector_temp_.push_back(block_stencil_enabled_head.getId()); - spv::Id stencil_passed = - builder_->createOp(spv::OpPhi, type_bool_, id_vector_temp_); - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_stencil_and_old_depth_if_stencil_enabled); - id_vector_temp_.push_back(block_stencil_enabled_end.getId()); - id_vector_temp_.push_back(old_depth_stencil); - id_vector_temp_.push_back(block_stencil_enabled_head.getId()); - spv::Id new_stencil_and_old_depth = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + spv::Id stencil_passed = stencil_if.createMergePhi( + stencil_passed_if_enabled, builder_->makeBoolConstant(true)); + spv::Id new_stencil_and_old_depth = stencil_if.createMergePhi( + new_stencil_and_old_depth_if_stencil_enabled, old_depth_stencil); // Check whether the tests have passed, and exclude the bit from the // coverage if not. 
@@ -2384,37 +2185,19 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( new_depth_stencil_write_condition = new_depth_stencil_different; } if (new_depth_stencil_write_condition != spv::NoResult) { - spv::Block& block_depth_stencil_write = builder_->makeNewBlock(); - spv::Block& block_depth_stencil_write_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_depth_stencil_write_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(new_depth_stencil_write_condition, - &block_depth_stencil_write, - &block_depth_stencil_write_merge); - builder_->setBuildPoint(&block_depth_stencil_write); + SpirvBuilder::IfBuilder new_depth_stencil_write_if( + new_depth_stencil_write_condition, + spv::SelectionControlDontFlattenMask, *builder_); builder_->createStore(new_depth_stencil, sample_access_chain); - builder_->createBranch(&block_depth_stencil_write_merge); - builder_->setBuildPoint(&block_depth_stencil_write_merge); + new_depth_stencil_write_if.makeEndIf(); } - builder_->createBranch(&block_sample_covered_merge); - spv::Block& block_sample_covered_end = *builder_->getBuildPoint(); - builder_->setBuildPoint(&block_sample_covered_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_sample_mask_after_sample); - id_vector_temp_.push_back(block_sample_covered_end.getId()); - id_vector_temp_.push_back(new_sample_mask); - id_vector_temp_.push_back(block_sample_covered_head.getId()); - new_sample_mask = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_sample_covered.makeEndIf(); + new_sample_mask = if_sample_covered.createMergePhi( + new_sample_mask_after_sample, new_sample_mask); if (is_early) { - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_depth_stencil); - id_vector_temp_.push_back(block_sample_covered_end.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_sample_covered_head.getId()); late_write_depth_stencil[i] = - builder_->createOp(spv::OpPhi, 
type_uint_, id_vector_temp_); + if_sample_covered.createMergePhi(new_depth_stencil, const_uint_0_); } } @@ -2442,25 +2225,14 @@ void SpirvShaderTranslator::FSI_DepthStencilTest( } } } - builder_->createBranch(&block_depth_stencil_enabled_merge); - spv::Block& block_depth_stencil_enabled_end = *builder_->getBuildPoint(); - builder_->setBuildPoint(&block_depth_stencil_enabled_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(new_sample_mask); - id_vector_temp_.push_back(block_depth_stencil_enabled_end.getId()); - id_vector_temp_.push_back(main_fsi_sample_mask_); - id_vector_temp_.push_back(block_depth_stencil_enabled_head.getId()); - main_fsi_sample_mask_ = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_depth_stencil_enabled.makeEndIf(); + main_fsi_sample_mask_ = if_depth_stencil_enabled.createMergePhi( + new_sample_mask, main_fsi_sample_mask_); if (is_early) { for (uint32_t i = 0; i < 4; ++i) { - id_vector_temp_.clear(); - id_vector_temp_.push_back(late_write_depth_stencil[i]); - id_vector_temp_.push_back(block_depth_stencil_enabled_end.getId()); - id_vector_temp_.push_back(const_uint_0_); - id_vector_temp_.push_back(block_depth_stencil_enabled_head.getId()); main_fsi_late_write_depth_stencil_[i] = - builder_->createOp(spv::OpPhi, type_uint_, id_vector_temp_); + if_depth_stencil_enabled.createMergePhi(late_write_depth_stencil[i], + const_uint_0_); } } } @@ -3160,32 +2932,25 @@ spv::Id SpirvShaderTranslator::FSI_FlushNaNClampAndInBlending( assert_true(builder_->getTypeId(min_value) == color_or_alpha_type); assert_true(builder_->getTypeId(max_value) == color_or_alpha_type); - spv::Block& block_is_fixed_point_head = *builder_->getBuildPoint(); - spv::Block& block_is_fixed_point_if = builder_->makeNewBlock(); - spv::Block& block_is_fixed_point_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_is_fixed_point_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(is_fixed_point, 
&block_is_fixed_point_if, - &block_is_fixed_point_merge); - builder_->setBuildPoint(&block_is_fixed_point_if); - // Flush NaN to 0 even for signed (NMax would flush it to the minimum value). - spv::Id color_or_alpha_clamped = builder_->createTriBuiltinCall( - color_or_alpha_type, ext_inst_glsl_std_450_, GLSLstd450FClamp, - builder_->createTriOp( - spv::OpSelect, color_or_alpha_type, - builder_->createUnaryOp(spv::OpIsNan, - type_bool_vectors_[component_count - 1], - color_or_alpha), - const_float_vectors_0_[component_count - 1], color_or_alpha), - min_value, max_value); - builder_->createBranch(&block_is_fixed_point_merge); - builder_->setBuildPoint(&block_is_fixed_point_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(color_or_alpha_clamped); - id_vector_temp_.push_back(block_is_fixed_point_if.getId()); - id_vector_temp_.push_back(color_or_alpha); - id_vector_temp_.push_back(block_is_fixed_point_head.getId()); - return builder_->createOp(spv::OpPhi, color_or_alpha_type, id_vector_temp_); + SpirvBuilder::IfBuilder if_fixed_point( + is_fixed_point, spv::SelectionControlDontFlattenMask, *builder_); + spv::Id color_or_alpha_clamped; + { + // Flush NaN to 0 even for signed (NMax would flush it to the minimum + // value). + color_or_alpha_clamped = builder_->createTriBuiltinCall( + color_or_alpha_type, ext_inst_glsl_std_450_, GLSLstd450FClamp, + builder_->createTriOp( + spv::OpSelect, color_or_alpha_type, + builder_->createUnaryOp(spv::OpIsNan, + type_bool_vectors_[component_count - 1], + color_or_alpha), + const_float_vectors_0_[component_count - 1], color_or_alpha), + min_value, max_value); + } + if_fixed_point.makeEndIf(); + + return if_fixed_point.createMergePhi(color_or_alpha_clamped, color_or_alpha); } spv::Id SpirvShaderTranslator::FSI_ApplyColorBlendFactor( @@ -3197,21 +2962,14 @@ spv::Id SpirvShaderTranslator::FSI_ApplyColorBlendFactor( // infinity and NaN are not potentially involved in the multiplication. 
// Calculate the condition before the selection merge, which must be the // penultimate instruction in the block. - spv::Id factor_not_zero = builder_->createBinOp( - spv::OpINotEqual, type_bool_, factor, - builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))); - spv::Block& block_not_zero_head = *builder_->getBuildPoint(); - spv::Block& block_not_zero_if = builder_->makeNewBlock(); - spv::Block& block_not_zero_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_not_zero_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(factor_not_zero, &block_not_zero_if, - &block_not_zero_merge); + SpirvBuilder::IfBuilder factor_not_zero_if( + builder_->createBinOp( + spv::OpINotEqual, type_bool_, factor, + builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))), + spv::SelectionControlDontFlattenMask, *builder_); // Non-zero factor case. - builder_->setBuildPoint(&block_not_zero_if); - spv::Block& block_factor_head = *builder_->getBuildPoint(); spv::Block& block_factor_one = builder_->makeNewBlock(); std::array color_factor_blocks; @@ -3386,18 +3144,11 @@ spv::Id SpirvShaderTranslator::FSI_ApplyColorBlendFactor( builder_->createOp(spv::OpPhi, type_float3_, id_vector_temp_); spv::Id result = FSI_FlushNaNClampAndInBlending( result_unclamped, is_fixed_point, clamp_min_value, clamp_max_value); - builder_->createBranch(&block_not_zero_merge); - // Get the latest block for a non-zero factor after all the control flow. - spv::Block& block_not_zero_if_end = *builder_->getBuildPoint(); + + factor_not_zero_if.makeEndIf(); // Make the result zero if the factor is zero. 
- builder_->setBuildPoint(&block_not_zero_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(result); - id_vector_temp_.push_back(block_not_zero_if_end.getId()); - id_vector_temp_.push_back(const_float3_0_); - id_vector_temp_.push_back(block_not_zero_head.getId()); - return builder_->createOp(spv::OpPhi, type_float3_, id_vector_temp_); + return factor_not_zero_if.createMergePhi(result, const_float3_0_); } spv::Id SpirvShaderTranslator::FSI_ApplyAlphaBlendFactor( @@ -3408,21 +3159,14 @@ spv::Id SpirvShaderTranslator::FSI_ApplyAlphaBlendFactor( // infinity and NaN are not potentially involved in the multiplication. // Calculate the condition before the selection merge, which must be the // penultimate instruction in the block. - spv::Id factor_not_zero = builder_->createBinOp( - spv::OpINotEqual, type_bool_, factor, - builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))); - spv::Block& block_not_zero_head = *builder_->getBuildPoint(); - spv::Block& block_not_zero_if = builder_->makeNewBlock(); - spv::Block& block_not_zero_merge = builder_->makeNewBlock(); - builder_->createSelectionMerge(&block_not_zero_merge, - spv::SelectionControlDontFlattenMask); - builder_->createConditionalBranch(factor_not_zero, &block_not_zero_if, - &block_not_zero_merge); + SpirvBuilder::IfBuilder factor_not_zero_if( + builder_->createBinOp( + spv::OpINotEqual, type_bool_, factor, + builder_->makeUintConstant(uint32_t(xenos::BlendFactor::kZero))), + spv::SelectionControlDontFlattenMask, *builder_); // Non-zero factor case. 
- builder_->setBuildPoint(&block_not_zero_if); - spv::Block& block_factor_head = *builder_->getBuildPoint(); spv::Block& block_factor_one = builder_->makeNewBlock(); std::array alpha_factor_blocks; @@ -3557,18 +3301,11 @@ spv::Id SpirvShaderTranslator::FSI_ApplyAlphaBlendFactor( builder_->createOp(spv::OpPhi, type_float_, id_vector_temp_); spv::Id result = FSI_FlushNaNClampAndInBlending( result_unclamped, is_fixed_point, clamp_min_value, clamp_max_value); - builder_->createBranch(&block_not_zero_merge); - // Get the latest block for a non-zero factor after all the control flow. - spv::Block& block_not_zero_if_end = *builder_->getBuildPoint(); + + factor_not_zero_if.makeEndIf(); // Make the result zero if the factor is zero. - builder_->setBuildPoint(&block_not_zero_merge); - id_vector_temp_.clear(); - id_vector_temp_.push_back(result); - id_vector_temp_.push_back(block_not_zero_if_end.getId()); - id_vector_temp_.push_back(const_float_0_); - id_vector_temp_.push_back(block_not_zero_head.getId()); - return builder_->createOp(spv::OpPhi, type_float_, id_vector_temp_); + return factor_not_zero_if.createMergePhi(result, const_float_0_); } spv::Id SpirvShaderTranslator::FSI_BlendColorOrAlphaWithUnclampedResult( diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index 14af42d0d..c36484df1 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -330,8 +330,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { uint32_t index_bit = UINT32_C(1) << index; textures_remaining = xe::clear_lowest_bit(textures_remaining); TextureBinding& binding = texture_bindings_[index]; - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index); TextureKey old_key = binding.key; uint8_t old_swizzled_signs = binding.swizzled_signs; BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs); diff --git 
a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index a527edf35..c4b354e8a 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -19,6 +19,7 @@ #include "xenia/base/filesystem.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" #include "xenia/base/platform.h" #include "xenia/base/string.h" #include "xenia/base/system.h" @@ -354,9 +355,10 @@ void TraceViewer::DrawPacketDisassemblerUI() { ImGui::NextColumn(); if (!register_info || register_info->type == RegisterInfo::Type::kDword) { - ImGui::Text("%.8X", action.register_write.value.u32); + ImGui::Text("%.8X", action.register_write.value); } else { - ImGui::Text("%8f", action.register_write.value.f32); + ImGui::Text("%8f", xe::memory::Reinterpret( + action.register_write.value)); } ImGui::Columns(1); break; @@ -706,10 +708,8 @@ void TraceViewer::DrawTextureInfo( const Shader::TextureBinding& texture_binding) { auto& regs = *graphics_system_->register_file(); - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - texture_binding.fetch_constant * 6; - auto group = reinterpret_cast(®s.values[r]); - auto& fetch = group->texture_fetch; + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(texture_binding.fetch_constant); if (fetch.type != xenos::FetchConstantType::kTexture && (!cvars::gpu_allow_invalid_fetch_constants || fetch.type != xenos::FetchConstantType::kInvalidTexture)) { @@ -777,9 +777,9 @@ void TraceViewer::DrawFailedTextureInfo( void TraceViewer::DrawVertexFetcher(Shader* shader, const Shader::VertexBinding& vertex_binding, - const xe_gpu_vertex_fetch_t* fetch) { - const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2); - uint32_t vertex_count = fetch->size / vertex_binding.stride_words; + const xe_gpu_vertex_fetch_t& fetch) { + const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2); + uint32_t vertex_count = fetch.size / vertex_binding.stride_words; int column_count = 0; for (const auto& 
attrib : vertex_binding.attributes) { switch (attrib.fetch_instr.attributes.data_format) { @@ -880,7 +880,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader, #define LOADEL(type, wo) \ GpuSwap(xe::load(vstart + \ (attrib.fetch_instr.attributes.offset + wo) * 4), \ - fetch->endian) + fetch.endian) switch (attrib.fetch_instr.attributes.data_format) { case xenos::VertexFormat::k_32: ImGui::Text("%.8X", LOADEL(uint32_t, 0)); @@ -1062,7 +1062,7 @@ void ProgressBar(float frac, float width, float height = 0, if (height == 0) { height = ImGui::GetTextLineHeightWithSpacing(); } - frac = xe::saturate_unsigned(frac); + frac = xe::saturate(frac); auto pos = ImGui::GetCursorScreenPos(); auto col = ImGui::ColorConvertFloat4ToU32(color); @@ -1180,7 +1180,7 @@ void TraceViewer::DrawStateUI() { } auto enable_mode = - static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + static_cast(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7); switch (enable_mode) { case ModeControl::kIgnore: @@ -1202,7 +1202,7 @@ void TraceViewer::DrawStateUI() { break; } case ModeControl::kCopy: { - uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; + uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE]; ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(), copy_dest_base); break; @@ -1213,9 +1213,9 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Viewport State:"); if (true) { ImGui::TreePush((const void*)0); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; if ((pa_su_sc_mode_cntl >> 16) & 1) { - uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; + uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET]; int16_t window_offset_x = window_offset & 0x7FFF; int16_t window_offset_y = (window_offset >> 16) & 0x7FFF; if (window_offset_x & 0x4000) { @@ -1229,8 +1229,8 @@ void TraceViewer::DrawStateUI() { } else { ImGui::BulletText("Window Offset: 
disabled"); } - uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL]; + uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR]; ImGui::BulletText( "Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF, (window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF, @@ -1238,7 +1238,7 @@ void TraceViewer::DrawStateUI() { (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF), ((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_tl >> 16) & 0x7FFF)); - uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO]; uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF; auto surface_msaa = (surface_info >> 16) & 0x3; @@ -1250,7 +1250,7 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Surface Pitch: %d", surface_pitch); ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); - uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL]; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; @@ -1265,14 +1265,20 @@ void TraceViewer::DrawStateUI() { } ImGui::BulletText( "Viewport Offset: %f, %f, %f", - vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, - vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0, - vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0); + vport_xoffset_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XOFFSET) + : 0.0f, + vport_yoffset_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YOFFSET) + : 0.0f, + vport_zoffset_enable ? 
regs.Get(XE_GPU_REG_PA_CL_VPORT_ZOFFSET) + : 0.0f); ImGui::BulletText( "Viewport Scale: %f, %f, %f", - vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, - vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, - vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); + vport_xscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_XSCALE) + : 1.0f, + vport_yscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_YSCALE) + : 1.0f, + vport_zscale_enable ? regs.Get(XE_GPU_REG_PA_CL_VPORT_ZSCALE) + : 1.0f); if (!vport_xscale_enable) { ImGui::PopStyleColor(); } @@ -1282,7 +1288,7 @@ void TraceViewer::DrawStateUI() { ((vte_control >> 8) & 0x1) ? "y/w0" : "y", ((vte_control >> 9) & 0x1) ? "z/w0" : "z", ((vte_control >> 10) & 0x1) ? "w0" : "1/w0"); - uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; + uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL]; bool clip_enabled = ((clip_control >> 17) & 0x1) == 0; bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1; ImGui::BulletText("Clip Enabled: %s, DX Clip: %s", @@ -1294,11 +1300,9 @@ void TraceViewer::DrawStateUI() { ImGui::BulletText("Rasterizer State:"); if (true) { ImGui::TreePush((const void*)0); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; - uint32_t pa_sc_screen_scissor_tl = - regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; - uint32_t pa_sc_screen_scissor_br = - regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; + uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL]; + uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR]; if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) { int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF; int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF; @@ -1353,7 +1357,7 @@ void TraceViewer::DrawStateUI() { } ImGui::Columns(1); - auto rb_surface_info = 
regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO]; uint32_t surface_pitch = rb_surface_info & 0x3FFF; auto surface_msaa = static_cast((rb_surface_info >> 16) & 0x3); @@ -1362,39 +1366,39 @@ void TraceViewer::DrawStateUI() { if (enable_mode != ModeControl::kDepth) { // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; - uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32; + uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL]; if ((color_control & 0x8) != 0) { ImGui::BulletText("Alpha Test: %s %.2f", kCompareFuncNames[color_control & 0x7], - regs[XE_GPU_REG_RB_ALPHA_REF].f32); + regs.Get(XE_GPU_REG_RB_ALPHA_REF)); } else { ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); ImGui::BulletText("Alpha Test: disabled"); ImGui::PopStyleColor(); } - auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32); + auto blend_color = ImVec4(regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA)); ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x, blend_color.y, blend_color.z, blend_color.w); ImGui::SameLine(); // TODO small_height (was true) parameter was removed ImGui::ColorButton(nullptr, blend_color); - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK]; uint32_t color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, - regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, - regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + regs[XE_GPU_REG_RB_COLOR_INFO], + regs[XE_GPU_REG_RB_COLOR1_INFO], + regs[XE_GPU_REG_RB_COLOR2_INFO], + regs[XE_GPU_REG_RB_COLOR3_INFO], }; uint32_t rb_blendcontrol[4] = { - regs[XE_GPU_REG_RB_BLENDCONTROL0].u32, - 
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL2].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL3].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL0], + regs[XE_GPU_REG_RB_BLENDCONTROL1], + regs[XE_GPU_REG_RB_BLENDCONTROL2], + regs[XE_GPU_REG_RB_BLENDCONTROL3], }; ImGui::Columns(2); for (int i = 0; i < xe::countof(color_info); ++i) { @@ -1503,9 +1507,9 @@ void TraceViewer::DrawStateUI() { } if (ImGui::CollapsingHeader("Depth/Stencil Target")) { - auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; - auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL]; + auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK]; + auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO]; bool uses_depth = (rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004); uint32_t stencil_ref = (rb_stencilrefmask & 0xFF); @@ -1689,10 +1693,9 @@ void TraceViewer::DrawStateUI() { draw_info.index_buffer_size, kIndexFormatNames[int(draw_info.index_format)], kEndiannessNames[int(draw_info.index_endianness)]); - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL]; if (pa_su_sc_mode_cntl & (1 << 21)) { - uint32_t reset_index = - regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32; + uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX]; if (draw_info.index_format == xenos::IndexFormat::kInt16) { ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF); } else { @@ -1752,30 +1755,16 @@ void TraceViewer::DrawStateUI() { auto shader = command_processor->active_vertex_shader(); if (shader) { for (const auto& vertex_binding : shader->vertex_bindings()) { - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - (vertex_binding.fetch_constant / 3) * 6; - const auto group = - reinterpret_cast(®s.values[r]); - const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch 
(vertex_binding.fetch_constant % 3) { - case 0: - fetch = &group->vertex_fetch_0; - break; - case 1: - fetch = &group->vertex_fetch_1; - break; - case 2: - fetch = &group->vertex_fetch_2; - break; - } - assert_true(fetch->endian == xenos::Endian::k8in32); + xe_gpu_vertex_fetch_t fetch = + regs.GetVertexFetch(vertex_binding.fetch_constant); + assert_true(fetch.endian == xenos::Endian::k8in32); char tree_root_id[32]; sprintf(tree_root_id, "#vertices_root_%d", vertex_binding.fetch_constant); if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s", - vertex_binding.fetch_constant, fetch->address << 2, - fetch->size * 4, - kEndiannessNames[int(fetch->endian)])) { + vertex_binding.fetch_constant, fetch.address << 2, + fetch.size * 4, + kEndiannessNames[int(fetch.endian)])) { ImGui::BeginChild("#vertices", ImVec2(0, 300)); DrawVertexFetcher(shader, vertex_binding, fetch); ImGui::EndChild(); @@ -1823,7 +1812,7 @@ void TraceViewer::DrawStateUI() { ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6, (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1834,8 +1823,9 @@ void TraceViewer::DrawStateUI() { i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) { ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4); ImGui::NextColumn(); - ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32, - regs[i + 2].f32, regs[i + 3].f32); + ImGui::Text("%f, %f, %f, %f", regs.Get(i + 0), + regs.Get(i + 1), regs.Get(i + 2), + regs.Get(i + 3)); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1848,7 +1838,7 @@ void TraceViewer::DrawStateUI() { (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32, (i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); @@ -1859,7 +1849,7 @@ void 
TraceViewer::DrawStateUI() { i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) { ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00); ImGui::NextColumn(); - ImGui::Text("%.8X", regs[i].u32); + ImGui::Text("%.8X", regs[i]); ImGui::NextColumn(); } ImGui::Columns(1); diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index 188a6eb53..e5b7307fb 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -123,7 +123,7 @@ class TraceViewer : public xe::ui::WindowedApp { void DrawVertexFetcher(Shader* shader, const Shader::VertexBinding& vertex_binding, - const xenos::xe_gpu_vertex_fetch_t* fetch); + const xenos::xe_gpu_vertex_fetch_t& fetch); TraceViewerWindowListener window_listener_; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 118797a5d..317dd1cb7 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2177,6 +2177,11 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return IssueCopy(); } + const ui::vulkan::VulkanProvider::DeviceInfo& device_info = + GetVulkanProvider().device_info(); + + memexport_ranges_.clear(); + // Vertex shader analysis. auto vertex_shader = static_cast(active_vertex_shader()); if (!vertex_shader) { @@ -2184,7 +2189,14 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); - bool memexport_used_vertex = vertex_shader->memexport_eM_written() != 0; + // TODO(Triang3l): If the shader uses memory export, but + // vertexPipelineStoresAndAtomics is not supported, convert the vertex shader + // to a compute shader and dispatch it after the draw if the draw doesn't use + // tessellation. 
+ if (vertex_shader->memexport_eM_written() != 0 && + device_info.vertexPipelineStoresAndAtomics) { + draw_util::AddMemExportRanges(regs, *vertex_shader, memexport_ranges_); + } // Pixel shader analysis. bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); @@ -2207,12 +2219,15 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } else { // Disabling pixel shader for this case is also required by the pipeline // cache. - if (!memexport_used_vertex) { + if (memexport_ranges_.empty()) { // This draw has no effect. return true; } } - // TODO(Triang3l): Memory export. + if (pixel_shader && pixel_shader->memexport_eM_written() != 0 && + device_info.fragmentStoresAndAtomics) { + draw_util::AddMemExportRanges(regs, *pixel_shader, memexport_ranges_); + } uint32_t ps_param_gen_pos = UINT32_MAX; uint32_t interpolator_mask = @@ -2428,9 +2443,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, current_guest_graphics_pipeline_layout_ = pipeline_layout; } - const ui::vulkan::VulkanProvider::DeviceInfo& device_info = - GetVulkanProvider().device_info(); - bool host_render_targets_used = render_target_cache_->GetPath() == RenderTargetCache::Path::kHostRenderTargets; @@ -2503,8 +2515,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, (uint64_t(1) << (vfetch_index & 63))) { continue; } - const auto& vfetch_constant = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + xenos::xe_gpu_vertex_fetch_t vfetch_constant = + regs.GetVertexFetch(vfetch_index); switch (vfetch_constant.type) { case xenos::FetchConstantType::kVertex: break; @@ -2537,9 +2549,39 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, << (vfetch_index & 63); } + // Synchronize the memory pages backing memory scatter export streams, and + // calculate the range that includes the streams for the buffer barrier. 
+ uint32_t memexport_extent_start = UINT32_MAX, memexport_extent_end = 0; + for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) { + uint32_t memexport_range_base_bytes = memexport_range.base_address_dwords + << 2; + if (!shared_memory_->RequestRange(memexport_range_base_bytes, + memexport_range.size_bytes)) { + XELOGE( + "Failed to request memexport stream at 0x{:08X} (size {}) in the " + "shared memory", + memexport_range_base_bytes, memexport_range.size_bytes); + return false; + } + memexport_extent_start = + std::min(memexport_extent_start, memexport_range_base_bytes); + memexport_extent_end = + std::max(memexport_extent_end, + memexport_range_base_bytes + memexport_range.size_bytes); + } + // Insert the shared memory barrier if needed. - // TODO(Triang3l): Memory export. - shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + // TODO(Triang3l): Find some PM4 command that can be used for indication of + // when memexports should be awaited instead of inserting the barrier in Use + // every time if memory export was done in the previous draw? + if (memexport_extent_start < memexport_extent_end) { + shared_memory_->Use( + VulkanSharedMemory::Usage::kGuestDrawReadWrite, + std::make_pair(memexport_extent_start, + memexport_extent_end - memexport_extent_start)); + } else { + shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + } // After all commands that may dispatch, copy or insert barriers, submit the // barriers (may end the render pass), and (re)enter the render pass before @@ -2584,6 +2626,12 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0); } + // Invalidate textures in memexported memory and watch for changes. 
+ for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) { + shared_memory_->RangeWrittenByGpu(memexport_range.base_address_dwords << 2, + memexport_range.size_bytes, false); + } + return true; } @@ -3306,10 +3354,10 @@ void VulkanCommandProcessor::UpdateDynamicState( // Blend constants. float blend_constants[] = { - regs[XE_GPU_REG_RB_BLEND_RED].f32, - regs[XE_GPU_REG_RB_BLEND_GREEN].f32, - regs[XE_GPU_REG_RB_BLEND_BLUE].f32, - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + regs.Get(XE_GPU_REG_RB_BLEND_RED), + regs.Get(XE_GPU_REG_RB_BLEND_GREEN), + regs.Get(XE_GPU_REG_RB_BLEND_BLUE), + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA), }; dynamic_blend_constants_update_needed_ |= std::memcmp(dynamic_blend_constants_, blend_constants, @@ -3455,7 +3503,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( const RegisterFile& regs = *register_file_; auto pa_cl_vte_cntl = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); - float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + auto rb_alpha_ref = regs.Get(XE_GPU_REG_RB_ALPHA_REF); auto rb_colorcontrol = regs.Get(); auto rb_depth_info = regs.Get(); auto rb_stencilrefmask = regs.Get(); @@ -3463,7 +3511,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( regs.Get(XE_GPU_REG_RB_STENCILREFMASK_BF); auto rb_surface_info = regs.Get(); auto vgt_draw_initiator = regs.Get(); - int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); + auto vgt_indx_offset = regs.Get(XE_GPU_REG_VGT_INDX_OFFSET); bool edram_fragment_shader_interlock = render_target_cache_->GetPath() == @@ -3776,7 +3824,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; system_constants_.edram_rt_format_flags[i] = format_flags; uint32_t blend_factors_ops = - regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != 
blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -3805,22 +3853,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (primitive_polygonal) { if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); } if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE); poly_offset_back_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET); } } else { if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE); poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + regs.Get(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET); poly_offset_back_scale = poly_offset_front_scale; poly_offset_back_offset = poly_offset_front_offset; } @@ -3883,21 +3931,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } dirty |= system_constants_.edram_blend_constant[0] != - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); system_constants_.edram_blend_constant[0] = - regs[XE_GPU_REG_RB_BLEND_RED].f32; + regs.Get(XE_GPU_REG_RB_BLEND_RED); dirty |= system_constants_.edram_blend_constant[1] != - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); system_constants_.edram_blend_constant[1] = - regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + regs.Get(XE_GPU_REG_RB_BLEND_GREEN); dirty |= system_constants_.edram_blend_constant[2] != - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); 
system_constants_.edram_blend_constant[2] = - regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + regs.Get(XE_GPU_REG_RB_BLEND_BLUE); dirty |= system_constants_.edram_blend_constant[3] != - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); system_constants_.edram_blend_constant[3] = - regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + regs.Get(XE_GPU_REG_RB_BLEND_ALPHA); } if (dirty) { @@ -3924,10 +3972,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). - assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || - regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || - regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 || + regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 || + regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000); // Check if the float constant layout is still the same and get the counts. 
const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); @@ -4022,8 +4070,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], sizeof(float) * 4); mapping += sizeof(float) * 4; } @@ -4054,8 +4101,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, float_constant_map_entry &= ~(1ull << float_constant_index); std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, + (float_constant_index << 2)], sizeof(float) * 4); mapping += sizeof(float) * 4; } @@ -4076,7 +4122,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, return false; } buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize); - std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031], kBoolLoopConstantsSize); current_constant_buffers_up_to_date_ |= UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop; @@ -4094,7 +4140,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, return false; } buffer_info.range = VkDeviceSize(kFetchConstantsSize); - std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0], kFetchConstantsSize); current_constant_buffers_up_to_date_ |= UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 5ebddf604..bd5cfa84f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -744,6 +744,9 @@ class VulkanCommandProcessor final : 
public CommandProcessor { // System shader constants. SpirvShaderTranslator::SystemConstants system_constants_; + + // Temporary storage for memexport stream constants used in the draw. + std::vector memexport_ranges_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index f91cc4e6b..eb2ee9b21 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -718,7 +718,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( [common_blend_rt_index]), (((normalized_color_mask & ~(uint32_t(0b1111) << (4 * common_blend_rt_index))) - ? regs[XE_GPU_REG_RB_COLOR_MASK].u32 + ? regs[XE_GPU_REG_RB_COLOR_MASK] : normalized_color_mask) >> (4 * common_blend_rt_index)) & 0b1111, diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index bf1cda68d..8f7887b4e 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -4156,21 +4156,16 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( builder.createAccessChain(spv::StorageClassPushConstant, push_constants, id_vector_temp), spv::NoPrecision); - spv::Id stencil_sample_passed = builder.createBinOp( - spv::OpINotEqual, type_bool, - builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed, - stencil_mask_constant), - builder.makeUintConstant(0)); - spv::Block& stencil_bit_kill_block = builder.makeNewBlock(); - spv::Block& stencil_bit_merge_block = builder.makeNewBlock(); - builder.createSelectionMerge(&stencil_bit_merge_block, - spv::SelectionControlMaskNone); - builder.createConditionalBranch(stencil_sample_passed, - &stencil_bit_merge_block, - &stencil_bit_kill_block); - builder.setBuildPoint(&stencil_bit_kill_block); + SpirvBuilder::IfBuilder stencil_kill_if( + builder.createBinOp( + spv::OpIEqual, type_bool, + builder.createBinOp(spv::OpBitwiseAnd, type_uint, 
packed, + stencil_mask_constant), + builder.makeUintConstant(0)), + spv::SelectionControlMaskNone, builder); builder.createNoResultOp(spv::OpKill); - builder.setBuildPoint(&stencil_bit_merge_block); + // OpKill terminates the block. + stencil_kill_if.makeEndIf(false); } } break; } diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index a10c1d44e..2ce46119b 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters( const VulkanShader::SamplerBinding& binding) const { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + xenos::xe_gpu_texture_fetch_t fetch = + regs.GetTextureFetch(binding.fetch_constant); SamplerParameters parameters; @@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture( uint32_t& width_scaled_out, uint32_t& height_scaled_out, xenos::TextureFormat& format_out) { const auto& regs = register_file(); - const auto& fetch = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0); TextureKey key; BindingInfoFromFetchConstant(fetch, key, nullptr); if (!key.is_valid || key.base_page == 0 || diff --git a/src/xenia/gpu/xenos.cc b/src/xenia/gpu/xenos.cc index f1a3dff34..3f86ac256 100644 --- a/src/xenia/gpu/xenos.cc +++ b/src/xenia/gpu/xenos.cc @@ -8,6 +8,7 @@ */ #include "xenia/gpu/xenos.h" +#include "xenia/base/memory.h" namespace xe { namespace gpu { @@ -22,7 +23,7 @@ namespace xenos { float PWLGammaToLinear(float gamma) { // Not found in game executables, so just using the logic similar to that in // the Source Engine. 
- gamma = xe::saturate_unsigned(gamma); + gamma = xe::saturate(gamma); float scale, offset; // While the compiled code for linear to gamma conversion uses `vcmpgtfp // constant, value` comparison (constant > value, or value < constant), it's @@ -63,7 +64,7 @@ float PWLGammaToLinear(float gamma) { } float LinearToPWLGamma(float linear) { - linear = xe::saturate_unsigned(linear); + linear = xe::saturate(linear); float scale, offset; // While the compiled code uses `vcmpgtfp constant, value` comparison // (constant > value, or value < constant), it's preferable to use `value >= @@ -114,8 +115,8 @@ float Float7e3To32(uint32_t f10) { exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); mantissa = (mantissa << mantissa_lzcnt) & 0x7F; } - uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3); - return *reinterpret_cast(&f32); + return xe::memory::Reinterpret( + uint32_t(((exponent + 124) << 23) | (mantissa << 3))); } // Based on CFloat24 from d3dref9.dll and the 6e4 code from: @@ -127,7 +128,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) noexcept { // Positive only, and not -0 or NaN. return 0; } - uint32_t f32u32 = *reinterpret_cast(&f32); + auto f32u32 = xe::memory::Reinterpret(f32); if (f32u32 >= 0x3FFFFFF8) { // Saturate. 
return 0xFFFFFF; @@ -161,8 +162,8 @@ float Float20e4To32(uint32_t f24) noexcept { exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF; } - uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3); - return *reinterpret_cast(&f32); + return xe::memory::Reinterpret( + uint32_t(((exponent + 112) << 23) | (mantissa << 3))); } const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) { @@ -241,4 +242,4 @@ const char* GetPrimitiveTypeEnglishDescription(xenos::PrimitiveType prim_type) { } } // namespace xenos } // namespace gpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/immediate_drawer.cc b/src/xenia/ui/immediate_drawer.cc index fb00be77f..4d3c6bb4e 100644 --- a/src/xenia/ui/immediate_drawer.cc +++ b/src/xenia/ui/immediate_drawer.cc @@ -12,6 +12,7 @@ #include #include "xenia/base/assert.h" +#include "xenia/base/math.h" #include "xenia/ui/graphics_util.h" #include "xenia/ui/presenter.h" @@ -67,24 +68,19 @@ bool ImmediateDrawer::ScissorToRenderTarget(const ImmediateDraw& immediate_draw, } float render_target_width_float = float(render_target_width); float render_target_height_float = float(render_target_height); - // Scale to render target coordinates, drop NaNs (by doing - // std::max(0.0f, variable) in this argument order), and clamp to the render + // Scale to render target coordinates, drop NaNs, and clamp to the render // target size, below which the values are representable as 16p8 fixed-point. 
float scale_x = render_target_width / coordinate_space_width(); float scale_y = render_target_height / coordinate_space_height(); - float x0_float = - std::min(render_target_width_float, - std::max(0.0f, immediate_draw.scissor_left * scale_x)); - float y0_float = - std::min(render_target_height_float, - std::max(0.0f, immediate_draw.scissor_top * scale_y)); + float x0_float = xe::clamp_float(immediate_draw.scissor_left * scale_x, 0.0f, + render_target_width_float); + float y0_float = xe::clamp_float(immediate_draw.scissor_top * scale_y, 0.0f, + render_target_height_float); // Also make sure the size is non-negative. - float x1_float = - std::min(render_target_width_float, - std::max(x0_float, immediate_draw.scissor_right * scale_x)); - float y1_float = - std::min(render_target_height_float, - std::max(y0_float, immediate_draw.scissor_bottom * scale_y)); + float x1_float = xe::clamp_float(immediate_draw.scissor_right * scale_x, + x0_float, render_target_width_float); + float y1_float = xe::clamp_float(immediate_draw.scissor_bottom * scale_y, + y0_float, render_target_height_float); // Top-left - include .5 (0.128 treated as 0 covered, 0.129 as 0 not covered). int32_t x0 = (FloatToD3D11Fixed16p8(x0_float) + 127) >> 8; int32_t y0 = (FloatToD3D11Fixed16p8(y0_float) + 127) >> 8; diff --git a/src/xenia/ui/window_android.cc b/src/xenia/ui/window_android.cc index d67d478d1..8de82f400 100644 --- a/src/xenia/ui/window_android.cc +++ b/src/xenia/ui/window_android.cc @@ -153,16 +153,16 @@ bool AndroidWindow::OnActivitySurfaceMotionEvent(jobject event) { // with out-of-bounds coordinates), when moving the mouse outside the // View, or when starting moving the mouse when the pointer was previously // outside the View in some cases. 
- int32_t mouse_x = int32_t( - std::min(float(GetActualPhysicalWidth()), - std::max(0.0f, jni_env->CallFloatMethod( - event, jni_ids.motion_event_get_x, 0))) + - 0.5f); - int32_t mouse_y = int32_t( - std::min(float(GetActualPhysicalHeight()), - std::max(0.0f, jni_env->CallFloatMethod( - event, jni_ids.motion_event_get_y, 0))) + - 0.5f); + int32_t mouse_x = + int32_t(xe::clamp_float(jni_env->CallFloatMethod( + event, jni_ids.motion_event_get_x, 0), + 0.0f, float(GetActualPhysicalWidth())) + + 0.5f); + int32_t mouse_y = + int32_t(xe::clamp_float(jni_env->CallFloatMethod( + event, jni_ids.motion_event_get_y, 0), + 0.0f, float(GetActualPhysicalHeight())) + + 0.5f); static const MouseEvent::Button kMouseEventButtons[] = { MouseEvent::Button::kLeft, MouseEvent::Button::kRight, MouseEvent::Button::kMiddle, MouseEvent::Button::kX1,