/** ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ #include "xenia/gpu/spirv_shader_translator.h" #include #include #include #include #include #include #include #include "third_party/fmt/include/fmt/format.h" #include "third_party/glslang/SPIRV/GLSL.std.450.h" #include "xenia/base/assert.h" #include "xenia/base/math.h" #include "xenia/base/string_buffer.h" #include "xenia/gpu/spirv_shader.h" namespace xe { namespace gpu { SpirvShaderTranslator::Features::Features(bool all) : spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0), max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)), full_draw_index_uint32(all), vertex_pipeline_stores_and_atomics(all), fragment_stores_and_atomics(all), clip_distance(all), cull_distance(all), image_view_format_swizzle(all), signed_zero_inf_nan_preserve_float32(all), denorm_flush_to_zero_float32(all), rounding_mode_rte_float32(all), fragment_shader_sample_interlock(all), demote_to_helper_invocation(all) {} SpirvShaderTranslator::Features::Features( const ui::vulkan::VulkanProvider::DeviceInfo& device_info) : max_storage_buffer_range(device_info.maxStorageBufferRange), full_draw_index_uint32(device_info.fullDrawIndexUint32), vertex_pipeline_stores_and_atomics( device_info.vertexPipelineStoresAndAtomics), fragment_stores_and_atomics(device_info.fragmentStoresAndAtomics), clip_distance(device_info.shaderClipDistance), cull_distance(device_info.shaderCullDistance), image_view_format_swizzle(device_info.imageViewFormatSwizzle), signed_zero_inf_nan_preserve_float32( device_info.shaderSignedZeroInfNanPreserveFloat32), 
denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32), rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32), fragment_shader_sample_interlock( device_info.fragmentShaderSampleInterlock), demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation) { if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { spirv_version = spv::Spv_1_5; } else if (device_info.ext_1_2_VK_KHR_spirv_1_4) { spirv_version = spv::Spv_1_4; } else if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { spirv_version = spv::Spv_1_3; } else { spirv_version = spv::Spv_1_0; } } uint64_t SpirvShaderTranslator::GetDefaultVertexShaderModification( uint32_t dynamic_addressable_register_count, Shader::HostVertexShaderType host_vertex_shader_type) const { Modification shader_modification; shader_modification.vertex.dynamic_addressable_register_count = dynamic_addressable_register_count; shader_modification.vertex.host_vertex_shader_type = host_vertex_shader_type; return shader_modification.value; } uint64_t SpirvShaderTranslator::GetDefaultPixelShaderModification( uint32_t dynamic_addressable_register_count) const { Modification shader_modification; shader_modification.pixel.dynamic_addressable_register_count = dynamic_addressable_register_count; return shader_modification.value; } std::vector SpirvShaderTranslator::CreateDepthOnlyFragmentShader() { is_depth_only_fragment_shader_ = true; // TODO(Triang3l): Handle in a nicer way (is_depth_only_fragment_shader_ is a // leftover from when a Shader object wasn't used during translation). 
Shader shader(xenos::ShaderType::kPixel, 0, nullptr, 0); StringBuffer instruction_disassembly_buffer; shader.AnalyzeUcode(instruction_disassembly_buffer); Shader::Translation& translation = *shader.GetOrCreateTranslation(0); TranslateAnalyzedShader(translation); is_depth_only_fragment_shader_ = false; return translation.translated_binary(); } void SpirvShaderTranslator::Reset() { ShaderTranslator::Reset(); builder_.reset(); uniform_float_constants_ = spv::NoResult; input_point_coordinates_ = spv::NoResult; input_fragment_coordinates_ = spv::NoResult; input_front_facing_ = spv::NoResult; input_sample_mask_ = spv::NoResult; std::fill(input_output_interpolators_.begin(), input_output_interpolators_.end(), spv::NoResult); output_point_coordinates_ = spv::NoResult; output_point_size_ = spv::NoResult; sampler_bindings_.clear(); texture_bindings_.clear(); main_interface_.clear(); var_main_registers_ = spv::NoResult; var_main_memexport_address_ = spv::NoResult; for (size_t memexport_eM_index = 0; memexport_eM_index < xe::countof(var_main_memexport_data_); ++memexport_eM_index) { var_main_memexport_data_[memexport_eM_index] = spv::NoResult; } var_main_memexport_data_written_ = spv::NoResult; main_memexport_allowed_ = spv::NoResult; var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult; var_main_kill_pixel_ = spv::NoResult; var_main_fsi_color_written_ = spv::NoResult; main_switch_op_.reset(); main_switch_next_pc_phi_operands_.clear(); cf_exec_conditional_merge_ = nullptr; cf_instruction_predicate_merge_ = nullptr; } uint32_t SpirvShaderTranslator::GetModificationRegisterCount() const { Modification modification = GetSpirvShaderModification(); return is_vertex_shader() ? modification.vertex.dynamic_addressable_register_count : modification.pixel.dynamic_addressable_register_count; } void SpirvShaderTranslator::StartTranslation() { // TODO(Triang3l): Logger. 
builder_ = std::make_unique( features_.spirv_version, (kSpirvMagicToolId << 16) | 1, nullptr); builder_->addCapability(IsSpirvTessEvalShader() ? spv::CapabilityTessellation : spv::CapabilityShader); if (features_.spirv_version < spv::Spv_1_4) { if (features_.signed_zero_inf_nan_preserve_float32 || features_.denorm_flush_to_zero_float32 || features_.rounding_mode_rte_float32) { builder_->addExtension("SPV_KHR_float_controls"); } } ext_inst_glsl_std_450_ = builder_->import("GLSL.std.450"); builder_->setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); builder_->setSource(spv::SourceLanguageUnknown, 0); type_void_ = builder_->makeVoidType(); type_bool_ = builder_->makeBoolType(); type_bool2_ = builder_->makeVectorType(type_bool_, 2); type_bool3_ = builder_->makeVectorType(type_bool_, 3); type_bool4_ = builder_->makeVectorType(type_bool_, 4); type_int_ = builder_->makeIntType(32); type_int2_ = builder_->makeVectorType(type_int_, 2); type_int3_ = builder_->makeVectorType(type_int_, 3); type_int4_ = builder_->makeVectorType(type_int_, 4); type_uint_ = builder_->makeUintType(32); type_uint2_ = builder_->makeVectorType(type_uint_, 2); type_uint3_ = builder_->makeVectorType(type_uint_, 3); type_uint4_ = builder_->makeVectorType(type_uint_, 4); type_float_ = builder_->makeFloatType(32); type_float2_ = builder_->makeVectorType(type_float_, 2); type_float3_ = builder_->makeVectorType(type_float_, 3); type_float4_ = builder_->makeVectorType(type_float_, 4); const_int_0_ = builder_->makeIntConstant(0); id_vector_temp_.clear(); for (uint32_t i = 0; i < 4; ++i) { id_vector_temp_.push_back(const_int_0_); } const_int4_0_ = builder_->makeCompositeConstant(type_int4_, id_vector_temp_); const_uint_0_ = builder_->makeUintConstant(0); id_vector_temp_.clear(); for (uint32_t i = 0; i < 4; ++i) { id_vector_temp_.push_back(const_uint_0_); } const_uint4_0_ = builder_->makeCompositeConstant(type_uint4_, id_vector_temp_); const_float_0_ = builder_->makeFloatConstant(0.0f); 
id_vector_temp_.clear(); id_vector_temp_.push_back(const_float_0_); for (uint32_t i = 1; i < 4; ++i) { id_vector_temp_.push_back(const_float_0_); const_float_vectors_0_[i] = builder_->makeCompositeConstant( type_float_vectors_[i], id_vector_temp_); } const_float_1_ = builder_->makeFloatConstant(1.0f); id_vector_temp_.clear(); id_vector_temp_.push_back(const_float_1_); for (uint32_t i = 1; i < 4; ++i) { id_vector_temp_.push_back(const_float_1_); const_float_vectors_1_[i] = builder_->makeCompositeConstant( type_float_vectors_[i], id_vector_temp_); } id_vector_temp_.clear(); id_vector_temp_.push_back(const_float_0_); id_vector_temp_.push_back(const_float_1_); const_float2_0_1_ = builder_->makeCompositeConstant(type_float2_, id_vector_temp_); // Common uniform buffer - system constants. struct SystemConstant { const char* name; size_t offset; spv::Id type; }; spv::Id type_float4_array_4 = builder_->makeArrayType( type_float4_, builder_->makeUintConstant(4), sizeof(float) * 4); builder_->addDecoration(type_float4_array_4, spv::DecorationArrayStride, sizeof(float) * 4); spv::Id type_uint4_array_2 = builder_->makeArrayType( type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4); builder_->addDecoration(type_uint4_array_2, spv::DecorationArrayStride, sizeof(uint32_t) * 4); spv::Id type_uint4_array_4 = builder_->makeArrayType( type_uint4_, builder_->makeUintConstant(4), sizeof(uint32_t) * 4); builder_->addDecoration(type_uint4_array_4, spv::DecorationArrayStride, sizeof(uint32_t) * 4); const SystemConstant system_constants[] = { {"flags", offsetof(SystemConstants, flags), type_uint_}, {"vertex_index_load_address", offsetof(SystemConstants, vertex_index_load_address), type_uint_}, {"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian), type_uint_}, {"vertex_base_index", offsetof(SystemConstants, vertex_base_index), type_int_}, {"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_}, {"point_vertex_diameter_min", offsetof(SystemConstants, 
point_vertex_diameter_min), type_float_}, {"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_}, {"point_vertex_diameter_max", offsetof(SystemConstants, point_vertex_diameter_max), type_float_}, {"point_constant_diameter", offsetof(SystemConstants, point_constant_diameter), type_float2_}, {"point_screen_diameter_to_ndc_radius", offsetof(SystemConstants, point_screen_diameter_to_ndc_radius), type_float2_}, {"texture_swizzled_signs", offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2}, {"texture_swizzles", offsetof(SystemConstants, texture_swizzles), type_uint4_array_4}, {"alpha_test_reference", offsetof(SystemConstants, alpha_test_reference), type_float_}, {"edram_32bpp_tile_pitch_dwords_scaled", offsetof(SystemConstants, edram_32bpp_tile_pitch_dwords_scaled), type_uint_}, {"edram_depth_base_dwords_scaled", offsetof(SystemConstants, edram_depth_base_dwords_scaled), type_uint_}, {"color_exp_bias", offsetof(SystemConstants, color_exp_bias), type_float4_}, {"edram_poly_offset_front_scale", offsetof(SystemConstants, edram_poly_offset_front_scale), type_float_}, {"edram_poly_offset_back_scale", offsetof(SystemConstants, edram_poly_offset_back_scale), type_float_}, {"edram_poly_offset_front_offset", offsetof(SystemConstants, edram_poly_offset_front_offset), type_float_}, {"edram_poly_offset_back_offset", offsetof(SystemConstants, edram_poly_offset_back_offset), type_float_}, {"edram_stencil_front", offsetof(SystemConstants, edram_stencil_front), type_uint2_}, {"edram_stencil_back", offsetof(SystemConstants, edram_stencil_back), type_uint2_}, {"edram_rt_base_dwords_scaled", offsetof(SystemConstants, edram_rt_base_dwords_scaled), type_uint4_}, {"edram_rt_format_flags", offsetof(SystemConstants, edram_rt_format_flags), type_uint4_}, {"edram_rt_blend_factors_ops", offsetof(SystemConstants, edram_rt_blend_factors_ops), type_uint4_}, {"edram_rt_keep_mask", offsetof(SystemConstants, edram_rt_keep_mask), type_uint4_array_2}, {"edram_rt_clamp", 
offsetof(SystemConstants, edram_rt_clamp), type_float4_array_4}, {"edram_blend_constant", offsetof(SystemConstants, edram_blend_constant), type_float4_}, }; id_vector_temp_.clear(); id_vector_temp_.reserve(xe::countof(system_constants)); for (size_t i = 0; i < xe::countof(system_constants); ++i) { id_vector_temp_.push_back(system_constants[i].type); } spv::Id type_system_constants = builder_->makeStructType(id_vector_temp_, "XeSystemConstants"); for (size_t i = 0; i < xe::countof(system_constants); ++i) { const SystemConstant& system_constant = system_constants[i]; builder_->addMemberName(type_system_constants, static_cast(i), system_constant.name); builder_->addMemberDecoration( type_system_constants, static_cast(i), spv::DecorationOffset, int(system_constant.offset)); } builder_->addDecoration(type_system_constants, spv::DecorationBlock); uniform_system_constants_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassUniform, type_system_constants, "xe_uniform_system_constants"); builder_->addDecoration(uniform_system_constants_, spv::DecorationDescriptorSet, int(kDescriptorSetConstants)); builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, int(kConstantBufferSystem)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_system_constants_); } bool memexport_used = IsMemoryExportUsed(); if (!is_depth_only_fragment_shader_) { // Common uniform buffer - float constants. uint32_t float_constant_count = current_shader().constant_register_map().float_count; if (float_constant_count) { id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeArrayType( type_float4_, builder_->makeUintConstant(float_constant_count), sizeof(float) * 4)); // Currently (as of October 24, 2020) makeArrayType only uses the stride // to check if deduplication can be done - the array stride decoration // needs to be applied explicitly. 
builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, sizeof(float) * 4); spv::Id type_float_constants = builder_->makeStructType(id_vector_temp_, "XeFloatConstants"); builder_->addMemberName(type_float_constants, 0, "float_constants"); builder_->addMemberDecoration(type_float_constants, 0, spv::DecorationOffset, 0); builder_->addDecoration(type_float_constants, spv::DecorationBlock); uniform_float_constants_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassUniform, type_float_constants, "xe_uniform_float_constants"); builder_->addDecoration(uniform_float_constants_, spv::DecorationDescriptorSet, int(kDescriptorSetConstants)); builder_->addDecoration( uniform_float_constants_, spv::DecorationBinding, int(is_pixel_shader() ? kConstantBufferFloatPixel : kConstantBufferFloatVertex)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_float_constants_); } } // Common uniform buffer - bool and loop constants. // Uniform buffers must have std140 packing, so using arrays of 4-component // vectors instead of scalar arrays because the latter would have padding to // 16 bytes in each element. id_vector_temp_.clear(); // 256 bool constants. id_vector_temp_.push_back(builder_->makeArrayType( type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4)); builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, sizeof(uint32_t) * 4); // 32 loop constants. 
id_vector_temp_.push_back(builder_->makeArrayType( type_uint4_, builder_->makeUintConstant(8), sizeof(uint32_t) * 4)); builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, sizeof(uint32_t) * 4); spv::Id type_bool_loop_constants = builder_->makeStructType(id_vector_temp_, "XeBoolLoopConstants"); builder_->addMemberName(type_bool_loop_constants, 0, "bool_constants"); builder_->addMemberDecoration(type_bool_loop_constants, 0, spv::DecorationOffset, 0); builder_->addMemberName(type_bool_loop_constants, 1, "loop_constants"); builder_->addMemberDecoration(type_bool_loop_constants, 1, spv::DecorationOffset, sizeof(uint32_t) * 8); builder_->addDecoration(type_bool_loop_constants, spv::DecorationBlock); uniform_bool_loop_constants_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassUniform, type_bool_loop_constants, "xe_uniform_bool_loop_constants"); builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationDescriptorSet, int(kDescriptorSetConstants)); builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding, int(kConstantBufferBoolLoop)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_bool_loop_constants_); } // Common uniform buffer - fetch constants (32 x 6 uints packed in std140 as // 4-component vectors). 
id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeArrayType( type_uint4_, builder_->makeUintConstant(32 * 6 / 4), sizeof(uint32_t) * 4)); builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, sizeof(uint32_t) * 4); spv::Id type_fetch_constants = builder_->makeStructType(id_vector_temp_, "XeFetchConstants"); builder_->addMemberName(type_fetch_constants, 0, "fetch_constants"); builder_->addMemberDecoration(type_fetch_constants, 0, spv::DecorationOffset, 0); builder_->addDecoration(type_fetch_constants, spv::DecorationBlock); uniform_fetch_constants_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassUniform, type_fetch_constants, "xe_uniform_fetch_constants"); builder_->addDecoration(uniform_fetch_constants_, spv::DecorationDescriptorSet, int(kDescriptorSetConstants)); builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, int(kConstantBufferFetch)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_fetch_constants_); } // Common storage buffers - shared memory uint[], each 128 MB or larger, // depending on what's possible on the device. id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_)); // Storage buffers have std430 packing, no padding to 4-component vectors. builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, sizeof(uint32_t)); spv::Id type_shared_memory = builder_->makeStructType(id_vector_temp_, "XeSharedMemory"); builder_->addMemberName(type_shared_memory, 0, "shared_memory"); builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationRestrict); if (!memexport_used) { builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationNonWritable); } builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, 0); builder_->addDecoration(type_shared_memory, features_.spirv_version >= spv::Spv_1_3 ? 
spv::DecorationBlock : spv::DecorationBufferBlock); unsigned int shared_memory_binding_count = 1 << GetSharedMemoryStorageBufferCountLog2(); if (shared_memory_binding_count > 1) { type_shared_memory = builder_->makeArrayType( type_shared_memory, builder_->makeUintConstant(shared_memory_binding_count), 0); } buffers_shared_memory_ = builder_->createVariable( spv::NoPrecision, features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer : spv::StorageClassUniform, type_shared_memory, "xe_shared_memory"); builder_->addDecoration(buffers_shared_memory_, spv::DecorationDescriptorSet, int(kDescriptorSetSharedMemoryAndEdram)); builder_->addDecoration(buffers_shared_memory_, spv::DecorationBinding, 0); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(buffers_shared_memory_); } } if (is_vertex_shader()) { StartVertexOrTessEvalShaderBeforeMain(); } else if (is_pixel_shader()) { StartFragmentShaderBeforeMain(); } // Begin the main function. std::vector main_param_types; std::vector> main_precisions; spv::Block* function_main_entry; function_main_ = builder_->makeFunctionEntry( spv::NoPrecision, type_void_, "main", main_param_types, main_precisions, &function_main_entry); // Load the flags system constant since it may be used in many places. id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); main_system_constant_flags_ = builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision); if (!is_depth_only_fragment_shader_) { // Begin ucode translation. Initialize everything, even without defined // defaults, for safety. 
var_main_predicate_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_bool_, "xe_var_predicate", builder_->makeBoolConstant(false)); var_main_loop_count_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_uint4_, "xe_var_loop_count", const_uint4_0_); var_main_address_register_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_int_, "xe_var_address_register", const_int_0_); var_main_loop_address_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_int4_, "xe_var_loop_address", const_int4_0_); var_main_previous_scalar_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float_, "xe_var_previous_scalar", const_float_0_); var_main_vfetch_address_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_int_, "xe_var_vfetch_address", const_int_0_); var_main_tfetch_lod_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float_, "xe_var_tfetch_lod", const_float_0_); var_main_tfetch_gradients_h_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float3_, "xe_var_tfetch_gradients_h", const_float3_0_); var_main_tfetch_gradients_v_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float3_, "xe_var_tfetch_gradients_v", const_float3_0_); if (register_count()) { spv::Id type_register_array = builder_->makeArrayType( type_float4_, builder_->makeUintConstant(register_count()), 0); var_main_registers_ = builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, type_register_array, "xe_var_registers"); } if (memexport_used) { var_main_memexport_address_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float4_, "xe_var_memexport_address", const_float4_0_); uint8_t memexport_eM_remaining = current_shader().memexport_eM_written(); uint32_t memexport_eM_index; while ( xe::bit_scan_forward(memexport_eM_remaining, 
&memexport_eM_index)) { memexport_eM_remaining &= ~(uint8_t(1) << memexport_eM_index); var_main_memexport_data_[memexport_eM_index] = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float4_, fmt::format("xe_var_memexport_data_{}", memexport_eM_index).c_str(), const_float4_0_); } var_main_memexport_data_written_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_uint_, "xe_var_memexport_data_written", const_uint_0_); } } // Write the execution model-specific prologue with access to variables in the // main function. if (is_vertex_shader()) { StartVertexOrTessEvalShaderInMain(); } else if (is_pixel_shader()) { StartFragmentShaderInMain(); } if (is_depth_only_fragment_shader_) { return; } // Open the main loop. spv::Block& main_loop_pre_header = *builder_->getBuildPoint(); main_loop_header_ = &builder_->makeNewBlock(); spv::Block& main_loop_body = builder_->makeNewBlock(); // Added later because the body has nested control flow, but according to the // specification: // "The order of blocks in a function must satisfy the rule that blocks appear // before all blocks they dominate." main_loop_continue_ = new spv::Block(builder_->getUniqueId(), *function_main_); main_loop_merge_ = new spv::Block(builder_->getUniqueId(), *function_main_); builder_->createBranch(main_loop_header_); // If no jumps, don't create a switch, but still create a loop so exece can // break. bool has_main_switch = !current_shader().label_addresses().empty(); // Main loop header - based on whether it's the first iteration (entered from // the function or from the continuation), choose the program counter. builder_->setBuildPoint(main_loop_header_); spv::Id main_loop_pc_current = spv::NoResult; if (has_main_switch) { // OpPhi must be the first in the block. 
id_vector_temp_.clear(); id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(main_loop_pre_header.getId()); main_loop_pc_next_ = builder_->getUniqueId(); id_vector_temp_.push_back(main_loop_pc_next_); id_vector_temp_.push_back(main_loop_continue_->getId()); main_loop_pc_current = builder_->createOp(spv::OpPhi, type_int_, id_vector_temp_); } uint_vector_temp_.clear(); builder_->createLoopMerge(main_loop_merge_, main_loop_continue_, spv::LoopControlDontUnrollMask, uint_vector_temp_); builder_->createBranch(&main_loop_body); // Main loop body. builder_->setBuildPoint(&main_loop_body); if (has_main_switch) { // Create the program counter switch with cases for every label and for // label 0. main_switch_header_ = builder_->getBuildPoint(); main_switch_merge_ = new spv::Block(builder_->getUniqueId(), *function_main_); builder_->createSelectionMerge(main_switch_merge_, spv::SelectionControlDontFlattenMask); main_switch_op_ = std::make_unique(spv::OpSwitch); main_switch_op_->addIdOperand(main_loop_pc_current); main_switch_op_->addIdOperand(main_switch_merge_->getId()); // The default case (the merge here) must have the header as a predecessor. main_switch_merge_->addPredecessor(main_switch_header_); // The instruction will be inserted later, when all cases are filled. // Insert and enter case 0. spv::Block* main_switch_case_0_block = new spv::Block(builder_->getUniqueId(), *function_main_); main_switch_op_->addImmediateOperand(0); main_switch_op_->addIdOperand(main_switch_case_0_block->getId()); // Every switch case must have the OpSelectionMerge/OpSwitch block as a // predecessor. main_switch_case_0_block->addPredecessor(main_switch_header_); function_main_->addBlock(main_switch_case_0_block); builder_->setBuildPoint(main_switch_case_0_block); } } std::vector SpirvShaderTranslator::CompleteTranslation() { if (!is_depth_only_fragment_shader_) { // Close flow control within the last switch case. 
CloseExecConditionals(); bool has_main_switch = !current_shader().label_addresses().empty(); // After the final exec (if it happened to be not exece, which would already // have a break branch), break from the switch if it exists, or from the // loop it doesn't. if (!builder_->getBuildPoint()->isTerminated()) { builder_->createBranch(has_main_switch ? main_switch_merge_ : main_loop_merge_); } if (has_main_switch) { // Insert the switch instruction with all cases added as operands. builder_->setBuildPoint(main_switch_header_); builder_->getBuildPoint()->addInstruction(std::move(main_switch_op_)); // Build the main switch merge, breaking out of the loop after falling // through the end or breaking from exece (only continuing if a jump - // from a guest loop or from jmp/call - was made). function_main_->addBlock(main_switch_merge_); builder_->setBuildPoint(main_switch_merge_); builder_->createBranch(main_loop_merge_); } // Main loop continuation - choose the program counter based on the path // taken (-1 if not from a jump as a safe fallback, which would result in // not hitting any switch case and reaching the final break in the body). function_main_->addBlock(main_loop_continue_); builder_->setBuildPoint(main_loop_continue_); if (has_main_switch) { // OpPhi, if added, must be the first in the block. // If labels were added, but not jumps (for example, due to the call // instruction not being implemented as of October 18, 2020), send an // impossible program counter value (-1) to the OpPhi at the next // iteration. if (main_switch_next_pc_phi_operands_.empty()) { main_switch_next_pc_phi_operands_.push_back( builder_->makeIntConstant(-1)); } std::unique_ptr main_loop_pc_next_op = std::make_unique( main_loop_pc_next_, type_int_, main_switch_next_pc_phi_operands_.size() >= 2 ? 
spv::OpPhi : spv::OpCopyObject); for (spv::Id operand : main_switch_next_pc_phi_operands_) { main_loop_pc_next_op->addIdOperand(operand); } builder_->getBuildPoint()->addInstruction( std::move(main_loop_pc_next_op)); } builder_->createBranch(main_loop_header_); // Add the main loop merge block and go back to the function. function_main_->addBlock(main_loop_merge_); builder_->setBuildPoint(main_loop_merge_); } // Write data for the last memexport. ExportToMemory( current_shader().memexport_eM_potentially_written_before_end()); if (is_vertex_shader()) { CompleteVertexOrTessEvalShaderInMain(); } else if (is_pixel_shader()) { CompleteFragmentShaderInMain(); } // End the main function. builder_->leaveFunction(); // Make the main function the entry point. spv::ExecutionModel execution_model; if (is_pixel_shader()) { execution_model = spv::ExecutionModelFragment; builder_->addExecutionMode(function_main_, spv::ExecutionModeOriginUpperLeft); if (IsExecutionModeEarlyFragmentTests()) { builder_->addExecutionMode(function_main_, spv::ExecutionModeEarlyFragmentTests); } if (edram_fragment_shader_interlock_) { // Accessing per-sample values, so interlocking just when there's common // coverage is enough if the device exposes that. if (features_.fragment_shader_sample_interlock) { builder_->addCapability( spv::CapabilityFragmentShaderSampleInterlockEXT); builder_->addExecutionMode(function_main_, spv::ExecutionModeSampleInterlockOrderedEXT); } else { builder_->addCapability(spv::CapabilityFragmentShaderPixelInterlockEXT); builder_->addExecutionMode(function_main_, spv::ExecutionModePixelInterlockOrderedEXT); } } } else { assert_true(is_vertex_shader()); execution_model = IsSpirvTessEvalShader() ? spv::ExecutionModelTessellationEvaluation : spv::ExecutionModelVertex; } if (features_.denorm_flush_to_zero_float32) { // Flush to zero, similar to the real hardware, also for things like Shader // Model 3 multiplication emulation. 
builder_->addCapability(spv::CapabilityDenormFlushToZero); builder_->addExecutionMode(function_main_, spv::ExecutionModeDenormFlushToZero, 32); } if (features_.signed_zero_inf_nan_preserve_float32) { // Signed zero used to get VFACE from ps_param_gen, also special behavior // for infinity in certain instructions (such as logarithm, reciprocal, // muls_prev2). builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve); builder_->addExecutionMode(function_main_, spv::ExecutionModeSignedZeroInfNanPreserve, 32); } if (features_.rounding_mode_rte_float32) { builder_->addCapability(spv::CapabilityRoundingModeRTE); builder_->addExecutionMode(function_main_, spv::ExecutionModeRoundingModeRTE, 32); } spv::Instruction* entry_point = builder_->addEntryPoint(execution_model, function_main_, "main"); for (spv::Id interface_id : main_interface_) { entry_point->addIdOperand(interface_id); } if (!is_depth_only_fragment_shader_) { // Specify the binding indices for samplers when the number of textures is // known, as samplers are located after images in the texture descriptor // set. size_t texture_binding_count = texture_bindings_.size(); size_t sampler_binding_count = sampler_bindings_.size(); for (size_t i = 0; i < sampler_binding_count; ++i) { builder_->addDecoration(sampler_bindings_[i].variable, spv::DecorationBinding, int(texture_binding_count + i)); } } // TODO(Triang3l): Avoid copy? 
std::vector module_uints;
  builder_->dump(module_uints);
  std::vector module_bytes;
  module_bytes.reserve(sizeof(unsigned int) * module_uints.size());
  module_bytes.insert(module_bytes.cend(),
                      reinterpret_cast(module_uints.data()),
                      reinterpret_cast(module_uints.data()) +
                          sizeof(unsigned int) * module_uints.size());
  return module_bytes;
}

// Copies the texture and sampler bindings collected by the translator into the
// SpirvShader that owns the current translation. Does nothing if the
// translation is invalid or the shader is not a SpirvShader. The
// bindings_setup_entered_ flag is test-and-set, so only the first caller that
// reaches this point fills the binding lists.
void SpirvShaderTranslator::PostTranslation() {
  Shader::Translation& translation = current_translation();
  if (!translation.is_valid()) {
    return;
  }
  SpirvShader* spirv_shader = dynamic_cast<SpirvShader*>(&translation.shader());
  if (spirv_shader && !spirv_shader->bindings_setup_entered_.test_and_set(
                          std::memory_order_relaxed)) {
    spirv_shader->texture_bindings_.clear();
    spirv_shader->texture_bindings_.reserve(texture_bindings_.size());
    for (const TextureBinding& translator_binding : texture_bindings_) {
      SpirvShader::TextureBinding& shader_binding =
          spirv_shader->texture_bindings_.emplace_back();
      // For a stable hash.
      std::memset(&shader_binding, 0, sizeof(shader_binding));
      shader_binding.fetch_constant = translator_binding.fetch_constant;
      shader_binding.dimension = translator_binding.dimension;
      shader_binding.is_signed = translator_binding.is_signed;
      spirv_shader->used_texture_mask_ |= UINT32_C(1)
                                          << translator_binding.fetch_constant;
    }
    spirv_shader->sampler_bindings_.clear();
    spirv_shader->sampler_bindings_.reserve(sampler_bindings_.size());
    for (const SamplerBinding& translator_binding : sampler_bindings_) {
      SpirvShader::SamplerBinding& shader_binding =
          spirv_shader->sampler_bindings_.emplace_back();
      shader_binding.fetch_constant = translator_binding.fetch_constant;
      shader_binding.mag_filter = translator_binding.mag_filter;
      shader_binding.min_filter = translator_binding.min_filter;
      shader_binding.mip_filter = translator_binding.mip_filter;
      shader_binding.aniso_filter = translator_binding.aniso_filter;
    }
  }
}

// Begins the main switch case for the control flow instruction at cf_index:
// adds a new block as the OpSwitch target for that index and makes the
// previous case fall through into it (unless it has already been terminated).
void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) {
  if (cf_index == 0) {
    // 0 already added in the beginning.
    return;
  }

  assert_false(current_shader().label_addresses().empty());

  // Close flow control within the previous switch case.
  CloseExecConditionals();

  spv::Function& function = builder_->getBuildPoint()->getParent();
  // Create the next switch case and fallthrough to it.
  spv::Block* new_case = new spv::Block(builder_->getUniqueId(), function);
  main_switch_op_->addImmediateOperand(cf_index);
  main_switch_op_->addIdOperand(new_case->getId());
  // Every switch case must have the OpSelectionMerge/OpSwitch block as a
  // predecessor.
  new_case->addPredecessor(main_switch_header_);
  // The previous block may have already been terminated if it was exece.
  if (!builder_->getBuildPoint()->isTerminated()) {
    builder_->createBranch(new_case);
  }
  function.addBlock(new_case);
  builder_->setBuildPoint(new_case);
}

// Opens (or merges into) the conditional that guards the exec's instructions.
void SpirvShaderTranslator::ProcessExecInstructionBegin(
    const ParsedExecInstruction& instr) {
  UpdateExecConditionals(instr.type, instr.bool_constant_index,
                         instr.condition);
}

// Finishes an exec. For an ending exec, branches out of the main switch (or
// the main loop when the shader has no labels) before the exec conditional
// state is updated again.
void SpirvShaderTranslator::ProcessExecInstructionEnd(
    const ParsedExecInstruction& instr) {
  if (instr.is_end) {
    // Break out of the main switch (if exists) and the main loop.
    CloseInstructionPredication();
    if (!builder_->getBuildPoint()->isTerminated()) {
      builder_->createBranch(current_shader().label_addresses().empty()
                                 ? main_loop_merge_
                                 : main_switch_merge_);
    }
  }
  UpdateExecConditionals(instr.type, instr.bool_constant_index,
                         instr.condition);
}

// Translates a guest `loop` instruction: pushes the iteration count and the
// relative address (aL) onto their 4-entry stacks, and jumps to the skip
// address when the iteration count is zero.
void SpirvShaderTranslator::ProcessLoopStartInstruction(
    const ParsedLoopStartInstruction& instr) {
  // loop il, L - loop with loop data il, end @ L

  // Loop control is outside execs - actually close the last exec.
  CloseExecConditionals();

  EnsureBuildPointAvailable();

  id_vector_temp_.clear();
  // Loop constants (member 1).
  id_vector_temp_.push_back(builder_->makeIntConstant(1));
  // 4-component vector.
  id_vector_temp_.push_back(
      builder_->makeIntConstant(int(instr.loop_constant_index >> 2)));
  // Scalar within the vector.
id_vector_temp_.push_back( builder_->makeIntConstant(int(instr.loop_constant_index & 3))); // Count (unsigned) in bits 0:7 of the loop constant (struct member 1), // initial aL (unsigned) in 8:15. spv::Id loop_constant = builder_->createLoad(builder_->createAccessChain( spv::StorageClassUniform, uniform_bool_loop_constants_, id_vector_temp_), spv::NoPrecision); spv::Id const_int_8 = builder_->makeIntConstant(8); // Push the count to the loop count stack - move XYZ to YZW and set X to the // new iteration count (swizzling the way glslang does it for similar GLSL). spv::Id loop_count_stack_old = builder_->createLoad(var_main_loop_count_, spv::NoPrecision); spv::Id loop_count_new = builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, loop_constant, const_int_0_, const_int_8); id_vector_temp_.clear(); id_vector_temp_.push_back(loop_count_new); for (unsigned int i = 0; i < 3; ++i) { id_vector_temp_.push_back( builder_->createCompositeExtract(loop_count_stack_old, type_uint_, i)); } builder_->createStore( builder_->createCompositeConstruct(type_uint4_, id_vector_temp_), var_main_loop_count_); // Push aL - keep the same value as in the previous loop if repeating, or the // new one otherwise. 
spv::Id address_relative_stack_old = builder_->createLoad(var_main_loop_address_, spv::NoPrecision); id_vector_temp_.clear(); if (instr.is_repeat) { id_vector_temp_.emplace_back(); } else { id_vector_temp_.push_back(builder_->createUnaryOp( spv::OpBitcast, type_int_, builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, loop_constant, const_int_8, const_int_8))); } for (unsigned int i = 0; i < 3; ++i) { id_vector_temp_.push_back(builder_->createCompositeExtract( address_relative_stack_old, type_int_, i)); } if (instr.is_repeat) { id_vector_temp_[0] = id_vector_temp_[1]; } builder_->createStore( builder_->createCompositeConstruct(type_int4_, id_vector_temp_), var_main_loop_address_); // Break (jump to the skip label) if the loop counter is 0 (since the // condition is checked in the end). spv::Block& head_block = *builder_->getBuildPoint(); spv::Id loop_count_zero = builder_->createBinOp( spv::OpIEqual, type_bool_, loop_count_new, const_uint_0_); spv::Block& skip_block = builder_->makeNewBlock(); spv::Block& body_block = builder_->makeNewBlock(); builder_->createSelectionMerge(&body_block, spv::SelectionControlMaskNone); { std::unique_ptr branch_conditional_op = std::make_unique(spv::OpBranchConditional); branch_conditional_op->addIdOperand(loop_count_zero); branch_conditional_op->addIdOperand(skip_block.getId()); branch_conditional_op->addIdOperand(body_block.getId()); // More likely to enter than to skip. 
branch_conditional_op->addImmediateOperand(1); branch_conditional_op->addImmediateOperand(2); head_block.addInstruction(std::move(branch_conditional_op)); } skip_block.addPredecessor(&head_block); body_block.addPredecessor(&head_block); builder_->setBuildPoint(&skip_block); main_switch_next_pc_phi_operands_.push_back( builder_->makeIntConstant(int(instr.loop_skip_address))); main_switch_next_pc_phi_operands_.push_back( builder_->getBuildPoint()->getId()); builder_->createBranch(main_loop_continue_); builder_->setBuildPoint(&body_block); } void SpirvShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { // endloop il, L - end loop w/ data il, head @ L // Loop control is outside execs - actually close the last exec. CloseExecConditionals(); EnsureBuildPointAvailable(); // Subtract 1 from the loop counter (will store later). spv::Id loop_count_stack_old = builder_->createLoad(var_main_loop_count_, spv::NoPrecision); spv::Id loop_count = builder_->createBinOp( spv::OpISub, type_uint_, builder_->createCompositeExtract(loop_count_stack_old, type_uint_, 0), builder_->makeUintConstant(1)); spv::Id address_relative_stack_old = builder_->createLoad(var_main_loop_address_, spv::NoPrecision); // Predicated break works like break if (loop_count == 0 || [!]p0). // Three options, due to logical operations usage (so OpLogicalNot is not // required): // - Continue if (loop_count != 0). // - Continue if (loop_count != 0 && p0), if breaking if !p0. // - Break if (loop_count == 0 || p0), if breaking if p0. bool break_is_true = instr.is_predicated_break && instr.predicate_condition; spv::Id condition = builder_->createBinOp(break_is_true ? spv::OpIEqual : spv::OpINotEqual, type_bool_, loop_count, const_uint_0_); if (instr.is_predicated_break) { condition = builder_->createBinOp( instr.predicate_condition ? 
spv::OpLogicalOr : spv::OpLogicalAnd, type_bool_, condition, builder_->createLoad(var_main_predicate_, spv::NoPrecision)); } spv::Block& body_block = *builder_->getBuildPoint(); spv::Block& continue_block = builder_->makeNewBlock(); spv::Block& break_block = builder_->makeNewBlock(); builder_->createSelectionMerge(&break_block, spv::SelectionControlMaskNone); { std::unique_ptr branch_conditional_op = std::make_unique(spv::OpBranchConditional); branch_conditional_op->addIdOperand(condition); // More likely to continue than to break. if (break_is_true) { branch_conditional_op->addIdOperand(break_block.getId()); branch_conditional_op->addIdOperand(continue_block.getId()); branch_conditional_op->addImmediateOperand(1); branch_conditional_op->addImmediateOperand(2); } else { branch_conditional_op->addIdOperand(continue_block.getId()); branch_conditional_op->addIdOperand(break_block.getId()); branch_conditional_op->addImmediateOperand(2); branch_conditional_op->addImmediateOperand(1); } body_block.addInstruction(std::move(branch_conditional_op)); } continue_block.addPredecessor(&body_block); break_block.addPredecessor(&body_block); // Continue case. builder_->setBuildPoint(&continue_block); // Store the loop count with 1 subtracted. builder_->createStore(builder_->createCompositeInsert( loop_count, loop_count_stack_old, type_uint4_, 0), var_main_loop_count_); // Extract the value to add to aL (signed, in bits 16:23 of the loop // constant). id_vector_temp_.clear(); // Loop constants (member 1). id_vector_temp_.push_back(builder_->makeIntConstant(1)); // 4-component vector. id_vector_temp_.push_back( builder_->makeIntConstant(int(instr.loop_constant_index >> 2))); // Scalar within the vector. 
id_vector_temp_.push_back( builder_->makeIntConstant(int(instr.loop_constant_index & 3))); spv::Id loop_constant = builder_->createLoad(builder_->createAccessChain( spv::StorageClassUniform, uniform_bool_loop_constants_, id_vector_temp_), spv::NoPrecision); spv::Id address_relative_old = builder_->createCompositeExtract( address_relative_stack_old, type_int_, 0); builder_->createStore( builder_->createCompositeInsert( builder_->createBinOp( spv::OpIAdd, type_int_, address_relative_old, builder_->createTriOp( spv::OpBitFieldSExtract, type_int_, builder_->createUnaryOp(spv::OpBitcast, type_int_, loop_constant), builder_->makeIntConstant(16), builder_->makeIntConstant(8))), address_relative_stack_old, type_int4_, 0), var_main_loop_address_); // Jump back to the beginning of the loop body. main_switch_next_pc_phi_operands_.push_back( builder_->makeIntConstant(int(instr.loop_body_address))); main_switch_next_pc_phi_operands_.push_back( builder_->getBuildPoint()->getId()); builder_->createBranch(main_loop_continue_); // Break case. builder_->setBuildPoint(&break_block); // Pop the current loop off the loop counter and the relative address stacks - // move YZW to XYZ and set W to 0. id_vector_temp_.clear(); for (unsigned int i = 1; i < 4; ++i) { id_vector_temp_.push_back( builder_->createCompositeExtract(loop_count_stack_old, type_uint_, i)); } id_vector_temp_.push_back(const_uint_0_); builder_->createStore( builder_->createCompositeConstruct(type_uint4_, id_vector_temp_), var_main_loop_count_); id_vector_temp_.clear(); for (unsigned int i = 1; i < 4; ++i) { id_vector_temp_.push_back(builder_->createCompositeExtract( address_relative_stack_old, type_int_, i)); } id_vector_temp_.push_back(const_int_0_); builder_->createStore( builder_->createCompositeConstruct(type_int4_, id_vector_temp_), var_main_loop_address_); // Now going to fall through to the next control flow instruction. 
} void SpirvShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { // Treat like exec, merge with execs if possible, since it's an if too. ParsedExecInstruction::Type type; if (instr.type == ParsedJumpInstruction::Type::kConditional) { type = ParsedExecInstruction::Type::kConditional; } else if (instr.type == ParsedJumpInstruction::Type::kPredicated) { type = ParsedExecInstruction::Type::kPredicated; } else { type = ParsedExecInstruction::Type::kUnconditional; } UpdateExecConditionals(type, instr.bool_constant_index, instr.condition); // UpdateExecConditionals may not necessarily close the instruction-level // predicate check (it's not necessary if the execs are merged), but here the // instruction itself is on the control flow level, so the predicate check is // on the control flow level too. CloseInstructionPredication(); if (builder_->getBuildPoint()->isTerminated()) { // Unreachable for some reason. return; } main_switch_next_pc_phi_operands_.push_back( builder_->makeIntConstant(int(instr.target_address))); main_switch_next_pc_phi_operands_.push_back( builder_->getBuildPoint()->getId()); builder_->createBranch(main_loop_continue_); } void SpirvShaderTranslator::ProcessAllocInstruction( const ParsedAllocInstruction& instr, uint8_t export_eM) { bool start_memexport = instr.type == ucode::AllocType::kMemory && current_shader().memexport_eM_written(); if (export_eM || start_memexport) { CloseExecConditionals(); } if (export_eM) { ExportToMemory(export_eM); // Reset which eM# elements have been written. builder_->createStore(const_uint_0_, var_main_memexport_data_written_); // Break dependencies from the previous memexport. uint8_t export_eM_remaining = export_eM; uint32_t eM_index; while (xe::bit_scan_forward(export_eM_remaining, &eM_index)) { export_eM_remaining &= ~(uint8_t(1) << eM_index); builder_->createStore(const_float4_0_, var_main_memexport_data_[eM_index]); } } if (start_memexport) { // Initialize eA to an invalid address. 
builder_->createStore(const_float4_0_, var_main_memexport_address_); } } spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( spv::Id scalar, spv::Id vector_type) { bool is_constant = builder_->isConstant(scalar); bool is_spec_constant = builder_->isSpecConstant(scalar); if (!is_constant && !is_spec_constant) { return builder_->smearScalar(spv::NoPrecision, scalar, vector_type); } assert_true(builder_->getTypeClass(builder_->getTypeId(scalar)) == builder_->getTypeClass(builder_->getScalarTypeId(vector_type))); if (!builder_->isVectorType(vector_type)) { assert_true(builder_->isScalarType(vector_type)); return scalar; } int num_components = builder_->getNumTypeComponents(vector_type); id_vector_temp_util_.clear(); for (int i = 0; i < num_components; ++i) { id_vector_temp_util_.push_back(scalar); } return builder_->makeCompositeConstant(vector_type, id_vector_temp_util_, is_spec_constant); } uint32_t SpirvShaderTranslator::GetPsParamGenInterpolator() const { assert_true(is_pixel_shader()); Modification modification = GetSpirvShaderModification(); // param_gen_interpolator is already 4 bits, no need for an interpolator count // safety check. return (modification.pixel.param_gen_enable && modification.pixel.param_gen_interpolator < register_count()) ? modification.pixel.param_gen_interpolator : UINT32_MAX; } void SpirvShaderTranslator::EnsureBuildPointAvailable() { if (!builder_->getBuildPoint()->isTerminated()) { return; } spv::Block& new_block = builder_->makeNewBlock(); new_block.setUnreachable(); builder_->setBuildPoint(&new_block); } void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { // Create the inputs. 
if (IsSpirvTessEvalShader()) { input_primitive_id_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_PrimitiveID"); builder_->addDecoration(input_primitive_id_, spv::DecorationBuiltIn, spv::BuiltInPrimitiveId); main_interface_.push_back(input_primitive_id_); } else { input_vertex_index_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_VertexIndex"); builder_->addDecoration(input_vertex_index_, spv::DecorationBuiltIn, spv::BuiltInVertexIndex); main_interface_.push_back(input_vertex_index_); } uint32_t output_location = 0; // Create the interpolator outputs. { uint32_t interpolators_remaining = GetModificationInterpolatorMask(); uint32_t interpolator_index; while (xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) { interpolators_remaining &= ~(UINT32_C(1) << interpolator_index); spv::Id interpolator = builder_->createVariable( spv::NoPrecision, spv::StorageClassOutput, type_float4_, fmt::format("xe_out_interpolator_{}", interpolator_index).c_str()); input_output_interpolators_[interpolator_index] = interpolator; builder_->addDecoration(interpolator, spv::DecorationLocation, int(output_location)); builder_->addDecoration(interpolator, spv::DecorationInvariant); main_interface_.push_back(interpolator); ++output_location; } } Modification shader_modification = GetSpirvShaderModification(); if (shader_modification.vertex.output_point_parameters) { if (shader_modification.vertex.host_vertex_shader_type == Shader::HostVertexShaderType::kPointListAsTriangleStrip) { // Create the point coordinates output. 
output_point_coordinates_ = builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, type_float2_, "xe_out_point_coordinates"); builder_->addDecoration(output_point_coordinates_, spv::DecorationLocation, int(output_location)); builder_->addDecoration(output_point_coordinates_, spv::DecorationInvariant); main_interface_.push_back(output_point_coordinates_); ++output_location; } else { // Create the point size output. Not using gl_PointSize from gl_PerVertex // not to rely on the shaderTessellationAndGeometryPointSize feature, and // also because the value written to gl_PointSize must be greater than // zero. output_point_size_ = builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, type_float_, "xe_out_point_size"); builder_->addDecoration(output_point_size_, spv::DecorationLocation, int(output_location)); builder_->addDecoration(output_point_size_, spv::DecorationInvariant); main_interface_.push_back(output_point_size_); ++output_location; } } // Create the gl_PerVertex output for used system outputs. 
std::vector struct_per_vertex_members; struct_per_vertex_members.reserve(kOutputPerVertexMemberCount); struct_per_vertex_members.push_back(type_float4_); spv::Id type_struct_per_vertex = builder_->makeStructType(struct_per_vertex_members, "gl_PerVertex"); builder_->addMemberName(type_struct_per_vertex, kOutputPerVertexMemberPosition, "gl_Position"); builder_->addMemberDecoration(type_struct_per_vertex, kOutputPerVertexMemberPosition, spv::DecorationBuiltIn, spv::BuiltInPosition); builder_->addDecoration(type_struct_per_vertex, spv::DecorationBlock); output_per_vertex_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassOutput, type_struct_per_vertex, ""); builder_->addDecoration(output_per_vertex_, spv::DecorationInvariant); main_interface_.push_back(output_per_vertex_); } void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { Modification shader_modification = GetSpirvShaderModification(); // The edge flag isn't used for any purpose by the translator. if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) { id_vector_temp_.clear(); // Set the point size to a negative value to tell the point sprite expansion // that it should use the default point size if the vertex shader does not // override it. id_vector_temp_.push_back(builder_->makeFloatConstant(-1.0f)); // The edge flag is ignored. id_vector_temp_.push_back(const_float_0_); // Don't kill by default (zero bits 0:30). id_vector_temp_.push_back(const_float_0_); var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float3_, "xe_var_point_size_edge_flag_kill_vertex", builder_->makeCompositeConstant(type_float3_, id_vector_temp_)); } // Zero general-purpose registers to prevent crashes when the game // references them after only initializing them conditionally. 
for (uint32_t i = 0; i < register_count(); ++i) { id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); builder_->createStore( const_float4_0_, builder_->createAccessChain(spv::StorageClassFunction, var_main_registers_, id_vector_temp_)); } // Zero the interpolators. { uint32_t interpolators_remaining = GetModificationInterpolatorMask(); uint32_t interpolator_index; while (xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) { interpolators_remaining &= ~(UINT32_C(1) << interpolator_index); builder_->createStore(const_float4_0_, input_output_interpolators_[interpolator_index]); } } // TODO(Triang3l): For HostVertexShaderType::kRectangeListAsTriangleStrip, // start the vertex loop, and load the index there. // Check if memory export should be allowed for this host vertex of the guest // primitive to make sure export is done only once for each guest vertex. if (IsMemoryExportUsed()) { spv::Id memexport_allowed_for_host_vertex_of_guest_primitive = spv::NoResult; if (shader_modification.vertex.host_vertex_shader_type == Shader::HostVertexShaderType::kPointListAsTriangleStrip) { // Only for one host vertex for the point. memexport_allowed_for_host_vertex_of_guest_primitive = builder_->createBinOp( spv::OpIEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, builder_->createUnaryOp( spv::OpBitcast, type_uint_, builder_->createLoad(input_vertex_index_, spv::NoPrecision)), builder_->makeUintConstant(3)), const_uint_0_); } if (memexport_allowed_for_host_vertex_of_guest_primitive != spv::NoResult) { main_memexport_allowed_ = main_memexport_allowed_ != spv::NoResult ? builder_->createBinOp( spv::OpLogicalAnd, type_bool_, main_memexport_allowed_, memexport_allowed_for_host_vertex_of_guest_primitive) : memexport_allowed_for_host_vertex_of_guest_primitive; } } // Load the vertex index or the tessellation parameters. if (register_count()) { // TODO(Triang3l): Barycentric coordinates and patch index. 
if (IsSpirvVertexShader()) { spv::Id vertex_index = builder_->createUnaryOp( spv::OpBitcast, type_uint_, builder_->createLoad(input_vertex_index_, spv::NoPrecision)); if (shader_modification.vertex.host_vertex_shader_type == Shader::HostVertexShaderType::kPointListAsTriangleStrip) { // Load the point index, autogenerated or indirectly from the index // buffer. // Extract the primitive index from the two-triangle strip vertex index. spv::Id const_uint_2 = builder_->makeUintConstant(2); vertex_index = builder_->createBinOp( spv::OpShiftRightLogical, type_uint_, vertex_index, const_uint_2); // Check if the index needs to be loaded from the index buffer. spv::Id load_vertex_index = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant(static_cast( kSysFlag_ComputeOrPrimitiveVertexIndexLoad))), const_uint_0_); SpirvBuilder::IfBuilder load_vertex_index_if( load_vertex_index, spv::SelectionControlDontFlattenMask, *builder_); spv::Id loaded_vertex_index; { // Check if the index is 32-bit. spv::Id vertex_index_is_32bit = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant(static_cast( kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit))), const_uint_0_); // Calculate the vertex index address in the shared memory. 
id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress)); spv::Id vertex_index_address = builder_->createBinOp( spv::OpIAdd, type_uint_, builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision), builder_->createBinOp( spv::OpShiftLeftLogical, type_uint_, vertex_index, builder_->createTriOp(spv::OpSelect, type_uint_, vertex_index_is_32bit, const_uint_2, builder_->makeUintConstant(1)))); // Load the 32 bits containing the whole vertex index or two 16-bit // vertex indices. // TODO(Triang3l): Bounds checking. loaded_vertex_index = LoadUint32FromSharedMemory(builder_->createUnaryOp( spv::OpBitcast, type_int_, builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, vertex_index_address, const_uint_2))); // Extract the 16-bit index from the loaded 32 bits if needed. loaded_vertex_index = builder_->createTriOp( spv::OpSelect, type_uint_, vertex_index_is_32bit, loaded_vertex_index, builder_->createTriOp( spv::OpBitFieldUExtract, type_uint_, loaded_vertex_index, builder_->createBinOp( spv::OpShiftLeftLogical, type_uint_, builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, vertex_index_address, const_uint_2), builder_->makeUintConstant(4 - 1)), builder_->makeUintConstant(16))); // Endian-swap the loaded index. id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kSystemConstantVertexIndexEndian)); loaded_vertex_index = EndianSwap32Uint( loaded_vertex_index, builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision)); } load_vertex_index_if.makeEndIf(); // Select between the loaded index and the original index from Vulkan. vertex_index = load_vertex_index_if.createMergePhi(loaded_vertex_index, vertex_index); } else { // TODO(Triang3l): Close line loop primitive. 
// Load the unswapped index as uint for swapping, or for indirect // loading if needed. if (!features_.full_draw_index_uint32) { // Check if the full 32-bit index needs to be loaded indirectly. spv::Id load_vertex_index = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_VertexIndexLoad))), const_uint_0_); SpirvBuilder::IfBuilder load_vertex_index_if( load_vertex_index, spv::SelectionControlDontFlattenMask, *builder_); spv::Id loaded_vertex_index; { // Load the 32-bit index. // TODO(Triang3l): Bounds checking. id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant( kSystemConstantVertexIndexLoadAddress)); loaded_vertex_index = LoadUint32FromSharedMemory(builder_->createUnaryOp( spv::OpBitcast, type_int_, builder_->createBinOp( spv::OpIAdd, type_uint_, builder_->createBinOp( spv::OpShiftRightLogical, type_uint_, builder_->createLoad( builder_->createAccessChain( spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision), builder_->makeUintConstant(2)), vertex_index))); } load_vertex_index_if.makeEndIf(); // Select between the loaded index and the original index from Vulkan. vertex_index = load_vertex_index_if.createMergePhi( loaded_vertex_index, vertex_index); } // Endian-swap the index. id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kSystemConstantVertexIndexEndian)); vertex_index = EndianSwap32Uint( vertex_index, builder_->createLoad( builder_->createAccessChain( spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision)); } // Convert the index to a signed integer. vertex_index = builder_->createUnaryOp(spv::OpBitcast, type_int_, vertex_index); // Add the base to the index. 
id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kSystemConstantVertexBaseIndex)); vertex_index = builder_->createBinOp( spv::OpIAdd, type_int_, vertex_index, builder_->createLoad(builder_->createAccessChain( spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision)); // Write the index to r0.x as float. id_vector_temp_.clear(); id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(const_int_0_); builder_->createStore( builder_->createUnaryOp(spv::OpConvertSToF, type_float_, vertex_index), builder_->createAccessChain(spv::StorageClassFunction, var_main_registers_, id_vector_temp_)); } } } void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kOutputPerVertexMemberPosition)); spv::Id position_ptr = builder_->createAccessChain( spv::StorageClassOutput, output_per_vertex_, id_vector_temp_); spv::Id guest_position = builder_->createLoad(position_ptr, spv::NoPrecision); // Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W // into W. spv::Id position_w = builder_->createCompositeExtract(guest_position, type_float_, 3); spv::Id is_w_not_reciprocal = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_WNotReciprocal))), const_uint_0_); spv::Id guest_position_w_inv = builder_->createNoContractionBinOp( spv::OpFDiv, type_float_, const_float_1_, position_w); position_w = builder_->createTriOp(spv::OpSelect, type_float_, is_w_not_reciprocal, position_w, guest_position_w_inv); spv::Id position_xyz; // Open a scope since position_xy and position_z won't be synchronized anymore // after position_xyz is built and modified later. { // Check if the shader returns XY/W rather than XY, and if it does, revert // that. 
uint_vector_temp_.clear(); uint_vector_temp_.push_back(0); uint_vector_temp_.push_back(1); spv::Id position_xy = builder_->createRvalueSwizzle( spv::NoPrecision, type_float2_, guest_position, uint_vector_temp_); spv::Id is_xy_divided_by_w = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_XYDividedByW))), const_uint_0_); spv::Id guest_position_xy_mul_w = builder_->createNoContractionBinOp( spv::OpVectorTimesScalar, type_float2_, position_xy, position_w); position_xy = builder_->createTriOp( spv::OpSelect, type_float2_, builder_->smearScalar(spv::NoPrecision, is_xy_divided_by_w, type_bool2_), guest_position_xy_mul_w, position_xy); // Check if the shader returns Z/W rather than Z, and if it does, revert // that. spv::Id position_z = builder_->createCompositeExtract(guest_position, type_float_, 2); spv::Id is_z_divided_by_w = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_ZDividedByW))), const_uint_0_); spv::Id guest_position_z_mul_w = builder_->createNoContractionBinOp( spv::OpFMul, type_float_, position_z, position_w); position_z = builder_->createTriOp(spv::OpSelect, type_float_, is_z_divided_by_w, guest_position_z_mul_w, position_z); // Build XYZ of the position with W format handled. { std::unique_ptr composite_construct_op = std::make_unique( builder_->getUniqueId(), type_float3_, spv::OpCompositeConstruct); composite_construct_op->addIdOperand(position_xy); composite_construct_op->addIdOperand(position_z); position_xyz = composite_construct_op->getResultId(); builder_->getBuildPoint()->addInstruction( std::move(composite_construct_op)); } } // Apply the NDC scale and offset for guest to host viewport transformation. 
id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantNdcScale)); spv::Id ndc_scale = builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision); position_xyz = builder_->createNoContractionBinOp(spv::OpFMul, type_float3_, position_xyz, ndc_scale); id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kSystemConstantNdcOffset)); spv::Id ndc_offset = builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision); spv::Id ndc_offset_mul_w = builder_->createNoContractionBinOp( spv::OpVectorTimesScalar, type_float3_, ndc_offset, position_w); position_xyz = builder_->createNoContractionBinOp( spv::OpFAdd, type_float3_, position_xyz, ndc_offset_mul_w); // Write the point size. if (output_point_size_ != spv::NoResult) { spv::Id point_size; if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b001) { assert_true(var_main_point_size_edge_flag_kill_vertex_ != spv::NoResult); id_vector_temp_.clear(); // X vector component. id_vector_temp_.push_back(const_int_0_); point_size = builder_->createLoad( builder_->createAccessChain( spv::StorageClassFunction, var_main_point_size_edge_flag_kill_vertex_, id_vector_temp_), spv::NoPrecision); } else { // Not statically overridden - write a negative value. point_size = builder_->makeFloatConstant(-1.0f); } builder_->createStore(point_size, output_point_size_); } Modification shader_modification = GetSpirvShaderModification(); // Expand the point sprite. if (shader_modification.vertex.host_vertex_shader_type == Shader::HostVertexShaderType::kPointListAsTriangleStrip) { // Top-left, bottom-left, top-right, bottom-right order (chosen arbitrarily, // simply based on counterclockwise meaning front with // frontFace = VkFrontFace(0), but faceness is ignored for non-polygon // primitive types). 
id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeUintConstant(0b10)); id_vector_temp_.push_back(builder_->makeUintConstant(0b01)); spv::Id point_vertex_positive = builder_->createBinOp( spv::OpINotEqual, type_bool2_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint2_, builder_->smearScalar(spv::NoPrecision, builder_->createUnaryOp( spv::OpBitcast, type_uint_, builder_->createLoad(input_vertex_index_, spv::NoPrecision)), type_uint2_), builder_->createCompositeConstruct(type_uint2_, id_vector_temp_)), SpirvSmearScalarResultOrConstant(const_uint_0_, type_uint2_)); // Load the point diameter in guest pixels, with the override from the // vertex shader if provided. id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kSystemConstantPointConstantDiameter)); spv::Id point_guest_diameter = builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision); if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b001) { assert_true(var_main_point_size_edge_flag_kill_vertex_ != spv::NoResult); id_vector_temp_.clear(); id_vector_temp_.push_back(const_int_0_); spv::Id point_vertex_diameter = builder_->createLoad( builder_->createAccessChain( spv::StorageClassFunction, var_main_point_size_edge_flag_kill_vertex_, id_vector_temp_), spv::NoPrecision); // The vertex shader's header writes -1.0 to point_size by default, so any // non-negative value means that it was overwritten by the translated // vertex shader, and needs to be used instead of the constant size. The // per-vertex diameter has already been clamped earlier in translation // (combined with making it non-negative). 
point_guest_diameter = builder_->createTriOp( spv::OpSelect, type_float2_, builder_->smearScalar( spv::NoPrecision, builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, point_vertex_diameter, const_float_0_), type_bool2_), builder_->smearScalar(spv::NoPrecision, point_vertex_diameter, type_float2_), point_guest_diameter); } // Transform the diameter in the guest screen coordinates to radius in the // normalized device coordinates. id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant( kSystemConstantPointScreenDiameterToNdcRadius)); spv::Id point_radius = builder_->createNoContractionBinOp( spv::OpFMul, type_float2_, point_guest_diameter, builder_->createLoad(builder_->createAccessChain( spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_), spv::NoPrecision)); // Transform the radius from the normalized device coordinates to the clip // space. point_radius = builder_->createNoContractionBinOp( spv::OpVectorTimesScalar, type_float2_, point_radius, position_w); // Expand the point sprite in the direction for the current host vertex. uint_vector_temp_.clear(); uint_vector_temp_.push_back(0); uint_vector_temp_.push_back(1); spv::Id point_position_xy = builder_->createNoContractionBinOp( spv::OpFAdd, type_float2_, builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_, position_xyz, uint_vector_temp_), builder_->createTriOp(spv::OpSelect, type_float2_, point_vertex_positive, point_radius, builder_->createNoContractionUnaryOp( spv::OpFNegate, type_float2_, point_radius))); // Store the position. spv::Id position; { // Bypass the `getNumTypeConstituents(typeId) == (int)constituents.size()` // assertion in createCompositeConstruct, OpCompositeConstruct can // construct vectors not only from scalars, but also from other vectors. 
std::unique_ptr composite_construct_op = std::make_unique( builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct); composite_construct_op->addIdOperand(point_position_xy); composite_construct_op->addIdOperand( builder_->createCompositeExtract(position_xyz, type_float_, 2)); composite_construct_op->addIdOperand(position_w); position = composite_construct_op->getResultId(); builder_->getBuildPoint()->addInstruction( std::move(composite_construct_op)); } builder_->createStore(position, position_ptr); // Write the point coordinates. if (output_point_coordinates_ != spv::NoResult) { builder_->createStore( builder_->createTriOp(spv::OpSelect, type_float2_, point_vertex_positive, const_float2_1_, const_float2_0_), output_point_coordinates_); } // TODO(Triang3l): For points, handle ps_ucp_mode (take the guest clip space // coordinates instead of the host ones, calculate the distances to the user // clip planes, cull using the distance from the center for modes 0, 1 and // 2, cull and clip per-vertex for modes 2 and 3) in clip and cull // distances. } else { // Store the position converted to the host. spv::Id position; { // Bypass the `getNumTypeConstituents(typeId) == (int)constituents.size()` // assertion in createCompositeConstruct, OpCompositeConstruct can // construct vectors not only from scalars, but also from other vectors. 
std::unique_ptr composite_construct_op = std::make_unique(
          builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct);
      composite_construct_op->addIdOperand(position_xyz);
      composite_construct_op->addIdOperand(position_w);
      position = composite_construct_op->getResultId();
      builder_->getBuildPoint()->addInstruction(
          std::move(composite_construct_op));
    }
    builder_->createStore(position, position_ptr);
  }
}

// Declares the module-scope variables of a fragment shader before the main
// function is built: the EDRAM buffer (only with fragment shader interlock),
// the interpolator and point coordinate inputs, the FragCoord / FrontFacing /
// SampleMask built-in inputs, and the color outputs (only without fragment
// shader interlock). Every created input or output is appended to
// main_interface_.
void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
  Modification shader_modification = GetSpirvShaderModification();

  if (edram_fragment_shader_interlock_) {
    builder_->addExtension("SPV_EXT_fragment_shader_interlock");

    // EDRAM buffer uint[].
    id_vector_temp_.clear();
    id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_));
    // Storage buffers have std430 packing, no padding to 4-component vectors.
    builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
                            sizeof(uint32_t));
    spv::Id type_edram = builder_->makeStructType(id_vector_temp_, "XeEdram");
    builder_->addMemberName(type_edram, 0, "edram");
    builder_->addMemberDecoration(type_edram, 0, spv::DecorationCoherent);
    builder_->addMemberDecoration(type_edram, 0, spv::DecorationRestrict);
    builder_->addMemberDecoration(type_edram, 0, spv::DecorationOffset, 0);
    // From SPIR-V 1.3, the buffer is declared as a Block in the StorageBuffer
    // storage class; for older versions, as a BufferBlock in the Uniform
    // storage class.
    builder_->addDecoration(type_edram, features_.spirv_version >= spv::Spv_1_3
                                            ? spv::DecorationBlock
                                            : spv::DecorationBufferBlock);
    buffer_edram_ = builder_->createVariable(
        spv::NoPrecision,
        features_.spirv_version >= spv::Spv_1_3
            ? spv::StorageClassStorageBuffer
            : spv::StorageClassUniform,
        type_edram, "xe_edram");
    builder_->addDecoration(buffer_edram_, spv::DecorationDescriptorSet,
                            int(kDescriptorSetSharedMemoryAndEdram));
    builder_->addDecoration(buffer_edram_, spv::DecorationBinding, 1);
    // The buffer is listed in the entry point interface only from SPIR-V 1.4.
    if (features_.spirv_version >= spv::Spv_1_4) {
      main_interface_.push_back(buffer_edram_);
    }
  }

  // PsParamGen is never needed in the depth-only shader, and otherwise only if
  // an interpolator register is assigned to it.
  bool param_gen_needed = !is_depth_only_fragment_shader_ &&
                          GetPsParamGenInterpolator() != UINT32_MAX;

  if (!is_depth_only_fragment_shader_) {
    // Locations are assigned sequentially in the order of declaration.
    uint32_t input_location = 0;

    // Interpolator inputs.
    {
      uint32_t interpolators_remaining = GetModificationInterpolatorMask();
      uint32_t interpolator_index;
      while (
          xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) {
        interpolators_remaining &= ~(UINT32_C(1) << interpolator_index);
        spv::Id interpolator = builder_->createVariable(
            spv::NoPrecision, spv::StorageClassInput, type_float4_,
            fmt::format("xe_in_interpolator_{}", interpolator_index).c_str());
        input_output_interpolators_[interpolator_index] = interpolator;
        builder_->addDecoration(interpolator, spv::DecorationLocation,
                                int(input_location));
        if (shader_modification.pixel.interpolators_centroid &
            (UINT32_C(1) << interpolator_index)) {
          builder_->addDecoration(interpolator, spv::DecorationCentroid);
        }
        main_interface_.push_back(interpolator);
        ++input_location;
      }
    }

    // Point coordinate input.
    if (shader_modification.pixel.param_gen_point) {
      if (param_gen_needed) {
        input_point_coordinates_ =
            builder_->createVariable(spv::NoPrecision, spv::StorageClassInput,
                                     type_float2_, "xe_in_point_coordinates");
        builder_->addDecoration(input_point_coordinates_,
                                spv::DecorationLocation, int(input_location));
        main_interface_.push_back(input_point_coordinates_);
      }
      // The location is consumed even if the input itself is not declared.
      ++input_location;
    }
  }

  // Fragment coordinates.
  // TODO(Triang3l): More conditions - alpha to coverage (if RT 0 is written,
  // and there's no early depth / stencil), depth writing in the fragment shader
  // (per-sample if supported).
  if (edram_fragment_shader_interlock_ || param_gen_needed) {
    input_fragment_coordinates_ = builder_->createVariable(
        spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord");
    builder_->addDecoration(input_fragment_coordinates_, spv::DecorationBuiltIn,
                            spv::BuiltInFragCoord);
    main_interface_.push_back(input_fragment_coordinates_);
  }

  // Is front facing.
  if (edram_fragment_shader_interlock_ ||
      (param_gen_needed &&
       !GetSpirvShaderModification().pixel.param_gen_point)) {
    input_front_facing_ = builder_->createVariable(
        spv::NoPrecision, spv::StorageClassInput, type_bool_, "gl_FrontFacing");
    builder_->addDecoration(input_front_facing_, spv::DecorationBuiltIn,
                            spv::BuiltInFrontFacing);
    main_interface_.push_back(input_front_facing_);
  }

  // Sample mask input.
  if (edram_fragment_shader_interlock_) {
    // SampleMask depends on SampleRateShading in some SPIR-V revisions.
    builder_->addCapability(spv::CapabilitySampleRateShading);
    // Declared as an array of a single int.
    input_sample_mask_ = builder_->createVariable(
        spv::NoPrecision, spv::StorageClassInput,
        builder_->makeArrayType(type_int_, builder_->makeUintConstant(1), 0),
        "gl_SampleMaskIn");
    builder_->addDecoration(input_sample_mask_, spv::DecorationFlat);
    builder_->addDecoration(input_sample_mask_, spv::DecorationBuiltIn,
                            spv::BuiltInSampleMask);
    main_interface_.push_back(input_sample_mask_);
  }

  if (!is_depth_only_fragment_shader_) {
    // Framebuffer color attachment outputs.
    if (!edram_fragment_shader_interlock_) {
      // Render targets not written by the shader stay spv::NoResult.
      std::fill(output_or_var_fragment_data_.begin(),
                output_or_var_fragment_data_.end(), spv::NoResult);
      static const char* const kFragmentDataOutputNames[] = {
          "xe_out_fragment_data_0",
          "xe_out_fragment_data_1",
          "xe_out_fragment_data_2",
          "xe_out_fragment_data_3",
      };
      uint32_t color_targets_remaining =
          current_shader().writes_color_targets();
      uint32_t color_target_index;
      while (
          xe::bit_scan_forward(color_targets_remaining, &color_target_index)) {
        color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
        spv::Id output_fragment_data_rt = builder_->createVariable(
            spv::NoPrecision, spv::StorageClassOutput, type_float4_,
            kFragmentDataOutputNames[color_target_index]);
        output_or_var_fragment_data_[color_target_index] =
            output_fragment_data_rt;
        // The output location is the guest render target index.
        builder_->addDecoration(output_fragment_data_rt,
                                spv::DecorationLocation,
                                int(color_target_index));
        // Make invariant as pixel shaders may be used for various precise
        // computations.
        builder_->addDecoration(output_fragment_data_rt,
                                spv::DecorationInvariant);
        main_interface_.push_back(output_fragment_data_rt);
      }
    }
  }
}

// Emits the beginning of the fragment shader main function: pixel kill setup,
// color variables and the early depth / stencil test for fragment shader
// interlock, register initialization from the interpolators, PsParamGen, and
// zero-initialization of the color outputs.
void SpirvShaderTranslator::StartFragmentShaderInMain() {
  // TODO(Triang3l): Allow memory export with resolution scaling only for the
  // center host pixel, with sample shading (for depth format conversion) only
  // for the bottom-right sample (unlike in Direct3D, the sample mask input
  // doesn't include covered samples of the primitive that correspond to other
  // invocations, so use the sample that's the most friendly to the half-pixel
  // offset).

  // Set up pixel killing from within the translated shader without affecting
  // the control flow (unlike with OpKill), similarly to how pixel killing works
  // on the Xenos, and also keeping a single critical section exit and return
  // for safety across different Vulkan implementations with fragment shader
  // interlock.
if (current_shader().kills_pixels()) {
    if (features_.demote_to_helper_invocation) {
      // TODO(Triang3l): Promoted to SPIR-V 1.6 - don't add the extension there.
      builder_->addExtension("SPV_EXT_demote_to_helper_invocation");
      builder_->addCapability(spv::CapabilityDemoteToHelperInvocationEXT);
    } else {
      // Without demotion, the kill state is tracked in a function-local bool
      // initialized to false.
      var_main_kill_pixel_ = builder_->createVariable(
          spv::NoPrecision, spv::StorageClassFunction, type_bool_,
          "xe_var_kill_pixel", builder_->makeBoolConstant(false));
    }
    // For killing with fragment shader interlock when demotion is supported,
    // using OpIsHelperInvocationEXT to avoid allocating a variable in addition
    // to the execution mask GPUs naturally have.
  }

  if (edram_fragment_shader_interlock_) {
    // Initialize color output variables with fragment shader interlock.
    std::fill(output_or_var_fragment_data_.begin(),
              output_or_var_fragment_data_.end(), spv::NoResult);
    var_main_fsi_color_written_ = spv::NoResult;
    uint32_t color_targets_written = current_shader().writes_color_targets();
    if (color_targets_written) {
      static const char* const kFragmentDataVariableNames[] = {
          "xe_var_fragment_data_0",
          "xe_var_fragment_data_1",
          "xe_var_fragment_data_2",
          "xe_var_fragment_data_3",
      };
      uint32_t color_targets_remaining = color_targets_written;
      uint32_t color_target_index;
      while (
          xe::bit_scan_forward(color_targets_remaining, &color_target_index)) {
        color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
        // With fragment shader interlock, colors are function-local variables
        // (zero-initialized) rather than shader outputs.
        output_or_var_fragment_data_[color_target_index] =
            builder_->createVariable(
                spv::NoPrecision, spv::StorageClassFunction, type_float4_,
                kFragmentDataVariableNames[color_target_index],
                const_float4_0_);
      }
      // Bit mask variable, initially zero, for the color targets written.
      var_main_fsi_color_written_ = builder_->createVariable(
          spv::NoPrecision, spv::StorageClassFunction, type_uint_,
          "xe_var_fsi_color_written", const_uint_0_);
    }
  }

  if (edram_fragment_shader_interlock_ && FSI_IsDepthStencilEarly()) {
    spv::Id msaa_samples = LoadMsaaSamplesFromFlags();
    FSI_LoadSampleMask(msaa_samples);
    FSI_LoadEdramOffsets(msaa_samples);
    builder_->createNoResultOp(spv::OpBeginInvocationInterlockEXT);
    FSI_DepthStencilTest(msaa_samples, false);
    if (!is_depth_only_fragment_shader_) {
      // Skip the rest of the shader if the whole quad (due to derivatives) has
      // failed the depth / stencil test, and there are no depth and stencil
      // values to conditionally write after running the shader to check if
      // samples don't additionally need to be discarded.
      spv::Id quad_needs_execution = builder_->createBinOp(
          spv::OpINotEqual, type_bool_, main_fsi_sample_mask_, const_uint_0_);
      // TODO(Triang3l): Use GroupNonUniformQuad operations where supported.
      // If none of the pixels in the quad passed the depth / stencil test, the
      // value of (any samples covered ? 1.0f : 0.0f) for the current pixel will
      // be 0.0f, and since it will be 0.0f in other pixels too, the derivatives
      // will be zero as well.
      builder_->addCapability(spv::CapabilityDerivativeControl);
      // Query the horizontally adjacent pixel.
      quad_needs_execution = builder_->createBinOp(
          spv::OpLogicalOr, type_bool_, quad_needs_execution,
          builder_->createBinOp(
              spv::OpFOrdNotEqual, type_bool_,
              builder_->createUnaryOp(
                  spv::OpDPdxFine, type_float_,
                  builder_->createTriOp(spv::OpSelect, type_float_,
                                        quad_needs_execution, const_float_1_,
                                        const_float_0_)),
              const_float_0_));
      // Query the vertically adjacent pair of pixels.
      quad_needs_execution = builder_->createBinOp(
          spv::OpLogicalOr, type_bool_, quad_needs_execution,
          builder_->createBinOp(
              spv::OpFOrdNotEqual, type_bool_,
              builder_->createUnaryOp(
                  spv::OpDPdyCoarse, type_float_,
                  builder_->createTriOp(spv::OpSelect, type_float_,
                                        quad_needs_execution, const_float_1_,
                                        const_float_0_)),
              const_float_0_));
      // The build point is left inside the conditional block; the merge block
      // is stored in a member, and is not closed in this function.
      spv::Block& main_fsi_early_depth_stencil_execute_quad =
          builder_->makeNewBlock();
      main_fsi_early_depth_stencil_execute_quad_merge_ =
          &builder_->makeNewBlock();
      builder_->createSelectionMerge(
          main_fsi_early_depth_stencil_execute_quad_merge_,
          spv::SelectionControlDontFlattenMask);
      builder_->createConditionalBranch(
          quad_needs_execution, &main_fsi_early_depth_stencil_execute_quad,
          main_fsi_early_depth_stencil_execute_quad_merge_);
      builder_->setBuildPoint(&main_fsi_early_depth_stencil_execute_quad);
    }
  }

  // Nothing below applies to the depth-only shader.
  if (is_depth_only_fragment_shader_) {
    return;
  }

  uint32_t param_gen_interpolator = GetPsParamGenInterpolator();

  // Zero general-purpose registers to prevent crashes when the game
  // references them after only initializing them conditionally, and copy
  // interpolants to GPRs.
  uint32_t interpolator_mask = GetModificationInterpolatorMask();
  for (uint32_t i = 0; i < register_count(); ++i) {
    // The PsParamGen register is written separately below.
    if (i == param_gen_interpolator) {
      continue;
    }
    id_vector_temp_.clear();
    id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
    builder_->createStore(
        (i < xenos::kMaxInterpolators &&
         (interpolator_mask & (UINT32_C(1) << i)))
            ? builder_->createLoad(input_output_interpolators_[i],
                                   spv::NoPrecision)
            : const_float4_0_,
        builder_->createAccessChain(spv::StorageClassFunction,
                                    var_main_registers_, id_vector_temp_));
  }

  // Pixel parameters.
  if (param_gen_interpolator != UINT32_MAX) {
    Modification modification = GetSpirvShaderModification();
    // Rounding the position down, and taking the absolute value, so in case the
    // host GPU for some reason has quads used for derivative calculation at odd
    // locations, the left and top edges will have correct derivative magnitude
    // and LODs.
    // Assuming that if PsParamGen is needed at all, param_gen_point is always
    // set for point primitives, and is always disabled for other primitive
    // types.
    // OpFNegate requires sign bit flipping even for 0.0 (in this case, the
    // first column or row of pixels) only since SPIR-V 1.5 revision 2 (not the
    // base 1.5).
    // TODO(Triang3l): When SPIR-V 1.6 is used in Xenia, see if OpFNegate can be
    // used there, should be cheaper because it may be implemented as a hardware
    // instruction modifier, though it respects the rule for subnormal numbers -
    // see the actual hardware instructions in both OpBitwiseXor and OpFNegate
    // cases.
    spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31);
    // TODO(Triang3l): Resolution scale inversion.
    // X - pixel X .0 in the magnitude, is back-facing in the sign bit.
    assert_true(input_fragment_coordinates_ != spv::NoResult);
    id_vector_temp_.clear();
    id_vector_temp_.push_back(const_int_0_);
    spv::Id param_gen_x = builder_->createUnaryBuiltinCall(
        type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs,
        builder_->createUnaryBuiltinCall(
            type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
            builder_->createLoad(
                builder_->createAccessChain(spv::StorageClassInput,
                                            input_fragment_coordinates_,
                                            id_vector_temp_),
                spv::NoPrecision)));
    if (!modification.pixel.param_gen_point) {
      assert_true(input_front_facing_ != spv::NoResult);
      // The sign bit is left clear if the primitive is not polygonal or is
      // front-facing, and is flipped otherwise.
      param_gen_x = builder_->createTriOp(
          spv::OpSelect, type_float_,
          builder_->createBinOp(
              spv::OpLogicalOr, type_bool_,
              builder_->createBinOp(
                  spv::OpIEqual, type_bool_,
                  builder_->createBinOp(
                      spv::OpBitwiseAnd, type_uint_,
                      main_system_constant_flags_,
                      builder_->makeUintConstant(kSysFlag_PrimitivePolygonal)),
                  const_uint_0_),
              builder_->createLoad(input_front_facing_, spv::NoPrecision)),
          param_gen_x,
          builder_->createUnaryOp(
              spv::OpBitcast, type_float_,
              builder_->createBinOp(
                  spv::OpBitwiseXor, type_uint_,
                  builder_->createUnaryOp(spv::OpBitcast, type_uint_,
                                          param_gen_x),
                  const_sign_bit)));
    }
    // Y - pixel Y .0 in the magnitude, is point in the sign bit.
    id_vector_temp_.clear();
    id_vector_temp_.push_back(builder_->makeIntConstant(1));
    spv::Id param_gen_y = builder_->createUnaryBuiltinCall(
        type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs,
        builder_->createUnaryBuiltinCall(
            type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor,
            builder_->createLoad(
                builder_->createAccessChain(spv::StorageClassInput,
                                            input_fragment_coordinates_,
                                            id_vector_temp_),
                spv::NoPrecision)));
    if (modification.pixel.param_gen_point) {
      param_gen_y = builder_->createUnaryOp(
          spv::OpBitcast, type_float_,
          builder_->createBinOp(
              spv::OpBitwiseXor, type_uint_,
              builder_->createUnaryOp(spv::OpBitcast, type_uint_, param_gen_y),
              const_sign_bit));
    }
    // Z - point S in the magnitude, is line in the sign bit.
    // W - point T in the magnitude.
    spv::Id param_gen_z, param_gen_w;
    if (modification.pixel.param_gen_point) {
      assert_true(input_point_coordinates_ != spv::NoResult);
      // Saturate to avoid negative point coordinates if the center of the pixel
      // is not covered, and extrapolation is done.
      spv::Id param_gen_point_coordinates = builder_->createTriBuiltinCall(
          type_float2_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
          builder_->createLoad(input_point_coordinates_, spv::NoPrecision),
          const_float2_0_, const_float2_1_);
      param_gen_z = builder_->createCompositeExtract(
          param_gen_point_coordinates, type_float_, 0);
      param_gen_w = builder_->createCompositeExtract(
          param_gen_point_coordinates, type_float_, 1);
    } else {
      // Zero magnitude, with only the sign bit set if the line flag is set.
      param_gen_z = builder_->createUnaryOp(
          spv::OpBitcast, type_float_,
          builder_->createTriOp(
              spv::OpSelect, type_uint_,
              builder_->createBinOp(
                  spv::OpINotEqual, type_bool_,
                  builder_->createBinOp(
                      spv::OpBitwiseAnd, type_uint_,
                      main_system_constant_flags_,
                      builder_->makeUintConstant(kSysFlag_PrimitiveLine)),
                  const_uint_0_),
              const_sign_bit, const_uint_0_));
      param_gen_w = const_float_0_;
    }
    // Store the pixel parameters.
    id_vector_temp_.clear();
    id_vector_temp_.push_back(param_gen_x);
    id_vector_temp_.push_back(param_gen_y);
    id_vector_temp_.push_back(param_gen_z);
    id_vector_temp_.push_back(param_gen_w);
    spv::Id param_gen =
        builder_->createCompositeConstruct(type_float4_, id_vector_temp_);
    id_vector_temp_.clear();
    id_vector_temp_.push_back(
        builder_->makeIntConstant(int(param_gen_interpolator)));
    builder_->createStore(param_gen, builder_->createAccessChain(
                                         spv::StorageClassFunction,
                                         var_main_registers_, id_vector_temp_));
  }

  if (!edram_fragment_shader_interlock_) {
    // Initialize the colors for safety.
    for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
      spv::Id output_fragment_data_rt = output_or_var_fragment_data_[i];
      if (output_fragment_data_rt != spv::NoResult) {
        builder_->createStore(const_float4_0_, output_fragment_data_rt);
      }
    }
  }
}

// Makes the exec-level conditional that is currently open match the new exec
// (or jump): returns immediately if the already open state can be reused,
// otherwise closes the open conditionals and, unless the new exec is
// unconditional, opens a new conditional block on the bool constant or on the
// predicate.
void SpirvShaderTranslator::UpdateExecConditionals(
    ParsedExecInstruction::Type type, uint32_t bool_constant_index,
    bool condition) {
  // Check if we can merge the new exec with the previous one, or the jump with
  // the previous exec. The instruction-level predicate check is also merged in
  // this case.
  if (type == ParsedExecInstruction::Type::kConditional) {
    // Can merge conditional with conditional, as long as the bool constant and
    // the expected values are the same.
    if (cf_exec_conditional_merge_ &&
        cf_exec_bool_constant_or_predicate_ == bool_constant_index &&
        cf_exec_condition_ == condition) {
      return;
    }
  } else if (type == ParsedExecInstruction::Type::kPredicated) {
    // Can merge predicated with predicated if the conditions are the same and
    // the previous exec hasn't modified the predicate register.
    if (!cf_exec_predicate_written_ && cf_exec_conditional_merge_ &&
        cf_exec_bool_constant_or_predicate_ == kCfExecBoolConstantPredicate &&
        cf_exec_condition_ == condition) {
      return;
    }
  } else {
    // Can merge unconditional with unconditional.
    assert_true(type == ParsedExecInstruction::Type::kUnconditional);
    if (!cf_exec_conditional_merge_) {
      return;
    }
  }

  CloseExecConditionals();

  // Unconditional execs need no conditional block.
  if (type == ParsedExecInstruction::Type::kUnconditional) {
    return;
  }

  EnsureBuildPointAvailable();
  spv::Id condition_id;
  if (type == ParsedExecInstruction::Type::kConditional) {
    id_vector_temp_.clear();
    // Bool constants (member 0).
    id_vector_temp_.push_back(const_int_0_);
    // 128-bit vector.
    id_vector_temp_.push_back(
        builder_->makeIntConstant(int(bool_constant_index >> 7)));
    // 32-bit scalar of a 128-bit vector.
id_vector_temp_.push_back( builder_->makeIntConstant(int((bool_constant_index >> 5) & 3))); spv::Id bool_constant_scalar = builder_->createLoad(builder_->createAccessChain( spv::StorageClassUniform, uniform_bool_loop_constants_, id_vector_temp_), spv::NoPrecision); condition_id = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( spv::OpBitwiseAnd, type_uint_, bool_constant_scalar, builder_->makeUintConstant(uint32_t(1) << (bool_constant_index & 31))), const_uint_0_); cf_exec_bool_constant_or_predicate_ = bool_constant_index; } else if (type == ParsedExecInstruction::Type::kPredicated) { condition_id = builder_->createLoad(var_main_predicate_, spv::NoPrecision); cf_exec_bool_constant_or_predicate_ = kCfExecBoolConstantPredicate; } else { assert_unhandled_case(type); return; } cf_exec_condition_ = condition; cf_exec_conditional_merge_ = new spv::Block( builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); builder_->createSelectionMerge(cf_exec_conditional_merge_, spv::SelectionControlDontFlattenMask); spv::Block& inner_block = builder_->makeNewBlock(); builder_->createConditionalBranch( condition_id, condition ? &inner_block : cf_exec_conditional_merge_, condition ? cf_exec_conditional_merge_ : &inner_block); builder_->setBuildPoint(&inner_block); } void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated, bool condition) { if (!predicated) { CloseInstructionPredication(); return; } if (cf_instruction_predicate_merge_) { if (cf_instruction_predicate_condition_ == condition) { // Already in the needed instruction-level conditional. return; } CloseInstructionPredication(); } // If the instruction predicate condition is the same as the exec predicate // condition, no need to open a check. However, if there was a `setp` prior // to this instruction, the predicate value now may be different than it was // in the beginning of the exec. 
if (!cf_exec_predicate_written_ && cf_exec_conditional_merge_ && cf_exec_bool_constant_or_predicate_ == kCfExecBoolConstantPredicate && cf_exec_condition_ == condition) { return; } cf_instruction_predicate_condition_ = condition; EnsureBuildPointAvailable(); spv::Id predicate_id = builder_->createLoad(var_main_predicate_, spv::NoPrecision); spv::Block& predicated_block = builder_->makeNewBlock(); cf_instruction_predicate_merge_ = new spv::Block( builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); builder_->createSelectionMerge(cf_instruction_predicate_merge_, spv::SelectionControlMaskNone); builder_->createConditionalBranch( predicate_id, condition ? &predicated_block : cf_instruction_predicate_merge_, condition ? cf_instruction_predicate_merge_ : &predicated_block); builder_->setBuildPoint(&predicated_block); } void SpirvShaderTranslator::CloseInstructionPredication() { if (!cf_instruction_predicate_merge_) { return; } spv::Block& inner_block = *builder_->getBuildPoint(); if (!inner_block.isTerminated()) { builder_->createBranch(cf_instruction_predicate_merge_); } inner_block.getParent().addBlock(cf_instruction_predicate_merge_); builder_->setBuildPoint(cf_instruction_predicate_merge_); cf_instruction_predicate_merge_ = nullptr; } void SpirvShaderTranslator::CloseExecConditionals() { // Within the exec - instruction-level predicate check. CloseInstructionPredication(); // Exec level. if (cf_exec_conditional_merge_) { spv::Block& inner_block = *builder_->getBuildPoint(); if (!inner_block.isTerminated()) { builder_->createBranch(cf_exec_conditional_merge_); } inner_block.getParent().addBlock(cf_exec_conditional_merge_); builder_->setBuildPoint(cf_exec_conditional_merge_); cf_exec_conditional_merge_ = nullptr; } // Nothing relies on the predicate value being unchanged now. 
cf_exec_predicate_written_ = false;
}

// Returns the ID of a signed integer index for accessing a register or a
// constant: an integer constant for absolute addressing, or the loaded
// address register / loop address X component with `storage_index` added for
// relative addressing. For absolutely addressed float constants, the index is
// remapped through the packed float constant map of the shader.
spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(
    InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
    bool is_float_constant) {
  const Shader::ConstantRegisterMap& register_map =
      current_shader().constant_register_map();
  EnsureBuildPointAvailable();

  // Absolute addressing needs no loads - the index is a compile-time constant.
  if (addressing_mode == InstructionStorageAddressingMode::kAbsolute) {
    uint32_t absolute_index = storage_index;
    if (is_float_constant) {
      absolute_index = register_map.GetPackedFloatConstantIndex(storage_index);
      assert_true(absolute_index != UINT32_MAX);
      // Fall back to the first constant if the lookup has failed.
      if (absolute_index == UINT32_MAX) {
        absolute_index = 0;
      }
    }
    return builder_->makeIntConstant(int(absolute_index));
  }

  // Relative addressing - pick the pointer to the dynamic part of the index.
  spv::Id relative_base_pointer = spv::NoResult;
  if (addressing_mode ==
      InstructionStorageAddressingMode::kAddressRegisterRelative) {
    relative_base_pointer = var_main_address_register_;
  } else if (addressing_mode ==
             InstructionStorageAddressingMode::kLoopRelative) {
    // The X component of the loop address.
    id_vector_temp_util_.clear();
    id_vector_temp_util_.push_back(const_int_0_);
    relative_base_pointer = builder_->createAccessChain(
        spv::StorageClassFunction, var_main_loop_address_,
        id_vector_temp_util_);
  }
  assert_true(!is_float_constant || register_map.float_dynamic_addressing);
  assert_true(relative_base_pointer != spv::NoResult);

  spv::Id dynamic_index =
      builder_->createLoad(relative_base_pointer, spv::NoPrecision);
  // Skip the addition for a zero static offset.
  if (storage_index) {
    dynamic_index =
        builder_->createBinOp(spv::OpIAdd, type_int_, dynamic_index,
                              builder_->makeIntConstant(int(storage_index)));
  }
  return dynamic_index;
}

spv::Id SpirvShaderTranslator::LoadOperandStorage(
    const InstructionOperand& operand) {
  spv::Id index = GetStorageAddressingIndex(
      operand.storage_addressing_mode, operand.storage_index,
      operand.storage_source == InstructionStorageSource::kConstantFloat);
  EnsureBuildPointAvailable();
  spv::Id vec4_pointer = spv::NoResult;
  switch (operand.storage_source) {
    case InstructionStorageSource::kRegister:
      assert_true(var_main_registers_ != spv::NoResult);
id_vector_temp_util_.clear(); // Array element. id_vector_temp_util_.push_back(index); vec4_pointer = builder_->createAccessChain( spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); break; case InstructionStorageSource::kConstantFloat: assert_true(uniform_float_constants_ != spv::NoResult); id_vector_temp_util_.clear(); // The first and the only structure member. id_vector_temp_util_.push_back(const_int_0_); // Array element. id_vector_temp_util_.push_back(index); vec4_pointer = builder_->createAccessChain(spv::StorageClassUniform, uniform_float_constants_, id_vector_temp_util_); break; default: assert_unhandled_case(operand.storage_source); } assert_true(vec4_pointer != spv::NoResult); return builder_->createLoad(vec4_pointer, spv::NoPrecision); } spv::Id SpirvShaderTranslator::ApplyOperandModifiers( spv::Id operand_value, const InstructionOperand& original_operand, bool invert_negate, bool force_absolute) { spv::Id type = builder_->getTypeId(operand_value); assert_true(type != spv::NoType); if (type == spv::NoType) { return operand_value; } if (original_operand.is_absolute_value || force_absolute) { EnsureBuildPointAvailable(); operand_value = builder_->createUnaryBuiltinCall( type, ext_inst_glsl_std_450_, GLSLstd450FAbs, operand_value); } if (original_operand.is_negated != invert_negate) { EnsureBuildPointAvailable(); operand_value = builder_->createNoContractionUnaryOp(spv::OpFNegate, type, operand_value); } return operand_value; } spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents( spv::Id operand_storage, const InstructionOperand& original_operand, uint32_t components) { assert_not_zero(components); if (!components) { return spv::NoResult; } assert_true(components <= 0b1111); if (components == 0b1111 && original_operand.IsStandardSwizzle()) { return operand_storage; } EnsureBuildPointAvailable(); uint32_t component_count = xe::bit_count(components); if (component_count == 1) { uint32_t scalar_index; 
xe::bit_scan_forward(components, &scalar_index);
    // A single component is requested - extract the swizzled source component
    // as a scalar.
    // NOTE(review): the template arguments of the static_cast expressions in
    // this function appear to have been lost from the file - restore them.
    return builder_->createCompositeExtract(
        operand_storage, type_float_,
        static_cast(original_operand.GetComponent(scalar_index)) -
            static_cast(SwizzleSource::kX));
  }
  // Multiple components - gather the swizzled source component index for each
  // requested component, from the lowest bit to the highest, and shuffle.
  uint_vector_temp_util_.clear();
  uint32_t components_remaining = components;
  uint32_t component_index;
  while (xe::bit_scan_forward(components_remaining, &component_index)) {
    components_remaining &= ~(uint32_t(1) << component_index);
    uint_vector_temp_util_.push_back(
        static_cast(
            original_operand.GetComponent(component_index)) -
        static_cast(SwizzleSource::kX));
  }
  return builder_->createRvalueSwizzle(spv::NoPrecision,
                                       type_float_vectors_[component_count - 1],
                                       operand_storage, uint_vector_temp_util_);
}

// Gets the first two components of the operand, with the modifiers applied, as
// two scalars in `a_out` and `b_out`. If both components are swizzled from the
// same source component, the same ID is returned for both.
void SpirvShaderTranslator::GetOperandScalarXY(
    spv::Id operand_storage, const InstructionOperand& original_operand,
    spv::Id& a_out, spv::Id& b_out, bool invert_negate, bool force_absolute) {
  spv::Id a = GetOperandComponents(operand_storage, original_operand, 0b0001,
                                   invert_negate, force_absolute);
  a_out = a;
  b_out = original_operand.GetComponent(0) != original_operand.GetComponent(1)
              ?
GetOperandComponents(operand_storage, original_operand, 0b0010, invert_negate, force_absolute) : a; } spv::Id SpirvShaderTranslator::GetAbsoluteOperand( spv::Id operand_storage, const InstructionOperand& original_operand) { if (original_operand.is_absolute_value && !original_operand.is_negated) { return operand_storage; } EnsureBuildPointAvailable(); return builder_->createUnaryBuiltinCall(builder_->getTypeId(operand_storage), ext_inst_glsl_std_450_, GLSLstd450FAbs, operand_storage); } void SpirvShaderTranslator::StoreResult(const InstructionResult& result, spv::Id value) { uint32_t used_write_mask = result.GetUsedWriteMask(); if (!used_write_mask) { return; } EnsureBuildPointAvailable(); spv::Id target_pointer = spv::NoResult; switch (result.storage_target) { case InstructionStorageTarget::kNone: break; case InstructionStorageTarget::kRegister: { assert_true(var_main_registers_ != spv::NoResult); // Must call GetStorageAddressingIndex first because of // id_vector_temp_util_ usage in it. spv::Id register_index = GetStorageAddressingIndex( result.storage_addressing_mode, result.storage_index); id_vector_temp_util_.clear(); // Array element. id_vector_temp_util_.push_back(register_index); target_pointer = builder_->createAccessChain( spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); } break; case InstructionStorageTarget::kInterpolator: { assert_true(is_vertex_shader()); target_pointer = input_output_interpolators_[result.storage_index]; // Unused interpolators are spv::NoResult in input_output_interpolators_. 
} break; case InstructionStorageTarget::kPosition: { assert_true(is_vertex_shader()); id_vector_temp_util_.clear(); id_vector_temp_util_.push_back( builder_->makeIntConstant(kOutputPerVertexMemberPosition)); target_pointer = builder_->createAccessChain( spv::StorageClassOutput, output_per_vertex_, id_vector_temp_util_); } break; case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: { assert_true(is_vertex_shader()); assert_zero(used_write_mask & 0b1000); target_pointer = var_main_point_size_edge_flag_kill_vertex_; } break; case InstructionStorageTarget::kColor: { assert_true(is_pixel_shader()); assert_not_zero(used_write_mask); assert_true(current_shader().writes_color_target(result.storage_index)); target_pointer = output_or_var_fragment_data_[result.storage_index]; if (edram_fragment_shader_interlock_) { assert_true(var_main_fsi_color_written_ != spv::NoResult); builder_->createStore( builder_->createBinOp( spv::OpBitwiseOr, type_uint_, builder_->createLoad(var_main_fsi_color_written_, spv::NoPrecision), builder_->makeUintConstant(uint32_t(1) << result.storage_index)), var_main_fsi_color_written_); } } break; case InstructionStorageTarget::kExportAddress: { // spv::NoResult if memory export usage is unsupported or invalid. target_pointer = var_main_memexport_address_; } break; case InstructionStorageTarget::kExportData: { // spv::NoResult if memory export usage is unsupported or invalid. target_pointer = var_main_memexport_data_[result.storage_index]; if (target_pointer != spv::NoResult) { // Mark that the eM# has been written to and needs to be exported. assert_true(var_main_memexport_data_written_ != spv::NoResult); builder_->createStore( builder_->createBinOp( spv::OpBitwiseOr, type_uint_, builder_->createLoad(var_main_memexport_data_written_, spv::NoPrecision), builder_->makeUintConstant(uint32_t(1) << result.storage_index)), var_main_memexport_data_written_); } } break; default: // TODO(Triang3l): All storage targets. 
break; } if (target_pointer == spv::NoResult) { return; } uint32_t constant_values; uint32_t constant_components = result.GetUsedConstantComponents(constant_values); if (value == spv::NoResult) { // The instruction processing function decided that nothing useful needs to // be stored for some reason, however, some components still need to be // written on the guest side - fill them with zeros. constant_components = used_write_mask; } uint32_t non_constant_components = used_write_mask & ~constant_components; unsigned int value_num_components = value != spv::NoResult ? static_cast(builder_->getNumComponents(value)) : 0; if (result.is_clamped && non_constant_components) { // Apply the saturation modifier to the result. value = builder_->createTriBuiltinCall( type_float_vectors_[value_num_components - 1], ext_inst_glsl_std_450_, GLSLstd450NClamp, value, const_float_vectors_0_[value_num_components - 1], const_float_vectors_1_[value_num_components - 1]); } // The value contains either result.GetUsedResultComponents() in a condensed // way, or a scalar to be replicated. Decompress them to create a mapping from // guest result components to the ones in the value vector. uint32_t used_result_components = result.GetUsedResultComponents(); unsigned int result_unswizzled_value_components[4] = {}; if (value_num_components > 1) { unsigned int value_component = 0; uint32_t used_result_components_remaining = used_result_components; uint32_t result_component; while (xe::bit_scan_forward(used_result_components_remaining, &result_component)) { used_result_components_remaining &= ~(uint32_t(1) << result_component); result_unswizzled_value_components[result_component] = std::min(value_component++, value_num_components - 1); } } // Get swizzled mapping of non-constant components to the components of // `value`. 
unsigned int result_swizzled_value_components[4] = {}; for (uint32_t i = 0; i < 4; ++i) { if (!(non_constant_components & (1 << i))) { continue; } SwizzleSource swizzle = result.components[i]; assert_true(swizzle >= SwizzleSource::kX && swizzle <= SwizzleSource::kW); result_swizzled_value_components[i] = result_unswizzled_value_components[uint32_t(swizzle) - uint32_t(SwizzleSource::kX)]; } spv::Id target_type = builder_->getDerefTypeId(target_pointer); unsigned int target_num_components = builder_->getNumTypeComponents(target_type); assert_true( target_num_components == GetInstructionStorageTargetUsedComponentCount(result.storage_target)); uint32_t target_component_mask = (1 << target_num_components) - 1; assert_zero(used_write_mask & ~target_component_mask); spv::Id value_to_store; if (target_component_mask == used_write_mask) { // All components are overwritten - no need to load the original value. // Possible cases: // * Non-constants only. // * Vector target. // * Vector source. // * Identity swizzle - store directly. // * Non-identity swizzle - shuffle. // * Scalar source - smear. // * Scalar target. // * Vector source - extract. // * Scalar source - store directly. // * Constants only. // * Vector target - make composite constant. // * Scalar target - store directly. // * Mixed non-constants and constants (only for vector targets - scalar // targets fully covered by the previous cases). // * Vector source - shuffle with {0, 1} also applying swizzle. // * Scalar source - construct composite. if (!constant_components) { if (target_num_components > 1) { if (value_num_components > 1) { // Non-constants only - vector target, vector source. 
bool is_identity_swizzle = target_num_components == value_num_components; for (uint32_t i = 0; is_identity_swizzle && i < target_num_components; ++i) { is_identity_swizzle &= result_swizzled_value_components[i] == i; } if (is_identity_swizzle) { value_to_store = value; } else { uint_vector_temp_util_.clear(); uint_vector_temp_util_.insert( uint_vector_temp_util_.cend(), result_swizzled_value_components, result_swizzled_value_components + target_num_components); value_to_store = builder_->createRvalueSwizzle( spv::NoPrecision, target_type, value, uint_vector_temp_util_); } } else { // Non-constants only - vector target, scalar source. value_to_store = builder_->smearScalar(spv::NoPrecision, value, target_type); } } else { if (value_num_components > 1) { // Non-constants only - scalar target, vector source. value_to_store = builder_->createCompositeExtract( value, type_float_, result_swizzled_value_components[0]); } else { // Non-constants only - scalar target, scalar source. value_to_store = value; } } } else if (!non_constant_components) { if (target_num_components > 1) { // Constants only - vector target. id_vector_temp_util_.clear(); for (uint32_t i = 0; i < target_num_components; ++i) { id_vector_temp_util_.push_back( (constant_values & (1 << i)) ? const_float_1_ : const_float_0_); } value_to_store = builder_->makeCompositeConstant(target_type, id_vector_temp_util_); } else { // Constants only - scalar target. value_to_store = (constant_values & 0b0001) ? const_float_1_ : const_float_0_; } } else { assert_true(target_num_components > 1); if (value_num_components > 1) { // Mixed non-constants and constants - vector source. std::unique_ptr shuffle_op = std::make_unique( builder_->getUniqueId(), target_type, spv::OpVectorShuffle); shuffle_op->addIdOperand(value); shuffle_op->addIdOperand(const_float2_0_1_); for (uint32_t i = 0; i < target_num_components; ++i) { shuffle_op->addImmediateOperand( (constant_components & (1 << i)) ? 
value_num_components + ((constant_values >> i) & 1) : result_swizzled_value_components[i]); } value_to_store = shuffle_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); } else { // Mixed non-constants and constants - scalar source. id_vector_temp_util_.clear(); for (uint32_t i = 0; i < target_num_components; ++i) { if (constant_components & (1 << i)) { id_vector_temp_util_.push_back( (constant_values & (1 << i)) ? const_float_1_ : const_float_0_); } else { id_vector_temp_util_.push_back(value); } } value_to_store = builder_->createCompositeConstruct( target_type, id_vector_temp_util_); } } } else { // Only certain components are overwritten. // Scalar targets are always overwritten fully, can't reach this case for // them. assert_true(target_num_components > 1); value_to_store = builder_->createLoad(target_pointer, spv::NoPrecision); // Two steps: // 1) Insert constants by shuffling (first so dependency chain of step 2 is // simpler if constants are written first). // 2) Insert value components - via shuffling for vector source, via // composite inserts for scalar value. if (constant_components) { std::unique_ptr shuffle_op = std::make_unique(builder_->getUniqueId(), target_type, spv::OpVectorShuffle); shuffle_op->addIdOperand(value_to_store); shuffle_op->addIdOperand(const_float2_0_1_); for (uint32_t i = 0; i < target_num_components; ++i) { shuffle_op->addImmediateOperand((constant_components & (1 << i)) ? 
target_num_components + ((constant_values >> i) & 1) : i); } value_to_store = shuffle_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); } if (non_constant_components) { if (value_num_components > 1) { std::unique_ptr shuffle_op = std::make_unique( builder_->getUniqueId(), target_type, spv::OpVectorShuffle); shuffle_op->addIdOperand(value_to_store); shuffle_op->addIdOperand(value); for (uint32_t i = 0; i < target_num_components; ++i) { shuffle_op->addImmediateOperand( (non_constant_components & (1 << i)) ? target_num_components + result_swizzled_value_components[i] : i); } value_to_store = shuffle_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); } else { for (uint32_t i = 0; i < target_num_components; ++i) { if (non_constant_components & (1 << i)) { value_to_store = builder_->createCompositeInsert( value, value_to_store, target_type, i); } } } } } if (result.storage_target == InstructionStorageTarget::kPointSizeEdgeFlagKillVertex && used_write_mask & 0b001) { // Make the point size non-negative as negative is used to indicate that the // default size must be used, and also clamp it to the bounds the way the // R400 (Adreno 200, to be more precise) hardware clamps it (functionally // like a signed 32-bit integer, -NaN and -Infinity...-0 to the minimum, // +NaN to the maximum). 
spv::Id point_size = builder_->createUnaryOp( spv::OpBitcast, type_int_, builder_->createCompositeExtract(value_to_store, type_float_, 0)); id_vector_temp_util_.clear(); id_vector_temp_util_.push_back( builder_->makeIntConstant(kSystemConstantPointVertexDiameterMin)); spv::Id point_vertex_diameter_min = builder_->createUnaryOp( spv::OpBitcast, type_int_, builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_util_), spv::NoPrecision)); point_size = builder_->createBinBuiltinCall( type_int_, ext_inst_glsl_std_450_, GLSLstd450SMax, point_vertex_diameter_min, point_size); id_vector_temp_util_.clear(); id_vector_temp_util_.push_back( builder_->makeIntConstant(kSystemConstantPointVertexDiameterMax)); spv::Id point_vertex_diameter_max = builder_->createUnaryOp( spv::OpBitcast, type_int_, builder_->createLoad( builder_->createAccessChain(spv::StorageClassUniform, uniform_system_constants_, id_vector_temp_util_), spv::NoPrecision)); point_size = builder_->createBinBuiltinCall( type_int_, ext_inst_glsl_std_450_, GLSLstd450SMin, point_vertex_diameter_max, point_size); value_to_store = builder_->createCompositeInsert( builder_->createUnaryOp(spv::OpBitcast, type_float_, point_size), value_to_store, type_float3_, 0); } builder_->createStore(value_to_store, target_pointer); } spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { spv::Id type = builder_->getTypeId(value); spv::Id const_uint_8_scalar = builder_->makeUintConstant(8); spv::Id const_uint_00ff00ff_scalar = builder_->makeUintConstant(0x00FF00FF); spv::Id const_uint_16_scalar = builder_->makeUintConstant(16); spv::Id const_uint_8_typed, const_uint_00ff00ff_typed, const_uint_16_typed; int num_components = builder_->getNumTypeComponents(type); if (num_components > 1) { id_vector_temp_.clear(); id_vector_temp_.insert(id_vector_temp_.cend(), num_components, const_uint_8_scalar); const_uint_8_typed = 
builder_->makeCompositeConstant(type, id_vector_temp_); id_vector_temp_.clear(); id_vector_temp_.insert(id_vector_temp_.cend(), num_components, const_uint_00ff00ff_scalar); const_uint_00ff00ff_typed = builder_->makeCompositeConstant(type, id_vector_temp_); id_vector_temp_.clear(); id_vector_temp_.insert(id_vector_temp_.cend(), num_components, const_uint_16_scalar); const_uint_16_typed = builder_->makeCompositeConstant(type, id_vector_temp_); } else { const_uint_8_typed = const_uint_8_scalar; const_uint_00ff00ff_typed = const_uint_00ff00ff_scalar; const_uint_16_typed = const_uint_16_scalar; } // 8-in-16 or one half of 8-in-32 (doing 8-in-16 swap). spv::Id is_8in16 = builder_->createBinOp( spv::OpIEqual, type_bool_, endian, builder_->makeUintConstant( static_cast(xenos::Endian::k8in16))); spv::Id is_8in32 = builder_->createBinOp( spv::OpIEqual, type_bool_, endian, builder_->makeUintConstant( static_cast(xenos::Endian::k8in32))); spv::Id is_8in16_or_8in32 = builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32); SpirvBuilder::IfBuilder if_8in16(is_8in16_or_8in32, spv::SelectionControlMaskNone, *builder_); spv::Id swapped_8in16; { swapped_8in16 = builder_->createBinOp( spv::OpBitwiseOr, type, builder_->createBinOp( spv::OpBitwiseAnd, type, builder_->createBinOp(spv::OpShiftRightLogical, type, value, const_uint_8_typed), const_uint_00ff00ff_typed), builder_->createBinOp( spv::OpShiftLeftLogical, type, builder_->createBinOp(spv::OpBitwiseAnd, type, value, const_uint_00ff00ff_typed), const_uint_8_typed)); } if_8in16.makeEndIf(); value = if_8in16.createMergePhi(swapped_8in16, value); // 16-in-32 or another half of 8-in-32 (doing 16-in-32 swap). 
spv::Id is_16in32 = builder_->createBinOp( spv::OpIEqual, type_bool_, endian, builder_->makeUintConstant( static_cast(xenos::Endian::k16in32))); spv::Id is_8in32_or_16in32 = builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32); SpirvBuilder::IfBuilder if_16in32(is_8in32_or_16in32, spv::SelectionControlMaskNone, *builder_); spv::Id swapped_16in32; { swapped_16in32 = builder_->createQuadOp( spv::OpBitFieldInsert, type, builder_->createBinOp(spv::OpShiftRightLogical, type, value, const_uint_16_typed), value, builder_->makeIntConstant(16), builder_->makeIntConstant(16)); } if_16in32.makeEndIf(); value = if_16in32.createMergePhi(swapped_16in32, value); return value; } spv::Id SpirvShaderTranslator::EndianSwap128Uint4(spv::Id value, spv::Id endian) { // Change 8-in-64 and 8-in-128 to 8-in-32, and then swap within 32 bits. spv::Id is_8in64 = builder_->createBinOp( spv::OpIEqual, type_bool_, endian, builder_->makeUintConstant( static_cast(xenos::Endian128::k8in64))); uint_vector_temp_.clear(); uint_vector_temp_.push_back(1); uint_vector_temp_.push_back(0); uint_vector_temp_.push_back(3); uint_vector_temp_.push_back(2); value = builder_->createTriOp( spv::OpSelect, type_uint4_, is_8in64, builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value, uint_vector_temp_), value); spv::Id is_8in128 = builder_->createBinOp( spv::OpIEqual, type_bool_, endian, builder_->makeUintConstant( static_cast(xenos::Endian128::k8in128))); uint_vector_temp_.clear(); uint_vector_temp_.push_back(3); uint_vector_temp_.push_back(2); uint_vector_temp_.push_back(1); uint_vector_temp_.push_back(0); value = builder_->createTriOp( spv::OpSelect, type_uint4_, is_8in128, builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value, uint_vector_temp_), value); endian = builder_->createTriOp( spv::OpSelect, type_uint_, builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in64, is_8in128), builder_->makeUintConstant( static_cast(xenos::Endian128::k8in32)), endian); return 
EndianSwap32Uint(value, endian); } spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( spv::Id address_dwords_int) { spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer : spv::StorageClassUniform; uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2(); if (!binding_count_log2) { // Single binding - load directly. id_vector_temp_.clear(); // The only SSBO struct member. id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(address_dwords_int); return builder_->createLoad( builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_), spv::NoPrecision); } // The memory is split into multiple bindings - check which binding to load // from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the // array with the variable itself because it needs non-uniform storage buffer // indexing. uint32_t binding_address_bits = (29 - 2) - binding_count_log2; spv::Id binding_index = builder_->createBinOp( spv::OpShiftRightLogical, type_uint_, builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), builder_->makeUintConstant(binding_address_bits)); spv::Id binding_address = builder_->createBinOp( spv::OpBitwiseAnd, type_int_, address_dwords_int, builder_->makeIntConstant( int((uint32_t(1) << binding_address_bits) - 1))); auto value_phi_op = std::make_unique( builder_->getUniqueId(), type_uint_, spv::OpPhi); // Zero if out of bounds. value_phi_op->addIdOperand(const_uint_0_); value_phi_op->addIdOperand(builder_->getBuildPoint()->getId()); SpirvBuilder::SwitchBuilder binding_switch( binding_index, spv::SelectionControlDontFlattenMask, *builder_); uint32_t binding_count = uint32_t(1) << binding_count_log2; id_vector_temp_.clear(); id_vector_temp_.push_back(spv::NoResult); // The only SSBO struct member. 
id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(binding_address); for (uint32_t i = 0; i < binding_count; ++i) { binding_switch.makeBeginCase(i); id_vector_temp_[0] = builder_->makeIntConstant(int(i)); value_phi_op->addIdOperand(builder_->createLoad( builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_), spv::NoPrecision)); value_phi_op->addIdOperand(builder_->getBuildPoint()->getId()); } binding_switch.makeEndSwitch(); spv::Id value_phi_result = value_phi_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(value_phi_op)); return value_phi_result; } void SpirvShaderTranslator::StoreUint32ToSharedMemory( spv::Id value, spv::Id address_dwords_int, spv::Id replace_mask) { spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer : spv::StorageClassUniform; spv::Id keep_mask = spv::NoResult; if (replace_mask != spv::NoResult) { keep_mask = builder_->createUnaryOp(spv::OpNot, type_uint_, replace_mask); value = builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, value, replace_mask); } auto store = [&](spv::Id pointer) { if (replace_mask != spv::NoResult) { // Don't touch the other bits in the buffer, just modify the needed bits // in the most up to date uint32 at the address. spv::Id const_scope_device = builder_->makeUintConstant( static_cast(spv::ScopeDevice)); spv::Id const_semantics_relaxed = const_uint_0_; builder_->createQuadOp(spv::OpAtomicAnd, type_uint_, pointer, const_scope_device, const_semantics_relaxed, keep_mask); builder_->createQuadOp(spv::OpAtomicOr, type_uint_, pointer, const_scope_device, const_semantics_relaxed, value); } else { builder_->createStore(value, pointer); } }; uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2(); if (!binding_count_log2) { // Single binding - store directly. id_vector_temp_.clear(); // The only SSBO struct member. 
id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(address_dwords_int); store(builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_)); return; } // The memory is split into multiple bindings - check which binding to store // to. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the // array with the variable itself because it needs non-uniform storage buffer // indexing. uint32_t binding_address_bits = (29 - 2) - binding_count_log2; spv::Id binding_index = builder_->createBinOp( spv::OpShiftRightLogical, type_uint_, builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), builder_->makeUintConstant(binding_address_bits)); spv::Id binding_address = builder_->createBinOp( spv::OpBitwiseAnd, type_int_, address_dwords_int, builder_->makeIntConstant( int((uint32_t(1) << binding_address_bits) - 1))); SpirvBuilder::SwitchBuilder binding_switch( binding_index, spv::SelectionControlDontFlattenMask, *builder_); uint32_t binding_count = uint32_t(1) << binding_count_log2; id_vector_temp_.clear(); id_vector_temp_.push_back(spv::NoResult); // The only SSBO struct member. 
id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(binding_address); for (uint32_t i = 0; i < binding_count; ++i) { binding_switch.makeBeginCase(i); id_vector_temp_[0] = builder_->makeIntConstant(int(i)); store(builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_)); } binding_switch.makeEndSwitch(); } spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated) { spv::Id value_type = builder_->getTypeId(gamma); assert_true(builder_->isFloatType(builder_->getScalarTypeId(value_type))); bool is_vector = builder_->isVectorType(value_type); assert_true(is_vector || builder_->isFloatType(value_type)); int num_components = builder_->getNumTypeComponents(value_type); assert_true(num_components < 4); spv::Id bool_type = type_bool_vectors_[num_components - 1]; spv::Id const_vector_0 = const_float_vectors_0_[num_components - 1]; spv::Id const_vector_1 = SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(1.0f), value_type); if (!gamma_pre_saturated) { // Saturate, flushing NaN to 0. 
gamma = builder_->createTriBuiltinCall(value_type, ext_inst_glsl_std_450_, GLSLstd450NClamp, gamma, const_vector_0, const_vector_1); } spv::Id is_piece_at_least_3 = builder_->createBinOp( spv::OpFOrdGreaterThanEqual, bool_type, gamma, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(192.0f / 255.0f), value_type)); spv::Id scale_3_or_2 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_3, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(8.0f / 1024.0f), value_type), SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(4.0f / 1024.0f), value_type)); spv::Id offset_3_or_2 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_3, SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-1024.0f), value_type), SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-256.0f), value_type)); spv::Id is_piece_at_least_1 = builder_->createBinOp( spv::OpFOrdGreaterThanEqual, bool_type, gamma, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(64.0f / 255.0f), value_type)); spv::Id scale_1_or_0 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_1, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(2.0f / 1024.0f), value_type), SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(1.0f / 1024.0f), value_type)); spv::Id offset_1_or_0 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_1, SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-64.0f), value_type), const_vector_0); spv::Id is_piece_at_least_2 = builder_->createBinOp( spv::OpFOrdGreaterThanEqual, bool_type, gamma, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(96.0f / 255.0f), value_type)); spv::Id scale = builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, scale_3_or_2, scale_1_or_0); spv::Id offset = builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, offset_3_or_2, offset_1_or_0); spv::Op value_times_scalar_opcode 
= is_vector ? spv::OpVectorTimesScalar : spv::OpFMul; // linear = gamma * (255.0f * 1024.0f) * scale + offset spv::Id linear = builder_->createNoContractionBinOp( spv::OpFAdd, value_type, builder_->createNoContractionBinOp( spv::OpFMul, value_type, builder_->createNoContractionBinOp( value_times_scalar_opcode, value_type, gamma, builder_->makeFloatConstant(255.0f * 1024.0f)), scale), offset); // linear += trunc(linear * scale) linear = builder_->createNoContractionBinOp( spv::OpFAdd, value_type, linear, builder_->createUnaryBuiltinCall( value_type, ext_inst_glsl_std_450_, GLSLstd450Trunc, builder_->createNoContractionBinOp(spv::OpFMul, value_type, linear, scale))); // linear *= 1.0f / 1023.0f linear = builder_->createNoContractionBinOp( value_times_scalar_opcode, value_type, linear, builder_->makeFloatConstant(1.0f / 1023.0f)); return linear; } spv::Id SpirvShaderTranslator::LinearToPWLGamma(spv::Id linear, bool linear_pre_saturated) { spv::Id value_type = builder_->getTypeId(linear); assert_true(builder_->isFloatType(builder_->getScalarTypeId(value_type))); bool is_vector = builder_->isVectorType(value_type); assert_true(is_vector || builder_->isFloatType(value_type)); int num_components = builder_->getNumTypeComponents(value_type); assert_true(num_components < 4); spv::Id bool_type = type_bool_vectors_[num_components - 1]; spv::Id const_vector_0 = const_float_vectors_0_[num_components - 1]; spv::Id const_vector_1 = SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(1.0f), value_type); if (!linear_pre_saturated) { // Saturate, flushing NaN to 0. 
linear = builder_->createTriBuiltinCall(value_type, ext_inst_glsl_std_450_, GLSLstd450NClamp, linear, const_vector_0, const_vector_1); } spv::Id is_piece_at_least_3 = builder_->createBinOp( spv::OpFOrdGreaterThanEqual, bool_type, linear, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(512.0f / 1023.0f), value_type)); spv::Id scale_3_or_2 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_3, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(1023.0f / 8.0f), value_type), SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(1023.0f / 4.0f), value_type)); spv::Id offset_3_or_2 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_3, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(128.0f / 255.0f), value_type), SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(64.0f / 255.0f), value_type)); spv::Id is_piece_at_least_1 = builder_->createBinOp( spv::OpFOrdGreaterThanEqual, bool_type, linear, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(64.0f / 1023.0f), value_type)); spv::Id scale_1_or_0 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_1, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(1023.0f / 2.0f), value_type), SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(1023.0f), value_type)); spv::Id offset_1_or_0 = builder_->createTriOp( spv::OpSelect, value_type, is_piece_at_least_1, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(32.0f / 255.0f), value_type), const_vector_0); spv::Id is_piece_at_least_2 = builder_->createBinOp( spv::OpFOrdGreaterThanEqual, bool_type, linear, SpirvSmearScalarResultOrConstant( builder_->makeFloatConstant(128.0f / 1023.0f), value_type)); spv::Id scale = builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, scale_3_or_2, scale_1_or_0); spv::Id offset = builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, offset_3_or_2, offset_1_or_0); // 
gamma = trunc(linear * scale) * (1.0f / 255.0f) + offset return builder_->createNoContractionBinOp( spv::OpFAdd, value_type, builder_->createNoContractionBinOp( is_vector ? spv::OpVectorTimesScalar : spv::OpFMul, value_type, builder_->createUnaryBuiltinCall( value_type, ext_inst_glsl_std_450_, GLSLstd450Trunc, builder_->createNoContractionBinOp(spv::OpFMul, value_type, linear, scale)), builder_->makeFloatConstant(1.0f / 255.0f)), offset); } } // namespace gpu } // namespace xe