[Vulkan] Shader memory export (#145)
This commit is contained in:
parent
210ac4b2d2
commit
3d30b2eec3
|
@ -203,5 +203,95 @@ spv::Id SpirvBuilder::IfBuilder::createMergePhi(spv::Id then_variable,
|
||||||
getElsePhiParent());
|
getElsePhiParent());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Begins a switch on `selector`. The block that is the build point at the time
// of construction becomes the switch header. The merge block is allocated here,
// but it is attached to the function only in makeEndSwitch.
SpirvBuilder::SwitchBuilder::SwitchBuilder(spv::Id selector,
                                           unsigned int selection_control,
                                           SpirvBuilder& builder)
    : builder_(builder),
      selector_(selector),
      selection_control_(selection_control),
      function_(builder.getBuildPoint()->getParent()),
      header_block_(builder.getBuildPoint()),
      // Members are initialized in declaration order, so function_ is already
      // bound when the merge block is created.
      merge_block_(new spv::Block(builder.getUniqueId(), function_)),
      // Until a default segment branches to the merge block, the header block
      // is reported as the phi parent of the default path.
      default_phi_parent_(header_block_->getId()) {}
|
||||||
|
|
||||||
|
void SpirvBuilder::SwitchBuilder::makeBeginDefault() {
|
||||||
|
assert_null(default_block_);
|
||||||
|
|
||||||
|
endSegment();
|
||||||
|
|
||||||
|
default_block_ = new spv::Block(builder_.getUniqueId(), function_);
|
||||||
|
function_.addBlock(default_block_);
|
||||||
|
default_block_->addPredecessor(header_block_);
|
||||||
|
builder_.setBuildPoint(default_block_);
|
||||||
|
|
||||||
|
current_branch_ = Branch::kDefault;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpirvBuilder::SwitchBuilder::makeBeginCase(unsigned int literal) {
|
||||||
|
endSegment();
|
||||||
|
|
||||||
|
auto case_block = new spv::Block(builder_.getUniqueId(), function_);
|
||||||
|
function_.addBlock(case_block);
|
||||||
|
cases_.emplace_back(literal, case_block->getId());
|
||||||
|
case_block->addPredecessor(header_block_);
|
||||||
|
builder_.setBuildPoint(case_block);
|
||||||
|
|
||||||
|
current_branch_ = Branch::kCase;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpirvBuilder::SwitchBuilder::addCurrentCaseLiteral(unsigned int literal) {
|
||||||
|
assert_true(current_branch_ == Branch::kCase);
|
||||||
|
|
||||||
|
cases_.emplace_back(literal, cases_.back().second);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpirvBuilder::SwitchBuilder::makeEndSwitch() {
|
||||||
|
endSegment();
|
||||||
|
|
||||||
|
builder_.setBuildPoint(header_block_);
|
||||||
|
|
||||||
|
builder_.createSelectionMerge(merge_block_, selection_control_);
|
||||||
|
|
||||||
|
std::unique_ptr<spv::Instruction> switch_instruction =
|
||||||
|
std::make_unique<spv::Instruction>(spv::OpSwitch);
|
||||||
|
switch_instruction->addIdOperand(selector_);
|
||||||
|
if (default_block_) {
|
||||||
|
switch_instruction->addIdOperand(default_block_->getId());
|
||||||
|
} else {
|
||||||
|
switch_instruction->addIdOperand(merge_block_->getId());
|
||||||
|
merge_block_->addPredecessor(header_block_);
|
||||||
|
}
|
||||||
|
for (const std::pair<unsigned int, spv::Id>& case_pair : cases_) {
|
||||||
|
switch_instruction->addImmediateOperand(case_pair.first);
|
||||||
|
switch_instruction->addIdOperand(case_pair.second);
|
||||||
|
}
|
||||||
|
builder_.getBuildPoint()->addInstruction(std::move(switch_instruction));
|
||||||
|
|
||||||
|
function_.addBlock(merge_block_);
|
||||||
|
builder_.setBuildPoint(merge_block_);
|
||||||
|
|
||||||
|
current_branch_ = Branch::kMerge;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SpirvBuilder::SwitchBuilder::endSegment() {
|
||||||
|
assert_true(current_branch_ == Branch::kSelection ||
|
||||||
|
current_branch_ == Branch::kDefault ||
|
||||||
|
current_branch_ == Branch::kCase);
|
||||||
|
|
||||||
|
if (current_branch_ == Branch::kSelection) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!builder_.getBuildPoint()->isTerminated()) {
|
||||||
|
builder_.createBranch(merge_block_);
|
||||||
|
if (current_branch_ == Branch::kDefault) {
|
||||||
|
default_phi_parent_ = builder_.getBuildPoint()->getId();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
current_branch_ = Branch::kSelection;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
|
@ -10,7 +10,10 @@
|
||||||
#ifndef XENIA_GPU_SPIRV_BUILDER_H_
|
#ifndef XENIA_GPU_SPIRV_BUILDER_H_
|
||||||
#define XENIA_GPU_SPIRV_BUILDER_H_
|
#define XENIA_GPU_SPIRV_BUILDER_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "third_party/glslang/SPIRV/SpvBuilder.h"
|
#include "third_party/glslang/SPIRV/SpvBuilder.h"
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
|
@ -99,6 +102,50 @@ class SpirvBuilder : public spv::Builder {
|
||||||
Branch currentBranch = Branch::kThen;
|
Branch currentBranch = Branch::kThen;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Simpler and more flexible (such as multiple cases pointing to the same
|
||||||
|
// block) compared to makeSwitch.
|
||||||
|
class SwitchBuilder {
|
||||||
|
public:
|
||||||
|
SwitchBuilder(spv::Id selector, unsigned int selection_control,
|
||||||
|
SpirvBuilder& builder);
|
||||||
|
~SwitchBuilder() { assert_true(current_branch_ == Branch::kMerge); }
|
||||||
|
|
||||||
|
void makeBeginDefault();
|
||||||
|
void makeBeginCase(unsigned int literal);
|
||||||
|
void addCurrentCaseLiteral(unsigned int literal);
|
||||||
|
void makeEndSwitch();
|
||||||
|
|
||||||
|
// If there's no default block that branches to the merge block, the phi
|
||||||
|
// parent is the header block - this simplifies case-only usage.
|
||||||
|
spv::Id getDefaultPhiParent() const { return default_phi_parent_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum class Branch {
|
||||||
|
kSelection,
|
||||||
|
kDefault,
|
||||||
|
kCase,
|
||||||
|
kMerge,
|
||||||
|
};
|
||||||
|
|
||||||
|
void endSegment();
|
||||||
|
|
||||||
|
SpirvBuilder& builder_;
|
||||||
|
spv::Id selector_;
|
||||||
|
unsigned int selection_control_;
|
||||||
|
|
||||||
|
spv::Function& function_;
|
||||||
|
|
||||||
|
spv::Block* header_block_;
|
||||||
|
spv::Block* merge_block_;
|
||||||
|
spv::Block* default_block_ = nullptr;
|
||||||
|
|
||||||
|
std::vector<std::pair<unsigned int, spv::Id>> cases_;
|
||||||
|
|
||||||
|
spv::Id default_phi_parent_;
|
||||||
|
|
||||||
|
Branch current_branch_ = Branch::kSelection;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
|
|
@ -30,30 +30,35 @@ namespace gpu {
|
||||||
SpirvShaderTranslator::Features::Features(bool all)
|
SpirvShaderTranslator::Features::Features(bool all)
|
||||||
: spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0),
|
: spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0),
|
||||||
max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)),
|
max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)),
|
||||||
|
full_draw_index_uint32(all),
|
||||||
|
vertex_pipeline_stores_and_atomics(all),
|
||||||
|
fragment_stores_and_atomics(all),
|
||||||
clip_distance(all),
|
clip_distance(all),
|
||||||
cull_distance(all),
|
cull_distance(all),
|
||||||
demote_to_helper_invocation(all),
|
|
||||||
fragment_shader_sample_interlock(all),
|
|
||||||
full_draw_index_uint32(all),
|
|
||||||
image_view_format_swizzle(all),
|
image_view_format_swizzle(all),
|
||||||
signed_zero_inf_nan_preserve_float32(all),
|
signed_zero_inf_nan_preserve_float32(all),
|
||||||
denorm_flush_to_zero_float32(all),
|
denorm_flush_to_zero_float32(all),
|
||||||
rounding_mode_rte_float32(all) {}
|
rounding_mode_rte_float32(all),
|
||||||
|
fragment_shader_sample_interlock(all),
|
||||||
|
demote_to_helper_invocation(all) {}
|
||||||
|
|
||||||
SpirvShaderTranslator::Features::Features(
|
SpirvShaderTranslator::Features::Features(
|
||||||
const ui::vulkan::VulkanProvider::DeviceInfo& device_info)
|
const ui::vulkan::VulkanProvider::DeviceInfo& device_info)
|
||||||
: max_storage_buffer_range(device_info.maxStorageBufferRange),
|
: max_storage_buffer_range(device_info.maxStorageBufferRange),
|
||||||
|
full_draw_index_uint32(device_info.fullDrawIndexUint32),
|
||||||
|
vertex_pipeline_stores_and_atomics(
|
||||||
|
device_info.vertexPipelineStoresAndAtomics),
|
||||||
|
fragment_stores_and_atomics(device_info.fragmentStoresAndAtomics),
|
||||||
clip_distance(device_info.shaderClipDistance),
|
clip_distance(device_info.shaderClipDistance),
|
||||||
cull_distance(device_info.shaderCullDistance),
|
cull_distance(device_info.shaderCullDistance),
|
||||||
demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation),
|
|
||||||
fragment_shader_sample_interlock(
|
|
||||||
device_info.fragmentShaderSampleInterlock),
|
|
||||||
full_draw_index_uint32(device_info.fullDrawIndexUint32),
|
|
||||||
image_view_format_swizzle(device_info.imageViewFormatSwizzle),
|
image_view_format_swizzle(device_info.imageViewFormatSwizzle),
|
||||||
signed_zero_inf_nan_preserve_float32(
|
signed_zero_inf_nan_preserve_float32(
|
||||||
device_info.shaderSignedZeroInfNanPreserveFloat32),
|
device_info.shaderSignedZeroInfNanPreserveFloat32),
|
||||||
denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32),
|
denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32),
|
||||||
rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32) {
|
rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32),
|
||||||
|
fragment_shader_sample_interlock(
|
||||||
|
device_info.fragmentShaderSampleInterlock),
|
||||||
|
demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation) {
|
||||||
if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
|
if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
|
||||||
spirv_version = spv::Spv_1_5;
|
spirv_version = spv::Spv_1_5;
|
||||||
} else if (device_info.ext_1_2_VK_KHR_spirv_1_4) {
|
} else if (device_info.ext_1_2_VK_KHR_spirv_1_4) {
|
||||||
|
@ -117,6 +122,14 @@ void SpirvShaderTranslator::Reset() {
|
||||||
|
|
||||||
main_interface_.clear();
|
main_interface_.clear();
|
||||||
var_main_registers_ = spv::NoResult;
|
var_main_registers_ = spv::NoResult;
|
||||||
|
var_main_memexport_address_ = spv::NoResult;
|
||||||
|
for (size_t memexport_eM_index = 0;
|
||||||
|
memexport_eM_index < xe::countof(var_main_memexport_data_);
|
||||||
|
++memexport_eM_index) {
|
||||||
|
var_main_memexport_data_[memexport_eM_index] = spv::NoResult;
|
||||||
|
}
|
||||||
|
var_main_memexport_data_written_ = spv::NoResult;
|
||||||
|
main_memexport_allowed_ = spv::NoResult;
|
||||||
var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult;
|
var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult;
|
||||||
var_main_kill_pixel_ = spv::NoResult;
|
var_main_kill_pixel_ = spv::NoResult;
|
||||||
var_main_fsi_color_written_ = spv::NoResult;
|
var_main_fsi_color_written_ = spv::NoResult;
|
||||||
|
@ -310,6 +323,8 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
main_interface_.push_back(uniform_system_constants_);
|
main_interface_.push_back(uniform_system_constants_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool memexport_used = IsMemoryExportUsed();
|
||||||
|
|
||||||
if (!is_depth_only_fragment_shader_) {
|
if (!is_depth_only_fragment_shader_) {
|
||||||
// Common uniform buffer - float constants.
|
// Common uniform buffer - float constants.
|
||||||
uint32_t float_constant_count =
|
uint32_t float_constant_count =
|
||||||
|
@ -420,9 +435,10 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
builder_->addMemberName(type_shared_memory, 0, "shared_memory");
|
builder_->addMemberName(type_shared_memory, 0, "shared_memory");
|
||||||
builder_->addMemberDecoration(type_shared_memory, 0,
|
builder_->addMemberDecoration(type_shared_memory, 0,
|
||||||
spv::DecorationRestrict);
|
spv::DecorationRestrict);
|
||||||
// TODO(Triang3l): Make writable when memexport is implemented.
|
if (!memexport_used) {
|
||||||
builder_->addMemberDecoration(type_shared_memory, 0,
|
builder_->addMemberDecoration(type_shared_memory, 0,
|
||||||
spv::DecorationNonWritable);
|
spv::DecorationNonWritable);
|
||||||
|
}
|
||||||
builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset,
|
builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset,
|
||||||
0);
|
0);
|
||||||
builder_->addDecoration(type_shared_memory,
|
builder_->addDecoration(type_shared_memory,
|
||||||
|
@ -509,6 +525,24 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction,
|
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction,
|
||||||
type_register_array, "xe_var_registers");
|
type_register_array, "xe_var_registers");
|
||||||
}
|
}
|
||||||
|
if (memexport_used) {
|
||||||
|
var_main_memexport_address_ = builder_->createVariable(
|
||||||
|
spv::NoPrecision, spv::StorageClassFunction, type_float4_,
|
||||||
|
"xe_var_memexport_address", const_float4_0_);
|
||||||
|
uint8_t memexport_eM_remaining = current_shader().memexport_eM_written();
|
||||||
|
uint32_t memexport_eM_index;
|
||||||
|
while (
|
||||||
|
xe::bit_scan_forward(memexport_eM_remaining, &memexport_eM_index)) {
|
||||||
|
memexport_eM_remaining &= ~(uint8_t(1) << memexport_eM_index);
|
||||||
|
var_main_memexport_data_[memexport_eM_index] = builder_->createVariable(
|
||||||
|
spv::NoPrecision, spv::StorageClassFunction, type_float4_,
|
||||||
|
fmt::format("xe_var_memexport_data_{}", memexport_eM_index).c_str(),
|
||||||
|
const_float4_0_);
|
||||||
|
}
|
||||||
|
var_main_memexport_data_written_ = builder_->createVariable(
|
||||||
|
spv::NoPrecision, spv::StorageClassFunction, type_uint_,
|
||||||
|
"xe_var_memexport_data_written", const_uint_0_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write the execution model-specific prologue with access to variables in the
|
// Write the execution model-specific prologue with access to variables in the
|
||||||
|
@ -647,6 +681,10 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
||||||
builder_->setBuildPoint(main_loop_merge_);
|
builder_->setBuildPoint(main_loop_merge_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write data for the last memexport.
|
||||||
|
ExportToMemory(
|
||||||
|
current_shader().memexport_eM_potentially_written_before_end());
|
||||||
|
|
||||||
if (is_vertex_shader()) {
|
if (is_vertex_shader()) {
|
||||||
CompleteVertexOrTessEvalShaderInMain();
|
CompleteVertexOrTessEvalShaderInMain();
|
||||||
} else if (is_pixel_shader()) {
|
} else if (is_pixel_shader()) {
|
||||||
|
@ -1077,6 +1115,34 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
|
||||||
builder_->createBranch(main_loop_continue_);
|
builder_->createBranch(main_loop_continue_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SpirvShaderTranslator::ProcessAllocInstruction(
|
||||||
|
const ParsedAllocInstruction& instr, uint8_t export_eM) {
|
||||||
|
bool start_memexport = instr.type == ucode::AllocType::kMemory &&
|
||||||
|
current_shader().memexport_eM_written();
|
||||||
|
if (export_eM || start_memexport) {
|
||||||
|
CloseExecConditionals();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (export_eM) {
|
||||||
|
ExportToMemory(export_eM);
|
||||||
|
// Reset which eM# elements have been written.
|
||||||
|
builder_->createStore(const_uint_0_, var_main_memexport_data_written_);
|
||||||
|
// Break dependencies from the previous memexport.
|
||||||
|
uint8_t export_eM_remaining = export_eM;
|
||||||
|
uint32_t eM_index;
|
||||||
|
while (xe::bit_scan_forward(export_eM_remaining, &eM_index)) {
|
||||||
|
export_eM_remaining &= ~(uint8_t(1) << eM_index);
|
||||||
|
builder_->createStore(const_float4_0_,
|
||||||
|
var_main_memexport_data_[eM_index]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (start_memexport) {
|
||||||
|
// Initialize eA to an invalid address.
|
||||||
|
builder_->createStore(const_float4_0_, var_main_memexport_address_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant(
|
spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant(
|
||||||
spv::Id scalar, spv::Id vector_type) {
|
spv::Id scalar, spv::Id vector_type) {
|
||||||
bool is_constant = builder_->isConstant(scalar);
|
bool is_constant = builder_->isConstant(scalar);
|
||||||
|
@ -1205,6 +1271,8 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
||||||
|
Modification shader_modification = GetSpirvShaderModification();
|
||||||
|
|
||||||
// The edge flag isn't used for any purpose by the translator.
|
// The edge flag isn't used for any purpose by the translator.
|
||||||
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) {
|
if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) {
|
||||||
id_vector_temp_.clear();
|
id_vector_temp_.clear();
|
||||||
|
@ -1244,11 +1312,40 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Modification shader_modification = GetSpirvShaderModification();
|
|
||||||
|
|
||||||
// TODO(Triang3l): For HostVertexShaderType::kRectangleListAsTriangleStrip,
|
// TODO(Triang3l): For HostVertexShaderType::kRectangleListAsTriangleStrip,
|
||||||
// start the vertex loop, and load the index there.
|
// start the vertex loop, and load the index there.
|
||||||
|
|
||||||
|
// Check if memory export should be allowed for this host vertex of the guest
|
||||||
|
// primitive to make sure export is done only once for each guest vertex.
|
||||||
|
if (IsMemoryExportUsed()) {
|
||||||
|
spv::Id memexport_allowed_for_host_vertex_of_guest_primitive =
|
||||||
|
spv::NoResult;
|
||||||
|
if (shader_modification.vertex.host_vertex_shader_type ==
|
||||||
|
Shader::HostVertexShaderType::kPointListAsTriangleStrip) {
|
||||||
|
// Only for one host vertex for the point.
|
||||||
|
memexport_allowed_for_host_vertex_of_guest_primitive =
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpIEqual, type_bool_,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpBitwiseAnd, type_uint_,
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint_,
|
||||||
|
builder_->createLoad(input_vertex_index_,
|
||||||
|
spv::NoPrecision)),
|
||||||
|
builder_->makeUintConstant(3)),
|
||||||
|
const_uint_0_);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (memexport_allowed_for_host_vertex_of_guest_primitive != spv::NoResult) {
|
||||||
|
main_memexport_allowed_ =
|
||||||
|
main_memexport_allowed_ != spv::NoResult
|
||||||
|
? builder_->createBinOp(
|
||||||
|
spv::OpLogicalAnd, type_bool_, main_memexport_allowed_,
|
||||||
|
memexport_allowed_for_host_vertex_of_guest_primitive)
|
||||||
|
: memexport_allowed_for_host_vertex_of_guest_primitive;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Load the vertex index or the tessellation parameters.
|
// Load the vertex index or the tessellation parameters.
|
||||||
if (register_count()) {
|
if (register_count()) {
|
||||||
// TODO(Triang3l): Barycentric coordinates and patch index.
|
// TODO(Triang3l): Barycentric coordinates and patch index.
|
||||||
|
@ -1827,6 +1924,13 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||||
|
// TODO(Triang3l): Allow memory export with resolution scaling only for the
|
||||||
|
// center host pixel, with sample shading (for depth format conversion) only
|
||||||
|
// for the bottom-right sample (unlike in Direct3D, the sample mask input
|
||||||
|
// doesn't include covered samples of the primitive that correspond to other
|
||||||
|
// invocations, so use the sample that's the most friendly to the half-pixel
|
||||||
|
// offset).
|
||||||
|
|
||||||
// Set up pixel killing from within the translated shader without affecting
|
// Set up pixel killing from within the translated shader without affecting
|
||||||
// the control flow (unlike with OpKill), similarly to how pixel killing works
|
// the control flow (unlike with OpKill), similarly to how pixel killing works
|
||||||
// on the Xenos, and also keeping a single critical section exit and return
|
// on the Xenos, and also keeping a single critical section exit and return
|
||||||
|
@ -2460,6 +2564,26 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
var_main_fsi_color_written_);
|
var_main_fsi_color_written_);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
case InstructionStorageTarget::kExportAddress: {
|
||||||
|
// spv::NoResult if memory export usage is unsupported or invalid.
|
||||||
|
target_pointer = var_main_memexport_address_;
|
||||||
|
} break;
|
||||||
|
case InstructionStorageTarget::kExportData: {
|
||||||
|
// spv::NoResult if memory export usage is unsupported or invalid.
|
||||||
|
target_pointer = var_main_memexport_data_[result.storage_index];
|
||||||
|
if (target_pointer != spv::NoResult) {
|
||||||
|
// Mark that the eM# has been written to and needs to be exported.
|
||||||
|
assert_true(var_main_memexport_data_written_ != spv::NoResult);
|
||||||
|
builder_->createStore(
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpBitwiseOr, type_uint_,
|
||||||
|
builder_->createLoad(var_main_memexport_data_written_,
|
||||||
|
spv::NoPrecision),
|
||||||
|
builder_->makeUintConstant(uint32_t(1)
|
||||||
|
<< result.storage_index)),
|
||||||
|
var_main_memexport_data_written_);
|
||||||
|
}
|
||||||
|
} break;
|
||||||
default:
|
default:
|
||||||
// TODO(Triang3l): All storage targets.
|
// TODO(Triang3l): All storage targets.
|
||||||
break;
|
break;
|
||||||
|
@ -2814,16 +2938,59 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Endian-swaps a uint4 `value` according to the xenos::Endian128 mode in the
// uint `endian`. The 8-in-64 and 8-in-128 modes are reduced to a dword
// reordering followed by the 8-in-32 swap, the other modes are forwarded to
// EndianSwap32Uint unchanged.
spv::Id SpirvShaderTranslator::EndianSwap128Uint4(spv::Id value,
                                                  spv::Id endian) {
  // Change 8-in-64 and 8-in-128 to 8-in-32, and then swap within 32 bits.

  // Boolean that is true when the endian mode equals `mode`.
  auto endian_is = [this, endian](xenos::Endian128 mode) -> spv::Id {
    return builder_->createBinOp(
        spv::OpIEqual, type_bool_, endian,
        builder_->makeUintConstant(static_cast<unsigned int>(mode)));
  };
  // Replaces `value` with its (x, y, z, w) swizzle where `condition` is true.
  auto swizzle_if = [this, &value](spv::Id condition, unsigned int x,
                                   unsigned int y, unsigned int z,
                                   unsigned int w) {
    uint_vector_temp_.clear();
    uint_vector_temp_.push_back(x);
    uint_vector_temp_.push_back(y);
    uint_vector_temp_.push_back(z);
    uint_vector_temp_.push_back(w);
    value = builder_->createTriOp(
        spv::OpSelect, type_uint4_, condition,
        builder_->createRvalueSwizzle(spv::NoPrecision, type_uint4_, value,
                                      uint_vector_temp_),
        value);
  };

  // 8-in-64: exchange the dwords within each 64-bit half.
  const spv::Id is_8in64 = endian_is(xenos::Endian128::k8in64);
  swizzle_if(is_8in64, 1, 0, 3, 2);

  // 8-in-128: reverse the order of all four dwords.
  const spv::Id is_8in128 = endian_is(xenos::Endian128::k8in128);
  swizzle_if(is_8in128, 3, 2, 1, 0);

  // With the dwords reordered, both modes are completed by an 8-in-32 swap.
  return EndianSwap32Uint(
      value,
      builder_->createTriOp(
          spv::OpSelect, type_uint_,
          builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in64,
                                is_8in128),
          builder_->makeUintConstant(
              static_cast<unsigned int>(xenos::Endian128::k8in32)),
          endian));
}
|
||||||
|
|
||||||
spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
|
spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
|
||||||
spv::Id address_dwords_int) {
|
spv::Id address_dwords_int) {
|
||||||
spv::Block& head_block = *builder_->getBuildPoint();
|
|
||||||
assert_false(head_block.isTerminated());
|
|
||||||
|
|
||||||
spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3
|
spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3
|
||||||
? spv::StorageClassStorageBuffer
|
? spv::StorageClassStorageBuffer
|
||||||
: spv::StorageClassUniform;
|
: spv::StorageClassUniform;
|
||||||
uint32_t buffer_count_log2 = GetSharedMemoryStorageBufferCountLog2();
|
|
||||||
if (!buffer_count_log2) {
|
uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2();
|
||||||
|
|
||||||
|
if (!binding_count_log2) {
|
||||||
// Single binding - load directly.
|
// Single binding - load directly.
|
||||||
id_vector_temp_.clear();
|
id_vector_temp_.clear();
|
||||||
// The only SSBO struct member.
|
// The only SSBO struct member.
|
||||||
|
@ -2837,8 +3004,10 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
|
||||||
|
|
||||||
// The memory is split into multiple bindings - check which binding to load
|
// The memory is split into multiple bindings - check which binding to load
|
||||||
// from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the
|
// from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the
|
||||||
// array with the variable itself because it needs VK_EXT_descriptor_indexing.
|
// array with the variable itself because it needs non-uniform storage buffer
|
||||||
uint32_t binding_address_bits = (29 - 2) - buffer_count_log2;
|
// indexing.
|
||||||
|
|
||||||
|
uint32_t binding_address_bits = (29 - 2) - binding_count_log2;
|
||||||
spv::Id binding_index = builder_->createBinOp(
|
spv::Id binding_index = builder_->createBinOp(
|
||||||
spv::OpShiftRightLogical, type_uint_,
|
spv::OpShiftRightLogical, type_uint_,
|
||||||
builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int),
|
builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int),
|
||||||
|
@ -2847,51 +3016,119 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
|
||||||
spv::OpBitwiseAnd, type_int_, address_dwords_int,
|
spv::OpBitwiseAnd, type_int_, address_dwords_int,
|
||||||
builder_->makeIntConstant(
|
builder_->makeIntConstant(
|
||||||
int((uint32_t(1) << binding_address_bits) - 1)));
|
int((uint32_t(1) << binding_address_bits) - 1)));
|
||||||
uint32_t buffer_count = 1 << buffer_count_log2;
|
|
||||||
spv::Block* switch_case_blocks[512 / 128];
|
auto value_phi_op = std::make_unique<spv::Instruction>(
|
||||||
for (uint32_t i = 0; i < buffer_count; ++i) {
|
builder_->getUniqueId(), type_uint_, spv::OpPhi);
|
||||||
switch_case_blocks[i] = &builder_->makeNewBlock();
|
// Zero if out of bounds.
|
||||||
}
|
value_phi_op->addIdOperand(const_uint_0_);
|
||||||
spv::Block& switch_merge_block = builder_->makeNewBlock();
|
value_phi_op->addIdOperand(builder_->getBuildPoint()->getId());
|
||||||
spv::Id value_phi_result = builder_->getUniqueId();
|
|
||||||
std::unique_ptr<spv::Instruction> value_phi_op =
|
SpirvBuilder::SwitchBuilder binding_switch(
|
||||||
std::make_unique<spv::Instruction>(value_phi_result, type_uint_,
|
binding_index, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
spv::OpPhi);
|
uint32_t binding_count = uint32_t(1) << binding_count_log2;
|
||||||
builder_->createSelectionMerge(&switch_merge_block,
|
|
||||||
spv::SelectionControlDontFlattenMask);
|
id_vector_temp_.clear();
|
||||||
{
|
id_vector_temp_.push_back(spv::NoResult);
|
||||||
std::unique_ptr<spv::Instruction> switch_op =
|
// The only SSBO struct member.
|
||||||
std::make_unique<spv::Instruction>(spv::OpSwitch);
|
id_vector_temp_.push_back(const_int_0_);
|
||||||
switch_op->addIdOperand(binding_index);
|
id_vector_temp_.push_back(binding_address);
|
||||||
// Highest binding index is the default case.
|
|
||||||
switch_op->addIdOperand(switch_case_blocks[buffer_count - 1]->getId());
|
for (uint32_t i = 0; i < binding_count; ++i) {
|
||||||
switch_case_blocks[buffer_count - 1]->addPredecessor(&head_block);
|
binding_switch.makeBeginCase(i);
|
||||||
for (uint32_t i = 0; i < buffer_count - 1; ++i) {
|
id_vector_temp_[0] = builder_->makeIntConstant(int(i));
|
||||||
switch_op->addImmediateOperand(int(i));
|
|
||||||
switch_op->addIdOperand(switch_case_blocks[i]->getId());
|
|
||||||
switch_case_blocks[i]->addPredecessor(&head_block);
|
|
||||||
}
|
|
||||||
builder_->getBuildPoint()->addInstruction(std::move(switch_op));
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < buffer_count; ++i) {
|
|
||||||
builder_->setBuildPoint(switch_case_blocks[i]);
|
|
||||||
id_vector_temp_.clear();
|
|
||||||
id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
|
|
||||||
// The only SSBO struct member.
|
|
||||||
id_vector_temp_.push_back(const_int_0_);
|
|
||||||
id_vector_temp_.push_back(binding_address);
|
|
||||||
value_phi_op->addIdOperand(builder_->createLoad(
|
value_phi_op->addIdOperand(builder_->createLoad(
|
||||||
builder_->createAccessChain(storage_class, buffers_shared_memory_,
|
builder_->createAccessChain(storage_class, buffers_shared_memory_,
|
||||||
id_vector_temp_),
|
id_vector_temp_),
|
||||||
spv::NoPrecision));
|
spv::NoPrecision));
|
||||||
value_phi_op->addIdOperand(switch_case_blocks[i]->getId());
|
value_phi_op->addIdOperand(builder_->getBuildPoint()->getId());
|
||||||
builder_->createBranch(&switch_merge_block);
|
|
||||||
}
|
}
|
||||||
builder_->setBuildPoint(&switch_merge_block);
|
|
||||||
|
binding_switch.makeEndSwitch();
|
||||||
|
|
||||||
|
spv::Id value_phi_result = value_phi_op->getResultId();
|
||||||
builder_->getBuildPoint()->addInstruction(std::move(value_phi_op));
|
builder_->getBuildPoint()->addInstruction(std::move(value_phi_op));
|
||||||
return value_phi_result;
|
return value_phi_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes the uint `value` to the shared memory dword at `address_dwords_int`
// (a signed integer dword index). If `replace_mask` is not spv::NoResult, only
// the bits set in it are replaced, using an atomic AND followed by an atomic
// OR; otherwise the whole dword is overwritten with a plain store. When the
// shared memory is split into multiple bindings, the binding is selected with
// a switch, and nothing is written for a binding index without a case.
void SpirvShaderTranslator::StoreUint32ToSharedMemory(
    spv::Id value, spv::Id address_dwords_int, spv::Id replace_mask) {
  const bool is_masked = replace_mask != spv::NoResult;

  spv::StorageClass storage_class = spv::StorageClassUniform;
  if (features_.spirv_version >= spv::Spv_1_3) {
    storage_class = spv::StorageClassStorageBuffer;
  }

  spv::Id keep_mask = spv::NoResult;
  if (is_masked) {
    keep_mask = builder_->createUnaryOp(spv::OpNot, type_uint_, replace_mask);
    // Drop the bits that must not be modified from the value to merge.
    value = builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, value,
                                  replace_mask);
  }

  // Emits the write to the dword addressed by the access chain indices that
  // are currently in id_vector_temp_.
  auto emit_store = [&]() {
    const spv::Id pointer = builder_->createAccessChain(
        storage_class, buffers_shared_memory_, id_vector_temp_);
    if (!is_masked) {
      builder_->createStore(value, pointer);
      return;
    }
    // Don't touch the other bits in the buffer, just modify the needed bits
    // in the most up to date uint32 at the address.
    const spv::Id scope_device =
        builder_->makeUintConstant(static_cast<unsigned int>(spv::ScopeDevice));
    // Zero memory semantics (relaxed).
    const spv::Id semantics_relaxed = const_uint_0_;
    builder_->createQuadOp(spv::OpAtomicAnd, type_uint_, pointer, scope_device,
                           semantics_relaxed, keep_mask);
    builder_->createQuadOp(spv::OpAtomicOr, type_uint_, pointer, scope_device,
                           semantics_relaxed, value);
  };

  const uint32_t binding_count_log2 = GetSharedMemoryStorageBufferCountLog2();

  if (!binding_count_log2) {
    // Single binding - store directly.
    id_vector_temp_.clear();
    // The only SSBO struct member.
    id_vector_temp_.push_back(const_int_0_);
    id_vector_temp_.push_back(address_dwords_int);
    emit_store();
    return;
  }

  // The memory is split into multiple bindings - check which binding to store
  // to. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the
  // array with the variable itself because it needs non-uniform storage buffer
  // indexing.

  const uint32_t binding_address_bits = (29 - 2) - binding_count_log2;
  // The upper bits of the address select the binding...
  const spv::Id binding_index = builder_->createBinOp(
      spv::OpShiftRightLogical, type_uint_,
      builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int),
      builder_->makeUintConstant(binding_address_bits));
  // ...and the lower bits are the dword index within it.
  const spv::Id binding_address = builder_->createBinOp(
      spv::OpBitwiseAnd, type_int_, address_dwords_int,
      builder_->makeIntConstant(
          int((uint32_t(1) << binding_address_bits) - 1)));

  SpirvBuilder::SwitchBuilder binding_switch(
      binding_index, spv::SelectionControlDontFlattenMask, *builder_);
  const uint32_t binding_count = uint32_t(1) << binding_count_log2;

  id_vector_temp_.clear();
  // Placeholder for the binding index, which is set for each case below.
  id_vector_temp_.push_back(spv::NoResult);
  // The only SSBO struct member.
  id_vector_temp_.push_back(const_int_0_);
  id_vector_temp_.push_back(binding_address);

  for (uint32_t binding = 0; binding < binding_count; ++binding) {
    binding_switch.makeBeginCase(binding);
    id_vector_temp_[0] = builder_->makeIntConstant(int(binding));
    emit_store();
  }

  binding_switch.makeEndSwitch();
}
|
||||||
|
|
||||||
spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma,
|
spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma,
|
||||||
bool gamma_pre_saturated) {
|
bool gamma_pre_saturated) {
|
||||||
spv::Id value_type = builder_->getTypeId(gamma);
|
spv::Id value_type = builder_->getTypeId(gamma);
|
||||||
|
|
|
@ -323,17 +323,28 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
explicit Features(
|
explicit Features(
|
||||||
const ui::vulkan::VulkanProvider::DeviceInfo& device_info);
|
const ui::vulkan::VulkanProvider::DeviceInfo& device_info);
|
||||||
explicit Features(bool all = false);
|
explicit Features(bool all = false);
|
||||||
|
|
||||||
unsigned int spirv_version;
|
unsigned int spirv_version;
|
||||||
|
|
||||||
uint32_t max_storage_buffer_range;
|
uint32_t max_storage_buffer_range;
|
||||||
|
|
||||||
|
bool full_draw_index_uint32;
|
||||||
|
|
||||||
|
bool vertex_pipeline_stores_and_atomics;
|
||||||
|
bool fragment_stores_and_atomics;
|
||||||
|
|
||||||
bool clip_distance;
|
bool clip_distance;
|
||||||
bool cull_distance;
|
bool cull_distance;
|
||||||
bool demote_to_helper_invocation;
|
|
||||||
bool fragment_shader_sample_interlock;
|
|
||||||
bool full_draw_index_uint32;
|
|
||||||
bool image_view_format_swizzle;
|
bool image_view_format_swizzle;
|
||||||
|
|
||||||
bool signed_zero_inf_nan_preserve_float32;
|
bool signed_zero_inf_nan_preserve_float32;
|
||||||
bool denorm_flush_to_zero_float32;
|
bool denorm_flush_to_zero_float32;
|
||||||
bool rounding_mode_rte_float32;
|
bool rounding_mode_rte_float32;
|
||||||
|
|
||||||
|
bool fragment_shader_sample_interlock;
|
||||||
|
|
||||||
|
bool demote_to_helper_invocation;
|
||||||
};
|
};
|
||||||
|
|
||||||
SpirvShaderTranslator(const Features& features,
|
SpirvShaderTranslator(const Features& features,
|
||||||
|
@ -424,6 +435,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
void ProcessLoopEndInstruction(
|
void ProcessLoopEndInstruction(
|
||||||
const ParsedLoopEndInstruction& instr) override;
|
const ParsedLoopEndInstruction& instr) override;
|
||||||
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
|
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
|
||||||
|
void ProcessAllocInstruction(const ParsedAllocInstruction& instr,
|
||||||
|
uint8_t export_eM) override;
|
||||||
|
|
||||||
void ProcessVertexFetchInstruction(
|
void ProcessVertexFetchInstruction(
|
||||||
const ParsedVertexFetchInstruction& instr) override;
|
const ParsedVertexFetchInstruction& instr) override;
|
||||||
|
@ -470,6 +483,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
Shader::IsHostVertexShaderTypeDomain(
|
Shader::IsHostVertexShaderTypeDomain(
|
||||||
GetSpirvShaderModification().vertex.host_vertex_shader_type);
|
GetSpirvShaderModification().vertex.host_vertex_shader_type);
|
||||||
}
|
}
|
||||||
|
bool IsSpirvComputeShader() const {
|
||||||
|
return is_vertex_shader() &&
|
||||||
|
GetSpirvShaderModification().vertex.host_vertex_shader_type ==
|
||||||
|
Shader::HostVertexShaderType::kMemExportCompute;
|
||||||
|
}
|
||||||
|
|
||||||
bool IsExecutionModeEarlyFragmentTests() const {
|
bool IsExecutionModeEarlyFragmentTests() const {
|
||||||
return is_pixel_shader() &&
|
return is_pixel_shader() &&
|
||||||
|
@ -567,24 +585,48 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
|
spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
|
||||||
spv::Id operand_1_abs);
|
spv::Id operand_1_abs);
|
||||||
// Conditionally discard the current fragment. Changes the build point.
|
// Conditionally discard the current fragment. Changes the build point.
|
||||||
void KillPixel(spv::Id condition);
|
void KillPixel(spv::Id condition,
|
||||||
|
uint8_t memexport_eM_potentially_written_before);
|
||||||
// Return type is a xe::bit_count(result.GetUsedResultComponents())-component
|
// Return type is a xe::bit_count(result.GetUsedResultComponents())-component
|
||||||
// float vector or a single float, depending on whether it's a reduction
|
// float vector or a single float, depending on whether it's a reduction
|
||||||
// instruction (check getTypeId of the result), or returns spv::NoResult if
|
// instruction (check getTypeId of the result), or returns spv::NoResult if
|
||||||
// nothing to store.
|
// nothing to store.
|
||||||
spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr,
|
spv::Id ProcessVectorAluOperation(
|
||||||
bool& predicate_written);
|
const ParsedAluInstruction& instr,
|
||||||
|
uint8_t memexport_eM_potentially_written_before, bool& predicate_written);
|
||||||
// Returns a float value to write to the previous scalar register and to the
|
// Returns a float value to write to the previous scalar register and to the
|
||||||
// destination. If the return value is ps itself (in the retain_prev case),
|
// destination. If the return value is ps itself (in the retain_prev case),
|
||||||
// returns spv::NoResult (handled as a special case, so if it's retain_prev,
|
// returns spv::NoResult (handled as a special case, so if it's retain_prev,
|
||||||
// but don't need to write to anywhere, no OpLoad(ps) will be done).
|
// but don't need to write to anywhere, no OpLoad(ps) will be done).
|
||||||
spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr,
|
spv::Id ProcessScalarAluOperation(
|
||||||
bool& predicate_written);
|
const ParsedAluInstruction& instr,
|
||||||
|
uint8_t memexport_eM_potentially_written_before, bool& predicate_written);
|
||||||
|
|
||||||
// Perform endian swap of a uint scalar or vector.
|
// Perform endian swap of a uint scalar or vector.
|
||||||
spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian);
|
spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian);
|
||||||
|
// Perform endian swap of a uint4 vector.
|
||||||
|
spv::Id EndianSwap128Uint4(spv::Id value, spv::Id endian);
|
||||||
|
|
||||||
spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int);
|
spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int);
|
||||||
|
// If `replace_mask` is provided, the bits specified in the mask will be
|
||||||
|
// replaced with those from the value via OpAtomicAnd/Or.
|
||||||
|
// Bits of `value` not in `replace_mask` will be ignored.
|
||||||
|
void StoreUint32ToSharedMemory(spv::Id value, spv::Id address_dwords_int,
|
||||||
|
spv::Id replace_mask = spv::NoResult);
|
||||||
|
|
||||||
|
bool IsMemoryExportSupported() const {
|
||||||
|
if (is_pixel_shader()) {
|
||||||
|
return features_.fragment_stores_and_atomics;
|
||||||
|
}
|
||||||
|
return features_.vertex_pipeline_stores_and_atomics ||
|
||||||
|
IsSpirvComputeShader();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsMemoryExportUsed() const {
|
||||||
|
return current_shader().memexport_eM_written() && IsMemoryExportSupported();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ExportToMemory(uint8_t export_eM);
|
||||||
|
|
||||||
// The source may be a floating-point scalar or a vector.
|
// The source may be a floating-point scalar or a vector.
|
||||||
spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated);
|
spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated);
|
||||||
|
@ -872,6 +914,21 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
spv::Id var_main_tfetch_gradients_v_;
|
spv::Id var_main_tfetch_gradients_v_;
|
||||||
// float4[register_count()].
|
// float4[register_count()].
|
||||||
spv::Id var_main_registers_;
|
spv::Id var_main_registers_;
|
||||||
|
// Memory export variables are created only when needed.
|
||||||
|
// float4.
|
||||||
|
spv::Id var_main_memexport_address_;
|
||||||
|
// Each is float4.
|
||||||
|
spv::Id var_main_memexport_data_[ucode::kMaxMemExportElementCount];
|
||||||
|
// Bit field of which eM# elements have been written so far by the invocation
|
||||||
|
// since the last memory write - uint.
|
||||||
|
spv::Id var_main_memexport_data_written_;
|
||||||
|
// If memory export is disabled in certain invocations or (if emulating some
|
||||||
|
// primitive types without a geometry shader) at specific guest vertex loop
|
||||||
|
// iterations because the translated shader is executed multiple times for the
|
||||||
|
// same guest vertex or pixel, this contains whether memory export is allowed
|
||||||
|
// in the current execution of the translated code.
|
||||||
|
// bool.
|
||||||
|
spv::Id main_memexport_allowed_;
|
||||||
// VS only - float3 (special exports).
|
// VS only - float3 (special exports).
|
||||||
spv::Id var_main_point_size_edge_flag_kill_vertex_;
|
spv::Id var_main_point_size_edge_flag_kill_vertex_;
|
||||||
// PS, only when needed - bool.
|
// PS, only when needed - bool.
|
||||||
|
|
|
@ -39,10 +39,14 @@ spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value,
|
||||||
const_float_vectors_0_[num_components - 1], value);
|
const_float_vectors_0_[num_components - 1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpirvShaderTranslator::KillPixel(spv::Id condition) {
|
void SpirvShaderTranslator::KillPixel(
|
||||||
|
spv::Id condition, uint8_t memexport_eM_potentially_written_before) {
|
||||||
SpirvBuilder::IfBuilder kill_if(condition, spv::SelectionControlMaskNone,
|
SpirvBuilder::IfBuilder kill_if(condition, spv::SelectionControlMaskNone,
|
||||||
*builder_);
|
*builder_);
|
||||||
{
|
{
|
||||||
|
// Perform outstanding memory exports before the invocation becomes inactive
|
||||||
|
// and storage writes are disabled.
|
||||||
|
ExportToMemory(memexport_eM_potentially_written_before);
|
||||||
if (var_main_kill_pixel_ != spv::NoResult) {
|
if (var_main_kill_pixel_ != spv::NoResult) {
|
||||||
builder_->createStore(builder_->makeBoolConstant(true),
|
builder_->createStore(builder_->makeBoolConstant(true),
|
||||||
var_main_kill_pixel_);
|
var_main_kill_pixel_);
|
||||||
|
@ -77,12 +81,12 @@ void SpirvShaderTranslator::ProcessAluInstruction(
|
||||||
// Whether the instruction has changed the predicate, and it needs to be
|
// Whether the instruction has changed the predicate, and it needs to be
|
||||||
// checked again later.
|
// checked again later.
|
||||||
bool predicate_written_vector = false;
|
bool predicate_written_vector = false;
|
||||||
spv::Id vector_result =
|
spv::Id vector_result = ProcessVectorAluOperation(
|
||||||
ProcessVectorAluOperation(instr, predicate_written_vector);
|
instr, memexport_eM_potentially_written_before, predicate_written_vector);
|
||||||
|
|
||||||
bool predicate_written_scalar = false;
|
bool predicate_written_scalar = false;
|
||||||
spv::Id scalar_result =
|
spv::Id scalar_result = ProcessScalarAluOperation(
|
||||||
ProcessScalarAluOperation(instr, predicate_written_scalar);
|
instr, memexport_eM_potentially_written_before, predicate_written_scalar);
|
||||||
if (scalar_result != spv::NoResult) {
|
if (scalar_result != spv::NoResult) {
|
||||||
EnsureBuildPointAvailable();
|
EnsureBuildPointAvailable();
|
||||||
builder_->createStore(scalar_result, var_main_previous_scalar_);
|
builder_->createStore(scalar_result, var_main_previous_scalar_);
|
||||||
|
@ -106,7 +110,8 @@ void SpirvShaderTranslator::ProcessAluInstruction(
|
||||||
}
|
}
|
||||||
|
|
||||||
spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
|
spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
|
||||||
const ParsedAluInstruction& instr, bool& predicate_written) {
|
const ParsedAluInstruction& instr,
|
||||||
|
uint8_t memexport_eM_potentially_written_before, bool& predicate_written) {
|
||||||
predicate_written = false;
|
predicate_written = false;
|
||||||
|
|
||||||
uint32_t used_result_components =
|
uint32_t used_result_components =
|
||||||
|
@ -769,14 +774,16 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
|
||||||
case ucode::AluVectorOpcode::kKillGt:
|
case ucode::AluVectorOpcode::kKillGt:
|
||||||
case ucode::AluVectorOpcode::kKillGe:
|
case ucode::AluVectorOpcode::kKillGe:
|
||||||
case ucode::AluVectorOpcode::kKillNe: {
|
case ucode::AluVectorOpcode::kKillNe: {
|
||||||
KillPixel(builder_->createUnaryOp(
|
KillPixel(
|
||||||
spv::OpAny, type_bool_,
|
builder_->createUnaryOp(
|
||||||
builder_->createBinOp(
|
spv::OpAny, type_bool_,
|
||||||
spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_,
|
builder_->createBinOp(
|
||||||
GetOperandComponents(operand_storage[0], instr.vector_operands[0],
|
spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_,
|
||||||
0b1111),
|
GetOperandComponents(operand_storage[0],
|
||||||
GetOperandComponents(operand_storage[1], instr.vector_operands[1],
|
instr.vector_operands[0], 0b1111),
|
||||||
0b1111))));
|
GetOperandComponents(operand_storage[1],
|
||||||
|
instr.vector_operands[1], 0b1111))),
|
||||||
|
memexport_eM_potentially_written_before);
|
||||||
return const_float_0_;
|
return const_float_0_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -862,7 +869,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
|
||||||
}
|
}
|
||||||
|
|
||||||
spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
|
spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
|
||||||
const ParsedAluInstruction& instr, bool& predicate_written) {
|
const ParsedAluInstruction& instr,
|
||||||
|
uint8_t memexport_eM_potentially_written_before, bool& predicate_written) {
|
||||||
predicate_written = false;
|
predicate_written = false;
|
||||||
|
|
||||||
spv::Id operand_storage[2] = {};
|
spv::Id operand_storage[2] = {};
|
||||||
|
@ -1257,12 +1265,13 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
|
||||||
case ucode::AluScalarOpcode::kKillsNe:
|
case ucode::AluScalarOpcode::kKillsNe:
|
||||||
case ucode::AluScalarOpcode::kKillsOne: {
|
case ucode::AluScalarOpcode::kKillsOne: {
|
||||||
KillPixel(builder_->createBinOp(
|
KillPixel(builder_->createBinOp(
|
||||||
spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_,
|
spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_,
|
||||||
GetOperandComponents(operand_storage[0], instr.scalar_operands[0],
|
GetOperandComponents(operand_storage[0],
|
||||||
0b0001),
|
instr.scalar_operands[0], 0b0001),
|
||||||
instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne
|
instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne
|
||||||
? const_float_1_
|
? const_float_1_
|
||||||
: const_float_0_));
|
: const_float_0_),
|
||||||
|
memexport_eM_potentially_written_before);
|
||||||
return const_float_0_;
|
return const_float_0_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,950 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2024 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/spirv_shader_translator.h"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
|
||||||
|
#include "xenia/base/assert.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/gpu/ucode.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
|
||||||
|
if (!export_eM) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_zero(export_eM & ~current_shader().memexport_eM_written());
|
||||||
|
|
||||||
|
if (!IsMemoryExportSupported()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if memory export is allowed in this guest shader invocation.
|
||||||
|
std::optional<SpirvBuilder::IfBuilder> if_memexport_allowed;
|
||||||
|
if (main_memexport_allowed_ != spv::NoResult) {
|
||||||
|
if_memexport_allowed.emplace(main_memexport_allowed_,
|
||||||
|
spv::SelectionControlDontFlattenMask,
|
||||||
|
*builder_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the pixel was killed (but the actual killing on the SPIR-V side has not
|
||||||
|
// been performed yet because the device doesn't support demotion to helper
|
||||||
|
// invocation that doesn't interfere with control flow), the current
|
||||||
|
// invocation is not considered active anymore.
|
||||||
|
std::optional<SpirvBuilder::IfBuilder> if_pixel_not_killed;
|
||||||
|
if (var_main_kill_pixel_ != spv::NoResult) {
|
||||||
|
if_pixel_not_killed.emplace(
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpLogicalNot, type_bool_,
|
||||||
|
builder_->createLoad(var_main_kill_pixel_, spv::NoPrecision)),
|
||||||
|
spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the address with the correct sign and exponent was written, and
|
||||||
|
// that the index doesn't overflow the mantissa bits.
|
||||||
|
// all((eA_vector >> uvec4(30, 23, 23, 23)) == uvec4(0x1, 0x96, 0x96, 0x96))
|
||||||
|
spv::Id eA_vector = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint4_,
|
||||||
|
builder_->createLoad(var_main_memexport_address_, spv::NoPrecision));
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(builder_->makeUintConstant(30));
|
||||||
|
id_vector_temp_.push_back(builder_->makeUintConstant(23));
|
||||||
|
id_vector_temp_.push_back(id_vector_temp_.back());
|
||||||
|
id_vector_temp_.push_back(id_vector_temp_.back());
|
||||||
|
spv::Id address_validation_shift =
|
||||||
|
builder_->makeCompositeConstant(type_uint4_, id_vector_temp_);
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(builder_->makeUintConstant(0x1));
|
||||||
|
id_vector_temp_.push_back(builder_->makeUintConstant(0x96));
|
||||||
|
id_vector_temp_.push_back(id_vector_temp_.back());
|
||||||
|
id_vector_temp_.push_back(id_vector_temp_.back());
|
||||||
|
spv::Id address_validation_value =
|
||||||
|
builder_->makeCompositeConstant(type_uint4_, id_vector_temp_);
|
||||||
|
SpirvBuilder::IfBuilder if_address_valid(
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpAll, type_bool_,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpIEqual, type_bool4_,
|
||||||
|
builder_->createBinOp(spv::OpShiftRightLogical, type_uint4_,
|
||||||
|
eA_vector, address_validation_shift),
|
||||||
|
address_validation_value)),
|
||||||
|
spv::SelectionControlDontFlattenMask, *builder_, 2, 1);
|
||||||
|
|
||||||
|
using EMIdArray = std::array<spv::Id, ucode::kMaxMemExportElementCount>;
|
||||||
|
|
||||||
|
auto for_each_eM = [&](std::function<void(uint32_t eM_index)> fn) {
|
||||||
|
uint8_t eM_remaining = export_eM;
|
||||||
|
uint32_t eM_index;
|
||||||
|
while (xe::bit_scan_forward(eM_remaining, &eM_index)) {
|
||||||
|
eM_remaining &= ~(uint8_t(1) << eM_index);
|
||||||
|
fn(eM_index);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Load the original eM.
|
||||||
|
EMIdArray eM_original;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_original[eM_index] = builder_->createLoad(
|
||||||
|
var_main_memexport_data_[eM_index], spv::NoPrecision);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Swap red and blue if needed.
|
||||||
|
spv::Id format_info =
|
||||||
|
builder_->createCompositeExtract(eA_vector, type_uint_, 2);
|
||||||
|
spv::Id swap_red_blue = builder_->createBinOp(
|
||||||
|
spv::OpINotEqual, type_bool_,
|
||||||
|
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info,
|
||||||
|
builder_->makeUintConstant(uint32_t(1) << 19)),
|
||||||
|
const_uint_0_);
|
||||||
|
EMIdArray eM_swapped;
|
||||||
|
uint_vector_temp_.clear();
|
||||||
|
uint_vector_temp_.push_back(2);
|
||||||
|
uint_vector_temp_.push_back(1);
|
||||||
|
uint_vector_temp_.push_back(0);
|
||||||
|
uint_vector_temp_.push_back(3);
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_swapped[eM_index] = builder_->createTriOp(
|
||||||
|
spv::OpSelect, type_float4_, swap_red_blue,
|
||||||
|
builder_->createRvalueSwizzle(spv::NoPrecision, type_float4_,
|
||||||
|
eM_original[eM_index], uint_vector_temp_),
|
||||||
|
eM_original[eM_index]);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Extract the numeric format.
|
||||||
|
spv::Id is_signed = builder_->createBinOp(
|
||||||
|
spv::OpINotEqual, type_bool_,
|
||||||
|
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info,
|
||||||
|
builder_->makeUintConstant(uint32_t(1) << 16)),
|
||||||
|
const_uint_0_);
|
||||||
|
spv::Id is_norm = builder_->createBinOp(
|
||||||
|
spv::OpIEqual, type_bool_,
|
||||||
|
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, format_info,
|
||||||
|
builder_->makeUintConstant(uint32_t(1) << 17)),
|
||||||
|
const_uint_0_);
|
||||||
|
|
||||||
|
// Perform format packing.
|
||||||
|
|
||||||
|
auto flush_nan = [&](const EMIdArray& eM) -> EMIdArray {
|
||||||
|
EMIdArray eM_flushed;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id element_unflushed = eM[eM_index];
|
||||||
|
unsigned int component_count =
|
||||||
|
builder_->getNumComponents(element_unflushed);
|
||||||
|
eM_flushed[eM_index] = builder_->createTriOp(
|
||||||
|
spv::OpSelect, type_float_vectors_[component_count - 1],
|
||||||
|
builder_->createUnaryOp(spv::OpIsNan,
|
||||||
|
type_bool_vectors_[component_count - 1],
|
||||||
|
element_unflushed),
|
||||||
|
const_float_vectors_0_[component_count - 1], element_unflushed);
|
||||||
|
});
|
||||||
|
return eM_flushed;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto make_float_constant_vectors =
|
||||||
|
[&](float value) -> std::array<spv::Id, 4> {
|
||||||
|
std::array<spv::Id, 4> const_vectors;
|
||||||
|
const_vectors[0] = builder_->makeFloatConstant(value);
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(const_vectors[0]);
|
||||||
|
for (unsigned int component_count_minus_1 = 1; component_count_minus_1 < 4;
|
||||||
|
++component_count_minus_1) {
|
||||||
|
id_vector_temp_.push_back(const_vectors[0]);
|
||||||
|
const_vectors[component_count_minus_1] = builder_->makeCompositeConstant(
|
||||||
|
type_float_vectors_[component_count_minus_1], id_vector_temp_);
|
||||||
|
}
|
||||||
|
return const_vectors;
|
||||||
|
};
|
||||||
|
std::array<spv::Id, 4> const_float_vectors_minus_1 =
|
||||||
|
make_float_constant_vectors(-1.0f);
|
||||||
|
std::array<spv::Id, 4> const_float_vectors_minus_0_5 =
|
||||||
|
make_float_constant_vectors(-0.5f);
|
||||||
|
std::array<spv::Id, 4> const_float_vectors_0_5 =
|
||||||
|
make_float_constant_vectors(0.5f);
|
||||||
|
|
||||||
|
// The widths must be without holes (R, RG, RGB, RGBA), and expecting the
|
||||||
|
// widths to add up to the size of the stored texel (8, 16 or 32 bits), as the
|
||||||
|
// unused upper bits will contain junk from the sign extension of X if the
|
||||||
|
// number is signed.
|
||||||
|
auto pack_8_16_32 = [&](std::array<uint32_t, 4> widths) -> EMIdArray {
|
||||||
|
unsigned int component_count;
|
||||||
|
std::array<uint32_t, 4> offsets{};
|
||||||
|
for (component_count = 0; component_count < widths.size();
|
||||||
|
++component_count) {
|
||||||
|
if (!widths[component_count]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Only formats for which max + 0.5 can be represented exactly.
|
||||||
|
assert(widths[component_count] <= 23);
|
||||||
|
if (component_count) {
|
||||||
|
offsets[component_count] =
|
||||||
|
offsets[component_count - 1] + widths[component_count - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert_not_zero(component_count);
|
||||||
|
|
||||||
|
// Extract the needed components.
|
||||||
|
EMIdArray eM_unflushed = eM_swapped;
|
||||||
|
if (component_count < 4) {
|
||||||
|
if (component_count == 1) {
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_unflushed[eM_index] = builder_->createCompositeExtract(
|
||||||
|
eM_unflushed[eM_index], type_float_, 0);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
uint_vector_temp_.clear();
|
||||||
|
for (unsigned int component_index = 0;
|
||||||
|
component_index < component_count; ++component_index) {
|
||||||
|
uint_vector_temp_.push_back(component_index);
|
||||||
|
}
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_unflushed[eM_index] = builder_->createRvalueSwizzle(
|
||||||
|
spv::NoPrecision, type_float_vectors_[component_count - 1],
|
||||||
|
eM_unflushed[eM_index], uint_vector_temp_);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush NaNs.
|
||||||
|
EMIdArray eM_flushed = flush_nan(eM_unflushed);
|
||||||
|
|
||||||
|
// Convert to integers.
|
||||||
|
SpirvBuilder::IfBuilder if_signed(
|
||||||
|
is_signed, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
EMIdArray eM_signed;
|
||||||
|
{
|
||||||
|
// Signed.
|
||||||
|
SpirvBuilder::IfBuilder if_norm(
|
||||||
|
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
EMIdArray eM_norm;
|
||||||
|
{
|
||||||
|
// Signed normalized.
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
for (unsigned int component_index = 0;
|
||||||
|
component_index < component_count; ++component_index) {
|
||||||
|
id_vector_temp_.push_back(builder_->makeFloatConstant(
|
||||||
|
float((uint32_t(1) << (widths[component_index] - 1)) - 1)));
|
||||||
|
}
|
||||||
|
spv::Id const_max_value =
|
||||||
|
component_count > 1
|
||||||
|
? builder_->makeCompositeConstant(
|
||||||
|
type_float_vectors_[component_count - 1], id_vector_temp_)
|
||||||
|
: id_vector_temp_.front();
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_norm[eM_index] = builder_->createNoContractionBinOp(
|
||||||
|
spv::OpFMul, type_float_vectors_[component_count - 1],
|
||||||
|
builder_->createTriBuiltinCall(
|
||||||
|
type_float_vectors_[component_count - 1],
|
||||||
|
ext_inst_glsl_std_450_, GLSLstd450FClamp,
|
||||||
|
eM_flushed[eM_index],
|
||||||
|
const_float_vectors_minus_1[component_count - 1],
|
||||||
|
const_float_vectors_1_[component_count - 1]),
|
||||||
|
const_max_value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_norm.makeEndIf();
|
||||||
|
// All phi instructions must be in the beginning of the block.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_signed[eM_index] =
|
||||||
|
if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]);
|
||||||
|
});
|
||||||
|
// Convert to signed integer, adding plus/minus 0.5 before truncating
|
||||||
|
// according to the Direct3D format conversion rules.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_signed[eM_index] = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint_vectors_[component_count - 1],
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpConvertFToS, type_int_vectors_[component_count - 1],
|
||||||
|
builder_->createNoContractionBinOp(
|
||||||
|
spv::OpFAdd, type_float_vectors_[component_count - 1],
|
||||||
|
eM_signed[eM_index],
|
||||||
|
builder_->createTriOp(
|
||||||
|
spv::OpSelect, type_float_vectors_[component_count - 1],
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpFOrdLessThan,
|
||||||
|
type_bool_vectors_[component_count - 1],
|
||||||
|
eM_signed[eM_index],
|
||||||
|
const_float_vectors_0_[component_count - 1]),
|
||||||
|
const_float_vectors_minus_0_5[component_count - 1],
|
||||||
|
const_float_vectors_0_5[component_count - 1]))));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_signed.makeBeginElse();
|
||||||
|
EMIdArray eM_unsigned;
|
||||||
|
{
|
||||||
|
SpirvBuilder::IfBuilder if_norm(
|
||||||
|
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
EMIdArray eM_norm;
|
||||||
|
{
|
||||||
|
// Unsigned normalized.
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
for (unsigned int component_index = 0;
|
||||||
|
component_index < component_count; ++component_index) {
|
||||||
|
id_vector_temp_.push_back(builder_->makeFloatConstant(
|
||||||
|
float((uint32_t(1) << widths[component_index]) - 1)));
|
||||||
|
}
|
||||||
|
spv::Id const_max_value =
|
||||||
|
component_count > 1
|
||||||
|
? builder_->makeCompositeConstant(
|
||||||
|
type_float_vectors_[component_count - 1], id_vector_temp_)
|
||||||
|
: id_vector_temp_.front();
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_norm[eM_index] = builder_->createNoContractionBinOp(
|
||||||
|
spv::OpFMul, type_float_vectors_[component_count - 1],
|
||||||
|
builder_->createTriBuiltinCall(
|
||||||
|
type_float_vectors_[component_count - 1],
|
||||||
|
ext_inst_glsl_std_450_, GLSLstd450FClamp,
|
||||||
|
eM_flushed[eM_index],
|
||||||
|
const_float_vectors_0_[component_count - 1],
|
||||||
|
const_float_vectors_1_[component_count - 1]),
|
||||||
|
const_max_value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_norm.makeEndIf();
|
||||||
|
// All phi instructions must be in the beginning of the block.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_unsigned[eM_index] =
|
||||||
|
if_norm.createMergePhi(eM_norm[eM_index], eM_flushed[eM_index]);
|
||||||
|
});
|
||||||
|
// Convert to unsigned integer, adding 0.5 before truncating according to
|
||||||
|
// the Direct3D format conversion rules.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_unsigned[eM_index] = builder_->createUnaryOp(
|
||||||
|
spv::OpConvertFToU, type_uint_vectors_[component_count - 1],
|
||||||
|
builder_->createNoContractionBinOp(
|
||||||
|
spv::OpFAdd, type_float_vectors_[component_count - 1],
|
||||||
|
eM_unsigned[eM_index],
|
||||||
|
const_float_vectors_0_5[component_count - 1]));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_signed.makeEndIf();
|
||||||
|
EMIdArray eM_unpacked;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_unpacked[eM_index] =
|
||||||
|
if_signed.createMergePhi(eM_signed[eM_index], eM_unsigned[eM_index]);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Pack into a 32-bit value, and pad to a 4-component vector for the phi.
|
||||||
|
EMIdArray eM_packed;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id element_unpacked = eM_unpacked[eM_index];
|
||||||
|
eM_packed[eM_index] = component_count > 1
|
||||||
|
? builder_->createCompositeExtract(
|
||||||
|
element_unpacked, type_uint_, 0)
|
||||||
|
: element_unpacked;
|
||||||
|
for (unsigned int component_index = 1; component_index < component_count;
|
||||||
|
++component_index) {
|
||||||
|
eM_packed[eM_index] = builder_->createQuadOp(
|
||||||
|
spv::OpBitFieldInsert, type_uint_, eM_packed[eM_index],
|
||||||
|
builder_->createCompositeExtract(element_unpacked, type_uint_,
|
||||||
|
component_index),
|
||||||
|
builder_->makeUintConstant(offsets[component_index]),
|
||||||
|
builder_->makeUintConstant(widths[component_index]));
|
||||||
|
}
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.resize(4, const_uint_0_);
|
||||||
|
id_vector_temp_.front() = eM_packed[eM_index];
|
||||||
|
eM_packed[eM_index] =
|
||||||
|
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
|
||||||
|
});
|
||||||
|
|
||||||
|
return eM_packed;
|
||||||
|
};
|
||||||
|
|
||||||
|
SpirvBuilder::SwitchBuilder format_switch(
|
||||||
|
builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info,
|
||||||
|
builder_->makeUintConstant(8),
|
||||||
|
builder_->makeUintConstant(6)),
|
||||||
|
spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
|
||||||
|
// Record of one populated case of format_switch, collected so that the OpPhi
// instructions built after the switch can select the per-format results.
struct FormatCase {
|
||||||
|
// Packed eM# values produced in this case; these are the value operands of
// the uint4-typed phi built after the switch.
EMIdArray eM_packed;
|
||||||
|
// Log2 of the element size in bytes for this format; fed into the
// element-size phi and later used as the shift amount for the eM0 index.
uint32_t element_bytes_log2;
|
||||||
|
// ID of the block that was the build point when the case was recorded; used
// as the parent operand paired with the values above in the phis.
spv::Id phi_parent;
|
||||||
|
};
|
||||||
|
std::vector<FormatCase> format_cases;
|
||||||
|
// Must be called at the end of the switch case segment for the correct phi
|
||||||
|
// parent.
|
||||||
|
// Appends a FormatCase to format_cases holding the given packed values and
// element size log2, and captures the ID of the builder's current build point
// as the phi parent for this case.
auto add_format_case = [&](const EMIdArray& eM_packed,
|
||||||
|
uint32_t element_bytes_log2) {
|
||||||
|
FormatCase& format_case = format_cases.emplace_back();
|
||||||
|
format_case.eM_packed = eM_packed;
|
||||||
|
format_case.element_bytes_log2 = element_bytes_log2;
|
||||||
|
// The build point is sampled at call time, so any blocks opened while
// packing must already be closed for this to be the case's final block.
format_case.phi_parent = builder_->getBuildPoint()->getId();
|
||||||
|
};
|
||||||
|
|
||||||
|
// k_8, k_8_A, k_8_B
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_8));
|
||||||
|
// TODO(Triang3l): Investigate how input should be treated for k_8_A, k_8_B.
|
||||||
|
format_switch.addCurrentCaseLiteral(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_8_A));
|
||||||
|
format_switch.addCurrentCaseLiteral(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_8_B));
|
||||||
|
add_format_case(pack_8_16_32({8}), 0);
|
||||||
|
|
||||||
|
// k_1_5_5_5
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_1_5_5_5));
|
||||||
|
add_format_case(pack_8_16_32({5, 5, 5, 1}), 1);
|
||||||
|
|
||||||
|
// k_5_6_5
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_5_6_5));
|
||||||
|
add_format_case(pack_8_16_32({5, 6, 5}), 1);
|
||||||
|
|
||||||
|
// k_6_5_5
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_6_5_5));
|
||||||
|
add_format_case(pack_8_16_32({5, 5, 6}), 1);
|
||||||
|
|
||||||
|
// k_8_8_8_8, k_8_8_8_8_A, k_8_8_8_8_AS_16_16_16_16
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_8_8_8_8));
|
||||||
|
// TODO(Triang3l): Investigate how input should be treated for k_8_8_8_8_A.
|
||||||
|
format_switch.addCurrentCaseLiteral(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_8_8_8_8_A));
|
||||||
|
format_switch.addCurrentCaseLiteral(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16));
|
||||||
|
add_format_case(pack_8_16_32({8, 8, 8, 8}), 2);
|
||||||
|
|
||||||
|
// k_2_10_10_10, k_2_10_10_10_AS_16_16_16_16
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_2_10_10_10));
|
||||||
|
format_switch.addCurrentCaseLiteral(static_cast<unsigned int>(
|
||||||
|
xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16));
|
||||||
|
add_format_case(pack_8_16_32({10, 10, 10, 2}), 2);
|
||||||
|
|
||||||
|
// k_8_8
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_8_8));
|
||||||
|
add_format_case(pack_8_16_32({8, 8}), 1);
|
||||||
|
|
||||||
|
// k_4_4_4_4
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_4_4_4_4));
|
||||||
|
add_format_case(pack_8_16_32({4, 4, 4, 4}), 1);
|
||||||
|
|
||||||
|
// k_10_11_11, k_10_11_11_AS_16_16_16_16
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_10_11_11));
|
||||||
|
format_switch.addCurrentCaseLiteral(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16));
|
||||||
|
add_format_case(pack_8_16_32({11, 11, 10}), 2);
|
||||||
|
|
||||||
|
// k_11_11_10, k_11_11_10_AS_16_16_16_16
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_11_11_10));
|
||||||
|
format_switch.addCurrentCaseLiteral(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16));
|
||||||
|
add_format_case(pack_8_16_32({10, 11, 11}), 2);
|
||||||
|
|
||||||
|
// k_16
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_16));
|
||||||
|
add_format_case(pack_8_16_32({16}), 1);
|
||||||
|
|
||||||
|
// k_16_16
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_16_16));
|
||||||
|
add_format_case(pack_8_16_32({16, 16}), 2);
|
||||||
|
|
||||||
|
// k_16_16_16_16
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_16_16_16_16));
|
||||||
|
{
|
||||||
|
// Flush NaNs.
|
||||||
|
EMIdArray fixed16_flushed = flush_nan(eM_swapped);
|
||||||
|
|
||||||
|
// Convert to integers.
|
||||||
|
SpirvBuilder::IfBuilder if_signed(
|
||||||
|
is_signed, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
EMIdArray fixed16_signed;
|
||||||
|
{
|
||||||
|
// Signed.
|
||||||
|
SpirvBuilder::IfBuilder if_norm(
|
||||||
|
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
EMIdArray fixed16_norm;
|
||||||
|
{
|
||||||
|
// Signed normalized.
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.resize(4, builder_->makeFloatConstant(
|
||||||
|
float((uint32_t(1) << (16 - 1)) - 1)));
|
||||||
|
spv::Id const_snorm16_max_value =
|
||||||
|
builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
fixed16_norm[eM_index] = builder_->createNoContractionBinOp(
|
||||||
|
spv::OpFMul, type_float4_,
|
||||||
|
builder_->createTriBuiltinCall(
|
||||||
|
type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp,
|
||||||
|
fixed16_flushed[eM_index], const_float_vectors_minus_1[3],
|
||||||
|
const_float4_1_),
|
||||||
|
const_snorm16_max_value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_norm.makeEndIf();
|
||||||
|
// All phi instructions must be in the beginning of the block.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
fixed16_signed[eM_index] = if_norm.createMergePhi(
|
||||||
|
fixed16_norm[eM_index], fixed16_flushed[eM_index]);
|
||||||
|
});
|
||||||
|
// Convert to signed integer, adding plus/minus 0.5 before truncating
|
||||||
|
// according to the Direct3D format conversion rules.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
fixed16_signed[eM_index] = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint4_,
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpConvertFToS, type_int4_,
|
||||||
|
builder_->createNoContractionBinOp(
|
||||||
|
spv::OpFAdd, type_float4_, fixed16_signed[eM_index],
|
||||||
|
builder_->createTriOp(
|
||||||
|
spv::OpSelect, type_float4_,
|
||||||
|
builder_->createBinOp(spv::OpFOrdLessThan, type_bool4_,
|
||||||
|
fixed16_signed[eM_index],
|
||||||
|
const_float4_0_),
|
||||||
|
const_float_vectors_minus_0_5[3],
|
||||||
|
const_float_vectors_0_5[3]))));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_signed.makeBeginElse();
|
||||||
|
EMIdArray fixed16_unsigned;
|
||||||
|
{
|
||||||
|
// Unsigned.
|
||||||
|
SpirvBuilder::IfBuilder if_norm(
|
||||||
|
is_norm, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
EMIdArray fixed16_norm;
|
||||||
|
{
|
||||||
|
// Unsigned normalized.
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.resize(
|
||||||
|
4, builder_->makeFloatConstant(float((uint32_t(1) << 16) - 1)));
|
||||||
|
spv::Id const_unorm16_max_value =
|
||||||
|
builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
fixed16_norm[eM_index] = builder_->createNoContractionBinOp(
|
||||||
|
spv::OpFMul, type_float4_,
|
||||||
|
builder_->createTriBuiltinCall(
|
||||||
|
type_float4_, ext_inst_glsl_std_450_, GLSLstd450FClamp,
|
||||||
|
fixed16_flushed[eM_index], const_float4_0_, const_float4_1_),
|
||||||
|
const_unorm16_max_value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_norm.makeEndIf();
|
||||||
|
// All phi instructions must be in the beginning of the block.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
fixed16_unsigned[eM_index] = if_norm.createMergePhi(
|
||||||
|
fixed16_norm[eM_index], fixed16_flushed[eM_index]);
|
||||||
|
});
|
||||||
|
// Convert to unsigned integer, adding 0.5 before truncating according to
|
||||||
|
// the Direct3D format conversion rules.
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
fixed16_unsigned[eM_index] = builder_->createUnaryOp(
|
||||||
|
spv::OpConvertFToU, type_uint4_,
|
||||||
|
builder_->createNoContractionBinOp(spv::OpFAdd, type_float4_,
|
||||||
|
fixed16_unsigned[eM_index],
|
||||||
|
const_float_vectors_0_5[3]));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if_signed.makeEndIf();
|
||||||
|
EMIdArray fixed16_unpacked;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
fixed16_unpacked[eM_index] = if_signed.createMergePhi(
|
||||||
|
fixed16_signed[eM_index], fixed16_unsigned[eM_index]);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Pack into two 32-bit values, and pad to a 4-component vector for the phi.
|
||||||
|
EMIdArray fixed16_packed;
|
||||||
|
spv::Id const_uint_16 = builder_->makeUintConstant(16);
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id fixed16_element_unpacked = fixed16_unpacked[eM_index];
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
for (uint32_t component_index = 0; component_index < 2;
|
||||||
|
++component_index) {
|
||||||
|
id_vector_temp_.push_back(builder_->createQuadOp(
|
||||||
|
spv::OpBitFieldInsert, type_uint_,
|
||||||
|
builder_->createCompositeExtract(fixed16_element_unpacked,
|
||||||
|
type_uint_, 2 * component_index),
|
||||||
|
builder_->createCompositeExtract(
|
||||||
|
fixed16_element_unpacked, type_uint_, 2 * component_index + 1),
|
||||||
|
const_uint_16, const_uint_16));
|
||||||
|
}
|
||||||
|
for (uint32_t component_index = 2; component_index < 4;
|
||||||
|
++component_index) {
|
||||||
|
id_vector_temp_.push_back(const_uint_0_);
|
||||||
|
}
|
||||||
|
fixed16_packed[eM_index] =
|
||||||
|
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
|
||||||
|
});
|
||||||
|
|
||||||
|
add_format_case(fixed16_packed, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(Triang3l): Use the extended range float16 conversion.
|
||||||
|
|
||||||
|
// k_16_FLOAT
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_16_FLOAT));
|
||||||
|
{
|
||||||
|
EMIdArray format_packed_16_float;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(builder_->createCompositeExtract(
|
||||||
|
eM_swapped[eM_index], type_float_, 0));
|
||||||
|
id_vector_temp_.push_back(const_float_0_);
|
||||||
|
spv::Id format_packed_16_float_x = builder_->createUnaryBuiltinCall(
|
||||||
|
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
|
||||||
|
builder_->createCompositeConstruct(type_float2_, id_vector_temp_));
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.resize(4, const_uint_0_);
|
||||||
|
id_vector_temp_.front() = format_packed_16_float_x;
|
||||||
|
format_packed_16_float[eM_index] =
|
||||||
|
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
|
||||||
|
});
|
||||||
|
add_format_case(format_packed_16_float, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// k_16_16_FLOAT
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_16_16_FLOAT));
|
||||||
|
{
|
||||||
|
EMIdArray format_packed_16_16_float;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
uint_vector_temp_.clear();
|
||||||
|
uint_vector_temp_.push_back(0);
|
||||||
|
uint_vector_temp_.push_back(1);
|
||||||
|
spv::Id format_packed_16_16_float_xy = builder_->createUnaryBuiltinCall(
|
||||||
|
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
|
||||||
|
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
|
||||||
|
eM_swapped[eM_index],
|
||||||
|
uint_vector_temp_));
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.resize(4, const_uint_0_);
|
||||||
|
id_vector_temp_.front() = format_packed_16_16_float_xy;
|
||||||
|
format_packed_16_16_float[eM_index] =
|
||||||
|
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
|
||||||
|
});
|
||||||
|
add_format_case(format_packed_16_16_float, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// k_16_16_16_16_FLOAT
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_16_16_16_16_FLOAT));
|
||||||
|
{
|
||||||
|
EMIdArray format_packed_16_16_16_16_float;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id format_packed_16_16_16_16_float_xy_zw[2];
|
||||||
|
for (uint32_t component_index = 0; component_index < 2;
|
||||||
|
++component_index) {
|
||||||
|
uint_vector_temp_.clear();
|
||||||
|
uint_vector_temp_.push_back(2 * component_index);
|
||||||
|
uint_vector_temp_.push_back(2 * component_index + 1);
|
||||||
|
format_packed_16_16_16_16_float_xy_zw[component_index] =
|
||||||
|
builder_->createUnaryBuiltinCall(
|
||||||
|
type_uint_, ext_inst_glsl_std_450_, GLSLstd450PackHalf2x16,
|
||||||
|
builder_->createRvalueSwizzle(spv::NoPrecision, type_float2_,
|
||||||
|
eM_swapped[eM_index],
|
||||||
|
uint_vector_temp_));
|
||||||
|
}
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[0]);
|
||||||
|
id_vector_temp_.push_back(format_packed_16_16_16_16_float_xy_zw[1]);
|
||||||
|
id_vector_temp_.push_back(const_uint_0_);
|
||||||
|
id_vector_temp_.push_back(const_uint_0_);
|
||||||
|
format_packed_16_16_16_16_float[eM_index] =
|
||||||
|
builder_->createCompositeConstruct(type_uint4_, id_vector_temp_);
|
||||||
|
});
|
||||||
|
add_format_case(format_packed_16_16_16_16_float, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
// k_32_FLOAT
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_32_FLOAT));
|
||||||
|
{
|
||||||
|
EMIdArray format_packed_32_float;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
format_packed_32_float[eM_index] = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint4_, eM_swapped[eM_index]);
|
||||||
|
});
|
||||||
|
add_format_case(format_packed_32_float, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// k_32_32_FLOAT
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_32_32_FLOAT));
|
||||||
|
{
|
||||||
|
EMIdArray format_packed_32_32_float;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
format_packed_32_32_float[eM_index] = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint4_, eM_swapped[eM_index]);
|
||||||
|
});
|
||||||
|
add_format_case(format_packed_32_32_float, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
// k_32_32_32_32_FLOAT
|
||||||
|
format_switch.makeBeginCase(
|
||||||
|
static_cast<unsigned int>(xenos::ColorFormat::k_32_32_32_32_FLOAT));
|
||||||
|
{
|
||||||
|
EMIdArray format_packed_32_32_32_32_float;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
format_packed_32_32_32_32_float[eM_index] = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint4_, eM_swapped[eM_index]);
|
||||||
|
});
|
||||||
|
add_format_case(format_packed_32_32_32_32_float, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
format_switch.makeEndSwitch();
|
||||||
|
|
||||||
|
// Select the result and the element size based on the format.
|
||||||
|
// Phi instructions must be the first instructions in a block.
|
||||||
|
EMIdArray eM_packed;
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
auto eM_packed_phi = std::make_unique<spv::Instruction>(
|
||||||
|
builder_->getUniqueId(), type_uint4_, spv::OpPhi);
|
||||||
|
// Default case for an invalid format.
|
||||||
|
eM_packed_phi->addIdOperand(const_uint4_0_);
|
||||||
|
eM_packed_phi->addIdOperand(format_switch.getDefaultPhiParent());
|
||||||
|
for (const FormatCase& format_case : format_cases) {
|
||||||
|
eM_packed_phi->addIdOperand(format_case.eM_packed[eM_index]);
|
||||||
|
eM_packed_phi->addIdOperand(format_case.phi_parent);
|
||||||
|
}
|
||||||
|
eM_packed[eM_index] = eM_packed_phi->getResultId();
|
||||||
|
builder_->getBuildPoint()->addInstruction(std::move(eM_packed_phi));
|
||||||
|
});
|
||||||
|
spv::Id element_bytes_log2;
|
||||||
|
{
|
||||||
|
auto element_bytes_log2_phi = std::make_unique<spv::Instruction>(
|
||||||
|
builder_->getUniqueId(), type_uint_, spv::OpPhi);
|
||||||
|
// Default case for an invalid format (doesn't enter any element size
|
||||||
|
// conditional, skipped).
|
||||||
|
element_bytes_log2_phi->addIdOperand(builder_->makeUintConstant(5));
|
||||||
|
element_bytes_log2_phi->addIdOperand(format_switch.getDefaultPhiParent());
|
||||||
|
for (const FormatCase& format_case : format_cases) {
|
||||||
|
element_bytes_log2_phi->addIdOperand(
|
||||||
|
builder_->makeUintConstant(format_case.element_bytes_log2));
|
||||||
|
element_bytes_log2_phi->addIdOperand(format_case.phi_parent);
|
||||||
|
}
|
||||||
|
element_bytes_log2 = element_bytes_log2_phi->getResultId();
|
||||||
|
builder_->getBuildPoint()->addInstruction(
|
||||||
|
std::move(element_bytes_log2_phi));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Endian-swap.
|
||||||
|
spv::Id endian =
|
||||||
|
builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, format_info,
|
||||||
|
const_uint_0_, builder_->makeUintConstant(3));
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
eM_packed[eM_index] = EndianSwap128Uint4(eM_packed[eM_index], endian);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Load the index of eM0 in the stream.
|
||||||
|
spv::Id eM0_index = builder_->createTriOp(
|
||||||
|
spv::OpBitFieldUExtract, type_uint_,
|
||||||
|
builder_->createCompositeExtract(eA_vector, type_uint_, 1), const_uint_0_,
|
||||||
|
builder_->makeUintConstant(23));
|
||||||
|
|
||||||
|
// Check how many elements starting from eM0 are within the bounds of the
|
||||||
|
// stream, and from the eM# that were written, exclude the out-of-bounds ones.
|
||||||
|
// The index can't be negative, and the index and the count are limited to 23
|
||||||
|
// bits, so it's safe to use 32-bit signed subtraction and clamping to get the
|
||||||
|
// remaining eM# count.
|
||||||
|
spv::Id eM_indices_to_store = builder_->createTriOp(
|
||||||
|
spv::OpBitFieldUExtract, type_uint_,
|
||||||
|
builder_->createLoad(var_main_memexport_data_written_, spv::NoPrecision),
|
||||||
|
const_uint_0_,
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_uint_,
|
||||||
|
builder_->createTriBuiltinCall(
|
||||||
|
type_int_, ext_inst_glsl_std_450_, GLSLstd450SClamp,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpISub, type_int_,
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_int_,
|
||||||
|
builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_,
|
||||||
|
builder_->createCompositeExtract(
|
||||||
|
eA_vector, type_uint_, 3),
|
||||||
|
const_uint_0_,
|
||||||
|
builder_->makeUintConstant(23))),
|
||||||
|
builder_->createUnaryOp(spv::OpBitcast, type_int_,
|
||||||
|
eM0_index)),
|
||||||
|
const_int_0_,
|
||||||
|
builder_->makeIntConstant(ucode::kMaxMemExportElementCount))));
|
||||||
|
|
||||||
|
// Get the eM0 address in bytes.
|
||||||
|
// Left-shift the stream base address by 2 to both convert it from dwords to
|
||||||
|
// bytes and drop the upper bits.
|
||||||
|
spv::Id const_uint_2 = builder_->makeUintConstant(2);
|
||||||
|
spv::Id eM0_address_bytes = builder_->createBinOp(
|
||||||
|
spv::OpIAdd, type_uint_,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpShiftLeftLogical, type_uint_,
|
||||||
|
builder_->createCompositeExtract(eA_vector, type_uint_, 0),
|
||||||
|
const_uint_2),
|
||||||
|
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_, eM0_index,
|
||||||
|
element_bytes_log2));
|
||||||
|
|
||||||
|
// Store based on the element size.
|
||||||
|
// Runs fn(eM_index) for every eM# visited by for_each_eM, wrapping each call
// in a conditional that is taken only when bit eM_index of
// eM_indices_to_store is set, so stores are emitted under a per-element guard.
auto store_needed_eM = [&](std::function<void(uint32_t eM_index)> fn) {
|
||||||
|
for_each_eM([&](uint32_t eM_index) {
|
||||||
|
SpirvBuilder::IfBuilder if_eM_needed(
|
||||||
|
// Condition: (eM_indices_to_store & (1 << eM_index)) != 0.
builder_->createBinOp(
|
||||||
|
spv::OpINotEqual, type_bool_,
|
||||||
|
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
|
||||||
|
eM_indices_to_store,
|
||||||
|
builder_->makeUintConstant(1u << eM_index)),
|
||||||
|
const_uint_0_),
|
||||||
|
// NOTE(review): the trailing `2, 1` arguments are interpreted by the
// IfBuilder constructor, which is not visible here - confirm their meaning.
spv::SelectionControlDontFlattenMask, *builder_, 2, 1);
|
||||||
|
// Code emitted by fn lands inside the conditional opened above.
fn(eM_index);
|
||||||
|
if_eM_needed.makeEndIf();
|
||||||
|
});
|
||||||
|
};
|
||||||
|
SpirvBuilder::SwitchBuilder element_size_switch(
|
||||||
|
element_bytes_log2, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
|
element_size_switch.makeBeginCase(0);
|
||||||
|
{
|
||||||
|
store_needed_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id element_address_bytes =
|
||||||
|
eM_index != 0 ? builder_->createBinOp(
|
||||||
|
spv::OpIAdd, type_uint_, eM0_address_bytes,
|
||||||
|
builder_->makeUintConstant(eM_index))
|
||||||
|
: eM0_address_bytes;
|
||||||
|
// replace_shift = 8 * (element_address_bytes & 3)
|
||||||
|
spv::Id replace_shift = builder_->createQuadOp(
|
||||||
|
spv::OpBitFieldInsert, type_uint_, const_uint_0_,
|
||||||
|
element_address_bytes, builder_->makeUintConstant(3), const_uint_2);
|
||||||
|
StoreUint32ToSharedMemory(
|
||||||
|
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
|
||||||
|
builder_->createCompositeExtract(
|
||||||
|
eM_packed[eM_index], type_uint_, 0),
|
||||||
|
replace_shift),
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_int_,
|
||||||
|
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
|
||||||
|
element_address_bytes, const_uint_2)),
|
||||||
|
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
|
||||||
|
builder_->makeUintConstant(0xFFu),
|
||||||
|
replace_shift));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
element_size_switch.makeBeginCase(1);
|
||||||
|
{
|
||||||
|
spv::Id const_uint_1 = builder_->makeUintConstant(1);
|
||||||
|
spv::Id eM0_address_words = builder_->createBinOp(
|
||||||
|
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_1);
|
||||||
|
store_needed_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id element_address_words =
|
||||||
|
eM_index != 0 ? builder_->createBinOp(
|
||||||
|
spv::OpIAdd, type_uint_, eM0_address_words,
|
||||||
|
builder_->makeUintConstant(eM_index))
|
||||||
|
: eM0_address_words;
|
||||||
|
// replace_shift = 16 * (element_address_words & 1)
|
||||||
|
spv::Id replace_shift = builder_->createQuadOp(
|
||||||
|
spv::OpBitFieldInsert, type_uint_, const_uint_0_,
|
||||||
|
element_address_words, builder_->makeUintConstant(4), const_uint_1);
|
||||||
|
StoreUint32ToSharedMemory(
|
||||||
|
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
|
||||||
|
builder_->createCompositeExtract(
|
||||||
|
eM_packed[eM_index], type_uint_, 0),
|
||||||
|
replace_shift),
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_int_,
|
||||||
|
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
|
||||||
|
element_address_words, const_uint_1)),
|
||||||
|
builder_->createBinOp(spv::OpShiftLeftLogical, type_uint_,
|
||||||
|
builder_->makeUintConstant(0xFFFFu),
|
||||||
|
replace_shift));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
element_size_switch.makeBeginCase(2);
|
||||||
|
{
|
||||||
|
spv::Id eM0_address_dwords = builder_->createBinOp(
|
||||||
|
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2);
|
||||||
|
store_needed_eM([&](uint32_t eM_index) {
|
||||||
|
StoreUint32ToSharedMemory(
|
||||||
|
builder_->createCompositeExtract(eM_packed[eM_index], type_uint_, 0),
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_int_,
|
||||||
|
eM_index != 0 ? builder_->createBinOp(
|
||||||
|
spv::OpIAdd, type_uint_, eM0_address_dwords,
|
||||||
|
builder_->makeUintConstant(eM_index))
|
||||||
|
: eM0_address_dwords));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
element_size_switch.makeBeginCase(3);
|
||||||
|
{
|
||||||
|
spv::Id eM0_address_dwords = builder_->createBinOp(
|
||||||
|
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2);
|
||||||
|
store_needed_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id element_value = eM_packed[eM_index];
|
||||||
|
spv::Id element_address_dwords_int = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_int_,
|
||||||
|
eM_index != 0 ? builder_->createBinOp(
|
||||||
|
spv::OpIAdd, type_uint_, eM0_address_dwords,
|
||||||
|
builder_->makeUintConstant(2 * eM_index))
|
||||||
|
: eM0_address_dwords);
|
||||||
|
StoreUint32ToSharedMemory(
|
||||||
|
builder_->createCompositeExtract(element_value, type_uint_, 0),
|
||||||
|
element_address_dwords_int);
|
||||||
|
StoreUint32ToSharedMemory(
|
||||||
|
builder_->createCompositeExtract(element_value, type_uint_, 1),
|
||||||
|
builder_->createBinOp(spv::OpIAdd, type_int_,
|
||||||
|
element_address_dwords_int,
|
||||||
|
builder_->makeIntConstant(1)));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
element_size_switch.makeBeginCase(4);
|
||||||
|
{
|
||||||
|
spv::Id eM0_address_dwords = builder_->createBinOp(
|
||||||
|
spv::OpShiftRightLogical, type_uint_, eM0_address_bytes, const_uint_2);
|
||||||
|
store_needed_eM([&](uint32_t eM_index) {
|
||||||
|
spv::Id element_value = eM_packed[eM_index];
|
||||||
|
spv::Id element_address_dwords_int = builder_->createUnaryOp(
|
||||||
|
spv::OpBitcast, type_int_,
|
||||||
|
eM_index != 0 ? builder_->createBinOp(
|
||||||
|
spv::OpIAdd, type_uint_, eM0_address_dwords,
|
||||||
|
builder_->makeUintConstant(4 * eM_index))
|
||||||
|
: eM0_address_dwords);
|
||||||
|
StoreUint32ToSharedMemory(
|
||||||
|
builder_->createCompositeExtract(element_value, type_uint_, 0),
|
||||||
|
element_address_dwords_int);
|
||||||
|
for (uint32_t element_dword_index = 1; element_dword_index < 4;
|
||||||
|
++element_dword_index) {
|
||||||
|
StoreUint32ToSharedMemory(
|
||||||
|
builder_->createCompositeExtract(element_value, type_uint_,
|
||||||
|
element_dword_index),
|
||||||
|
builder_->createBinOp(spv::OpIAdd, type_int_,
|
||||||
|
element_address_dwords_int,
|
||||||
|
builder_->makeIntConstant(
|
||||||
|
static_cast<int>(element_dword_index))));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
element_size_switch.makeEndSwitch();
|
||||||
|
|
||||||
|
// Close the conditionals for whether memory export is allowed in this
|
||||||
|
// invocation.
|
||||||
|
if_address_valid.makeEndIf();
|
||||||
|
if (if_pixel_not_killed.has_value()) {
|
||||||
|
if_pixel_not_killed->makeEndIf();
|
||||||
|
}
|
||||||
|
if (if_memexport_allowed.has_value()) {
|
||||||
|
if_memexport_allowed->makeEndIf();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -2165,6 +2165,11 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
return IssueCopy();
|
return IssueCopy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
|
||||||
|
GetVulkanProvider().device_info();
|
||||||
|
|
||||||
|
memexport_ranges_.clear();
|
||||||
|
|
||||||
// Vertex shader analysis.
|
// Vertex shader analysis.
|
||||||
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
|
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
|
||||||
if (!vertex_shader) {
|
if (!vertex_shader) {
|
||||||
|
@ -2172,7 +2177,14 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
|
pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
|
||||||
bool memexport_used_vertex = vertex_shader->memexport_eM_written() != 0;
|
// TODO(Triang3l): If the shader uses memory export, but
|
||||||
|
// vertexPipelineStoresAndAtomics is not supported, convert the vertex shader
|
||||||
|
// to a compute shader and dispatch it after the draw if the draw doesn't use
|
||||||
|
// tessellation.
|
||||||
|
if (vertex_shader->memexport_eM_written() != 0 &&
|
||||||
|
device_info.vertexPipelineStoresAndAtomics) {
|
||||||
|
draw_util::AddMemExportRanges(regs, *vertex_shader, memexport_ranges_);
|
||||||
|
}
|
||||||
|
|
||||||
// Pixel shader analysis.
|
// Pixel shader analysis.
|
||||||
bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs);
|
bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs);
|
||||||
|
@ -2195,12 +2207,15 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
} else {
|
} else {
|
||||||
// Disabling pixel shader for this case is also required by the pipeline
|
// Disabling pixel shader for this case is also required by the pipeline
|
||||||
// cache.
|
// cache.
|
||||||
if (!memexport_used_vertex) {
|
if (memexport_ranges_.empty()) {
|
||||||
// This draw has no effect.
|
// This draw has no effect.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Memory export.
|
if (pixel_shader && pixel_shader->memexport_eM_written() != 0 &&
|
||||||
|
device_info.fragmentStoresAndAtomics) {
|
||||||
|
draw_util::AddMemExportRanges(regs, *pixel_shader, memexport_ranges_);
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t ps_param_gen_pos = UINT32_MAX;
|
uint32_t ps_param_gen_pos = UINT32_MAX;
|
||||||
uint32_t interpolator_mask =
|
uint32_t interpolator_mask =
|
||||||
|
@ -2416,9 +2431,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
current_guest_graphics_pipeline_layout_ = pipeline_layout;
|
current_guest_graphics_pipeline_layout_ = pipeline_layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
|
|
||||||
GetVulkanProvider().device_info();
|
|
||||||
|
|
||||||
bool host_render_targets_used = render_target_cache_->GetPath() ==
|
bool host_render_targets_used = render_target_cache_->GetPath() ==
|
||||||
RenderTargetCache::Path::kHostRenderTargets;
|
RenderTargetCache::Path::kHostRenderTargets;
|
||||||
|
|
||||||
|
@ -2520,9 +2532,39 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
<< (vfetch_index & 63);
|
<< (vfetch_index & 63);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Synchronize the memory pages backing memory scatter export streams, and
|
||||||
|
// calculate the range that includes the streams for the buffer barrier.
|
||||||
|
uint32_t memexport_extent_start = UINT32_MAX, memexport_extent_end = 0;
|
||||||
|
for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) {
|
||||||
|
uint32_t memexport_range_base_bytes = memexport_range.base_address_dwords
|
||||||
|
<< 2;
|
||||||
|
if (!shared_memory_->RequestRange(memexport_range_base_bytes,
|
||||||
|
memexport_range.size_bytes)) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to request memexport stream at 0x{:08X} (size {}) in the "
|
||||||
|
"shared memory",
|
||||||
|
memexport_range_base_bytes, memexport_range.size_bytes);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
memexport_extent_start =
|
||||||
|
std::min(memexport_extent_start, memexport_range_base_bytes);
|
||||||
|
memexport_extent_end =
|
||||||
|
std::max(memexport_extent_end,
|
||||||
|
memexport_range_base_bytes + memexport_range.size_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
// Insert the shared memory barrier if needed.
|
// Insert the shared memory barrier if needed.
|
||||||
// TODO(Triang3l): Memory export.
|
// TODO(Triang3l): Find some PM4 command that can be used for indication of
|
||||||
shared_memory_->Use(VulkanSharedMemory::Usage::kRead);
|
// when memexports should be awaited instead of inserting the barrier in Use
|
||||||
|
// every time if memory export was done in the previous draw?
|
||||||
|
if (memexport_extent_start < memexport_extent_end) {
|
||||||
|
shared_memory_->Use(
|
||||||
|
VulkanSharedMemory::Usage::kGuestDrawReadWrite,
|
||||||
|
std::make_pair(memexport_extent_start,
|
||||||
|
memexport_extent_end - memexport_extent_start));
|
||||||
|
} else {
|
||||||
|
shared_memory_->Use(VulkanSharedMemory::Usage::kRead);
|
||||||
|
}
|
||||||
|
|
||||||
// After all commands that may dispatch, copy or insert barriers, submit the
|
// After all commands that may dispatch, copy or insert barriers, submit the
|
||||||
// barriers (may end the render pass), and (re)enter the render pass before
|
// barriers (may end the render pass), and (re)enter the render pass before
|
||||||
|
@ -2567,6 +2609,12 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0);
|
primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Invalidate textures in memexported memory and watch for changes.
|
||||||
|
for (const draw_util::MemExportRange& memexport_range : memexport_ranges_) {
|
||||||
|
shared_memory_->RangeWrittenByGpu(memexport_range.base_address_dwords << 2,
|
||||||
|
memexport_range.size_bytes, false);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -737,6 +737,9 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
// System shader constants.
|
// System shader constants.
|
||||||
SpirvShaderTranslator::SystemConstants system_constants_;
|
SpirvShaderTranslator::SystemConstants system_constants_;
|
||||||
|
|
||||||
|
// Temporary storage for memexport stream constants used in the draw.
|
||||||
|
std::vector<draw_util::MemExportRange> memexport_ranges_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace vulkan
|
} // namespace vulkan
|
||||||
|
|
Loading…
Reference in New Issue