[SPIR-V] vfetch: 32_* and 32_*_FLOAT formats
This commit is contained in:
parent
533bdf2114
commit
a82b85a8ac
|
@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0),
|
||||
DxbcSrc::LU(~uint32_t(3)));
|
||||
}
|
||||
// Add the word offset from the instruction, plus the offset of the first
|
||||
// needed word within the element.
|
||||
// Add the word offset from the instruction (signed), plus the offset of the
|
||||
// first needed word within the element.
|
||||
uint32_t first_word_index;
|
||||
xe::bit_scan_forward(needed_words, &first_word_index);
|
||||
int32_t first_word_buffer_offset =
|
||||
|
|
|
@ -97,8 +97,11 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
type_bool3_ = builder_->makeVectorType(type_bool_, 3);
|
||||
type_bool4_ = builder_->makeVectorType(type_bool_, 4);
|
||||
type_int_ = builder_->makeIntType(32);
|
||||
type_int2_ = builder_->makeVectorType(type_int_, 2);
|
||||
type_int3_ = builder_->makeVectorType(type_int_, 3);
|
||||
type_int4_ = builder_->makeVectorType(type_int_, 4);
|
||||
type_uint_ = builder_->makeUintType(32);
|
||||
type_uint2_ = builder_->makeVectorType(type_uint_, 2);
|
||||
type_uint3_ = builder_->makeVectorType(type_uint_, 3);
|
||||
type_uint4_ = builder_->makeVectorType(type_uint_, 4);
|
||||
type_float_ = builder_->makeFloatType(32);
|
||||
|
@ -254,6 +257,31 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
main_interface_.push_back(uniform_bool_loop_constants_);
|
||||
}
|
||||
|
||||
// Common uniform buffer - fetch constants (32 x 6 uints packed in std140 as
|
||||
// 4-component vectors).
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeArrayType(
|
||||
type_uint4_, builder_->makeUintConstant(32 * 6 / 4),
|
||||
sizeof(uint32_t) * 4));
|
||||
builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
|
||||
sizeof(uint32_t) * 4);
|
||||
spv::Id type_fetch_constants =
|
||||
builder_->makeStructType(id_vector_temp_, "XeFetchConstants");
|
||||
builder_->addMemberName(type_fetch_constants, 0, "fetch_constants");
|
||||
builder_->addMemberDecoration(type_fetch_constants, 0, spv::DecorationOffset,
|
||||
0);
|
||||
builder_->addDecoration(type_fetch_constants, spv::DecorationBlock);
|
||||
uniform_fetch_constants_ = builder_->createVariable(
|
||||
spv::NoPrecision, spv::StorageClassUniform, type_fetch_constants,
|
||||
"xe_uniform_fetch_constants");
|
||||
builder_->addDecoration(uniform_fetch_constants_,
|
||||
spv::DecorationDescriptorSet,
|
||||
int(kDescriptorSetFetchConstants));
|
||||
builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0);
|
||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||
main_interface_.push_back(uniform_fetch_constants_);
|
||||
}
|
||||
|
||||
// Common storage buffers - shared memory uint[], each 128 MB or larger,
|
||||
// depending on what's possible on the device. glslang generates everything,
|
||||
// including all the types, for each storage buffer separately.
|
||||
|
@ -271,7 +299,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
'0' + i;
|
||||
spv::Id type_shared_memory =
|
||||
builder_->makeStructType(id_vector_temp_, shared_memory_struct_name);
|
||||
builder_->addMemberName(type_shared_memory, 0, "memory");
|
||||
builder_->addMemberName(type_shared_memory, 0, "shared_memory");
|
||||
// TODO(Triang3l): Make writable when memexport is implemented.
|
||||
builder_->addMemberDecoration(type_shared_memory, 0,
|
||||
spv::DecorationNonWritable);
|
||||
|
@ -1706,6 +1734,7 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
|
|||
}
|
||||
// Set up the access chain indices.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(2);
|
||||
// The only SSBO struct member.
|
||||
id_vector_temp_.push_back(const_int_0_);
|
||||
id_vector_temp_.push_back(binding_address);
|
||||
|
|
|
@ -105,6 +105,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
const ParsedLoopEndInstruction& instr) override;
|
||||
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
|
||||
|
||||
void ProcessVertexFetchInstruction(
|
||||
const ParsedVertexFetchInstruction& instr) override;
|
||||
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
|
||||
|
||||
private:
|
||||
|
@ -247,11 +249,24 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
// Index = component count - 1.
|
||||
spv::Id type_bool_vectors_[4];
|
||||
};
|
||||
spv::Id type_int_;
|
||||
spv::Id type_int4_;
|
||||
spv::Id type_uint_;
|
||||
spv::Id type_uint3_;
|
||||
spv::Id type_uint4_;
|
||||
union {
|
||||
struct {
|
||||
spv::Id type_int_;
|
||||
spv::Id type_int2_;
|
||||
spv::Id type_int3_;
|
||||
spv::Id type_int4_;
|
||||
};
|
||||
spv::Id type_int_vectors_[4];
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
spv::Id type_uint_;
|
||||
spv::Id type_uint2_;
|
||||
spv::Id type_uint3_;
|
||||
spv::Id type_uint4_;
|
||||
};
|
||||
spv::Id type_uint_vectors_[4];
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
spv::Id type_float_;
|
||||
|
@ -295,6 +310,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
spv::Id uniform_system_constants_;
|
||||
spv::Id uniform_float_constants_;
|
||||
spv::Id uniform_bool_loop_constants_;
|
||||
spv::Id uniform_fetch_constants_;
|
||||
|
||||
spv::Id buffers_shared_memory_[512 / 128];
|
||||
|
||||
|
|
|
@ -0,0 +1,283 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
void SpirvShaderTranslator::ProcessVertexFetchInstruction(
|
||||
const ParsedVertexFetchInstruction& instr) {
|
||||
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition);
|
||||
|
||||
uint32_t used_result_components = instr.result.GetUsedResultComponents();
|
||||
uint32_t needed_words = xenos::GetVertexFormatNeededWords(
|
||||
instr.attributes.data_format, used_result_components);
|
||||
if (!needed_words) {
|
||||
// Nothing to load - just constant 0/1 writes, or the swizzle includes only
|
||||
// components that don't exist in the format (writing zero instead of them).
|
||||
// Unpacking assumes at least some word is needed.
|
||||
StoreResult(instr.result, spv::NoResult);
|
||||
return;
|
||||
}
|
||||
|
||||
EnsureBuildPointAvailable();
|
||||
|
||||
// Get the base address in dwords from the bits 2:31 of the first fetch
|
||||
// constant word.
|
||||
uint32_t fetch_constant_word_0_index = instr.operands[1].storage_index << 1;
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(3);
|
||||
// The only element of the fetch constant buffer.
|
||||
id_vector_temp_.push_back(const_int_0_);
|
||||
// Vector index.
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(int(fetch_constant_word_0_index >> 2)));
|
||||
// Component index.
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(int(fetch_constant_word_0_index & 3)));
|
||||
spv::Id fetch_constant_word_0 = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_fetch_constants_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
// TODO(Triang3l): Verify the fetch constant type (that it's a vertex fetch,
|
||||
// not a texture fetch) here instead of dropping draws with invalid vertex
|
||||
// fetch constants on the CPU when proper bound checks are added - vfetch may
|
||||
// be conditional, so fetch constants may also be used conditionally.
|
||||
spv::Id address = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
|
||||
fetch_constant_word_0,
|
||||
builder_->makeUintConstant(2)));
|
||||
if (instr.attributes.stride) {
|
||||
// Convert the index to an integer by flooring or by rounding to the nearest
|
||||
// (as floor(index + 0.5) because rounding to the nearest even makes no
|
||||
// sense for addressing, both 1.5 and 2.5 would be 2).
|
||||
// http://web.archive.org/web/20100302145413/http://msdn.microsoft.com:80/en-us/library/bb313960.aspx
|
||||
spv::Id index = GetOperandComponents(LoadOperandStorage(instr.operands[0]),
|
||||
instr.operands[0], 0b0001);
|
||||
if (instr.attributes.is_index_rounded) {
|
||||
index = builder_->createBinOp(spv::OpFAdd, type_float_, index,
|
||||
builder_->makeFloatConstant(0.5f));
|
||||
builder_->addDecoration(index, spv::DecorationNoContraction);
|
||||
}
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(index);
|
||||
index = builder_->createUnaryOp(
|
||||
spv::OpConvertFToS, type_int_,
|
||||
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
|
||||
GLSLstd450Floor, id_vector_temp_));
|
||||
if (instr.attributes.stride > 1) {
|
||||
index = builder_->createBinOp(
|
||||
spv::OpIMul, type_int_, index,
|
||||
builder_->makeIntConstant(int(instr.attributes.stride)));
|
||||
}
|
||||
address = builder_->createBinOp(spv::OpIAdd, type_int_, address, index);
|
||||
}
|
||||
// Add the word offset from the instruction (signed), plus the offset of the
|
||||
// first needed word within the element.
|
||||
uint32_t first_word_index;
|
||||
xe::bit_scan_forward(needed_words, &first_word_index);
|
||||
int32_t first_word_buffer_offset =
|
||||
instr.attributes.offset + int32_t(first_word_index);
|
||||
if (first_word_buffer_offset) {
|
||||
// Add the constant word offset.
|
||||
address = builder_->createBinOp(
|
||||
spv::OpIAdd, type_int_, address,
|
||||
builder_->makeIntConstant(int(first_word_buffer_offset)));
|
||||
}
|
||||
|
||||
// Load the needed words.
|
||||
unsigned int word_composite_indices[4] = {};
|
||||
spv::Id word_composite_construct[4];
|
||||
uint32_t word_count = 0;
|
||||
uint32_t words_remaining = needed_words;
|
||||
uint32_t word_index;
|
||||
while (xe::bit_scan_forward(words_remaining, &word_index)) {
|
||||
words_remaining &= ~(1 << word_index);
|
||||
spv::Id word_address = address;
|
||||
// Add the word offset from the instruction (signed), plus the offset of the
|
||||
// word within the element.
|
||||
int32_t word_offset = instr.attributes.offset + word_index;
|
||||
if (word_offset) {
|
||||
word_address =
|
||||
builder_->createBinOp(spv::OpIAdd, type_int_, word_address,
|
||||
builder_->makeIntConstant(int(word_offset)));
|
||||
}
|
||||
word_composite_indices[word_index] = word_count;
|
||||
// FIXME(Triang3l): Bound checking is not done here, but haven't encountered
|
||||
// any games relying on out-of-bounds access. On Adreno 200 on Android (LG
|
||||
// P705), however, words (not full elements) out of glBufferData bounds
|
||||
// contain 0.
|
||||
word_composite_construct[word_count++] =
|
||||
LoadUint32FromSharedMemory(word_address);
|
||||
}
|
||||
spv::Id words;
|
||||
if (word_count > 1) {
|
||||
// Copying from the array to id_vector_temp_ now, not in the loop above,
|
||||
// because of the LoadUint32FromSharedMemory call (potentially using
|
||||
// id_vector_temp_ internally).
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(word_count);
|
||||
id_vector_temp_.insert(id_vector_temp_.cend(), word_composite_construct,
|
||||
word_composite_construct + word_count);
|
||||
words = builder_->createCompositeConstruct(
|
||||
type_uint_vectors_[word_count - 1], id_vector_temp_);
|
||||
} else {
|
||||
words = word_composite_construct[0];
|
||||
}
|
||||
|
||||
// Endian swap the words, getting the endianness from bits 0:1 of the second
|
||||
// fetch constant word.
|
||||
uint32_t fetch_constant_word_1_index = fetch_constant_word_0_index + 1;
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(3);
|
||||
// The only element of the fetch constant buffer.
|
||||
id_vector_temp_.push_back(const_int_0_);
|
||||
// Vector index.
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(int(fetch_constant_word_1_index >> 2)));
|
||||
// Component index.
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(int(fetch_constant_word_1_index & 3)));
|
||||
spv::Id fetch_constant_word_1 = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassUniform,
|
||||
uniform_fetch_constants_, id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
words = EndianSwap32Uint(
|
||||
words, builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
|
||||
fetch_constant_word_1,
|
||||
builder_->makeUintConstant(0b11)));
|
||||
|
||||
spv::Id result = spv::NoResult;
|
||||
|
||||
// Convert the format.
|
||||
uint32_t used_format_components =
|
||||
used_result_components & ((1 << xenos::GetVertexFormatComponentCount(
|
||||
instr.attributes.data_format)) -
|
||||
1);
|
||||
// If needed_words is not zero (checked in the beginning), this must not be
|
||||
// zero too. For simplicity, it's assumed that something will be unpacked
|
||||
// here.
|
||||
assert_not_zero(used_format_components);
|
||||
uint32_t used_format_component_count = xe::bit_count(used_format_components);
|
||||
spv::Id result_type = type_float_vectors_[used_format_component_count - 1];
|
||||
switch (instr.attributes.data_format) {
|
||||
// TODO(Triang3l): All format conversion.
|
||||
|
||||
case xenos::VertexFormat::k_32:
|
||||
case xenos::VertexFormat::k_32_32:
|
||||
case xenos::VertexFormat::k_32_32_32_32:
|
||||
assert_true(used_format_components == needed_words);
|
||||
if (instr.attributes.is_signed) {
|
||||
result = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_vectors_[used_format_component_count - 1],
|
||||
words);
|
||||
result =
|
||||
builder_->createUnaryOp(spv::OpConvertSToF, result_type, result);
|
||||
} else {
|
||||
result =
|
||||
builder_->createUnaryOp(spv::OpConvertUToF, result_type, words);
|
||||
}
|
||||
if (!instr.attributes.is_integer) {
|
||||
if (instr.attributes.is_signed) {
|
||||
switch (instr.attributes.signed_rf_mode) {
|
||||
case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne:
|
||||
result = builder_->createBinOp(
|
||||
spv::OpVectorTimesScalar, result_type, result,
|
||||
builder_->makeFloatConstant(1.0f / 2147483647.0f));
|
||||
builder_->addDecoration(result, spv::DecorationNoContraction);
|
||||
// No need to clamp to -1 if signed - 1/(2^31-1) is rounded to
|
||||
// 1/(2^31) as float32.
|
||||
break;
|
||||
case xenos::SignedRepeatingFractionMode::kNoZero: {
|
||||
result = builder_->createBinOp(
|
||||
spv::OpVectorTimesScalar, result_type, result,
|
||||
builder_->makeFloatConstant(1.0f / 2147483647.5f));
|
||||
builder_->addDecoration(result, spv::DecorationNoContraction);
|
||||
spv::Id const_no_zero =
|
||||
builder_->makeFloatConstant(0.5f / 2147483647.5f);
|
||||
if (used_format_component_count > 1) {
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.reserve(used_format_component_count);
|
||||
id_vector_temp_.insert(id_vector_temp_.cend(),
|
||||
used_format_component_count,
|
||||
const_no_zero);
|
||||
const_no_zero = builder_->makeCompositeConstant(
|
||||
result_type, id_vector_temp_);
|
||||
}
|
||||
result = builder_->createBinOp(spv::OpFAdd, result_type, result,
|
||||
const_no_zero);
|
||||
builder_->addDecoration(result, spv::DecorationNoContraction);
|
||||
} break;
|
||||
default:
|
||||
assert_unhandled_case(instr.attributes.signed_rf_mode);
|
||||
}
|
||||
} else {
|
||||
result = builder_->createBinOp(
|
||||
spv::OpVectorTimesScalar, result_type, result,
|
||||
builder_->makeFloatConstant(1.0f / 4294967295.0f));
|
||||
builder_->addDecoration(result, spv::DecorationNoContraction);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case xenos::VertexFormat::k_32_FLOAT:
|
||||
case xenos::VertexFormat::k_32_32_FLOAT:
|
||||
case xenos::VertexFormat::k_32_32_32_32_FLOAT:
|
||||
case xenos::VertexFormat::k_32_32_32_FLOAT:
|
||||
assert_true(used_format_components == needed_words);
|
||||
result = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_float_vectors_[word_count - 1], words);
|
||||
break;
|
||||
}
|
||||
|
||||
if (result != spv::NoResult) {
|
||||
// Apply the exponent bias.
|
||||
if (instr.attributes.exp_adjust) {
|
||||
result = builder_->createBinOp(spv::OpVectorTimesScalar,
|
||||
builder_->getTypeId(result), result,
|
||||
builder_->makeFloatConstant(std::ldexp(
|
||||
1.0f, instr.attributes.exp_adjust)));
|
||||
builder_->addDecoration(result, spv::DecorationNoContraction);
|
||||
}
|
||||
|
||||
// If any components not present in the format were requested, pad the
|
||||
// resulting vector with zeros.
|
||||
uint32_t used_missing_components =
|
||||
used_result_components & ~used_format_components;
|
||||
if (used_missing_components) {
|
||||
// Bypassing the assertion in spv::Builder::createCompositeConstruct - can
|
||||
// construct vectors by concatenating vectors, not just from individual
|
||||
// scalars.
|
||||
std::unique_ptr<spv::Instruction> composite_construct_op =
|
||||
std::make_unique<spv::Instruction>(
|
||||
builder_->getUniqueId(),
|
||||
type_float_vectors_[xe::bit_count(used_result_components) - 1],
|
||||
spv::OpCompositeConstruct);
|
||||
composite_construct_op->addIdOperand(result);
|
||||
composite_construct_op->addIdOperand(
|
||||
const_float_vectors_0_[xe::bit_count(used_missing_components) - 1]);
|
||||
result = composite_construct_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(composite_construct_op));
|
||||
}
|
||||
}
|
||||
StoreResult(instr.result, result);
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
Loading…
Reference in New Issue