[SPIR-V] vfetch: 32_* and 32_*_FLOAT formats

This commit is contained in:
Triang3l 2020-11-04 23:56:12 +03:00
parent 533bdf2114
commit a82b85a8ac
4 changed files with 336 additions and 8 deletions

View File

@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0), DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0),
DxbcSrc::LU(~uint32_t(3))); DxbcSrc::LU(~uint32_t(3)));
} }
// Add the word offset from the instruction, plus the offset of the first // Add the word offset from the instruction (signed), plus the offset of the
// needed word within the element. // first needed word within the element.
uint32_t first_word_index; uint32_t first_word_index;
xe::bit_scan_forward(needed_words, &first_word_index); xe::bit_scan_forward(needed_words, &first_word_index);
int32_t first_word_buffer_offset = int32_t first_word_buffer_offset =

View File

@ -97,8 +97,11 @@ void SpirvShaderTranslator::StartTranslation() {
type_bool3_ = builder_->makeVectorType(type_bool_, 3); type_bool3_ = builder_->makeVectorType(type_bool_, 3);
type_bool4_ = builder_->makeVectorType(type_bool_, 4); type_bool4_ = builder_->makeVectorType(type_bool_, 4);
type_int_ = builder_->makeIntType(32); type_int_ = builder_->makeIntType(32);
type_int2_ = builder_->makeVectorType(type_int_, 2);
type_int3_ = builder_->makeVectorType(type_int_, 3);
type_int4_ = builder_->makeVectorType(type_int_, 4); type_int4_ = builder_->makeVectorType(type_int_, 4);
type_uint_ = builder_->makeUintType(32); type_uint_ = builder_->makeUintType(32);
type_uint2_ = builder_->makeVectorType(type_uint_, 2);
type_uint3_ = builder_->makeVectorType(type_uint_, 3); type_uint3_ = builder_->makeVectorType(type_uint_, 3);
type_uint4_ = builder_->makeVectorType(type_uint_, 4); type_uint4_ = builder_->makeVectorType(type_uint_, 4);
type_float_ = builder_->makeFloatType(32); type_float_ = builder_->makeFloatType(32);
@ -254,6 +257,31 @@ void SpirvShaderTranslator::StartTranslation() {
main_interface_.push_back(uniform_bool_loop_constants_); main_interface_.push_back(uniform_bool_loop_constants_);
} }
// Common uniform buffer - fetch constants (32 x 6 uints packed in std140 as
// 4-component vectors).
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeArrayType(
type_uint4_, builder_->makeUintConstant(32 * 6 / 4),
sizeof(uint32_t) * 4));
builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
sizeof(uint32_t) * 4);
spv::Id type_fetch_constants =
builder_->makeStructType(id_vector_temp_, "XeFetchConstants");
builder_->addMemberName(type_fetch_constants, 0, "fetch_constants");
builder_->addMemberDecoration(type_fetch_constants, 0, spv::DecorationOffset,
0);
builder_->addDecoration(type_fetch_constants, spv::DecorationBlock);
uniform_fetch_constants_ = builder_->createVariable(
spv::NoPrecision, spv::StorageClassUniform, type_fetch_constants,
"xe_uniform_fetch_constants");
builder_->addDecoration(uniform_fetch_constants_,
spv::DecorationDescriptorSet,
int(kDescriptorSetFetchConstants));
builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0);
if (features_.spirv_version >= spv::Spv_1_4) {
main_interface_.push_back(uniform_fetch_constants_);
}
// Common storage buffers - shared memory uint[], each 128 MB or larger, // Common storage buffers - shared memory uint[], each 128 MB or larger,
// depending on what's possible on the device. glslang generates everything, // depending on what's possible on the device. glslang generates everything,
// including all the types, for each storage buffer separately. // including all the types, for each storage buffer separately.
@ -271,7 +299,7 @@ void SpirvShaderTranslator::StartTranslation() {
'0' + i; '0' + i;
spv::Id type_shared_memory = spv::Id type_shared_memory =
builder_->makeStructType(id_vector_temp_, shared_memory_struct_name); builder_->makeStructType(id_vector_temp_, shared_memory_struct_name);
builder_->addMemberName(type_shared_memory, 0, "memory"); builder_->addMemberName(type_shared_memory, 0, "shared_memory");
// TODO(Triang3l): Make writable when memexport is implemented. // TODO(Triang3l): Make writable when memexport is implemented.
builder_->addMemberDecoration(type_shared_memory, 0, builder_->addMemberDecoration(type_shared_memory, 0,
spv::DecorationNonWritable); spv::DecorationNonWritable);
@ -1706,6 +1734,7 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
} }
// Set up the access chain indices. // Set up the access chain indices.
id_vector_temp_.clear(); id_vector_temp_.clear();
id_vector_temp_.reserve(2);
// The only SSBO struct member. // The only SSBO struct member.
id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(const_int_0_);
id_vector_temp_.push_back(binding_address); id_vector_temp_.push_back(binding_address);

View File

@ -105,6 +105,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
const ParsedLoopEndInstruction& instr) override; const ParsedLoopEndInstruction& instr) override;
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
void ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) override;
void ProcessAluInstruction(const ParsedAluInstruction& instr) override; void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
private: private:
@ -247,11 +249,24 @@ class SpirvShaderTranslator : public ShaderTranslator {
// Index = component count - 1. // Index = component count - 1.
spv::Id type_bool_vectors_[4]; spv::Id type_bool_vectors_[4];
}; };
spv::Id type_int_; union {
spv::Id type_int4_; struct {
spv::Id type_uint_; spv::Id type_int_;
spv::Id type_uint3_; spv::Id type_int2_;
spv::Id type_uint4_; spv::Id type_int3_;
spv::Id type_int4_;
};
spv::Id type_int_vectors_[4];
};
union {
struct {
spv::Id type_uint_;
spv::Id type_uint2_;
spv::Id type_uint3_;
spv::Id type_uint4_;
};
spv::Id type_uint_vectors_[4];
};
union { union {
struct { struct {
spv::Id type_float_; spv::Id type_float_;
@ -295,6 +310,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id uniform_system_constants_; spv::Id uniform_system_constants_;
spv::Id uniform_float_constants_; spv::Id uniform_float_constants_;
spv::Id uniform_bool_loop_constants_; spv::Id uniform_bool_loop_constants_;
spv::Id uniform_fetch_constants_;
spv::Id buffers_shared_memory_[512 / 128]; spv::Id buffers_shared_memory_[512 / 128];

View File

@ -0,0 +1,283 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv_shader_translator.h"
#include <cmath>
#include <memory>
#include <utility>
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
#include "xenia/base/math.h"
namespace xe {
namespace gpu {
void SpirvShaderTranslator::ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) {
UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition);
uint32_t used_result_components = instr.result.GetUsedResultComponents();
uint32_t needed_words = xenos::GetVertexFormatNeededWords(
instr.attributes.data_format, used_result_components);
if (!needed_words) {
// Nothing to load - just constant 0/1 writes, or the swizzle includes only
// components that don't exist in the format (writing zero instead of them).
// Unpacking assumes at least some word is needed.
StoreResult(instr.result, spv::NoResult);
return;
}
EnsureBuildPointAvailable();
// Get the base address in dwords from the bits 2:31 of the first fetch
// constant word.
uint32_t fetch_constant_word_0_index = instr.operands[1].storage_index << 1;
id_vector_temp_.clear();
id_vector_temp_.reserve(3);
// The only element of the fetch constant buffer.
id_vector_temp_.push_back(const_int_0_);
// Vector index.
id_vector_temp_.push_back(
builder_->makeIntConstant(int(fetch_constant_word_0_index >> 2)));
// Component index.
id_vector_temp_.push_back(
builder_->makeIntConstant(int(fetch_constant_word_0_index & 3)));
spv::Id fetch_constant_word_0 = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_fetch_constants_, id_vector_temp_),
spv::NoPrecision);
// TODO(Triang3l): Verify the fetch constant type (that it's a vertex fetch,
// not a texture fetch) here instead of dropping draws with invalid vertex
// fetch constants on the CPU when proper bound checks are added - vfetch may
// be conditional, so fetch constants may also be used conditionally.
spv::Id address = builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createBinOp(spv::OpShiftRightLogical, type_uint_,
fetch_constant_word_0,
builder_->makeUintConstant(2)));
if (instr.attributes.stride) {
// Convert the index to an integer by flooring or by rounding to the nearest
// (as floor(index + 0.5) because rounding to the nearest even makes no
// sense for addressing, both 1.5 and 2.5 would be 2).
// http://web.archive.org/web/20100302145413/http://msdn.microsoft.com:80/en-us/library/bb313960.aspx
spv::Id index = GetOperandComponents(LoadOperandStorage(instr.operands[0]),
instr.operands[0], 0b0001);
if (instr.attributes.is_index_rounded) {
index = builder_->createBinOp(spv::OpFAdd, type_float_, index,
builder_->makeFloatConstant(0.5f));
builder_->addDecoration(index, spv::DecorationNoContraction);
}
id_vector_temp_.clear();
id_vector_temp_.push_back(index);
index = builder_->createUnaryOp(
spv::OpConvertFToS, type_int_,
builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_,
GLSLstd450Floor, id_vector_temp_));
if (instr.attributes.stride > 1) {
index = builder_->createBinOp(
spv::OpIMul, type_int_, index,
builder_->makeIntConstant(int(instr.attributes.stride)));
}
address = builder_->createBinOp(spv::OpIAdd, type_int_, address, index);
}
// Add the word offset from the instruction (signed), plus the offset of the
// first needed word within the element.
uint32_t first_word_index;
xe::bit_scan_forward(needed_words, &first_word_index);
int32_t first_word_buffer_offset =
instr.attributes.offset + int32_t(first_word_index);
if (first_word_buffer_offset) {
// Add the constant word offset.
address = builder_->createBinOp(
spv::OpIAdd, type_int_, address,
builder_->makeIntConstant(int(first_word_buffer_offset)));
}
// Load the needed words.
unsigned int word_composite_indices[4] = {};
spv::Id word_composite_construct[4];
uint32_t word_count = 0;
uint32_t words_remaining = needed_words;
uint32_t word_index;
while (xe::bit_scan_forward(words_remaining, &word_index)) {
words_remaining &= ~(1 << word_index);
spv::Id word_address = address;
// Add the word offset from the instruction (signed), plus the offset of the
// word within the element.
int32_t word_offset = instr.attributes.offset + word_index;
if (word_offset) {
word_address =
builder_->createBinOp(spv::OpIAdd, type_int_, word_address,
builder_->makeIntConstant(int(word_offset)));
}
word_composite_indices[word_index] = word_count;
// FIXME(Triang3l): Bound checking is not done here, but haven't encountered
// any games relying on out-of-bounds access. On Adreno 200 on Android (LG
// P705), however, words (not full elements) out of glBufferData bounds
// contain 0.
word_composite_construct[word_count++] =
LoadUint32FromSharedMemory(word_address);
}
spv::Id words;
if (word_count > 1) {
// Copying from the array to id_vector_temp_ now, not in the loop above,
// because of the LoadUint32FromSharedMemory call (potentially using
// id_vector_temp_ internally).
id_vector_temp_.clear();
id_vector_temp_.reserve(word_count);
id_vector_temp_.insert(id_vector_temp_.cend(), word_composite_construct,
word_composite_construct + word_count);
words = builder_->createCompositeConstruct(
type_uint_vectors_[word_count - 1], id_vector_temp_);
} else {
words = word_composite_construct[0];
}
// Endian swap the words, getting the endianness from bits 0:1 of the second
// fetch constant word.
uint32_t fetch_constant_word_1_index = fetch_constant_word_0_index + 1;
id_vector_temp_.clear();
id_vector_temp_.reserve(3);
// The only element of the fetch constant buffer.
id_vector_temp_.push_back(const_int_0_);
// Vector index.
id_vector_temp_.push_back(
builder_->makeIntConstant(int(fetch_constant_word_1_index >> 2)));
// Component index.
id_vector_temp_.push_back(
builder_->makeIntConstant(int(fetch_constant_word_1_index & 3)));
spv::Id fetch_constant_word_1 = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_fetch_constants_, id_vector_temp_),
spv::NoPrecision);
words = EndianSwap32Uint(
words, builder_->createBinOp(spv::OpBitwiseAnd, type_uint_,
fetch_constant_word_1,
builder_->makeUintConstant(0b11)));
spv::Id result = spv::NoResult;
// Convert the format.
uint32_t used_format_components =
used_result_components & ((1 << xenos::GetVertexFormatComponentCount(
instr.attributes.data_format)) -
1);
// If needed_words is not zero (checked in the beginning), this must not be
// zero too. For simplicity, it's assumed that something will be unpacked
// here.
assert_not_zero(used_format_components);
uint32_t used_format_component_count = xe::bit_count(used_format_components);
spv::Id result_type = type_float_vectors_[used_format_component_count - 1];
switch (instr.attributes.data_format) {
// TODO(Triang3l): All format conversion.
case xenos::VertexFormat::k_32:
case xenos::VertexFormat::k_32_32:
case xenos::VertexFormat::k_32_32_32_32:
assert_true(used_format_components == needed_words);
if (instr.attributes.is_signed) {
result = builder_->createUnaryOp(
spv::OpBitcast, type_int_vectors_[used_format_component_count - 1],
words);
result =
builder_->createUnaryOp(spv::OpConvertSToF, result_type, result);
} else {
result =
builder_->createUnaryOp(spv::OpConvertUToF, result_type, words);
}
if (!instr.attributes.is_integer) {
if (instr.attributes.is_signed) {
switch (instr.attributes.signed_rf_mode) {
case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne:
result = builder_->createBinOp(
spv::OpVectorTimesScalar, result_type, result,
builder_->makeFloatConstant(1.0f / 2147483647.0f));
builder_->addDecoration(result, spv::DecorationNoContraction);
// No need to clamp to -1 if signed - 1/(2^31-1) is rounded to
// 1/(2^31) as float32.
break;
case xenos::SignedRepeatingFractionMode::kNoZero: {
result = builder_->createBinOp(
spv::OpVectorTimesScalar, result_type, result,
builder_->makeFloatConstant(1.0f / 2147483647.5f));
builder_->addDecoration(result, spv::DecorationNoContraction);
spv::Id const_no_zero =
builder_->makeFloatConstant(0.5f / 2147483647.5f);
if (used_format_component_count > 1) {
id_vector_temp_.clear();
id_vector_temp_.reserve(used_format_component_count);
id_vector_temp_.insert(id_vector_temp_.cend(),
used_format_component_count,
const_no_zero);
const_no_zero = builder_->makeCompositeConstant(
result_type, id_vector_temp_);
}
result = builder_->createBinOp(spv::OpFAdd, result_type, result,
const_no_zero);
builder_->addDecoration(result, spv::DecorationNoContraction);
} break;
default:
assert_unhandled_case(instr.attributes.signed_rf_mode);
}
} else {
result = builder_->createBinOp(
spv::OpVectorTimesScalar, result_type, result,
builder_->makeFloatConstant(1.0f / 4294967295.0f));
builder_->addDecoration(result, spv::DecorationNoContraction);
}
}
break;
case xenos::VertexFormat::k_32_FLOAT:
case xenos::VertexFormat::k_32_32_FLOAT:
case xenos::VertexFormat::k_32_32_32_32_FLOAT:
case xenos::VertexFormat::k_32_32_32_FLOAT:
assert_true(used_format_components == needed_words);
result = builder_->createUnaryOp(
spv::OpBitcast, type_float_vectors_[word_count - 1], words);
break;
}
if (result != spv::NoResult) {
// Apply the exponent bias.
if (instr.attributes.exp_adjust) {
result = builder_->createBinOp(spv::OpVectorTimesScalar,
builder_->getTypeId(result), result,
builder_->makeFloatConstant(std::ldexp(
1.0f, instr.attributes.exp_adjust)));
builder_->addDecoration(result, spv::DecorationNoContraction);
}
// If any components not present in the format were requested, pad the
// resulting vector with zeros.
uint32_t used_missing_components =
used_result_components & ~used_format_components;
if (used_missing_components) {
// Bypassing the assertion in spv::Builder::createCompositeConstruct - can
// construct vectors by concatenating vectors, not just from individual
// scalars.
std::unique_ptr<spv::Instruction> composite_construct_op =
std::make_unique<spv::Instruction>(
builder_->getUniqueId(),
type_float_vectors_[xe::bit_count(used_result_components) - 1],
spv::OpCompositeConstruct);
composite_construct_op->addIdOperand(result);
composite_construct_op->addIdOperand(
const_float_vectors_0_[xe::bit_count(used_missing_components) - 1]);
result = composite_construct_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(composite_construct_op));
}
}
StoreResult(instr.result, result);
}
} // namespace gpu
} // namespace xe