From 4a2329da25d1d7eb2db7f8f8cba61193fe544b46 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Sun, 2 Sep 2018 21:12:36 +0300
Subject: [PATCH] [D3D12] DXBC source operand usage tokens

---
 src/xenia/gpu/dxbc_shader_translator.cc | 190 ++++++++++++++++++++++++
 src/xenia/gpu/dxbc_shader_translator.h  |  52 +++++++
 2 files changed, 242 insertions(+)

diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc
index 801274231..95d19bb8c 100644
--- a/src/xenia/gpu/dxbc_shader_translator.cc
+++ b/src/xenia/gpu/dxbc_shader_translator.cc
@@ -510,6 +510,196 @@ std::vector DxbcShaderTranslator::CompleteTranslation() {
   return shader_object_bytes;
 }
 
+void DxbcShaderTranslator::LoadDxbcSourceOperand(
+    const InstructionOperand& operand, DxbcSourceOperand& dxbc_operand) {
+  // TODO(Triang3l): Load source operands.
+}
+
+// Returns the number of dwords UseDxbcSourceOperand appends for the operand.
+// Must be kept in sync with the tokens written there.
+uint32_t DxbcShaderTranslator::DxbcSourceOperandLength(
+    const DxbcSourceOperand& operand) const {
+  uint32_t length;
+  switch (operand.type) {
+    case DxbcSourceOperand::Type::kRegister:
+    case DxbcSourceOperand::Type::kIntermediateRegister:
+      // Either a game register (for non-indexable GPRs) or the intermediate
+      // register with the data loaded (for indexable GPRs, bool and loop
+      // constants).
+      length = 2;  // Operand token + register index.
+      break;
+    case DxbcSourceOperand::Type::kConstantFloat:
+      if (operand.is_dynamic_indexed) {
+        // Constant buffer, 3D index - immediate 0, immediate + register 1,
+        // register 2.
+        length = 7;
+      } else {
+        // Constant buffer, 3D immediate index.
+        length = 4;  // Operand token + 3 indices.
+      }
+      break;
+    default:
+      // Pre-negated literal of zeros and ones (no extension dword), or a
+      // totally invalid operand replaced by a literal.
+      return 5;  // Operand token + 4 literal dwords.
+  }
+  // Modifier extension - neg/abs or non-uniform binding index.
+  if (operand.is_negated || operand.is_absolute_value ||
+      (operand.type == DxbcSourceOperand::Type::kConstantFloat &&
+       operand.is_dynamic_indexed)) {
+    ++length;
+  }
+  return length;
+}
+
+// Appends the operand token, the optional modifier extension token, and the
+// operand's index or literal dwords to shader_code_.
+void DxbcShaderTranslator::UseDxbcSourceOperand(
+    const DxbcSourceOperand& operand) {
+  // Build OperandToken1 for modifiers (negate, absolute, minimum precision,
+  // non-uniform binding index) - if it has any, it will be non-zero.
+  uint32_t modifiers = 0;
+  if (operand.is_negated && operand.is_absolute_value) {
+    modifiers |= D3D10_SB_OPERAND_MODIFIER_ABSNEG
+                 << D3D10_SB_OPERAND_MODIFIER_SHIFT;
+  } else if (operand.is_negated) {
+    modifiers |= D3D10_SB_OPERAND_MODIFIER_NEG
+                 << D3D10_SB_OPERAND_MODIFIER_SHIFT;
+  } else if (operand.is_absolute_value) {
+    modifiers |= D3D10_SB_OPERAND_MODIFIER_ABS
+                 << D3D10_SB_OPERAND_MODIFIER_SHIFT;
+  }
+  // Dynamic constant indices can have any values, and we use pages bound to
+  // different b#.
+  if (operand.type == DxbcSourceOperand::Type::kConstantFloat &&
+      operand.is_dynamic_indexed) {
+    modifiers |= ENCODE_D3D12_SB_OPERAND_NON_UNIFORM(1);
+  }
+  if (modifiers != 0) {
+    // Mark the extension as containing modifiers.
+    modifiers |= ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(
+        D3D10_SB_EXTENDED_OPERAND_MODIFIER);
+  }
+  uint32_t extended_bit = ENCODE_D3D10_SB_OPERAND_EXTENDED(modifiers);
+
+  // Actually write the operand tokens.
+  switch (operand.type) {
+    case DxbcSourceOperand::Type::kRegister:
+      shader_code_.push_back(
+          EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP,
+                                      operand.swizzle, 1) |
+          extended_bit);
+      if (modifiers != 0) {
+        shader_code_.push_back(modifiers);
+      }
+      shader_code_.push_back(operand.index);
+      break;
+    case DxbcSourceOperand::Type::kConstantFloat:
+      rdef_constants_used_ |= 1ull
+                              << uint32_t(RdefConstantIndex::kFloatConstants);
+      if (operand.is_dynamic_indexed) {
+        // Index loaded as uint2 in the intermediate register, the page number
+        // in X, the vector number in the page in Y.
+        shader_code_.push_back(
+            EncodeVectorSwizzledOperand(
+                D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, operand.swizzle, 3,
+                D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
+                D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE,
+                D3D10_SB_OPERAND_INDEX_RELATIVE) |
+            extended_bit);
+        if (modifiers != 0) {
+          shader_code_.push_back(modifiers);
+        }
+        // Dimension 0 (CB#) - immediate.
+        shader_code_.push_back(
+            uint32_t(RdefConstantBufferIndex::kFloatConstants));
+        // Dimension 1 (b#) - immediate + temporary X.
+        shader_code_.push_back(uint32_t(CbufferRegister::kFloatConstantsFirst));
+        shader_code_.push_back(
+            EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
+        shader_code_.push_back(operand.intermediate_temp_register);
+        // Dimension 2 (vector) - temporary Y.
+        shader_code_.push_back(
+            EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
+        shader_code_.push_back(operand.intermediate_temp_register);
+      } else {
+        shader_code_.push_back(
+            EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
+                                        operand.swizzle, 3) |
+            extended_bit);
+        if (modifiers != 0) {
+          shader_code_.push_back(modifiers);
+        }
+        shader_code_.push_back(
+            uint32_t(RdefConstantBufferIndex::kFloatConstants));
+        shader_code_.push_back(uint32_t(CbufferRegister::kFloatConstantsFirst) +
+                               operand.index / kFloatConstantsPerPage);
+        shader_code_.push_back(operand.index % kFloatConstantsPerPage);
+      }
+      break;
+    case DxbcSourceOperand::Type::kIntermediateRegister:
+      // Already loaded as float to the intermediate temporary register.
+      shader_code_.push_back(
+          EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP,
+                                      operand.swizzle, 1) |
+          extended_bit);
+      if (modifiers != 0) {
+        shader_code_.push_back(modifiers);
+      }
+      shader_code_.push_back(operand.intermediate_temp_register);
+      break;
+    default:
+      // Only zeros and ones in the swizzle, or the safest replacement for an
+      // invalid operand (such as a fetch constant).
+      shader_code_.push_back(EncodeVectorSwizzledOperand(
+          D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
+      for (uint32_t i = 0; i < 4; ++i) {
+        if (operand.index & (1u << i)) {
+          // -1.0f or 1.0f as IEEE 754 single-precision bit patterns.
+          shader_code_.push_back(operand.is_negated ? 0xBF800000u
+                                                    : 0x3F800000u);
+        } else {
+          shader_code_.push_back(0);
+        }
+      }
+  }
+}
+
+// Pops the system temporary register if the operand was given one.
+void DxbcShaderTranslator::UnloadDxbcSourceOperand(
+    const DxbcSourceOperand& operand) {
+  if (operand.intermediate_temp_register !=
+      DxbcSourceOperand::kIntermediateTempRegisterNone) {
+    PopSystemTemp();
+  }
+}
+
+void DxbcShaderTranslator::ProcessVectorAluInstruction(
+    const ParsedAluInstruction& instr) {
+  // TODO(Triang3l): Predicate.
+}
+
+void DxbcShaderTranslator::ProcessScalarAluInstruction(
+    const ParsedAluInstruction& instr) {
+  // TODO(Triang3l): Predicate.
+}
+
+// Routes a parsed ALU instruction to the vector or scalar handler; kNop emits
+// nothing.
+void DxbcShaderTranslator::ProcessAluInstruction(
+    const ParsedAluInstruction& instr) {
+  switch (instr.type) {
+    case ParsedAluInstruction::Type::kNop:
+      break;
+    case ParsedAluInstruction::Type::kVector:
+      ProcessVectorAluInstruction(instr);
+      break;
+    case ParsedAluInstruction::Type::kScalar:
+      ProcessScalarAluInstruction(instr);
+      break;
+  }
+}
+
 uint32_t DxbcShaderTranslator::AppendString(std::vector& dest,
                                             const char* source) {
   size_t size = std::strlen(source) + 1;
diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h
index 029b124eb..6a549339b 100644
--- a/src/xenia/gpu/dxbc_shader_translator.h
+++ b/src/xenia/gpu/dxbc_shader_translator.h
@@ -66,6 +66,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
 
   std::vector CompleteTranslation() override;
 
+  void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
+
  private:
   static constexpr uint32_t kFloatConstantsPerPage = 32;
   static constexpr uint32_t kFloatConstantPageCount = 8;
@@ -195,6 +197,54 @@ class DxbcShaderTranslator : public ShaderTranslator {
   void CompletePixelShader();
   void CompleteShaderCode();
 
+  // Abstract 4-component vector source operand.
+  struct DxbcSourceOperand {
+    enum class Type {
+      // GPR number in the index - used only when GPRs are not dynamically
+      // indexed in the shader and there are no constant zeros and ones in the
+      // swizzle.
+      kRegister,
+      // Immediate: float constant vector number in the index.
+      // Dynamic: intermediate X contains page number, intermediate Y contains
+      // vector number in the page.
+      kConstantFloat,
+      // The whole value preloaded to the intermediate register - used for GPRs
+      // when they are indexable, for bool/loop constants pre-converted to
+      // float, and for other operands if their swizzle contains 0 or 1.
+      kIntermediateRegister,
+      // Literal vector of zeros and positive or negative ones - when the
+      // swizzle contains only them, or when the parsed operand is invalid (for
+      // example, if it's a fetch constant in a non-tfetch texture instruction).
+      // 0 or 1 specified in the index as bits, can be negated.
+      kZerosOnes,
+    };
+
+    Type type = Type::kZerosOnes;  // With index 0, a literal of all zeros.
+    uint32_t index = 0;
+    bool is_dynamic_indexed = false;
+
+    uint32_t swizzle = 0;
+    bool is_negated = false;
+    bool is_absolute_value = false;
+
+    // Temporary register (from PushSystemTemp) with data required to access the
+    // value in multiple operations, or kIntermediateTempRegisterNone if none.
+    uint32_t intermediate_temp_register = kIntermediateTempRegisterNone;
+    static constexpr uint32_t kIntermediateTempRegisterNone = UINT32_MAX;
+  };
+  // Each Load must be followed by Unload, otherwise there may be a temporary
+  // register leak.
+  void LoadDxbcSourceOperand(const InstructionOperand& operand,
+                             DxbcSourceOperand& dxbc_operand);
+  // Number of tokens this operand adds to the instruction length when used.
+  uint32_t DxbcSourceOperandLength(const DxbcSourceOperand& operand) const;
+  // Writes the operand access tokens to the instruction.
+  void UseDxbcSourceOperand(const DxbcSourceOperand& operand);
+  void UnloadDxbcSourceOperand(const DxbcSourceOperand& operand);
+  // Called by ProcessAluInstruction for kVector and kScalar instructions.
+  void ProcessVectorAluInstruction(const ParsedAluInstruction& instr);
+  void ProcessScalarAluInstruction(const ParsedAluInstruction& instr);
+
   // Appends a string to a DWORD stream, returns the DWORD-aligned length.
   static uint32_t AppendString(std::vector& dest, const char* source);
 
@@ -350,7 +400,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
     uint32_t float_instruction_count;
     uint32_t int_instruction_count;
     uint32_t uint_instruction_count;
+    // endif, ret.
     uint32_t static_flow_control_count;
+    // if (but not else).
     uint32_t dynamic_flow_control_count;
     // Unknown in Wine.
     uint32_t macro_instruction_count;