diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 95d19bb8c..50a338d47 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -512,7 +512,126 @@ std::vector DxbcShaderTranslator::CompleteTranslation() { void DxbcShaderTranslator::LoadDxbcSourceOperand( const InstructionOperand& operand, DxbcSourceOperand& dxbc_operand) { - // TODO(Triang3l): Load source operands. + // Initialize the values to their defaults. + dxbc_operand.is_dynamic_indexed = false; + dxbc_operand.is_negated = operand.is_negated; + dxbc_operand.is_absolute_value = operand.is_absolute_value; + dxbc_operand.intermediate_register = + DxbcSourceOperand::kIntermediateRegisterNone; + + if (operand.component_count == 0) { + // No components requested, probably totally invalid - give something more + // or less safe (zeros) and exit. + assert_always(); + dxbc_operand.type = DxbcSourceOperand::Type::kZerosOnes; + dxbc_operand.index = 0; + return; + } + + // Make the DXBC swizzle, and also check whether there are any components with + // constant zero or one values (in this case, the operand will have to be + // loaded into the intermediate register) and if there are any real components + // at all (if there aren't, a literal can just be loaded). + uint32_t swizzle = 0; + uint32_t constant_components = 0; + uint32_t constant_component_values = 0; + for (uint32_t i = 0; i < uint32_t(operand.component_count); ++i) { + if (operand.components[i] <= SwizzleSource::kW) { + swizzle |= uint32_t(operand.components[i]) << (2 * i); + } else { + constant_components |= 1 << i; + if (operand.components[i] == SwizzleSource::k1) { + constant_component_values |= 1 << i; + } + } + } + // Replicate the last component's swizzle into all unused components. + uint32_t component_last = uint32_t(operand.component_count) - 1; + for (uint32_t i = uint32_t(operand.component_count); i < 4; ++i) { + swizzle |= ((swizzle >> (2 * component_last)) & 0x3) << (2 * i); + constant_components |= ((constant_components >> component_last) & 0x1) << i; + constant_component_values |= + ((constant_component_values >> component_last) & 0x1) << i; + } + dxbc_operand.swizzle = swizzle; + + // If all components are constant, just write a literal. + if (constant_components == 0xF) { + dxbc_operand.type = DxbcSourceOperand::Type::kZerosOnes; + dxbc_operand.index = constant_component_values; + return; + } + + // If the index is dynamic, choose where it's taken from. + uint32_t dynamic_address_register; + uint32_t dynamic_address_component; + if (operand.storage_addressing_mode == + InstructionStorageAddressingMode::kAddressRelative) { + // Addressed by aL.x. + dynamic_address_register = system_temp_aL_; + dynamic_address_component = 0; + } else { + // Addressed by a0. + dynamic_address_register = system_temp_ps_pc_p0_a0_; + dynamic_address_component = 3; + } + + // Actually load the operand. + switch (operand.storage_source) { + case InstructionStorageSource::kRegister: + if (uses_register_dynamic_addressing()) { + // GPRs are in x0 - need to load to the intermediate register (indexable + // temps are only accessible via mov load/store). + if (dxbc_operand.intermediate_register == + DxbcSourceOperand::kIntermediateRegisterNone) { + dxbc_operand.intermediate_register = PushSystemTemp(); + } + dxbc_operand.type = DxbcSourceOperand::Type::kIntermediateRegister; + if (operand.storage_addressing_mode == + InstructionStorageAddressingMode::kStatic) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(dxbc_operand.intermediate_register); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, kSwizzleXYZW, 2)); + shader_code_.push_back(0); + shader_code_.push_back(uint32_t(operand.storage_index)); + } else { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(dxbc_operand.intermediate_register); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, kSwizzleXYZW, 2, + D3D10_SB_OPERAND_INDEX_IMMEDIATE32, + D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE)); + shader_code_.push_back(0); + shader_code_.push_back(uint32_t(operand.storage_index)); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, dynamic_address_component, 1)); + shader_code_.push_back(dynamic_address_register); + } + ++stat_.instruction_count; + ++stat_.array_instruction_count; + } else { + // GPRs are in r# - can access directly. + dxbc_operand.type = DxbcSourceOperand::Type::kRegister; + dxbc_operand.index = uint32_t(operand.storage_index); + } + break; + // TODO(Triang3l): Load other types. + default: + dxbc_operand.type = DxbcSourceOperand::Type::kZerosOnes; + dxbc_operand.index = constant_component_values; + return; + } + + // TODO(Triang3l): Load constant components. } uint32_t DxbcShaderTranslator::DxbcSourceOperandLength( @@ -613,11 +732,11 @@ void DxbcShaderTranslator::UseDxbcSourceOperand( shader_code_.push_back(uint32_t(CbufferRegister::kFloatConstantsFirst)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(operand.intermediate_temp_register); + shader_code_.push_back(operand.intermediate_register); // Dimension 2 (vector) - temporary Y. shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(operand.intermediate_temp_register); + shader_code_.push_back(operand.intermediate_register); } else { shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, @@ -642,7 +761,7 @@ void DxbcShaderTranslator::UseDxbcSourceOperand( if (modifiers != 0) { shader_code_.push_back(modifiers); } - shader_code_.push_back(operand.intermediate_temp_register); + shader_code_.push_back(operand.intermediate_register); break; default: // Only zeros and ones in the swizzle, or the safest replacement for an @@ -662,8 +781,8 @@ void DxbcShaderTranslator::UseDxbcSourceOperand( void DxbcShaderTranslator::UnloadDxbcSourceOperand( const DxbcSourceOperand& operand) { - if (operand.intermediate_temp_register != - DxbcSourceOperand::kIntermediateTempRegisterNone) { + if (operand.intermediate_register != + DxbcSourceOperand::kIntermediateRegisterNone) { PopSystemTemp(); } } diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 6a549339b..707979f69 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -229,8 +229,8 @@ class DxbcShaderTranslator : public ShaderTranslator { // Temporary register containing data required to access the value if it has // to be accessed in multiple operations (allocated with PushSystemTemp). - uint32_t intermediate_temp_register; - static constexpr uint32_t kIntermediateTempRegisterNone = UINT32_MAX; + uint32_t intermediate_register; + static constexpr uint32_t kIntermediateRegisterNone = UINT32_MAX; }; // Each Load must be followed by Unload, otherwise there may be a temporary // register leak.