[D3D12] DXBC: Some vector ALU opcodes
This commit is contained in:
parent
b1247bd708
commit
72e6526067
|
@ -390,6 +390,7 @@ void DxbcShaderTranslator::StartTranslation() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Request global system temporary variables.
|
// Request global system temporary variables.
|
||||||
|
system_temp_pv_ = PushSystemTemp();
|
||||||
system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
|
system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
|
||||||
system_temp_aL_ = PushSystemTemp(true);
|
system_temp_aL_ = PushSystemTemp(true);
|
||||||
system_temp_loop_count_ = PushSystemTemp(true);
|
system_temp_loop_count_ = PushSystemTemp(true);
|
||||||
|
@ -401,10 +402,11 @@ void DxbcShaderTranslator::CompletePixelShader() {}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompleteShaderCode() {
|
void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
// Release the following system temporary values so epilogue can reuse them:
|
// Release the following system temporary values so epilogue can reuse them:
|
||||||
|
// - system_temp_pv_.
|
||||||
// - system_temp_ps_pc_p0_a0_.
|
// - system_temp_ps_pc_p0_a0_.
|
||||||
// - system_temp_aL_.
|
// - system_temp_aL_.
|
||||||
// - system_temp_loop_count_.
|
// - system_temp_loop_count_.
|
||||||
PopSystemTemp(3);
|
PopSystemTemp(4);
|
||||||
|
|
||||||
// Write stage-specific epilogue.
|
// Write stage-specific epilogue.
|
||||||
if (is_vertex_shader()) {
|
if (is_vertex_shader()) {
|
||||||
|
@ -1068,11 +1070,149 @@ void DxbcShaderTranslator::UnloadDxbcSourceOperand(
|
||||||
void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
const ParsedAluInstruction& instr) {
|
const ParsedAluInstruction& instr) {
|
||||||
// TODO(Triang3l): Predicate.
|
// TODO(Triang3l): Predicate.
|
||||||
|
|
||||||
|
// True if the result is only in X and all components should be remapped to X
|
||||||
|
// while storing.
|
||||||
|
bool replicate_result = false;
|
||||||
|
|
||||||
|
DxbcSourceOperand dxbc_operands[3];
|
||||||
|
uint32_t operand_length_sums[3];
|
||||||
|
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
||||||
|
LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]);
|
||||||
|
operand_length_sums[i] = DxbcSourceOperandLength(dxbc_operands[i]);
|
||||||
|
if (i != 0) {
|
||||||
|
operand_length_sums[i] += operand_length_sums[i - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (instr.vector_opcode) {
|
||||||
|
case AluVectorOpcode::kAdd:
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
break;
|
||||||
|
case AluVectorOpcode::kMul:
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
break;
|
||||||
|
case AluVectorOpcode::kMax:
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
break;
|
||||||
|
// TODO(Triang3l): Many other opcodes.
|
||||||
|
case AluVectorOpcode::kDp4:
|
||||||
|
// Replicated implicitly.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DP4) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
break;
|
||||||
|
case AluVectorOpcode::kDp3:
|
||||||
|
// Replicated implicitly.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DP3) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
break;
|
||||||
|
case AluVectorOpcode::kDp2Add:
|
||||||
|
// (dot(src0.xy, src1.xy) + src2.x).xxxx
|
||||||
|
replicate_result = true;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DP2) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
3 + operand_length_sums[1]));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[0]);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[1]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||||
|
5 + DxbcSourceOperandLength(dxbc_operands[2])));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
UseDxbcSourceOperand(dxbc_operands[2]);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
break;
|
||||||
|
// TODO(Triang3l): Many other opcodes.
|
||||||
|
default:
|
||||||
|
// assert_always();
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
||||||
|
UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(Triang3l): Store pv.
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
void DxbcShaderTranslator::ProcessScalarAluInstruction(
|
||||||
const ParsedAluInstruction& instr) {
|
const ParsedAluInstruction& instr) {
|
||||||
// TODO(Triang3l): Predicate.
|
// TODO(Triang3l): Predicate.
|
||||||
|
|
||||||
|
DxbcSourceOperand dxbc_operands[3];
|
||||||
|
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
||||||
|
LoadDxbcSourceOperand(instr.operands[i], dxbc_operands[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < uint32_t(instr.operand_count); ++i) {
|
||||||
|
UnloadDxbcSourceOperand(dxbc_operands[instr.operand_count - 1 - i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::ProcessAluInstruction(
|
void DxbcShaderTranslator::ProcessAluInstruction(
|
||||||
|
|
|
@ -378,6 +378,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// translation (for the declaration).
|
// translation (for the declaration).
|
||||||
uint32_t system_temp_count_max_;
|
uint32_t system_temp_count_max_;
|
||||||
|
|
||||||
|
// Vector ALU result/scratch (since Xenos write masks can contain swizzles).
|
||||||
|
uint32_t system_temp_pv_;
|
||||||
// Temporary register ID for previous scalar result, program counter,
|
// Temporary register ID for previous scalar result, program counter,
|
||||||
// predicate and absolute address register.
|
// predicate and absolute address register.
|
||||||
uint32_t system_temp_ps_pc_p0_a0_;
|
uint32_t system_temp_ps_pc_p0_a0_;
|
||||||
|
|
Loading…
Reference in New Issue