[D3D12] DXBC vertex fetch (hangs GPU currently)

This commit is contained in:
Triang3l 2018-09-07 00:36:15 +03:00
parent 4212d2296a
commit edcb537f9e
1 changed file with 212 additions and 3 deletions

View File

@ -1796,13 +1796,14 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
assert_unhandled_case(instr.attributes.data_format);
return;
}
// Get the resulting component count.
uint32_t component_count =
// Get the result write mask.
uint32_t result_component_count =
GetVertexFormatComponentCount(instr.attributes.data_format);
if (component_count == 0) {
if (result_component_count == 0) {
assert_always();
return;
}
uint32_t result_write_mask = (1 << result_component_count) - 1;
// TODO(Triang3l): Predicate.
@ -1901,6 +1902,214 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
// Byte swap the data.
SwapVertexData(vfetch_index, (1 << load_dword_count) - 1);
// Get the data needed for unpacking and converting.
bool extract_signed = instr.attributes.is_signed;
uint32_t extract_widths[4] = {}, extract_offsets[4] = {};
uint32_t extract_swizzle = kSwizzleXXXX;
float normalize_scales[4] = {};
switch (instr.attributes.data_format) {
case VertexFormat::k_8_8_8_8:
extract_widths[0] = extract_widths[1] = extract_widths[2] =
extract_widths[3] = 8;
// Assuming little endian ByteAddressBuffer Load.
extract_offsets[1] = 8;
extract_offsets[2] = 16;
extract_offsets[3] = 24;
normalize_scales[0] = normalize_scales[1] = normalize_scales[2] =
normalize_scales[3] = instr.attributes.is_signed ? (1.0f / 127.0f)
: (1.0f / 255.0f);
break;
case VertexFormat::k_2_10_10_10:
extract_widths[0] = extract_widths[1] = extract_widths[2] = 10;
extract_widths[3] = 2;
extract_offsets[1] = 10;
extract_offsets[2] = 20;
extract_offsets[3] = 30;
normalize_scales[0] = normalize_scales[1] = normalize_scales[2] =
instr.attributes.is_signed ? (1.0f / 511.0f) : (1.0f / 1023.0f);
normalize_scales[3] = instr.attributes.is_signed ? 1.0f : (1.0f / 3.0f);
break;
case VertexFormat::k_10_11_11:
extract_widths[0] = extract_widths[1] = 11;
extract_widths[2] = 10;
extract_offsets[1] = 11;
extract_offsets[2] = 22;
normalize_scales[0] = normalize_scales[1] =
instr.attributes.is_signed ? (1.0f / 1023.0f) : (1.0f / 2047.0f);
normalize_scales[2] =
instr.attributes.is_signed ? (1.0f / 511.0f) : (1.0f / 1023.0f);
break;
case VertexFormat::k_11_11_10:
extract_widths[0] = 10;
extract_widths[1] = extract_widths[2] = 11;
extract_offsets[1] = 10;
extract_offsets[2] = 21;
normalize_scales[0] =
instr.attributes.is_signed ? (1.0f / 511.0f) : (1.0f / 1023.0f);
normalize_scales[1] = normalize_scales[2] =
instr.attributes.is_signed ? (1.0f / 1023.0f) : (1.0f / 2047.0f);
break;
case VertexFormat::k_16_16:
extract_widths[0] = extract_widths[1] = 16;
extract_offsets[1] = 16;
normalize_scales[0] = normalize_scales[1] =
instr.attributes.is_signed ? (1.0f / 32767.0f) : (1.0f / 65535.0f);
break;
case VertexFormat::k_16_16_16_16:
extract_widths[0] = extract_widths[1] = extract_widths[2] =
extract_widths[3] = 16;
extract_offsets[1] = extract_offsets[3] = 16;
extract_swizzle = 0b01010000;
normalize_scales[0] = normalize_scales[1] =
instr.attributes.is_signed ? (1.0f / 32767.0f) : (1.0f / 65535.0f);
break;
case VertexFormat::k_16_16_FLOAT:
extract_signed = false;
extract_widths[0] = extract_widths[1] = 16;
extract_offsets[1] = 16;
break;
case VertexFormat::k_16_16_16_16_FLOAT:
extract_signed = false;
extract_widths[0] = extract_widths[1] = extract_widths[2] =
extract_widths[3] = 16;
extract_offsets[1] = extract_offsets[3] = 16;
extract_swizzle = 0b01010000;
break;
// For 32-bit, extraction is not done at all, so its parameters are ignored.
case VertexFormat::k_32:
case VertexFormat::k_32_32:
case VertexFormat::k_32_32_32_32:
normalize_scales[0] = normalize_scales[1] = normalize_scales[2] =
normalize_scales[3] =
instr.attributes.is_signed ? (1.0f / 2147483647.0f)
: (1.0f / 4294967295.0f);
break;
}
// Extract components from packed data if needed.
if (extract_widths[0] != 0) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(extract_signed ? D3D11_SB_OPCODE_IBFE
: D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, result_write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(extract_widths[0]);
shader_code_.push_back(extract_widths[1]);
shader_code_.push_back(extract_widths[2]);
shader_code_.push_back(extract_widths[3]);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(extract_offsets[0]);
shader_code_.push_back(extract_offsets[1]);
shader_code_.push_back(extract_offsets[2]);
shader_code_.push_back(extract_offsets[3]);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, extract_swizzle, 1));
shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
if (extract_signed) {
++stat_.int_instruction_count;
} else {
++stat_.uint_instruction_count;
}
}
// Convert to float and normalize if needed.
if (instr.attributes.data_format == VertexFormat::k_16_16_FLOAT ||
instr.attributes.data_format == VertexFormat::k_16_16_16_16_FLOAT) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_F16TOF32) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, result_write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.conversion_instruction_count;
} else if (normalize_scales[0] != 0.0f) {
// If no normalize_scales, it's a float value already. Otherwise, convert to
// float and normalize if needed.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
instr.attributes.is_signed ? D3D10_SB_OPCODE_ITOF
: D3D10_SB_OPCODE_UTOF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, result_write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.conversion_instruction_count;
if (!instr.attributes.is_integer) {
// Normalize.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, result_write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
for (uint32_t i = 0; i < 4; ++i) {
shader_code_.push_back(
reinterpret_cast<const uint32_t*>(normalize_scales)[i]);
}
++stat_.instruction_count;
++stat_.float_instruction_count;
// Clamp to -1 (both -127 and -128 should be -1 in graphics APIs for
// snorm8).
if (instr.attributes.is_signed) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, result_write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0xBF800000u);
shader_code_.push_back(0xBF800000u);
shader_code_.push_back(0xBF800000u);
shader_code_.push_back(0xBF800000u);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
}
}
// Zero the unused components if 32-bit components were loaded: such data is
// not bfe'd, whereas for bfe'd formats the unused components would have been
// zeroed already.
if (extract_widths[0] == 0 && result_write_mask != 0b1111) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111 & ~result_write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.mov_instruction_count;
}
StoreResult(instr.result, system_temp_pv_, false);
}
void DxbcShaderTranslator::ProcessVectorAluInstruction(