diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 0dc6278ec..2976229ac 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -1754,7 +1754,306 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( // Release fixed_temp. PopSystemTemp(); - // TODO(Triang3l): Convert to 7e3. + // *************************************************************************** + // 7e3 conversion begins here. + // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp + // *************************************************************************** + + // Check if the target format is 7e3 and the conversion is needed (this is + // pretty long, better to branch here). + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + D3D10_SB_INSTRUCTION_TEST_NONZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(flags_temp); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + uint32_t f10_temp1 = PushSystemTemp(), f10_temp2 = PushSystemTemp(); + + // Assuming RGB is already clamped to [0.0, 31.875], and alpha is a float and + // already clamped and multiplied by 3 to get [0.0, 3.0]. + + // Calculate the denormalized value if the numbers are too small to be + // represented as normalized 7e3 into f10_temp1. + + // t1 = f32 & 0x7FFFFF + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0x7FFFFF); + shader_code_.push_back(0x7FFFFF); + shader_code_.push_back(0x7FFFFF); + shader_code_.push_back(0x7FFFFF); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // t1 = (f32 & 0x7FFFFF) | 0x800000 + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0x800000); + shader_code_.push_back(0x800000); + shader_code_.push_back(0x800000); + shader_code_.push_back(0x800000); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // t2 = f32 >> 23 + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp2); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(23); + shader_code_.push_back(23); + shader_code_.push_back(23); + shader_code_.push_back(23); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // t2 = 125 - (f32 >> 23) + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp2); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(125); + shader_code_.push_back(125); + shader_code_.push_back(125); + shader_code_.push_back(125); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1) | + ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); + shader_code_.push_back( + ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(D3D10_SB_OPERAND_MODIFIER_NEG)); + shader_code_.push_back(f10_temp2); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // t1 = ((f32 & 0x7FFFFF) | 0x800000) >> (125 - (f32 >> 23)) + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_temp2); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Check if the numbers are too small to be represented as a normalized 7e3. + // t2 = f32 < 0x3E800000 + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp2); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0x3E800000); + shader_code_.push_back(0x3E800000); + shader_code_.push_back(0x3E800000); + shader_code_.push_back(0x3E800000); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Bias the exponent. + // f32 += 0xC2000000 + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0xC2000000u); + shader_code_.push_back(0xC2000000u); + shader_code_.push_back(0xC2000000u); + shader_code_.push_back(0xC2000000u); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // Replace the number in f32 with a denormalized one if needed. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_temp2); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // Build the 7e3 numbers. + // t1 = f32 >> 16 + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(16); + shader_code_.push_back(16); + shader_code_.push_back(16); + shader_code_.push_back(16); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // t1 = (f32 >> 16) & 1 + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_temp1); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(1); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // f10 = f32 + 0x7FFF + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0x7FFF); + shader_code_.push_back(0x7FFF); + shader_code_.push_back(0x7FFF); + shader_code_.push_back(0x7FFF); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // f10 = f32 + 0x7FFF + ((f32 >> 16) & 1) + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_temp1); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // f10 = (f32 + 0x7FFF + ((f32 >> 16) & 1)) >> 16 + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(16); + shader_code_.push_back(16); + shader_code_.push_back(16); + shader_code_.push_back(16); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // f10 = ((f32 + 0x7FFF + ((f32 >> 16) & 1)) >> 16) & 0x3FF + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0x3FF); + shader_code_.push_back(0x3FF); + shader_code_.push_back(0x3FF); + shader_code_.push_back(0x3FF); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Release f10_temp1 and f10_temp2. + PopSystemTemp(2); + + // Convert alpha to fixed-point. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); + shader_code_.push_back(source_and_scratch_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(source_and_scratch_temp); + ++stat_.instruction_count; + ++stat_.conversion_instruction_count; + + // 7e3 conversion done. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + + // *************************************************************************** + // 7e3 conversion ends here. + // *************************************************************************** // Convert to 16-bit float. uint32_t f16_temp = PushSystemTemp();