[DXBC] OpenGL snorm vfetch because why not

This commit is contained in:
Triang3l 2020-05-24 19:25:35 +03:00
parent 04ca28c902
commit aa73d652ba
5 changed files with 83 additions and 36 deletions

View File

@ -303,24 +303,43 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
DxbcOpIToF(result_unpacked_dest, result_src); DxbcOpIToF(result_unpacked_dest, result_src);
if (!instr.attributes.is_integer) { if (!instr.attributes.is_integer) {
float packed_scales[4] = {}; float packed_scales[4] = {};
uint32_t packed_scales_mask = 0b0000; switch (instr.attributes.signed_rf_mode) {
for (uint32_t i = 0; i < 4; ++i) { case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: {
if (!(used_format_components & (1 << i))) { uint32_t packed_scales_mask = 0b0000;
continue; for (uint32_t i = 0; i < 4; ++i) {
} if (!(used_format_components & (1 << i))) {
if (packed_widths[i] > 2) { continue;
packed_scales[i] = }
1.0f / float((uint32_t(1) << (packed_widths[i] - 1)) - 1); if (packed_widths[i] > 2) {
packed_scales_mask |= 1 << i; packed_scales[i] =
} 1.0f / float((uint32_t(1) << (packed_widths[i] - 1)) - 1);
packed_scales_mask |= 1 << i;
}
}
if (packed_scales_mask) {
DxbcOpMul(DxbcDest::R(system_temp_result_, packed_scales_mask),
result_src, DxbcSrc::LP(packed_scales));
}
// Treat both -(2^(n-1)) and -(2^(n-1)-1) as -1.
DxbcOpMax(result_unpacked_dest, result_src, DxbcSrc::LF(-1.0f));
} break;
case xenos::SignedRepeatingFractionMode::kNoZero: {
float packed_zeros[4] = {};
for (uint32_t i = 0; i < 4; ++i) {
if (!(used_format_components & (1 << i))) {
continue;
}
assert_not_zero(packed_widths[i]);
packed_zeros[i] =
1.0f / float((uint32_t(1) << packed_widths[i]) - 1);
packed_scales[i] = 2.0f * packed_zeros[i];
}
DxbcOpMAd(result_unpacked_dest, result_src,
DxbcSrc::LP(packed_scales), DxbcSrc::LP(packed_zeros));
} break;
default:
assert_unhandled_case(instr.attributes.signed_rf_mode);
} }
if (packed_scales_mask) {
DxbcOpMul(DxbcDest::R(system_temp_result_, packed_scales_mask),
result_src, DxbcSrc::LP(packed_scales));
}
// Treat both -(2^(n-1)) and -(2^(n-1)-1) as -1, according to Direct3D
// snorm to float conversion rules.
DxbcOpMax(result_unpacked_dest, result_src, DxbcSrc::LF(-1.0f));
} }
} else { } else {
DxbcOpUBFE(result_unpacked_dest, DxbcSrc::LP(packed_widths), DxbcOpUBFE(result_unpacked_dest, DxbcSrc::LP(packed_widths),
@ -367,12 +386,26 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
DxbcOpUToF(result_unpacked_dest, result_src); DxbcOpUToF(result_unpacked_dest, result_src);
} }
if (!instr.attributes.is_integer) { if (!instr.attributes.is_integer) {
DxbcOpMul( if (instr.attributes.is_signed) {
result_unpacked_dest, result_src, switch (instr.attributes.signed_rf_mode) {
DxbcSrc::LF(instr.attributes.is_signed ? (1.0f / 2147483647.0f) case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne:
: (1.0f / 4294967295.0f))); DxbcOpMul(result_unpacked_dest, result_src,
// No need to clamp to -1 if signed - 1/(2^31-1) is rounded to DxbcSrc::LF(1.0f / 2147483647.0f));
// 1/(2^31) as float32. // No need to clamp to -1 if signed - 1/(2^31-1) is rounded to
// 1/(2^31) as float32.
break;
case xenos::SignedRepeatingFractionMode::kNoZero:
DxbcOpMAd(result_unpacked_dest, result_src,
DxbcSrc::LF(1.0f / 2147483647.5f),
DxbcSrc::LF(0.5f / 2147483647.5f));
break;
default:
assert_unhandled_case(instr.attributes.signed_rf_mode);
}
} else {
DxbcOpMul(result_unpacked_dest, result_src,
DxbcSrc::LF(1.0f / 4294967295.0f));
}
} }
break; break;
case VertexFormat::k_32_FLOAT: case VertexFormat::k_32_FLOAT:

View File

@ -428,6 +428,8 @@ struct ParsedVertexFetchInstruction {
int32_t exp_adjust = 0; int32_t exp_adjust = 0;
// Prefetch count minus 1. // Prefetch count minus 1.
uint32_t prefetch_count = 0; uint32_t prefetch_count = 0;
xenos::SignedRepeatingFractionMode signed_rf_mode =
xenos::SignedRepeatingFractionMode::kZeroClampMinusOne;
bool is_index_rounded = false; bool is_index_rounded = false;
bool is_signed = false; bool is_signed = false;
bool is_integer = false; bool is_integer = false;

View File

@ -993,6 +993,7 @@ void ShaderTranslator::ParseVertexFetchInstruction(
i.attributes.is_index_rounded = op.is_index_rounded(); i.attributes.is_index_rounded = op.is_index_rounded();
i.attributes.is_signed = op.is_signed(); i.attributes.is_signed = op.is_signed();
i.attributes.is_integer = !op.is_normalized(); i.attributes.is_integer = !op.is_normalized();
i.attributes.signed_rf_mode = op.signed_rf_mode();
// Store for later use by mini fetches. // Store for later use by mini fetches.
if (!op.is_mini_fetch()) { if (!op.is_mini_fetch()) {

View File

@ -505,6 +505,9 @@ struct VertexFetchInstruction {
int exp_adjust() const { return data_.exp_adjust; } int exp_adjust() const { return data_.exp_adjust; }
bool is_signed() const { return data_.fomat_comp_all == 1; } bool is_signed() const { return data_.fomat_comp_all == 1; }
bool is_normalized() const { return data_.num_format_all == 0; } bool is_normalized() const { return data_.num_format_all == 0; }
// Returns the raw value of the 1-bit signed_rf_mode_all field of the fetch
// instruction, typed as xenos::SignedRepeatingFractionMode.
xenos::SignedRepeatingFractionMode signed_rf_mode() const {
return data_.signed_rf_mode_all;
}
bool is_index_rounded() const { return data_.is_index_rounded == 1; } bool is_index_rounded() const { return data_.is_index_rounded == 1; }
// Dword stride, [0, 255]. // Dword stride, [0, 255].
uint32_t stride() const { return data_.stride; } uint32_t stride() const { return data_.stride; }
@ -536,7 +539,7 @@ struct VertexFetchInstruction {
uint32_t dst_swiz : 12; uint32_t dst_swiz : 12;
uint32_t fomat_comp_all : 1; uint32_t fomat_comp_all : 1;
uint32_t num_format_all : 1; uint32_t num_format_all : 1;
uint32_t signed_rf_mode_all : 1; xenos::SignedRepeatingFractionMode signed_rf_mode_all : 1;
uint32_t is_index_rounded : 1; uint32_t is_index_rounded : 1;
VertexFormat format : 6; VertexFormat format : 6;
uint32_t reserved2 : 2; uint32_t reserved2 : 2;

View File

@ -183,11 +183,11 @@ enum class IndexFormat : uint32_t {
kInt32, kInt32,
}; };
// GPUSURFACENUMBER from a game .pdb. "Repeat" means repeating fraction, it's // SurfaceNumberX from yamato_enum.h.
// what ATI calls normalized.
enum class SurfaceNumFormat : uint32_t { enum class SurfaceNumFormat : uint32_t {
kUnsignedRepeat = 0, kUnsignedRepeatingFraction = 0,
kSignedRepeat = 1, // Microsoft-style, scale factor (2^(n-1))-1.
kSignedRepeatingFraction = 1,
kUnsignedInteger = 2, kUnsignedInteger = 2,
kSignedInteger = 3, kSignedInteger = 3,
kFloat = 7, kFloat = 7,
@ -516,6 +516,14 @@ inline bool IsMajorModeExplicit(MajorMode major_mode,
primitive_type >= PrimitiveType::kExplicitMajorModeForceStart; primitive_type >= PrimitiveType::kExplicitMajorModeForceStart;
} }
// Conversion rule applied when signed repeating-fraction (normalized) integer
// data is turned into floating point. The enumerators are stored in 1-bit
// fields, so their numeric values are spelled out explicitly.
enum class SignedRepeatingFractionMode : uint32_t {
  // Microsoft-style mapping: the two most negative encodings both end up as
  // -1 (the lowest one lands slightly below -1 and is clamped).
  kZeroClampMinusOne = 0,
  // OpenGL "alternate mapping": no encoding corresponds to exactly zero.
  kNoZero = 1,
};
// instr_arbitrary_filter_t // instr_arbitrary_filter_t
enum class ArbitraryFilter : uint32_t { enum class ArbitraryFilter : uint32_t {
k2x4Sym = 0, k2x4Sym = 0,
@ -699,14 +707,14 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
// which can be texture components 0/1/2/3 or constant 0/1) and R6xx // which can be texture components 0/1/2/3 or constant 0/1) and R6xx
// (signedness is FORMAT_COMP_X/Y/Z/W, while the swizzle is DST_SEL_X/Y/Z/W, // (signedness is FORMAT_COMP_X/Y/Z/W, while the swizzle is DST_SEL_X/Y/Z/W,
// which is named in resources the same as DST_SEL in fetch clauses). // which is named in resources the same as DST_SEL in fetch clauses).
TextureSign sign_x : 2; // +2 TextureSign sign_x : 2; // +2
TextureSign sign_y : 2; // +4 TextureSign sign_y : 2; // +4
TextureSign sign_z : 2; // +6 TextureSign sign_z : 2; // +6
TextureSign sign_w : 2; // +8 TextureSign sign_w : 2; // +8
ClampMode clamp_x : 3; // +10 ClampMode clamp_x : 3; // +10
ClampMode clamp_y : 3; // +13 ClampMode clamp_y : 3; // +13
ClampMode clamp_z : 3; // +16 ClampMode clamp_z : 3; // +16
uint32_t signed_rf_mode_all : 1; // +19 xenos::SignedRepeatingFractionMode signed_rf_mode_all : 1; // +19
// TODO(Triang3l): 1 or 2 dim_tbd bits? // TODO(Triang3l): 1 or 2 dim_tbd bits?
uint32_t unk_0 : 2; // +20 uint32_t unk_0 : 2; // +20
uint32_t pitch : 9; // +22 byte_pitch >> 5 uint32_t pitch : 9; // +22 byte_pitch >> 5