[DXBC] dxbc.h with non-translator-specific parts

This commit is contained in:
Triang3l 2021-01-04 16:15:34 +03:00
parent 2d9326e02d
commit 9a74df491f
7 changed files with 4784 additions and 4772 deletions

1611
src/xenia/gpu/dxbc.h Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,7 @@ using namespace ucode;
void DxbcShaderTranslator::ExportToMemory_PackFixed32( void DxbcShaderTranslator::ExportToMemory_PackFixed32(
const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4], const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4],
const DxbcSrc& is_integer, const DxbcSrc& is_signed) { const dxbc::Src& is_integer, const dxbc::Src& is_signed) {
// Will insert with BFI - sign extension of red will be overwritten, not // Will insert with BFI - sign extension of red will be overwritten, not
// truncated. // truncated.
assert_not_zero(bits[0]); assert_not_zero(bits[0]);
@ -26,64 +26,64 @@ void DxbcShaderTranslator::ExportToMemory_PackFixed32(
mask |= 1 << i; mask |= 1 << i;
} }
} }
DxbcOpIf(true, is_signed); a_.OpIf(true, is_signed);
{ {
float range[4]; float range[4];
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
range[i] = bits[i] ? float((uint32_t(1) << (bits[i] - 1)) - 1) : 0.0f; range[i] = bits[i] ? float((uint32_t(1) << (bits[i] - 1)) - 1) : 0.0f;
} }
DxbcSrc range_src(DxbcSrc::LP(range)); dxbc::Src range_src(dxbc::Src::LP(range));
DxbcOpIf(false, is_integer); a_.OpIf(false, is_integer);
for (uint32_t i = 0; i < eM_count; ++i) { for (uint32_t i = 0; i < eM_count; ++i) {
uint32_t eM_temp = eM_temps[i]; uint32_t eM_temp = eM_temps[i];
DxbcOpMul(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp), range_src); a_.OpMul(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp), range_src);
} }
DxbcOpEndIf(); a_.OpEndIf();
for (uint32_t i = 0; i < eM_count; ++i) { for (uint32_t i = 0; i < eM_count; ++i) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[i], mask)); dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[i], mask));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[i])); dxbc::Src eM_src(dxbc::Src::R(eM_temps[i]));
// TODO(Triang3l): NaN should become zero, not -range. // TODO(Triang3l): NaN should become zero, not -range.
DxbcOpMax(eM_dest, eM_src, -range_src); a_.OpMax(eM_dest, eM_src, -range_src);
DxbcOpMin(eM_dest, eM_src, range_src); a_.OpMin(eM_dest, eM_src, range_src);
} }
} }
DxbcOpElse(); a_.OpElse();
{ {
float range[4]; float range[4];
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
range[i] = float((uint32_t(1) << bits[i]) - 1); range[i] = float((uint32_t(1) << bits[i]) - 1);
} }
DxbcSrc range_src(DxbcSrc::LP(range)); dxbc::Src range_src(dxbc::Src::LP(range));
DxbcOpIf(false, is_integer); a_.OpIf(false, is_integer);
for (uint32_t i = 0; i < eM_count; ++i) { for (uint32_t i = 0; i < eM_count; ++i) {
uint32_t eM_temp = eM_temps[i]; uint32_t eM_temp = eM_temps[i];
DxbcOpMul(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp), range_src); a_.OpMul(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp), range_src);
} }
DxbcOpEndIf(); a_.OpEndIf();
for (uint32_t i = 0; i < eM_count; ++i) { for (uint32_t i = 0; i < eM_count; ++i) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[i], mask)); dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[i], mask));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[i])); dxbc::Src eM_src(dxbc::Src::R(eM_temps[i]));
DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(0.0f)); a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(0.0f));
DxbcOpMin(eM_dest, eM_src, range_src); a_.OpMin(eM_dest, eM_src, range_src);
} }
} }
DxbcOpEndIf(); a_.OpEndIf();
for (uint32_t i = 0; i < eM_count; ++i) { for (uint32_t i = 0; i < eM_count; ++i) {
uint32_t eM_temp = eM_temps[i]; uint32_t eM_temp = eM_temps[i];
// Round to the nearest integer, according to the rules of handling integer // Round to the nearest integer, according to the rules of handling integer
// formats in Direct3D. // formats in Direct3D.
// TODO(Triang3l): Round by adding +-0.5, not with round_ne. // TODO(Triang3l): Round by adding +-0.5, not with round_ne.
DxbcOpRoundNE(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp)); a_.OpRoundNE(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp));
DxbcOpFToI(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp)); a_.OpFToI(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp));
DxbcDest eM_packed_dest(DxbcDest::R(eM_temp, 0b0001)); dxbc::Dest eM_packed_dest(dxbc::Dest::R(eM_temp, 0b0001));
DxbcSrc eM_packed_src(DxbcSrc::R(eM_temp, DxbcSrc::kXXXX)); dxbc::Src eM_packed_src(dxbc::Src::R(eM_temp, dxbc::Src::kXXXX));
uint32_t offset = bits[0]; uint32_t offset = bits[0];
for (uint32_t j = 1; j < 4; ++j) { for (uint32_t j = 1; j < 4; ++j) {
if (!bits[j]) { if (!bits[j]) {
continue; continue;
} }
DxbcOpBFI(eM_packed_dest, DxbcSrc::LU(bits[j]), DxbcSrc::LU(offset), a_.OpBFI(eM_packed_dest, dxbc::Src::LU(bits[j]), dxbc::Src::LU(offset),
DxbcSrc::R(eM_temp).Select(j), eM_packed_src); dxbc::Src::R(eM_temp).Select(j), eM_packed_src);
offset += bits[j]; offset += bits[j];
} }
} }
@ -100,40 +100,40 @@ void DxbcShaderTranslator::ExportToMemory() {
// Safety check if the shared memory is bound as UAV. // Safety check if the shared memory is bound as UAV.
system_constants_used_ |= 1ull << kSysConst_Flags_Index; system_constants_used_ |= 1ull << kSysConst_Flags_Index;
DxbcOpAnd(DxbcDest::R(control_temp, 0b0001), a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001),
DxbcSrc::CB(cbuffer_index_system_constants_, dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants), uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec) kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp), .Select(kSysConst_Flags_Comp),
DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV)); dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV));
if (is_pixel_shader()) { if (is_pixel_shader()) {
// Disable memexport in pixel shaders with supersampling since VPOS is // Disable memexport in pixel shaders with supersampling since VPOS is
// ambiguous. // ambiguous.
if (edram_rov_used_) { if (edram_rov_used_) {
system_constants_used_ |= 1ull system_constants_used_ |= 1ull
<< kSysConst_EdramResolutionSquareScale_Index; << kSysConst_EdramResolutionSquareScale_Index;
DxbcOpULT(DxbcDest::R(control_temp, 0b0010), a_.OpULT(dxbc::Dest::R(control_temp, 0b0010),
DxbcSrc::CB(cbuffer_index_system_constants_, dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants), uint32_t(CbufferRegister::kSystemConstants),
kSysConst_EdramResolutionSquareScale_Vec) kSysConst_EdramResolutionSquareScale_Vec)
.Select(kSysConst_EdramResolutionSquareScale_Comp), .Select(kSysConst_EdramResolutionSquareScale_Comp),
DxbcSrc::LU(2)); dxbc::Src::LU(2));
DxbcOpAnd(DxbcDest::R(control_temp, 0b0001), a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001),
DxbcSrc::R(control_temp, DxbcSrc::kXXXX), dxbc::Src::R(control_temp, dxbc::Src::kXXXX),
DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
} else { } else {
// Enough to check just Y because it's scaled for both 2x and 4x. // Enough to check just Y because it's scaled for both 2x and 4x.
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
DxbcOpMovC(DxbcDest::R(control_temp, 0b0001), a_.OpMovC(dxbc::Dest::R(control_temp, 0b0001),
DxbcSrc::CB(cbuffer_index_system_constants_, dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants), uint32_t(CbufferRegister::kSystemConstants),
kSysConst_SampleCountLog2_Vec) kSysConst_SampleCountLog2_Vec)
.Select(kSysConst_SampleCountLog2_Comp + 1), .Select(kSysConst_SampleCountLog2_Comp + 1),
DxbcSrc::LU(0), DxbcSrc::R(control_temp, DxbcSrc::kXXXX)); dxbc::Src::LU(0), dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
} }
} }
// Check if memexport can be done. // Check if memexport can be done.
DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kXXXX)); a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
// control_temp.x is now free. // control_temp.x is now free.
for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) { for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) {
@ -160,21 +160,21 @@ void DxbcShaderTranslator::ExportToMemory() {
} }
// Swap red and blue if needed. // Swap red and blue if needed.
DxbcOpAnd(DxbcDest::R(control_temp, 0b0001), a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001),
DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ), dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ),
DxbcSrc::LU(uint32_t(1) << 19)); dxbc::Src::LU(uint32_t(1) << 19));
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j]; uint32_t eM_temp = eM_temps[j];
DxbcOpMovC(DxbcDest::R(eM_temp, 0b0101), a_.OpMovC(dxbc::Dest::R(eM_temp, 0b0101),
DxbcSrc::R(control_temp, DxbcSrc::kXXXX), dxbc::Src::R(control_temp, dxbc::Src::kXXXX),
DxbcSrc::R(eM_temp, 0b000010), DxbcSrc::R(eM_temp)); dxbc::Src::R(eM_temp, 0b000010), dxbc::Src::R(eM_temp));
} }
// Initialize element size in control_temp.x to 4 bytes as this is the most // Initialize element size in control_temp.x to 4 bytes as this is the most
// common size. // common size.
DxbcDest element_size_dest(DxbcDest::R(control_temp, 0b0001)); dxbc::Dest element_size_dest(dxbc::Dest::R(control_temp, 0b0001));
DxbcSrc element_size_src(DxbcSrc::R(control_temp, DxbcSrc::kXXXX)); dxbc::Src element_size_src(dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
DxbcOpMov(element_size_dest, DxbcSrc::LU(4)); a_.OpMov(element_size_dest, dxbc::Src::LU(4));
// Each eM should get a packed value in the destination format now. // Each eM should get a packed value in the destination format now.
@ -182,285 +182,288 @@ void DxbcShaderTranslator::ExportToMemory() {
// Y - signedness if fixed-point. // Y - signedness if fixed-point.
// Z - fractional/integer if fixed-point. // Z - fractional/integer if fixed-point.
// W - color format. // W - color format.
DxbcOpUBFE(DxbcDest::R(control_temp, 0b1110), DxbcSrc::LU(0, 1, 1, 6), a_.OpUBFE(dxbc::Dest::R(control_temp, 0b1110), dxbc::Src::LU(0, 1, 1, 6),
DxbcSrc::LU(0, 16, 17, 8), DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ)); dxbc::Src::LU(0, 16, 17, 8),
DxbcSrc is_signed(DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ));
DxbcSrc is_integer(DxbcSrc::R(control_temp, DxbcSrc::kZZZZ)); dxbc::Src is_signed(dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
dxbc::Src is_integer(dxbc::Src::R(control_temp, dxbc::Src::kZZZZ));
// Convert and pack the format. // Convert and pack the format.
DxbcOpSwitch(DxbcSrc::R(control_temp, DxbcSrc::kWWWW)); a_.OpSwitch(dxbc::Src::R(control_temp, dxbc::Src::kWWWW));
// control_temp.w is now free. // control_temp.w is now free.
{ {
// k_8_8_8_8 // k_8_8_8_8
// k_8_8_8_8_AS_16_16_16_16 // k_8_8_8_8_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8)));
DxbcOpCase( a_.OpCase(dxbc::Src::LU(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16))); uint32_t(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16)));
{ {
uint32_t bits[4] = {8, 8, 8, 8}; uint32_t bits[4] = {8, 8, 8, 8};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed); is_signed);
} }
DxbcOpBreak(); a_.OpBreak();
// k_2_10_10_10 // k_2_10_10_10
// k_2_10_10_10_AS_16_16_16_16 // k_2_10_10_10_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_2_10_10_10))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_2_10_10_10)));
DxbcOpCase(DxbcSrc::LU( a_.OpCase(dxbc::Src::LU(
uint32_t(xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16))); uint32_t(xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16)));
{ {
uint32_t bits[4] = {10, 10, 10, 2}; uint32_t bits[4] = {10, 10, 10, 2};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed); is_signed);
} }
DxbcOpBreak(); a_.OpBreak();
// k_10_11_11 // k_10_11_11
// k_10_11_11_AS_16_16_16_16 // k_10_11_11_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_10_11_11))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_10_11_11)));
DxbcOpCase( a_.OpCase(dxbc::Src::LU(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16))); uint32_t(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16)));
{ {
uint32_t bits[4] = {11, 11, 10}; uint32_t bits[4] = {11, 11, 10};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed); is_signed);
} }
DxbcOpBreak(); a_.OpBreak();
// k_11_11_10 // k_11_11_10
// k_11_11_10_AS_16_16_16_16 // k_11_11_10_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_11_11_10))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_11_11_10)));
DxbcOpCase( a_.OpCase(dxbc::Src::LU(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16))); uint32_t(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16)));
{ {
uint32_t bits[4] = {10, 11, 11}; uint32_t bits[4] = {10, 11, 11};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed); is_signed);
} }
DxbcOpBreak(); a_.OpBreak();
// k_16_16 // k_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16)));
{ {
uint32_t bits[4] = {16, 16}; uint32_t bits[4] = {16, 16};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed); is_signed);
} }
DxbcOpBreak(); a_.OpBreak();
// k_16_16_16_16 // k_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(8)); a_.OpMov(element_size_dest, dxbc::Src::LU(8));
DxbcOpIf(true, is_signed); a_.OpIf(true, is_signed);
{ {
DxbcOpIf(false, is_integer); a_.OpIf(false, is_integer);
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j]; uint32_t eM_temp = eM_temps[j];
DxbcOpMul(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp), a_.OpMul(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp),
DxbcSrc::LF(32767.0f)); dxbc::Src::LF(32767.0f));
} }
DxbcOpEndIf(); a_.OpEndIf();
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
// TODO(Triang3l): NaN should become zero, not -range. // TODO(Triang3l): NaN should become zero, not -range.
DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(-32767.0f)); a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(-32767.0f));
DxbcOpMin(eM_dest, eM_src, DxbcSrc::LF(32767.0f)); a_.OpMin(eM_dest, eM_src, dxbc::Src::LF(32767.0f));
} }
} }
DxbcOpElse(); a_.OpElse();
{ {
DxbcOpIf(false, is_integer); a_.OpIf(false, is_integer);
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j]; uint32_t eM_temp = eM_temps[j];
DxbcOpMul(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp), a_.OpMul(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp),
DxbcSrc::LF(65535.0f)); dxbc::Src::LF(65535.0f));
} }
DxbcOpEndIf(); a_.OpEndIf();
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(0.0f)); a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(0.0f));
DxbcOpMin(eM_dest, eM_src, DxbcSrc::LF(65535.0f)); a_.OpMin(eM_dest, eM_src, dxbc::Src::LF(65535.0f));
} }
} }
DxbcOpEndIf(); a_.OpEndIf();
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j]; uint32_t eM_temp = eM_temps[j];
// Round to the nearest integer, according to the rules of handling // Round to the nearest integer, according to the rules of handling
// integer formats in Direct3D. // integer formats in Direct3D.
// TODO(Triang3l): Round by adding +-0.5, not with round_ne. // TODO(Triang3l): Round by adding +-0.5, not with round_ne.
DxbcOpRoundNE(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp)); a_.OpRoundNE(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp));
DxbcOpFToI(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp)); a_.OpFToI(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp));
DxbcOpBFI(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::LU(16), a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::LU(16),
DxbcSrc::LU(16), DxbcSrc::R(eM_temp, 0b1101), dxbc::Src::LU(16), dxbc::Src::R(eM_temp, 0b1101),
DxbcSrc::R(eM_temp, 0b1000)); dxbc::Src::R(eM_temp, 0b1000));
} }
DxbcOpBreak(); a_.OpBreak();
// k_16_16_FLOAT // k_16_16_FLOAT
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_FLOAT))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_FLOAT)));
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j]; uint32_t eM_temp = eM_temps[j];
DxbcOpF32ToF16(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::R(eM_temp)); a_.OpF32ToF16(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::R(eM_temp));
DxbcOpBFI(DxbcDest::R(eM_temp, 0b0001), DxbcSrc::LU(16), a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0001), dxbc::Src::LU(16),
DxbcSrc::LU(16), DxbcSrc::R(eM_temp, DxbcSrc::kYYYY), dxbc::Src::LU(16), dxbc::Src::R(eM_temp, dxbc::Src::kYYYY),
DxbcSrc::R(eM_temp, DxbcSrc::kXXXX)); dxbc::Src::R(eM_temp, dxbc::Src::kXXXX));
} }
DxbcOpBreak(); a_.OpBreak();
// k_16_16_16_16_FLOAT // k_16_16_16_16_FLOAT
DxbcOpCase( a_.OpCase(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16_FLOAT))); dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16_FLOAT)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(8)); a_.OpMov(element_size_dest, dxbc::Src::LU(8));
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j]; uint32_t eM_temp = eM_temps[j];
DxbcOpF32ToF16(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp)); a_.OpF32ToF16(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp));
DxbcOpBFI(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::LU(16), a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::LU(16),
DxbcSrc::LU(16), DxbcSrc::R(eM_temp, 0b1101), dxbc::Src::LU(16), dxbc::Src::R(eM_temp, 0b1101),
DxbcSrc::R(eM_temp, 0b1000)); dxbc::Src::R(eM_temp, 0b1000));
} }
DxbcOpBreak(); a_.OpBreak();
// k_32_FLOAT // k_32_FLOAT
// Already in the destination format, 4 bytes per element already // Already in the destination format, 4 bytes per element already
// selected. // selected.
// k_32_32_FLOAT // k_32_32_FLOAT
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_32_32_FLOAT))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_32_32_FLOAT)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(8)); a_.OpMov(element_size_dest, dxbc::Src::LU(8));
// Already in the destination format. // Already in the destination format.
DxbcOpBreak(); a_.OpBreak();
// k_32_32_32_32_FLOAT // k_32_32_32_32_FLOAT
DxbcOpCase( a_.OpCase(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_32_32_32_32_FLOAT))); dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_32_32_32_32_FLOAT)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(16)); a_.OpMov(element_size_dest, dxbc::Src::LU(16));
// Already in the destination format. // Already in the destination format.
DxbcOpBreak(); a_.OpBreak();
} }
DxbcOpEndSwitch(); a_.OpEndSwitch();
// control_temp.yz are now free. // control_temp.yz are now free.
// Do endian swap. // Do endian swap.
{ {
DxbcDest endian_dest(DxbcDest::R(control_temp, 0b0010)); dxbc::Dest endian_dest(dxbc::Dest::R(control_temp, 0b0010));
DxbcSrc endian_src(DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); dxbc::Src endian_src(dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
// Extract endianness into control_temp.y. // Extract endianness into control_temp.y.
DxbcOpAnd(endian_dest, DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ), a_.OpAnd(endian_dest, dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ),
DxbcSrc::LU(0b111)); dxbc::Src::LU(0b111));
// Change 8-in-64 and 8-in-128 to 8-in-32. // Change 8-in-64 and 8-in-128 to 8-in-32.
for (uint32_t j = 0; j < 2; ++j) { for (uint32_t j = 0; j < 2; ++j) {
DxbcOpIEq(DxbcDest::R(control_temp, 0b0100), endian_src, a_.OpIEq(dxbc::Dest::R(control_temp, 0b0100), endian_src,
DxbcSrc::LU(uint32_t(j ? xenos::Endian128::k8in128 dxbc::Src::LU(uint32_t(j ? xenos::Endian128::k8in128
: xenos::Endian128::k8in64))); : xenos::Endian128::k8in64)));
for (uint32_t k = 0; k < eM_count; ++k) { for (uint32_t k = 0; k < eM_count; ++k) {
uint32_t eM_temp = eM_temps[k]; uint32_t eM_temp = eM_temps[k];
DxbcOpMovC(DxbcDest::R(eM_temp), a_.OpMovC(dxbc::Dest::R(eM_temp),
DxbcSrc::R(control_temp, DxbcSrc::kZZZZ), dxbc::Src::R(control_temp, dxbc::Src::kZZZZ),
DxbcSrc::R(eM_temp, j ? 0b00011011 : 0b10110001), dxbc::Src::R(eM_temp, j ? 0b00011011 : 0b10110001),
DxbcSrc::R(eM_temp)); dxbc::Src::R(eM_temp));
} }
DxbcOpMovC(endian_dest, DxbcSrc::R(control_temp, DxbcSrc::kZZZZ), a_.OpMovC(endian_dest, dxbc::Src::R(control_temp, dxbc::Src::kZZZZ),
DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32)), endian_src); dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32)),
endian_src);
} }
uint32_t swap_temp = PushSystemTemp(); uint32_t swap_temp = PushSystemTemp();
DxbcDest swap_temp_dest(DxbcDest::R(swap_temp)); dxbc::Dest swap_temp_dest(dxbc::Dest::R(swap_temp));
DxbcSrc swap_temp_src(DxbcSrc::R(swap_temp)); dxbc::Src swap_temp_src(dxbc::Src::R(swap_temp));
// 8-in-16 or one half of 8-in-32. // 8-in-16 or one half of 8-in-32.
DxbcOpSwitch(endian_src); a_.OpSwitch(endian_src);
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in16))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in16)));
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32)));
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
// Temp = X0Z0. // Temp = X0Z0.
DxbcOpAnd(swap_temp_dest, eM_src, DxbcSrc::LU(0x00FF00FF)); a_.OpAnd(swap_temp_dest, eM_src, dxbc::Src::LU(0x00FF00FF));
// eM = YZW0. // eM = YZW0.
DxbcOpUShR(eM_dest, eM_src, DxbcSrc::LU(8)); a_.OpUShR(eM_dest, eM_src, dxbc::Src::LU(8));
// eM = Y0W0. // eM = Y0W0.
DxbcOpAnd(eM_dest, eM_src, DxbcSrc::LU(0x00FF00FF)); a_.OpAnd(eM_dest, eM_src, dxbc::Src::LU(0x00FF00FF));
// eM = YXWZ. // eM = YXWZ.
DxbcOpUMAd(eM_dest, swap_temp_src, DxbcSrc::LU(256), eM_src); a_.OpUMAd(eM_dest, swap_temp_src, dxbc::Src::LU(256), eM_src);
} }
DxbcOpBreak(); a_.OpBreak();
DxbcOpEndSwitch(); a_.OpEndSwitch();
// 16-in-32 or another half of 8-in-32. // 16-in-32 or another half of 8-in-32.
DxbcOpSwitch(endian_src); a_.OpSwitch(endian_src);
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32)));
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k16in32))); a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k16in32)));
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
// Temp = ZW00. // Temp = ZW00.
DxbcOpUShR(swap_temp_dest, eM_src, DxbcSrc::LU(16)); a_.OpUShR(swap_temp_dest, eM_src, dxbc::Src::LU(16));
// eM = ZWXY. // eM = ZWXY.
DxbcOpBFI(eM_dest, DxbcSrc::LU(16), DxbcSrc::LU(16), eM_src, a_.OpBFI(eM_dest, dxbc::Src::LU(16), dxbc::Src::LU(16), eM_src,
swap_temp_src); swap_temp_src);
} }
DxbcOpBreak(); a_.OpBreak();
DxbcOpEndSwitch(); a_.OpEndSwitch();
// Release swap_temp. // Release swap_temp.
PopSystemTemp(); PopSystemTemp();
} }
// control_temp.yz are now free. // control_temp.yz are now free.
DxbcDest address_dest(DxbcDest::R(eA_temp, 0b0001)); dxbc::Dest address_dest(dxbc::Dest::R(eA_temp, 0b0001));
DxbcSrc address_src(DxbcSrc::R(eA_temp, DxbcSrc::kXXXX)); dxbc::Src address_src(dxbc::Src::R(eA_temp, dxbc::Src::kXXXX));
// Multiply the base address by dword size, also dropping the 0x40000000 // Multiply the base address by dword size, also dropping the 0x40000000
// bit. // bit.
DxbcOpIShL(address_dest, address_src, DxbcSrc::LU(2)); a_.OpIShL(address_dest, address_src, dxbc::Src::LU(2));
// Drop the exponent in the element index. // Drop the exponent in the element index.
DxbcOpAnd(DxbcDest::R(eA_temp, 0b0010), DxbcSrc::R(eA_temp, DxbcSrc::kYYYY), a_.OpAnd(dxbc::Dest::R(eA_temp, 0b0010),
DxbcSrc::LU((1 << 23) - 1)); dxbc::Src::R(eA_temp, dxbc::Src::kYYYY),
dxbc::Src::LU((1 << 23) - 1));
// Add the offset of the first written element to the base address. // Add the offset of the first written element to the base address.
DxbcOpUMAd(address_dest, DxbcSrc::R(eA_temp, DxbcSrc::kYYYY), a_.OpUMAd(address_dest, dxbc::Src::R(eA_temp, dxbc::Src::kYYYY),
element_size_src, address_src); element_size_src, address_src);
// Do the writes. // Do the writes.
DxbcSrc eM_written_src( dxbc::Src eM_written_src(
DxbcSrc::R(system_temp_memexport_written_).Select(i >> 2)); dxbc::Src::R(system_temp_memexport_written_).Select(i >> 2));
uint32_t eM_written_base = 1u << ((i & 3) << 3); uint32_t eM_written_base = 1u << ((i & 3) << 3);
for (uint32_t j = 0; j < eM_count; ++j) { for (uint32_t j = 0; j < eM_count; ++j) {
// Go to the next eM#. // Go to the next eM#.
uint32_t eM_relative_offset = eM_offsets[j] - (j ? eM_offsets[j - 1] : 0); uint32_t eM_relative_offset = eM_offsets[j] - (j ? eM_offsets[j - 1] : 0);
if (eM_relative_offset) { if (eM_relative_offset) {
if (eM_relative_offset == 1) { if (eM_relative_offset == 1) {
DxbcOpIAdd(address_dest, element_size_src, address_src); a_.OpIAdd(address_dest, element_size_src, address_src);
} else { } else {
DxbcOpUMAd(address_dest, DxbcSrc::LU(eM_relative_offset), a_.OpUMAd(address_dest, dxbc::Src::LU(eM_relative_offset),
element_size_src, address_src); element_size_src, address_src);
} }
} }
// Check if the eM# was actually written to on the execution path. // Check if the eM# was actually written to on the execution path.
DxbcOpAnd(DxbcDest::R(control_temp, 0b0010), eM_written_src, a_.OpAnd(dxbc::Dest::R(control_temp, 0b0010), eM_written_src,
DxbcSrc::LU(eM_written_base << eM_offsets[j])); dxbc::Src::LU(eM_written_base << eM_offsets[j]));
DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
// Write the element of the needed size. // Write the element of the needed size.
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
DxbcOpSwitch(element_size_src); a_.OpSwitch(element_size_src);
for (uint32_t k = 1; k <= 4; k <<= 1) { for (uint32_t k = 1; k <= 4; k <<= 1) {
DxbcOpCase(DxbcSrc::LU(k * 4)); a_.OpCase(dxbc::Src::LU(k * 4));
if (uav_index_shared_memory_ == kBindingIndexUnallocated) { if (uav_index_shared_memory_ == kBindingIndexUnallocated) {
uav_index_shared_memory_ = uav_count_++; uav_index_shared_memory_ = uav_count_++;
} }
DxbcOpStoreRaw( a_.OpStoreRaw(
DxbcDest::U(uav_index_shared_memory_, dxbc::Dest::U(uav_index_shared_memory_,
uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1), uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1),
address_src, eM_src); address_src, eM_src);
DxbcOpBreak(); a_.OpBreak();
} }
DxbcOpEndSwitch(); a_.OpEndSwitch();
DxbcOpEndIf(); a_.OpEndIf();
} }
// control_temp.y is now free. // control_temp.y is now free.
} }
// Close the memexport possibility check. // Close the memexport possibility check.
DxbcOpEndIf(); a_.OpEndIf();
// Release control_temp. // Release control_temp.
PopSystemTemp(); PopSystemTemp();

File diff suppressed because it is too large Load Diff