Merge branch 'master' into vulkan
This commit is contained in:
commit
cacf702948
|
@ -68,32 +68,34 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
||||||
break;
|
break;
|
||||||
case AluVectorOpcode::kMul:
|
case AluVectorOpcode::kMul:
|
||||||
case AluVectorOpcode::kMad: {
|
case AluVectorOpcode::kMad: {
|
||||||
bool is_mad = instr.vector_opcode == AluVectorOpcode::kMad;
|
// Not using DXBC mad to prevent fused multiply-add (mul followed by add
|
||||||
if (is_mad) {
|
// may be optimized into non-fused mad by the driver in the identical
|
||||||
DxbcOpMAd(per_component_dest, operands[0], operands[1], operands[2]);
|
// operands case also).
|
||||||
} else {
|
DxbcOpMul(per_component_dest, operands[0], operands[1]);
|
||||||
DxbcOpMul(per_component_dest, operands[0], operands[1]);
|
uint32_t multiplicands_different =
|
||||||
}
|
|
||||||
// Shader Model 3: 0 or denormal * anything = 0.
|
|
||||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
|
||||||
uint32_t absolute_different =
|
|
||||||
used_result_components &
|
used_result_components &
|
||||||
~instr.vector_operands[0].GetAbsoluteIdenticalComponents(
|
~instr.vector_operands[0].GetIdenticalComponents(
|
||||||
instr.vector_operands[1]);
|
instr.vector_operands[1]);
|
||||||
if (absolute_different) {
|
if (multiplicands_different) {
|
||||||
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
uint32_t is_zero_temp = PushSystemTemp();
|
uint32_t is_zero_temp = PushSystemTemp();
|
||||||
DxbcOpMin(DxbcDest::R(is_zero_temp, absolute_different),
|
DxbcOpMin(DxbcDest::R(is_zero_temp, multiplicands_different),
|
||||||
operands[0].Abs(), operands[1].Abs());
|
operands[0].Abs(), operands[1].Abs());
|
||||||
// min isn't required to flush denormals, eq is.
|
// min isn't required to flush denormals, eq is.
|
||||||
DxbcOpEq(DxbcDest::R(is_zero_temp, absolute_different),
|
DxbcOpEq(DxbcDest::R(is_zero_temp, multiplicands_different),
|
||||||
DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f));
|
DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f));
|
||||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, absolute_different),
|
// Not replacing true `0 + term` with movc of the term because +0 + -0
|
||||||
DxbcSrc::R(is_zero_temp),
|
// should result in +0, not -0.
|
||||||
is_mad ? operands[2] : DxbcSrc::LF(0.0f),
|
DxbcOpMovC(DxbcDest::R(system_temp_result_, multiplicands_different),
|
||||||
|
DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f),
|
||||||
DxbcSrc::R(system_temp_result_));
|
DxbcSrc::R(system_temp_result_));
|
||||||
// Release is_zero_temp.
|
// Release is_zero_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
|
if (instr.vector_opcode == AluVectorOpcode::kMad) {
|
||||||
|
DxbcOpAdd(per_component_dest, DxbcSrc::R(system_temp_result_),
|
||||||
|
operands[2]);
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case AluVectorOpcode::kMax:
|
case AluVectorOpcode::kMax:
|
||||||
|
@ -179,69 +181,40 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
||||||
component_count = 4;
|
component_count = 4;
|
||||||
}
|
}
|
||||||
result_swizzle = DxbcSrc::kXXXX;
|
result_swizzle = DxbcSrc::kXXXX;
|
||||||
uint32_t absolute_different =
|
uint32_t different = uint32_t((1 << component_count) - 1) &
|
||||||
uint32_t((1 << component_count) - 1) &
|
~instr.vector_operands[0].GetIdenticalComponents(
|
||||||
~instr.vector_operands[0].GetAbsoluteIdenticalComponents(
|
instr.vector_operands[1]);
|
||||||
instr.vector_operands[1]);
|
for (uint32_t i = 0; i < component_count; ++i) {
|
||||||
if (absolute_different) {
|
DxbcOpMul(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
|
||||||
// Shader Model 3: 0 or denormal * anything = 0.
|
operands[0].SelectFromSwizzled(i),
|
||||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
operands[1].SelectFromSwizzled(i));
|
||||||
// Add component products only if non-zero. For dp4, 16 scalar
|
if ((different & (1 << i)) != 0) {
|
||||||
// operations in the worst case (as opposed to always 20 for
|
// Shader Model 3: +-0 or denormal * anything = +0 (also not replacing
|
||||||
// eq/movc/eq/movc/dp4 or min/eq/movc/movc/dp4 for preparing operands
|
// true `0 + term` with movc of the term because +0 + -0 should result
|
||||||
// for dp4).
|
// in +0, not -0).
|
||||||
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0001),
|
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
|
||||||
operands[0].SelectFromSwizzled(0),
|
operands[0].SelectFromSwizzled(i).Abs(),
|
||||||
operands[1].SelectFromSwizzled(0));
|
operands[1].SelectFromSwizzled(i).Abs());
|
||||||
if (absolute_different & 0b0001) {
|
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100),
|
||||||
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0010),
|
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
|
||||||
operands[0].SelectFromSwizzled(0).Abs(),
|
|
||||||
operands[1].SelectFromSwizzled(0).Abs());
|
|
||||||
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0010),
|
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY),
|
|
||||||
DxbcSrc::LF(0.0f));
|
DxbcSrc::LF(0.0f));
|
||||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001),
|
DxbcOpMovC(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY),
|
|
||||||
DxbcSrc::LF(0.0f),
|
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
|
|
||||||
}
|
|
||||||
for (uint32_t i = 1; i < component_count; ++i) {
|
|
||||||
bool component_different = (absolute_different & (1 << i)) != 0;
|
|
||||||
DxbcOpMAd(DxbcDest::R(system_temp_result_,
|
|
||||||
component_different ? 0b0010 : 0b0001),
|
|
||||||
operands[0].SelectFromSwizzled(i),
|
|
||||||
operands[1].SelectFromSwizzled(i),
|
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
|
|
||||||
if (component_different) {
|
|
||||||
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
|
|
||||||
operands[0].SelectFromSwizzled(i).Abs(),
|
|
||||||
operands[1].SelectFromSwizzled(i).Abs());
|
|
||||||
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100),
|
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
|
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
|
||||||
DxbcSrc::LF(0.0f));
|
DxbcSrc::LF(0.0f),
|
||||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001),
|
DxbcSrc::R(system_temp_result_,
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
|
i ? DxbcSrc::kYYYY : DxbcSrc::kXXXX));
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
|
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
if (i) {
|
||||||
if (component_count == 2) {
|
// Not using DXBC dp# to avoid fused multiply-add, PC GPUs are scalar
|
||||||
DxbcOpDP2(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
|
// as of 2020 anyway, and not using mad for the same reason (mul
|
||||||
operands[1]);
|
// followed by add may be optimized into non-fused mad by the driver
|
||||||
} else if (component_count == 3) {
|
// in the identical operands case also).
|
||||||
DxbcOpDP3(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
|
DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001),
|
||||||
operands[1]);
|
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
|
||||||
} else {
|
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY));
|
||||||
assert_true(component_count == 4);
|
|
||||||
DxbcOpDP4(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
|
|
||||||
operands[1]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (component_count == 2) {
|
if (component_count == 2) {
|
||||||
// Add the third operand. Since floating-point addition isn't
|
|
||||||
// associative, even though adding this in multiply-add for the first
|
|
||||||
// component would be faster, it's safer to add here, in the end.
|
|
||||||
DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001),
|
DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001),
|
||||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
|
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
|
||||||
operands[2].SelectFromSwizzled(0));
|
operands[2].SelectFromSwizzled(0));
|
||||||
|
@ -592,14 +565,13 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
||||||
DxbcOpMov(DxbcDest::R(system_temp_result_, 0b0001), DxbcSrc::LF(1.0f));
|
DxbcOpMov(DxbcDest::R(system_temp_result_, 0b0001), DxbcSrc::LF(1.0f));
|
||||||
}
|
}
|
||||||
if (used_result_components & 0b0010) {
|
if (used_result_components & 0b0010) {
|
||||||
// Shader Model 3: 0 or denormal * anything = 0.
|
|
||||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
|
||||||
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0010),
|
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0010),
|
||||||
operands[0].SelectFromSwizzled(1),
|
operands[0].SelectFromSwizzled(1),
|
||||||
operands[1].SelectFromSwizzled(1));
|
operands[1].SelectFromSwizzled(1));
|
||||||
if (!(instr.vector_operands[0].GetAbsoluteIdenticalComponents(
|
if (!(instr.vector_operands[0].GetIdenticalComponents(
|
||||||
instr.vector_operands[1]) &
|
instr.vector_operands[1]) &
|
||||||
0b0010)) {
|
0b0010)) {
|
||||||
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
|
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
|
||||||
operands[0].SelectFromSwizzled(1).Abs(),
|
operands[0].SelectFromSwizzled(1).Abs(),
|
||||||
operands[1].SelectFromSwizzled(1).Abs());
|
operands[1].SelectFromSwizzled(1).Abs());
|
||||||
|
@ -700,8 +672,7 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
|
||||||
DxbcOpMul(ps_dest, operand_0_a, operand_0_b);
|
DxbcOpMul(ps_dest, operand_0_a, operand_0_b);
|
||||||
if (instr.scalar_operands[0].components[0] !=
|
if (instr.scalar_operands[0].components[0] !=
|
||||||
instr.scalar_operands[0].components[1]) {
|
instr.scalar_operands[0].components[1]) {
|
||||||
// Shader Model 3: 0 or denormal * anything = 0.
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
|
||||||
uint32_t is_zero_temp = PushSystemTemp();
|
uint32_t is_zero_temp = PushSystemTemp();
|
||||||
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
||||||
operand_0_b.Abs());
|
operand_0_b.Abs());
|
||||||
|
@ -714,58 +685,50 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case AluScalarOpcode::kMulsPrev: {
|
case AluScalarOpcode::kMulsPrev:
|
||||||
// Shader Model 3: 0 or denormal * anything = 0.
|
|
||||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
|
||||||
uint32_t is_zero_temp = PushSystemTemp();
|
|
||||||
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
|
||||||
ps_src.Abs());
|
|
||||||
// min isn't required to flush denormals, eq is.
|
|
||||||
DxbcOpEq(DxbcDest::R(is_zero_temp, 0b0001),
|
|
||||||
DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f));
|
|
||||||
DxbcOpMul(ps_dest, operand_0_a, ps_src);
|
|
||||||
DxbcOpMovC(ps_dest, DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX),
|
|
||||||
DxbcSrc::LF(0.0f), ps_src);
|
|
||||||
// Release is_zero_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
} break;
|
|
||||||
case AluScalarOpcode::kMulsPrev2: {
|
case AluScalarOpcode::kMulsPrev2: {
|
||||||
uint32_t test_temp = PushSystemTemp();
|
uint32_t test_temp = PushSystemTemp();
|
||||||
// Check if need to select the src0.a * ps case.
|
if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) {
|
||||||
// ps != -FLT_MAX.
|
// Check if need to select the src0.a * ps case.
|
||||||
DxbcOpNE(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(-FLT_MAX));
|
// ps != -FLT_MAX.
|
||||||
// isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since -FLT_MAX
|
DxbcOpNE(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(-FLT_MAX));
|
||||||
// is already loaded to an SGPR, this is also false if it's NaN.
|
// isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since
|
||||||
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -ps_src.Abs(),
|
// -FLT_MAX is already loaded to an SGPR, this is also false if it's
|
||||||
DxbcSrc::LF(-FLT_MAX));
|
// NaN.
|
||||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -ps_src.Abs(),
|
||||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
DxbcSrc::LF(-FLT_MAX));
|
||||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||||
// isfinite(src0.b).
|
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||||
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -operand_0_b.Abs(),
|
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||||
DxbcSrc::LF(-FLT_MAX));
|
// isfinite(src0.b).
|
||||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -operand_0_b.Abs(),
|
||||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
DxbcSrc::LF(-FLT_MAX));
|
||||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||||
// src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked
|
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||||
// for NaN).
|
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||||
DxbcOpLT(DxbcDest::R(test_temp, 0b0010), DxbcSrc::LF(0.0f), operand_0_b);
|
// src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked
|
||||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
// for NaN).
|
||||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
DxbcOpLT(DxbcDest::R(test_temp, 0b0010), DxbcSrc::LF(0.0f),
|
||||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
operand_0_b);
|
||||||
DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX));
|
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||||
// Shader Model 3: 0 or denormal * anything = 0.
|
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||||
// ps is already known to be not NaN or Infinity, so multiplying it by 0
|
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||||
// will result in 0. However, src0.a can be anything, so the result should
|
DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX));
|
||||||
// be zero if ps is zero.
|
}
|
||||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
DxbcOpEq(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(0.0f));
|
DxbcOpMin(DxbcDest::R(test_temp, 0b0001), operand_0_a.Abs(),
|
||||||
|
ps_src.Abs());
|
||||||
|
// min isn't required to flush denormals, eq is.
|
||||||
|
DxbcOpEq(DxbcDest::R(test_temp, 0b0001),
|
||||||
|
DxbcSrc::R(test_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f));
|
||||||
DxbcOpMul(ps_dest, operand_0_a, ps_src);
|
DxbcOpMul(ps_dest, operand_0_a, ps_src);
|
||||||
DxbcOpMovC(ps_dest, DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
DxbcOpMovC(ps_dest, DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||||
DxbcSrc::LF(0.0f), ps_src);
|
DxbcSrc::LF(0.0f), ps_src);
|
||||||
DxbcOpElse();
|
if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) {
|
||||||
DxbcOpMov(ps_dest, DxbcSrc::LF(-FLT_MAX));
|
DxbcOpElse();
|
||||||
DxbcOpEndIf();
|
DxbcOpMov(ps_dest, DxbcSrc::LF(-FLT_MAX));
|
||||||
|
DxbcOpEndIf();
|
||||||
|
}
|
||||||
// Release test_temp.
|
// Release test_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
} break;
|
} break;
|
||||||
|
@ -1023,11 +986,10 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
|
||||||
case AluScalarOpcode::kMulsc0:
|
case AluScalarOpcode::kMulsc0:
|
||||||
case AluScalarOpcode::kMulsc1:
|
case AluScalarOpcode::kMulsc1:
|
||||||
DxbcOpMul(ps_dest, operand_0_a, operand_1);
|
DxbcOpMul(ps_dest, operand_0_a, operand_1);
|
||||||
if (!(instr.scalar_operands[0].GetAbsoluteIdenticalComponents(
|
if (!(instr.scalar_operands[0].GetIdenticalComponents(
|
||||||
instr.scalar_operands[1]) &
|
instr.scalar_operands[1]) &
|
||||||
0b0001)) {
|
0b0001)) {
|
||||||
// Shader Model 3: 0 or denormal * anything = 0.
|
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
|
||||||
uint32_t is_zero_temp = PushSystemTemp();
|
uint32_t is_zero_temp = PushSystemTemp();
|
||||||
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
||||||
operand_1.Abs());
|
operand_1.Abs());
|
||||||
|
|
|
@ -235,14 +235,18 @@ struct InstructionOperand {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns which components of two operands are identical, but may have
|
// Returns which components of two operands will always be bitwise equal
|
||||||
// different signs (for simplicity of usage with GetComponent, treating the
|
// (disregarding component_count for simplicity of usage with GetComponent,
|
||||||
// rightmost component as replicated).
|
// treating the rightmost component as replicated). This, strictly with all
|
||||||
uint32_t GetAbsoluteIdenticalComponents(
|
// conditions, must be used when emulating Shader Model 3 +-0 * x = +0
|
||||||
const InstructionOperand& other) const {
|
// multiplication behavior with IEEE-compliant multiplication (because
|
||||||
|
// -0 * |-0|, or -0 * +0, is -0, while the result must be +0).
|
||||||
|
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
|
||||||
if (storage_source != other.storage_source ||
|
if (storage_source != other.storage_source ||
|
||||||
storage_index != other.storage_index ||
|
storage_index != other.storage_index ||
|
||||||
storage_addressing_mode != other.storage_addressing_mode) {
|
storage_addressing_mode != other.storage_addressing_mode ||
|
||||||
|
is_negated != other.is_negated ||
|
||||||
|
is_absolute_value != other.is_absolute_value) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
uint32_t identical_components = 0;
|
uint32_t identical_components = 0;
|
||||||
|
@ -252,16 +256,6 @@ struct InstructionOperand {
|
||||||
}
|
}
|
||||||
return identical_components;
|
return identical_components;
|
||||||
}
|
}
|
||||||
// Returns which components of two operands will always be bitwise equal, but
|
|
||||||
// may have different signs (disregarding component_count for simplicity of
|
|
||||||
// usage with GetComponent, treating the rightmost component as replicated).
|
|
||||||
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
|
|
||||||
if (is_negated != other.is_negated ||
|
|
||||||
is_absolute_value != other.is_absolute_value) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return GetAbsoluteIdenticalComponents(other);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ParsedExecInstruction {
|
struct ParsedExecInstruction {
|
||||||
|
|
|
@ -800,13 +800,26 @@ static_assert_size(TextureFetchInstruction, 12);
|
||||||
// Both are valid only within the current ALU clause. They are not modified
|
// Both are valid only within the current ALU clause. They are not modified
|
||||||
// when the instruction that would write them fails its predication check.
|
// when the instruction that would write them fails its predication check.
|
||||||
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
|
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
|
||||||
// multiplication (0 or denormal * anything = 0) wherever it's present (mul,
|
// multiplication (+-0 or denormal * anything = +0) wherever it's present
|
||||||
// mad, dp, etc.) and for NaN in min/max. It's very important to respect this
|
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
|
||||||
// rule for multiplication, as games often rely on it in vector normalization
|
// this rule for multiplication, as games often rely on it in vector
|
||||||
// (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of things in
|
// normalization (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of
|
||||||
// games - causes white screen in Halo 3, white specular on characters in GTA
|
// things in games - causes white screen in Halo 3, white specular on
|
||||||
// IV.
|
// characters in GTA IV. The result is always positive zero in this case, no
|
||||||
// TODO(Triang3l): Investigate signed zero handling in multiplication.
|
// matter what the signs of the other operands are, according to R5xx
|
||||||
|
// Acceleration section 8.7.5 "Legacy multiply behavior" and testing on
|
||||||
|
// Adreno 200. This means that the following need to be taken into account
|
||||||
|
// (according to 8.7.2 "ALU Non-Transcendental Floating Point"):
|
||||||
|
// - +0 * -0 is -0 with IEEE conformance, however, with this legacy SM3
|
||||||
|
// handling, it should result in +0.
|
||||||
|
// - +0 + -0 is +0, so multiply-add should not be replaced with conditional
|
||||||
|
// move of the third operand in case of zero multiplicands, because the term
|
||||||
|
// may be -0, while the result should be +0 in this case.
|
||||||
|
// http://developer.amd.com/wordpress/media/2013/10/R5xx_Acceleration_v1.5.pdf
|
||||||
|
// Multiply-add also appears to be not fused (the SM3 behavior instruction on
|
||||||
|
// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32) - shader translators
|
||||||
|
// should not use instructions that may be interpreted by the host GPU as
|
||||||
|
// fused multiply-add.
|
||||||
|
|
||||||
enum class AluScalarOpcode : uint32_t {
|
enum class AluScalarOpcode : uint32_t {
|
||||||
// Floating-Point Add
|
// Floating-Point Add
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# Copyright 2015 Ben Vanik. All Rights Reserved.
|
# Copyright 2015 Ben Vanik. All Rights Reserved.
|
||||||
|
|
||||||
|
@ -107,13 +107,14 @@ def has_bin(bin):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def shell_call(command, throw_on_error=True, stdout_path=None):
|
def shell_call(command, throw_on_error=True, stdout_path=None, stderr_path=None, shell=False):
|
||||||
"""Executes a shell command.
|
"""Executes a shell command.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
command: Command to execute, as a list of parameters.
|
command: Command to execute, as a list of parameters.
|
||||||
throw_on_error: Whether to throw an error or return the status code.
|
throw_on_error: Whether to throw an error or return the status code.
|
||||||
stdout_path: File path to write stdout output to.
|
stdout_path: File path to write stdout output to.
|
||||||
|
stderr_path: File path to write stderr output to.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
If throw_on_error is False the status code of the call will be returned.
|
If throw_on_error is False the status code of the call will be returned.
|
||||||
|
@ -121,17 +122,22 @@ def shell_call(command, throw_on_error=True, stdout_path=None):
|
||||||
stdout_file = None
|
stdout_file = None
|
||||||
if stdout_path:
|
if stdout_path:
|
||||||
stdout_file = open(stdout_path, 'w')
|
stdout_file = open(stdout_path, 'w')
|
||||||
|
stderr_file = None
|
||||||
|
if stderr_path:
|
||||||
|
stderr_file = open(stderr_path, 'w')
|
||||||
result = 0
|
result = 0
|
||||||
try:
|
try:
|
||||||
if throw_on_error:
|
if throw_on_error:
|
||||||
result = 1
|
result = 1
|
||||||
subprocess.check_call(command, shell=False, stdout=stdout_file)
|
subprocess.check_call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||||
result = 0
|
result = 0
|
||||||
else:
|
else:
|
||||||
result = subprocess.call(command, shell=False, stdout=stdout_file)
|
result = subprocess.call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||||
finally:
|
finally:
|
||||||
if stdout_file:
|
if stdout_file:
|
||||||
stdout_file.close()
|
stdout_file.close()
|
||||||
|
if stderr_file:
|
||||||
|
stderr_file.close()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
@ -196,42 +202,5 @@ def import_subprocess_environment(args):
|
||||||
os.environ[var.upper()] = setting
|
os.environ[var.upper()] = setting
|
||||||
break
|
break
|
||||||
|
|
||||||
def git_submodule_update():
|
|
||||||
"""Runs a full recursive git submodule init and update.
|
|
||||||
|
|
||||||
Older versions of git do not support 'update --init --recursive'. We could
|
|
||||||
check and run it on versions that do support it and speed things up a bit.
|
|
||||||
"""
|
|
||||||
if True:
|
|
||||||
shell_call([
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'update',
|
|
||||||
'--init',
|
|
||||||
'--recursive',
|
|
||||||
])
|
|
||||||
else:
|
|
||||||
shell_call([
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'init',
|
|
||||||
])
|
|
||||||
shell_call([
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'foreach',
|
|
||||||
'--recursive',
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'init',
|
|
||||||
])
|
|
||||||
shell_call([
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'update',
|
|
||||||
'--recursive',
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
111
xenia-build
111
xenia-build
|
@ -34,8 +34,11 @@ def main():
|
||||||
|
|
||||||
# Check git exists.
|
# Check git exists.
|
||||||
if not has_bin('git'):
|
if not has_bin('git'):
|
||||||
print('ERROR: git must be installed and on PATH.')
|
print('WARNING: Git should be installed and on PATH. Version info will be omitted from all binaries!')
|
||||||
sys.exit(1)
|
print('')
|
||||||
|
elif not git_is_repository():
|
||||||
|
print('WARNING: The source tree is unversioned. Version info will be omitted from all binaries!')
|
||||||
|
print('')
|
||||||
|
|
||||||
# Check python version.
|
# Check python version.
|
||||||
if not sys.version_info[:2] >= (3, 6):
|
if not sys.version_info[:2] >= (3, 6):
|
||||||
|
@ -185,13 +188,14 @@ def get_bin(binary):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def shell_call(command, throw_on_error=True, stdout_path=None, shell=False):
|
def shell_call(command, throw_on_error=True, stdout_path=None, stderr_path=None, shell=False):
|
||||||
"""Executes a shell command.
|
"""Executes a shell command.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
command: Command to execute, as a list of parameters.
|
command: Command to execute, as a list of parameters.
|
||||||
throw_on_error: Whether to throw an error or return the status code.
|
throw_on_error: Whether to throw an error or return the status code.
|
||||||
stdout_path: File path to write stdout output to.
|
stdout_path: File path to write stdout output to.
|
||||||
|
stderr_path: File path to write stderr output to.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
If throw_on_error is False the status code of the call will be returned.
|
If throw_on_error is False the status code of the call will be returned.
|
||||||
|
@ -199,21 +203,49 @@ def shell_call(command, throw_on_error=True, stdout_path=None, shell=False):
|
||||||
stdout_file = None
|
stdout_file = None
|
||||||
if stdout_path:
|
if stdout_path:
|
||||||
stdout_file = open(stdout_path, 'w')
|
stdout_file = open(stdout_path, 'w')
|
||||||
|
stderr_file = None
|
||||||
|
if stderr_path:
|
||||||
|
stderr_file = open(stderr_path, 'w')
|
||||||
result = 0
|
result = 0
|
||||||
try:
|
try:
|
||||||
if throw_on_error:
|
if throw_on_error:
|
||||||
result = 1
|
result = 1
|
||||||
subprocess.check_call(command, shell=shell, stdout=stdout_file)
|
subprocess.check_call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||||
result = 0
|
result = 0
|
||||||
else:
|
else:
|
||||||
result = subprocess.call(command, shell=shell, stdout=stdout_file)
|
result = subprocess.call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||||
finally:
|
finally:
|
||||||
if stdout_file:
|
if stdout_file:
|
||||||
stdout_file.close()
|
stdout_file.close()
|
||||||
|
if stderr_file:
|
||||||
|
stderr_file.close()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def get_git_head_info():
|
def generate_version_h():
|
||||||
|
"""Generates a build/version.h file that contains current git info.
|
||||||
|
"""
|
||||||
|
if git_is_repository():
|
||||||
|
(branch_name, commit, commit_short) = git_get_head_info()
|
||||||
|
else:
|
||||||
|
branch_name = 'tarball'
|
||||||
|
commit = ':(-dont-do-this'
|
||||||
|
commit_short = ':('
|
||||||
|
|
||||||
|
contents = '''// Autogenerated by `xb premake`.
|
||||||
|
#ifndef GENERATED_VERSION_H_
|
||||||
|
#define GENERATED_VERSION_H_
|
||||||
|
#define XE_BUILD_BRANCH "%s"
|
||||||
|
#define XE_BUILD_COMMIT "%s"
|
||||||
|
#define XE_BUILD_COMMIT_SHORT "%s"
|
||||||
|
#define XE_BUILD_DATE __DATE__
|
||||||
|
#endif // GENERATED_VERSION_H_
|
||||||
|
''' % (branch_name, commit, commit_short)
|
||||||
|
with open('build/version.h', 'w') as f:
|
||||||
|
f.write(contents)
|
||||||
|
|
||||||
|
|
||||||
|
def git_get_head_info():
|
||||||
"""Queries the current branch and commit checksum from git.
|
"""Queries the current branch and commit checksum from git.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
@ -247,58 +279,28 @@ def get_git_head_info():
|
||||||
return branch_name, commit, commit_short
|
return branch_name, commit, commit_short
|
||||||
|
|
||||||
|
|
||||||
def generate_version_h():
|
def git_is_repository():
|
||||||
"""Generates a build/version.h file that contains current git info.
|
"""Checks if git is available and this source tree is versioned.
|
||||||
"""
|
"""
|
||||||
(branch_name, commit, commit_short) = get_git_head_info()
|
if not has_bin('git'):
|
||||||
contents = '''// Autogenerated by `xb premake`.
|
return False
|
||||||
#ifndef GENERATED_VERSION_H_
|
return shell_call([
|
||||||
#define GENERATED_VERSION_H_
|
'git',
|
||||||
#define XE_BUILD_BRANCH "%s"
|
'rev-parse',
|
||||||
#define XE_BUILD_COMMIT "%s"
|
'--is-inside-work-tree',
|
||||||
#define XE_BUILD_COMMIT_SHORT "%s"
|
], throw_on_error=False, stdout_path=os.devnull, stderr_path=os.devnull) == 0
|
||||||
#define XE_BUILD_DATE __DATE__
|
|
||||||
#endif // GENERATED_VERSION_H_
|
|
||||||
''' % (branch_name, commit, commit_short)
|
|
||||||
with open('build/version.h', 'w') as f:
|
|
||||||
f.write(contents)
|
|
||||||
|
|
||||||
|
|
||||||
def git_submodule_update():
|
def git_submodule_update():
|
||||||
"""Runs a full recursive git submodule init and update.
|
"""Runs a full recursive git submodule init and update.
|
||||||
|
|
||||||
Older versions of git do not support 'update --init --recursive'. We could
|
|
||||||
check and run it on versions that do support it and speed things up a bit.
|
|
||||||
"""
|
"""
|
||||||
if True:
|
shell_call([
|
||||||
shell_call([
|
'git',
|
||||||
'git',
|
'submodule',
|
||||||
'submodule',
|
'update',
|
||||||
'update',
|
'--init',
|
||||||
'--init',
|
'--recursive',
|
||||||
'--recursive',
|
])
|
||||||
])
|
|
||||||
else:
|
|
||||||
shell_call([
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'init',
|
|
||||||
])
|
|
||||||
shell_call([
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'foreach',
|
|
||||||
'--recursive',
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'init',
|
|
||||||
])
|
|
||||||
shell_call([
|
|
||||||
'git',
|
|
||||||
'submodule',
|
|
||||||
'update',
|
|
||||||
'--recursive',
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
def get_clang_format_binary():
|
def get_clang_format_binary():
|
||||||
|
@ -491,7 +493,10 @@ class SetupCommand(Command):
|
||||||
|
|
||||||
# Setup submodules.
|
# Setup submodules.
|
||||||
print('- git submodule init / update...')
|
print('- git submodule init / update...')
|
||||||
git_submodule_update()
|
if git_is_repository():
|
||||||
|
git_submodule_update()
|
||||||
|
else:
|
||||||
|
print('WARNING: Git not available or not a repository. Dependencies may be missing.')
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
print('- running premake...')
|
print('- running premake...')
|
||||||
|
|
Loading…
Reference in New Issue