Merge pull request #514 from DrChat/x64_speedups

X64 Speedups
This commit is contained in:
Ben Vanik 2016-01-13 18:17:07 -08:00
commit 0e11111326
22 changed files with 1519 additions and 529 deletions

View File

@ -2,6 +2,31 @@
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<!-- Automatically convert endianness for xe::be -->
<Type Name="xe::be&lt;unsigned __int64&gt;">
<DisplayString>
{(((value &amp; 0xFF00000000000000) &gt;&gt; 56) |
((value &amp; 0x00FF000000000000) &gt;&gt; 40) |
((value &amp; 0x0000FF0000000000) &gt;&gt; 24) |
((value &amp; 0x000000FF00000000) &gt;&gt; 8 ) |
((value &amp; 0x00000000FF000000) &lt;&lt; 8 ) |
((value &amp; 0x0000000000FF0000) &lt;&lt; 24) |
((value &amp; 0x000000000000FF00) &lt;&lt; 40) |
((value &amp; 0x00000000000000FF) &lt;&lt; 56))}
</DisplayString>
</Type>
<Type Name="xe::be&lt;__int64&gt;">
<DisplayString>
{(((value &amp; 0xFF00000000000000) &gt;&gt; 56) |
((value &amp; 0x00FF000000000000) &gt;&gt; 40) |
((value &amp; 0x0000FF0000000000) &gt;&gt; 24) |
((value &amp; 0x000000FF00000000) &gt;&gt; 8 ) |
((value &amp; 0x00000000FF000000) &lt;&lt; 8 ) |
((value &amp; 0x0000000000FF0000) &lt;&lt; 24) |
((value &amp; 0x000000000000FF00) &lt;&lt; 40) |
((value &amp; 0x00000000000000FF) &lt;&lt; 56))}
</DisplayString>
</Type>
<Type Name="xe::be&lt;unsigned int&gt;">
<DisplayString>
{(((value &amp; 0xFF000000) &gt;&gt; 24) |

View File

@ -105,12 +105,54 @@ typedef struct alignas(16) vec128_s {
};
};
  // Trivially constructed; union contents are left uninitialized.
  vec128_s() = default;
  // Copy/assign via the two 64-bit halves, which alias every union member.
  vec128_s(const vec128_s& other) {
    high = other.high;
    low = other.low;
  }
  vec128_s& operator=(const vec128_s& b) {
    high = b.high;
    low = b.low;
    return *this;
  }
  // Bitwise equality over all 128 bits (not element-wise, not float-aware).
  bool operator==(const vec128_s& b) const {
    return low == b.low && high == b.high;
  }
  bool operator!=(const vec128_s& b) const {
    return low != b.low || high != b.high;
  }
  // Bitwise XOR of all 128 bits; returns a new value.
  vec128_s operator^(const vec128_s& b) const {
    vec128_s a = *this;
    a.high ^= b.high;
    a.low ^= b.low;
    return a;
  };
  vec128_s& operator^=(const vec128_s& b) {
    *this = *this ^ b;
    return *this;
  };
  // Bitwise AND of all 128 bits; returns a new value.
  vec128_s operator&(const vec128_s& b) const {
    vec128_s a = *this;
    a.high &= b.high;
    a.low &= b.low;
    return a;
  };
  vec128_s& operator&=(const vec128_s& b) {
    *this = *this & b;
    return *this;
  };
  // Bitwise OR of all 128 bits; returns a new value.
  vec128_s operator|(const vec128_s& b) const {
    vec128_s a = *this;
    a.high |= b.high;
    a.low |= b.low;
    return a;
  };
  vec128_s& operator|=(const vec128_s& b) {
    *this = *this | b;
    return *this;
  };
} vec128_t;
static inline vec128_t vec128i(uint32_t src) {

View File

@ -70,7 +70,7 @@ class X64CodeCache : public CodeCache {
// This is picked to be high enough to cover whatever we can reasonably
// expect. If we hit issues with this it probably means some corner case
// in analysis triggering.
static const size_t kMaximumFunctionCount = 30000;
static const size_t kMaximumFunctionCount = 50000;
struct UnwindReservation {
size_t data_size = 0;

File diff suppressed because it is too large Load Diff

View File

@ -161,6 +161,13 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
i->Remove();
}
break;
case OPCODE_ROUND:
if (i->src1.value->IsConstant()) {
v->set_from(i->src1.value);
v->Round(RoundMode(i->flags));
i->Remove();
}
break;
case OPCODE_ZERO_EXTEND:
if (i->src1.value->IsConstant()) {
TypeName target_type = v->type;
@ -188,6 +195,7 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
case OPCODE_LOAD:
if (i->src1.value->IsConstant()) {
assert_false(i->flags & LOAD_STORE_BYTE_SWAP);
auto memory = processor_->memory();
auto address = i->src1.value->constant.i32;
auto mmio_range =
@ -253,12 +261,23 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
case OPCODE_SELECT:
if (i->src1.value->IsConstant()) {
if (i->src1.value->IsConstantTrue()) {
v->set_from(i->src2.value);
if (i->src1.value->type != VEC128_TYPE) {
if (i->src1.value->IsConstantTrue()) {
v->set_from(i->src2.value);
i->Remove();
} else if (i->src1.value->IsConstantFalse()) {
v->set_from(i->src3.value);
i->Remove();
} else if (i->src2.value->IsConstant() &&
i->src3.value->IsConstant()) {
// TODO: Select
// v->set_from(i->src2.value);
// v->Select(i->src3.value, i->src1.value);
// i->Remove();
}
} else {
v->set_from(i->src3.value);
// TODO: vec128 select
}
i->Remove();
}
break;
case OPCODE_IS_TRUE:
@ -355,7 +374,7 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
break;
case OPCODE_DID_SATURATE:
assert_true(!i->src1.value->IsConstant());
// assert_true(!i->src1.value->IsConstant());
break;
case OPCODE_ADD:
@ -413,8 +432,33 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
i->Remove();
}
break;
// case OPCODE_MUL_ADD:
// case OPCODE_MUL_SUB
case OPCODE_MUL_ADD:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
// Multiply part is constant.
if (i->src3.value->IsConstant()) {
v->set_from(i->src1.value);
Value::MulAdd(v, i->src1.value, i->src2.value, i->src3.value);
i->Remove();
}
}
break;
case OPCODE_MUL_SUB:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
// Multiply part is constant.
if (i->src3.value->IsConstant()) {
v->set_from(i->src1.value);
Value::MulSub(v, i->src1.value, i->src2.value, i->src3.value);
i->Remove();
}
}
break;
case OPCODE_MAX:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->Max(i->src2.value);
i->Remove();
}
break;
case OPCODE_NEG:
if (i->src1.value->IsConstant()) {
v->set_from(i->src1.value);
@ -484,7 +528,6 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
i->Remove();
}
break;
// TODO(benvanik): VECTOR_SHL
case OPCODE_SHR:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
@ -515,13 +558,80 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
}
break;
// TODO(benvanik): INSERT/EXTRACT
// TODO(benvanik): SPLAT/PERMUTE/SWIZZLE
case OPCODE_SPLAT:
if (i->src1.value->IsConstant()) {
// Quite a few of these, from building vec128s.
// TODO(benvanik): PERMUTE/SWIZZLE
case OPCODE_EXTRACT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_zero(v->type);
v->Extract(i->src1.value, i->src2.value);
i->Remove();
}
break;
case OPCODE_SPLAT:
if (i->src1.value->IsConstant()) {
v->set_zero(v->type);
v->Splat(i->src1.value);
i->Remove();
}
break;
case OPCODE_VECTOR_COMPARE_EQ:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->VectorCompareEQ(i->src2.value, hir::TypeName(i->flags));
i->Remove();
}
break;
case OPCODE_VECTOR_COMPARE_SGT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->VectorCompareSGT(i->src2.value, hir::TypeName(i->flags));
i->Remove();
}
break;
case OPCODE_VECTOR_CONVERT_F2I:
if (i->src1.value->IsConstant()) {
v->set_zero(VEC128_TYPE);
v->VectorConvertF2I(i->src1.value);
i->Remove();
}
break;
case OPCODE_VECTOR_CONVERT_I2F:
if (i->src1.value->IsConstant()) {
v->set_zero(VEC128_TYPE);
v->VectorConvertI2F(i->src1.value);
i->Remove();
}
break;
case OPCODE_VECTOR_SHL:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->VectorShl(i->src2.value, hir::TypeName(i->flags));
i->Remove();
}
break;
case OPCODE_VECTOR_SHR:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->VectorShr(i->src2.value, hir::TypeName(i->flags));
i->Remove();
}
break;
case OPCODE_VECTOR_ROTATE_LEFT:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->VectorRol(i->src2.value, hir::TypeName(i->flags));
i->Remove();
}
break;
case OPCODE_VECTOR_SUB:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
uint32_t arith_flags = i->flags >> 8;
v->VectorSub(i->src2.value, hir::TypeName(i->flags & 0xFF),
!!(arith_flags & ARITHMETIC_UNSIGNED),
!!(arith_flags & ARITHMETIC_SATURATE));
i->Remove();
}
break;
default:
// Ignored.
break;

View File

@ -28,6 +28,10 @@ DEFINE_bool(trace_function_references, false,
DEFINE_bool(trace_function_data, false,
"Generate tracing for function result data.");
DEFINE_bool(
disable_global_lock, false,
"Disables global lock usage in guest code. Does not affect host code.");
DEFINE_bool(validate_hir, false,
"Perform validation checks on the HIR during compilation.");

View File

@ -23,6 +23,8 @@ DECLARE_bool(trace_function_coverage);
DECLARE_bool(trace_function_references);
DECLARE_bool(trace_function_data);
DECLARE_bool(disable_global_lock);
DECLARE_bool(validate_hir);
DECLARE_uint64(break_on_instruction);

View File

@ -2074,6 +2074,17 @@ Value* HIRBuilder::AtomicExchange(Value* address, Value* new_value) {
return i->dest;
}
Value* HIRBuilder::AtomicCompareExchange(Value* address, Value* old_value,
                                         Value* new_value) {
  // Appends an OPCODE_ATOMIC_COMPARE_EXCHANGE instruction:
  //   src1 = guest address, src2 = expected value, src3 = replacement value.
  // dest is INT8 — presumably a success flag (callers store it into cr0_eq);
  // confirm against the opcode's backend sequence.
  ASSERT_ADDRESS_TYPE(address);
  Instr* i = AppendInstr(OPCODE_ATOMIC_COMPARE_EXCHANGE_info, 0,
                         AllocValue(INT8_TYPE));
  i->set_src1(address);
  i->set_src2(old_value);
  i->set_src3(new_value);
  return i->dest;
}
} // namespace hir
} // namespace cpu
} // namespace xe

View File

@ -236,6 +236,8 @@ class HIRBuilder {
Value* Unpack(Value* value, uint32_t pack_flags = 0);
Value* AtomicExchange(Value* address, Value* new_value);
Value* AtomicCompareExchange(Value* address, Value* old_value,
Value* new_value);
Value* AtomicAdd(Value* address, Value* value);
Value* AtomicSub(Value* address, Value* value);

View File

@ -76,13 +76,14 @@ enum PackType : uint16_t {
// Special types:
PACK_TYPE_D3DCOLOR = 0,
PACK_TYPE_FLOAT16_2 = 1,
PACK_TYPE_FLOAT16_4 = 2,
PACK_TYPE_SHORT_2 = 3,
PACK_TYPE_UINT_2101010 = 4,
PACK_TYPE_FLOAT16_3 = 2, // FIXME: Not verified, but looks correct.
PACK_TYPE_FLOAT16_4 = 3,
PACK_TYPE_SHORT_2 = 4,
PACK_TYPE_UINT_2101010 = 5,
// Types which use the bitmasks below for configuration:
PACK_TYPE_8_IN_16 = 5,
PACK_TYPE_16_IN_32 = 6,
PACK_TYPE_8_IN_16 = 6,
PACK_TYPE_16_IN_32 = 7,
PACK_TYPE_MODE = 0x000F, // just to get the mode
@ -220,6 +221,7 @@ enum Opcode {
OPCODE_PACK,
OPCODE_UNPACK,
OPCODE_ATOMIC_EXCHANGE,
OPCODE_ATOMIC_COMPARE_EXCHANGE,
__OPCODE_MAX_VALUE, // Keep at end.
};

View File

@ -631,3 +631,9 @@ DEFINE_OPCODE(
"atomic_exchange",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_VOLATILE)
DEFINE_OPCODE(
OPCODE_ATOMIC_COMPARE_EXCHANGE,
"atomic_compare_exchange",
OPCODE_SIG_V_V_V_V,
OPCODE_FLAG_VOLATILE)

View File

@ -46,13 +46,13 @@ uint32_t Value::AsUint32() {
assert_true(IsConstant());
switch (type) {
case INT8_TYPE:
return constant.i8;
return constant.u8;
case INT16_TYPE:
return constant.i16;
return constant.u16;
case INT32_TYPE:
return constant.i32;
return constant.u32;
case INT64_TYPE:
return (uint32_t)constant.i64;
return (uint32_t)constant.u64;
default:
assert_unhandled_case(type);
return 0;
@ -63,13 +63,13 @@ uint64_t Value::AsUint64() {
assert_true(IsConstant());
switch (type) {
case INT8_TYPE:
return constant.i8;
return constant.u8;
case INT16_TYPE:
return constant.i16;
return constant.u16;
case INT32_TYPE:
return constant.i32;
return constant.u32;
case INT64_TYPE:
return constant.i64;
return constant.u64;
default:
assert_unhandled_case(type);
return 0;
@ -85,15 +85,15 @@ void Value::ZeroExtend(TypeName target_type) {
switch (type) {
case INT8_TYPE:
type = target_type;
constant.i64 = constant.i64 & 0xFF;
constant.u64 = constant.u8;
return;
case INT16_TYPE:
type = target_type;
constant.i64 = constant.i64 & 0xFFFF;
constant.u64 = constant.u16;
return;
case INT32_TYPE:
type = target_type;
constant.i64 = constant.i64 & 0xFFFFFFFF;
constant.u64 = constant.u32;
return;
default:
assert_unhandled_case(type);
@ -210,12 +210,30 @@ void Value::Convert(TypeName target_type, RoundMode round_mode) {
assert_unhandled_case(target_type);
return;
}
case INT64_TYPE:
switch (target_type) {
case FLOAT64_TYPE:
type = target_type;
constant.f64 = (double)constant.i64;
return;
default:
assert_unhandled_case(target_type);
return;
}
case FLOAT64_TYPE:
switch (target_type) {
case FLOAT32_TYPE:
type = target_type;
constant.f32 = (float)constant.f64;
return;
case INT32_TYPE:
type = target_type;
constant.i32 = (int32_t)constant.f64;
return;
case INT64_TYPE:
type = target_type;
constant.i64 = (int64_t)constant.f64;
return;
default:
assert_unhandled_case(target_type);
return;
@ -227,8 +245,28 @@ void Value::Convert(TypeName target_type, RoundMode round_mode) {
}
// Rounds this constant in place according to round_mode.
// Only ROUND_TO_NEAREST is implemented; other modes are currently left as
// no-ops pending the full mode/type matrix.
// Fixes vs. previous version: the VEC128 loop used to `return` after lane 0,
// leaving lanes 1-3 unrounded, and FLOAT64 silently did nothing.
void Value::Round(RoundMode round_mode) {
  // TODO(benvanik): big matrix (remaining round modes).
  switch (type) {
    case FLOAT32_TYPE:
      if (round_mode == ROUND_TO_NEAREST) {
        constant.f32 = std::round(constant.f32);
      }
      return;
    case FLOAT64_TYPE:
      if (round_mode == ROUND_TO_NEAREST) {
        constant.f64 = std::round(constant.f64);
      }
      return;
    case VEC128_TYPE:
      if (round_mode == ROUND_TO_NEAREST) {
        // Round every lane, not just the first.
        for (int i = 0; i < 4; i++) {
          constant.v128.f32[i] = std::round(constant.v128.f32[i]);
        }
      }
      return;
    default:
      assert_unhandled_case(type);
  }
}
bool Value::Add(Value* other) {
@ -325,6 +363,11 @@ void Value::Mul(Value* other) {
case FLOAT64_TYPE:
constant.f64 *= other->constant.f64;
break;
case VEC128_TYPE:
for (int i = 0; i < 4; i++) {
constant.v128.f32[i] *= other->constant.v128.f32[i];
}
break;
default:
assert_unhandled_case(type);
break;
@ -406,6 +449,32 @@ void Value::Div(Value* other, bool is_unsigned) {
case FLOAT64_TYPE:
constant.f64 /= other->constant.f64;
break;
case VEC128_TYPE:
for (int i = 0; i < 4; i++) {
constant.v128.f32[i] /= other->constant.v128.f32[i];
}
break;
default:
assert_unhandled_case(type);
break;
}
}
void Value::Max(Value* other) {
assert_true(type == other->type);
switch (type) {
case FLOAT32_TYPE:
constant.f32 = std::max(constant.f32, other->constant.f32);
break;
case FLOAT64_TYPE:
constant.f64 = std::max(constant.f64, other->constant.f64);
break;
case VEC128_TYPE:
for (int i = 0; i < 4; i++) {
constant.v128.f32[i] =
std::max(constant.v128.f32[i], other->constant.v128.f32[i]);
}
break;
default:
assert_unhandled_case(type);
break;
@ -413,13 +482,49 @@ void Value::Div(Value* other, bool is_unsigned) {
}
// dest = (value1 * value2) + value3.
// Element-wise over the four float lanes for VEC128; scalar for
// FLOAT32/FLOAT64. Integer types are not yet handled.
// Fix: removed a stale unconditional assert_always() that preceded the
// switch and fired on every call.
void Value::MulAdd(Value* dest, Value* value1, Value* value2, Value* value3) {
  // TODO(benvanik): big matrix (integer types).
  switch (dest->type) {
    case FLOAT32_TYPE:
      dest->constant.f32 =
          (value1->constant.f32 * value2->constant.f32) + value3->constant.f32;
      break;
    case FLOAT64_TYPE:
      dest->constant.f64 =
          (value1->constant.f64 * value2->constant.f64) + value3->constant.f64;
      break;
    case VEC128_TYPE:
      for (int i = 0; i < 4; i++) {
        dest->constant.v128.f32[i] =
            (value1->constant.v128.f32[i] * value2->constant.v128.f32[i]) +
            value3->constant.v128.f32[i];
      }
      break;
    default:
      assert_unhandled_case(dest->type);
      break;
  }
}
// dest = (value1 * value2) - value3.
// Element-wise over the four float lanes for VEC128; scalar for
// FLOAT32/FLOAT64. Integer types are not yet handled.
// Fix: removed a stale unconditional assert_always() that preceded the
// switch and fired on every call.
void Value::MulSub(Value* dest, Value* value1, Value* value2, Value* value3) {
  // TODO(benvanik): big matrix (integer types).
  switch (dest->type) {
    case FLOAT32_TYPE:
      dest->constant.f32 =
          (value1->constant.f32 * value2->constant.f32) - value3->constant.f32;
      break;
    case FLOAT64_TYPE:
      dest->constant.f64 =
          (value1->constant.f64 * value2->constant.f64) - value3->constant.f64;
      break;
    case VEC128_TYPE:
      for (int i = 0; i < 4; i++) {
        dest->constant.v128.f32[i] =
            (value1->constant.v128.f32[i] * value2->constant.v128.f32[i]) -
            value3->constant.v128.f32[i];
      }
      break;
    default:
      assert_unhandled_case(dest->type);
      break;
  }
}
void Value::Neg() {
@ -527,6 +632,9 @@ void Value::And(Value* other) {
case INT64_TYPE:
constant.i64 &= other->constant.i64;
break;
case VEC128_TYPE:
constant.v128 &= other->constant.v128;
break;
default:
assert_unhandled_case(type);
break;
@ -548,6 +656,9 @@ void Value::Or(Value* other) {
case INT64_TYPE:
constant.i64 |= other->constant.i64;
break;
case VEC128_TYPE:
constant.v128 |= other->constant.v128;
break;
default:
assert_unhandled_case(type);
break;
@ -569,6 +680,9 @@ void Value::Xor(Value* other) {
case INT64_TYPE:
constant.i64 ^= other->constant.i64;
break;
case VEC128_TYPE:
constant.v128 ^= other->constant.v128;
break;
default:
assert_unhandled_case(type);
break;
@ -603,16 +717,16 @@ void Value::Shl(Value* other) {
assert_true(other->type == INT8_TYPE);
switch (type) {
case INT8_TYPE:
constant.i8 <<= other->constant.i8;
constant.u8 <<= other->constant.u8;
break;
case INT16_TYPE:
constant.i16 <<= other->constant.i8;
constant.u16 <<= other->constant.u8;
break;
case INT32_TYPE:
constant.i32 <<= other->constant.i8;
constant.u32 <<= other->constant.u8;
break;
case INT64_TYPE:
constant.i64 <<= other->constant.i8;
constant.u64 <<= other->constant.u8;
break;
default:
assert_unhandled_case(type);
@ -624,16 +738,16 @@ void Value::Shr(Value* other) {
assert_true(other->type == INT8_TYPE);
switch (type) {
case INT8_TYPE:
constant.i8 = (uint8_t)constant.i8 >> other->constant.i8;
constant.u8 = constant.u8 >> other->constant.u8;
break;
case INT16_TYPE:
constant.i16 = (uint16_t)constant.i16 >> other->constant.i8;
constant.u16 = constant.u16 >> other->constant.u8;
break;
case INT32_TYPE:
constant.i32 = (uint32_t)constant.i32 >> other->constant.i8;
constant.u32 = constant.u32 >> other->constant.u8;
break;
case INT64_TYPE:
constant.i64 = (uint64_t)constant.i64 >> other->constant.i8;
constant.u64 = constant.u64 >> other->constant.u8;
break;
default:
assert_unhandled_case(type);
@ -645,16 +759,16 @@ void Value::Sha(Value* other) {
assert_true(other->type == INT8_TYPE);
switch (type) {
case INT8_TYPE:
constant.i8 = constant.i8 >> other->constant.i8;
constant.i8 = constant.i8 >> other->constant.u8;
break;
case INT16_TYPE:
constant.i16 = constant.i16 >> other->constant.i8;
constant.i16 = constant.i16 >> other->constant.u8;
break;
case INT32_TYPE:
constant.i32 = constant.i32 >> other->constant.i8;
constant.i32 = constant.i32 >> other->constant.u8;
break;
case INT64_TYPE:
constant.i64 = constant.i64 >> other->constant.i8;
constant.i64 = constant.i64 >> other->constant.u8;
break;
default:
assert_unhandled_case(type);
@ -662,6 +776,246 @@ void Value::Sha(Value* other) {
}
}
// Extracts a single lane of the vec128 constant |vec| into this scalar
// constant. The lane width is taken from this value's integer type; the
// lane number comes from the |index| constant.
// NOTE(review): index is not range-checked/masked here — callers must pass
// a valid lane number. TODO: confirm whether masking to the lane count is
// expected (hardware-style) and add it if so.
// Fix: added the default case — float/unknown types previously fell through
// silently, leaving this constant unmodified.
void Value::Extract(Value* vec, Value* index) {
  assert_true(vec->type == VEC128_TYPE);
  switch (type) {
    case INT8_TYPE:
      constant.u8 = vec->constant.v128.u8[index->constant.u8];
      break;
    case INT16_TYPE:
      constant.u16 = vec->constant.v128.u16[index->constant.u16];
      break;
    case INT32_TYPE:
      constant.u32 = vec->constant.v128.u32[index->constant.u32];
      break;
    case INT64_TYPE:
      constant.u64 = vec->constant.v128.u64[index->constant.u64];
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}
// Constant-fold of OPCODE_SELECT (choose between this and |other| per
// |ctrl|). Not implemented yet — reaching here is a bug; the constant
// propagation pass keeps its corresponding call site commented out.
void Value::Select(Value* other, Value* ctrl) {
  // TODO
  assert_always();
}
// Broadcasts the scalar constant |other| into every lane of this vec128
// constant. Lane width follows other's type; float scalars are splatted
// bitwise through the same-width integer lanes (FLOAT32 via i32,
// FLOAT64 via i64), so no value conversion occurs.
void Value::Splat(Value* other) {
  assert_true(type == VEC128_TYPE);
  switch (other->type) {
    case INT8_TYPE: {
      for (int lane = 0; lane < 16; ++lane) {
        constant.v128.i8[lane] = other->constant.i8;
      }
      break;
    }
    case INT16_TYPE: {
      for (int lane = 0; lane < 8; ++lane) {
        constant.v128.i16[lane] = other->constant.i16;
      }
      break;
    }
    case INT32_TYPE:
    case FLOAT32_TYPE: {
      // Bitwise splat covers both int32 and float32 sources.
      for (int lane = 0; lane < 4; ++lane) {
        constant.v128.i32[lane] = other->constant.i32;
      }
      break;
    }
    case INT64_TYPE:
    case FLOAT64_TYPE: {
      // Bitwise splat covers both int64 and float64 sources.
      for (int lane = 0; lane < 2; ++lane) {
        constant.v128.i64[lane] = other->constant.i64;
      }
      break;
    }
    default: {
      assert_unhandled_case(other->type);
      break;
    }
  }
}
// Element-wise equality compare of two vec128 constants; each lane becomes
// all-ones on equal, all-zeros otherwise (hardware compare-mask semantics).
// |type| selects the lane width.
// NOTE(review): FLOAT32/FLOAT64 lanes are compared bitwise through the
// integer lanes, which makes NaN == NaN true and +0.0 != -0.0 — this
// differs from an IEEE float compare; confirm this matches the backend's
// vector-compare behavior.
void Value::VectorCompareEQ(Value* other, TypeName type) {
  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
  switch (type) {
    case INT8_TYPE:
      for (int i = 0; i < 16; i++) {
        constant.v128.u8[i] =
            constant.v128.u8[i] == other->constant.v128.u8[i] ? -1 : 0;
      }
      break;
    case INT16_TYPE:
      for (int i = 0; i < 8; i++) {
        constant.v128.u16[i] =
            constant.v128.u16[i] == other->constant.v128.u16[i] ? -1 : 0;
      }
      break;
    case INT32_TYPE:
    case FLOAT32_TYPE:
      for (int i = 0; i < 4; i++) {
        constant.v128.u32[i] =
            constant.v128.u32[i] == other->constant.v128.u32[i] ? -1 : 0;
      }
      break;
    case INT64_TYPE:
    case FLOAT64_TYPE:
      for (int i = 0; i < 2; i++) {
        constant.v128.u64[i] =
            constant.v128.u64[i] == other->constant.v128.u64[i] ? -1 : 0;
      }
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}
// Element-wise signed greater-than compare of two vec128 constants; each
// lane becomes all-ones when this > other, all-zeros otherwise (hardware
// compare-mask semantics). |type| selects the lane width. Integer lanes
// compare via the signed views; FLOAT32 uses a real float compare (so NaN
// lanes yield zero).
void Value::VectorCompareSGT(Value* other, TypeName type) {
  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
  switch (type) {
    case INT8_TYPE:
      for (int i = 0; i < 16; i++) {
        constant.v128.u8[i] =
            constant.v128.i8[i] > other->constant.v128.i8[i] ? -1 : 0;
      }
      break;
    case INT16_TYPE:
      for (int i = 0; i < 8; i++) {
        constant.v128.u16[i] =
            constant.v128.i16[i] > other->constant.v128.i16[i] ? -1 : 0;
      }
      break;
    case INT32_TYPE:
      for (int i = 0; i < 4; i++) {
        constant.v128.u32[i] =
            constant.v128.i32[i] > other->constant.v128.i32[i] ? -1 : 0;
      }
      break;
    case FLOAT32_TYPE:
      for (int i = 0; i < 4; i++) {
        constant.v128.u32[i] =
            constant.v128.f32[i] > other->constant.v128.f32[i] ? -1 : 0;
      }
      break;
    case INT64_TYPE:
      for (int i = 0; i < 2; i++) {
        constant.v128.u64[i] =
            constant.v128.i64[i] > other->constant.v128.i64[i] ? -1 : 0;
      }
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}
// Converts the four signed int32 lanes of |other| into the four float32
// lanes of this constant (OPCODE_VECTOR_CONVERT_I2F fold).
void Value::VectorConvertI2F(Value* other) {
  assert_true(type == VEC128_TYPE);
  for (int lane = 0; lane < 4; ++lane) {
    constant.v128.f32[lane] =
        static_cast<float>(other->constant.v128.i32[lane]);
  }
}
// Converts the four float32 lanes of |other| into the four signed int32
// lanes of this constant (OPCODE_VECTOR_CONVERT_F2I fold). Uses C++
// truncating conversion; out-of-range/NaN lanes are undefined behavior of
// the cast.
void Value::VectorConvertF2I(Value* other) {
  assert_true(type == VEC128_TYPE);
  for (int lane = 0; lane < 4; ++lane) {
    constant.v128.i32[lane] =
        static_cast<int32_t>(other->constant.v128.f32[lane]);
  }
}
// Element-wise left shift: each lane of this constant is shifted by the
// matching lane of |other|. Shift counts are masked to lane-width bits
// (0x7/0xF/0x1F), matching hardware modulo-shift behavior. INT64 lanes are
// not handled.
void Value::VectorShl(Value* other, TypeName type) {
  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
  switch (type) {
    case INT8_TYPE:
      for (int i = 0; i < 16; i++) {
        constant.v128.u8[i] <<= other->constant.v128.u8[i] & 0x7;
      }
      break;
    case INT16_TYPE:
      for (int i = 0; i < 8; i++) {
        constant.v128.u16[i] <<= other->constant.v128.u16[i] & 0xF;
      }
      break;
    case INT32_TYPE:
      for (int i = 0; i < 4; i++) {
        constant.v128.u32[i] <<= other->constant.v128.u32[i] & 0x1F;
      }
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}
// Element-wise logical right shift: each lane of this constant is shifted
// by the matching lane of |other|. Operates on the unsigned lane views, so
// zeros are shifted in. Shift counts are masked to lane-width bits
// (0x7/0xF/0x1F), matching hardware modulo-shift behavior. INT64 lanes are
// not handled.
void Value::VectorShr(Value* other, TypeName type) {
  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
  switch (type) {
    case INT8_TYPE:
      for (int i = 0; i < 16; i++) {
        constant.v128.u8[i] >>= other->constant.v128.u8[i] & 0x7;
      }
      break;
    case INT16_TYPE:
      for (int i = 0; i < 8; i++) {
        constant.v128.u16[i] >>= other->constant.v128.u16[i] & 0xF;
      }
      break;
    case INT32_TYPE:
      for (int i = 0; i < 4; i++) {
        constant.v128.u32[i] >>= other->constant.v128.u32[i] & 0x1F;
      }
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}
// Element-wise rotate left: each lane of this constant is rotated by the
// matching lane of |other|, masked to lane-width bits (0x7/0xF/0x1F) as on
// hardware. INT64 lanes are not handled.
// Consistency fix: the INT8 case read the rotate amount from the signed
// i8 lane view while the other cases used the unsigned views; normalized
// to u8 (identical value after the & 0x7 mask).
void Value::VectorRol(Value* other, TypeName type) {
  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
  switch (type) {
    case INT8_TYPE:
      for (int i = 0; i < 16; i++) {
        constant.v128.u8[i] = xe::rotate_left(
            constant.v128.u8[i], other->constant.v128.u8[i] & 0x7);
      }
      break;
    case INT16_TYPE:
      for (int i = 0; i < 8; i++) {
        constant.v128.u16[i] = xe::rotate_left(
            constant.v128.u16[i], other->constant.v128.u16[i] & 0xF);
      }
      break;
    case INT32_TYPE:
      for (int i = 0; i < 4; i++) {
        constant.v128.u32[i] = xe::rotate_left(
            constant.v128.u32[i], other->constant.v128.u32[i] & 0x1F);
      }
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}
// Element-wise subtract of two vec128 constants: this -= other.
// |type| selects the lane width; |is_unsigned| and |saturate| mirror the
// ARITHMETIC_UNSIGNED/ARITHMETIC_SATURATE instruction flags. Only
// non-saturating INT32 lanes are implemented so far.
// Fix: added the default case — other lane types previously fell through
// silently, leaving this constant with a stale (wrong) value that would
// then be propagated by the optimizer.
void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
                      bool saturate) {
  assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
  switch (type) {
    case INT32_TYPE:
      for (int i = 0; i < 4; i++) {
        if (is_unsigned) {
          if (saturate) {
            // TODO: unsigned saturating subtract.
            assert_always();
          } else {
            constant.v128.u32[i] -= other->constant.v128.u32[i];
          }
        } else {
          if (saturate) {
            // TODO: signed saturating subtract.
            assert_always();
          } else {
            constant.v128.i32[i] -= other->constant.v128.i32[i];
          }
        }
      }
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}
void Value::ByteSwap() {
switch (type) {
case INT8_TYPE:

View File

@ -77,9 +77,13 @@ class Value {
} Use;
typedef union {
int8_t i8;
uint8_t u8;
int16_t i16;
uint16_t u16;
int32_t i32;
uint32_t u32;
int64_t i64;
uint64_t u64;
float f32;
double f64;
vec128_t v128;
@ -190,6 +194,8 @@ class Value {
return !!constant.f32;
case FLOAT64_TYPE:
return !!constant.f64;
case VEC128_TYPE:
return constant.v128.low || constant.v128.high;
default:
assert_unhandled_case(type);
return false;
@ -199,9 +205,6 @@ class Value {
}
}
bool IsConstantFalse() const {
if (type == VEC128_TYPE) {
assert_always();
}
if (flags & VALUE_IS_CONSTANT) {
switch (type) {
case INT8_TYPE:
@ -216,6 +219,8 @@ class Value {
return !constant.f32;
case FLOAT64_TYPE:
return !constant.f64;
case VEC128_TYPE:
return !(constant.v128.low || constant.v128.high);
default:
assert_unhandled_case(type);
return false;
@ -475,6 +480,7 @@ class Value {
void Mul(Value* other);
void MulHi(Value* other, bool is_unsigned);
void Div(Value* other, bool is_unsigned);
void Max(Value* other);
static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);
static void MulSub(Value* dest, Value* value1, Value* value2, Value* value3);
void Neg();
@ -488,6 +494,17 @@ class Value {
void Shl(Value* other);
void Shr(Value* other);
void Sha(Value* other);
void Extract(Value* vec, Value* index);
void Select(Value* other, Value* ctrl);
void Splat(Value* other);
void VectorCompareEQ(Value* other, TypeName type);
void VectorCompareSGT(Value* other, TypeName type);
void VectorConvertI2F(Value* other);
void VectorConvertF2I(Value* other);
void VectorShl(Value* other, TypeName type);
void VectorShr(Value* other, TypeName type);
void VectorRol(Value* other, TypeName type);
void VectorSub(Value* other, TypeName type, bool is_unsigned, bool saturate);
void ByteSwap();
void CountLeadingZeros(const Value* other);
bool Compare(Opcode opcode, Value* other);

View File

@ -423,8 +423,8 @@ typedef struct PPCContext_s {
uint8_t* physical_membase;
// Keep the struct padded out to 64b total.
uint8_t _padding[8];
// Value of last reserved load
uint64_t reserved_val;
static std::string GetRegisterName(PPCRegister reg);
std::string GetStringFromValue(PPCRegister reg) const;

View File

@ -2149,6 +2149,9 @@ int InstrEmit_vupkd3d128(PPCHIRBuilder& f, const InstrData& i) {
case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
v = f.Unpack(v, PACK_TYPE_FLOAT16_2);
break;
case 4:
v = f.Unpack(v, PACK_TYPE_FLOAT16_3);
break;
case 5: // VPACK_... 4 FLOAT16s DXGI_FORMAT_R16G16B16A16_FLOAT
v = f.Unpack(v, PACK_TYPE_FLOAT16_4);
break;

View File

@ -10,6 +10,7 @@
#include "xenia/cpu/ppc/ppc_emit-private.h"
#include "xenia/base/assert.h"
#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/ppc/ppc_context.h"
#include "xenia/cpu/ppc/ppc_frontend.h"
#include "xenia/cpu/ppc/ppc_hir_builder.h"
@ -725,10 +726,14 @@ int InstrEmit_mtmsr(PPCHIRBuilder& f, const InstrData& i) {
f.ZeroExtend(f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE), INT64_TYPE));
if (i.X.RT == 13) {
// iff storing from r13 we are taking a lock (disable interrupts).
f.CallExtern(f.builtins()->enter_global_lock);
if (!FLAGS_disable_global_lock) {
f.CallExtern(f.builtins()->enter_global_lock);
}
} else {
// Otherwise we are restoring interrupts (probably).
f.CallExtern(f.builtins()->leave_global_lock);
if (!FLAGS_disable_global_lock) {
f.CallExtern(f.builtins()->leave_global_lock);
}
}
return 0;
} else {
@ -746,10 +751,14 @@ int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) {
f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE));
if (i.X.RT == 13) {
// iff storing from r13 we are taking a lock (disable interrupts).
f.CallExtern(f.builtins()->enter_global_lock);
if (!FLAGS_disable_global_lock) {
f.CallExtern(f.builtins()->enter_global_lock);
}
} else {
// Otherwise we are restoring interrupts (probably).
f.CallExtern(f.builtins()->leave_global_lock);
if (!FLAGS_disable_global_lock) {
f.CallExtern(f.builtins()->leave_global_lock);
}
}
return 0;
} else {

View File

@ -658,6 +658,7 @@ int InstrEmit_ldarx(PPCHIRBuilder& f, const InstrData& i) {
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
Value* rt = f.ByteSwap(f.Load(ea, INT64_TYPE));
f.StoreReserved(rt);
f.StoreGPR(i.X.RT, rt);
return 0;
}
@ -682,6 +683,7 @@ int InstrEmit_lwarx(PPCHIRBuilder& f, const InstrData& i) {
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE);
f.StoreReserved(rt);
f.StoreGPR(i.X.RT, rt);
return 0;
}
@ -700,11 +702,15 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) {
// NOTE: we assume we are within a global lock.
// As we have been exclusively executing this entire time, we assume that no
// one else could have possibly touched the memory and must always succeed.
// We use atomic compare exchange here to support reserved load/store without
// being under the global lock (flag disable_global_lock - see mtmsr/mtmsrd).
// This will always succeed if under the global lock, however.
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
Value* rt = f.ByteSwap(f.LoadGPR(i.X.RT));
f.Store(ea, rt);
f.StoreContext(offsetof(PPCContext, cr0.cr0_eq), f.LoadConstantInt8(1));
Value* res = f.ByteSwap(f.LoadReserved());
Value* v = f.AtomicCompareExchange(ea, res, rt);
f.StoreContext(offsetof(PPCContext, cr0.cr0_eq), v);
f.StoreContext(offsetof(PPCContext, cr0.cr0_lt), f.LoadZeroInt8());
f.StoreContext(offsetof(PPCContext, cr0.cr0_gt), f.LoadZeroInt8());
@ -729,11 +735,15 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) {
// NOTE: we assume we are within a global lock.
// As we have been exclusively executing this entire time, we assume that no
// one else could have possibly touched the memory and must always succeed.
// We use atomic compare exchange here to support reserved load/store without
// being under the global lock (flag disable_global_lock - see mtmsr/mtmsrd).
// This will always succeed if under the global lock, however.
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
Value* rt = f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
f.Store(ea, rt);
f.StoreContext(offsetof(PPCContext, cr0.cr0_eq), f.LoadConstantInt8(1));
Value* res = f.ByteSwap(f.Truncate(f.LoadReserved(), INT32_TYPE));
Value* v = f.AtomicCompareExchange(ea, res, rt);
f.StoreContext(offsetof(PPCContext, cr0.cr0_eq), v);
f.StoreContext(offsetof(PPCContext, cr0.cr0_lt), f.LoadZeroInt8());
f.StoreContext(offsetof(PPCContext, cr0.cr0_gt), f.LoadZeroInt8());

View File

@ -511,6 +511,15 @@ void PPCHIRBuilder::StoreVR(uint32_t reg, Value* value) {
trace_reg.value = value;
}
void PPCHIRBuilder::StoreReserved(Value* val) {
  // Records the 64-bit value observed by a reserved load (lwarx/ldarx) into
  // the context so a later stwcx/stdcx can compare-exchange against it.
  assert_true(val->type == INT64_TYPE);
  StoreContext(offsetof(PPCContext, reserved_val), val);
}
// Returns the value recorded by the most recent StoreReserved (i.e. the
// reservation taken by lwarx/ldarx) as an INT64.
Value* PPCHIRBuilder::LoadReserved() {
  return LoadContext(offsetof(PPCContext, reserved_val), INT64_TYPE);
}
} // namespace ppc
} // namespace cpu
} // namespace xe

View File

@ -78,6 +78,9 @@ class PPCHIRBuilder : public hir::HIRBuilder {
Value* LoadVR(uint32_t reg);
void StoreVR(uint32_t reg, Value* value);
void StoreReserved(Value* val);
Value* LoadReserved();
private:
void AnnotateLabel(uint32_t address, Label* label);

View File

@ -286,9 +286,6 @@ bool XexModule::Load(const std::string& name, const std::string& path,
}
// Setup memory protection.
// TODO: This introduces a load of constants into the JIT, and Xenia isn't
// quite set-up to handle constants yet...
/*
auto sec_header = xex_security_info();
auto heap = memory()->LookupHeap(sec_header->load_address);
auto page_size = heap->page_size();
@ -311,7 +308,6 @@ bool XexModule::Load(const std::string& name, const std::string& path,
page += desc.size;
}
*/
return true;
}

View File

@ -535,6 +535,14 @@ void GlslShaderTranslator::ProcessVertexFetchInstruction(
EmitSource("// ");
instr.Disassemble(&source_);
if (instr.operands[0].storage_index != 0) {
// Unimplemented for now.
EmitUnimplementedTranslationError();
EmitSourceDepth("pv.xyzw = vec4(0.0, 0.0, 0.0, 0.0);\n");
EmitStoreVectorResult(instr.result);
return;
}
if (instr.is_predicated) {
EmitSourceDepth("if (%cp0) {\n", instr.predicate_condition ? ' ' : '!');
Indent();

View File

@ -1251,22 +1251,20 @@ pointer_result_t InterlockedPushEntrySList(
assert_not_null(plist_ptr);
assert_not_null(entry);
// Hold a global lock during this method. Once in the lock we assume we have
// exclusive access to the structure.
auto global_lock = xe::global_critical_region::AcquireDirect();
alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr;
alignas(8) X_SLIST_HEADER new_hdr = {0};
new_hdr.depth = old_hdr.depth + 1;
new_hdr.sequence = old_hdr.sequence + 1;
uint32_t old_head = 0;
do {
old_hdr = *plist_ptr;
new_hdr.depth = old_hdr.depth + 1;
new_hdr.sequence = old_hdr.sequence + 1;
uint32_t old_head = old_hdr.next.next;
entry->next = old_hdr.next.next;
new_hdr.next.next = entry.guest_address();
*reinterpret_cast<uint64_t*>(plist_ptr.host_address()) =
*reinterpret_cast<uint64_t*>(&new_hdr);
xe::threading::SyncMemory();
uint32_t old_head = old_hdr.next.next;
entry->next = old_hdr.next.next;
new_hdr.next.next = entry.guest_address();
} while (
!xe::atomic_cas(*(uint64_t*)(&old_hdr), *(uint64_t*)(&new_hdr),
reinterpret_cast<uint64_t*>(plist_ptr.host_address())));
return old_head;
}
@ -1276,28 +1274,24 @@ DECLARE_XBOXKRNL_EXPORT(InterlockedPushEntrySList,
// Pops the head entry off a guest singly-linked list header.
// Lock-free: re-reads the header and retries via 64-bit compare-and-swap
// until the header is swapped atomically, so concurrent guest/host pushes
// and pops are safe without the global lock.
// Returns the guest address of the popped entry, or 0 if the list is empty.
// Fix: removed a duplicated pre-loop copy of the loop body (old_hdr was
// declared and the header read/decoded twice), which made the function
// ill-formed.
pointer_result_t InterlockedPopEntrySList(pointer_t<X_SLIST_HEADER> plist_ptr) {
  assert_not_null(plist_ptr);

  uint32_t popped = 0;
  alignas(8) X_SLIST_HEADER old_hdr = {0};
  alignas(8) X_SLIST_HEADER new_hdr = {0};
  do {
    // Snapshot the header; the CAS below fails and retries if it changed.
    old_hdr = *plist_ptr;
    auto next = kernel_memory()->TranslateVirtual<X_SINGLE_LIST_ENTRY*>(
        old_hdr.next.next);
    if (!old_hdr.next.next) {
      // Empty list.
      return 0;
    }
    popped = old_hdr.next.next;

    new_hdr.depth = old_hdr.depth - 1;
    new_hdr.next.next = next->next;
    new_hdr.sequence = old_hdr.sequence;
  } while (
      !xe::atomic_cas(*(uint64_t*)(&old_hdr), *(uint64_t*)(&new_hdr),
                      reinterpret_cast<uint64_t*>(plist_ptr.host_address())));

  return popped;
}
@ -1307,20 +1301,18 @@ DECLARE_XBOXKRNL_EXPORT(InterlockedPopEntrySList,
// Atomically detaches the entire chain from a guest singly-linked list
// header, resetting it to empty (depth/sequence/next all zero).
// Lock-free: retries via 64-bit compare-and-swap until the header is
// swapped atomically.
// Returns the guest address of the first detached entry (0 if the list was
// already empty).
// Fix: removed a duplicated pre-loop copy of the loop body left over from
// the lock-based version, which made the function ill-formed.
pointer_result_t InterlockedFlushSList(pointer_t<X_SLIST_HEADER> plist_ptr) {
  assert_not_null(plist_ptr);

  alignas(8) X_SLIST_HEADER old_hdr = {0};
  alignas(8) X_SLIST_HEADER new_hdr = {0};
  uint32_t first = 0;
  do {
    // Snapshot the header; the CAS below fails and retries if it changed.
    old_hdr = *plist_ptr;
    first = old_hdr.next.next;
    new_hdr.next.next = 0;
    new_hdr.depth = 0;
    new_hdr.sequence = 0;
  } while (
      !xe::atomic_cas(*(uint64_t*)(&old_hdr), *(uint64_t*)(&new_hdr),
                      reinterpret_cast<uint64_t*>(plist_ptr.host_address())));

  return first;
}