Merge branch 'master' into vk_vfetch

DrChat 2018-02-14 20:31:31 -06:00
commit 310606d0f6
19 changed files with 641 additions and 297 deletions

View File

@ -42,15 +42,16 @@ git:
submodules: false
before_script:
- export LIBVULKAN_VERSION=1.0.61.1
- export CXX=$CXX_COMPILER
- export CC=$C_COMPILER
# Dump useful info.
- $CXX --version
- python3 --version
# Add Vulkan dependencies
- travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan1_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb
- travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan-dev_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb
- if [[ $BUILD == true ]]; then sudo dpkg -i libvulkan1_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb libvulkan-dev_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb; fi
- travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan1_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb
- travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan-dev_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb
- if [[ $BUILD == true ]]; then sudo dpkg -i libvulkan1_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb libvulkan-dev_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb; fi
# Prepare environment (pull dependencies, build tools).
- travis_retry ./xenia-build setup

View File

@ -139,6 +139,7 @@ filter("platforms:Windows")
"/wd4127", -- 'conditional expression is constant'.
"/wd4324", -- 'structure was padded due to alignment specifier'.
"/wd4189", -- 'local variable is initialized but not referenced'.
"/utf-8", -- 'build correctly on systems with non-Latin codepages'.
})
flags({
"NoMinimalRebuild", -- Required for /MP above.

View File

@ -24,8 +24,20 @@ void copy_128_aligned(void* dest, const void* src, size_t count) {
}
#if XE_ARCH_AMD64
void copy_and_swap_16_aligned(void* dest, const void* src, size_t count) {
return copy_and_swap_16_unaligned(dest, src, count);
void copy_and_swap_16_aligned(void* dest_ptr, const void* src_ptr,
size_t count) {
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
size_t i;
for (i = 0; i + 8 <= count; i += 8) {
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output =
_mm_or_si128(_mm_slli_epi16(input, 8), _mm_srli_epi16(input, 8));
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
dest[i] = byte_swap(src[i]);
}
}
void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
@ -44,8 +56,31 @@ void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
}
}
void copy_and_swap_32_aligned(void* dest, const void* src, size_t count) {
return copy_and_swap_32_unaligned(dest, src, count);
void copy_and_swap_32_aligned(void* dest_ptr, const void* src_ptr,
size_t count) {
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
__m128i byte2mask = _mm_set1_epi32(0x00FF0000);
__m128i byte3mask = _mm_set1_epi32(0x0000FF00);
size_t i;
for (i = 0; i + 4 <= count; i += 4) {
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
// Do the four shifts.
__m128i byte1 = _mm_slli_epi32(input, 24);
__m128i byte2 = _mm_slli_epi32(input, 8);
__m128i byte3 = _mm_srli_epi32(input, 8);
__m128i byte4 = _mm_srli_epi32(input, 24);
// OR bytes together.
__m128i output = _mm_or_si128(byte1, byte4);
byte2 = _mm_and_si128(byte2, byte2mask);
output = _mm_or_si128(output, byte2);
byte3 = _mm_and_si128(byte3, byte3mask);
output = _mm_or_si128(output, byte3);
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
dest[i] = byte_swap(src[i]);
}
}
void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
@ -75,8 +110,33 @@ void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
}
}
void copy_and_swap_64_aligned(void* dest, const void* src, size_t count) {
return copy_and_swap_64_unaligned(dest, src, count);
void copy_and_swap_64_aligned(void* dest_ptr, const void* src_ptr,
size_t count) {
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
__m128i byte2mask = _mm_set1_epi32(0x00FF0000);
__m128i byte3mask = _mm_set1_epi32(0x0000FF00);
size_t i;
for (i = 0; i + 2 <= count; i += 2) {
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
// Do the four shifts.
__m128i byte1 = _mm_slli_epi32(input, 24);
__m128i byte2 = _mm_slli_epi32(input, 8);
__m128i byte3 = _mm_srli_epi32(input, 8);
__m128i byte4 = _mm_srli_epi32(input, 24);
// OR bytes together.
__m128i output = _mm_or_si128(byte1, byte4);
byte2 = _mm_and_si128(byte2, byte2mask);
output = _mm_or_si128(output, byte2);
byte3 = _mm_and_si128(byte3, byte3mask);
output = _mm_or_si128(output, byte3);
// Reorder the two words.
output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
dest[i] = byte_swap(src[i]);
}
}
void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr,
@ -108,8 +168,20 @@ void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr,
}
}
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count) {
return copy_and_swap_16_in_32_unaligned(dest, src, count);
void copy_and_swap_16_in_32_aligned(void* dest_ptr, const void* src_ptr,
size_t count) {
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
size_t i;
for (i = 0; i + 4 <= count; i += 4) {
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output =
_mm_or_si128(_mm_slli_epi32(input, 16), _mm_srli_epi32(input, 16));
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
dest[i] = (src[i] >> 16) | (src[i] << 16);
}
}
void copy_and_swap_16_in_32_unaligned(void* dest_ptr, const void* src_ptr,

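The SSE2 paths above handle 16 bytes per load and fall back to a scalar byte_swap loop for the tail. As a scalar model of what the shift/mask composition computes (swap32_scalar is an illustrative name, not part of the change):

// Scalar sketch of the 32-bit swap assembled from four shifts and two masks,
// mirroring the _mm_slli_epi32/_mm_srli_epi32/_mm_and_si128 sequence above.
static inline uint32_t swap32_scalar(uint32_t x) {
  uint32_t byte1 = x << 24;                 // lowest byte -> highest
  uint32_t byte2 = (x << 8) & 0x00FF0000u;  // second byte -> third
  uint32_t byte3 = (x >> 8) & 0x0000FF00u;  // third byte -> second
  uint32_t byte4 = x >> 24;                 // highest byte -> lowest
  return byte1 | byte2 | byte3 | byte4;
}
// The 64-bit variant applies the same 32-bit swap and then exchanges the two
// halves of each 64-bit lane, which is what the
// _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1)) step does.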
View File

@ -1663,7 +1663,6 @@ struct LOAD_VECTOR_SHL_I8
e.shl(e.dx, 4);
e.mov(e.rax, (uintptr_t)lvsl_table);
e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]);
e.ReloadMembase();
}
}
};
@ -1705,7 +1704,6 @@ struct LOAD_VECTOR_SHR_I8
e.shl(e.dx, 4);
e.mov(e.rax, (uintptr_t)lvsr_table);
e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]);
e.ReloadMembase();
}
}
};
@ -2129,6 +2127,176 @@ struct STORE_MMIO_I32
};
EMITTER_OPCODE_TABLE(OPCODE_STORE_MMIO, STORE_MMIO_I32);
// ============================================================================
// OPCODE_LOAD_OFFSET
// ============================================================================
template <typename T>
RegExp ComputeMemoryAddressOffset(X64Emitter& e, const T& guest,
const T& offset) {
int32_t offset_const = static_cast<int32_t>(offset.constant());
if (guest.is_constant) {
uint32_t address = static_cast<uint32_t>(guest.constant());
address += static_cast<int32_t>(offset.constant());
if (address < 0x80000000) {
return e.GetMembaseReg() + address;
} else {
e.mov(e.eax, address);
return e.GetMembaseReg() + e.rax;
}
} else {
// Clear the top 32 bits, as they are likely garbage.
// TODO(benvanik): find a way to avoid doing this.
e.mov(e.eax, guest.reg().cvt32());
return e.GetMembaseReg() + e.rax + offset_const;
}
}
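ComputeMemoryAddressOffset folds a guest address and a constant offset into a host address relative to the membase register. A minimal scalar sketch of the register path (host_address is an illustrative helper, not part of the emitter):

// The emitted RegExp corresponds roughly to:
//   host = membase + uint32_t(guest) + offset
// Constant guest addresses below 0x80000000 are folded straight into the
// displacement; larger ones are staged through eax so the displacement is
// not sign-extended.
uint8_t* host_address(uint8_t* membase, uint64_t guest, int32_t offset) {
  // The upper 32 bits of the guest register may be garbage, so only the low
  // 32 bits are used, matching the mov(eax, guest.reg().cvt32()) above.
  return membase + static_cast<uint32_t>(guest) + offset;
}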
struct LOAD_OFFSET_I8
: Sequence<LOAD_OFFSET_I8, I<OPCODE_LOAD_OFFSET, I8Op, I64Op, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
e.mov(i.dest, e.byte[addr]);
}
};
struct LOAD_OFFSET_I16
: Sequence<LOAD_OFFSET_I16, I<OPCODE_LOAD_OFFSET, I16Op, I64Op, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
e.movbe(i.dest, e.word[addr]);
} else {
e.mov(i.dest, e.word[addr]);
e.ror(i.dest, 8);
}
} else {
e.mov(i.dest, e.word[addr]);
}
}
};
struct LOAD_OFFSET_I32
: Sequence<LOAD_OFFSET_I32, I<OPCODE_LOAD_OFFSET, I32Op, I64Op, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
e.movbe(i.dest, e.dword[addr]);
} else {
e.mov(i.dest, e.dword[addr]);
e.bswap(i.dest);
}
} else {
e.mov(i.dest, e.dword[addr]);
}
}
};
struct LOAD_OFFSET_I64
: Sequence<LOAD_OFFSET_I64, I<OPCODE_LOAD_OFFSET, I64Op, I64Op, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
e.movbe(i.dest, e.qword[addr]);
} else {
e.mov(i.dest, e.qword[addr]);
e.bswap(i.dest);
}
} else {
e.mov(i.dest, e.qword[addr]);
}
}
};
EMITTER_OPCODE_TABLE(OPCODE_LOAD_OFFSET, LOAD_OFFSET_I8, LOAD_OFFSET_I16,
LOAD_OFFSET_I32, LOAD_OFFSET_I64);
// ============================================================================
// OPCODE_STORE_OFFSET
// ============================================================================
struct STORE_OFFSET_I8
: Sequence<STORE_OFFSET_I8,
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I8Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
if (i.src3.is_constant) {
e.mov(e.byte[addr], i.src3.constant());
} else {
e.mov(e.byte[addr], i.src3);
}
}
};
struct STORE_OFFSET_I16
: Sequence<STORE_OFFSET_I16,
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I16Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
assert_false(i.src3.is_constant);
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
e.movbe(e.word[addr], i.src3);
} else {
assert_always("not implemented");
}
} else {
if (i.src3.is_constant) {
e.mov(e.word[addr], i.src3.constant());
} else {
e.mov(e.word[addr], i.src3);
}
}
}
};
struct STORE_OFFSET_I32
: Sequence<STORE_OFFSET_I32,
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
assert_false(i.src3.is_constant);
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
e.movbe(e.dword[addr], i.src3);
} else {
assert_always("not implemented");
}
} else {
if (i.src3.is_constant) {
e.mov(e.dword[addr], i.src3.constant());
} else {
e.mov(e.dword[addr], i.src3);
}
}
}
};
struct STORE_OFFSET_I64
: Sequence<STORE_OFFSET_I64,
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
assert_false(i.src3.is_constant);
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
e.movbe(e.qword[addr], i.src3);
} else {
assert_always("not implemented");
}
} else {
if (i.src3.is_constant) {
e.MovMem64(addr, i.src3.constant());
} else {
e.mov(e.qword[addr], i.src3);
}
}
}
};
EMITTER_OPCODE_TABLE(OPCODE_STORE_OFFSET, STORE_OFFSET_I8, STORE_OFFSET_I16,
STORE_OFFSET_I32, STORE_OFFSET_I64);
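For byte-swapped accesses these sequences prefer movbe where available and otherwise fall back to mov plus bswap (or ror for 16-bit loads); the non-movbe byte-swapped store path is currently left unimplemented. Independent of which instruction gets emitted, a dword load_offset with the swap flag means roughly the following (a sketch only; xe::byte_swap is the helper already used in memory.cc):

// #include <cstring>  // memcpy, for the sketch only
uint32_t load_offset_u32_swapped(const uint8_t* membase, uint64_t guest,
                                 int32_t offset) {
  uint32_t value;
  std::memcpy(&value, membase + static_cast<uint32_t>(guest) + offset,
              sizeof(value));
  return xe::byte_swap(value);  // big-endian guest value -> host order
}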
// ============================================================================
// OPCODE_LOAD
// ============================================================================
@ -2139,8 +2307,13 @@ RegExp ComputeMemoryAddress(X64Emitter& e, const T& guest) {
// TODO(benvanik): figure out how to do this without a temp.
// Since the constant is often 0x8... if we tried to use that as a
// displacement it would be sign extended and mess things up.
e.mov(e.eax, static_cast<uint32_t>(guest.constant()));
return e.GetMembaseReg() + e.rax;
uint32_t address = static_cast<uint32_t>(guest.constant());
if (address < 0x80000000) {
return e.GetMembaseReg() + address;
} else {
e.mov(e.eax, address);
return e.GetMembaseReg() + e.rax;
}
} else {
// Clear the top 32 bits, as they are likely garbage.
// TODO(benvanik): find a way to avoid doing this.
@ -2779,13 +2952,13 @@ struct SELECT_F32
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm2, i.src2.constant());
e.LoadConstantXmm(src2, i.src2.constant());
}
e.vpandn(e.xmm1, e.xmm0, src2);
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm2, i.src3.constant());
e.LoadConstantXmm(src3, i.src3.constant());
}
e.vpand(i.dest, e.xmm0, src3);
e.vpor(i.dest, e.xmm1);
@ -2802,13 +2975,13 @@ struct SELECT_F64
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm2, i.src2.constant());
e.LoadConstantXmm(src2, i.src2.constant());
}
e.vpandn(e.xmm1, e.xmm0, src2);
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm2, i.src3.constant());
e.LoadConstantXmm(src3, i.src3.constant());
}
e.vpand(i.dest, e.xmm0, src3);
e.vpor(i.dest, e.xmm1);
@ -2827,13 +3000,13 @@ struct SELECT_V128_I8
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm2, i.src2.constant());
e.LoadConstantXmm(src2, i.src2.constant());
}
e.vpandn(e.xmm1, e.xmm0, src2);
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm2, i.src3.constant());
e.LoadConstantXmm(src3, i.src3.constant());
}
e.vpand(i.dest, e.xmm0, src3);
e.vpor(i.dest, e.xmm1);
@ -2845,18 +3018,18 @@ struct SELECT_V128_V128
static void Emit(X64Emitter& e, const EmitArgType& i) {
Xmm src1 = i.src1.is_constant ? e.xmm1 : i.src1;
if (i.src1.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src1.constant());
e.LoadConstantXmm(src1, i.src1.constant());
}
Xmm src2 = i.src2.is_constant ? e.xmm0 : i.src2;
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.LoadConstantXmm(src2, i.src2.constant());
}
e.vpandn(e.xmm0, src1, src2);
Xmm src3 = i.src3.is_constant ? i.dest : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(i.dest, i.src3.constant());
e.LoadConstantXmm(src3, i.src3.constant());
}
e.vpand(i.dest, src1, src3);
@ -3863,8 +4036,6 @@ struct MUL_I8 : Sequence<MUL_I8, I<OPCODE_MUL, I8Op, I8Op, I8Op>> {
e.mov(i.dest, e.al);
}
}
e.ReloadMembase();
}
};
struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
@ -3906,8 +4077,6 @@ struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
e.movzx(i.dest, e.ax);
}
}
e.ReloadMembase();
}
};
struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
@ -3950,8 +4119,6 @@ struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
e.mov(i.dest, e.eax);
}
}
e.ReloadMembase();
}
};
struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
@ -3993,8 +4160,6 @@ struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
e.mov(i.dest, e.rax);
}
}
e.ReloadMembase();
}
};
struct MUL_F32 : Sequence<MUL_F32, I<OPCODE_MUL, F32Op, F32Op, F32Op>> {
@ -4072,7 +4237,6 @@ struct MUL_HI_I8 : Sequence<MUL_HI_I8, I<OPCODE_MUL_HI, I8Op, I8Op, I8Op>> {
}
e.mov(i.dest, e.ah);
}
e.ReloadMembase();
}
};
struct MUL_HI_I16
@ -4116,7 +4280,6 @@ struct MUL_HI_I16
}
e.mov(i.dest, e.dx);
}
e.ReloadMembase();
}
};
struct MUL_HI_I32
@ -4165,7 +4328,6 @@ struct MUL_HI_I32
}
e.mov(i.dest, e.edx);
}
e.ReloadMembase();
}
};
struct MUL_HI_I64
@ -4214,7 +4376,6 @@ struct MUL_HI_I64
}
e.mov(i.dest, e.rdx);
}
e.ReloadMembase();
}
};
EMITTER_OPCODE_TABLE(OPCODE_MUL_HI, MUL_HI_I8, MUL_HI_I16, MUL_HI_I32,
@ -4230,11 +4391,8 @@ struct DIV_I8 : Sequence<DIV_I8, I<OPCODE_DIV, I8Op, I8Op, I8Op>> {
Xbyak::Label skip;
e.inLocalLabel();
// NOTE: RDX clobbered.
bool clobbered_rcx = false;
if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
clobbered_rcx = true;
e.mov(e.cl, i.src2.constant());
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
e.movzx(e.ax, i.src1);
@ -4268,10 +4426,6 @@ struct DIV_I8 : Sequence<DIV_I8, I<OPCODE_DIV, I8Op, I8Op, I8Op>> {
e.L(skip);
e.outLocalLabel();
e.mov(i.dest, e.al);
if (clobbered_rcx) {
e.ReloadContext();
}
e.ReloadMembase();
}
};
struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
@ -4279,11 +4433,8 @@ struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
Xbyak::Label skip;
e.inLocalLabel();
// NOTE: RDX clobbered.
bool clobbered_rcx = false;
if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
clobbered_rcx = true;
e.mov(e.cx, i.src2.constant());
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
e.mov(e.ax, i.src1);
@ -4323,10 +4474,6 @@ struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
e.L(skip);
e.outLocalLabel();
e.mov(i.dest, e.ax);
if (clobbered_rcx) {
e.ReloadContext();
}
e.ReloadMembase();
}
};
struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
@ -4334,11 +4481,8 @@ struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
Xbyak::Label skip;
e.inLocalLabel();
// NOTE: RDX clobbered.
bool clobbered_rcx = false;
if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
clobbered_rcx = true;
e.mov(e.ecx, i.src2.constant());
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
e.mov(e.eax, i.src1);
@ -4378,10 +4522,6 @@ struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
e.L(skip);
e.outLocalLabel();
e.mov(i.dest, e.eax);
if (clobbered_rcx) {
e.ReloadContext();
}
e.ReloadMembase();
}
};
struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
@ -4389,11 +4529,8 @@ struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
Xbyak::Label skip;
e.inLocalLabel();
// NOTE: RDX clobbered.
bool clobbered_rcx = false;
if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
clobbered_rcx = true;
e.mov(e.rcx, i.src2.constant());
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
e.mov(e.rax, i.src1);
@ -4433,10 +4570,6 @@ struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
e.L(skip);
e.outLocalLabel();
e.mov(i.dest, e.rax);
if (clobbered_rcx) {
e.ReloadContext();
}
e.ReloadMembase();
}
};
struct DIV_F32 : Sequence<DIV_F32, I<OPCODE_DIV, F32Op, F32Op, F32Op>> {
@ -4493,31 +4626,31 @@ struct MUL_ADD_F32
// FMA extension
if (e.IsFeatureEnabled(kX64EmitFMA)) {
EmitCommutativeBinaryXmmOp(
e, i,
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
const Xmm& src2) {
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
}
if (i.dest == src1) {
e.vfmadd213ss(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmadd213ss(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmadd231ss(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovss(i.dest, src1);
e.vfmadd213ss(i.dest, src2, src3);
}
});
EmitCommutativeBinaryXmmOp(e, i,
[&i](X64Emitter& e, const Xmm& dest,
const Xmm& src1, const Xmm& src2) {
Xmm src3 =
i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(src3, i.src3.constant());
}
if (i.dest == src1) {
e.vfmadd213ss(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmadd213ss(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmadd231ss(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovss(i.dest, src1);
e.vfmadd213ss(i.dest, src2, src3);
}
});
} else {
Xmm src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
src3 = e.xmm1;
e.LoadConstantXmm(src3, i.src3.constant());
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
src3 = i.src3;
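The 213/231 suffix choice above follows the x86 FMA3 naming: the digits name which operands are multiplied and which is added, with operand 1 always receiving the result. For the scalar-single forms:

// vfmadd213ss a, b, c  =>  a = (b * a) + c
// vfmadd231ss a, b, c  =>  a = (b * c) + a
// Hence when i.dest already aliases src1 or src2 the 213 form is used, and
// when i.dest aliases src3 the 231 form accumulates into it without an
// extra vmovss.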
@ -4552,31 +4685,31 @@ struct MUL_ADD_F64
// FMA extension
if (e.IsFeatureEnabled(kX64EmitFMA)) {
EmitCommutativeBinaryXmmOp(
e, i,
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
const Xmm& src2) {
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
}
if (i.dest == src1) {
e.vfmadd213sd(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmadd213sd(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmadd231sd(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovsd(i.dest, src1);
e.vfmadd213sd(i.dest, src2, src3);
}
});
EmitCommutativeBinaryXmmOp(e, i,
[&i](X64Emitter& e, const Xmm& dest,
const Xmm& src1, const Xmm& src2) {
Xmm src3 =
i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(src3, i.src3.constant());
}
if (i.dest == src1) {
e.vfmadd213sd(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmadd213sd(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmadd231sd(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovsd(i.dest, src1);
e.vfmadd213sd(i.dest, src2, src3);
}
});
} else {
Xmm src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
src3 = e.xmm1;
e.LoadConstantXmm(src3, i.src3.constant());
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
src3 = i.src3;
@ -4617,31 +4750,31 @@ struct MUL_ADD_V128
// than vmul+vadd and it'd be nice to know why. Until we know, it's
// disabled so tests pass.
if (false && e.IsFeatureEnabled(kX64EmitFMA)) {
EmitCommutativeBinaryXmmOp(
e, i,
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
const Xmm& src2) {
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
}
if (i.dest == src1) {
e.vfmadd213ps(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmadd213ps(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmadd231ps(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovdqa(i.dest, src1);
e.vfmadd213ps(i.dest, src2, src3);
}
});
EmitCommutativeBinaryXmmOp(e, i,
[&i](X64Emitter& e, const Xmm& dest,
const Xmm& src1, const Xmm& src2) {
Xmm src3 =
i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(src3, i.src3.constant());
}
if (i.dest == src1) {
e.vfmadd213ps(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmadd213ps(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmadd231ps(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovdqa(i.dest, src1);
e.vfmadd213ps(i.dest, src2, src3);
}
});
} else {
Xmm src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
src3 = e.xmm1;
e.LoadConstantXmm(src3, i.src3.constant());
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
src3 = i.src3;
@ -4690,31 +4823,31 @@ struct MUL_SUB_F32
// FMA extension
if (e.IsFeatureEnabled(kX64EmitFMA)) {
EmitCommutativeBinaryXmmOp(
e, i,
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
const Xmm& src2) {
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
}
if (i.dest == src1) {
e.vfmsub213ss(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmsub213ss(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmsub231ss(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovss(i.dest, src1);
e.vfmsub213ss(i.dest, src2, src3);
}
});
EmitCommutativeBinaryXmmOp(e, i,
[&i](X64Emitter& e, const Xmm& dest,
const Xmm& src1, const Xmm& src2) {
Xmm src3 =
i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(src3, i.src3.constant());
}
if (i.dest == src1) {
e.vfmsub213ss(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmsub213ss(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmsub231ss(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovss(i.dest, src1);
e.vfmsub213ss(i.dest, src2, src3);
}
});
} else {
Xmm src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
src3 = e.xmm1;
e.LoadConstantXmm(src3, i.src3.constant());
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
src3 = i.src3;
@ -4749,31 +4882,31 @@ struct MUL_SUB_F64
// FMA extension
if (e.IsFeatureEnabled(kX64EmitFMA)) {
EmitCommutativeBinaryXmmOp(
e, i,
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
const Xmm& src2) {
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
}
if (i.dest == src1) {
e.vfmsub213sd(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmsub213sd(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmsub231sd(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovsd(i.dest, src1);
e.vfmsub213sd(i.dest, src2, src3);
}
});
EmitCommutativeBinaryXmmOp(e, i,
[&i](X64Emitter& e, const Xmm& dest,
const Xmm& src1, const Xmm& src2) {
Xmm src3 =
i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(src3, i.src3.constant());
}
if (i.dest == src1) {
e.vfmsub213sd(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmsub213sd(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmsub231sd(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovsd(i.dest, src1);
e.vfmsub213sd(i.dest, src2, src3);
}
});
} else {
Xmm src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
src3 = e.xmm1;
e.LoadConstantXmm(src3, i.src3.constant());
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
src3 = i.src3;
@ -4812,31 +4945,31 @@ struct MUL_SUB_V128
// FMA extension
if (e.IsFeatureEnabled(kX64EmitFMA)) {
EmitCommutativeBinaryXmmOp(
e, i,
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
const Xmm& src2) {
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
}
if (i.dest == src1) {
e.vfmsub213ps(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmsub213ps(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmsub231ps(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovdqa(i.dest, src1);
e.vfmsub213ps(i.dest, src2, src3);
}
});
EmitCommutativeBinaryXmmOp(e, i,
[&i](X64Emitter& e, const Xmm& dest,
const Xmm& src1, const Xmm& src2) {
Xmm src3 =
i.src3.is_constant ? e.xmm1 : i.src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(src3, i.src3.constant());
}
if (i.dest == src1) {
e.vfmsub213ps(i.dest, src2, src3);
} else if (i.dest == src2) {
e.vfmsub213ps(i.dest, src1, src3);
} else if (i.dest == i.src3) {
e.vfmsub231ps(i.dest, src1, src2);
} else {
// Dest not equal to anything
e.vmovdqa(i.dest, src1);
e.vfmsub213ps(i.dest, src2, src3);
}
});
} else {
Xmm src3;
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm1, i.src3.constant());
src3 = e.xmm1;
e.LoadConstantXmm(src3, i.src3.constant());
} else {
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
src3 = i.src3;
@ -5319,7 +5452,6 @@ void EmitShlXX(X64Emitter& e, const ARGS& i) {
} else {
e.mov(e.cl, src);
e.shl(dest_src, e.cl);
e.ReloadContext();
}
},
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
@ -5397,7 +5529,6 @@ void EmitShrXX(X64Emitter& e, const ARGS& i) {
} else {
e.mov(e.cl, src);
e.shr(dest_src, e.cl);
e.ReloadContext();
}
},
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
@ -5473,7 +5604,6 @@ void EmitSarXX(X64Emitter& e, const ARGS& i) {
} else {
e.mov(e.cl, src);
e.sar(dest_src, e.cl);
e.ReloadContext();
}
},
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
@ -6088,7 +6218,6 @@ void EmitRotateLeftXX(X64Emitter& e, const ARGS& i) {
}
}
e.rol(i.dest, e.cl);
e.ReloadContext();
}
}
struct ROTATE_LEFT_I8
@ -6579,7 +6708,6 @@ struct EXTRACT_I32
e.vmovaps(e.xmm0, e.ptr[e.rdx + e.rax]);
e.vpshufb(e.xmm0, src1, e.xmm0);
e.vpextrd(i.dest, e.xmm0, 0);
e.ReloadMembase();
}
}
};
@ -6877,8 +7005,8 @@ struct SWIZZLE
uint8_t swizzle_mask = static_cast<uint8_t>(i.src2.value);
Xmm src1;
if (i.src1.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src1.constant());
src1 = e.xmm0;
e.LoadConstantXmm(src1, i.src1.constant());
} else {
src1 = i.src1;
}
@ -7135,7 +7263,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
// PACKUSWB / SaturateSignedWordToUnsignedByte
Xbyak::Xmm src2 = i.src2.is_constant ? e.xmm0 : i.src2;
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.LoadConstantXmm(src2, i.src2.constant());
}
e.vpackuswb(i.dest, i.src1, src2);
@ -7241,8 +7369,8 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
src2 = i.src2;
} else {
assert_false(i.src1 == e.xmm0);
e.LoadConstantXmm(e.xmm0, i.src2.constant());
src2 = e.xmm0;
e.LoadConstantXmm(src2, i.src2.constant());
}
e.vpackssdw(i.dest, i.src1, src2);
e.vpshuflw(i.dest, i.dest, 0b10110001);
@ -7352,8 +7480,8 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
} else {
Xmm src;
if (i.src1.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src1.constant());
src = e.xmm0;
e.LoadConstantXmm(src, i.src1.constant());
} else {
src = i.src1;
}
@ -7619,8 +7747,6 @@ struct ATOMIC_COMPARE_EXCHANGE_I32
e.lock();
e.cmpxchg(e.dword[e.GetMembaseReg() + e.rcx], i.src3);
e.sete(i.dest);
e.ReloadContext();
}
};
struct ATOMIC_COMPARE_EXCHANGE_I64
@ -7632,8 +7758,6 @@ struct ATOMIC_COMPARE_EXCHANGE_I64
e.lock();
e.cmpxchg(e.qword[e.GetMembaseReg() + e.rcx], i.src3);
e.sete(i.dest);
e.ReloadContext();
}
};
EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE,
@ -7696,6 +7820,8 @@ void RegisterSequences() {
Register_OPCODE_CONTEXT_BARRIER();
Register_OPCODE_LOAD_MMIO();
Register_OPCODE_STORE_MMIO();
Register_OPCODE_LOAD_OFFSET();
Register_OPCODE_STORE_OFFSET();
Register_OPCODE_LOAD();
Register_OPCODE_STORE();
Register_OPCODE_MEMSET();

View File

@ -195,10 +195,15 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
break;
case OPCODE_LOAD:
case OPCODE_LOAD_OFFSET:
if (i->src1.value->IsConstant()) {
assert_false(i->flags & LOAD_STORE_BYTE_SWAP);
auto memory = processor_->memory();
auto address = i->src1.value->constant.i32;
if (i->opcode->num == OPCODE_LOAD_OFFSET) {
address += i->src2.value->constant.i32;
}
auto mmio_range =
processor_->memory()->LookupVirtualMappedRange(address);
if (FLAGS_inline_mmio_access && mmio_range) {
@ -246,12 +251,21 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
}
break;
case OPCODE_STORE:
case OPCODE_STORE_OFFSET:
if (FLAGS_inline_mmio_access && i->src1.value->IsConstant()) {
auto address = i->src1.value->constant.i32;
if (i->opcode->num == OPCODE_STORE_OFFSET) {
address += i->src2.value->constant.i32;
}
auto mmio_range =
processor_->memory()->LookupVirtualMappedRange(address);
if (mmio_range) {
auto value = i->src2.value;
if (i->opcode->num == OPCODE_STORE_OFFSET) {
value = i->src3.value;
}
i->Replace(&OPCODE_STORE_MMIO_info, 0);
i->src1.offset = reinterpret_cast<uint64_t>(mmio_range);
i->src2.offset = address;
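With OPCODE_STORE_OFFSET handled here as well, a store whose base and offset are both constant and resolve into an MMIO range is rewritten in place into a STORE_MMIO; the value operand comes from src3 for the offset form but ends up in the rewritten instruction the same way. Schematically (HIR spelling and operand names are illustrative):

// Before (base and offset constant, base + offset inside an MMIO range):
//   store_offset <base>, <offset>, v3
// After i->Replace(&OPCODE_STORE_MMIO_info, 0):
//   store_mmio <mmio_range>, <base + offset>, v3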

View File

@ -35,9 +35,11 @@ bool MemorySequenceCombinationPass::Run(HIRBuilder* builder) {
while (block) {
auto i = block->instr_head;
while (i) {
if (i->opcode == &OPCODE_LOAD_info) {
if (i->opcode == &OPCODE_LOAD_info ||
i->opcode == &OPCODE_LOAD_OFFSET_info) {
CombineLoadSequence(i);
} else if (i->opcode == &OPCODE_STORE_info) {
} else if (i->opcode == &OPCODE_STORE_info ||
i->opcode == &OPCODE_STORE_OFFSET_info) {
CombineStoreSequence(i);
}
i = i->next;
@ -112,6 +114,10 @@ void MemorySequenceCombinationPass::CombineStoreSequence(Instr* i) {
// store_convert v0, v1.i64, [swap|i64->i32,trunc]
auto src = i->src2.value;
if (i->opcode == &OPCODE_STORE_OFFSET_info) {
src = i->src3.value;
}
if (src->IsConstant()) {
// Constant value write - ignore.
return;
@ -135,7 +141,11 @@ void MemorySequenceCombinationPass::CombineStoreSequence(Instr* i) {
// Pull the original value (from before the byte swap).
// The byte swap itself will go away in DCE.
i->set_src2(def->src1.value);
if (i->opcode == &OPCODE_STORE_info) {
i->set_src2(def->src1.value);
} else if (i->opcode == &OPCODE_STORE_OFFSET_info) {
i->set_src3(def->src1.value);
}
// TODO(benvanik): extend/truncate.
}
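The store path now also looks through a byte_swap feeding either store form: the pre-swap value is re-attached to the store (src2 for store, src3 for store_offset) and the swap itself is left for DCE to remove. Roughly (HIR spelling approximate):

// Before:
//   v1 = byte_swap v0
//   store_offset v_base, v_off, v1
// After combining (the byte_swap becomes dead and is cleaned up by DCE):
//   store_offset v_base, v_off, v0 [swap]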

View File

@ -1232,6 +1232,25 @@ void HIRBuilder::StoreMmio(cpu::MMIORange* mmio_range, uint32_t address,
i->set_src3(value);
}
Value* HIRBuilder::LoadOffset(Value* address, Value* offset, TypeName type,
uint32_t load_flags) {
ASSERT_ADDRESS_TYPE(address);
Instr* i = AppendInstr(OPCODE_LOAD_OFFSET_info, load_flags, AllocValue(type));
i->set_src1(address);
i->set_src2(offset);
i->src3.value = NULL;
return i->dest;
}
void HIRBuilder::StoreOffset(Value* address, Value* offset, Value* value,
uint32_t store_flags) {
ASSERT_ADDRESS_TYPE(address);
Instr* i = AppendInstr(OPCODE_STORE_OFFSET_info, store_flags);
i->set_src1(address);
i->set_src2(offset);
i->set_src3(value);
}
Value* HIRBuilder::Load(Value* address, TypeName type, uint32_t load_flags) {
ASSERT_ADDRESS_TYPE(address);
Instr* i = AppendInstr(OPCODE_LOAD_info, load_flags, AllocValue(type));

View File

@ -147,6 +147,11 @@ class HIRBuilder {
Value* LoadMmio(cpu::MMIORange* mmio_range, uint32_t address, TypeName type);
void StoreMmio(cpu::MMIORange* mmio_range, uint32_t address, Value* value);
Value* LoadOffset(Value* address, Value* offset, TypeName type,
uint32_t load_flags = 0);
void StoreOffset(Value* address, Value* offset, Value* value,
uint32_t store_flags = 0);
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
void Store(Value* address, Value* value, uint32_t store_flags = 0);
void Memset(Value* address, Value* value, Value* length);

View File

@ -152,6 +152,8 @@ enum Opcode {
OPCODE_CONTEXT_BARRIER,
OPCODE_LOAD_MMIO,
OPCODE_STORE_MMIO,
OPCODE_LOAD_OFFSET,
OPCODE_STORE_OFFSET,
OPCODE_LOAD,
OPCODE_STORE,
OPCODE_MEMSET,

View File

@ -231,6 +231,18 @@ DEFINE_OPCODE(
OPCODE_SIG_X_O_O_V,
OPCODE_FLAG_MEMORY)
DEFINE_OPCODE(
OPCODE_LOAD_OFFSET,
"load_offset",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_MEMORY)
DEFINE_OPCODE(
OPCODE_STORE_OFFSET,
"store_offset",
OPCODE_SIG_X_V_V_V,
OPCODE_FLAG_MEMORY)
DEFINE_OPCODE(
OPCODE_LOAD,
"load",

View File

@ -118,16 +118,16 @@ uintptr_t MMIOHandler::AddPhysicalAccessWatch(uint32_t guest_address,
bool hit = false;
auto entry = *it;
if (base_address < (*it)->address &&
if (base_address <= (*it)->address &&
base_address + length > (*it)->address) {
hit = true;
} else if ((*it)->address < base_address &&
} else if ((*it)->address <= base_address &&
(*it)->address + (*it)->length > base_address) {
hit = true;
} else if ((*it)->address < base_address &&
} else if ((*it)->address <= base_address &&
(*it)->address + (*it)->length > base_address + length) {
hit = true;
} else if ((*it)->address > base_address &&
} else if ((*it)->address >= base_address &&
(*it)->address + (*it)->length < base_address + length) {
hit = true;
}
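These comparisons were loosened from strict to inclusive so that a watch starting exactly at the queried base (or vice versa) still counts as a hit. For non-empty half-open ranges the four checks together amount to the standard interval-overlap test, sketched here (ranges_overlap is illustrative):

// [a_begin, a_end) and [b_begin, b_end) overlap iff each starts before the
// other ends.
bool ranges_overlap(uint32_t a_begin, uint32_t a_end, uint32_t b_begin,
                    uint32_t b_end) {
  return a_begin < b_end && b_begin < a_end;
}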

View File

@ -73,7 +73,11 @@ class MMIOHandler {
WatchType type, AccessWatchCallback callback,
void* callback_context, void* callback_data);
void CancelAccessWatch(uintptr_t watch_handle);
// Fires and clears any access watches that overlap this range.
void InvalidateRange(uint32_t physical_address, size_t length);
// Returns true if /any/ part of this range is watched.
bool IsRangeWatched(uint32_t physical_address, size_t length);
protected:

View File

@ -249,22 +249,22 @@ enum class PPCRegister {
typedef struct PPCContext_s {
// Must be stored at 0x0 for now.
// TODO(benvanik): find a nice way to describe this to the JIT.
ThreadState* thread_state;
ThreadState* thread_state; // 0x0
// TODO(benvanik): this is getting nasty. Must be here.
uint8_t* virtual_membase;
uint8_t* virtual_membase; // 0x8
// Most frequently used registers first.
uint64_t lr; // Link register
uint64_t ctr; // Count register
uint64_t r[32]; // General purpose registers
double f[32]; // Floating-point registers
vec128_t v[128]; // VMX128 vector registers
uint64_t lr; // 0x10 Link register
uint64_t ctr; // 0x18 Count register
uint64_t r[32]; // 0x20 General purpose registers
double f[32]; // 0x120 Floating-point registers
vec128_t v[128]; // 0x220 VMX128 vector registers
// XER register:
// Split to make it easier to do individual updates.
uint8_t xer_ca;
uint8_t xer_ov;
uint8_t xer_so;
uint8_t xer_ca; // 0xA20
uint8_t xer_ov; // 0xA21
uint8_t xer_so; // 0xA22
// Condition registers:
// These are split to make it easier to do DCE on unused stores.
@ -279,7 +279,7 @@ typedef struct PPCContext_s {
// successfully
uint8_t cr0_so; // Summary Overflow (SO) - copy of XER[SO]
};
} cr0;
} cr0; // 0xA24
union {
uint32_t value;
struct {

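The new offset annotations can be cross-checked against the declared field sizes (8-byte pointers and integers, 16-byte vec128_t); a sketch of the arithmetic and the asserts one might add for it (requires <cstddef> for offsetof):

// 0x20  + 32 * 8   = 0x120  (r[32] ends where f[32] begins)
// 0x120 + 32 * 8   = 0x220  (f[32] ends where v[128] begins)
// 0x220 + 128 * 16 = 0xA20  (v[128] ends where xer_ca begins)
static_assert(offsetof(PPCContext_s, r) == 0x20, "r[] offset");
static_assert(offsetof(PPCContext_s, f) == 0x120, "f[] offset");
static_assert(offsetof(PPCContext_s, v) == 0x220, "v[] offset");
static_assert(offsetof(PPCContext_s, xer_ca) == 0xA20, "xer_ca offset");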
View File

@ -984,8 +984,10 @@ int InstrEmit_rlwinmx(PPCHIRBuilder& f, const InstrData& i) {
// m <- MASK(MB+32, ME+32)
// RA <- r & m
Value* v = f.LoadGPR(i.M.RT);
// (x||x)
v = f.Or(f.Shl(v, 32), f.And(v, f.LoadConstantUint64(0xFFFFFFFF)));
v = f.Or(f.Shl(v, 32), f.ZeroExtend(f.Truncate(v, INT32_TYPE), INT64_TYPE));
// TODO(benvanik): optimize srwi
// TODO(benvanik): optimize slwi
// The compiler will generate a bunch of these for the special case of SH=0.
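The change to the (x||x) construction above is a rewording only; both forms isolate the low 32 bits before the OR:

// For any uint64_t x these produce the same value:
//   (x << 32) | (x & 0xFFFFFFFFull)                 // old form
//   (x << 32) | uint64_t(static_cast<uint32_t>(x))  // new form
// The truncate/zero-extend spelling presumably just gives the HIR
// simplification passes an easier pattern to recognize.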

View File

@ -63,8 +63,15 @@ int InstrEmit_lbz(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D)
// RT <- i56.0 || MEM(EA, 1)
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.Load(ea, INT8_TYPE), INT64_TYPE);
Value* b;
if (i.D.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.LoadOffset(b, offset, INT8_TYPE), INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
return 0;
}
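Each D-form access now follows the same rewrite: rather than materializing EA = (RA|0) + EXTS(D) and issuing Load/Store on it, the base and sign-extended displacement are handed to LoadOffset/StoreOffset so the backend can fold the displacement into the x64 addressing mode; update forms (lbzu, lhzu, ...) still write back RA via f.Add(ra, offset). The recurring shape:

// Old shape:
//   Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
//   Value* rt = f.ZeroExtend(f.Load(ea, INT8_TYPE), INT64_TYPE);
// New shape (RA == 0 reads as zero per the PowerPC EA rules):
Value* b = i.D.RA == 0 ? f.LoadZeroInt64() : f.LoadGPR(i.D.RA);
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.LoadOffset(b, offset, INT8_TYPE), INT64_TYPE);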
@ -73,10 +80,11 @@ int InstrEmit_lbzu(PPCHIRBuilder& f, const InstrData& i) {
// EA <- (RA) + EXTS(D)
// RT <- i56.0 || MEM(EA, 1)
// RA <- EA
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.Load(ea, INT8_TYPE), INT64_TYPE);
Value* ra = f.LoadGPR(i.D.RA);
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.LoadOffset(ra, offset, INT8_TYPE), INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
StoreEA(f, i.D.RA, ea);
StoreEA(f, i.D.RA, f.Add(ra, offset));
return 0;
}
@ -111,8 +119,16 @@ int InstrEmit_lha(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D)
// RT <- EXTS(MEM(EA, 2))
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.SignExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
Value* b;
if (i.D.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt =
f.SignExtend(f.ByteSwap(f.LoadOffset(b, offset, INT16_TYPE)), INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
return 0;
}
@ -121,10 +137,12 @@ int InstrEmit_lhau(PPCHIRBuilder& f, const InstrData& i) {
// EA <- (RA) + EXTS(D)
// RT <- EXTS(MEM(EA, 2))
// RA <- EA
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.SignExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
Value* ra = f.LoadGPR(i.D.RA);
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt = f.SignExtend(f.ByteSwap(f.LoadOffset(ra, offset, INT16_TYPE)),
INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
StoreEA(f, i.D.RA, ea);
StoreEA(f, i.D.RA, f.Add(ra, offset));
return 0;
}
@ -159,8 +177,16 @@ int InstrEmit_lhz(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D)
// RT <- i48.0 || MEM(EA, 2)
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
Value* b;
if (i.D.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt =
f.ZeroExtend(f.ByteSwap(f.LoadOffset(b, offset, INT16_TYPE)), INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
return 0;
}
@ -169,10 +195,12 @@ int InstrEmit_lhzu(PPCHIRBuilder& f, const InstrData& i) {
// EA <- (RA) + EXTS(D)
// RT <- i48.0 || MEM(EA, 2)
// RA <- EA
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
Value* ra = f.LoadGPR(i.D.RA);
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.ByteSwap(f.LoadOffset(ra, offset, INT16_TYPE)),
INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
StoreEA(f, i.D.RA, ea);
StoreEA(f, i.D.RA, f.Add(ra, offset));
return 0;
}
@ -207,8 +235,16 @@ int InstrEmit_lwa(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D || 00)
// RT <- EXTS(MEM(EA, 4))
Value* ea = CalculateEA_0_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
Value* rt = f.SignExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE);
Value* b;
if (i.DS.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.DS.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
Value* rt =
f.SignExtend(f.ByteSwap(f.LoadOffset(b, offset, INT32_TYPE)), INT64_TYPE);
f.StoreGPR(i.DS.RT, rt);
return 0;
}
@ -244,8 +280,16 @@ int InstrEmit_lwz(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D)
// RT <- i32.0 || MEM(EA, 4)
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE);
Value* b;
if (i.D.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt =
f.ZeroExtend(f.ByteSwap(f.LoadOffset(b, offset, INT32_TYPE)), INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
return 0;
}
@ -254,10 +298,12 @@ int InstrEmit_lwzu(PPCHIRBuilder& f, const InstrData& i) {
// EA <- (RA) + EXTS(D)
// RT <- i32.0 || MEM(EA, 4)
// RA <- EA
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE);
Value* ra = f.LoadGPR(i.D.RA);
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
Value* rt = f.ZeroExtend(f.ByteSwap(f.LoadOffset(ra, offset, INT32_TYPE)),
INT64_TYPE);
f.StoreGPR(i.D.RT, rt);
StoreEA(f, i.D.RA, ea);
StoreEA(f, i.D.RA, f.Add(ra, offset));
return 0;
}
@ -292,8 +338,15 @@ int InstrEmit_ld(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(DS || 0b00)
// RT <- MEM(EA, 8)
Value* ea = CalculateEA_0_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
Value* rt = f.ByteSwap(f.Load(ea, INT64_TYPE));
Value* b;
if (i.DS.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.DS.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
Value* rt = f.ByteSwap(f.LoadOffset(b, offset, INT64_TYPE));
f.StoreGPR(i.DS.RT, rt);
return 0;
}
@ -342,8 +395,15 @@ int InstrEmit_stb(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D)
// MEM(EA, 1) <- (RS)[56:63]
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
f.Store(ea, f.Truncate(f.LoadGPR(i.D.RT), INT8_TYPE));
Value* b;
if (i.D.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
f.StoreOffset(b, offset, f.Truncate(f.LoadGPR(i.D.RT), INT8_TYPE));
return 0;
}
@ -386,8 +446,16 @@ int InstrEmit_sth(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D)
// MEM(EA, 2) <- (RS)[48:63]
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT16_TYPE)));
Value* b;
if (i.D.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
f.StoreOffset(b, offset,
f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT16_TYPE)));
return 0;
}
@ -430,8 +498,16 @@ int InstrEmit_stw(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(D)
// MEM(EA, 4) <- (RS)[32:63]
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
Value* b;
if (i.D.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.D.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
f.StoreOffset(b, offset,
f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
return 0;
}
@ -474,8 +550,15 @@ int InstrEmit_std(PPCHIRBuilder& f, const InstrData& i) {
// b <- (RA)
// EA <- b + EXTS(DS || 0b00)
// MEM(EA, 8) <- (RS)
Value* ea = CalculateEA_0_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
f.Store(ea, f.ByteSwap(f.LoadGPR(i.DS.RT)));
Value* b;
if (i.DS.RA == 0) {
b = f.LoadZeroInt64();
} else {
b = f.LoadGPR(i.DS.RA);
}
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
f.StoreOffset(b, offset, f.ByteSwap(f.LoadGPR(i.DS.RT)));
return 0;
}

View File

@ -33,15 +33,18 @@ bool RawModule::LoadFile(uint32_t base_address, const std::wstring& path) {
// Allocate memory.
// Since we have no real heap just load it wherever.
base_address_ = base_address;
memory_->LookupHeap(base_address_)
->AllocFixed(base_address_, file_length, 0,
kMemoryAllocationReserve | kMemoryAllocationCommit,
kMemoryProtectRead | kMemoryProtectWrite);
auto heap = memory_->LookupHeap(base_address_);
if (!heap ||
!heap->AllocFixed(base_address_, file_length, 0,
kMemoryAllocationReserve | kMemoryAllocationCommit,
kMemoryProtectRead | kMemoryProtectWrite)) {
return false;
}
uint8_t* p = memory_->TranslateVirtual(base_address_);
// Read into memory.
fread(p, file_length, 1, file);
fclose(file);
// Setup debug info.

View File

@ -364,6 +364,22 @@ bool TextureCache::FreeTexture(Texture* texture) {
return true;
}
void TextureCache::WatchCallback(void* context_ptr, void* data_ptr,
uint32_t address) {
auto self = reinterpret_cast<TextureCache*>(context_ptr);
auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
// Clear watch handle first so we don't redundantly
// remove.
assert_not_zero(touched_texture->access_watch_handle);
touched_texture->access_watch_handle = 0;
touched_texture->pending_invalidation = true;
// Add to pending list so Scavenge will clean it up.
self->invalidated_textures_mutex_.lock();
self->invalidated_textures_->push_back(touched_texture);
self->invalidated_textures_mutex_.unlock();
}
TextureCache::Texture* TextureCache::DemandResolveTexture(
const TextureInfo& texture_info) {
auto texture_hash = texture_info.hash();
@ -411,22 +427,7 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
// Setup an access watch. If this texture is touched, it is destroyed.
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
texture_info.guest_address, texture_info.input_length,
cpu::MMIOHandler::kWatchWrite,
[](void* context_ptr, void* data_ptr, uint32_t address) {
auto self = reinterpret_cast<TextureCache*>(context_ptr);
auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
// Clear watch handle first so we don't redundantly
// remove.
assert_not_zero(touched_texture->access_watch_handle);
touched_texture->access_watch_handle = 0;
touched_texture->pending_invalidation = true;
// Add to pending list so Scavenge will clean it up.
self->invalidated_textures_mutex_.lock();
self->invalidated_textures_->push_back(touched_texture);
self->invalidated_textures_mutex_.unlock();
},
this, texture);
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
textures_[texture_hash] = texture;
return texture;
@ -486,21 +487,7 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
// guest.
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
texture_info.guest_address, texture_info.input_length,
cpu::MMIOHandler::kWatchWrite,
[](void* context_ptr, void* data_ptr, uint32_t address) {
auto self = reinterpret_cast<TextureCache*>(context_ptr);
auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
// Clear watch handle first so we don't redundantly
// remove.
assert_not_zero(touched_texture->access_watch_handle);
touched_texture->access_watch_handle = 0;
touched_texture->pending_invalidation = true;
// Add to pending list so Scavenge will clean it up.
self->invalidated_textures_mutex_.lock();
self->invalidated_textures_->push_back(touched_texture);
self->invalidated_textures_mutex_.unlock();
},
this, texture);
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
if (!UploadTexture(command_buffer, completion_fence, texture, texture_info)) {
FreeTexture(texture);
@ -1306,6 +1293,7 @@ void TextureCache::HashTextureBindings(
// We've covered this binding.
continue;
}
fetch_mask |= fetch_bit;
auto& regs = *register_file_;
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6;
@ -1329,8 +1317,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
HashTextureBindings(&hash_state, fetch_mask, vertex_bindings);
HashTextureBindings(&hash_state, fetch_mask, pixel_bindings);
uint64_t hash = XXH64_digest(&hash_state);
for (auto it = texture_bindings_.find(hash); it != texture_bindings_.end();
++it) {
for (auto it = texture_sets_.find(hash); it != texture_sets_.end(); ++it) {
// TODO(DrChat): We need to compare the bindings and ensure they're equal.
return it->second;
}
@ -1378,7 +1365,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
update_set_info->image_writes, 0, nullptr);
}
texture_bindings_[hash] = descriptor_set;
texture_sets_[hash] = descriptor_set;
return descriptor_set;
}
@ -1515,7 +1502,7 @@ void TextureCache::Scavenge() {
// Free unused descriptor sets
// TODO(DrChat): These sets could persist across frames, we just need a smart
// way to detect if they're unused and free them.
texture_bindings_.clear();
texture_sets_.clear();
descriptor_pool_->Scavenge();
staging_buffer_.Scavenge();

View File

@ -134,6 +134,9 @@ class TextureCache {
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
bool FreeTexture(Texture* texture);
static void WatchCallback(void* context_ptr, void* data_ptr,
uint32_t address);
// Demands a texture. If command_buffer is null and the texture hasn't been
// uploaded to graphics memory already, we will return null and bail.
Texture* Demand(const TextureInfo& texture_info,
@ -188,7 +191,7 @@ class TextureCache {
std::unique_ptr<xe::ui::vulkan::CommandBufferPool> wb_command_pool_ = nullptr;
std::unique_ptr<xe::ui::vulkan::DescriptorPool> descriptor_pool_ = nullptr;
std::unordered_map<uint64_t, VkDescriptorSet> texture_bindings_;
std::unordered_map<uint64_t, VkDescriptorSet> texture_sets_;
VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr;
VmaAllocator mem_allocator_ = nullptr;

View File

@ -150,8 +150,8 @@ void VulkanContext::BeginSwap() {
// If it has been, we'll need to reinitialize the swap chain before we
// start touching it.
if (target_window_) {
if (target_window_->width() != swap_chain_->surface_width() ||
target_window_->height() != swap_chain_->surface_height()) {
if (target_window_->scaled_width() != swap_chain_->surface_width() ||
target_window_->scaled_height() != swap_chain_->surface_height()) {
// Resized!
swap_chain_->Reinitialize();
}