Merge branch 'master' into vk_vfetch
commit 310606d0f6
@@ -42,15 +42,16 @@ git:
  submodules: false

before_script:
  - export LIBVULKAN_VERSION=1.0.61.1
  - export CXX=$CXX_COMPILER
  - export CC=$C_COMPILER
  # Dump useful info.
  - $CXX --version
  - python3 --version
  # Add Vulkan dependencies
  - travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan1_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb
  - travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan-dev_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb
  - if [[ $BUILD == true ]]; then sudo dpkg -i libvulkan1_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb libvulkan-dev_1.0.42.0+dfsg1-1ubuntu1~16.04.1_amd64.deb; fi
  - travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan1_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb
  - travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan-dev_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb
  - if [[ $BUILD == true ]]; then sudo dpkg -i libvulkan1_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb libvulkan-dev_$LIBVULKAN_VERSION+dfsg1-1ubuntu1~16.04.1_amd64.deb; fi
  # Prepare environment (pull dependencies, build tools).
  - travis_retry ./xenia-build setup
@@ -139,6 +139,7 @@ filter("platforms:Windows")
    "/wd4127", -- 'conditional expression is constant'.
    "/wd4324", -- 'structure was padded due to alignment specifier'.
    "/wd4189", -- 'local variable is initialized but not referenced'.
    "/utf-8", -- 'build correctly on systems with non-Latin codepages'.
  })
  flags({
    "NoMinimalRebuild", -- Required for /MP above.
@@ -24,8 +24,20 @@ void copy_128_aligned(void* dest, const void* src, size_t count) {
}

#if XE_ARCH_AMD64
void copy_and_swap_16_aligned(void* dest, const void* src, size_t count) {
  return copy_and_swap_16_unaligned(dest, src, count);
void copy_and_swap_16_aligned(void* dest_ptr, const void* src_ptr,
                              size_t count) {
  auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
  auto src = reinterpret_cast<const uint16_t*>(src_ptr);
  size_t i;
  for (i = 0; i + 8 <= count; i += 8) {
    __m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
    __m128i output =
        _mm_or_si128(_mm_slli_epi16(input, 8), _mm_srli_epi16(input, 8));
    _mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
  }
  for (; i < count; ++i) {  // handle residual elements
    dest[i] = byte_swap(src[i]);
  }
}

void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
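Note: the SSE2 path above swaps the bytes of each 16-bit lane by OR-ing a left and a right shift by 8, then finishes any tail elements with scalar byte_swap. A minimal, self-contained sketch of the same lane swap (standalone C++, not xenia code; the test values are made up for illustration):

#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdint>
#include <cstdio>

// Scalar reference for the per-lane swap done by _mm_slli_epi16/_mm_srli_epi16.
static uint16_t swap16(uint16_t v) {
  return static_cast<uint16_t>((v << 8) | (v >> 8));
}

int main() {
  alignas(16) uint16_t src[8] = {0x1234, 0xABCD, 0x0001, 0xFF00,
                                 0xBEEF, 0xCAFE, 0x7F80, 0x00FF};
  alignas(16) uint16_t dst[8];
  __m128i in = _mm_load_si128(reinterpret_cast<const __m128i*>(src));
  __m128i out = _mm_or_si128(_mm_slli_epi16(in, 8), _mm_srli_epi16(in, 8));
  _mm_store_si128(reinterpret_cast<__m128i*>(dst), out);
  for (int i = 0; i < 8; ++i) {
    std::printf("%04X -> %04X (expect %04X)\n", static_cast<unsigned>(src[i]),
                static_cast<unsigned>(dst[i]),
                static_cast<unsigned>(swap16(src[i])));
  }
  return 0;
}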
@ -44,8 +56,31 @@ void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
|
|||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_32_aligned(void* dest, const void* src, size_t count) {
|
||||
return copy_and_swap_32_unaligned(dest, src, count);
|
||||
void copy_and_swap_32_aligned(void* dest_ptr, const void* src_ptr,
|
||||
size_t count) {
|
||||
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
|
||||
__m128i byte2mask = _mm_set1_epi32(0x00FF0000);
|
||||
__m128i byte3mask = _mm_set1_epi32(0x0000FF00);
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= count; i += 4) {
|
||||
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
|
||||
// Do the four shifts.
|
||||
__m128i byte1 = _mm_slli_epi32(input, 24);
|
||||
__m128i byte2 = _mm_slli_epi32(input, 8);
|
||||
__m128i byte3 = _mm_srli_epi32(input, 8);
|
||||
__m128i byte4 = _mm_srli_epi32(input, 24);
|
||||
// OR bytes together.
|
||||
__m128i output = _mm_or_si128(byte1, byte4);
|
||||
byte2 = _mm_and_si128(byte2, byte2mask);
|
||||
output = _mm_or_si128(output, byte2);
|
||||
byte3 = _mm_and_si128(byte3, byte3mask);
|
||||
output = _mm_or_si128(output, byte3);
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
|
||||
}
|
||||
for (; i < count; ++i) { // handle residual elements
|
||||
dest[i] = byte_swap(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
|
@ -75,8 +110,33 @@ void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
|
|||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_64_aligned(void* dest, const void* src, size_t count) {
|
||||
return copy_and_swap_64_unaligned(dest, src, count);
|
||||
void copy_and_swap_64_aligned(void* dest_ptr, const void* src_ptr,
|
||||
size_t count) {
|
||||
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
|
||||
__m128i byte2mask = _mm_set1_epi32(0x00FF0000);
|
||||
__m128i byte3mask = _mm_set1_epi32(0x0000FF00);
|
||||
size_t i;
|
||||
for (i = 0; i + 2 <= count; i += 2) {
|
||||
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
|
||||
// Do the four shifts.
|
||||
__m128i byte1 = _mm_slli_epi32(input, 24);
|
||||
__m128i byte2 = _mm_slli_epi32(input, 8);
|
||||
__m128i byte3 = _mm_srli_epi32(input, 8);
|
||||
__m128i byte4 = _mm_srli_epi32(input, 24);
|
||||
// OR bytes together.
|
||||
__m128i output = _mm_or_si128(byte1, byte4);
|
||||
byte2 = _mm_and_si128(byte2, byte2mask);
|
||||
output = _mm_or_si128(output, byte2);
|
||||
byte3 = _mm_and_si128(byte3, byte3mask);
|
||||
output = _mm_or_si128(output, byte3);
|
||||
// Reorder the two words.
|
||||
output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
|
||||
}
|
||||
for (; i < count; ++i) { // handle residual elements
|
||||
dest[i] = byte_swap(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
|
@ -108,8 +168,20 @@ void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr,
|
|||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count) {
|
||||
return copy_and_swap_16_in_32_unaligned(dest, src, count);
|
||||
void copy_and_swap_16_in_32_aligned(void* dest_ptr, const void* src_ptr,
|
||||
size_t count) {
|
||||
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= count; i += 4) {
|
||||
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
|
||||
__m128i output =
|
||||
_mm_or_si128(_mm_slli_epi32(input, 16), _mm_srli_epi32(input, 16));
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
|
||||
}
|
||||
for (; i < count; ++i) { // handle residual elements
|
||||
dest[i] = (src[i] >> 16) | (src[i] << 16);
|
||||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_16_in_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
|
|
|
@ -1663,7 +1663,6 @@ struct LOAD_VECTOR_SHL_I8
|
|||
e.shl(e.dx, 4);
|
||||
e.mov(e.rax, (uintptr_t)lvsl_table);
|
||||
e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]);
|
||||
e.ReloadMembase();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -1705,7 +1704,6 @@ struct LOAD_VECTOR_SHR_I8
|
|||
e.shl(e.dx, 4);
|
||||
e.mov(e.rax, (uintptr_t)lvsr_table);
|
||||
e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]);
|
||||
e.ReloadMembase();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -2129,6 +2127,176 @@ struct STORE_MMIO_I32
|
|||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_STORE_MMIO, STORE_MMIO_I32);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_LOAD_OFFSET
|
||||
// ============================================================================
|
||||
template <typename T>
|
||||
RegExp ComputeMemoryAddressOffset(X64Emitter& e, const T& guest,
|
||||
const T& offset) {
|
||||
int32_t offset_const = static_cast<int32_t>(offset.constant());
|
||||
|
||||
if (guest.is_constant) {
|
||||
uint32_t address = static_cast<uint32_t>(guest.constant());
|
||||
address += static_cast<int32_t>(offset.constant());
|
||||
if (address < 0x80000000) {
|
||||
return e.GetMembaseReg() + address;
|
||||
} else {
|
||||
e.mov(e.eax, address);
|
||||
return e.GetMembaseReg() + e.rax;
|
||||
}
|
||||
} else {
|
||||
// Clear the top 32 bits, as they are likely garbage.
|
||||
// TODO(benvanik): find a way to avoid doing this.
|
||||
e.mov(e.eax, guest.reg().cvt32());
|
||||
return e.GetMembaseReg() + e.rax + offset_const;
|
||||
}
|
||||
}
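ComputeMemoryAddressOffset above resolves a guest address as membase + zero-extended guest address + constant offset, and folds addresses below 0x80000000 straight into the addressing mode so no scratch register is needed. A plain-C++ sketch of the same arithmetic (hypothetical helper for illustration, not emitter code):

#include <cstdint>

// Hypothetical stand-in for the emitted address math: host = membase +
// zero-extended low 32 bits of the guest address + constant offset. Guest
// addresses below 0x80000000 fit a positive 32-bit displacement, which is why
// the emitter can encode them directly instead of routing through eax.
inline uint8_t* TranslateGuestAddress(uint8_t* membase, uint64_t guest,
                                      int32_t offset) {
  uint32_t low = static_cast<uint32_t>(guest);  // top 32 bits may be garbage
  return membase + static_cast<uint64_t>(low) + offset;
}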
|
||||
|
||||
struct LOAD_OFFSET_I8
|
||||
: Sequence<LOAD_OFFSET_I8, I<OPCODE_LOAD_OFFSET, I8Op, I64Op, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
e.mov(i.dest, e.byte[addr]);
|
||||
}
|
||||
};
|
||||
|
||||
struct LOAD_OFFSET_I16
|
||||
: Sequence<LOAD_OFFSET_I16, I<OPCODE_LOAD_OFFSET, I16Op, I64Op, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
|
||||
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
|
||||
e.movbe(i.dest, e.word[addr]);
|
||||
} else {
|
||||
e.mov(i.dest, e.word[addr]);
|
||||
e.ror(i.dest, 8);
|
||||
}
|
||||
} else {
|
||||
e.mov(i.dest, e.word[addr]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct LOAD_OFFSET_I32
|
||||
: Sequence<LOAD_OFFSET_I32, I<OPCODE_LOAD_OFFSET, I32Op, I64Op, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
|
||||
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
|
||||
e.movbe(i.dest, e.dword[addr]);
|
||||
} else {
|
||||
e.mov(i.dest, e.dword[addr]);
|
||||
e.bswap(i.dest);
|
||||
}
|
||||
} else {
|
||||
e.mov(i.dest, e.dword[addr]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct LOAD_OFFSET_I64
|
||||
: Sequence<LOAD_OFFSET_I64, I<OPCODE_LOAD_OFFSET, I64Op, I64Op, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
|
||||
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
|
||||
e.movbe(i.dest, e.qword[addr]);
|
||||
} else {
|
||||
e.mov(i.dest, e.qword[addr]);
|
||||
e.bswap(i.dest);
|
||||
}
|
||||
} else {
|
||||
e.mov(i.dest, e.qword[addr]);
|
||||
}
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_LOAD_OFFSET, LOAD_OFFSET_I8, LOAD_OFFSET_I16,
|
||||
LOAD_OFFSET_I32, LOAD_OFFSET_I64);
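For reference, the byte-swapped load these LOAD_OFFSET sequences emit (movbe, or mov plus bswap when MOVBE is unavailable) behaves like the following host-side C++. This is an illustrative sketch rather than emitter code, and __builtin_bswap32 assumes a GCC/Clang toolchain:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Guest memory is big-endian; a swapped 32-bit load reads the bytes at
// membase + address and converts them to host (little-endian) order.
uint32_t LoadGuestU32(const uint8_t* membase, uint32_t guest_address) {
  uint32_t raw;
  std::memcpy(&raw, membase + guest_address, sizeof(raw));
  return __builtin_bswap32(raw);  // what movbe / mov+bswap accomplish
}

int main() {
  const uint8_t guest_bytes[4] = {0x12, 0x34, 0x56, 0x78};
  std::printf("%08X\n", LoadGuestU32(guest_bytes, 0));  // 12345678 on x86
  return 0;
}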
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_STORE_OFFSET
|
||||
// ============================================================================
|
||||
struct STORE_OFFSET_I8
|
||||
: Sequence<STORE_OFFSET_I8,
|
||||
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I8Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
if (i.src3.is_constant) {
|
||||
e.mov(e.byte[addr], i.src3.constant());
|
||||
} else {
|
||||
e.mov(e.byte[addr], i.src3);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct STORE_OFFSET_I16
|
||||
: Sequence<STORE_OFFSET_I16,
|
||||
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I16Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
|
||||
assert_false(i.src3.is_constant);
|
||||
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
|
||||
e.movbe(e.word[addr], i.src3);
|
||||
} else {
|
||||
assert_always("not implemented");
|
||||
}
|
||||
} else {
|
||||
if (i.src3.is_constant) {
|
||||
e.mov(e.word[addr], i.src3.constant());
|
||||
} else {
|
||||
e.mov(e.word[addr], i.src3);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct STORE_OFFSET_I32
|
||||
: Sequence<STORE_OFFSET_I32,
|
||||
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
|
||||
assert_false(i.src3.is_constant);
|
||||
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
|
||||
e.movbe(e.dword[addr], i.src3);
|
||||
} else {
|
||||
assert_always("not implemented");
|
||||
}
|
||||
} else {
|
||||
if (i.src3.is_constant) {
|
||||
e.mov(e.dword[addr], i.src3.constant());
|
||||
} else {
|
||||
e.mov(e.dword[addr], i.src3);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct STORE_OFFSET_I64
|
||||
: Sequence<STORE_OFFSET_I64,
|
||||
I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
|
||||
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
|
||||
assert_false(i.src3.is_constant);
|
||||
if (e.IsFeatureEnabled(kX64EmitMovbe)) {
|
||||
e.movbe(e.qword[addr], i.src3);
|
||||
} else {
|
||||
assert_always("not implemented");
|
||||
}
|
||||
} else {
|
||||
if (i.src3.is_constant) {
|
||||
e.MovMem64(addr, i.src3.constant());
|
||||
} else {
|
||||
e.mov(e.qword[addr], i.src3);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_STORE_OFFSET, STORE_OFFSET_I8, STORE_OFFSET_I16,
|
||||
STORE_OFFSET_I32, STORE_OFFSET_I64);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_LOAD
|
||||
// ============================================================================
|
||||
|
@ -2139,8 +2307,13 @@ RegExp ComputeMemoryAddress(X64Emitter& e, const T& guest) {
|
|||
// TODO(benvanik): figure out how to do this without a temp.
|
||||
// Since the constant is often 0x8... if we tried to use that as a
|
||||
// displacement it would be sign extended and mess things up.
|
||||
e.mov(e.eax, static_cast<uint32_t>(guest.constant()));
|
||||
return e.GetMembaseReg() + e.rax;
|
||||
uint32_t address = static_cast<uint32_t>(guest.constant());
|
||||
if (address < 0x80000000) {
|
||||
return e.GetMembaseReg() + address;
|
||||
} else {
|
||||
e.mov(e.eax, address);
|
||||
return e.GetMembaseReg() + e.rax;
|
||||
}
|
||||
} else {
|
||||
// Clear the top 32 bits, as they are likely garbage.
|
||||
// TODO(benvanik): find a way to avoid doing this.
|
||||
|
@ -2779,13 +2952,13 @@ struct SELECT_F32
|
|||
|
||||
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm2, i.src2.constant());
|
||||
e.LoadConstantXmm(src2, i.src2.constant());
|
||||
}
|
||||
e.vpandn(e.xmm1, e.xmm0, src2);
|
||||
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm2, i.src3.constant());
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
e.vpand(i.dest, e.xmm0, src3);
|
||||
e.vpor(i.dest, e.xmm1);
|
||||
|
@ -2802,13 +2975,13 @@ struct SELECT_F64
|
|||
|
||||
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm2, i.src2.constant());
|
||||
e.LoadConstantXmm(src2, i.src2.constant());
|
||||
}
|
||||
e.vpandn(e.xmm1, e.xmm0, src2);
|
||||
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm2, i.src3.constant());
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
e.vpand(i.dest, e.xmm0, src3);
|
||||
e.vpor(i.dest, e.xmm1);
|
||||
|
@ -2827,13 +3000,13 @@ struct SELECT_V128_I8
|
|||
|
||||
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm2, i.src2.constant());
|
||||
e.LoadConstantXmm(src2, i.src2.constant());
|
||||
}
|
||||
e.vpandn(e.xmm1, e.xmm0, src2);
|
||||
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm2, i.src3.constant());
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
e.vpand(i.dest, e.xmm0, src3);
|
||||
e.vpor(i.dest, e.xmm1);
|
||||
|
@ -2845,18 +3018,18 @@ struct SELECT_V128_V128
|
|||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
Xmm src1 = i.src1.is_constant ? e.xmm1 : i.src1;
|
||||
if (i.src1.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src1.constant());
|
||||
e.LoadConstantXmm(src1, i.src1.constant());
|
||||
}
|
||||
|
||||
Xmm src2 = i.src2.is_constant ? e.xmm0 : i.src2;
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm0, i.src2.constant());
|
||||
e.LoadConstantXmm(src2, i.src2.constant());
|
||||
}
|
||||
e.vpandn(e.xmm0, src1, src2);
|
||||
|
||||
Xmm src3 = i.src3.is_constant ? i.dest : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(i.dest, i.src3.constant());
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
e.vpand(i.dest, src1, src3);
|
||||
|
||||
|
@ -3863,8 +4036,6 @@ struct MUL_I8 : Sequence<MUL_I8, I<OPCODE_MUL, I8Op, I8Op, I8Op>> {
|
|||
e.mov(i.dest, e.al);
|
||||
}
|
||||
}
|
||||
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
|
||||
|
@ -3906,8 +4077,6 @@ struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
|
|||
e.movzx(i.dest, e.ax);
|
||||
}
|
||||
}
|
||||
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
|
||||
|
@ -3950,8 +4119,6 @@ struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
|
|||
e.mov(i.dest, e.eax);
|
||||
}
|
||||
}
|
||||
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
|
||||
|
@ -3993,8 +4160,6 @@ struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
|
|||
e.mov(i.dest, e.rax);
|
||||
}
|
||||
}
|
||||
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct MUL_F32 : Sequence<MUL_F32, I<OPCODE_MUL, F32Op, F32Op, F32Op>> {
|
||||
|
@ -4072,7 +4237,6 @@ struct MUL_HI_I8 : Sequence<MUL_HI_I8, I<OPCODE_MUL_HI, I8Op, I8Op, I8Op>> {
|
|||
}
|
||||
e.mov(i.dest, e.ah);
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct MUL_HI_I16
|
||||
|
@ -4116,7 +4280,6 @@ struct MUL_HI_I16
|
|||
}
|
||||
e.mov(i.dest, e.dx);
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct MUL_HI_I32
|
||||
|
@ -4165,7 +4328,6 @@ struct MUL_HI_I32
|
|||
}
|
||||
e.mov(i.dest, e.edx);
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct MUL_HI_I64
|
||||
|
@ -4214,7 +4376,6 @@ struct MUL_HI_I64
|
|||
}
|
||||
e.mov(i.dest, e.rdx);
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_MUL_HI, MUL_HI_I8, MUL_HI_I16, MUL_HI_I32,
|
||||
|
@ -4230,11 +4391,8 @@ struct DIV_I8 : Sequence<DIV_I8, I<OPCODE_DIV, I8Op, I8Op, I8Op>> {
|
|||
Xbyak::Label skip;
|
||||
e.inLocalLabel();
|
||||
|
||||
// NOTE: RDX clobbered.
|
||||
bool clobbered_rcx = false;
|
||||
if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
clobbered_rcx = true;
|
||||
e.mov(e.cl, i.src2.constant());
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
e.movzx(e.ax, i.src1);
|
||||
|
@ -4268,10 +4426,6 @@ struct DIV_I8 : Sequence<DIV_I8, I<OPCODE_DIV, I8Op, I8Op, I8Op>> {
|
|||
e.L(skip);
|
||||
e.outLocalLabel();
|
||||
e.mov(i.dest, e.al);
|
||||
if (clobbered_rcx) {
|
||||
e.ReloadContext();
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
|
||||
|
@ -4279,11 +4433,8 @@ struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
|
|||
Xbyak::Label skip;
|
||||
e.inLocalLabel();
|
||||
|
||||
// NOTE: RDX clobbered.
|
||||
bool clobbered_rcx = false;
|
||||
if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
clobbered_rcx = true;
|
||||
e.mov(e.cx, i.src2.constant());
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
e.mov(e.ax, i.src1);
|
||||
|
@ -4323,10 +4474,6 @@ struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
|
|||
e.L(skip);
|
||||
e.outLocalLabel();
|
||||
e.mov(i.dest, e.ax);
|
||||
if (clobbered_rcx) {
|
||||
e.ReloadContext();
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
|
||||
|
@ -4334,11 +4481,8 @@ struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
|
|||
Xbyak::Label skip;
|
||||
e.inLocalLabel();
|
||||
|
||||
// NOTE: RDX clobbered.
|
||||
bool clobbered_rcx = false;
|
||||
if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
clobbered_rcx = true;
|
||||
e.mov(e.ecx, i.src2.constant());
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
e.mov(e.eax, i.src1);
|
||||
|
@ -4378,10 +4522,6 @@ struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
|
|||
e.L(skip);
|
||||
e.outLocalLabel();
|
||||
e.mov(i.dest, e.eax);
|
||||
if (clobbered_rcx) {
|
||||
e.ReloadContext();
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
|
||||
|
@ -4389,11 +4529,8 @@ struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
|
|||
Xbyak::Label skip;
|
||||
e.inLocalLabel();
|
||||
|
||||
// NOTE: RDX clobbered.
|
||||
bool clobbered_rcx = false;
|
||||
if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
clobbered_rcx = true;
|
||||
e.mov(e.rcx, i.src2.constant());
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
e.mov(e.rax, i.src1);
|
||||
|
@ -4433,10 +4570,6 @@ struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
|
|||
e.L(skip);
|
||||
e.outLocalLabel();
|
||||
e.mov(i.dest, e.rax);
|
||||
if (clobbered_rcx) {
|
||||
e.ReloadContext();
|
||||
}
|
||||
e.ReloadMembase();
|
||||
}
|
||||
};
|
||||
struct DIV_F32 : Sequence<DIV_F32, I<OPCODE_DIV, F32Op, F32Op, F32Op>> {
|
||||
|
@ -4493,31 +4626,31 @@ struct MUL_ADD_F32
|
|||
|
||||
// FMA extension
|
||||
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
||||
EmitCommutativeBinaryXmmOp(
|
||||
e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
|
||||
const Xmm& src2) {
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmadd213ss(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmadd213ss(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmadd231ss(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovss(i.dest, src1);
|
||||
e.vfmadd213ss(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
EmitCommutativeBinaryXmmOp(e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest,
|
||||
const Xmm& src1, const Xmm& src2) {
|
||||
Xmm src3 =
|
||||
i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmadd213ss(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmadd213ss(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmadd231ss(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovss(i.dest, src1);
|
||||
e.vfmadd213ss(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
Xmm src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
src3 = e.xmm1;
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
} else {
|
||||
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
||||
src3 = i.src3;
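A note on the FMA operand orderings used in this hunk: vfmadd213 computes dest = dest * b + c, while vfmadd231 computes dest = b * c + dest, so the sequence picks whichever form already has the right value aliased in i.dest. Scalar reference (standalone C++, ignoring FMA's single rounding):

#include <cstdio>

// Scalar models of the two FMA forms: in the 213 form the destination is one
// of the multiplicands, in the 231 form it is the addend.
float Fmadd213(float dest, float b, float c) { return dest * b + c; }
float Fmadd231(float dest, float b, float c) { return b * c + dest; }

int main() {
  std::printf("%g %g\n", Fmadd213(2.0f, 3.0f, 4.0f),   // 2*3+4 = 10
              Fmadd231(2.0f, 3.0f, 4.0f));             // 3*4+2 = 14
  return 0;
}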
|
||||
|
@ -4552,31 +4685,31 @@ struct MUL_ADD_F64
|
|||
|
||||
// FMA extension
|
||||
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
||||
EmitCommutativeBinaryXmmOp(
|
||||
e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
|
||||
const Xmm& src2) {
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmadd213sd(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmadd213sd(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmadd231sd(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovsd(i.dest, src1);
|
||||
e.vfmadd213sd(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
EmitCommutativeBinaryXmmOp(e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest,
|
||||
const Xmm& src1, const Xmm& src2) {
|
||||
Xmm src3 =
|
||||
i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmadd213sd(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmadd213sd(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmadd231sd(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovsd(i.dest, src1);
|
||||
e.vfmadd213sd(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
Xmm src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
src3 = e.xmm1;
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
} else {
|
||||
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
||||
src3 = i.src3;
|
||||
|
@ -4617,31 +4750,31 @@ struct MUL_ADD_V128
|
|||
// than vmul+vadd and it'd be nice to know why. Until we know, it's
|
||||
// disabled so tests pass.
|
||||
if (false && e.IsFeatureEnabled(kX64EmitFMA)) {
|
||||
EmitCommutativeBinaryXmmOp(
|
||||
e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
|
||||
const Xmm& src2) {
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmadd213ps(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmadd213ps(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmadd231ps(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovdqa(i.dest, src1);
|
||||
e.vfmadd213ps(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
EmitCommutativeBinaryXmmOp(e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest,
|
||||
const Xmm& src1, const Xmm& src2) {
|
||||
Xmm src3 =
|
||||
i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmadd213ps(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmadd213ps(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmadd231ps(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovdqa(i.dest, src1);
|
||||
e.vfmadd213ps(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
Xmm src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
src3 = e.xmm1;
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
} else {
|
||||
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
||||
src3 = i.src3;
|
||||
|
@ -4690,31 +4823,31 @@ struct MUL_SUB_F32
|
|||
|
||||
// FMA extension
|
||||
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
||||
EmitCommutativeBinaryXmmOp(
|
||||
e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
|
||||
const Xmm& src2) {
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmsub213ss(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmsub213ss(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmsub231ss(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovss(i.dest, src1);
|
||||
e.vfmsub213ss(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
EmitCommutativeBinaryXmmOp(e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest,
|
||||
const Xmm& src1, const Xmm& src2) {
|
||||
Xmm src3 =
|
||||
i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmsub213ss(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmsub213ss(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmsub231ss(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovss(i.dest, src1);
|
||||
e.vfmsub213ss(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
Xmm src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
src3 = e.xmm1;
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
} else {
|
||||
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
||||
src3 = i.src3;
|
||||
|
@ -4749,31 +4882,31 @@ struct MUL_SUB_F64
|
|||
|
||||
// FMA extension
|
||||
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
||||
EmitCommutativeBinaryXmmOp(
|
||||
e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
|
||||
const Xmm& src2) {
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmsub213sd(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmsub213sd(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmsub231sd(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovsd(i.dest, src1);
|
||||
e.vfmsub213sd(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
EmitCommutativeBinaryXmmOp(e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest,
|
||||
const Xmm& src1, const Xmm& src2) {
|
||||
Xmm src3 =
|
||||
i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmsub213sd(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmsub213sd(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmsub231sd(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovsd(i.dest, src1);
|
||||
e.vfmsub213sd(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
Xmm src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
src3 = e.xmm1;
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
} else {
|
||||
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
||||
src3 = i.src3;
|
||||
|
@ -4812,31 +4945,31 @@ struct MUL_SUB_V128
|
|||
|
||||
// FMA extension
|
||||
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
||||
EmitCommutativeBinaryXmmOp(
|
||||
e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
|
||||
const Xmm& src2) {
|
||||
Xmm src3 = i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmsub213ps(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmsub213ps(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmsub231ps(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovdqa(i.dest, src1);
|
||||
e.vfmsub213ps(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
EmitCommutativeBinaryXmmOp(e, i,
|
||||
[&i](X64Emitter& e, const Xmm& dest,
|
||||
const Xmm& src1, const Xmm& src2) {
|
||||
Xmm src3 =
|
||||
i.src3.is_constant ? e.xmm1 : i.src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
}
|
||||
if (i.dest == src1) {
|
||||
e.vfmsub213ps(i.dest, src2, src3);
|
||||
} else if (i.dest == src2) {
|
||||
e.vfmsub213ps(i.dest, src1, src3);
|
||||
} else if (i.dest == i.src3) {
|
||||
e.vfmsub231ps(i.dest, src1, src2);
|
||||
} else {
|
||||
// Dest not equal to anything
|
||||
e.vmovdqa(i.dest, src1);
|
||||
e.vfmsub213ps(i.dest, src2, src3);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
Xmm src3;
|
||||
if (i.src3.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm1, i.src3.constant());
|
||||
src3 = e.xmm1;
|
||||
e.LoadConstantXmm(src3, i.src3.constant());
|
||||
} else {
|
||||
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
||||
src3 = i.src3;
|
||||
|
@ -5319,7 +5452,6 @@ void EmitShlXX(X64Emitter& e, const ARGS& i) {
|
|||
} else {
|
||||
e.mov(e.cl, src);
|
||||
e.shl(dest_src, e.cl);
|
||||
e.ReloadContext();
|
||||
}
|
||||
},
|
||||
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
|
||||
|
@ -5397,7 +5529,6 @@ void EmitShrXX(X64Emitter& e, const ARGS& i) {
|
|||
} else {
|
||||
e.mov(e.cl, src);
|
||||
e.shr(dest_src, e.cl);
|
||||
e.ReloadContext();
|
||||
}
|
||||
},
|
||||
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
|
||||
|
@ -5473,7 +5604,6 @@ void EmitSarXX(X64Emitter& e, const ARGS& i) {
|
|||
} else {
|
||||
e.mov(e.cl, src);
|
||||
e.sar(dest_src, e.cl);
|
||||
e.ReloadContext();
|
||||
}
|
||||
},
|
||||
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
|
||||
|
@ -6088,7 +6218,6 @@ void EmitRotateLeftXX(X64Emitter& e, const ARGS& i) {
|
|||
}
|
||||
}
|
||||
e.rol(i.dest, e.cl);
|
||||
e.ReloadContext();
|
||||
}
|
||||
}
|
||||
struct ROTATE_LEFT_I8
|
||||
|
@ -6579,7 +6708,6 @@ struct EXTRACT_I32
|
|||
e.vmovaps(e.xmm0, e.ptr[e.rdx + e.rax]);
|
||||
e.vpshufb(e.xmm0, src1, e.xmm0);
|
||||
e.vpextrd(i.dest, e.xmm0, 0);
|
||||
e.ReloadMembase();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -6877,8 +7005,8 @@ struct SWIZZLE
|
|||
uint8_t swizzle_mask = static_cast<uint8_t>(i.src2.value);
|
||||
Xmm src1;
|
||||
if (i.src1.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm0, i.src1.constant());
|
||||
src1 = e.xmm0;
|
||||
e.LoadConstantXmm(src1, i.src1.constant());
|
||||
} else {
|
||||
src1 = i.src1;
|
||||
}
|
||||
|
@ -7135,7 +7263,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
|||
// PACKUSWB / SaturateSignedWordToUnsignedByte
|
||||
Xbyak::Xmm src2 = i.src2.is_constant ? e.xmm0 : i.src2;
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm0, i.src2.constant());
|
||||
e.LoadConstantXmm(src2, i.src2.constant());
|
||||
}
|
||||
|
||||
e.vpackuswb(i.dest, i.src1, src2);
|
||||
|
@ -7241,8 +7369,8 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
|||
src2 = i.src2;
|
||||
} else {
|
||||
assert_false(i.src1 == e.xmm0);
|
||||
e.LoadConstantXmm(e.xmm0, i.src2.constant());
|
||||
src2 = e.xmm0;
|
||||
e.LoadConstantXmm(src2, i.src2.constant());
|
||||
}
|
||||
e.vpackssdw(i.dest, i.src1, src2);
|
||||
e.vpshuflw(i.dest, i.dest, 0b10110001);
|
||||
|
@ -7352,8 +7480,8 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
|||
} else {
|
||||
Xmm src;
|
||||
if (i.src1.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm0, i.src1.constant());
|
||||
src = e.xmm0;
|
||||
e.LoadConstantXmm(src, i.src1.constant());
|
||||
} else {
|
||||
src = i.src1;
|
||||
}
|
||||
|
@ -7619,8 +7747,6 @@ struct ATOMIC_COMPARE_EXCHANGE_I32
|
|||
e.lock();
|
||||
e.cmpxchg(e.dword[e.GetMembaseReg() + e.rcx], i.src3);
|
||||
e.sete(i.dest);
|
||||
|
||||
e.ReloadContext();
|
||||
}
|
||||
};
|
||||
struct ATOMIC_COMPARE_EXCHANGE_I64
|
||||
|
@ -7632,8 +7758,6 @@ struct ATOMIC_COMPARE_EXCHANGE_I64
|
|||
e.lock();
|
||||
e.cmpxchg(e.qword[e.GetMembaseReg() + e.rcx], i.src3);
|
||||
e.sete(i.dest);
|
||||
|
||||
e.ReloadContext();
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE,
|
||||
|
@ -7696,6 +7820,8 @@ void RegisterSequences() {
|
|||
Register_OPCODE_CONTEXT_BARRIER();
|
||||
Register_OPCODE_LOAD_MMIO();
|
||||
Register_OPCODE_STORE_MMIO();
|
||||
Register_OPCODE_LOAD_OFFSET();
|
||||
Register_OPCODE_STORE_OFFSET();
|
||||
Register_OPCODE_LOAD();
|
||||
Register_OPCODE_STORE();
|
||||
Register_OPCODE_MEMSET();
|
||||
|
|
|
@ -195,10 +195,15 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
|
|||
break;
|
||||
|
||||
case OPCODE_LOAD:
|
||||
case OPCODE_LOAD_OFFSET:
|
||||
if (i->src1.value->IsConstant()) {
|
||||
assert_false(i->flags & LOAD_STORE_BYTE_SWAP);
|
||||
auto memory = processor_->memory();
|
||||
auto address = i->src1.value->constant.i32;
|
||||
if (i->opcode->num == OPCODE_LOAD_OFFSET) {
|
||||
address += i->src2.value->constant.i32;
|
||||
}
|
||||
|
||||
auto mmio_range =
|
||||
processor_->memory()->LookupVirtualMappedRange(address);
|
||||
if (FLAGS_inline_mmio_access && mmio_range) {
|
||||
|
@ -246,12 +251,21 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
|
|||
}
|
||||
break;
|
||||
case OPCODE_STORE:
|
||||
case OPCODE_STORE_OFFSET:
|
||||
if (FLAGS_inline_mmio_access && i->src1.value->IsConstant()) {
|
||||
auto address = i->src1.value->constant.i32;
|
||||
if (i->opcode->num == OPCODE_STORE_OFFSET) {
|
||||
address += i->src2.value->constant.i32;
|
||||
}
|
||||
|
||||
auto mmio_range =
|
||||
processor_->memory()->LookupVirtualMappedRange(address);
|
||||
if (mmio_range) {
|
||||
auto value = i->src2.value;
|
||||
if (i->opcode->num == OPCODE_STORE_OFFSET) {
|
||||
value = i->src3.value;
|
||||
}
|
||||
|
||||
i->Replace(&OPCODE_STORE_MMIO_info, 0);
|
||||
i->src1.offset = reinterpret_cast<uint64_t>(mmio_range);
|
||||
i->src2.offset = address;
|
||||
|
|
|
@ -35,9 +35,11 @@ bool MemorySequenceCombinationPass::Run(HIRBuilder* builder) {
|
|||
while (block) {
|
||||
auto i = block->instr_head;
|
||||
while (i) {
|
||||
if (i->opcode == &OPCODE_LOAD_info) {
|
||||
if (i->opcode == &OPCODE_LOAD_info ||
|
||||
i->opcode == &OPCODE_LOAD_OFFSET_info) {
|
||||
CombineLoadSequence(i);
|
||||
} else if (i->opcode == &OPCODE_STORE_info) {
|
||||
} else if (i->opcode == &OPCODE_STORE_info ||
|
||||
i->opcode == &OPCODE_STORE_OFFSET_info) {
|
||||
CombineStoreSequence(i);
|
||||
}
|
||||
i = i->next;
|
||||
|
@ -112,6 +114,10 @@ void MemorySequenceCombinationPass::CombineStoreSequence(Instr* i) {
|
|||
// store_convert v0, v1.i64, [swap|i64->i32,trunc]
|
||||
|
||||
auto src = i->src2.value;
|
||||
if (i->opcode == &OPCODE_STORE_OFFSET_info) {
|
||||
src = i->src3.value;
|
||||
}
|
||||
|
||||
if (src->IsConstant()) {
|
||||
// Constant value write - ignore.
|
||||
return;
|
||||
|
@ -135,7 +141,11 @@ void MemorySequenceCombinationPass::CombineStoreSequence(Instr* i) {
|
|||
|
||||
// Pull the original value (from before the byte swap).
|
||||
// The byte swap itself will go away in DCE.
|
||||
i->set_src2(def->src1.value);
|
||||
if (i->opcode == &OPCODE_STORE_info) {
|
||||
i->set_src2(def->src1.value);
|
||||
} else if (i->opcode == &OPCODE_STORE_OFFSET_info) {
|
||||
i->set_src3(def->src1.value);
|
||||
}
|
||||
|
||||
// TODO(benvanik): extend/truncate.
|
||||
}
|
||||
|
|
|
@ -1232,6 +1232,25 @@ void HIRBuilder::StoreMmio(cpu::MMIORange* mmio_range, uint32_t address,
|
|||
i->set_src3(value);
|
||||
}
|
||||
|
||||
Value* HIRBuilder::LoadOffset(Value* address, Value* offset, TypeName type,
|
||||
uint32_t load_flags) {
|
||||
ASSERT_ADDRESS_TYPE(address);
|
||||
Instr* i = AppendInstr(OPCODE_LOAD_OFFSET_info, load_flags, AllocValue(type));
|
||||
i->set_src1(address);
|
||||
i->set_src2(offset);
|
||||
i->src3.value = NULL;
|
||||
return i->dest;
|
||||
}
|
||||
|
||||
void HIRBuilder::StoreOffset(Value* address, Value* offset, Value* value,
|
||||
uint32_t store_flags) {
|
||||
ASSERT_ADDRESS_TYPE(address);
|
||||
Instr* i = AppendInstr(OPCODE_STORE_OFFSET_info, store_flags);
|
||||
i->set_src1(address);
|
||||
i->set_src2(offset);
|
||||
i->set_src3(value);
|
||||
}
|
||||
|
||||
Value* HIRBuilder::Load(Value* address, TypeName type, uint32_t load_flags) {
|
||||
ASSERT_ADDRESS_TYPE(address);
|
||||
Instr* i = AppendInstr(OPCODE_LOAD_info, load_flags, AllocValue(type));
|
||||
|
|
|
@ -147,6 +147,11 @@ class HIRBuilder {
|
|||
Value* LoadMmio(cpu::MMIORange* mmio_range, uint32_t address, TypeName type);
|
||||
void StoreMmio(cpu::MMIORange* mmio_range, uint32_t address, Value* value);
|
||||
|
||||
Value* LoadOffset(Value* address, Value* offset, TypeName type,
|
||||
uint32_t load_flags = 0);
|
||||
void StoreOffset(Value* address, Value* offset, Value* value,
|
||||
uint32_t store_flags = 0);
|
||||
|
||||
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
|
||||
void Store(Value* address, Value* value, uint32_t store_flags = 0);
|
||||
void Memset(Value* address, Value* value, Value* length);
|
||||
|
|
|
@ -152,6 +152,8 @@ enum Opcode {
|
|||
OPCODE_CONTEXT_BARRIER,
|
||||
OPCODE_LOAD_MMIO,
|
||||
OPCODE_STORE_MMIO,
|
||||
OPCODE_LOAD_OFFSET,
|
||||
OPCODE_STORE_OFFSET,
|
||||
OPCODE_LOAD,
|
||||
OPCODE_STORE,
|
||||
OPCODE_MEMSET,
|
||||
|
|
|
@ -231,6 +231,18 @@ DEFINE_OPCODE(
|
|||
OPCODE_SIG_X_O_O_V,
|
||||
OPCODE_FLAG_MEMORY)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD_OFFSET,
|
||||
"load_offset",
|
||||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_MEMORY)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_STORE_OFFSET,
|
||||
"store_offset",
|
||||
OPCODE_SIG_X_V_V_V,
|
||||
OPCODE_FLAG_MEMORY)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD,
|
||||
"load",
|
||||
|
|
|
@@ -118,16 +118,16 @@ uintptr_t MMIOHandler::AddPhysicalAccessWatch(uint32_t guest_address,
    bool hit = false;
    auto entry = *it;

    if (base_address < (*it)->address &&
    if (base_address <= (*it)->address &&
        base_address + length > (*it)->address) {
      hit = true;
    } else if ((*it)->address < base_address &&
    } else if ((*it)->address <= base_address &&
               (*it)->address + (*it)->length > base_address) {
      hit = true;
    } else if ((*it)->address < base_address &&
    } else if ((*it)->address <= base_address &&
               (*it)->address + (*it)->length > base_address + length) {
      hit = true;
    } else if ((*it)->address > base_address &&
    } else if ((*it)->address >= base_address &&
               (*it)->address + (*it)->length < base_address + length) {
      hit = true;
    }
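The comparisons above were loosened to <= / >= so that watches starting exactly at the invalidated base address are also hit. The whole test amounts to a standard half-open interval overlap check; a minimal standalone sketch of that check (not the MMIOHandler code itself):

#include <cstdint>
#include <cstdio>

// Two half-open ranges [a, a+alen) and [b, b+blen) overlap iff each one starts
// before the other one ends. Strict '<' on the start comparisons (as the old
// code effectively had) misses ranges that begin at the same address.
bool RangesOverlap(uint32_t a, uint32_t alen, uint32_t b, uint32_t blen) {
  return a < b + blen && b < a + alen;
}

int main() {
  std::printf("%d\n", RangesOverlap(0x1000, 0x100, 0x1000, 0x10));  // 1: same start
  std::printf("%d\n", RangesOverlap(0x1000, 0x100, 0x1100, 0x10));  // 0: adjacent only
  return 0;
}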
@@ -73,7 +73,11 @@ class MMIOHandler {
                                   WatchType type, AccessWatchCallback callback,
                                   void* callback_context, void* callback_data);
  void CancelAccessWatch(uintptr_t watch_handle);

  // Fires and clears any access watches that overlap this range.
  void InvalidateRange(uint32_t physical_address, size_t length);

  // Returns true if /any/ part of this range is watched.
  bool IsRangeWatched(uint32_t physical_address, size_t length);

 protected:
@@ -249,22 +249,22 @@ enum class PPCRegister {
typedef struct PPCContext_s {
  // Must be stored at 0x0 for now.
  // TODO(benvanik): find a nice way to describe this to the JIT.
  ThreadState* thread_state;
  ThreadState* thread_state;  // 0x0
  // TODO(benvanik): this is getting nasty. Must be here.
  uint8_t* virtual_membase;
  uint8_t* virtual_membase;  // 0x8

  // Most frequently used registers first.
  uint64_t lr;      // Link register
  uint64_t ctr;     // Count register
  uint64_t r[32];   // General purpose registers
  double f[32];     // Floating-point registers
  vec128_t v[128];  // VMX128 vector registers
  uint64_t lr;      // 0x10 Link register
  uint64_t ctr;     // 0x18 Count register
  uint64_t r[32];   // 0x20 General purpose registers
  double f[32];     // 0x120 Floating-point registers
  vec128_t v[128];  // 0x220 VMX128 vector registers

  // XER register:
  // Split to make it easier to do individual updates.
  uint8_t xer_ca;
  uint8_t xer_ov;
  uint8_t xer_so;
  uint8_t xer_ca;  // 0xA20
  uint8_t xer_ov;  // 0xA21
  uint8_t xer_so;  // 0xA22

  // Condition registers:
  // These are split to make it easier to do DCE on unused stores.

@@ -279,7 +279,7 @@ typedef struct PPCContext_s {
                     // successfully
    uint8_t cr0_so;  // Summary Overflow (SO) - copy of XER[SO]
  };
  } cr0;
  } cr0;  // 0xA24
  union {
    uint32_t value;
    struct {
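The offsets added in these comments (0x0, 0x8, 0x10, ..., 0xA20, 0xA24) follow from the field order on a 64-bit build. A standalone layout sketch like the one below (hypothetical, trimmed to just the annotated fields; the real PPCContext has many more) shows how such offsets could be pinned with static_asserts so JIT code that hard-codes them breaks loudly if the struct ever changes:

#include <cstddef>
#include <cstdint>

// Hypothetical, trimmed-down layout mirroring only the fields annotated above.
struct alignas(16) ContextLayoutSketch {
  void* thread_state;        // 0x0
  uint8_t* virtual_membase;  // 0x8
  uint64_t lr;               // 0x10
  uint64_t ctr;              // 0x18
  uint64_t r[32];            // 0x20
  double f[32];              // 0x120
  struct alignas(16) Vec128 {
    uint32_t u32[4];
  } v[128];                  // 0x220
  uint8_t xer_ca;            // 0xA20
  uint8_t xer_ov;            // 0xA21
  uint8_t xer_so;            // 0xA22
  union {
    uint32_t value;
  } cr0;                     // 0xA24 (padded up to 4-byte alignment)
};

static_assert(offsetof(ContextLayoutSketch, lr) == 0x10, "lr");
static_assert(offsetof(ContextLayoutSketch, v) == 0x220, "v");
static_assert(offsetof(ContextLayoutSketch, xer_ca) == 0xA20, "xer_ca");
static_assert(offsetof(ContextLayoutSketch, cr0) == 0xA24, "cr0");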
@@ -984,8 +984,10 @@ int InstrEmit_rlwinmx(PPCHIRBuilder& f, const InstrData& i) {
  // m <- MASK(MB+32, ME+32)
  // RA <- r & m
  Value* v = f.LoadGPR(i.M.RT);

  // (x||x)
  v = f.Or(f.Shl(v, 32), f.And(v, f.LoadConstantUint64(0xFFFFFFFF)));
  v = f.Or(f.Shl(v, 32), f.ZeroExtend(f.Truncate(v, INT32_TYPE), INT64_TYPE));

  // TODO(benvanik): optimize srwi
  // TODO(benvanik): optimize slwi
  // The compiler will generate a bunch of these for the special case of SH=0.
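Both forms build (x||x), the low 32 bits duplicated into both halves, which is what lets rlwinm's 32-bit rotate be expressed with 64-bit shifts. A scalar illustration (standalone C++, not frontend code):

#include <cstdint>
#include <cstdio>

// Duplicate the low 32 bits into both halves, equivalent to
// Or(Shl(v, 32), ZeroExtend(Truncate(v, INT32), INT64)).
uint64_t DuplicateLow32(uint64_t v) {
  return (v << 32) | static_cast<uint32_t>(v);
}

// With the value duplicated, a 32-bit left rotate falls out of a plain 64-bit
// shift: the high word of (x||x) << sh equals rotl32(x, sh) for 0 <= sh < 32.
uint32_t RotateLeft32ViaDup(uint32_t x, unsigned sh) {
  return static_cast<uint32_t>((DuplicateLow32(x) << sh) >> 32);
}

int main() {
  std::printf("%08X\n", RotateLeft32ViaDup(0x80000001u, 1));  // 00000003
  std::printf("%08X\n", RotateLeft32ViaDup(0x12345678u, 8));  // 34567812
  return 0;
}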
@ -63,8 +63,15 @@ int InstrEmit_lbz(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D)
|
||||
// RT <- i56.0 || MEM(EA, 1)
|
||||
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.Load(ea, INT8_TYPE), INT64_TYPE);
|
||||
Value* b;
|
||||
if (i.D.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.D.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.LoadOffset(b, offset, INT8_TYPE), INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
return 0;
|
||||
}
|
||||
|
@ -73,10 +80,11 @@ int InstrEmit_lbzu(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// EA <- (RA) + EXTS(D)
|
||||
// RT <- i56.0 || MEM(EA, 1)
|
||||
// RA <- EA
|
||||
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.Load(ea, INT8_TYPE), INT64_TYPE);
|
||||
Value* ra = f.LoadGPR(i.D.RA);
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.LoadOffset(ra, offset, INT8_TYPE), INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
StoreEA(f, i.D.RA, ea);
|
||||
StoreEA(f, i.D.RA, f.Add(ra, offset));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -111,8 +119,16 @@ int InstrEmit_lha(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D)
|
||||
// RT <- EXTS(MEM(EA, 2))
|
||||
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.SignExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
|
||||
Value* b;
|
||||
if (i.D.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.D.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt =
|
||||
f.SignExtend(f.ByteSwap(f.LoadOffset(b, offset, INT16_TYPE)), INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
return 0;
|
||||
}
|
||||
|
@ -121,10 +137,12 @@ int InstrEmit_lhau(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// EA <- (RA) + EXTS(D)
|
||||
// RT <- EXTS(MEM(EA, 2))
|
||||
// RA <- EA
|
||||
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.SignExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
|
||||
Value* ra = f.LoadGPR(i.D.RA);
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt = f.SignExtend(f.ByteSwap(f.LoadOffset(ra, offset, INT16_TYPE)),
|
||||
INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
StoreEA(f, i.D.RA, ea);
|
||||
StoreEA(f, i.D.RA, f.Add(ra, offset));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -159,8 +177,16 @@ int InstrEmit_lhz(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D)
|
||||
// RT <- i48.0 || MEM(EA, 2)
|
||||
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
|
||||
Value* b;
|
||||
if (i.D.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.D.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt =
|
||||
f.ZeroExtend(f.ByteSwap(f.LoadOffset(b, offset, INT16_TYPE)), INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
return 0;
|
||||
}
|
||||
|
@ -169,10 +195,12 @@ int InstrEmit_lhzu(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// EA <- (RA) + EXTS(D)
|
||||
// RT <- i48.0 || MEM(EA, 2)
|
||||
// RA <- EA
|
||||
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT16_TYPE)), INT64_TYPE);
|
||||
Value* ra = f.LoadGPR(i.D.RA);
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.ByteSwap(f.LoadOffset(ra, offset, INT16_TYPE)),
|
||||
INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
StoreEA(f, i.D.RA, ea);
|
||||
StoreEA(f, i.D.RA, f.Add(ra, offset));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -207,8 +235,16 @@ int InstrEmit_lwa(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D || 00)
|
||||
// RT <- EXTS(MEM(EA, 4))
|
||||
Value* ea = CalculateEA_0_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
|
||||
Value* rt = f.SignExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE);
|
||||
Value* b;
|
||||
if (i.DS.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.DS.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
|
||||
Value* rt =
|
||||
f.SignExtend(f.ByteSwap(f.LoadOffset(b, offset, INT32_TYPE)), INT64_TYPE);
|
||||
f.StoreGPR(i.DS.RT, rt);
|
||||
return 0;
|
||||
}
|
||||
|
@ -244,8 +280,16 @@ int InstrEmit_lwz(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D)
|
||||
// RT <- i32.0 || MEM(EA, 4)
|
||||
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE);
|
||||
Value* b;
|
||||
if (i.D.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.D.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt =
|
||||
f.ZeroExtend(f.ByteSwap(f.LoadOffset(b, offset, INT32_TYPE)), INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
return 0;
|
||||
}
|
||||
|
@ -254,10 +298,12 @@ int InstrEmit_lwzu(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// EA <- (RA) + EXTS(D)
|
||||
// RT <- i32.0 || MEM(EA, 4)
|
||||
// RA <- EA
|
||||
Value* ea = CalculateEA_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE);
|
||||
Value* ra = f.LoadGPR(i.D.RA);
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
Value* rt = f.ZeroExtend(f.ByteSwap(f.LoadOffset(ra, offset, INT32_TYPE)),
|
||||
INT64_TYPE);
|
||||
f.StoreGPR(i.D.RT, rt);
|
||||
StoreEA(f, i.D.RA, ea);
|
||||
StoreEA(f, i.D.RA, f.Add(ra, offset));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -292,8 +338,15 @@ int InstrEmit_ld(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(DS || 0b00)
|
||||
// RT <- MEM(EA, 8)
|
||||
Value* ea = CalculateEA_0_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
|
||||
Value* rt = f.ByteSwap(f.Load(ea, INT64_TYPE));
|
||||
Value* b;
|
||||
if (i.DS.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.DS.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
|
||||
Value* rt = f.ByteSwap(f.LoadOffset(b, offset, INT64_TYPE));
|
||||
f.StoreGPR(i.DS.RT, rt);
|
||||
return 0;
|
||||
}
|
||||
|
@ -342,8 +395,15 @@ int InstrEmit_stb(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D)
|
||||
// MEM(EA, 1) <- (RS)[56:63]
|
||||
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
f.Store(ea, f.Truncate(f.LoadGPR(i.D.RT), INT8_TYPE));
|
||||
Value* b;
|
||||
if (i.D.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.D.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
f.StoreOffset(b, offset, f.Truncate(f.LoadGPR(i.D.RT), INT8_TYPE));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -386,8 +446,16 @@ int InstrEmit_sth(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D)
|
||||
// MEM(EA, 2) <- (RS)[48:63]
|
||||
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT16_TYPE)));
|
||||
Value* b;
|
||||
if (i.D.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.D.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
f.StoreOffset(b, offset,
|
||||
f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT16_TYPE)));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -430,8 +498,16 @@ int InstrEmit_stw(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(D)
|
||||
// MEM(EA, 4) <- (RS)[32:63]
|
||||
Value* ea = CalculateEA_0_i(f, i.D.RA, XEEXTS16(i.D.DS));
|
||||
f.Store(ea, f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
|
||||
Value* b;
|
||||
if (i.D.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.D.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.D.DS));
|
||||
f.StoreOffset(b, offset,
|
||||
f.ByteSwap(f.Truncate(f.LoadGPR(i.D.RT), INT32_TYPE)));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -474,8 +550,15 @@ int InstrEmit_std(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// b <- (RA)
|
||||
// EA <- b + EXTS(DS || 0b00)
|
||||
// MEM(EA, 8) <- (RS)
|
||||
Value* ea = CalculateEA_0_i(f, i.DS.RA, XEEXTS16(i.DS.DS << 2));
|
||||
f.Store(ea, f.ByteSwap(f.LoadGPR(i.DS.RT)));
|
||||
Value* b;
|
||||
if (i.DS.RA == 0) {
|
||||
b = f.LoadZeroInt64();
|
||||
} else {
|
||||
b = f.LoadGPR(i.DS.RA);
|
||||
}
|
||||
|
||||
Value* offset = f.LoadConstantInt64(XEEXTS16(i.DS.DS << 2));
|
||||
f.StoreOffset(b, offset, f.ByteSwap(f.LoadGPR(i.DS.RT)));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@@ -33,15 +33,18 @@ bool RawModule::LoadFile(uint32_t base_address, const std::wstring& path) {
  // Allocate memory.
  // Since we have no real heap just load it wherever.
  base_address_ = base_address;
  memory_->LookupHeap(base_address_)
      ->AllocFixed(base_address_, file_length, 0,
                   kMemoryAllocationReserve | kMemoryAllocationCommit,
                   kMemoryProtectRead | kMemoryProtectWrite);
  auto heap = memory_->LookupHeap(base_address_);
  if (!heap ||
      !heap->AllocFixed(base_address_, file_length, 0,
                        kMemoryAllocationReserve | kMemoryAllocationCommit,
                        kMemoryProtectRead | kMemoryProtectWrite)) {
    return false;
  }

  uint8_t* p = memory_->TranslateVirtual(base_address_);

  // Read into memory.
  fread(p, file_length, 1, file);

  fclose(file);

  // Setup debug info.
@ -364,6 +364,22 @@ bool TextureCache::FreeTexture(Texture* texture) {
|
|||
return true;
|
||||
}
|
||||
|
||||
void TextureCache::WatchCallback(void* context_ptr, void* data_ptr,
|
||||
uint32_t address) {
|
||||
auto self = reinterpret_cast<TextureCache*>(context_ptr);
|
||||
auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
|
||||
// Clear watch handle first so we don't redundantly
|
||||
// remove.
|
||||
assert_not_zero(touched_texture->access_watch_handle);
|
||||
touched_texture->access_watch_handle = 0;
|
||||
touched_texture->pending_invalidation = true;
|
||||
|
||||
// Add to pending list so Scavenge will clean it up.
|
||||
self->invalidated_textures_mutex_.lock();
|
||||
self->invalidated_textures_->push_back(touched_texture);
|
||||
self->invalidated_textures_mutex_.unlock();
|
||||
}
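The two identical lambdas previously passed to AddPhysicalAccessWatch are folded into the static WatchCallback above, using the usual C-style trampoline: a void* context that is cast back to the owning object. A generic standalone sketch of the pattern (illustrative names, not xenia types):

#include <cstdio>

// A C-style callback plus a void* context lets one static function replace
// several identical lambdas; the context is cast back to the owning object.
struct Watcher {
  static void Callback(void* context_ptr, void* data_ptr, unsigned address) {
    static_cast<Watcher*>(context_ptr)->OnTouched(data_ptr, address);
  }
  void OnTouched(void* data, unsigned address) {
    std::printf("watched object %p touched at %08X\n", data, address);
  }
};

int main() {
  Watcher watcher;
  int watched_thing = 0;
  // Registration would pass (&Watcher::Callback, &watcher, &watched_thing).
  Watcher::Callback(&watcher, &watched_thing, 0x1000);
  return 0;
}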
|
||||
|
||||
TextureCache::Texture* TextureCache::DemandResolveTexture(
|
||||
const TextureInfo& texture_info) {
|
||||
auto texture_hash = texture_info.hash();
|
||||
|
@ -411,22 +427,7 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
|
|||
// Setup an access watch. If this texture is touched, it is destroyed.
|
||||
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
|
||||
texture_info.guest_address, texture_info.input_length,
|
||||
cpu::MMIOHandler::kWatchWrite,
|
||||
[](void* context_ptr, void* data_ptr, uint32_t address) {
|
||||
auto self = reinterpret_cast<TextureCache*>(context_ptr);
|
||||
auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
|
||||
// Clear watch handle first so we don't redundantly
|
||||
// remove.
|
||||
assert_not_zero(touched_texture->access_watch_handle);
|
||||
touched_texture->access_watch_handle = 0;
|
||||
touched_texture->pending_invalidation = true;
|
||||
|
||||
// Add to pending list so Scavenge will clean it up.
|
||||
self->invalidated_textures_mutex_.lock();
|
||||
self->invalidated_textures_->push_back(touched_texture);
|
||||
self->invalidated_textures_mutex_.unlock();
|
||||
},
|
||||
this, texture);
|
||||
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
|
||||
|
||||
textures_[texture_hash] = texture;
|
||||
return texture;
|
||||
|
@ -486,21 +487,7 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
|
|||
// guest.
|
||||
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
|
||||
texture_info.guest_address, texture_info.input_length,
|
||||
cpu::MMIOHandler::kWatchWrite,
|
||||
[](void* context_ptr, void* data_ptr, uint32_t address) {
|
||||
auto self = reinterpret_cast<TextureCache*>(context_ptr);
|
||||
auto touched_texture = reinterpret_cast<Texture*>(data_ptr);
|
||||
// Clear watch handle first so we don't redundantly
|
||||
// remove.
|
||||
assert_not_zero(touched_texture->access_watch_handle);
|
||||
touched_texture->access_watch_handle = 0;
|
||||
touched_texture->pending_invalidation = true;
|
||||
// Add to pending list so Scavenge will clean it up.
|
||||
self->invalidated_textures_mutex_.lock();
|
||||
self->invalidated_textures_->push_back(touched_texture);
|
||||
self->invalidated_textures_mutex_.unlock();
|
||||
},
|
||||
this, texture);
|
||||
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
|
||||
|
||||
if (!UploadTexture(command_buffer, completion_fence, texture, texture_info)) {
|
||||
FreeTexture(texture);
|
||||
|
@ -1306,6 +1293,7 @@ void TextureCache::HashTextureBindings(
|
|||
// We've covered this binding.
|
||||
continue;
|
||||
}
|
||||
fetch_mask |= fetch_bit;
|
||||
|
||||
auto& regs = *register_file_;
|
||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6;
|
||||
|
@ -1329,8 +1317,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
|
|||
HashTextureBindings(&hash_state, fetch_mask, vertex_bindings);
|
||||
HashTextureBindings(&hash_state, fetch_mask, pixel_bindings);
|
||||
uint64_t hash = XXH64_digest(&hash_state);
|
||||
for (auto it = texture_bindings_.find(hash); it != texture_bindings_.end();
|
||||
++it) {
|
||||
for (auto it = texture_sets_.find(hash); it != texture_sets_.end(); ++it) {
|
||||
// TODO(DrChat): We need to compare the bindings and ensure they're equal.
|
||||
return it->second;
|
||||
}
|
||||
|
@ -1378,7 +1365,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
|
|||
update_set_info->image_writes, 0, nullptr);
|
||||
}
|
||||
|
||||
texture_bindings_[hash] = descriptor_set;
|
||||
texture_sets_[hash] = descriptor_set;
|
||||
return descriptor_set;
|
||||
}
|
||||
|
||||
|
@ -1515,7 +1502,7 @@ void TextureCache::Scavenge() {
|
|||
// Free unused descriptor sets
|
||||
// TODO(DrChat): These sets could persist across frames, we just need a smart
|
||||
// way to detect if they're unused and free them.
|
||||
texture_bindings_.clear();
|
||||
texture_sets_.clear();
|
||||
descriptor_pool_->Scavenge();
|
||||
staging_buffer_.Scavenge();
|
||||
|
||||
|
|
|
@ -134,6 +134,9 @@ class TextureCache {
|
|||
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
|
||||
bool FreeTexture(Texture* texture);
|
||||
|
||||
static void WatchCallback(void* context_ptr, void* data_ptr,
|
||||
uint32_t address);
|
||||
|
||||
// Demands a texture. If command_buffer is null and the texture hasn't been
|
||||
// uploaded to graphics memory already, we will return null and bail.
|
||||
Texture* Demand(const TextureInfo& texture_info,
|
||||
|
@ -188,7 +191,7 @@ class TextureCache {
|
|||
|
||||
std::unique_ptr<xe::ui::vulkan::CommandBufferPool> wb_command_pool_ = nullptr;
|
||||
std::unique_ptr<xe::ui::vulkan::DescriptorPool> descriptor_pool_ = nullptr;
|
||||
std::unordered_map<uint64_t, VkDescriptorSet> texture_bindings_;
|
||||
std::unordered_map<uint64_t, VkDescriptorSet> texture_sets_;
|
||||
VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr;
|
||||
|
||||
VmaAllocator mem_allocator_ = nullptr;
|
||||
|
|
|
@@ -150,8 +150,8 @@ void VulkanContext::BeginSwap() {
  // If it has been, we'll need to reinitialize the swap chain before we
  // start touching it.
  if (target_window_) {
    if (target_window_->width() != swap_chain_->surface_width() ||
        target_window_->height() != swap_chain_->surface_height()) {
    if (target_window_->scaled_width() != swap_chain_->surface_width() ||
        target_window_->scaled_height() != swap_chain_->surface_height()) {
      // Resized!
      swap_chain_->Reinitialize();
    }