Minor decoder optimizations, kernel fixes, cpu backend fixes
parent ba66373d8c
commit c1d922eebf
@@ -36,7 +36,7 @@
 	url = https://github.com/skystrife/cpptoml.git
 [submodule "third_party/cxxopts"]
 	path = third_party/cxxopts
-	url = https://github.com/chrisps/cxxopts.git
+	url = https://github.com/jarro2783/cxxopts.git
 [submodule "third_party/SDL2"]
 	path = third_party/SDL2
 	url = https://github.com/libsdl-org/SDL.git
@@ -614,7 +614,7 @@ bool EmulatorWindow::Initialize() {
         MenuItem::Type::kString, "Build commit on GitHub...", "F2",
         std::bind(&EmulatorWindow::ShowBuildCommit, this)));
     help_menu->AddChild(MenuItem::Create(
-        MenuItem::Type::kString, "Recent changes on GitHub...", [this]() {
+        MenuItem::Type::kString, "Recent changes on GitHub...", []() {
           LaunchWebBrowser(
               "https://github.com/xenia-project/xenia/compare/" XE_BUILD_COMMIT
               "..." XE_BUILD_BRANCH);
@@ -622,7 +622,7 @@ bool EmulatorWindow::Initialize() {
     help_menu->AddChild(MenuItem::Create(MenuItem::Type::kSeparator));
     help_menu->AddChild(MenuItem::Create(
         MenuItem::Type::kString, "&About...",
-        [this]() { LaunchWebBrowser("https://xenia.jp/about/"); }));
+        []() { LaunchWebBrowser("https://xenia.jp/about/"); }));
   }
   main_menu->AddChild(std::move(help_menu));
@@ -71,8 +71,6 @@ inline void sequential_6_BE_to_interleaved_2_LE(float* output,
                                                 const float* input,
                                                 size_t ch_sample_count) {
   assert_true(ch_sample_count % 4 == 0);
-  const uint32_t* in = reinterpret_cast<const uint32_t*>(input);
-  uint32_t* out = reinterpret_cast<uint32_t*>(output);
   const __m128i byte_swap_shuffle =
       _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
   const __m128 half = _mm_set1_ps(0.5f);
@@ -28,16 +28,16 @@ class XAudio2AudioDriver::VoiceCallback : public api::IXAudio2VoiceCallback {
       : semaphore_(semaphore) {}
   ~VoiceCallback() {}

-  void OnStreamEnd() {}
-  void OnVoiceProcessingPassEnd() {}
-  void OnVoiceProcessingPassStart(uint32_t samples_required) {}
-  void OnBufferEnd(void* context) {
+  void OnStreamEnd() noexcept {}
+  void OnVoiceProcessingPassEnd() noexcept {}
+  void OnVoiceProcessingPassStart(uint32_t samples_required) noexcept {}
+  void OnBufferEnd(void* context) noexcept {
     auto ret = semaphore_->Release(1, nullptr);
     assert_true(ret);
   }
-  void OnBufferStart(void* context) {}
-  void OnLoopEnd(void* context) {}
-  void OnVoiceError(void* context, HRESULT result) {}
+  void OnBufferStart(void* context) noexcept {}
+  void OnLoopEnd(void* context) noexcept {}
+  void OnVoiceError(void* context, HRESULT result) noexcept {}

 private:
  xe::threading::Semaphore* semaphore_ = nullptr;
@@ -21,8 +21,11 @@ namespace xe {
                 "bad definition for " #type ": must be " #size " bytes")

 // We rely on assert being compiled out in NDEBUG.
+#if defined(NDEBUG)
+#define xenia_assert static_cast<void>
+#else
 #define xenia_assert assert
+#endif
 #define __XENIA_EXPAND(x) x
 #define __XENIA_ARGC(...) \
   __XENIA_EXPAND(__XENIA_ARGC_IMPL(__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, \
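Note: a minimal sketch of how the two expansions behave. Only the macro itself comes from the hunk above; the demo function is made up for illustration.

#include <cassert>

#if defined(NDEBUG)
#define xenia_assert static_cast<void>  // xenia_assert(x) becomes static_cast<void>(x)
#else
#define xenia_assert assert
#endif

int CheckedDemo(int fd) {
  // Debug build: aborts when fd < 0. Release build: the expression is still
  // evaluated but cast to void, so the check disappears without producing
  // unused-value warnings at the call sites.
  xenia_assert(fd >= 0);
  return fd;
}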
@@ -170,8 +170,10 @@ CommandVar<T>::CommandVar(const char* name, T* default_value,
                           const char* description)
     : name_(name),
       default_value_(*default_value),
-      description_(description),
-      current_value_(default_value) {}
+      current_value_(default_value),
+      commandline_value_(),
+      description_(description)
+{}

 template <class T>
 ConfigVar<T>::ConfigVar(const char* name, T* default_value,
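Note: the reordered initializer list appears to match the members' declaration order. A small sketch of why that matters; the type and names here are hypothetical, not xenia's.

struct Example {
  int first_;
  int second_;
  // Members are always initialized in declaration order (first_, then
  // second_), no matter how the initializer list is written; keeping the list
  // in the same order avoids -Wreorder warnings and accidental reads of
  // not-yet-initialized members.
  explicit Example(int v) : first_(v), second_(first_ + 1) {}
};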
@@ -149,7 +149,7 @@ class Win32FileHandle : public FileHandle {
       return false;
     }
   }
-  bool SetLength(size_t length) {
+  bool SetLength(size_t length) override {
     LARGE_INTEGER position;
     position.QuadPart = length;
     if (!SetFilePointerEx(handle_, position, nullptr, SEEK_SET)) {
@@ -59,7 +59,7 @@ static void XeCopy16384StreamingAVX(CacheLine* XE_RESTRICT to,

   CacheLine* dest4 = to + (NUM_CACHELINES_IN_PAGE * 3);
   CacheLine* src4 = from + (NUM_CACHELINES_IN_PAGE * 3);
-#pragma loop(no_vector)
   for (uint32_t i = 0; i < num_lines_for_8k; ++i) {
     xe::swcache::CacheLine line0, line1, line2, line3;

@@ -92,7 +92,6 @@ static void XeCopy16384Movdir64M(CacheLine* XE_RESTRICT to,

   CacheLine* dest4 = to + (NUM_CACHELINES_IN_PAGE * 3);
   CacheLine* src4 = from + (NUM_CACHELINES_IN_PAGE * 3);
-#pragma loop(no_vector)
   for (uint32_t i = 0; i < num_lines_for_8k; ++i) {
     _movdir64b(dest1 + i, src1 + i);
     _movdir64b(dest2 + i, src2 + i);
@@ -620,23 +620,23 @@ static void Prefetch(const void* addr) {
 }

 template <>
-void Prefetch<PrefetchTag::Write>(const void* addr) {
+XE_MAYBE_UNUSED void Prefetch<PrefetchTag::Write>(const void* addr) {
   PrefetchW(addr);
 }
 template <>
-void Prefetch<PrefetchTag::Nontemporal>(const void* addr) {
+XE_MAYBE_UNUSED void Prefetch<PrefetchTag::Nontemporal>(const void* addr) {
   PrefetchNTA(addr);
 }
 template <>
-void Prefetch<PrefetchTag::Level3>(const void* addr) {
+XE_MAYBE_UNUSED void Prefetch<PrefetchTag::Level3>(const void* addr) {
   PrefetchL3(addr);
 }
 template <>
-void Prefetch<PrefetchTag::Level2>(const void* addr) {
+XE_MAYBE_UNUSED void Prefetch<PrefetchTag::Level2>(const void* addr) {
   PrefetchL2(addr);
 }
 template <>
-void Prefetch<PrefetchTag::Level1>(const void* addr) {
+XE_MAYBE_UNUSED void Prefetch<PrefetchTag::Level1>(const void* addr) {
   PrefetchL1(addr);
 }
 // todo: does aarch64 have streaming stores/loads?
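Note: PrefetchW/PrefetchNTA/PrefetchL1..L3 are xenia's own wrappers; the mapping below onto the usual x86 intrinsics is an assumption shown only for orientation, not code from this commit.

#include <xmmintrin.h>  // _mm_prefetch

// Approximate intrinsic equivalents of the prefetch tags used above.
inline void PrefetchNtaExample(const void* p) { _mm_prefetch(static_cast<const char*>(p), _MM_HINT_NTA); }
inline void PrefetchL1Example(const void* p) { _mm_prefetch(static_cast<const char*>(p), _MM_HINT_T0); }
inline void PrefetchL2Example(const void* p) { _mm_prefetch(static_cast<const char*>(p), _MM_HINT_T1); }
inline void PrefetchL3Example(const void* p) { _mm_prefetch(static_cast<const char*>(p), _MM_HINT_T2); }
#if defined(__GNUC__)
inline void PrefetchWriteExample(const void* p) { __builtin_prefetch(p, /*rw=*/1); }
#endif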
@@ -25,6 +25,7 @@ namespace xe {
 */

 class alignas(4096) xe_global_mutex {
+  XE_MAYBE_UNUSED
   char detail[64];

  public:
@@ -38,6 +39,7 @@ class alignas(4096) xe_global_mutex {
 using global_mutex_type = xe_global_mutex;

 class alignas(64) xe_fast_mutex {
+  XE_MAYBE_UNUSED
   char detail[64];

  public:
@@ -62,8 +64,6 @@ class xe_unlikely_mutex {
   ~xe_unlikely_mutex() { mut = 0; }

   void lock() {
-    uint32_t lock_expected = 0;
-
     if (XE_LIKELY(_tryget())) {
       return;
     } else {
@@ -144,9 +144,11 @@
 #define XE_MSVC_OPTIMIZE_SMALL()
 #define XE_MSVC_OPTIMIZE_REVERT()
 #endif

 #if XE_COMPILER_HAS_GNU_EXTENSIONS == 1
 #define XE_LIKELY_IF(...) if (XE_LIKELY(__VA_ARGS__))
 #define XE_UNLIKELY_IF(...) if (XE_UNLIKELY(__VA_ARGS__))
+#define XE_MAYBE_UNUSED __attribute__((unused))
 #else
 #if __cplusplus >= 202002
 #define XE_LIKELY_IF(...) if (!!(__VA_ARGS__)) [[likely]]
@@ -155,6 +157,7 @@
 #define XE_LIKELY_IF(...) if (!!(__VA_ARGS__))
 #define XE_UNLIKELY_IF(...) if (!!(__VA_ARGS__))
 #endif
+#define XE_MAYBE_UNUSED
 #endif
 // only use __restrict if MSVC, for clang/gcc we can use -fstrict-aliasing which
 // acts as __restrict across the board todo: __restrict is part of the type
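Note: a short usage sketch for the new XE_MAYBE_UNUSED macro. On compilers without GNU extensions it expands to nothing; C++17's [[maybe_unused]] (used below so the sketch stays portable) expresses the same intent.

#include <cassert>

void CheckWrap(unsigned count, unsigned capacity, unsigned write_offset) {
  // Mirrors the RingBuffer pattern further down: the variable only feeds an
  // assert, so release builds would otherwise warn about it being unused.
  [[maybe_unused]] unsigned left_half = capacity - write_offset;
  assert(count - left_half <= write_offset);
}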
@@ -78,7 +78,9 @@ size_t RingBuffer::Read(uint8_t* buffer, size_t _count) {
   if (read_offset_ < write_offset_) {
     assert_true(read_offset_ + count <= write_offset_);
   } else if (read_offset_ + count >= capacity_) {
+    XE_MAYBE_UNUSED
     ring_size_t left_half = capacity_ - read_offset_;

     assert_true(count - left_half <= write_offset_);
   }

@@ -107,6 +109,7 @@ size_t RingBuffer::Write(const uint8_t* buffer, size_t _count) {
   if (write_offset_ < read_offset_) {
     assert_true(write_offset_ + count <= read_offset_);
   } else if (write_offset_ + count >= capacity_) {
+    XE_MAYBE_UNUSED
     size_t left_half = capacity_ - write_offset_;
     assert_true(count - left_half <= read_offset_);
   }

@@ -68,7 +68,6 @@ class RingBuffer {
   ring_size_t offset_delta = write_offs - read_offs;
   ring_size_t wrap_read_count = (cap - read_offs) + write_offs;

-  ring_size_t comparison_value = read_offs <= write_offs;

   if (XE_LIKELY(read_offs <= write_offs)) {
     return offset_delta;  // will be 0 if they are equal, semantically
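Note: the deleted comparison_value local was unused; the surviving logic is the usual ring-buffer occupancy computation, restated here as plain C++ (a sketch, not code from the tree).

#include <cstdint>

// Bytes available to read in a ring buffer of `cap` bytes, assuming both
// offsets are always kept below `cap`.
uint32_t RingReadCount(uint32_t read_offs, uint32_t write_offs, uint32_t cap) {
  if (read_offs <= write_offs) {
    return write_offs - read_offs;        // 0 when the offsets are equal
  }
  return (cap - read_offs) + write_offs;  // wrapped: tail piece plus head piece
}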
@@ -67,8 +67,6 @@ class split_map {
   void InsertAt(TKey k, TValue v, uint32_t idx) {
     uint32_t old_size = size();

-    bool needs_shiftup = idx != old_size;
-
     values_.insert(values_.begin() + idx, v);
     keys_.insert(keys_.begin() + idx, k);
   }
@@ -117,7 +117,7 @@ void set_name(const std::string_view name) {

 // checked ntoskrnl, it does not modify delay, so we can place this as a
 // constant and avoid creating a stack variable
-static const LARGE_INTEGER sleepdelay0_for_maybeyield{0LL};
+static const LARGE_INTEGER sleepdelay0_for_maybeyield{{0LL}};

 void MaybeYield() {
 #if 0
@@ -314,7 +314,8 @@ class Win32Event : public Win32Handle<Event> {
   }
 #endif

-  EventInfo Query() { EventInfo result{};
+  EventInfo Query() override {
+    EventInfo result{};
     NtQueryEventPointer.invoke(handle_, 0, &result, sizeof(EventInfo), nullptr);
     return result;
   }
@@ -429,7 +430,7 @@ class Win32Timer : public Win32Handle<Timer> {
   }
   bool SetRepeatingAt(GClock_::time_point due_time,
                       std::chrono::milliseconds period,
-                      std::function<void()> opt_callback = nullptr) {
+                      std::function<void()> opt_callback = nullptr) override {
     return SetRepeatingAt(date::clock_cast<WClock_>(due_time), period,
                           std::move(opt_callback));
   }
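Note: the sleepdelay0_for_maybeyield change only adds a brace level. LARGE_INTEGER is a union whose first member is a struct, so the inner braces make the nesting explicit and avoid missing-braces warnings; a shape-alike sketch (not the real Windows type):

union LargeIntegerLike {
  struct {
    unsigned long LowPart;
    long HighPart;
  } u;
  long long QuadPart;
};

// {0LL} would brace-initialize the nested struct implicitly; {{0LL}} says so
// explicitly, which is what the diff switches to.
static const LargeIntegerLike kZeroDelay{{0LL}};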
@@ -93,7 +93,8 @@ class X64CodeCache : public CodeCache {
   // This is picked to be high enough to cover whatever we can reasonably
   // expect. If we hit issues with this it probably means some corner case
   // in analysis triggering.
-  static const size_t kMaximumFunctionCount = 100000;
+  // chrispy: raised this, some games that were compiled with low optimization levels can exceed this
+  static const size_t kMaximumFunctionCount = 1000000;

   struct UnwindReservation {
     size_t data_size = 0;
@@ -209,7 +209,16 @@ bool Win32X64CodeCache::Initialize() {

 Win32X64CodeCache::UnwindReservation
 Win32X64CodeCache::RequestUnwindReservation(uint8_t* entry_address) {
+#if defined(NDEBUG)
+  if (unwind_table_count_ >= kMaximumFunctionCount) {
+    // we should not just be ignoring this in release if it happens
+    xe::FatalError(
+        "Unwind table count (unwind_table_count_) exceeded maximum! Please report this to "
+        "Xenia/Canary developers");
+  }
+#else
   assert_false(unwind_table_count_ >= kMaximumFunctionCount);
+#endif
   UnwindReservation unwind_reservation;
   unwind_reservation.data_size = xe::round_up(kUnwindInfoSize, 16);
   unwind_reservation.table_slot = unwind_table_count_++;
@@ -46,10 +46,6 @@ DEFINE_bool(ignore_undefined_externs, true,
 DEFINE_bool(emit_source_annotations, false,
             "Add extra movs and nops to make disassembly easier to read.",
             "CPU");
-DEFINE_bool(resolve_rel32_guest_calls, true,
-            "Experimental optimization, directly call already resolved "
-            "functions via x86 rel32 call/jmp",
-            "CPU");

 DEFINE_bool(enable_incorrect_roundingmode_behavior, false,
             "Disables the FPU/VMX MXCSR sharing workaround, potentially "
@@ -78,7 +74,6 @@ using namespace xe::literals;

 static const size_t kMaxCodeSize = 1_MiB;

-static const size_t kStashOffset = 32;
 // static const size_t kStashOffsetHigh = 32 + 32;

 const uint32_t X64Emitter::gpr_reg_map_[X64Emitter::GPR_COUNT] = {
@@ -141,55 +136,6 @@ bool X64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,

   return true;
 }
-#pragma pack(push, 1)
-struct RGCEmitted {
-  uint8_t ff_;
-  uint32_t rgcid_;
-};
-#pragma pack(pop)
-
-#if 0
-void X64Emitter::InjectCallAddresses(void* new_execute_address) {
-  for (auto&& callsite : call_sites_) {
-    RGCEmitted* hunter = (RGCEmitted*)new_execute_address;
-    while (hunter->ff_ != 0xFF || hunter->rgcid_ != callsite.offset_) {
-      hunter =
-          reinterpret_cast<RGCEmitted*>(reinterpret_cast<char*>(hunter) + 1);
-    }
-
-    hunter->ff_ = callsite.is_jump_ ? 0xE9 : 0xE8;
-    hunter->rgcid_ =
-        static_cast<uint32_t>(static_cast<intptr_t>(callsite.destination_) -
-                              reinterpret_cast<intptr_t>(hunter + 1));
-  }
-}
-
-#else
-void X64Emitter::InjectCallAddresses(void* new_execute_address) {
-#if 0
-  RGCEmitted* hunter = (RGCEmitted*)new_execute_address;
-
-  std::map<uint32_t, ResolvableGuestCall*> id_to_rgc{};
-
-  for (auto&& callsite : call_sites_) {
-    id_to_rgc[callsite.offset_] = &callsite;
-  }
-#else
-  RGCEmitted* hunter = (RGCEmitted*)new_execute_address;
-  for (auto&& callsite : call_sites_) {
-    while (hunter->ff_ != 0xFF || hunter->rgcid_ != callsite.offset_) {
-      hunter =
-          reinterpret_cast<RGCEmitted*>(reinterpret_cast<char*>(hunter) + 1);
-    }
-
-    hunter->ff_ = callsite.is_jump_ ? 0xE9 : 0xE8;
-    hunter->rgcid_ =
-        static_cast<uint32_t>(static_cast<intptr_t>(callsite.destination_) -
-                              reinterpret_cast<intptr_t>(hunter + 1));
-  }
-#endif
-}
-#endif
 void* X64Emitter::Emplace(const EmitFunctionInfo& func_info,
                           GuestFunction* function) {
   // To avoid changing xbyak, we do a switcharoo here.
@@ -207,10 +153,6 @@ void* X64Emitter::Emplace(const EmitFunctionInfo& func_info,
   if (function) {
     code_cache_->PlaceGuestCode(function->address(), top_, func_info, function,
                                 new_execute_address, new_write_address);
-
-    if (cvars::resolve_rel32_guest_calls) {
-      InjectCallAddresses(new_execute_address);
-    }
   } else {
     code_cache_->PlaceHostCode(0, top_, func_info, new_execute_address,
                                new_write_address);
@@ -219,7 +161,6 @@ void* X64Emitter::Emplace(const EmitFunctionInfo& func_info,
   ready();
   top_ = old_address;
   reset();
-  call_sites_.clear();
   tail_code_.clear();
   for (auto&& cached_label : label_cache_) {
     delete cached_label;
@@ -336,7 +277,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
     // Mark block labels.
     auto label = block->label_head;
     while (label) {
-      L(label->name);
+      L(std::to_string(label->id));
       label = label->next;
     }
@@ -418,7 +359,6 @@ void X64Emitter::EmitProfilerEpilogue() {
     // actually... lets just try without atomics lol
     // lock();
     add(qword[r10], rdx);
-
   }
 #endif
 }
@@ -534,44 +474,23 @@ void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
   auto fn = static_cast<X64Function*>(function);
   // Resolve address to the function to call and store in rax.

-  if (cvars::resolve_rel32_guest_calls && fn->machine_code()) {
-    ResolvableGuestCall rgc;
-    rgc.destination_ = uint32_t(uint64_t(fn->machine_code()));
-    rgc.offset_ = current_rgc_id_;
-    current_rgc_id_++;
-
+  if (fn->machine_code()) {
     if (!(instr->flags & hir::CALL_TAIL)) {
       mov(rcx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
-
-      db(0xFF);
-      rgc.is_jump_ = false;
-
-      dd(rgc.offset_);
-
+      call((void*)fn->machine_code());
     } else {
       // tail call
       EmitTraceUserCallReturn();
-      rgc.is_jump_ = true;
+      EmitProfilerEpilogue();
       // Pass the callers return address over.
       mov(rcx, qword[rsp + StackLayout::GUEST_RET_ADDR]);

       add(rsp, static_cast<uint32_t>(stack_size()));
-      db(0xFF);
-      dd(rgc.offset_);
+      jmp((void*)fn->machine_code(), T_NEAR);
     }
-    call_sites_.push_back(rgc);
     return;
-  }
-
-  if (fn->machine_code()) {
-    // TODO(benvanik): is it worth it to do this? It removes the need for
-    // a ResolveFunction call, but makes the table less useful.
-    assert_zero(uint64_t(fn->machine_code()) & 0xFFFFFFFF00000000);
-    // todo: this should be changed so that we can actually do a call to
-    // fn->machine_code. the code will be emitted near us, so 32 bit rel jmp
-    // should be possible
-    mov(eax, uint32_t(uint64_t(fn->machine_code())));
   } else if (code_cache_->has_indirection_table()) {
     // Load the pointer to the indirection table maintained in X64CodeCache.
     // The target dword will either contain the address of the generated code
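Note: the removed resolve_rel32_guest_calls machinery emitted 0xFF plus an rgcid placeholder and patched it into an E8/E9 rel32 call after the code was placed; the new path simply lets xbyak's call()/jmp() emit the relative branch directly. For reference, a rel32 call is encoded like this (illustrative helper, not part of the commit):

#include <cstdint>
#include <cstring>

// E8 <disp32>: the 32-bit displacement is relative to the end of the 5-byte
// instruction, which is exactly what the deleted InjectCallAddresses() used to
// compute by hand (destination_ minus the address right after the patch site).
void EncodeRel32Call(uint8_t* at, const void* target, bool is_jump) {
  at[0] = is_jump ? 0xE9 : 0xE8;
  int32_t disp = static_cast<int32_t>(reinterpret_cast<intptr_t>(target) -
                                      reinterpret_cast<intptr_t>(at + 5));
  std::memcpy(at + 1, &disp, sizeof(disp));
}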
@@ -1017,7 +936,10 @@ static const vec128_t xmm_consts[] = {
     /*XMMSTVLShuffle*/
     v128_setr_bytes(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
     /* XMMSTVRSwapMask*/
-    vec128b((uint8_t)0x83)};
+    vec128b((uint8_t)0x83), /*XMMVSRShlByteshuf*/
+    v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80),
+    // XMMVSRMask
+    vec128b(1)};

 void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
   for (auto& vec : xmm_consts) {
@@ -66,7 +66,7 @@ enum class SimdDomain : uint32_t {
 };

 enum class MXCSRMode : uint32_t { Unknown, Fpu, Vmx };
+XE_MAYBE_UNUSED
 static SimdDomain PickDomain2(SimdDomain dom1, SimdDomain dom2) {
   if (dom1 == dom2) {
     return dom1;
@@ -172,7 +172,9 @@ enum XmmConst {
   XMMLVLShuffle,
   XMMLVRCmp16,
   XMMSTVLShuffle,
-  XMMSTVRSwapMask  // swapwordmask with bit 7 set
+  XMMSTVRSwapMask,  // swapwordmask with bit 7 set
+  XMMVSRShlByteshuf,
+  XMMVSRMask

 };
 using amdfx::xopcompare_e;
@@ -190,13 +192,6 @@ class XbyakAllocator : public Xbyak::Allocator {
   virtual bool useProtect() const { return false; }
 };

-class ResolvableGuestCall {
- public:
-  bool is_jump_;
-  uintptr_t destination_;
-  // rgcid
-  unsigned offset_;
-};
 class X64Emitter;
 using TailEmitCallback = std::function<void(X64Emitter& e, Xbyak::Label& lbl)>;
 struct TailEmitter {
@@ -220,7 +215,6 @@ class X64Emitter : public Xbyak::CodeGenerator {
            uint32_t debug_info_flags, FunctionDebugInfo* debug_info,
            void** out_code_address, size_t* out_code_size,
            std::vector<SourceMapEntry>* out_source_map);
-  void InjectCallAddresses(void* new_execute_addr);

  public:
   // Reserved: rsp, rsi, rdi
@@ -230,7 +224,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
   // xmm4-xmm15 (save to get xmm3)
   static const int GPR_COUNT = 7;
   static const int XMM_COUNT = 12;
+  static constexpr size_t kStashOffset = 32;
   static void SetupReg(const hir::Value* v, Xbyak::Reg8& r) {
     auto idx = gpr_reg_map_[v->reg.index];
     r = Xbyak::Reg8(idx);
@@ -410,8 +404,6 @@ class X64Emitter : public Xbyak::CodeGenerator {

   static const uint32_t gpr_reg_map_[GPR_COUNT];
   static const uint32_t xmm_reg_map_[XMM_COUNT];
-  uint32_t current_rgc_id_ = 0xEEDDF00F;
-  std::vector<ResolvableGuestCall> call_sites_;
   /*
     set to true if the low 32 bits of membase == 0.
     only really advantageous if you are storing 32 bit 0 to a displaced address,
@@ -398,21 +398,22 @@ struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
 };

 template <typename T>
+XE_MAYBE_UNUSED
 static const T GetTempReg(X64Emitter& e);
 template <>
-const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
+XE_MAYBE_UNUSED const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
   return e.al;
 }
 template <>
-const Reg16 GetTempReg<Reg16>(X64Emitter& e) {
+XE_MAYBE_UNUSED const Reg16 GetTempReg<Reg16>(X64Emitter& e) {
   return e.ax;
 }
 template <>
-const Reg32 GetTempReg<Reg32>(X64Emitter& e) {
+XE_MAYBE_UNUSED const Reg32 GetTempReg<Reg32>(X64Emitter& e) {
   return e.eax;
 }
 template <>
-const Reg64 GetTempReg<Reg64>(X64Emitter& e) {
+XE_MAYBE_UNUSED const Reg64 GetTempReg<Reg64>(X64Emitter& e) {
   return e.rax;
 }
@@ -25,46 +25,46 @@ static void EmitFusedBranch(X64Emitter& e, const T& i) {
   bool valid = i.instr->prev && i.instr->prev->dest == i.src1.value;
   auto opcode = valid ? i.instr->prev->opcode->num : -1;
   if (valid) {
-    auto name = i.src2.value->name;
+    std::string name = i.src2.value->GetIdString();
     switch (opcode) {
       case OPCODE_COMPARE_EQ:
-        e.je(name, e.T_NEAR);
+        e.je(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_NE:
-        e.jne(name, e.T_NEAR);
+        e.jne(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_SLT:
-        e.jl(name, e.T_NEAR);
+        e.jl(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_SLE:
-        e.jle(name, e.T_NEAR);
+        e.jle(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_SGT:
-        e.jg(name, e.T_NEAR);
+        e.jg(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_SGE:
-        e.jge(name, e.T_NEAR);
+        e.jge(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_ULT:
-        e.jb(name, e.T_NEAR);
+        e.jb(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_ULE:
-        e.jbe(name, e.T_NEAR);
+        e.jbe(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_UGT:
-        e.ja(name, e.T_NEAR);
+        e.ja(std::move(name), e.T_NEAR);
         break;
       case OPCODE_COMPARE_UGE:
-        e.jae(name, e.T_NEAR);
+        e.jae(std::move(name), e.T_NEAR);
         break;
       default:
         e.test(i.src1, i.src1);
-        e.jnz(name, e.T_NEAR);
+        e.jnz(std::move(name), e.T_NEAR);
         break;
     }
   } else {
     e.test(i.src1, i.src1);
-    e.jnz(i.src2.value->name, e.T_NEAR);
+    e.jnz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 }
 // ============================================================================
@@ -490,7 +490,7 @@ EMITTER_OPCODE_TABLE(OPCODE_SET_RETURN_ADDRESS, SET_RETURN_ADDRESS);
 // ============================================================================
 struct BRANCH : Sequence<BRANCH, I<OPCODE_BRANCH, VoidOp, LabelOp>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    e.jmp(i.src1.value->name, e.T_NEAR);
+    e.jmp(i.src1.value->GetIdString(), e.T_NEAR);
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_BRANCH, BRANCH);
@@ -534,7 +534,7 @@ struct BRANCH_TRUE_F32
     Xmm input = GetInputRegOrConstant(e, i.src1, e.xmm0);
     e.vmovd(e.eax, input);
     e.test(e.eax, e.eax);
-    e.jnz(i.src2.value->name, e.T_NEAR);
+    e.jnz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 struct BRANCH_TRUE_F64
@@ -543,7 +543,7 @@ struct BRANCH_TRUE_F64
     Xmm input = GetInputRegOrConstant(e, i.src1, e.xmm0);
     e.vmovq(e.rax, input);
     e.test(e.rax, e.rax);
-    e.jnz(i.src2.value->name, e.T_NEAR);
+    e.jnz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_BRANCH_TRUE, BRANCH_TRUE_I8, BRANCH_TRUE_I16,
@@ -557,7 +557,7 @@ struct BRANCH_FALSE_I8
     : Sequence<BRANCH_FALSE_I8, I<OPCODE_BRANCH_FALSE, VoidOp, I8Op, LabelOp>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     e.test(i.src1, i.src1);
-    e.jz(i.src2.value->name, e.T_NEAR);
+    e.jz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 struct BRANCH_FALSE_I16
@@ -565,7 +565,7 @@ struct BRANCH_FALSE_I16
                I<OPCODE_BRANCH_FALSE, VoidOp, I16Op, LabelOp>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     e.test(i.src1, i.src1);
-    e.jz(i.src2.value->name, e.T_NEAR);
+    e.jz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 struct BRANCH_FALSE_I32
@@ -573,7 +573,7 @@ struct BRANCH_FALSE_I32
                I<OPCODE_BRANCH_FALSE, VoidOp, I32Op, LabelOp>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     e.test(i.src1, i.src1);
-    e.jz(i.src2.value->name, e.T_NEAR);
+    e.jz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 struct BRANCH_FALSE_I64
@@ -581,7 +581,7 @@ struct BRANCH_FALSE_I64
                I<OPCODE_BRANCH_FALSE, VoidOp, I64Op, LabelOp>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     e.test(i.src1, i.src1);
-    e.jz(i.src2.value->name, e.T_NEAR);
+    e.jz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 struct BRANCH_FALSE_F32
@@ -591,7 +591,7 @@ struct BRANCH_FALSE_F32
     Xmm input = GetInputRegOrConstant(e, i.src1, e.xmm0);
     e.vmovd(e.eax, input);
     e.test(e.eax, e.eax);
-    e.jz(i.src2.value->name, e.T_NEAR);
+    e.jz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 struct BRANCH_FALSE_F64
@@ -601,7 +601,7 @@ struct BRANCH_FALSE_F64
     Xmm input = GetInputRegOrConstant(e, i.src1, e.xmm0);
     e.vmovq(e.rax, input);
     e.test(e.rax, e.rax);
-    e.jz(i.src2.value->name, e.T_NEAR);
+    e.jz(i.src2.value->GetIdString(), e.T_NEAR);
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_BRANCH_FALSE, BRANCH_FALSE_I8, BRANCH_FALSE_I16,
@@ -805,22 +805,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB, VECTOR_SUB);
 // ============================================================================
 // OPCODE_VECTOR_SHL
 // ============================================================================
-template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
-static __m128i EmulateVectorShl(void*, __m128i src1, __m128i src2) {
-  alignas(16) T value[16 / sizeof(T)];
-  alignas(16) T shamt[16 / sizeof(T)];
-
-  // Load SSE registers into a C array.
-  _mm_store_si128(reinterpret_cast<__m128i*>(value), src1);
-  _mm_store_si128(reinterpret_cast<__m128i*>(shamt), src2);
-
-  for (size_t i = 0; i < (16 / sizeof(T)); ++i) {
-    value[i] = value[i] << (shamt[i] & ((sizeof(T) * 8) - 1));
-  }
-
-  // Store result and return it.
-  return _mm_load_si128(reinterpret_cast<__m128i*>(value));
-}
 static XmmConst GetShiftmaskForType(unsigned typ) {
   if (typ == INT8_TYPE) {
     return XMMXOPByteShiftMask;
@@ -914,28 +899,14 @@ struct VECTOR_SHL_V128
       }
     }
     if (all_same) {
-      // mul by two
-      /*if (seenvalue == 1) {
-        e.vpaddb(i.dest, i.src1, i.src1);
-      } else if (seenvalue == 2) {
-        e.vpaddb(i.dest, i.src1, i.src1);
-        e.vpaddb(i.dest, i.dest, i.dest);
-      } else if (seenvalue == 3) {
-        // mul by 8
-        e.vpaddb(i.dest, i.src1, i.src1);
-        e.vpaddb(i.dest, i.dest, i.dest);
-        e.vpaddb(i.dest, i.dest, i.dest);
-      } else*/
-      {
-        e.vpmovzxbw(e.ymm0, i.src1);
-        e.vpsllw(e.ymm0, e.ymm0, seenvalue);
-        e.vextracti128(e.xmm1, e.ymm0, 1);
-
+      e.vpmovzxbw(e.ymm0, i.src1);
+      e.vpsllw(e.ymm0, e.ymm0, seenvalue);
+      e.vextracti128(e.xmm1, e.ymm0, 1);
       e.vpshufb(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMShortsToBytes));
       e.vpshufb(e.xmm1, e.xmm1, e.GetXmmConstPtr(XMMShortsToBytes));
       e.vpunpcklqdq(i.dest, e.xmm0, e.xmm1);
       return;
-      }
     } else {
       e.LoadConstantXmm(e.xmm2, constmask);
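Note: for a uniform constant byte count the path above widens to 16-bit lanes, shifts, and narrows back. Roughly the same dataflow in intrinsics, as a sketch only; XMMShortsToBytes is assumed to be the low-byte-of-each-word shuffle.

#include <immintrin.h>

__m128i ShlEachByteByConstant(__m128i value, int count) {
  __m256i widened = _mm256_cvtepu8_epi16(value);      // vpmovzxbw
  widened = _mm256_slli_epi16(widened, count);        // vpsllw
  __m128i lo = _mm256_castsi256_si128(widened);
  __m128i hi = _mm256_extracti128_si256(widened, 1);  // vextracti128
  // Keep the low byte of every 16-bit lane, i.e. the shifted byte with the
  // bits that overflowed into the high byte thrown away.
  const __m128i shorts_to_bytes = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1,
                                                -1, -1, -1, -1, -1, -1, -1);
  lo = _mm_shuffle_epi8(lo, shorts_to_bytes);          // vpshufb
  hi = _mm_shuffle_epi8(hi, shorts_to_bytes);
  return _mm_unpacklo_epi64(lo, hi);                   // vpunpcklqdq
}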
@@ -966,14 +937,41 @@ struct VECTOR_SHL_V128
         }
       }
     }
-    if (i.src2.is_constant) {
-      e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
+    unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
+    unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
+
+    if (i.src1.is_constant) {
+      e.StashConstantXmm(0, i.src1.constant());
+      stack_offset_src1 = X64Emitter::kStashOffset;
     } else {
-      e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
     }
-    e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
-    e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShl<uint8_t>));
-    e.vmovaps(i.dest, e.xmm0);
+    if (i.src2.is_constant) {
+      e.StashConstantXmm(1, i.src2.constant());
+      stack_offset_src2 = X64Emitter::kStashOffset + 16;
+    } else {
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
+    }
+
+    Xbyak::Label looper;
+
+    e.xor_(e.edx, e.edx);
+
+    e.L(looper);
+    e.movzx(e.ecx, e.byte[e.rsp + stack_offset_src2 + e.rdx]);
+
+    e.shl(e.byte[e.rsp + stack_offset_src1 + e.rdx], e.cl);
+
+    if (e.IsFeatureEnabled(kX64FlagsIndependentVars)) {
+      e.inc(e.edx);
+    } else {
+      e.add(e.edx, 1);
+    }
+
+    e.cmp(e.edx, 16);
+    e.jnz(looper);
+    e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
   }
   static void EmitInt16(X64Emitter& e, const EmitArgType& i) {
     Xmm src1;
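Note: the CallNativeSafe round trip into EmulateVectorShl<uint8_t> is replaced by an inline loop that spills both vectors to the guest scratch area and shifts each byte in place with shl [mem], cl. Its scalar meaning, which is also what the removed helper computed, restated as a sketch:

#include <cstdint>

void ShlPerByte(uint8_t value[16], const uint8_t shamt[16]) {
  for (unsigned i = 0; i < 16; ++i) {
    // The removed helper masked the amount to the element width (& 7); the
    // emitted loop feeds the byte straight into CL, which assumes the amounts
    // the guest supplies are already in range.
    value[i] = static_cast<uint8_t>(value[i] << (shamt[i] & 7));
  }
}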
@@ -1022,14 +1020,32 @@ struct VECTOR_SHL_V128

     // TODO(benvanik): native version (with shift magic).
     e.L(emu);
+
+    unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
+    unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
+
+    e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], src1);
     if (i.src2.is_constant) {
-      e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
+      e.StashConstantXmm(1, i.src2.constant());
+      stack_offset_src2 = X64Emitter::kStashOffset + 16;
     } else {
-      e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
     }
-    e.lea(e.GetNativeParam(0), e.StashXmm(0, src1));
-    e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShl<uint16_t>));
-    e.vmovaps(i.dest, e.xmm0);
+
+    Xbyak::Label looper;
+
+    e.xor_(e.edx, e.edx);
+
+    e.L(looper);
+    e.movzx(e.ecx, e.word[e.rsp + stack_offset_src2 + e.rdx]);
+
+    e.shl(e.word[e.rsp + stack_offset_src1 + e.rdx], e.cl);
+
+    e.add(e.edx, 2);
+
+    e.cmp(e.edx, 16);
+    e.jnz(looper);
+    e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);

     e.L(end);
   }
@@ -1098,14 +1114,32 @@ struct VECTOR_SHL_V128

     // TODO(benvanik): native version (with shift magic).
     e.L(emu);
+
+    unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
+    unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
+
+    e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], src1);
     if (i.src2.is_constant) {
-      e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
+      e.StashConstantXmm(1, i.src2.constant());
+      stack_offset_src2 = X64Emitter::kStashOffset + 16;
     } else {
-      e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
     }
-    e.lea(e.GetNativeParam(0), e.StashXmm(0, src1));
-    e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShl<uint32_t>));
-    e.vmovaps(i.dest, e.xmm0);
+
+    Xbyak::Label looper;
+
+    e.xor_(e.edx, e.edx);
+
+    e.L(looper);
+    e.mov(e.ecx, e.dword[e.rsp + stack_offset_src2 + e.rdx]);
+
+    e.shl(e.dword[e.rsp + stack_offset_src1 + e.rdx], e.cl);
+
+    e.add(e.edx, 4);
+
+    e.cmp(e.edx, 16);
+    e.jnz(looper);
+    e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);

     e.L(end);
   }
@@ -1116,22 +1150,6 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHL, VECTOR_SHL_V128);
 // ============================================================================
 // OPCODE_VECTOR_SHR
 // ============================================================================
-template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
-static __m128i EmulateVectorShr(void*, __m128i src1, __m128i src2) {
-  alignas(16) T value[16 / sizeof(T)];
-  alignas(16) T shamt[16 / sizeof(T)];
-
-  // Load SSE registers into a C array.
-  _mm_store_si128(reinterpret_cast<__m128i*>(value), src1);
-  _mm_store_si128(reinterpret_cast<__m128i*>(shamt), src2);
-
-  for (size_t i = 0; i < (16 / sizeof(T)); ++i) {
-    value[i] = value[i] >> (shamt[i] & ((sizeof(T) * 8) - 1));
-  }
-
-  // Store result and return it.
-  return _mm_load_si128(reinterpret_cast<__m128i*>(value));
-}

 struct VECTOR_SHR_V128
     : Sequence<VECTOR_SHR_V128, I<OPCODE_VECTOR_SHR, V128Op, V128Op, V128Op>> {
@@ -1179,34 +1197,63 @@ struct VECTOR_SHR_V128
   }

   static void EmitInt8(X64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): native version (with shift magic).
-    if (i.src2.is_constant) {
-      if (e.IsFeatureEnabled(kX64EmitGFNI)) {
-        const auto& shamt = i.src2.constant();
-        bool all_same = true;
-        for (size_t n = 0; n < 16 - n; ++n) {
-          if (shamt.u8[n] != shamt.u8[n + 1]) {
-            all_same = false;
-            break;
-          }
-        }
-        if (all_same) {
-          // Every count is the same, so we can use gf2p8affineqb.
-          const uint8_t shift_amount = shamt.u8[0] & 0b111;
-          const uint64_t shift_matrix = UINT64_C(0x0102040810204080)
-                                        << (shift_amount * 8);
-          e.vgf2p8affineqb(i.dest, i.src1,
-                           e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
-          return;
-        }
-      }
-      e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
-    } else {
-      e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
-    }
-    e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
-    e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<uint8_t>));
-    e.vmovaps(i.dest, e.xmm0);
+    if (i.src2.is_constant && e.IsFeatureEnabled(kX64EmitGFNI)) {
+      const auto& shamt = i.src2.constant();
+      bool all_same = true;
+      for (size_t n = 0; n < 16 - n; ++n) {
+        if (shamt.u8[n] != shamt.u8[n + 1]) {
+          all_same = false;
+          break;
+        }
+      }
+      if (all_same) {
+        // Every count is the same, so we can use gf2p8affineqb.
+        const uint8_t shift_amount = shamt.u8[0] & 0b111;
+        const uint64_t shift_matrix = UINT64_C(0x0102040810204080)
+                                      << (shift_amount * 8);
+        e.vgf2p8affineqb(i.dest, i.src1,
+                         e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
+        return;
+      }
+    }
+    unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
+    unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
+
+    if (i.src1.is_constant) {
+      e.StashConstantXmm(0, i.src1.constant());
+      stack_offset_src1 = X64Emitter::kStashOffset;
+    } else {
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
+    }
+    if (i.src2.is_constant) {
+      e.StashConstantXmm(1, i.src2.constant());
+      stack_offset_src2 = X64Emitter::kStashOffset + 16;
+    } else {
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
+    }
+
+    Xbyak::Label looper;
+
+    e.xor_(e.edx, e.edx);
+
+    e.L(looper);
+    // movzx is to eliminate any possible dep on previous value of rcx at start
+    // of loop
+    e.movzx(e.ecx, e.byte[e.rsp + stack_offset_src2 + e.rdx]);
+    // maybe using a memory operand as the left side isn't the best idea lol,
+    // still better than callnativesafe though agners docs have no timing info
+    // on shx [m], cl so shrug
+    e.shr(e.byte[e.rsp + stack_offset_src1 + e.rdx], e.cl);
+
+    if (e.IsFeatureEnabled(kX64FlagsIndependentVars)) {
+      e.inc(e.edx);
+    } else {
+      e.add(e.edx, 1);
+    }
+
+    e.cmp(e.edx, 16);
+    e.jnz(looper);
+    e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
   }

   static void EmitInt16(X64Emitter& e, const EmitArgType& i) {
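Note on the GFNI fast path kept above: gf2p8affineqb multiplies every source byte, viewed as a GF(2) bit vector, by an 8x8 bit matrix taken from the other operand. As I read this code, 0x0102040810204080 plays the role of the identity matrix in that encoding, and shifting the whole constant left by 8 * amount moves the matrix by that many rows, which turns it into a per-byte right shift. A tiny restatement of the constant the emitter builds (sketch):

#include <cstdint>

constexpr uint64_t MakeByteShrMatrix(unsigned amount) {
  // Same expression as in the emitter: identity matrix moved up by `amount`
  // rows; amount is expected to be 0..7.
  return UINT64_C(0x0102040810204080) << (amount * 8);
}

static_assert(MakeByteShrMatrix(0) == UINT64_C(0x0102040810204080), "identity");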
@@ -1248,14 +1295,38 @@ struct VECTOR_SHR_V128

     // TODO(benvanik): native version (with shift magic).
     e.L(emu);
-    if (i.src2.is_constant) {
-      e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
+    unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
+    unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
+    if (i.src1.is_constant) {
+      e.StashConstantXmm(0, i.src1.constant());
+      stack_offset_src1 = X64Emitter::kStashOffset;
+
     } else {
-      e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
     }
-    e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
-    e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<uint16_t>));
-    e.vmovaps(i.dest, e.xmm0);
+    if (i.src2.is_constant) {
+      e.StashConstantXmm(1, i.src2.constant());
+      stack_offset_src2 = X64Emitter::kStashOffset + 16;
+    } else {
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
+    }
+
+    Xbyak::Label looper;
+
+    e.xor_(e.edx, e.edx);
+
+    e.L(looper);
+    e.movzx(e.ecx, e.word[e.rsp + stack_offset_src2 + e.rdx]);
+
+    e.shr(e.word[e.rsp + stack_offset_src1 + e.rdx], e.cl);
+
+    e.add(e.edx, 2);
+
+    e.cmp(e.edx, 16);
+    e.jnz(looper);
+    e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);

     e.L(end);
   }
@@ -1324,14 +1395,37 @@ struct VECTOR_SHR_V128

     // TODO(benvanik): native version.
     e.L(emu);
-    if (i.src2.is_constant) {
-      e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
+    unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
+    unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
+    if (i.src1.is_constant) {
+      e.StashConstantXmm(0, i.src1.constant());
+      stack_offset_src1 = X64Emitter::kStashOffset;
+
     } else {
-      e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
     }
-    e.lea(e.GetNativeParam(0), e.StashXmm(0, src1));
-    e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<uint32_t>));
-    e.vmovaps(i.dest, e.xmm0);
+    if (i.src2.is_constant) {
+      e.StashConstantXmm(1, i.src2.constant());
+      stack_offset_src2 = X64Emitter::kStashOffset + 16;
+    } else {
+      e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
+    }
+
+    Xbyak::Label looper;
+
+    e.xor_(e.edx, e.edx);
+
+    e.L(looper);
+    e.mov(e.ecx, e.dword[e.rsp + stack_offset_src2 + e.rdx]);
+    e.shr(e.dword[e.rsp + stack_offset_src1 + e.rdx], e.cl);
+
+    e.add(e.edx, 4);
+
+    e.cmp(e.edx, 16);
+    e.jnz(looper);
+    e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);

     e.L(end);
   }
@@ -1388,7 +1482,8 @@ struct VECTOR_SHA_V128
   }

   static void EmitInt8(X64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): native version (with shift magic).
+    unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
+    unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
     if (i.src2.is_constant) {
       const auto& shamt = i.src2.constant();
       bool all_same = true;
@@ -1399,7 +1494,6 @@ struct VECTOR_SHA_V128
       }
     }

-
     if (e.IsFeatureEnabled(kX64EmitGFNI)) {
       if (all_same) {
         // Every count is the same, so we can use gf2p8affineqb.
@@ -1412,8 +1506,7 @@ struct VECTOR_SHA_V128
                          e.StashConstantXmm(0, vec128q(shift_matrix)), 0);
         return;
       }
-    }
-    else if (all_same) {
+    } else if (all_same) {
       Xmm to_be_shifted = GetInputRegOrConstant(e, i.src1, e.xmm1);

       e.vpmovsxbw(e.xmm0, to_be_shifted);  //_mm_srai_epi16 / psraw
@ -1425,14 +1518,41 @@ struct VECTOR_SHA_V128
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
e.StashConstantXmm(1, i.src2.constant());
|
||||||
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
|
stack_offset_src2 = X64Emitter::kStashOffset + 16;
|
||||||
} else {
|
} else {
|
||||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
|
||||||
}
|
}
|
||||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
||||||
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<int8_t>));
|
if (i.src1.is_constant) {
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
e.StashConstantXmm(0, i.src1.constant());
|
||||||
|
stack_offset_src1 = X64Emitter::kStashOffset;
|
||||||
|
} else {
|
||||||
|
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Xbyak::Label looper;
|
||||||
|
|
||||||
|
e.xor_(e.edx, e.edx);
|
||||||
|
|
||||||
|
e.L(looper);
|
||||||
|
// movzx is to eliminate any possible dep on previous value of rcx at start
|
||||||
|
// of loop
|
||||||
|
e.movzx(e.ecx, e.byte[e.rsp + stack_offset_src2 + e.rdx]);
|
||||||
|
// maybe using a memory operand as the left side isn't the best idea lol,
|
||||||
|
// still better than callnativesafe though agners docs have no timing info
|
||||||
|
// on shx [m], cl so shrug
|
||||||
|
e.sar(e.byte[e.rsp + stack_offset_src1 + e.rdx], e.cl);
|
||||||
|
|
||||||
|
if (e.IsFeatureEnabled(kX64FlagsIndependentVars)) {
|
||||||
|
e.inc(e.edx);
|
||||||
|
} else {
|
||||||
|
e.add(e.edx, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
e.cmp(e.edx, 16);
|
||||||
|
e.jnz(looper);
|
||||||
|
e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
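A hedged intrinsics sketch of the "all shift counts equal" byte path mentioned above: widen to int16, use the native 16-bit arithmetic shift, then narrow with signed saturation (exact here because shifted int8 values stay inside int8 range). This mirrors the vpmovsxbw/psraw idea noted in the emitter; it is not the emitter code itself and the function name is made up.

#include <emmintrin.h>
#include <smmintrin.h>  // SSE4.1 for _mm_cvtepi8_epi16

static __m128i sha_epi8_uniform_reference(__m128i value, int count /* 0..7 */) {
  const __m128i vcount = _mm_cvtsi32_si128(count);
  __m128i lo = _mm_cvtepi8_epi16(value);                     // bytes 0..7
  __m128i hi = _mm_cvtepi8_epi16(_mm_srli_si128(value, 8));  // bytes 8..15
  lo = _mm_sra_epi16(lo, vcount);                            // arithmetic shift
  hi = _mm_sra_epi16(hi, vcount);
  return _mm_packs_epi16(lo, hi);  // values fit in int8, so no clamping occurs
}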
static void EmitInt16(X64Emitter& e, const EmitArgType& i) {
@ -1474,14 +1594,38 @@ struct VECTOR_SHA_V128
// TODO(benvanik): native version (with shift magic).
e.L(emu);
if (i.src2.is_constant) {
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
if (i.src1.is_constant) {
e.StashConstantXmm(0, i.src1.constant());
stack_offset_src1 = X64Emitter::kStashOffset;
} else {
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
}
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<int16_t>));
if (i.src2.is_constant) {
e.vmovaps(i.dest, e.xmm0);
e.StashConstantXmm(1, i.src2.constant());
stack_offset_src2 = X64Emitter::kStashOffset + 16;
} else {
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
}
Xbyak::Label looper;
e.xor_(e.edx, e.edx);
e.L(looper);
e.movzx(e.ecx, e.word[e.rsp + stack_offset_src2 + e.rdx]);
e.sar(e.word[e.rsp + stack_offset_src1 + e.rdx], e.cl);
e.add(e.edx, 2);
e.cmp(e.edx, 16);
e.jnz(looper);
e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
e.L(end);
}
@ -1508,9 +1652,9 @@ struct VECTOR_SHA_V128
// that happens so we mask.
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.vandps(e.xmm0, e.GetXmmConstPtr(XMMShiftMaskPS));
e.vpand(e.xmm0, e.GetXmmConstPtr(XMMShiftMaskPS));
} else {
e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
e.vpand(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
}
e.vpsravd(i.dest, i.src1, e.xmm0);
} else {
@ -1535,14 +1679,36 @@ struct VECTOR_SHA_V128
// TODO(benvanik): native version.
e.L(emu);
if (i.src2.is_constant) {
unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
if (i.src1.is_constant) {
e.StashConstantXmm(0, i.src1.constant());
stack_offset_src1 = X64Emitter::kStashOffset;
} else {
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
}
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<int32_t>));
if (i.src2.is_constant) {
e.vmovaps(i.dest, e.xmm0);
e.StashConstantXmm(1, i.src2.constant());
stack_offset_src2 = X64Emitter::kStashOffset + 16;
} else {
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
}
Xbyak::Label looper;
e.xor_(e.edx, e.edx);
e.L(looper);
e.mov(e.ecx, e.dword[e.rsp + stack_offset_src2 + e.rdx]);
e.sar(e.dword[e.rsp + stack_offset_src1 + e.rdx], e.cl);
e.add(e.edx, 4);
e.cmp(e.edx, 16);
e.jnz(looper);
e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
e.L(end);
}
@ -1550,26 +1716,6 @@ struct VECTOR_SHA_V128
};
EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA, VECTOR_SHA_V128);
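A short sketch of the masked vpsravd fast path seen above, under the assumption stated in the code comment: AltiVec only honours the low five bits of each per-lane count, while vpsravd treats counts of 32 or more as "shift everything out", so the counts are masked first (the role XMMShiftMaskPS plays in the emitter). Illustrative only.

#include <immintrin.h>  // AVX2

static __m128i sha_epi32_reference(__m128i value, __m128i counts) {
  const __m128i mask = _mm_set1_epi32(31);                  // keep low 5 bits
  return _mm_srav_epi32(value, _mm_and_si128(counts, mask));  // per-lane sar
}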
// ============================================================================
// OPCODE_VECTOR_ROTATE_LEFT
// ============================================================================
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) {
alignas(16) T value[16 / sizeof(T)];
alignas(16) T shamt[16 / sizeof(T)];
// Load SSE registers into a C array.
_mm_store_si128(reinterpret_cast<__m128i*>(value), src1);
_mm_store_si128(reinterpret_cast<__m128i*>(shamt), src2);
for (size_t i = 0; i < (16 / sizeof(T)); ++i) {
value[i] = xe::rotate_left<T>(value[i], shamt[i] & ((sizeof(T) * 8) - 1));
}
// Store result and return it.
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
}
struct VECTOR_ROTATE_LEFT_V128
: Sequence<VECTOR_ROTATE_LEFT_V128,
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
@ -1594,33 +1740,72 @@ struct VECTOR_ROTATE_LEFT_V128
}
} else {
unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
switch (i.instr->flags) {
case INT8_TYPE:
case INT8_TYPE: {
// TODO(benvanik): native version (with shift magic).
if (i.src1.is_constant) {
if (i.src2.is_constant) {
e.StashConstantXmm(0, i.src1.constant());
e.lea(e.GetNativeParam(1),
stack_offset_src1 = X64Emitter::kStashOffset;
e.StashConstantXmm(1, i.src2.constant()));
} else {
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
}
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
e.vmovaps(i.dest, e.xmm0);
break;
case INT16_TYPE:
// TODO(benvanik): native version (with shift magic).
if (i.src2.is_constant) {
e.lea(e.GetNativeParam(1),
e.StashConstantXmm(1, i.src2.constant());
e.StashConstantXmm(1, i.src2.constant()));
stack_offset_src2 = X64Emitter::kStashOffset + 16;
} else {
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
}
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
Xbyak::Label rotate_iter;
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
e.vmovaps(i.dest, e.xmm0);
e.xor_(e.edx, e.edx);
break;
e.L(rotate_iter);
e.movzx(e.ecx, e.byte[e.rsp + stack_offset_src2 + e.rdx]);
e.rol(e.byte[e.rsp + stack_offset_src1 + e.rdx], e.cl);
e.add(e.edx, 1);
e.cmp(e.edx, 16);
e.jnz(rotate_iter);
e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
} break;
case INT16_TYPE: {
if (i.src1.is_constant) {
e.StashConstantXmm(0, i.src1.constant());
stack_offset_src1 = X64Emitter::kStashOffset;
} else {
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
}
if (i.src2.is_constant) {
e.StashConstantXmm(1, i.src2.constant());
stack_offset_src2 = X64Emitter::kStashOffset + 16;
} else {
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
}
Xbyak::Label rotate_iter;
e.xor_(e.edx, e.edx);
e.L(rotate_iter);
e.movzx(e.ecx, e.word[e.rsp + stack_offset_src2 + e.rdx]);
e.rol(e.word[e.rsp + stack_offset_src1 + e.rdx], e.cl);
e.add(e.edx, 2);
e.cmp(e.edx, 16);
e.jnz(rotate_iter);
e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
} break;
case INT32_TYPE: {
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
e.vprolvd(i.dest, i.src1, i.src2);
@ -1638,23 +1823,40 @@ struct VECTOR_ROTATE_LEFT_V128
}
e.vpsllvd(e.xmm1, i.src1, e.xmm0);
// Shift right (to get low bits):
e.vmovaps(temp, e.GetXmmConstPtr(XMMPI32));
e.vmovdqa(temp, e.GetXmmConstPtr(XMMPI32));
e.vpsubd(temp, e.xmm0);
e.vpsrlvd(i.dest, i.src1, temp);
// Merge:
e.vpor(i.dest, e.xmm1);
} else {
// TODO(benvanik): non-AVX2 native version.
if (i.src1.is_constant) {
if (i.src2.is_constant) {
e.StashConstantXmm(0, i.src1.constant());
e.lea(e.GetNativeParam(1),
stack_offset_src1 = X64Emitter::kStashOffset;
e.StashConstantXmm(1, i.src2.constant()));
} else {
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], i.src1);
}
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
if (i.src2.is_constant) {
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
e.StashConstantXmm(1, i.src2.constant());
e.vmovaps(i.dest, e.xmm0);
stack_offset_src2 = X64Emitter::kStashOffset + 16;
} else {
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], i.src2);
}
Xbyak::Label rotate_iter;
e.xor_(e.edx, e.edx);
e.L(rotate_iter);
e.mov(e.ecx, e.dword[e.rsp + stack_offset_src2 + e.rdx]);
e.rol(e.dword[e.rsp + stack_offset_src1 + e.rdx], e.cl);
e.add(e.edx, 4);
e.cmp(e.edx, 16);
e.jnz(rotate_iter);
e.vmovdqa(i.dest, e.byte[e.rsp + stack_offset_src1]);
}
break;
}
@ -1667,80 +1869,120 @@ struct VECTOR_ROTATE_LEFT_V128
};
EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT, VECTOR_ROTATE_LEFT_V128);
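A small reference for what the per-lane rotate loops compute: `rol r/m32, cl` rotates by the count modulo 32, which matches the masking the removed EmulateVectorRotateLeft helper performed. Function name is illustrative.

#include <cstdint>

static uint32_t rotate_left_u32(uint32_t value, uint32_t count) {
  count &= 31;  // rol only uses the low 5 bits of cl for a 32-bit operand
  return count ? (value << count) | (value >> (32 - count)) : value;
}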
// ============================================================================
// OPCODE_VECTOR_AVERAGE
// ============================================================================
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
static __m128i EmulateVectorAverage(void*, __m128i src1, __m128i src2) {
alignas(16) T src1v[16 / sizeof(T)];
alignas(16) T src2v[16 / sizeof(T)];
alignas(16) T value[16 / sizeof(T)];
// Load SSE registers into a C array.
_mm_store_si128(reinterpret_cast<__m128i*>(src1v), src1);
_mm_store_si128(reinterpret_cast<__m128i*>(src2v), src2);
for (size_t i = 0; i < (16 / sizeof(T)); ++i) {
auto t = (uint64_t(src1v[i]) + uint64_t(src2v[i]) + 1) / 2;
value[i] = T(t);
}
// Store result and return it.
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
}
struct VECTOR_AVERAGE
: Sequence<VECTOR_AVERAGE,
I<OPCODE_VECTOR_AVERAGE, V128Op, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
auto i_flags = i.instr->flags;
EmitCommutativeBinaryXmmOp(
e, i,
[&i](X64Emitter& e, const Xmm& dest, const Xmm& src1, const Xmm& src2) {
[i_flags](X64Emitter& e, const Xmm& dest, const Xmm& src1,
const TypeName part_type =
const Xmm& src2) {
static_cast<TypeName>(i.instr->flags & 0xFF);
const TypeName part_type = static_cast<TypeName>(i_flags & 0xFF);
const uint32_t arithmetic_flags = i.instr->flags >> 8;
const uint32_t arithmetic_flags = i_flags >> 8;
bool is_unsigned = !!(arithmetic_flags & ARITHMETIC_UNSIGNED);
unsigned stack_offset_src1 = StackLayout::GUEST_SCRATCH;
unsigned stack_offset_src2 = StackLayout::GUEST_SCRATCH + 16;
switch (part_type) {
case INT8_TYPE:
if (is_unsigned) {
e.vpavgb(dest, src1, src2);
} else {
assert_always();
// todo: avx2 version or version that sign extends to two __m128
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], src1);
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], src2);
Xbyak::Label looper;
e.xor_(e.edx, e.edx);
e.L(looper);
e.movsx(e.ecx, e.byte[e.rsp + stack_offset_src2 + e.rdx]);
e.movsx(e.eax, e.byte[e.rsp + stack_offset_src1 + e.rdx]);
e.lea(e.ecx, e.ptr[e.ecx + e.eax + 1]);
e.sar(e.ecx, 1);
e.mov(e.byte[e.rsp + stack_offset_src1 + e.rdx], e.cl);
if (e.IsFeatureEnabled(kX64FlagsIndependentVars)) {
e.inc(e.edx);
} else {
e.add(e.edx, 1);
}
e.cmp(e.edx, 16);
e.jnz(looper);
e.vmovdqa(dest, e.ptr[e.rsp + stack_offset_src1]);
}
break;
case INT16_TYPE:
if (is_unsigned) {
e.vpavgw(dest, src1, src2);
} else {
assert_always();
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], src1);
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], src2);
Xbyak::Label looper;
e.xor_(e.edx, e.edx);
e.L(looper);
e.movsx(e.ecx, e.word[e.rsp + stack_offset_src2 + e.rdx]);
e.movsx(e.eax, e.word[e.rsp + stack_offset_src1 + e.rdx]);
e.lea(e.ecx, e.ptr[e.ecx + e.eax + 1]);
e.sar(e.ecx, 1);
e.mov(e.word[e.rsp + stack_offset_src1 + e.rdx], e.cx);
e.add(e.edx, 2);
e.cmp(e.edx, 16);
e.jnz(looper);
e.vmovdqa(dest, e.ptr[e.rsp + stack_offset_src1]);
}
break;
case INT32_TYPE:
case INT32_TYPE: {
// No 32bit averages in AVX.
e.vmovdqa(e.ptr[e.rsp + stack_offset_src1], src1);
e.vmovdqa(e.ptr[e.rsp + stack_offset_src2], src2);
Xbyak::Label looper;
e.xor_(e.edx, e.edx);
e.L(looper);
auto src2_current_ptr =
e.dword[e.rsp + stack_offset_src2 + e.rdx];
auto src1_current_ptr =
e.dword[e.rsp + stack_offset_src1 + e.rdx];
if (is_unsigned) {
if (i.src2.is_constant) {
// implicit zero-ext
e.lea(e.GetNativeParam(1),
e.mov(e.ecx, src2_current_ptr);
e.StashConstantXmm(1, i.src2.constant()));
e.mov(e.eax, src1_current_ptr);
} else {
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorAverage<uint32_t>));
e.vmovaps(i.dest, e.xmm0);
} else {
if (i.src2.is_constant) {
e.movsxd(e.rcx, src2_current_ptr);
e.lea(e.GetNativeParam(1),
e.movsxd(e.rax, src1_current_ptr);
e.StashConstantXmm(1, i.src2.constant()));
} else {
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorAverage<int32_t>));
e.vmovaps(i.dest, e.xmm0);
}
break;
e.lea(e.rcx, e.ptr[e.rcx + e.rax + 1]);
if (is_unsigned) {
e.shr(e.rcx, 1);
} else {
e.sar(e.rcx, 1);
}
e.mov(e.dword[e.rsp + stack_offset_src1 + e.rdx], e.ecx);
e.add(e.edx, 4);
e.cmp(e.edx, 16);
e.jnz(looper);
e.vmovdqa(dest, e.ptr[e.rsp + stack_offset_src1]);
} break;
default:
assert_unhandled_case(part_type);
break;
@ -2163,82 +2405,6 @@ struct PERMUTE_V128
};
EMITTER_OPCODE_TABLE(OPCODE_PERMUTE, PERMUTE_I32, PERMUTE_V128);
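For reference, the signed-average loops above compute, per lane, (a + b + 1) arithmetically shifted right by one, with the sum formed at a wider width so it cannot overflow (the emitted lea plus sar pair). vpavgb/vpavgw already give the unsigned flavour in hardware. A minimal scalar sketch, name illustrative:

#include <cstdint>

static int8_t average_rounded_i8(int8_t a, int8_t b) {
  return static_cast<int8_t>((static_cast<int32_t>(a) + b + 1) >> 1);
}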
#define LCPI(name, quad1) const __m128i name = _mm_set1_epi32(quad1)
// xmm0 is precasted to int, but contains float
// chrispy: todo: make available to gpu code
static __m128i xenos_float4_to_float16_x4(__m128i xmm0) {
LCPI(LCPI0_0, 2147483647);
LCPI(LCPI0_1, 1207951360);
LCPI(LCPI0_2, 134217728);
LCPI(LCPI0_3, 3347054592);
LCPI(LCPI0_4, 260038655);
LCPI(LCPI0_5, 32767);
LCPI(LCPI0_6, 4294934528);
__m128i xmm1 = _mm_and_si128(xmm0, LCPI0_0);
__m128i xmm2 = LCPI0_1;
__m128i xmm3 = _mm_add_epi32(xmm0, LCPI0_2);
xmm2 = _mm_cmpgt_epi32(xmm2, xmm1);
xmm3 = _mm_srli_epi32(xmm3, 13);
xmm1 = _mm_add_epi32(xmm1, LCPI0_3);
__m128i xmm4 = _mm_min_epu32(xmm1, LCPI0_4);
xmm1 = _mm_cmpeq_epi32(xmm1, xmm4);
xmm4 = LCPI0_5;
xmm3 = _mm_and_si128(xmm3, xmm4);
xmm1 = _mm_and_si128(xmm1, xmm3);
xmm1 = _mm_castps_si128(_mm_blendv_ps(
_mm_castsi128_ps(xmm4), _mm_castsi128_ps(xmm1), _mm_castsi128_ps(xmm2)));
xmm0 = _mm_srli_epi32(xmm0, 16);
xmm0 = _mm_and_si128(xmm0, LCPI0_6);
xmm0 = _mm_or_si128(xmm1, xmm0);
xmm0 = _mm_packus_epi32(xmm0, _mm_setzero_si128());
return xmm0;
}
// returns floats, uncasted
// chrispy: todo, make this available to gpu code?
static __m128i xenos_halves_to_floats(__m128i xmm0) {
LCPI(LCPI3_0, 0x1f);
LCPI(LCPI3_1, 0x80000000);
LCPI(LCPI3_2, 0x38000000);
LCPI(LCPI3_3, 0x7fe000);
__m128i xmm1, xmm2, xmm3, xmm4;
xmm1 = _mm_cvtepu16_epi32(xmm0);
xmm2 = _mm_srli_epi32(xmm1, 10);
xmm2 = _mm_and_si128(xmm2, LCPI3_0);
xmm0 = _mm_cvtepi16_epi32(xmm0);
xmm0 = _mm_and_si128(xmm0, LCPI3_1);
xmm3 = _mm_setzero_si128();
xmm4 = _mm_slli_epi32(xmm2, 23);
xmm4 = _mm_add_epi32(xmm4, LCPI3_2);
xmm2 = _mm_cmpeq_epi32(xmm2, xmm3);
xmm1 = _mm_slli_epi32(xmm1, 13);
xmm1 = _mm_and_si128(xmm1, LCPI3_3);
xmm3 = _mm_andnot_si128(xmm2, xmm4);
xmm1 = _mm_andnot_si128(xmm2, xmm1);
xmm0 = _mm_or_si128(xmm1, xmm0);
xmm0 = _mm_or_si128(xmm0, xmm3);
return xmm0;
}
#undef LCPI
template <typename Inst>
static void emit_fast_f16_unpack(X64Emitter& e, const Inst& i,
XmmConst initial_shuffle) {
File diff suppressed because one or more lines are too long
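A scalar reading of xenos_halves_to_floats, as far as can be inferred from the constants above: zero exponents collapse to a signed zero, everything else is rebiased by +112, and an all-ones half exponent becomes a large finite float rather than Inf/NaN. This is an illustrative sketch for readability, not code from the commit, and the exact edge-case behaviour is an assumption.

#include <cstdint>
#include <cstring>

static float half_to_float_sketch(uint16_t h) {
  uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
  uint32_t exp = (h >> 10) & 0x1F;
  uint32_t mant = h & 0x3FF;
  uint32_t bits = sign;                             // exp == 0: signed zero
  if (exp != 0) {
    bits |= ((exp + 112) << 23) | (mant << 13);     // rebias 15 -> 127
  }
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}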
@ -48,9 +48,7 @@ bool ConditionalGroupPass::Initialize(Compiler* compiler) {
bool ConditionalGroupPass::Run(HIRBuilder* builder) {
bool dirty;
int loops = 0;
do {
assert_true(loops < 20);  // arbitrary number
dirty = false;
for (size_t i = 0; i < passes_.size(); ++i) {
scratch_arena()->Reset();
@ -68,7 +66,6 @@ bool ConditionalGroupPass::Run(HIRBuilder* builder) {
dirty |= result;
}
}
loops++;
} while (dirty);
return true;
}
@ -41,18 +41,6 @@ bool FinalizationPass::Run(HIRBuilder* builder) {
block->ordinal = block_ordinal++;
// Ensure all labels have names.
auto label = block->label_head;
while (label) {
if (!label->name) {
const size_t label_len = 6 + 4;
char* name = reinterpret_cast<char*>(arena->Alloc(label_len + 1, 1));
assert_true(label->id <= 9999);
auto end = fmt::format_to_n(name, label_len, "_label{}", label->id);
name[end.size] = '\0';
label->name = name;
}
label = label->next;
}
// Remove unneeded jumps.
auto tail = block->instr_tail;
@ -23,52 +23,6 @@ using namespace xe::cpu::hir;
using xe::cpu::hir::HIRBuilder;
using xe::cpu::hir::Instr;
using xe::cpu::hir::Value;
using vmask_portion_t = uint64_t;
template <uint32_t Ndwords>
struct Valuemask_t {
vmask_portion_t bits[Ndwords];
static Valuemask_t create_empty(vmask_portion_t fill = 0) {
Valuemask_t result;
for (uint32_t i = 0; i < Ndwords; ++i) {
result.bits[i] = fill;
}
return result;
}
template <typename TCallable>
Valuemask_t operate(TCallable&& oper) const {
Valuemask_t result = create_empty();
for (uint32_t i = 0; i < Ndwords; ++i) {
result.bits[i] = oper(bits[i]);
}
return result;
}
template <typename TCallable>
Valuemask_t operate(TCallable&& oper, Valuemask_t other) const {
Valuemask_t result = create_empty();
for (uint32_t i = 0; i < Ndwords; ++i) {
result.bits[i] = oper(bits[i], other.bits[i]);
}
return result;
}
Valuemask_t operator&(ValueMask other) const {
return operate([](vmask_portion_t x, vmask_portion_t y) { return x & y; },
other);
}
Valuemask_t operator|(ValueMask other) const {
return operate([](vmask_portion_t x, vmask_portion_t y) { return x | y; },
other);
}
Valuemask_t operator^(ValueMask other) const {
return operate([](vmask_portion_t x, vmask_portion_t y) { return x ^ y; },
other);
}
Valuemask_t operator~() const {
return operate([](vmask_portion_t x) { return ~x; }, other);
}
};
SimplificationPass::SimplificationPass() : ConditionalGroupSubpass() {}
@ -76,17 +30,13 @@ SimplificationPass::~SimplificationPass() {}
bool SimplificationPass::Run(HIRBuilder* builder, bool& result) {
result = false;
bool iter_result = false;
do {
result |= SimplifyBitArith(builder);
iter_result = false;
result |= EliminateConversions(builder);
iter_result |= SimplifyBitArith(builder);
result |= SimplifyAssignments(builder);
iter_result |= EliminateConversions(builder);
result |= SimplifyBasicArith(builder);
iter_result |= SimplifyAssignments(builder);
result |= SimplifyVectorOps(builder);
iter_result |= SimplifyBasicArith(builder);
iter_result |= SimplifyVectorOps(builder);
result |= iter_result;
} while (iter_result);
return true;
}
// simplifications that apply to both or and xor
@ -735,7 +685,9 @@ bool SimplificationPass::CheckAdd(hir::Instr* i, hir::HIRBuilder* builder) {
auto [added_constant_neg, added_var_neg] =
i->BinaryValueArrangeAsConstAndVar();
if (!added_constant_neg) return false;
if (!added_constant_neg) {
return false;
}
if (added_constant_neg->AsUint64() &
GetScalarSignbitMask(added_constant_neg->type)) {
// adding a value that has its signbit set!
@ -882,11 +834,6 @@ bool SimplificationPass::CheckScalarConstCmp(hir::Instr* i,
} else if (cmpop == OPCODE_COMPARE_UGT) {
// impossible, cannot be greater than mask
/* i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(builder->LoadZeroInt8());
return true;
*/
constant_replacement = builder->LoadZeroInt8();
} else if (cmpop == OPCODE_COMPARE_ULE) {  // less than or equal to mask =
@ -914,9 +861,9 @@ bool SimplificationPass::CheckIsTrueIsFalse(hir::Instr* i,
bool istrue = i->opcode == &OPCODE_COMPARE_NE_info;
bool isfalse = i->opcode == &OPCODE_COMPARE_EQ_info;
auto [input_cosntant, input] = i->BinaryValueArrangeAsConstAndVar();
auto [input_constant, input] = i->BinaryValueArrangeAsConstAndVar();
if (!input_cosntant || input_cosntant->AsUint64() != 0) {
if (!input_constant || input_constant->AsUint64() != 0) {
return false;
}
@ -957,12 +904,6 @@ bool SimplificationPass::CheckIsTrueIsFalse(hir::Instr* i,
}
}
/* Instr* input_def = input->def;
if (!input_def) {
return false;
}
input_def = input_def->GetDestDefSkipAssigns();*/
return false;
}
bool SimplificationPass::CheckSHRByConst(hir::Instr* i,
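After this change the fixed-point iteration happens only in the enclosing ConditionalGroupPass: each sub-pass runs once per sweep and reports whether it changed anything, and the group repeats until a sweep is clean. A generic sketch of that shape, with hypothetical type and function names, not the real class layout:

#include <vector>

class HIRBuilder;

struct Subpass {
  virtual ~Subpass() = default;
  virtual bool Run(HIRBuilder* builder, bool& changed) = 0;
};

static bool RunToFixpoint(std::vector<Subpass*>& passes, HIRBuilder* builder) {
  bool dirty;
  do {
    dirty = false;
    for (auto* pass : passes) {
      bool changed = false;
      if (!pass->Run(builder, changed)) return false;  // hard failure aborts
      dirty |= changed;                                // remember any progress
    }
  } while (dirty);  // stop once a full sweep makes no change
  return true;
}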
@ -26,6 +26,13 @@ class Label {
char* name;
void* tag;
// just use stringification of label id
// this will later be used as an input to xbyak. xbyak only accepts
// std::string as a value, not passed by reference, so precomputing the
// stringification does not help
std::string GetIdString() {
return std::to_string(id);
}
};
} // namespace hir
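A possible use at the backend, per the comment above; illustrative only, not code from this commit. xbyak's L() takes the label name as std::string by value, so the id string is built on demand:

#include "xbyak/xbyak.h"

// Binds a named xbyak label for an HIR label at the current emit position.
static void BindGuestLabel(Xbyak::CodeGenerator& e, xe::cpu::hir::Label* label) {
  e.L(label->GetIdString());
}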
@ -11,7 +11,7 @@
#define XENIA_CPU_HIR_OPCODES_H_
#include <cstdint>
#include "xenia/base/platform.h"
namespace xe {
namespace cpu {
namespace hir {
@ -361,13 +361,16 @@ enum OpcodeSignature {
#define GET_OPCODE_SIG_TYPE_SRC1(sig) (OpcodeSignatureType)((sig >> 3) & 0x7)
#define GET_OPCODE_SIG_TYPE_SRC2(sig) (OpcodeSignatureType)((sig >> 6) & 0x7)
#define GET_OPCODE_SIG_TYPE_SRC3(sig) (OpcodeSignatureType)((sig >> 9) & 0x7)
XE_MAYBE_UNUSED
static bool IsOpcodeBinaryValue(uint32_t signature) {
return (signature & ~(0x7)) ==
((OPCODE_SIG_TYPE_V << 3) | (OPCODE_SIG_TYPE_V << 6));
}
XE_MAYBE_UNUSED
static bool IsOpcodeUnaryValue(uint32_t signature) {
return (signature & ~(0x7)) == ((OPCODE_SIG_TYPE_V << 3));
}
XE_MAYBE_UNUSED
static void UnpackOpcodeSig(uint32_t sig, OpcodeSignatureType& dest,
OpcodeSignatureType& src1,
OpcodeSignatureType& src2,
@ -185,7 +185,7 @@ bool MMIOHandler::TryDecodeLoadStore(const uint8_t* p,
uint8_t rex_b = rex & 0b0001;
uint8_t rex_x = rex & 0b0010;
uint8_t rex_r = rex & 0b0100;
uint8_t rex_w = rex & 0b1000;
//uint8_t rex_w = rex & 0b1000;
// http://www.sandpile.org/x86/opc_rm.htm
// http://www.sandpile.org/x86/opc_sib.htm
@ -418,7 +418,6 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
// Quick kill anything outside our mapping.
return false;
}
uint64_t hostip = ex->pc();
void* fault_host_address = reinterpret_cast<void*>(ex->fault_address());
@ -54,6 +54,7 @@ class Module {
bool ReadMap(const char* file_name);
virtual void Precompile() {}
protected:
virtual std::unique_ptr<Function> CreateFunction(uint32_t address) = 0;
@ -425,6 +425,27 @@ typedef struct alignas(64) PPCContext_s {
uint64_t reserved_val;
ThreadState* thread_state;
uint8_t* virtual_membase;
template <typename T = uint8_t*>
inline T TranslateVirtual(uint32_t guest_address) XE_RESTRICT const {
#if XE_PLATFORM_WIN32 == 1
uint8_t* host_address = virtual_membase + guest_address;
if (guest_address >= static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this))) {
host_address += 0x1000;
}
return reinterpret_cast<T>(host_address);
#else
return processor->memory()->TranslateVirtual<T>(guest_address);
#endif
}
//for convenience in kernel functions, version that auto narrows to uint32
template <typename T = uint8_t*>
inline T TranslateVirtualGPR(uint64_t guest_address) XE_RESTRICT const {
return TranslateVirtual<T>(static_cast<uint32_t>(guest_address));
}
static std::string GetRegisterName(PPCRegister reg);
std::string GetStringFromValue(PPCRegister reg) const;
void SetValueFromString(PPCRegister reg, std::string value);
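Hypothetical caller-side usage of the new helpers (the struct, export name, and register choice are illustrative, not from this commit): GPRs hold 64-bit values, so TranslateVirtualGPR narrows to the 32-bit guest address space before translating. Assumes the usual xenia headers for PPCContext and xe::be are included.

struct X_EXAMPLE_HEADER {
  xe::be<uint32_t> size;  // guest memory is big-endian
};

static void SomeKernelExport(xe::cpu::ppc::PPCContext* ctx) {
  // r3 carries the first guest argument by convention.
  auto* header = ctx->TranslateVirtualGPR<X_EXAMPLE_HEADER*>(ctx->r[3]);
  header->size = 16;
}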
@ -46,6 +46,7 @@ struct PPCDecodeData {
uint32_t LEV() const { return bits_.LEV; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -74,6 +75,7 @@ struct PPCDecodeData {
uint32_t L() const { return bits_.RT & 0x1; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -95,6 +97,7 @@ struct PPCDecodeData {
int32_t ds() const { return static_cast<int32_t>(XEEXTS16(DS() << 2)); }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -174,6 +177,7 @@ struct PPCDecodeData {
uint32_t CRFS() const { return bits_.RA >> 2; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -200,6 +204,7 @@ struct PPCDecodeData {
uint32_t CRFS() const { return CRBA() >> 2; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -223,6 +228,7 @@ struct PPCDecodeData {
}

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -244,6 +250,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -266,6 +273,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -289,6 +297,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -314,6 +323,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -339,6 +349,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -363,6 +374,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -389,6 +401,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -412,6 +425,7 @@ struct PPCDecodeData {
int32_t SIMM() const { return static_cast<int32_t>(XEEXTS16(VA())); }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -431,6 +445,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -452,6 +467,7 @@ struct PPCDecodeData {
uint32_t SHB() const { return VC() & 0xF; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -473,6 +489,7 @@ struct PPCDecodeData {
uint32_t VB() const { return bits_.VB128l | (bits_.VB128h << 5); }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -498,6 +515,7 @@ struct PPCDecodeData {
uint32_t RB() const { return bits_.RB; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -521,6 +539,7 @@ struct PPCDecodeData {
uint32_t VC() const { return bits_.VC; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -546,6 +565,7 @@ struct PPCDecodeData {
int32_t SIMM() const { return static_cast<int32_t>(XEEXTS16(bits_.UIMM)); }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -567,6 +587,7 @@ struct PPCDecodeData {
uint32_t z() const { return bits_.z; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -592,6 +613,7 @@ struct PPCDecodeData {
uint32_t SH() const { return bits_.SH; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -618,6 +640,7 @@ struct PPCDecodeData {
bool Rc() const { return bits_.Rc ? true : false; }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -642,6 +665,7 @@ struct PPCDecodeData {
uint32_t UIMM() const { return bits_.PERMl | (bits_.PERMh << 5); }

private:
XE_MAYBE_UNUSED
uint32_t address_;
union {
uint32_t value_;
@ -2014,8 +2014,7 @@ int InstrEmit_vupkhsh(PPCHIRBuilder& f, const InstrData& i) {
return InstrEmit_vupkhsh_(f, i.VX.VD, i.VX.VB);
}
int InstrEmit_vupkhsh128(PPCHIRBuilder& f, const InstrData& i) {
uint32_t va = VX128_VA128;
assert_zero(VX128_VA128);
assert_zero(va);
return InstrEmit_vupkhsh_(f, VX128_VD128, VX128_VB128);
}
@ -2032,8 +2031,7 @@ int InstrEmit_vupklsh(PPCHIRBuilder& f, const InstrData& i) {
return InstrEmit_vupklsh_(f, i.VX.VD, i.VX.VB);
}
int InstrEmit_vupklsh128(PPCHIRBuilder& f, const InstrData& i) {
uint32_t va = VX128_VA128;
assert_zero(VX128_VA128);
assert_zero(va);
return InstrEmit_vupklsh_(f, VX128_VD128, VX128_VB128);
}
@ -16,7 +16,7 @@
#include "xenia/cpu/ppc/ppc_hir_builder.h"
DEFINE_bool(
disable_prefetch_and_cachecontrol, false,
disable_prefetch_and_cachecontrol, true,
"Disables translating ppc prefetch/cache flush instructions to host "
"prefetch/cacheflush instructions. This may improve performance as these "
"instructions were written with the Xbox 360's cache in mind, and modern "
@ -105,6 +105,11 @@ bool PPCFrontend::Initialize() {
}
bool PPCFrontend::DeclareFunction(GuestFunction* function) {
//chrispy: make sure we aren't declaring a function that is actually padding data, this will mess up PPCScanner and is hard to debug
//wow, this halo reach actually has branches into 0 opcodes, look into further
//xenia_assert(*reinterpret_cast<const uint32_t*>(
//    this->memory()->TranslateVirtual(function->address())) != 0);
// Could scan or something here.
// Could also check to see if it's a well-known function type and classify
// for later.
@ -34,6 +34,11 @@ DEFINE_bool(
"unimplemented PowerPC instruction is encountered.",
"CPU");
DEFINE_bool(
emit_useless_fpscr_updates, false,
"Emit useless fpscr update instructions (pre-10/30/2022 behavior). ",
"CPU");
namespace xe {
namespace cpu {
namespace ppc {
@ -89,6 +94,9 @@ bool PPCHIRBuilder::Emit(GuestFunction* function, uint32_t flags) {
function_ = function;
start_address_ = function_->address();
//chrispy: i've seen this one happen, not sure why but i think from trying to precompile twice
//i've also seen ones with a start and end address that are the same...
assert_true(function_->address() <= function_->end_address());
instr_count_ = (function_->end_address() - function_->address()) / 4 + 1;
with_debug_info_ = (flags & EMIT_DEBUG_COMMENTS) == EMIT_DEBUG_COMMENTS;
@ -242,6 +250,7 @@ void PPCHIRBuilder::MaybeBreakOnInstruction(uint32_t address) {
}
void PPCHIRBuilder::AnnotateLabel(uint32_t address, Label* label) {
//chrispy: label->name is unused, it would be nice to be able to remove the field and this code
char name_buffer[13];
auto format_result = fmt::format_to_n(name_buffer, 12, "loc_{:08X}", address);
name_buffer[format_result.size] = '\0';
@ -447,31 +456,38 @@ void PPCHIRBuilder::StoreFPSCR(Value* value) {
void PPCHIRBuilder::UpdateFPSCR(Value* result, bool update_cr1) {
// TODO(benvanik): detect overflow and nan cases.
// fx and vx are the most important.
Value* fx = LoadConstantInt8(0);
/*
Value* fex = LoadConstantInt8(0);
chrispy: stubbed this out because right now all it does is waste
Value* vx = LoadConstantInt8(0);
memory and CPU time
Value* ox = LoadConstantInt8(0);
*/
if (cvars::emit_useless_fpscr_updates) {
Value* fx = LoadConstantInt8(0);
Value* fex = LoadConstantInt8(0);
Value* vx = LoadConstantInt8(0);
Value* ox = LoadConstantInt8(0);
if (update_cr1) {
// Store into the CR1 field.
// We do this instead of just calling CopyFPSCRToCR1 so that we don't
// have to read back the bits and do shifting work.
StoreContext(offsetof(PPCContext, cr1.cr1_fx), fx);
StoreContext(offsetof(PPCContext, cr1.cr1_fex), fex);
StoreContext(offsetof(PPCContext, cr1.cr1_vx), vx);
StoreContext(offsetof(PPCContext, cr1.cr1_ox), ox);
}
// Generate our new bits.
Value* new_bits = Shl(ZeroExtend(fx, INT32_TYPE), 31);
new_bits = Or(new_bits, Shl(ZeroExtend(fex, INT32_TYPE), 30));
new_bits = Or(new_bits, Shl(ZeroExtend(vx, INT32_TYPE), 29));
new_bits = Or(new_bits, Shl(ZeroExtend(ox, INT32_TYPE), 28));
// Mix into fpscr while preserving sticky bits (FX and OX).
Value* bits = LoadFPSCR();
bits = Or(And(bits, LoadConstantUint32(0x9FFFFFFF)), new_bits);
StoreFPSCR(bits);
}
// Generate our new bits.
Value* new_bits = Shl(ZeroExtend(fx, INT32_TYPE), 31);
new_bits = Or(new_bits, Shl(ZeroExtend(fex, INT32_TYPE), 30));
new_bits = Or(new_bits, Shl(ZeroExtend(vx, INT32_TYPE), 29));
new_bits = Or(new_bits, Shl(ZeroExtend(ox, INT32_TYPE), 28));
// Mix into fpscr while preserving sticky bits (FX and OX).
Value* bits = LoadFPSCR();
bits = Or(And(bits, LoadConstantUint32(0x9FFFFFFF)), new_bits);
StoreFPSCR(bits);
}
void PPCHIRBuilder::CopyFPSCRToCR1() {
@ -21,13 +21,7 @@ namespace xe {
namespace cpu {
namespace ppc {
// DEPRECATED
struct PPCOpcodeBits {
// TODO(benvanik): move code to PPCDecodeData.
struct InstrData {
PPCOpcode opcode;
const PPCOpcodeInfo* opcode_info;
uint32_t address;
union {
uint32_t code;
@ -329,6 +323,14 @@ struct InstrData {
};
};
// DEPRECATED
// TODO(benvanik): move code to PPCDecodeData.
struct InstrData : public PPCOpcodeBits {
PPCOpcode opcode;
const PPCOpcodeInfo* opcode_info;
uint32_t address;
};
} // namespace ppc
} // namespace cpu
} // namespace xe
@ -31,14 +31,17 @@
#include "third_party/crypto/rijndael-alg-fst.c"
#include "third_party/crypto/rijndael-alg-fst.h"
#include "third_party/pe/pe_image.h"
#include "xenia/cpu/ppc/ppc_decode_data.h"
#include "xenia/cpu/ppc/ppc_instr.h"
DEFINE_bool(disable_instruction_infocache, false,
"Disables caching records of called instructions/mmio accesses.",
"CPU");
DEFINE_bool(disable_function_precompilation, true,
"Disables pre-compiling guest functions that we know we've called "
DEFINE_bool(
"on previous runs",
disable_early_precompilation, false,
"CPU");
"Disables pre-compiling guest functions that we know we've called/that "
"we've recognized as being functions via simple heuristics.",
"CPU");
static const uint8_t xe_xex2_retail_key[16] = {
0x20, 0xB1, 0x85, 0xA5, 0x9D, 0x28, 0xFD, 0xC3,
@ -1057,29 +1060,6 @@ bool XexModule::LoadContinue() {
library_offset += library->size;
}
}
sha1::SHA1 final_image_sha_;
final_image_sha_.reset();
unsigned high_code = this->high_address_ - this->low_address_;
final_image_sha_.processBytes(memory()->TranslateVirtual(this->low_address_),
high_code);
final_image_sha_.finalize(image_sha_bytes_);
char fmtbuf[16];
for (unsigned i = 0; i < 16; ++i) {
sprintf_s(fmtbuf, "%X", image_sha_bytes_[i]);
image_sha_str_ += &fmtbuf[0];
}
info_cache_.Init(this);
// Find __savegprlr_* and __restgprlr_* and the others.
// We can flag these for special handling (inlining/etc).
if (!FindSaveRest()) {
return false;
}
// Load a specified module map and diff.
if (cvars::load_module_map.size()) {
@ -1112,6 +1092,32 @@ bool XexModule::LoadContinue() {
return true;
}
void XexModule::Precompile() {
sha1::SHA1 final_image_sha_;
final_image_sha_.reset();
unsigned high_code = this->high_address_ - this->low_address_;
final_image_sha_.processBytes(memory()->TranslateVirtual(this->low_address_),
high_code);
final_image_sha_.finalize(image_sha_bytes_);
char fmtbuf[16];
for (unsigned i = 0; i < 16; ++i) {
sprintf_s(fmtbuf, "%X", image_sha_bytes_[i]);
image_sha_str_ += &fmtbuf[0];
}
// Find __savegprlr_* and __restgprlr_* and the others.
// We can flag these for special handling (inlining/etc).
if (!FindSaveRest()) {
return;
}
info_cache_.Init(this);
PrecompileDiscoveredFunctions();
}
bool XexModule::Unload() {
if (!loaded_) {
return true;
@ -1363,9 +1369,25 @@ InfoCacheFlags* XexModule::GetInstructionAddressFlags(uint32_t guest_addr) {
return info_cache_.LookupFlags(guest_addr);
}
void XexModule::PrecompileDiscoveredFunctions() {
if (cvars::disable_early_precompilation) {
return;
}
auto others = PreanalyzeCode();
for (auto&& other : others) {
if (other < low_address_ || other >= high_address_) {
continue;
}
auto sym = processor_->LookupFunction(other);
if (!sym || sym->status() != Symbol::Status::kDefined) {
processor_->ResolveFunction(other);
}
}
}
void XexModule::PrecompileKnownFunctions() {
if (cvars::disable_function_precompilation) {
if (cvars::disable_early_precompilation) {
return;
}
uint32_t start = 0;
|
uint32_t start = 0;
|
||||||
|
@@ -1374,12 +1396,160 @@ void XexModule::PrecompileKnownFunctions() {
   if (!flags) {
     return;
   }
+  //maybe should pre-acquire global crit?
   for (uint32_t i = 0; i < end; i++) {
     if (flags[i].was_resolved) {
-      processor_->ResolveFunction(low_address_ + (i * 4));
+      uint32_t addr = low_address_ + (i * 4);
+      auto sym = processor_->LookupFunction(addr);
+
+      if (!sym || sym->status() != Symbol::Status::kDefined) {
+        processor_->ResolveFunction(addr);
+      }
     }
   }
 }
+
+static uint32_t GetBLCalledFunction(XexModule* xexmod, uint32_t current_base,
+                                    ppc::PPCOpcodeBits wrd) {
+  int32_t displ = static_cast<int32_t>(ppc::XEEXTS26(wrd.I.LI << 2));
+
+  if (wrd.I.AA) {
+    return static_cast<uint32_t>(displ);
+  } else {
+    return static_cast<uint32_t>(static_cast<int32_t>(current_base) + displ);
+  }
+}
+static bool IsOpcodeBL(unsigned w) {
+  return (w >> (32 - 6)) == 18 && ppc::PPCOpcodeBits{w}.I.LK;
+}
+
+std::vector<uint32_t> XexModule::PreanalyzeCode() {
+  uint32_t low_8_aligned = xe::align<uint32_t>(low_address_, 8);
+  uint32_t high_8_aligned = high_address_ & ~(8U - 1);
+
+  uint32_t n_possible_8byte_addresses = (high_8_aligned - low_8_aligned) / 8;
+  uint32_t* funcstart_candidate_stack =
+      new uint32_t[n_possible_8byte_addresses];
+  uint32_t* funcstart_candstack2 = new uint32_t[n_possible_8byte_addresses];
+
+  uint32_t stack_pos = 0;
+  {
+    // all functions seem to start on 8 byte boundaries, except for obvious ones
+    // like the save/rest funcs
+    uint32_t* range_start =
+        (uint32_t*)memory()->TranslateVirtual(low_8_aligned);
+    uint32_t* range_end = (uint32_t*)memory()->TranslateVirtual(
+        high_8_aligned);  // align down to multiple of 8
+
+    const uint8_t mfspr_r12_lr[4] = {0x7D, 0x88, 0x02, 0xA6};
+
+    // a blr instruction, with 4 zero bytes afterwards to pad the next address
+    // to 8 byte alignment
+    // if we see this prior to our address, we can assume we are a function
+    // start
+    const uint8_t blr[4] = {0x4E, 0x80, 0x0, 0x20};
+
+    uint32_t blr32 = *reinterpret_cast<const uint32_t*>(&blr[0]);
+
+    uint32_t mfspr_r12_lr32 =
+        *reinterpret_cast<const uint32_t*>(&mfspr_r12_lr[0]);
+    /*
+        First pass: detect save of the link register at an eight byte
+       aligned address
+    */
+    for (uint32_t* first_pass = range_start; first_pass < range_end;
+         first_pass += 2) {
+      if (*first_pass == mfspr_r12_lr32) {
+        // Push our newly discovered function start into our list
+        // All addresses in the list are sorted until the second pass
+        funcstart_candidate_stack[stack_pos++] =
+            static_cast<uint32_t>(reinterpret_cast<uintptr_t>(first_pass) -
+                                  reinterpret_cast<uintptr_t>(range_start)) +
+            low_8_aligned;
+      } else if (first_pass[-1] == 0 && *first_pass != 0) {
+        // originally i checked for blr followed by 0, but some functions are
+        // actually aligned to greater boundaries. something that appears to be
+        // longjmp (it occurs in most games, so standard library, and loads ctx,
+        // so longjmp) is aligned to 16 bytes in most games
+        uint32_t* check_iter = &first_pass[-2];
+
+        while (!*check_iter) {
+          --check_iter;
+        }
+
+        XE_LIKELY_IF(*check_iter == blr32) {
+          funcstart_candidate_stack[stack_pos++] =
+              static_cast<uint32_t>(reinterpret_cast<uintptr_t>(first_pass) -
+                                    reinterpret_cast<uintptr_t>(range_start)) +
+              low_8_aligned;
+        }
+      }
+    }
+    uint32_t current_guestaddr = low_8_aligned;
+    // Second pass: detect branch with link instructions and decode the target
+    // address. We can safely assume that if bl is to address, that address is
+    // the start of the function
+    for (uint32_t* second_pass = range_start; second_pass < range_end;
+         second_pass++, current_guestaddr += 4) {
+      uint32_t current_call = xe::byte_swap(*second_pass);
+
+      if (IsOpcodeBL(current_call)) {
+        funcstart_candidate_stack[stack_pos++] = GetBLCalledFunction(
+            this, current_guestaddr, ppc::PPCOpcodeBits{current_call});
+      }
+    }
+
+    auto pdata = this->GetPESection(".pdata");
+
+    if (pdata) {
+      uint32_t* pdata_base =
+          (uint32_t*)this->memory()->TranslateVirtual(pdata->address);
+
+      uint32_t n_pdata_entries = pdata->raw_size / 8;
+
+      for (uint32_t i = 0; i < n_pdata_entries; ++i) {
+        uint32_t funcaddr = xe::load_and_swap<uint32_t>(&pdata_base[i * 2]);
+        if (funcaddr >= low_address_ && funcaddr <= high_address_) {
+          funcstart_candidate_stack[stack_pos++] = funcaddr;
+        } else {
+          // we hit 0 for func addr, that means we're done
+          break;
+        }
+      }
+    }
+  }
+
+  // Sort the list of function starts and then ensure that all addresses are
+  // unique
+  uint32_t n_known_funcaddrs = 0;
+  {
+    // make addresses unique
+
+    std::sort(funcstart_candidate_stack, funcstart_candidate_stack + stack_pos);
+
+    uint32_t read_pos = 0;
+    uint32_t write_pos = 0;
+    uint32_t previous_addr = ~0u;
+    while (read_pos < stack_pos) {
+      uint32_t current_addr = funcstart_candidate_stack[read_pos++];
+
+      if (current_addr != previous_addr) {
+        previous_addr = current_addr;
+        funcstart_candstack2[write_pos++] = current_addr;
+      }
+    }
+    n_known_funcaddrs = write_pos;
+  }
+
+  delete[] funcstart_candidate_stack;
+
+  std::vector<uint32_t> result;
+  result.resize(n_known_funcaddrs);
+  memcpy(&result[0], funcstart_candstack2,
+         sizeof(uint32_t) * n_known_funcaddrs);
+  delete[] funcstart_candstack2;
+  return result;
+}
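PreanalyzeCode's second pass leans on the PowerPC I-form branch encoding: primary opcode 18 in the top six bits, a 24-bit LI field stored shifted left by two, an AA bit selecting absolute addressing and an LK bit marking branch-and-link, which is what IsOpcodeBL and GetBLCalledFunction test via ppc::PPCOpcodeBits. A minimal standalone sketch of the same decode using plain integer masks (the masks and sample word below are assumptions drawn from the public ISA, not copied from this diff):

#include <cstdint>
#include <cstdio>

// Sign-extend the 26-bit (LI << 2) displacement of an I-form branch.
static int32_t SignExtend26(uint32_t value) {
  return static_cast<int32_t>(value << 6) >> 6;
}

// True for "bl"/"bla": primary opcode 18 with the LK (link) bit set.
static bool IsBranchAndLink(uint32_t word) {
  return (word >> 26) == 18 && (word & 1u) != 0;
}

// Compute the branch target the same way the second pass does:
// absolute if AA is set, otherwise relative to the instruction address.
static uint32_t BranchTarget(uint32_t instr_addr, uint32_t word) {
  int32_t displ = SignExtend26(word & 0x03FFFFFCu);
  return (word & 2u) ? static_cast<uint32_t>(displ) : instr_addr + displ;
}

int main() {
  // 0x4BFFFFF1 encodes "bl -0x10" (LK set, AA clear) at address 0x82000020.
  uint32_t word = 0x4BFFFFF1u;
  if (IsBranchAndLink(word)) {
    std::printf("target = 0x%08X\n", BranchTarget(0x82000020u, word));
  }
  return 0;
}

Running it prints target = 0x82000010, i.e. the call site minus 0x10, matching what GetBLCalledFunction would record as a function-start candidate.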
 bool XexModule::FindSaveRest() {
   // Special stack save/restore functions.
   // http://research.microsoft.com/en-us/um/redmond/projects/invisible/src/crt/md/ppc/xxx.s.htm

@@ -1552,6 +1722,8 @@ bool XexModule::FindSaveRest() {

   auto page_size = base_address_ <= 0x90000000 ? 64 * 1024 : 4 * 1024;
   auto sec_header = xex_security_info();
+  std::vector<uint32_t> resolve_on_exit{};
+  resolve_on_exit.reserve(256);
   for (uint32_t i = 0, page = 0; i < sec_header->page_descriptor_count; i++) {
     // Byteswap the bitfield manually.
     xex2_page_descriptor desc;
@@ -1586,13 +1758,20 @@ bool XexModule::FindSaveRest() {

   // Add function stubs.
   char name[32];

+  auto AddXexFunction = [this, &resolve_on_exit](uint32_t address,
+                                                 Function** function) {
+    DeclareFunction(address, function);
+    resolve_on_exit.push_back(address);
+  };
   if (gplr_start) {
     uint32_t address = gplr_start;
     for (int n = 14; n <= 31; n++) {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__savegprlr_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_end_address(address + (31 - n) * 4 + 2 * 4);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
@@ -1608,7 +1787,7 @@ bool XexModule::FindSaveRest() {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__restgprlr_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_end_address(address + (31 - n) * 4 + 3 * 4);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
@@ -1625,7 +1804,7 @@ bool XexModule::FindSaveRest() {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__savefpr_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_end_address(address + (31 - n) * 4 + 1 * 4);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
@@ -1641,7 +1820,7 @@ bool XexModule::FindSaveRest() {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__restfpr_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_end_address(address + (31 - n) * 4 + 1 * 4);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
@@ -1663,7 +1842,7 @@ bool XexModule::FindSaveRest() {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__savevmx_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
       // TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveVmx;
@@ -1677,7 +1856,7 @@ bool XexModule::FindSaveRest() {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__savevmx_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
       // TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveVmx;
@@ -1691,7 +1870,7 @@ bool XexModule::FindSaveRest() {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__restvmx_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
       // TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestVmx;
@@ -1705,7 +1884,7 @@ bool XexModule::FindSaveRest() {
       auto format_result =
           fmt::format_to_n(name, xe::countof(name), "__restvmx_{}", n);
       Function* function;
-      DeclareFunction(address, &function);
+      AddXexFunction(address, &function);
       function->set_name(std::string_view(name, format_result.size));
       // TODO(benvanik): set type fn->type = FunctionSymbol::User;
       // TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestVmx;
@@ -1715,7 +1894,15 @@ bool XexModule::FindSaveRest() {
       address += 2 * 4;
     }
   }
+  if (!cvars::disable_early_precompilation) {
+    for (auto&& to_ensure_precompiled : resolve_on_exit) {
+      // we want to make sure an address for these functions is available before
+      // any other functions are compiled for code generation purposes but we do
+      // it outside of our loops, because we also want to make sure we've marked
+      // up the symbol with info about it being save/rest and whatnot
+      processor_->ResolveFunction(to_ensure_precompiled);
+    }
+  }
   return true;
 }
@@ -34,7 +34,8 @@ struct InfoCacheFlags {
   uint32_t was_resolved : 1;  // has this address ever been called/requested
                               // via resolvefunction?
   uint32_t accessed_mmio : 1;
-  uint32_t reserved : 30;
+  uint32_t is_syscall_func : 1;
+  uint32_t reserved : 29;
 };
 struct XexInfoCache {
   struct InfoCacheFlagsHeader {
@@ -208,12 +209,15 @@ class XexModule : public xe::cpu::Module {
   }

   InfoCacheFlags* GetInstructionAddressFlags(uint32_t guest_addr);
-  void PrecompileKnownFunctions();

+  virtual void Precompile() override;
  protected:
   std::unique_ptr<Function> CreateFunction(uint32_t address) override;

  private:
+  void PrecompileKnownFunctions();
+  void PrecompileDiscoveredFunctions();
+  std::vector<uint32_t> PreanalyzeCode();
   friend struct XexInfoCache;
   void ReadSecurityInfo();
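The new is_syscall_func bit is carved out of the old 30-bit reserved field, so InfoCacheFlags still packs into exactly one 32-bit word (1 + 1 + 1 + 29), which the per-instruction layout used by XexInfoCache appears to depend on. A hedged sketch with a compile-time check (the field names are copied from the hunk above; the static_assert itself is illustrative, not part of the change):

#include <cstdint>

struct InfoCacheFlags {
  uint32_t was_resolved : 1;   // has this address ever been called/requested
                               // via resolvefunction?
  uint32_t accessed_mmio : 1;
  uint32_t is_syscall_func : 1;
  uint32_t reserved : 29;
};
// One 32-bit record per guest instruction address: new flags must keep being
// taken out of "reserved" so the total stays at 32 bits.
static_assert(sizeof(InfoCacheFlags) == sizeof(uint32_t),
              "InfoCacheFlags must stay exactly one dword");

int main() { return 0; }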
@@ -33,7 +33,7 @@ namespace ui {

 class DebugWindow : public cpu::DebugListener {
  public:
-  ~DebugWindow();
+  virtual ~DebugWindow();

   static std::unique_ptr<DebugWindow> Create(
       Emulator* emulator, xe::ui::WindowedAppContext& app_context);
@@ -2042,10 +2042,6 @@ D3D12CommandProcessor::WriteRegisterRangeFromRing_WithKnownBound(
   RingBuffer::ReadRange range =
       ring->BeginRead(num_registers * sizeof(uint32_t));

-  constexpr auto bounds_has_reg =
-      bounds_may_have_reg<register_lower_bound, register_upper_bound>;
-  constexpr auto bounds_has_bounds =
-      bounds_may_have_bounds<register_lower_bound, register_upper_bound>;

   XE_LIKELY_IF(!range.second) {
     WriteRegisterRangeFromMem_WithKnownBound<register_lower_bound,
@@ -5152,6 +5148,7 @@ void D3D12CommandProcessor::WriteGammaRampSRV(
 #define COMMAND_PROCESSOR D3D12CommandProcessor

 #include "../pm4_command_processor_implement.h"
+#undef COMMAND_PROCESSOR
 }  // namespace d3d12
 }  // namespace gpu
 }  // namespace xe
@@ -1,5 +1,3 @@
-/**
-/**
 /**
  ******************************************************************************
  * Xenia : Xbox 360 Emulator Research Project                                 *
@@ -50,8 +48,9 @@ struct MemExportRange {
 };
 class D3D12CommandProcessor final : public CommandProcessor {
  protected:
+#define OVERRIDING_BASE_CMDPROCESSOR
 #include "../pm4_command_processor_declare.h"
+#undef OVERRIDING_BASE_CMDPROCESSOR
  public:
   explicit D3D12CommandProcessor(D3D12GraphicsSystem* graphics_system,
                                  kernel::KernelState* kernel_state);
@@ -108,15 +108,4 @@ inline nvapi_state_t::~nvapi_state_t() {
     call_deinit_interface();
   }
 }
-inline void init_nvapi() {
-  /// HMODULE moddy = LoadLibraryA("nvapi64.dll");
-
-  // FARPROC quif = GetProcAddress(moddy, "nvapi_QueryInterface");
-
-  nvapi_state_t nvapi{};
-
-  auto queryvisible = nvapi.query_interface<void>(0x26322BC3);
-  return;
-}
-
 }  // namespace lightweight_nvapi
@@ -87,7 +87,7 @@ class D3D12TextureCache final : public TextureCache {

   ~D3D12TextureCache();

-  void ClearCache();
+  void ClearCache() override;

   void BeginSubmission(uint64_t new_submission_index) override;
   void BeginFrame() override;
@@ -1,8 +1,15 @@

-void ExecuteIndirectBuffer(uint32_t ptr, uint32_t count) XE_RESTRICT;
-virtual uint32_t ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index) XE_RESTRICT;
-virtual bool ExecutePacket();
+#if defined(OVERRIDING_BASE_CMDPROCESSOR)
+#define PM4_OVERRIDE override
+#else
+#define PM4_OVERRIDE
+#endif
+void ExecuteIndirectBuffer(uint32_t ptr,
+                           uint32_t count) XE_RESTRICT;
+virtual uint32_t ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index)
+    XE_RESTRICT PM4_OVERRIDE;
+virtual bool ExecutePacket() PM4_OVERRIDE;

 public:
 void ExecutePacket(uint32_t ptr, uint32_t count);
@@ -111,4 +118,6 @@ XE_COLD
 bool ExecutePacketType3_CountOverflow(uint32_t count);
 XE_NOINLINE
 XE_COLD
 bool ExecutePacketType0_CountOverflow(uint32_t count);

+#undef PM4_OVERRIDE
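The PM4_OVERRIDE dance exists because pm4_command_processor_declare.h is textually #included inside several command-processor class bodies: when the including class defines OVERRIDING_BASE_CMDPROCESSOR first, the shared declarations pick up override so the compiler verifies them against the base class, and otherwise they expand as the base virtuals. A reduced, self-contained sketch of the same idea (BaseCommandProcessor, D3D12LikeCommandProcessor and PM4_DECLARE_BLOCK are made-up names standing in for the real header):

#include <cstdio>

// Stand-in for the shared declare header, reduced to one method. The token
// PM4_OVERRIDE is expanded with whatever definition is active at the point
// where PM4_DECLARE_BLOCK() is used.
#define PM4_DECLARE_BLOCK() \
  virtual bool ExecutePacket() PM4_OVERRIDE;

struct BaseCommandProcessor {
#define PM4_OVERRIDE
  PM4_DECLARE_BLOCK()  // expands without override: base virtual
#undef PM4_OVERRIDE
  virtual ~BaseCommandProcessor() = default;
};

struct D3D12LikeCommandProcessor final : BaseCommandProcessor {
#define PM4_OVERRIDE override
  PM4_DECLARE_BLOCK()  // expands with override: signature checked by compiler
#undef PM4_OVERRIDE
};

bool BaseCommandProcessor::ExecutePacket() { return false; }
bool D3D12LikeCommandProcessor::ExecutePacket() { return true; }

int main() {
  D3D12LikeCommandProcessor cp;
  BaseCommandProcessor* base = &cp;
  std::printf("%d\n", base->ExecutePacket());  // virtual dispatch -> prints 1
}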
@@ -4,32 +4,38 @@ void COMMAND_PROCESSOR::ExecuteIndirectBuffer(uint32_t ptr,
                                               uint32_t count) XE_RESTRICT {
   SCOPE_profile_cpu_f("gpu");

   trace_writer_.WriteIndirectBufferStart(ptr, count * sizeof(uint32_t));
-
-  RingBuffer old_reader = reader_;
-
-  // Execute commands!
-  new (&reader_)
-      RingBuffer(memory_->TranslatePhysical(ptr), count * sizeof(uint32_t));
-  reader_.set_write_offset(count * sizeof(uint32_t));
-  // prefetch the wraparound range
-  // it likely is already in L3 cache, but in a zen system it may be another
-  // chiplets l3
-  reader_.BeginPrefetchedRead<swcache::PrefetchTag::Level2>(
-      COMMAND_PROCESSOR::GetCurrentRingReadCount());
-  do {
-    if (COMMAND_PROCESSOR::ExecutePacket()) {
-      continue;
-    } else {
-      // Return up a level if we encounter a bad packet.
-      XELOGE("**** INDIRECT RINGBUFFER: Failed to execute packet.");
-      assert_always();
-      // break;
-    }
-  } while (reader_.read_count());
-
-  trace_writer_.WriteIndirectBufferEnd();
-  reader_ = old_reader;
+  if (count != 0) {
+    RingBuffer old_reader = reader_;
+
+    // Execute commands!
+    new (&reader_)
+        RingBuffer(memory_->TranslatePhysical(ptr), count * sizeof(uint32_t));
+    reader_.set_write_offset(count * sizeof(uint32_t));
+    // prefetch the wraparound range
+    // it likely is already in L3 cache, but in a zen system it may be another
+    // chiplets l3
+    reader_.BeginPrefetchedRead<swcache::PrefetchTag::Level2>(
+        COMMAND_PROCESSOR::GetCurrentRingReadCount());
+    do {
+      if (COMMAND_PROCESSOR::ExecutePacket()) {
+        continue;
+      } else {
+        // Return up a level if we encounter a bad packet.
+        XELOGE("**** INDIRECT RINGBUFFER: Failed to execute packet.");
+        assert_always();
+        // break;
+      }
+    } while (reader_.read_count());
+
+    trace_writer_.WriteIndirectBufferEnd();
+    reader_ = old_reader;
+  } else {
+    //rare, but i've seen it happen! (and then a division by 0 occurs)
+    return;
+  }
 }

 bool COMMAND_PROCESSOR::ExecutePacket() {
@@ -229,9 +229,10 @@ class RenderTargetCache {
       TraceWriter* trace_writer, uint32_t draw_resolution_scale_x,
       uint32_t draw_resolution_scale_y)
       : register_file_(register_file),
-        draw_extent_estimator_(register_file, memory, trace_writer),
         draw_resolution_scale_x_(draw_resolution_scale_x),
-        draw_resolution_scale_y_(draw_resolution_scale_y) {
+        draw_resolution_scale_y_(draw_resolution_scale_y),
+        draw_extent_estimator_(register_file, memory, trace_writer)
+  {
     assert_not_zero(draw_resolution_scale_x);
     assert_not_zero(draw_resolution_scale_y);
   }
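The initializer-list reorder above matters because C++ always initializes members in declaration order, not in the order they are written in the constructor's initializer list; listing draw_extent_estimator_ last presumably matches where it is declared in RenderTargetCache, keeping the list consistent with the actual construction order (and quiet under -Wreorder style warnings). A tiny illustration of the rule with made-up names:

#include <cstdio>

struct Widths {
  // scale_x_ is declared before extent_, so it is initialized first no matter
  // how the constructor's initializer list is ordered.
  int scale_x_;
  int extent_;

  // Initializing extent_ from scale_x_ is only safe because scale_x_ is
  // declared (and therefore constructed) earlier.
  explicit Widths(int scale) : scale_x_(scale), extent_(scale_x_ * 2) {}
};

int main() {
  Widths w(3);
  std::printf("%d %d\n", w.scale_x_, w.extent_);  // prints: 3 6
}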
@@ -941,7 +941,6 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
         ImGui::NextColumn();
         break;
       case xenos::VertexFormat::k_2_10_10_10: {
-        auto e0 = LOADEL(uint32_t, 0);
         ImGui::Text("??");
         ImGui::NextColumn();
         ImGui::Text("??");
@@ -1066,8 +1065,6 @@ void ProgressBar(float frac, float width, float height = 0,
   }
   frac = xe::saturate_unsigned(frac);

-  const auto fontAtlas = ImGui::GetIO().Fonts;
-
   auto pos = ImGui::GetCursorScreenPos();
   auto col = ImGui::ColorConvertFloat4ToU32(color);
   auto border_col = ImGui::ColorConvertFloat4ToU32(border_color);
@@ -1137,7 +1134,6 @@ void TraceViewer::DrawStateUI() {
   std::memset(&draw_info, 0, sizeof(draw_info));
   switch (opcode) {
     case PM4_DRAW_INDX: {
-      uint32_t dword0 = xe::load_and_swap<uint32_t>(packet_head + 4);
       uint32_t dword1 = xe::load_and_swap<uint32_t>(packet_head + 8);
       draw_info.index_count = dword1 >> 16;
       draw_info.prim_type = static_cast<xenos::PrimitiveType>(dword1 & 0x3F);
@@ -1187,7 +1183,6 @@ void TraceViewer::DrawStateUI() {
   auto enable_mode =
       static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);

-  const char* mode_name = "Unknown";
   switch (enable_mode) {
     case ModeControl::kIgnore:
       ImGui::Text("Ignored Command %d", player_->current_command_index());
@@ -48,8 +48,9 @@ namespace vulkan {

 class VulkanCommandProcessor final : public CommandProcessor {
  protected:
+#define OVERRIDING_BASE_CMDPROCESSOR
 #include "../pm4_command_processor_declare.h"
+#undef OVERRIDING_BASE_CMDPROCESSOR
  public:
   // Single-descriptor layouts for use within a single frame.
   enum class SingleTransientDescriptorLayout {
@@ -421,6 +421,9 @@ X_RESULT KernelState::FinishLoadingUserModule(
   emulator_->patcher()->ApplyPatchesForTitle(memory_, module->title_id(),
                                              module->hash());
   emulator_->on_patch_apply();
+  if (module->xex_module()) {
+    module->xex_module()->Precompile();
+  }

   if (module->is_dll_module() && module->entry_point() && call_entry) {
     // Call DllMain(DLL_PROCESS_ATTACH):
@@ -36,7 +36,7 @@ using PPCContext = xe::cpu::ppc::PPCContext;
       (xe::cpu::xe_kernel_export_shim_fn)export_name##_entry);

 #define SHIM_MEM_ADDR(a) \
-  ((a) ? ppc_context->kernel_state->memory()->TranslateVirtual(a) : nullptr)
+  ((a) ? ppc_context->TranslateVirtual(a) : nullptr)

 #define SHIM_MEM_8(a) xe::load_and_swap<uint8_t>(SHIM_MEM_ADDR(a))
 #define SHIM_MEM_16(a) xe::load_and_swap<uint16_t>(SHIM_MEM_ADDR(a))
@@ -159,7 +159,7 @@ class Param {
       uint32_t stack_ptr =
           uint32_t(init.ppc_context->r[1]) + 0x54 + (ordinal_ - 8) * 8;
       *out_value = xe::load_and_swap<V>(
-          init.ppc_context->kernel_state->memory()->TranslateVirtual(
+          init.ppc_context->TranslateVirtual(
               stack_ptr));
     }
   }
@@ -212,7 +212,7 @@ class PointerParam : public ParamBase<uint32_t> {
   PointerParam(Init& init) : ParamBase(init) {
     host_ptr_ =
         value_
-            ? init.ppc_context->kernel_state->memory()->TranslateVirtual(value_)
+            ? init.ppc_context->TranslateVirtual(value_)
            : nullptr;
   }
   PointerParam(void* host_ptr) : ParamBase(), host_ptr_(host_ptr) {}
@@ -251,8 +251,7 @@ template <typename T>
 class PrimitivePointerParam : public ParamBase<uint32_t> {
  public:
   PrimitivePointerParam(Init& init) : ParamBase(init) {
-    host_ptr_ = value_ ? init.ppc_context->kernel_state->memory()
-                             ->TranslateVirtual<xe::be<T>*>(value_)
+    host_ptr_ = value_ ? init.ppc_context->TranslateVirtual<xe::be<T>*>(value_)
                        : nullptr;
   }
   PrimitivePointerParam(T* host_ptr) : ParamBase() {
@@ -285,7 +284,7 @@ class StringPointerParam : public ParamBase<uint32_t> {
   StringPointerParam(Init& init) : ParamBase(init) {
     host_ptr_ =
         value_
-            ? init.ppc_context->kernel_state->memory()->TranslateVirtual<CHAR*>(
+            ? init.ppc_context->TranslateVirtual<CHAR*>(
                   value_)
             : nullptr;
   }
@@ -311,7 +310,7 @@ class TypedPointerParam : public ParamBase<uint32_t> {
  public:
   TypedPointerParam(Init& init) : ParamBase(init) {
     host_ptr_ =
-        value_ ? init.ppc_context->kernel_state->memory()->TranslateVirtual<T*>(
+        value_ ? init.ppc_context->TranslateVirtual<T*>(
                      value_)
                : nullptr;
   }
@@ -195,7 +195,8 @@ void XCustomRegisterDynamicActions_entry() {
 DECLARE_XAM_EXPORT1(XCustomRegisterDynamicActions, kNone, kStub);

 dword_result_t XGetAVPack_entry() {
-  // Value from https://github.com/Free60Project/libxenon/blob/920146f/libxenon/drivers/xenos/xenos_videomodes.h
+  // Value from
+  // https://github.com/Free60Project/libxenon/blob/920146f/libxenon/drivers/xenos/xenos_videomodes.h
   // DWORD
   // Not sure what the values are for this, but 6 is VGA.
   // Other likely values are 3/4/8 for HDMI or something.
@@ -321,11 +322,16 @@ void XamLoaderTerminateTitle_entry() {
 }
 DECLARE_XAM_EXPORT1(XamLoaderTerminateTitle, kNone, kSketchy);

-dword_result_t XamAlloc_entry(dword_t unk, dword_t size, lpdword_t out_ptr) {
-  assert_true(unk == 0);
+dword_result_t XamAlloc_entry(dword_t flags, dword_t size, lpdword_t out_ptr) {
+  if (flags & 0x00100000) {  // HEAP_ZERO_memory used unless this flag
+    // do nothing!
+    // maybe we ought to fill it with nonzero garbage, but otherwise this is a
+    // flag we can safely ignore
+  }

   // Allocate from the heap. Not sure why XAM does this specially, perhaps
   // it keeps stuff in a separate heap?
+  //chrispy: there is a set of different heaps it uses, an array of them. the top 4 bits of the 32 bit flags seems to select the heap
   uint32_t ptr = kernel_state()->memory()->SystemHeapAlloc(size);
   *out_ptr = ptr;
@@ -55,6 +55,7 @@ class XamDialog : public xe::ui::ImGuiDialog {
   XamDialog(xe::ui::ImGuiDrawer* imgui_drawer)
       : xe::ui::ImGuiDialog(imgui_drawer) {}

+  virtual ~XamDialog() {}
   void OnClose() override {
     if (close_callback_) {
       close_callback_();
@@ -254,6 +255,7 @@ class MessageBoxDialog : public XamDialog {
       Close();
     }
   }
+  virtual ~MessageBoxDialog() {}

  private:
   bool has_opened_ = false;
@@ -264,8 +266,7 @@ class MessageBoxDialog : public XamDialog {
   uint32_t chosen_button_ = 0;
 };

-// https://www.se7ensins.com/forums/threads/working-xshowmessageboxui.844116/
-dword_result_t XamShowMessageBoxUI_entry(
+static dword_result_t XamShowMessageBoxUi(
     dword_t user_index, lpu16string_t title_ptr, lpu16string_t text_ptr,
     dword_t button_count, lpdword_t button_ptrs, dword_t active_button,
     dword_t flags, lpdword_t result_ptr, pointer_t<XAM_OVERLAPPED> overlapped) {
@@ -321,8 +322,28 @@ dword_result_t XamShowMessageBoxUI_entry(
   }
   return result;
 }

+// https://www.se7ensins.com/forums/threads/working-xshowmessageboxui.844116/
+dword_result_t XamShowMessageBoxUI_entry(
+    dword_t user_index, lpu16string_t title_ptr, lpu16string_t text_ptr,
+    dword_t button_count, lpdword_t button_ptrs, dword_t active_button,
+    dword_t flags, lpdword_t result_ptr, pointer_t<XAM_OVERLAPPED> overlapped) {
+  return XamShowMessageBoxUi(user_index, title_ptr, text_ptr, button_count,
+                             button_ptrs, active_button, flags, result_ptr,
+                             overlapped);
+}
 DECLARE_XAM_EXPORT1(XamShowMessageBoxUI, kUI, kImplemented);

+dword_result_t XamShowMessageBoxUIEx_entry(
+    dword_t user_index, lpu16string_t title_ptr, lpu16string_t text_ptr,
+    dword_t button_count, lpdword_t button_ptrs, dword_t active_button,
+    dword_t flags, dword_t unknown_unused, lpdword_t result_ptr,
+    pointer_t<XAM_OVERLAPPED> overlapped) {
+  return XamShowMessageBoxUi(user_index, title_ptr, text_ptr, button_count,
+                             button_ptrs, active_button, flags, result_ptr,
+                             overlapped);
+}
+DECLARE_XAM_EXPORT1(XamShowMessageBoxUIEx, kUI, kImplemented);
 class KeyboardInputDialog : public XamDialog {
  public:
   KeyboardInputDialog(xe::ui::ImGuiDrawer* imgui_drawer, std::string title,
@@ -347,6 +368,7 @@ class KeyboardInputDialog : public XamDialog {
     xe::string_util::copy_truncating(text_buffer_.data(), default_text_,
                                      text_buffer_.size());
   }
+  virtual ~KeyboardInputDialog() {}

   const std::string& text() const { return text_; }
   bool cancelled() const { return cancelled_; }
@@ -13,11 +13,12 @@
 #include "xenia/kernel/xbdm/xbdm_private.h"
 #include "xenia/kernel/xthread.h"
 #include "xenia/xbox.h"
+//chrispy: no idea what a real valid value is for this
+static constexpr const char DmXboxName[] = "Xbox360Name";
 namespace xe {
 namespace kernel {
 namespace xbdm {
+#define XBDM_SUCCESSFULL 0x02DA0000
 #define MAKE_DUMMY_STUB_PTR(x) \
   dword_result_t x##_entry() { return 0; } \
   DECLARE_XBDM_EXPORT1(x, kDebug, kStub)
@@ -36,11 +37,27 @@ MAKE_DUMMY_STUB_STATUS(DmFreePool);
 dword_result_t DmGetXbeInfo_entry() {
   // TODO(gibbed): 4D5307DC appears to expect this as success?
   // Unknown arguments -- let's hope things don't explode.
-  return 0x02DA0000;
+  return XBDM_SUCCESSFULL;
 }
 DECLARE_XBDM_EXPORT1(DmGetXbeInfo, kDebug, kStub);

-MAKE_DUMMY_STUB_STATUS(DmGetXboxName);
+dword_result_t DmGetXboxName_entry(const ppc_context_t& ctx) {
+  uint64_t arg1 = ctx->r[3];
+  uint64_t arg2 = ctx->r[4];
+  if (!arg1 || !arg2) {
+    return 0x80070057;
+  }
+  char* name_out = ctx->TranslateVirtualGPR<char*>(arg1);
+
+  uint32_t* max_name_chars_ptr = ctx->TranslateVirtualGPR<uint32_t*>(arg2);
+
+  uint32_t max_name_chars = xe::load_and_swap<uint32_t>(max_name_chars_ptr);
+  strncpy(name_out, DmXboxName, sizeof(DmXboxName));
+
+  return XBDM_SUCCESSFULL;
+}
+DECLARE_XBDM_EXPORT1(DmGetXboxName, kDebug, kImplemented)

 dword_result_t DmIsDebuggerPresent_entry() { return 0; }
 DECLARE_XBDM_EXPORT1(DmIsDebuggerPresent, kDebug, kStub);
@@ -49,15 +66,15 @@ void DmSendNotificationString_entry(lpdword_t unk0_ptr) {}
 DECLARE_XBDM_EXPORT1(DmSendNotificationString, kDebug, kStub);

 dword_result_t DmRegisterCommandProcessor_entry(lpdword_t name_ptr,
                                                 lpdword_t handler_fn) {
   // Return success to prevent some games from crashing
   return X_STATUS_SUCCESS;
 }
 DECLARE_XBDM_EXPORT1(DmRegisterCommandProcessor, kDebug, kStub);

 dword_result_t DmRegisterCommandProcessorEx_entry(lpdword_t name_ptr,
                                                   lpdword_t handler_fn,
                                                   dword_t unk3) {
   // Return success to prevent some games from stalling
   return X_STATUS_SUCCESS;
 }
@@ -65,9 +82,12 @@ DECLARE_XBDM_EXPORT1(DmRegisterCommandProcessorEx, kDebug, kStub);

 MAKE_DUMMY_STUB_STATUS(DmStartProfiling);
 MAKE_DUMMY_STUB_STATUS(DmStopProfiling);
-dword_result_t DmCaptureStackBackTrace_entry(lpdword_t unk0_ptr,
-                                             lpdword_t unk1_ptr) {
+// two arguments, first is num frames i think, second is some kind of pointer to
+// where to capture
+dword_result_t DmCaptureStackBackTrace_entry(const ppc_context_t& ctx) {
+  uint32_t nframes = static_cast<uint32_t>(ctx->r[3]);
+  uint8_t* unknown_addr =
+      ctx->TranslateVirtual(static_cast<uint32_t>(ctx->r[4]));
   return X_STATUS_INVALID_PARAMETER;
 }
 DECLARE_XBDM_EXPORT1(DmCaptureStackBackTrace, kDebug, kStub);
@@ -82,7 +102,10 @@ dword_result_t DmWalkLoadedModules_entry(lpdword_t unk0_ptr,
 }
 DECLARE_XBDM_EXPORT1(DmWalkLoadedModules, kDebug, kStub);

-void DmMapDevkitDrive_entry() {}
+void DmMapDevkitDrive_entry(const ppc_context_t& ctx) {
+  // games check for nonzero result, failure if nz
+  ctx->r[3] = 0ULL;
+}
 DECLARE_XBDM_EXPORT1(DmMapDevkitDrive, kDebug, kStub);

 dword_result_t DmFindPdbSignature_entry(lpdword_t unk0_ptr,
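DmGetXboxName_entry, DmCaptureStackBackTrace_entry and DmMapDevkitDrive_entry now take the raw ppc_context_t instead of typed shim parameters, so arguments are read straight from guest registers per the PowerPC calling convention the titles use: the first integer/pointer arguments arrive in r3, r4, ..., and the result goes back in r3. A reduced sketch of that convention against a stand-in context type (GuestContext and its TranslateVirtual are illustrative, not xenia's types; the 0x02DA0000 and 0x80070057 values are the ones visible in the hunk above):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Stand-in for the guest register context: only what the sketch needs.
struct GuestContext {
  uint64_t r[32];       // general purpose registers
  uint8_t* guest_base;  // base of emulated guest memory

  template <typename T>
  T TranslateVirtual(uint32_t guest_addr) {
    return reinterpret_cast<T>(guest_base + guest_addr);
  }
};

// A raw-context export in the style of DmGetXboxName_entry: arg in r3,
// status returned in r3.
static void FakeGetName_entry(GuestContext* ctx) {
  uint32_t name_ptr = static_cast<uint32_t>(ctx->r[3]);
  if (!name_ptr) {
    ctx->r[3] = 0x80070057u;  // invalid-parameter style failure
    return;
  }
  char* out = ctx->TranslateVirtual<char*>(name_ptr);
  std::strcpy(out, "Xbox360Name");
  ctx->r[3] = 0x02DA0000u;  // XBDM success code from the diff
}

int main() {
  uint8_t memory[64] = {};
  GuestContext ctx{};
  ctx.guest_base = memory;
  ctx.r[3] = 8;  // guest address of the output buffer
  FakeGetName_entry(&ctx);
  std::printf("r3=0x%08llX name=%s\n",
              static_cast<unsigned long long>(ctx.r[3]),
              reinterpret_cast<char*>(&memory[8]));
}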
@@ -28,7 +28,11 @@
 namespace xe {
 namespace kernel {
 namespace xboxkrnl {
+struct X_STRING {
+  unsigned short length;
+  unsigned short pad;
+  uint32_t ptr;
+};
 // https://msdn.microsoft.com/en-us/library/ff561778
 dword_result_t RtlCompareMemory_entry(lpvoid_t source1, lpvoid_t source2,
                                       dword_t length) {
@@ -142,38 +146,80 @@ dword_result_t RtlLowerChar_entry(dword_t in) {
 }
 DECLARE_XBOXKRNL_EXPORT1(RtlLowerChar, kNone, kImplemented);

-dword_result_t RtlCompareString_entry(lpstring_t string_1, lpstring_t string_2,
-                                      dword_t case_insensitive) {
-  int ret = case_insensitive ? xe_strcasecmp(string_1, string_2)
-                             : std::strcmp(string_1, string_2);
-
-  return ret;
+static int RtlCompareStringN_impl(uint8_t* string_1, unsigned int string_1_len,
+                                  uint8_t* string_2, unsigned int string_2_len,
+                                  int case_insensitive) {
+  if (string_1_len == 0xFFFFFFFF) {
+    uint8_t* string1_strlen_iter = string_1;
+    while (*string1_strlen_iter++)
+      ;
+    string_1_len =
+        static_cast<unsigned int>(string1_strlen_iter - string_1 - 1);
+  }
+  if (string_2_len == 0xFFFFFFFF) {
+    uint8_t* string2_strlen_iter = string_2;
+    while (*string2_strlen_iter++)
+      ;
+    string_2_len =
+        static_cast<unsigned int>(string2_strlen_iter - string_2 - 1);
+  }
+  uint8_t* string1_end = &string_1[std::min(string_2_len, string_1_len)];
+  if (case_insensitive) {
+    while (string_1 < string1_end) {
+      unsigned c1 = *string_1++;
+      unsigned c2 = *string_2++;
+      if (c1 != c2) {
+        unsigned cu1 = rtl_upper_table[c1];
+        unsigned cu2 = rtl_upper_table[c2];
+        if (cu1 != cu2) {
+          return cu1 - cu2;
+        }
+      }
+    }
+  } else {
+    while (string_1 < string1_end) {
+      unsigned c1 = *string_1++;
+      unsigned c2 = *string_2++;
+      if (c1 != c2) {
+        return c1 - c2;
+      }
+    }
+  }
+  // why? not sure, but its the original logic
+  return string_1_len - string_2_len;
 }
-DECLARE_XBOXKRNL_EXPORT1(RtlCompareString, kNone, kImplemented);

 dword_result_t RtlCompareStringN_entry(lpstring_t string_1,
                                        dword_t string_1_len,
                                        lpstring_t string_2,
                                        dword_t string_2_len,
                                        dword_t case_insensitive) {
-  uint32_t len1 = string_1_len;
-  uint32_t len2 = string_2_len;
-
-  if (string_1_len == 0xFFFF) {
-    len1 = uint32_t(std::strlen(string_1));
-  }
-  if (string_2_len == 0xFFFF) {
-    len2 = uint32_t(std::strlen(string_2));
-  }
-  auto len = std::min(string_1_len, string_2_len);
-
-  int ret = case_insensitive ? xe_strncasecmp(string_1, string_2, len)
-                             : std::strncmp(string_1, string_2, len);
-
-  return ret;
+  return RtlCompareStringN_impl(
+      reinterpret_cast<uint8_t*>(string_1.host_address()), string_1_len,
+      reinterpret_cast<uint8_t*>(string_2.host_address()), string_2_len,
+      case_insensitive);
 }

 DECLARE_XBOXKRNL_EXPORT1(RtlCompareStringN, kNone, kImplemented);

+dword_result_t RtlCompareString_entry(lpvoid_t string_1, lpvoid_t string_2,
+                                      dword_t case_insensitive) {
+  X_STRING* xs1 = string_1.as<X_STRING*>();
+  X_STRING* xs2 = string_2.as<X_STRING*>();
+
+  unsigned length_1 = xe::load_and_swap<uint16_t>(&xs1->length);
+  unsigned length_2 = xe::load_and_swap<uint16_t>(&xs2->length);
+
+  uint32_t ptr_1 = xe::load_and_swap<uint32_t>(&xs1->ptr);
+  uint32_t ptr_2 = xe::load_and_swap<uint32_t>(&xs2->ptr);
+
+  auto kmem = kernel_memory();
+
+  return RtlCompareStringN_impl(
+      kmem->TranslateVirtual<uint8_t*>(ptr_1), length_1,
+      kmem->TranslateVirtual<uint8_t*>(ptr_2), length_2, case_insensitive);
+}
+DECLARE_XBOXKRNL_EXPORT1(RtlCompareString, kNone, kImplemented);
 // https://msdn.microsoft.com/en-us/library/ff561918
 void RtlInitAnsiString_entry(pointer_t<X_ANSI_STRING> destination,
                              lpstring_t source) {
@@ -188,13 +234,13 @@ void RtlInitAnsiString_entry(pointer_t<X_ANSI_STRING> destination,
   destination->pointer = source.guest_address();
 }
 DECLARE_XBOXKRNL_EXPORT1(RtlInitAnsiString, kNone, kImplemented);
-//https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-rtlupcaseunicodechar
+// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-rtlupcaseunicodechar
 dword_result_t RtlUpcaseUnicodeChar_entry(dword_t SourceCharacter) {
-  return std::use_facet<std::ctype<char16_t>>(std::locale()).toupper(SourceCharacter);
+  return std::use_facet<std::ctype<char16_t>>(std::locale())
+      .toupper(SourceCharacter);
 }
 DECLARE_XBOXKRNL_EXPORT1(RtlUpcaseUnicodeChar, kNone, kImplemented);

 // https://msdn.microsoft.com/en-us/library/ff561899
 void RtlFreeAnsiString_entry(pointer_t<X_ANSI_STRING> string) {
   if (string->pointer) {
@@ -206,8 +252,8 @@ void RtlFreeAnsiString_entry(pointer_t<X_ANSI_STRING> string) {
 DECLARE_XBOXKRNL_EXPORT1(RtlFreeAnsiString, kNone, kImplemented);

 // https://msdn.microsoft.com/en-us/library/ff561934
-void RtlInitUnicodeString_entry(pointer_t<X_UNICODE_STRING> destination,
-                                lpu16string_t source) {
+pointer_result_t RtlInitUnicodeString_entry(
+    pointer_t<X_UNICODE_STRING> destination, lpu16string_t source) {
   if (source) {
     destination->length = (uint16_t)source.value().size() * 2;
     destination->maximum_length = (uint16_t)(source.value().size() + 1) * 2;
@@ -215,6 +261,7 @@ void RtlInitUnicodeString_entry(pointer_t<X_UNICODE_STRING> destination,
   } else {
     destination->reset();
   }
+  return destination.guest_address();
 }
 DECLARE_XBOXKRNL_EXPORT1(RtlInitUnicodeString, kNone, kImplemented);

@@ -671,6 +718,26 @@ dword_result_t RtlComputeCrc32_entry(dword_t seed, lpvoid_t buffer,
 }
 DECLARE_XBOXKRNL_EXPORT1(RtlComputeCrc32, kNone, kImplemented);

+static void RtlRip_entry(const ppc_context_t& ctx) {
+  uint32_t arg1 = static_cast<uint32_t>(ctx->r[3]);
+  uint32_t arg2 = static_cast<uint32_t>(ctx->r[4]);
+  const char* msg_str1 = "";
+
+  const char* msg_str2 = "";
+
+  if (arg1) {
+    msg_str1 = ctx->TranslateVirtual<const char*>(arg1);
+  }
+
+  if (arg2) {
+    msg_str2 = ctx->TranslateVirtual<const char*>(arg2);
+  }
+
+  XELOGE("RtlRip called, arg1 = {}, arg2 = {}\n", msg_str1, msg_str2);
+
+  //we should break here... not sure what to do exactly
+}
+DECLARE_XBOXKRNL_EXPORT1(RtlRip, kNone, kImportant);
 }  // namespace xboxkrnl
 }  // namespace kernel
 }  // namespace xe
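For reference, the behaviour RtlCompareStringN_impl implements above — a length of 0xFFFFFFFF meaning "treat the argument as NUL-terminated", comparison over the common prefix (optionally case-folded), and the raw length difference as the tie-breaker — can be restated compactly in portable C++. The sketch below illustrates those semantics only; it is not emulator code, and it substitutes std::toupper for the guest rtl_upper_table:

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Behavioural sketch of RtlCompareStringN: 0xFFFFFFFF as a length means
// "compute strlen of that argument".
static int CompareStringN(const char* s1, uint32_t len1, const char* s2,
                          uint32_t len2, bool case_insensitive) {
  if (len1 == 0xFFFFFFFFu) len1 = static_cast<uint32_t>(std::strlen(s1));
  if (len2 == 0xFFFFFFFFu) len2 = static_cast<uint32_t>(std::strlen(s2));
  uint32_t common = std::min(len1, len2);
  for (uint32_t i = 0; i < common; ++i) {
    unsigned c1 = static_cast<unsigned char>(s1[i]);
    unsigned c2 = static_cast<unsigned char>(s2[i]);
    if (c1 != c2) {
      if (!case_insensitive) return static_cast<int>(c1 - c2);
      c1 = std::toupper(c1);
      c2 = std::toupper(c2);
      if (c1 != c2) return static_cast<int>(c1 - c2);
    }
  }
  // Like the original, the tie-breaker is the raw length difference.
  return static_cast<int>(len1) - static_cast<int>(len2);
}

int main() {
  std::printf("%d\n", CompareStringN("abc", 0xFFFFFFFFu, "ABC", 3, true));  // 0
  std::printf("%d\n", CompareStringN("abc", 3, "abcd", 4, false) < 0);      // 1
}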
@ -9,7 +9,6 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "xenia/base/atomic.h"
|
#include "xenia/base/atomic.h"
|
||||||
#include "xenia/base/clock.h"
|
#include "xenia/base/clock.h"
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
|
@ -913,7 +912,7 @@ dword_result_t NtWaitForMultipleObjectsEx_entry(
|
||||||
dword_t count, lpdword_t handles, dword_t wait_type, dword_t wait_mode,
|
dword_t count, lpdword_t handles, dword_t wait_type, dword_t wait_mode,
|
||||||
dword_t alertable, lpqword_t timeout_ptr) {
|
dword_t alertable, lpqword_t timeout_ptr) {
|
||||||
uint64_t timeout = timeout_ptr ? static_cast<uint64_t>(*timeout_ptr) : 0u;
|
uint64_t timeout = timeout_ptr ? static_cast<uint64_t>(*timeout_ptr) : 0u;
|
||||||
if (!count || count > 64 || wait_type != 1 && wait_type) {
|
if (!count || count > 64 || (wait_type != 1 && wait_type)) {
|
||||||
return X_STATUS_INVALID_PARAMETER;
|
return X_STATUS_INVALID_PARAMETER;
|
||||||
}
|
}
|
||||||
return xeNtWaitForMultipleObjectsEx(count, handles, wait_type, wait_mode,
|
return xeNtWaitForMultipleObjectsEx(count, handles, wait_type, wait_mode,
|
||||||
|
@ -964,7 +963,7 @@ uint32_t xeKeKfAcquireSpinLock(uint32_t* lock, uint64_t r13 = 1) {
|
||||||
PrefetchForCAS(lock);
|
PrefetchForCAS(lock);
|
||||||
assert_true(*lock != static_cast<uint32_t>(r13));
|
assert_true(*lock != static_cast<uint32_t>(r13));
|
||||||
// Lock.
|
// Lock.
|
||||||
while (!xe::atomic_cas(0, static_cast<uint32_t>(r13), lock)) {
|
while (!xe::atomic_cas(0, xe::byte_swap(static_cast<uint32_t>(r13)), lock)) {
|
||||||
// Spin!
|
// Spin!
|
||||||
// TODO(benvanik): error on deadlock?
|
// TODO(benvanik): error on deadlock?
|
||||||
xe::threading::MaybeYield();
|
xe::threading::MaybeYield();
|
@@ -978,7 +977,7 @@ uint32_t xeKeKfAcquireSpinLock(uint32_t* lock, uint64_t r13 = 1) {
 }

 dword_result_t KfAcquireSpinLock_entry(lpdword_t lock_ptr,
-                                       ppc_context_t& ppc_context) {
+                                       const ppc_context_t& ppc_context) {
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
   return xeKeKfAcquireSpinLock(lock, ppc_context->r[13]);
 }

@@ -997,9 +996,7 @@ void xeKeKfReleaseSpinLock(uint32_t* lock, dword_t old_irql) {
 }

 void KfReleaseSpinLock_entry(lpdword_t lock_ptr, dword_t old_irql,
-                             ppc_context_t& ppc_ctx) {
-  auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
-
+                             const ppc_context_t& ppc_ctx) {
   assert_true(*lock_ptr == static_cast<uint32_t>(ppc_ctx->r[13]));

   *lock_ptr = 0;

@@ -1014,14 +1011,14 @@ DECLARE_XBOXKRNL_EXPORT2(KfReleaseSpinLock, kThreading, kImplemented,
                          kHighFrequency);
 // todo: this is not accurate
 void KeAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr,
-                                         ppc_context_t& ppc_ctx) {
+                                         const ppc_context_t& ppc_ctx) {
   // Lock.
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
   // must not be our own thread
   assert_true(*lock_ptr != static_cast<uint32_t>(ppc_ctx->r[13]));

   PrefetchForCAS(lock);
-  while (!xe::atomic_cas(0, static_cast<uint32_t>(ppc_ctx->r[13]), lock)) {
+  while (!xe::atomic_cas(0, xe::byte_swap(static_cast<uint32_t>(ppc_ctx->r[13])), lock)) {
 #if XE_ARCH_AMD64 == 1
     // todo: this is just a nop if they don't have SMT, which is not great
     // either...

@@ -1036,12 +1033,12 @@ DECLARE_XBOXKRNL_EXPORT3(KeAcquireSpinLockAtRaisedIrql, kThreading,
                          kImplemented, kBlocking, kHighFrequency);

 dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(
-    lpdword_t lock_ptr, ppc_context_t& ppc_ctx) {
+    lpdword_t lock_ptr, const ppc_context_t& ppc_ctx) {
   // Lock.
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
   assert_true(*lock_ptr != static_cast<uint32_t>(ppc_ctx->r[13]));
   PrefetchForCAS(lock);
-  if (!xe::atomic_cas(0, static_cast<uint32_t>(ppc_ctx->r[13]), lock)) {
+  if (!xe::atomic_cas(0, xe::byte_swap(static_cast<uint32_t>(ppc_ctx->r[13])), lock)) {
     return 0;
   }
   return 1;

@@ -1050,10 +1047,9 @@ DECLARE_XBOXKRNL_EXPORT4(KeTryToAcquireSpinLockAtRaisedIrql, kThreading,
                          kImplemented, kBlocking, kHighFrequency, kSketchy);

 void KeReleaseSpinLockFromRaisedIrql_entry(lpdword_t lock_ptr,
-                                           ppc_context_t& ppc_ctx) {
+                                           const ppc_context_t& ppc_ctx) {
   // Unlock.
   assert_true(*lock_ptr == static_cast<uint32_t>(ppc_ctx->r[13]));
-  auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
   *lock_ptr = 0;
 }
 DECLARE_XBOXKRNL_EXPORT2(KeReleaseSpinLockFromRaisedIrql, kThreading,

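The entry points above now take the guest context by const reference (they only read r[13], the per-thread pointer used as the lock-owner tag), and host-pointer locals that were never used are dropped. The hunks also show the acquire/release asymmetry: acquiring needs an atomic compare-and-swap, while releasing is a plain store of zero because only the current owner writes it. A hedged, self-contained illustration, with std::atomic standing in for the emulator's guest-memory accessors:

#include <atomic>
#include <cstdint>

// Release is just a (release-ordered) store of the unlocked value; no CAS is
// required because only the owner reaches this point.
static void ReleaseGuestSpinLock(std::atomic<uint32_t>* lock) {
  lock->store(0, std::memory_order_release);
}
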
@@ -1283,7 +1279,8 @@ void ExInitializeReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr) {
 }
 DECLARE_XBOXKRNL_EXPORT1(ExInitializeReadWriteLock, kThreading, kImplemented);

-void ExAcquireReadWriteLockExclusive_entry(pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
+void ExAcquireReadWriteLockExclusive_entry(pointer_t<X_ERWLOCK> lock_ptr,
+                                           const ppc_context_t& ppc_context) {
   auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);

   int32_t lock_count = ++lock_ptr->lock_count;

@@ -1301,7 +1298,7 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockExclusive, kThreading,
                          kImplemented, kBlocking);

 dword_result_t ExTryToAcquireReadWriteLockExclusive_entry(
-    pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
+    pointer_t<X_ERWLOCK> lock_ptr, const ppc_context_t& ppc_context) {
   auto old_irql =
       xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);


@@ -1320,7 +1317,7 @@ DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockExclusive, kThreading,
                          kImplemented);

 void ExAcquireReadWriteLockShared_entry(pointer_t<X_ERWLOCK> lock_ptr,
-                                        ppc_context_t& ppc_context) {
+                                        const ppc_context_t& ppc_context) {
   auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);

   int32_t lock_count = ++lock_ptr->lock_count;

@@ -1340,7 +1337,7 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockShared, kThreading, kImplemented,
                          kBlocking);

 dword_result_t ExTryToAcquireReadWriteLockShared_entry(
-    pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
+    pointer_t<X_ERWLOCK> lock_ptr, const ppc_context_t& ppc_context) {
   auto old_irql =
       xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);


@@ -1361,7 +1358,7 @@ DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockShared, kThreading,
                          kImplemented);

 void ExReleaseReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr,
-                                  ppc_context_t& ppc_context) {
+                                  const ppc_context_t& ppc_context) {
   auto old_irql =
       xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);

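All of the X_ERWLOCK entry points now receive the guest context the same way and pass its r[13] into xeKeKfAcquireSpinLock, which guards the lock_count update. A rough sketch of that shape with stand-in types; the wait/wake path is omitted and the initial lock_count value is an assumption, not something shown in the hunks:

#include <atomic>
#include <cstdint>

struct RwLockShape {
  std::atomic<uint32_t> spin_lock{0};
  int32_t lock_count = -1;  // assumed "free" value
};

static void AcquireExclusiveShape(RwLockShape* rw, uint32_t owner_tag) {
  // Spin until the embedded lock word is free, tagging it with the owner.
  uint32_t expected = 0;
  while (!rw->spin_lock.compare_exchange_weak(expected, owner_tag)) {
    expected = 0;
  }
  int32_t count = ++rw->lock_count;
  // count == 0 would mean the caller owns the lock outright; anything else
  // would have to block on the lock's wait event (not shown here).
  (void)count;
  rw->spin_lock.store(0, std::memory_order_release);
}
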
@@ -1404,7 +1401,7 @@ pointer_result_t InterlockedPushEntrySList_entry(
   assert_not_null(entry);

   alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr;
-  alignas(8) X_SLIST_HEADER new_hdr = {0};
+  alignas(8) X_SLIST_HEADER new_hdr = {{0}, 0, 0};
   uint32_t old_head = 0;
   do {
     old_hdr = *plist_ptr;

@@ -1428,8 +1425,8 @@ pointer_result_t InterlockedPopEntrySList_entry(
   assert_not_null(plist_ptr);

   uint32_t popped = 0;
-  alignas(8) X_SLIST_HEADER old_hdr = {0};
-  alignas(8) X_SLIST_HEADER new_hdr = {0};
+  alignas(8) X_SLIST_HEADER old_hdr = {{0}, 0, 0};
+  alignas(8) X_SLIST_HEADER new_hdr = {{0}, 0, 0};
   do {
     old_hdr = *plist_ptr;
     auto next = kernel_memory()->TranslateVirtual<X_SINGLE_LIST_ENTRY*>(

@@ -1456,7 +1453,7 @@ pointer_result_t InterlockedFlushSList_entry(
   assert_not_null(plist_ptr);

   alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr;
-  alignas(8) X_SLIST_HEADER new_hdr = {0};
+  alignas(8) X_SLIST_HEADER new_hdr = {{0}, 0, 0};
   uint32_t first = 0;
   do {
     old_hdr = *plist_ptr;

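The {0} to {{0}, 0, 0} change above is purely about initializer syntax: X_SLIST_HEADER's first member is itself an aggregate, so {0} relies on brace elision and draws a -Wmissing-braces style warning, while the explicit form zero-initializes exactly the same bytes. Illustrated with a stand-in struct of a similar shape (the real layout lives in the kernel headers):

#include <cstdint>

struct FakeSingleListEntry {
  uint32_t next;
};

struct FakeSListHeader {
  FakeSingleListEntry next;  // nested aggregate as the first member
  uint16_t depth;
  uint16_t sequence;
};

void Demo() {
  FakeSListHeader a = {0};          // warns: suggest braces around subobject
  FakeSListHeader b = {{0}, 0, 0};  // explicit nesting, same zero value
  (void)a;
  (void)b;
}
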
@@ -433,7 +433,7 @@ void VdSwap_entry(
     return;
   }
   gpu_fetch.base_address = frontbuffer_physical_address >> 12;
-
+  XE_MAYBE_UNUSED
   auto texture_format = gpu::xenos::TextureFormat(texture_format_ptr.value());
   auto color_space = *color_space_ptr;
   assert_true(texture_format == gpu::xenos::TextureFormat::k_8_8_8_8 ||

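XE_MAYBE_UNUSED marks texture_format as intentionally unused in builds where assert_true compiles away. Its definition is not part of this diff; a plausible stand-in and usage, shown under that assumption:

#include <cassert>

// Assumed stand-in; Xenia's actual macro may be defined differently.
#define MY_MAYBE_UNUSED [[maybe_unused]]

void CheckFormat(int raw_format) {
  // In release builds assert() compiles away, leaving 'format' otherwise
  // unread; the attribute tells the compiler that this is intentional.
  MY_MAYBE_UNUSED int format = raw_format;
  assert(format != 0);
}
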
@@ -41,8 +41,7 @@ struct XAPC {
   // KAPC is 0x28(40) bytes? (what's passed to ExAllocatePoolWithTag)
   // This is 4b shorter than NT - looks like the reserved dword at +4 is gone.
   // NOTE: stored in guest memory.
-  uint8_t type;             // +0
-  uint8_t unk1;             // +1
+  uint16_t type;            // +0
   uint8_t processor_mode;   // +2
   uint8_t enqueued;         // +3
   xe::be<uint32_t> thread_ptr;  // +4

@@ -57,7 +56,6 @@ struct XAPC {

   void Initialize() {
     type = 18;  // ApcObject
-    unk1 = 0;
     processor_mode = 0;
     enqueued = 0;
     thread_ptr = 0;

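Widening type to 16 bits while deleting the adjacent unk1 byte keeps every later field at the same guest-memory offset, which is what matters for a structure shared with guest code. A hedged check of that invariant using stand-in structs (not the real XAPC definition):

#include <cstddef>
#include <cstdint>

struct ApcOld {
  uint8_t type;            // +0
  uint8_t unk1;            // +1
  uint8_t processor_mode;  // +2
  uint8_t enqueued;        // +3
  uint32_t thread_ptr;     // +4 (xe::be<uint32_t> in the real struct)
};

struct ApcNew {
  uint16_t type;           // +0, now two bytes wide
  uint8_t processor_mode;  // +2
  uint8_t enqueued;        // +3
  uint32_t thread_ptr;     // +4
};

static_assert(offsetof(ApcOld, processor_mode) == offsetof(ApcNew, processor_mode));
static_assert(offsetof(ApcOld, enqueued) == offsetof(ApcNew, enqueued));
static_assert(offsetof(ApcOld, thread_ptr) == offsetof(ApcNew, thread_ptr));
static_assert(sizeof(ApcOld) == sizeof(ApcNew));
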
@@ -316,9 +316,10 @@ void Memory::Reset() {
   heaps_.v90000000.Reset();
   heaps_.physical.Reset();
 }
+//clang does not like non-standard layout offsetof
+#if XE_COMPILER_MSVC == 1 && XE_COMPILER_CLANG_CL==0
 XE_NOALIAS
 const BaseHeap* Memory::LookupHeap(uint32_t address) const {
-#if 1
 #define HEAP_INDEX(name) \
   offsetof(Memory, heaps_.name) - offsetof(Memory, heaps_)


@@ -354,8 +355,11 @@ const BaseHeap* Memory::LookupHeap(uint32_t address) const {
     heap_select = nullptr;
   }
   return reinterpret_cast<const BaseHeap*>(selected_heap_offset + heap_select);
+}
 #else
+XE_NOALIAS
+const BaseHeap* Memory::LookupHeap(uint32_t address) const {

   if (address < 0x40000000) {
     return &heaps_.v00000000;
   } else if (address < 0x7F000000) {

@@ -375,9 +379,8 @@ const BaseHeap* Memory::LookupHeap(uint32_t address) const {
   } else {
     return nullptr;
   }
-#endif
 }
+#endif
 BaseHeap* Memory::LookupHeapByType(bool physical, uint32_t page_size) {
   if (physical) {
     if (page_size <= 4096) {

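The new MSVC-only guard exists because the fast variant computes member offsets inside heaps_ with offsetof and adds them to the object's address, and offsetof on a non-standard-layout class is only conditionally supported; clang (including clang-cl) warns, so those builds now take the plain if/else chain under #else. The pattern itself, shown on a standard-layout stand-in rather than the real Memory class:

#include <cstddef>
#include <cstdint>

struct Heap { uint32_t base; };

struct HeapTable {
  Heap v00000000;
  Heap v40000000;
  Heap physical;
};

// Select a member by a precomputed byte offset instead of branching.
static const Heap* SelectHeap(const HeapTable& table, size_t member_offset) {
  return reinterpret_cast<const Heap*>(
      reinterpret_cast<const uint8_t*>(&table) + member_offset);
}

// Usage: offsetof is well-defined here because HeapTable is standard-layout.
// const Heap* h = SelectHeap(table, offsetof(HeapTable, physical));
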
@@ -1069,7 +1072,7 @@ bool BaseHeap::AllocRange(uint32_t low_address, uint32_t high_address,
   if (start_page_number == UINT_MAX || end_page_number == UINT_MAX) {
     // Out of memory.
     XELOGE("BaseHeap::Alloc failed to find contiguous range");
-    assert_always("Heap exhausted!");
+    //assert_always("Heap exhausted!");
     return false;
   }

@@ -285,7 +285,8 @@ class PhysicalHeap : public BaseHeap {
   uint32_t GetPhysicalAddress(uint32_t address) const;

   uint32_t SystemPagenumToGuestPagenum(uint32_t num) const {
-    return ((num << system_page_shift_) - host_address_offset()) >> page_size_shift_;
+    return ((num << system_page_shift_) - host_address_offset()) >>
+           page_size_shift_;
   }

   uint32_t GuestPagenumToSystemPagenum(uint32_t num) {

@@ -294,6 +295,7 @@ class PhysicalHeap : public BaseHeap {
     num >>= system_page_shift_;
     return num;
   }

  protected:
   VirtualHeap* parent_heap_;

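The first hunk above is only a line wrap for the column limit. The conversion itself goes through byte addresses: shift the system page number up by the system page shift, subtract the host/guest address offset, then shift down by the guest page shift. A worked example under assumed parameters (64 KiB host pages, 4 KiB guest pages, zero offset; the heap's real configuration may differ):

#include <cstdint>
#include <cstdio>

static uint32_t SystemToGuestPage(uint32_t num, uint32_t system_page_shift,
                                  uint32_t guest_page_shift,
                                  uint32_t host_address_offset) {
  return ((num << system_page_shift) - host_address_offset) >>
         guest_page_shift;
}

int main() {
  // Host page 3 starts at byte 3 << 16 = 0x30000, which is guest page
  // 0x30000 >> 12 = 48: one 64 KiB page spans sixteen 4 KiB pages.
  std::printf("%u\n", static_cast<unsigned>(SystemToGuestPage(3, 16, 12, 0)));
  return 0;
}
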
@@ -351,12 +353,21 @@ class Memory {
   // Note that the contents at the specified host address are big-endian.
   template <typename T = uint8_t*>
   inline T TranslateVirtual(uint32_t guest_address) const {
+#if XE_PLATFORM_WIN32 == 1
+    uint8_t* host_address = virtual_membase_ + guest_address;
+    if (guest_address >= 0xE0000000) {
+      host_address += 0x1000;
+    }
+    return reinterpret_cast<T>(host_address);
+#else
     uint8_t* host_address = virtual_membase_ + guest_address;
     const auto heap = LookupHeap(guest_address);
     if (heap) {
       host_address += heap->host_address_offset();
     }
     return reinterpret_cast<T>(host_address);
+#endif
   }

   // Base address of physical memory in the host address space.

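On Windows the translation fast path above replaces the LookupHeap call with a single range check: addresses at or above 0xE0000000 get a fixed 0x1000 host offset and every other range is assumed to have a zero offset, which are exactly the values visible in the hunk. Whether that assumption holds elsewhere is why the generic heap lookup stays behind #else. The same specialization in isolation:

#include <cstdint>

static inline uint8_t* TranslateFast(uint8_t* virtual_membase,
                                     uint32_t guest_address) {
  uint8_t* host_address = virtual_membase + guest_address;
  if (guest_address >= 0xE0000000u) {
    host_address += 0x1000;  // fixed host offset for the 0xE0000000 range
  }
  return host_address;
}
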
@@ -21,7 +21,7 @@ namespace vfs {

 NullDevice::NullDevice(const std::string& mount_path,
                        const std::initializer_list<std::string>& null_paths)
-    : Device(mount_path), null_paths_(null_paths), name_("NullDevice") {}
+    : Device(mount_path), name_("NullDevice"), null_paths_(null_paths) {}

 NullDevice::~NullDevice() = default;

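The NullDevice change only reorders the mem-initializer list: members are always initialized in declaration order, so the old order was not a functional bug, but clang/GCC flag the mismatch with -Wreorder. Reordering the list, presumably to match the declaration order of name_ and null_paths_, silences the warning. A stand-in illustration:

#include <string>
#include <vector>

class Widget {
 public:
  // Initializers listed in the same order as the declarations below; writing
  // paths_ before name_ would still initialize name_ first, and the compiler
  // would warn with -Wreorder.
  Widget(std::string name, std::vector<std::string> paths)
      : name_(std::move(name)), paths_(std::move(paths)) {}

 private:
  std::string name_;
  std::vector<std::string> paths_;
};
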
@@ -1 +1 @@
-Subproject commit a437fe6d8efef17c8ad33d39f5815032e7adf5d7
+Subproject commit a14f5c03834a79fc401626a4dad7a58a2da0c445