Compare commits
4 Commits
761f3b84ea
...
5122da3af7
Author | SHA1 | Date |
---|---|---|
Mateusz Dukat | 5122da3af7 | |
Gliniak | b9061e6292 | |
Gliniak | e8afad8f8a | |
Mateusz Dukat | 08c740c788 |
|
@ -44,37 +44,51 @@ on:
|
|||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Lint
|
||||
runs-on: windows-2022
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Check Clang-Format Version
|
||||
run: clang-format --version
|
||||
|
||||
- name: Lint
|
||||
run: .\xb lint --all
|
||||
|
||||
build-windows:
|
||||
name: Build (Windows) # runner.os can't be used here
|
||||
runs-on: windows-2022
|
||||
env:
|
||||
POWERSHELL_TELEMETRY_OPTOUT: 1
|
||||
needs: lint
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup
|
||||
run: .\xb setup
|
||||
|
||||
- name: Build
|
||||
run: .\xb build --config=Release --target=src\xenia-app --target=src\xenia-vfs-dump
|
||||
run: .\xb build --config=Release --target=src\xenia-app
|
||||
|
||||
- name: Prepare artifacts
|
||||
run: |
|
||||
robocopy . build\bin\${{ runner.os }}\Release LICENSE /r:0 /w:0
|
||||
robocopy build\bin\${{ runner.os }}\Release artifacts\xenia_canary xenia_canary.exe xenia_canary.pdb LICENSE /r:0 /w:0
|
||||
robocopy build\bin\${{ runner.os }}\Release artifacts\xenia-vfs-dump xenia-vfs-dump.exe xenia-vfs-dump.pdb LICENSE /r:0 /w:0
|
||||
If ($LastExitCode -le 7) { echo "LastExitCode = $LastExitCode";$LastExitCode = 0 }
|
||||
- name: Upload xenia-vfs-dump artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: xenia-vfs-dump_canary
|
||||
path: artifacts\xenia-vfs-dump
|
||||
if-no-files-found: error
|
||||
|
||||
- name: Upload xenia canary artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: xenia_canary
|
||||
path: artifacts\xenia_canary
|
||||
if-no-files-found: error
|
||||
|
||||
- name: Create release
|
||||
if: |
|
||||
github.repository == 'xenia-canary/xenia-canary' &&
|
||||
|
|
|
@ -61,8 +61,8 @@ class EmulatorWindow {
|
|||
int32_t selected_title_index = -1;
|
||||
|
||||
static constexpr int64_t diff_in_ms(
|
||||
const steady_clock::time_point t1,
|
||||
const steady_clock::time_point t2) noexcept {
|
||||
const steady_clock::time_point t1,
|
||||
const steady_clock::time_point t2) noexcept {
|
||||
using ms = std::chrono::milliseconds;
|
||||
return std::chrono::duration_cast<ms>(t1 - t2).count();
|
||||
}
|
||||
|
|
|
@ -35,9 +35,10 @@
|
|||
// and let the normal AudioSystem handling take it, to prevent duplicate
|
||||
// implementations. They can be found in xboxkrnl_audio_xma.cc
|
||||
|
||||
DEFINE_uint32(
|
||||
apu_max_queued_frames, 64,
|
||||
"Allows changing max buffered audio frames to reduce audio delay. Minimum is 16.", "APU");
|
||||
DEFINE_uint32(apu_max_queued_frames, 64,
|
||||
"Allows changing max buffered audio frames to reduce audio "
|
||||
"delay. Minimum is 16.",
|
||||
"APU");
|
||||
|
||||
namespace xe {
|
||||
namespace apu {
|
||||
|
@ -76,11 +77,14 @@ X_STATUS AudioSystem::Setup(kernel::KernelState* kernel_state) {
|
|||
}
|
||||
|
||||
worker_running_ = true;
|
||||
worker_thread_ = kernel::object_ref<kernel::XHostThread>(
|
||||
new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() {
|
||||
WorkerThreadMain();
|
||||
return 0;
|
||||
}, kernel_state->GetSystemProcess()));
|
||||
worker_thread_ =
|
||||
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
|
||||
kernel_state, 128 * 1024, 0,
|
||||
[this]() {
|
||||
WorkerThreadMain();
|
||||
return 0;
|
||||
},
|
||||
kernel_state->GetSystemProcess()));
|
||||
// As we run audio callbacks the debugger must be able to suspend us.
|
||||
worker_thread_->set_can_debugger_suspend(true);
|
||||
worker_thread_->set_name("Audio Worker");
|
||||
|
|
|
@ -21,7 +21,6 @@ namespace conversion {
|
|||
|
||||
#if XE_ARCH_AMD64
|
||||
|
||||
|
||||
XE_NOINLINE
|
||||
static void _generic_sequential_6_BE_to_interleaved_6_LE(
|
||||
float* XE_RESTRICT output, const float* XE_RESTRICT input,
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
//#include <vector>
|
||||
// #include <vector>
|
||||
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/xbox.h"
|
||||
|
@ -79,8 +79,8 @@ struct XMA_CONTEXT_DATA {
|
|||
uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?)
|
||||
|
||||
// DWORD 3
|
||||
uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset
|
||||
// frame offset in bits
|
||||
uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset
|
||||
// frame offset in bits
|
||||
uint32_t parser_error_status : 6; // ? ParserErrorStatus/ParserErrorSet(?)
|
||||
|
||||
// DWORD 4
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/cpu/processor.h"
|
||||
#include "xenia/cpu/thread_state.h"
|
||||
#include "xenia/kernel/xthread.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/kernel/xthread.h"
|
||||
extern "C" {
|
||||
#include "third_party/FFmpeg/libavutil/log.h"
|
||||
} // extern "C"
|
||||
|
@ -102,8 +102,7 @@ void av_log_callback(void* avcl, int level, const char* fmt, va_list va) {
|
|||
StringBuffer buff;
|
||||
buff.AppendVarargs(fmt, va);
|
||||
xe::logging::AppendLogLineFormat(LogSrc::Apu, log_level, level_char,
|
||||
"ffmpeg: {}",
|
||||
buff.to_string_view());
|
||||
"ffmpeg: {}", buff.to_string_view());
|
||||
}
|
||||
|
||||
X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
|
||||
|
@ -141,11 +140,16 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
|
|||
worker_running_ = true;
|
||||
work_event_ = xe::threading::Event::CreateAutoResetEvent(false);
|
||||
assert_not_null(work_event_);
|
||||
worker_thread_ = kernel::object_ref<kernel::XHostThread>(
|
||||
new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() {
|
||||
WorkerThreadMain();
|
||||
return 0;
|
||||
}, kernel_state->GetIdleProcess()));//this one doesnt need any process actually. never calls any guest code
|
||||
worker_thread_ =
|
||||
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
|
||||
kernel_state, 128 * 1024, 0,
|
||||
[this]() {
|
||||
WorkerThreadMain();
|
||||
return 0;
|
||||
},
|
||||
kernel_state
|
||||
->GetIdleProcess())); // this one doesnt need any process
|
||||
// actually. never calls any guest code
|
||||
worker_thread_->set_name("XMA Decoder");
|
||||
worker_thread_->set_can_debugger_suspend(true);
|
||||
worker_thread_->Create();
|
||||
|
|
|
@ -39,8 +39,8 @@ class Clock {
|
|||
// Host tick count. Generally QueryHostTickCount() should be used.
|
||||
static uint64_t host_tick_count_platform();
|
||||
#if XE_CLOCK_RAW_AVAILABLE
|
||||
//chrispy: the way msvc was ordering the branches was causing rdtsc to be speculatively executed each time
|
||||
//the branch history was lost
|
||||
// chrispy: the way msvc was ordering the branches was causing rdtsc to be
|
||||
// speculatively executed each time the branch history was lost
|
||||
XE_NOINLINE
|
||||
static uint64_t host_tick_count_raw();
|
||||
#endif
|
||||
|
|
|
@ -41,9 +41,6 @@
|
|||
"\n" \
|
||||
"Set the cvar 'clock_source_raw' to 'false'.");
|
||||
|
||||
|
||||
|
||||
|
||||
namespace xe {
|
||||
// Getting the TSC frequency can be a bit tricky. This method here only works on
|
||||
// Intel as it seems. There is no easy way to get the frequency outside of ring0
|
||||
|
@ -75,8 +72,6 @@ uint64_t Clock::host_tick_frequency_raw() {
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (max_cpuid >= 0x15) {
|
||||
// 15H Get TSC/Crystal ratio and Crystal Hz.
|
||||
xe_cpu_cpuid(0x15, eax, ebx, ecx, edx);
|
||||
|
@ -98,7 +93,6 @@ uint64_t Clock::host_tick_frequency_raw() {
|
|||
return cpu_base_freq;
|
||||
}
|
||||
|
||||
|
||||
CLOCK_FATAL("The clock frequency could not be determined.");
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -35,15 +35,14 @@ static bool has_shell_environment_variable() {
|
|||
}
|
||||
|
||||
void AttachConsole() {
|
||||
|
||||
bool has_console = ::AttachConsole(ATTACH_PARENT_PROCESS) == TRUE;
|
||||
bool has_console = ::AttachConsole(ATTACH_PARENT_PROCESS) == TRUE;
|
||||
#if 0
|
||||
if (!has_console || !has_shell_environment_variable()) {
|
||||
// We weren't launched from a console, so just return.
|
||||
has_console_attached_ = false;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
AllocConsole();
|
||||
|
||||
has_console_attached_ = true;
|
||||
|
|
|
@ -172,8 +172,7 @@ CommandVar<T>::CommandVar(const char* name, T* default_value,
|
|||
default_value_(*default_value),
|
||||
current_value_(default_value),
|
||||
commandline_value_(),
|
||||
description_(description)
|
||||
{}
|
||||
description_(description) {}
|
||||
|
||||
template <class T>
|
||||
ConfigVar<T>::ConfigVar(const char* name, T* default_value,
|
||||
|
|
|
@ -457,7 +457,7 @@ static ArchFloatMask ArchANDFloatMask(ArchFloatMask x, ArchFloatMask y) {
|
|||
|
||||
XE_FORCEINLINE
|
||||
static uint32_t ArchFloatMaskSignbit(ArchFloatMask x) {
|
||||
return static_cast<uint32_t>(_mm_movemask_ps(x) &1);
|
||||
return static_cast<uint32_t>(_mm_movemask_ps(x) & 1);
|
||||
}
|
||||
|
||||
constexpr ArchFloatMask floatmask_zero{.0f};
|
||||
|
@ -606,12 +606,13 @@ union IDivExtraInfo {
|
|||
} info;
|
||||
};
|
||||
// returns magicnum multiplier
|
||||
static constexpr uint32_t PregenerateUint32Div(uint32_t _denom, uint32_t& out_extra) {
|
||||
static constexpr uint32_t PregenerateUint32Div(uint32_t _denom,
|
||||
uint32_t& out_extra) {
|
||||
IDivExtraInfo extra{};
|
||||
|
||||
uint32_t d = _denom;
|
||||
int p=0;
|
||||
uint32_t nc=0, delta=0, q1=0, r1=0, q2=0, r2=0;
|
||||
int p = 0;
|
||||
uint32_t nc = 0, delta = 0, q1 = 0, r1 = 0, q2 = 0, r2 = 0;
|
||||
struct {
|
||||
unsigned M;
|
||||
int a;
|
||||
|
@ -662,7 +663,8 @@ static constexpr uint32_t ApplyUint32Div(uint32_t num, uint32_t mul,
|
|||
|
||||
extra.value_ = extradata;
|
||||
|
||||
uint32_t result = static_cast<uint32_t>((static_cast<uint64_t>(num) * static_cast<uint64_t>(mul)) >> 32);
|
||||
uint32_t result = static_cast<uint32_t>(
|
||||
(static_cast<uint64_t>(num) * static_cast<uint64_t>(mul)) >> 32);
|
||||
if (extra.info.add_) {
|
||||
uint32_t addend = result + num;
|
||||
addend = ((addend < result ? 0x80000000 : 0) | addend);
|
||||
|
@ -672,7 +674,8 @@ static constexpr uint32_t ApplyUint32Div(uint32_t num, uint32_t mul,
|
|||
}
|
||||
|
||||
static constexpr uint32_t ApplyUint32UMod(uint32_t num, uint32_t mul,
|
||||
uint32_t extradata, uint32_t original) {
|
||||
uint32_t extradata,
|
||||
uint32_t original) {
|
||||
uint32_t dived = ApplyUint32Div(num, mul, extradata);
|
||||
unsigned result = num - (dived * original);
|
||||
|
||||
|
@ -701,8 +704,7 @@ struct MagicDiv {
|
|||
return extra.info.shift_;
|
||||
}
|
||||
|
||||
constexpr uint32_t GetMultiplier() const { return multiplier_;
|
||||
}
|
||||
constexpr uint32_t GetMultiplier() const { return multiplier_; }
|
||||
constexpr uint32_t Apply(uint32_t numerator) const {
|
||||
return ApplyUint32Div(numerator, multiplier_, extradata_);
|
||||
}
|
||||
|
|
|
@ -180,7 +180,8 @@ static void vastcpy_impl_repmovs(CacheLine* XE_RESTRICT physaddr,
|
|||
__movsq((unsigned long long*)physaddr, (unsigned long long*)rdmapping,
|
||||
written_length / 8);
|
||||
#else
|
||||
memcpy((unsigned char*)physaddr, (const unsigned char*)rdmapping, written_length);
|
||||
memcpy((unsigned char*)physaddr, (const unsigned char*)rdmapping,
|
||||
written_length);
|
||||
#endif
|
||||
}
|
||||
XE_COLD
|
||||
|
@ -331,17 +332,17 @@ void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
|
|||
|
||||
__m256i output1 = _mm256_shuffle_epi8(input1, shufmask);
|
||||
__m256i output2 = _mm256_shuffle_epi8(input2, shufmask);
|
||||
//chrispy: todo, benchmark this w/ and w/out these prefetches here on multiple machines
|
||||
//finding a good distance for prefetchw in particular is probably important
|
||||
//for when we're writing across 2 cachelines
|
||||
#if 0
|
||||
// chrispy: todo, benchmark this w/ and w/out these prefetches here on multiple
|
||||
// machines finding a good distance for prefetchw in particular is probably
|
||||
// important for when we're writing across 2 cachelines
|
||||
#if 0
|
||||
if (i + 48 <= count) {
|
||||
swcache::PrefetchNTA(&src[i + 32]);
|
||||
if (amd64::GetFeatureFlags() & amd64::kX64EmitPrefetchW) {
|
||||
swcache::PrefetchW(&dest[i + 32]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
_mm256_storeu_si256(reinterpret_cast<__m256i*>(&dest[i]), output1);
|
||||
_mm256_storeu_si256(reinterpret_cast<__m256i*>(&dest[i + 8]), output2);
|
||||
}
|
||||
|
|
|
@ -17,10 +17,8 @@
|
|||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
|
||||
#include "xenia/base/byte_order.h"
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace memory {
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
#ifndef XENIA_BASE_MUTEX_H_
|
||||
#define XENIA_BASE_MUTEX_H_
|
||||
#include <mutex>
|
||||
#include "platform.h"
|
||||
#include "memory.h"
|
||||
#include "platform.h"
|
||||
#define XE_ENABLE_FAST_WIN32_MUTEX 1
|
||||
namespace xe {
|
||||
|
||||
|
@ -25,7 +25,7 @@ namespace xe {
|
|||
*/
|
||||
|
||||
class alignas(4096) xe_global_mutex {
|
||||
XE_MAYBE_UNUSED
|
||||
XE_MAYBE_UNUSED
|
||||
char detail[64];
|
||||
|
||||
public:
|
||||
|
@ -39,7 +39,7 @@ class alignas(4096) xe_global_mutex {
|
|||
using global_mutex_type = xe_global_mutex;
|
||||
|
||||
class alignas(64) xe_fast_mutex {
|
||||
XE_MAYBE_UNUSED
|
||||
XE_MAYBE_UNUSED
|
||||
char detail[64];
|
||||
|
||||
public:
|
||||
|
|
|
@ -148,7 +148,7 @@
|
|||
#if XE_COMPILER_HAS_GNU_EXTENSIONS == 1
|
||||
#define XE_LIKELY_IF(...) if (XE_LIKELY(__VA_ARGS__))
|
||||
#define XE_UNLIKELY_IF(...) if (XE_UNLIKELY(__VA_ARGS__))
|
||||
#define XE_MAYBE_UNUSED __attribute__((unused))
|
||||
#define XE_MAYBE_UNUSED __attribute__((unused))
|
||||
#else
|
||||
#if __cplusplus >= 202002
|
||||
#define XE_LIKELY_IF(...) if (!!(__VA_ARGS__)) [[likely]]
|
||||
|
@ -157,7 +157,7 @@
|
|||
#define XE_LIKELY_IF(...) if (!!(__VA_ARGS__))
|
||||
#define XE_UNLIKELY_IF(...) if (!!(__VA_ARGS__))
|
||||
#endif
|
||||
#define XE_MAYBE_UNUSED
|
||||
#define XE_MAYBE_UNUSED
|
||||
#endif
|
||||
// only use __restrict if MSVC, for clang/gcc we can use -fstrict-aliasing which
|
||||
// acts as __restrict across the board todo: __restrict is part of the type
|
||||
|
|
|
@ -44,21 +44,21 @@
|
|||
ntdll versions of functions often skip through a lot of extra garbage in
|
||||
KernelBase
|
||||
*/
|
||||
#define XE_NTDLL_IMPORT(name, cls, clsvar) \
|
||||
static class cls { \
|
||||
public: \
|
||||
FARPROC fn; \
|
||||
cls() : fn(nullptr) { \
|
||||
auto ntdll = GetModuleHandleA("ntdll.dll"); \
|
||||
if (ntdll) { \
|
||||
fn = GetProcAddress(ntdll, #name); \
|
||||
} \
|
||||
} \
|
||||
template <typename TRet = void, typename... TArgs> \
|
||||
inline TRet invoke(TArgs... args) { \
|
||||
return reinterpret_cast<TRet(NTAPI*)(TArgs...)>(fn)(args...); \
|
||||
} \
|
||||
inline operator bool() const { return fn != nullptr; } \
|
||||
#define XE_NTDLL_IMPORT(name, cls, clsvar) \
|
||||
static class cls { \
|
||||
public: \
|
||||
FARPROC fn; \
|
||||
cls() : fn(nullptr) { \
|
||||
auto ntdll = GetModuleHandleA("ntdll.dll"); \
|
||||
if (ntdll) { \
|
||||
fn = GetProcAddress(ntdll, #name); \
|
||||
} \
|
||||
} \
|
||||
template <typename TRet = void, typename... TArgs> \
|
||||
inline TRet invoke(TArgs... args) { \
|
||||
return reinterpret_cast<TRet(NTAPI*)(TArgs...)>(fn)(args...); \
|
||||
} \
|
||||
inline operator bool() const { return fn != nullptr; } \
|
||||
} clsvar
|
||||
#else
|
||||
#define XE_NTDLL_IMPORT(name, cls, clsvar) static constexpr bool clsvar = false
|
||||
|
|
|
@ -68,7 +68,6 @@ class RingBuffer {
|
|||
ring_size_t offset_delta = write_offs - read_offs;
|
||||
ring_size_t wrap_read_count = (cap - read_offs) + write_offs;
|
||||
|
||||
|
||||
if (XE_LIKELY(read_offs <= write_offs)) {
|
||||
return offset_delta; // will be 0 if they are equal, semantically
|
||||
// identical to old code (i checked the asm, msvc
|
||||
|
|
|
@ -34,7 +34,6 @@ struct SimpleFreelist {
|
|||
node->next_ = head_;
|
||||
head_ = node;
|
||||
}
|
||||
void Reset() { head_ = nullptr;
|
||||
}
|
||||
void Reset() { head_ = nullptr; }
|
||||
};
|
||||
} // namespace xe
|
|
@ -906,9 +906,9 @@ class PosixEvent : public PosixConditionHandle<Event> {
|
|||
void Set() override { handle_.Signal(); }
|
||||
void Reset() override { handle_.Reset(); }
|
||||
EventInfo Query() {
|
||||
EventInfo result{};
|
||||
assert_always();
|
||||
return result;
|
||||
EventInfo result{};
|
||||
assert_always();
|
||||
return result;
|
||||
}
|
||||
void Pulse() override {
|
||||
using namespace std::chrono_literals;
|
||||
|
|
|
@ -33,7 +33,9 @@ using WaitItem = TimerQueueWaitItem;
|
|||
*/
|
||||
|
||||
/*
|
||||
edit: actually had to change it back, when i was testing it only worked because i fixed disruptorplus' code to compile (it gives wrong args to condition_variable::wait_until) but now builds
|
||||
edit: actually had to change it back, when i was testing it only worked
|
||||
because i fixed disruptorplus' code to compile (it gives wrong args to
|
||||
condition_variable::wait_until) but now builds
|
||||
|
||||
*/
|
||||
using WaitStrat = dp::blocking_wait_strategy;
|
||||
|
@ -205,7 +207,7 @@ void TimerQueueWaitItem::Disarm() {
|
|||
spinner.spin_once();
|
||||
}
|
||||
}
|
||||
//unused
|
||||
// unused
|
||||
std::weak_ptr<WaitItem> QueueTimerOnce(std::function<void(void*)> callback,
|
||||
void* userdata,
|
||||
WaitItem::clock::time_point due) {
|
||||
|
|
|
@ -78,7 +78,8 @@ class Backend {
|
|||
virtual void InitializeBackendContext(void* ctx) {}
|
||||
|
||||
/*
|
||||
Free any dynamically allocated data/resources that the backendcontext uses
|
||||
Free any dynamically allocated data/resources that the backendcontext
|
||||
uses
|
||||
*/
|
||||
virtual void DeinitializeBackendContext(void* ctx) {}
|
||||
virtual void SetGuestRoundingMode(void* ctx, unsigned int mode){};
|
||||
|
|
|
@ -314,7 +314,6 @@ SIMPLE_THREEOPERAND(vpshaw, xop_VPSHAW)
|
|||
SIMPLE_THREEOPERAND(vpshad, xop_VPSHAD)
|
||||
SIMPLE_THREEOPERAND(vpshaq, xop_VPSHAQ)
|
||||
|
||||
|
||||
SIMPLE_THREEOPERAND(vpshlb, xop_VPSHLB)
|
||||
SIMPLE_THREEOPERAND(vpshlw, xop_VPSHLW)
|
||||
SIMPLE_THREEOPERAND(vpshld, xop_VPSHLD)
|
||||
|
|
|
@ -924,7 +924,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
Xbyak::Label L18, L2, L35, L4, L9, L8, L10, L11, L12, L13, L1;
|
||||
Xbyak::Label LC1, _LCPI3_1;
|
||||
Xbyak::Label handle_denormal_input;
|
||||
Xbyak::Label specialcheck_1, convert_to_signed_inf_and_ret, handle_oddball_denormal;
|
||||
Xbyak::Label specialcheck_1, convert_to_signed_inf_and_ret,
|
||||
handle_oddball_denormal;
|
||||
|
||||
auto emulate_lzcnt_helper_unary_reg = [this](auto& reg, auto& scratch_reg) {
|
||||
inLocalLabel();
|
||||
|
@ -941,19 +942,19 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
vmovd(r8d, xmm0);
|
||||
vmovaps(xmm1, xmm0);
|
||||
mov(ecx, r8d);
|
||||
//extract mantissa
|
||||
// extract mantissa
|
||||
and_(ecx, 0x7fffff);
|
||||
mov(edx, ecx);
|
||||
cmp(r8d, 0xff800000);
|
||||
jz(specialcheck_1, CodeGenerator::T_NEAR);
|
||||
//is exponent zero?
|
||||
// is exponent zero?
|
||||
test(r8d, 0x7f800000);
|
||||
jne(L18);
|
||||
test(ecx, ecx);
|
||||
jne(L2);
|
||||
|
||||
L(L18);
|
||||
//extract biased exponent and unbias
|
||||
// extract biased exponent and unbias
|
||||
mov(r9d, r8d);
|
||||
shr(r9d, 23);
|
||||
movzx(r9d, r9b);
|
||||
|
@ -988,7 +989,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
vxorps(xmm0, xmm0, xmm0);
|
||||
vcomiss(xmm0, xmm1);
|
||||
jbe(L9);
|
||||
vmovss(xmm2, ptr[rip+LC1]);
|
||||
vmovss(xmm2, ptr[rip + LC1]);
|
||||
vandps(xmm1, GetXmmConstPtr(XMMSignMaskF32));
|
||||
|
||||
test(edx, edx);
|
||||
|
@ -1019,7 +1020,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
|
||||
L(L11);
|
||||
vxorps(xmm2, xmm2, xmm2);
|
||||
vmovss(xmm0, ptr[rip+LC1]);
|
||||
vmovss(xmm0, ptr[rip + LC1]);
|
||||
vcomiss(xmm2, xmm1);
|
||||
ja(L1, CodeGenerator::T_NEAR);
|
||||
mov(ecx, 127);
|
||||
|
@ -1080,7 +1081,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
or_(ecx, r8d);
|
||||
or_(ecx, eax);
|
||||
vmovd(xmm0, ecx);
|
||||
vaddss(xmm0, xmm1);//apply DAZ behavior to output
|
||||
vaddss(xmm0, xmm1); // apply DAZ behavior to output
|
||||
|
||||
L(L1);
|
||||
ret();
|
||||
|
@ -1107,7 +1108,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
xchg(ecx, edx);
|
||||
// esi is just the value of xmm0's low word, so we can restore it from there
|
||||
shl(r8d, cl);
|
||||
mov(ecx, edx); // restore ecx, dont xchg because we're going to spoil edx anyway
|
||||
mov(ecx,
|
||||
edx); // restore ecx, dont xchg because we're going to spoil edx anyway
|
||||
mov(edx, r8d);
|
||||
vmovd(r8d, xmm0);
|
||||
}
|
||||
|
@ -1115,8 +1117,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
jmp(L4);
|
||||
|
||||
L(specialcheck_1);
|
||||
//should be extremely rare
|
||||
vmovss(xmm0, ptr[rip+LC1]);
|
||||
// should be extremely rare
|
||||
vmovss(xmm0, ptr[rip + LC1]);
|
||||
ret();
|
||||
|
||||
L(handle_oddball_denormal);
|
||||
|
@ -1131,7 +1133,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
|
|||
dd(0xFF800000);
|
||||
dd(0x7F800000);
|
||||
L(LC1);
|
||||
//the position of 7FC00000 here matters, this address will be indexed in handle_oddball_denormal
|
||||
// the position of 7FC00000 here matters, this address will be indexed in
|
||||
// handle_oddball_denormal
|
||||
dd(0x7FC00000);
|
||||
dd(0x5F34FD00);
|
||||
|
||||
|
@ -1148,11 +1151,13 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
|
|||
Xbyak::Label check_scalar_operation_in_vmx, actual_vector_version;
|
||||
auto result_ptr =
|
||||
GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_xmms[0]));
|
||||
auto counter_ptr = GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_u64s[2]));
|
||||
auto counter_ptr =
|
||||
GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_u64s[2]));
|
||||
counter_ptr.setBit(64);
|
||||
|
||||
//shuffle and xor to check whether all lanes are equal
|
||||
//sadly has to leave the float pipeline for the vptest, which is moderate yikes
|
||||
// shuffle and xor to check whether all lanes are equal
|
||||
// sadly has to leave the float pipeline for the vptest, which is moderate
|
||||
// yikes
|
||||
vmovhlps(xmm2, xmm0, xmm0);
|
||||
vmovsldup(xmm1, xmm0);
|
||||
vxorps(xmm1, xmm1, xmm0);
|
||||
|
@ -1160,7 +1165,7 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
|
|||
vorps(xmm2, xmm1, xmm2);
|
||||
vptest(xmm2, xmm2);
|
||||
jnz(check_scalar_operation_in_vmx);
|
||||
//jmp(scalar_helper, CodeGenerator::T_NEAR);
|
||||
// jmp(scalar_helper, CodeGenerator::T_NEAR);
|
||||
call(scalar_helper);
|
||||
vshufps(xmm0, xmm0, xmm0, 0);
|
||||
ret();
|
||||
|
@ -1169,7 +1174,7 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
|
|||
|
||||
vptest(xmm0, ptr[backend()->LookupXMMConstantAddress(XMMThreeFloatMask)]);
|
||||
jnz(actual_vector_version);
|
||||
vshufps(xmm0, xmm0,xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
vshufps(xmm0, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
call(scalar_helper);
|
||||
// this->DebugBreak();
|
||||
vinsertps(xmm0, xmm0, (3 << 4));
|
||||
|
@ -1189,11 +1194,11 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
|
|||
|
||||
L(loop);
|
||||
lea(rax, result_ptr);
|
||||
vmovss(xmm0, ptr[rax+rcx*4]);
|
||||
vmovss(xmm0, ptr[rax + rcx * 4]);
|
||||
call(scalar_helper);
|
||||
mov(rcx, counter_ptr);
|
||||
lea(rax, result_ptr);
|
||||
vmovss(ptr[rax+rcx*4], xmm0);
|
||||
vmovss(ptr[rax + rcx * 4], xmm0);
|
||||
inc(ecx);
|
||||
cmp(ecx, 4);
|
||||
mov(counter_ptr, rcx);
|
||||
|
@ -1274,7 +1279,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() {
|
|||
xor_(eax, 8);
|
||||
sub(edx, ecx);
|
||||
lea(rcx, ptr[rip + frsqrte_table2]);
|
||||
movzx(eax, byte[rax+rcx]);
|
||||
movzx(eax, byte[rax + rcx]);
|
||||
sal(rdx, 52);
|
||||
sal(rax, 44);
|
||||
or_(rax, rdx);
|
||||
|
|
|
@ -12,8 +12,8 @@
|
|||
|
||||
#include <memory>
|
||||
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/bit_map.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/cpu/backend/backend.h"
|
||||
|
||||
#if XE_PLATFORM_WIN32 == 1
|
||||
|
@ -44,9 +44,10 @@ typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
|
|||
typedef void (*ResolveFunctionThunk)();
|
||||
|
||||
/*
|
||||
place guest trampolines in the memory range that the HV normally occupies.
|
||||
This way guests can call in via the indirection table and we don't have to clobber/reuse an existing memory range
|
||||
The xboxkrnl range is already used by export trampolines (see kernel/kernel_module.cc)
|
||||
place guest trampolines in the memory range that the HV normally occupies.
|
||||
This way guests can call in via the indirection table and we don't have to
|
||||
clobber/reuse an existing memory range The xboxkrnl range is already used by
|
||||
export trampolines (see kernel/kernel_module.cc)
|
||||
*/
|
||||
static constexpr uint32_t GUEST_TRAMPOLINE_BASE = 0x80000000;
|
||||
static constexpr uint32_t GUEST_TRAMPOLINE_END = 0x80040000;
|
||||
|
@ -75,11 +76,13 @@ struct X64BackendStackpoint {
|
|||
// use
|
||||
unsigned guest_return_address_;
|
||||
};
|
||||
enum : uint32_t {
|
||||
kX64BackendMXCSRModeBit = 0,
|
||||
kX64BackendHasReserveBit = 1,
|
||||
kX64BackendNJMOn = 2, //non-java mode bit is currently set. for use in software fp routines
|
||||
kX64BackendNonIEEEMode = 3, //non-ieee mode is currently enabled for scalar fpu.
|
||||
enum : uint32_t {
|
||||
kX64BackendMXCSRModeBit = 0,
|
||||
kX64BackendHasReserveBit = 1,
|
||||
kX64BackendNJMOn =
|
||||
2, // non-java mode bit is currently set. for use in software fp routines
|
||||
kX64BackendNonIEEEMode =
|
||||
3, // non-ieee mode is currently enabled for scalar fpu.
|
||||
};
|
||||
// located prior to the ctx register
|
||||
// some things it would be nice to have be per-emulator instance instead of per
|
||||
|
@ -170,8 +173,8 @@ class X64Backend : public Backend {
|
|||
reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext));
|
||||
}
|
||||
virtual uint32_t CreateGuestTrampoline(GuestTrampolineProc proc,
|
||||
void* userdata1,
|
||||
void* userdata2, bool long_term) override;
|
||||
void* userdata1, void* userdata2,
|
||||
bool long_term) override;
|
||||
|
||||
virtual void FreeGuestTrampoline(uint32_t trampoline_addr) override;
|
||||
virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override;
|
||||
|
@ -213,6 +216,7 @@ class X64Backend : public Backend {
|
|||
void* vrsqrtefp_vector_helper = nullptr;
|
||||
void* vrsqrtefp_scalar_helper = nullptr;
|
||||
void* frsqrtefp_helper = nullptr;
|
||||
|
||||
private:
|
||||
#if XE_X64_PROFILER_AVAILABLE == 1
|
||||
GuestProfilerData profiler_data_;
|
||||
|
|
|
@ -93,7 +93,8 @@ class X64CodeCache : public CodeCache {
|
|||
// This is picked to be high enough to cover whatever we can reasonably
|
||||
// expect. If we hit issues with this it probably means some corner case
|
||||
// in analysis triggering.
|
||||
//chrispy: raised this, some games that were compiled with low optimization levels can exceed this
|
||||
// chrispy: raised this, some games that were compiled with low optimization
|
||||
// levels can exceed this
|
||||
static const size_t kMaximumFunctionCount = 1000000;
|
||||
|
||||
struct UnwindReservation {
|
||||
|
|
|
@ -213,7 +213,8 @@ Win32X64CodeCache::RequestUnwindReservation(uint8_t* entry_address) {
|
|||
if (unwind_table_count_ >= kMaximumFunctionCount) {
|
||||
// we should not just be ignoring this in release if it happens
|
||||
xe::FatalError(
|
||||
"Unwind table count (unwind_table_count_) exceeded maximum! Please report this to "
|
||||
"Unwind table count (unwind_table_count_) exceeded maximum! Please "
|
||||
"report this to "
|
||||
"Xenia/Canary developers");
|
||||
}
|
||||
#else
|
||||
|
|
|
@ -210,24 +210,27 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
|
|||
// Adding or changing anything here must be matched!
|
||||
|
||||
/*
|
||||
pick a page to use as the local base as close to the commonly accessed page that contains most backend fields
|
||||
the sizes that are checked are chosen based on PTE coalescing sizes. zen does 16k or 32k
|
||||
pick a page to use as the local base as close to the commonly accessed page
|
||||
that contains most backend fields the sizes that are checked are chosen
|
||||
based on PTE coalescing sizes. zen does 16k or 32k
|
||||
*/
|
||||
size_t stack_size = StackLayout::GUEST_STACK_SIZE;
|
||||
if (stack_offset < (4096 - sizeof(X64BackendContext))) {
|
||||
locals_page_delta_ = 4096;
|
||||
} else if (stack_offset < (16384 - sizeof(X64BackendContext))) {//16k PTE coalescing
|
||||
} else if (stack_offset <
|
||||
(16384 - sizeof(X64BackendContext))) { // 16k PTE coalescing
|
||||
locals_page_delta_ = 16384;
|
||||
} else if (stack_offset < (32768 - sizeof(X64BackendContext))) {
|
||||
locals_page_delta_ = 32768;
|
||||
} else if (stack_offset < (65536 - sizeof(X64BackendContext))) {
|
||||
locals_page_delta_ = 65536;
|
||||
} else {
|
||||
//extremely unlikely, fall back to stack
|
||||
stack_size = xe::align<size_t>(StackLayout::GUEST_STACK_SIZE + stack_offset, 16);
|
||||
// extremely unlikely, fall back to stack
|
||||
stack_size =
|
||||
xe::align<size_t>(StackLayout::GUEST_STACK_SIZE + stack_offset, 16);
|
||||
locals_page_delta_ = 0;
|
||||
}
|
||||
|
||||
|
||||
assert_true((stack_size + 8) % 16 == 0);
|
||||
func_info.stack_size = stack_size;
|
||||
stack_size_ = stack_size;
|
||||
|
@ -1002,7 +1005,7 @@ static inline vec128_t v128_setr_bytes(unsigned char v0, unsigned char v1,
|
|||
}
|
||||
|
||||
static inline vec128_t v128_setr_words(uint32_t v0, uint32_t v1, uint32_t v2,
|
||||
uint32_t v3) {
|
||||
uint32_t v3) {
|
||||
vec128_t result;
|
||||
result.u32[0] = v0;
|
||||
result.u32[1] = v1;
|
||||
|
@ -1181,7 +1184,7 @@ static const vec128_t xmm_consts[] = {
|
|||
v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80),
|
||||
// XMMVSRMask
|
||||
vec128b(1),
|
||||
//XMMVRsqrteTableStart
|
||||
// XMMVRsqrteTableStart
|
||||
v128_setr_words(0x568B4FD, 0x4F3AF97, 0x48DAAA5, 0x435A618),
|
||||
v128_setr_words(0x3E7A1E4, 0x3A29DFE, 0x3659A5C, 0x32E96F8),
|
||||
v128_setr_words(0x2FC93CA, 0x2D090CE, 0x2A88DFE, 0x2838B57),
|
||||
|
@ -1190,8 +1193,8 @@ static const vec128_t xmm_consts[] = {
|
|||
v128_setr_words(0x2C27279, 0x2926FB7, 0x2666D26, 0x23F6AC0),
|
||||
v128_setr_words(0x21D6881, 0x1FD6665, 0x1E16468, 0x1C76287),
|
||||
v128_setr_words(0x1AF60C1, 0x1995F12, 0x1855D79, 0x1735BF4),
|
||||
//XMMVRsqrteTableBase
|
||||
vec128i(0) //filled in later
|
||||
// XMMVRsqrteTableBase
|
||||
vec128i(0) // filled in later
|
||||
};
|
||||
|
||||
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
|
||||
|
@ -1267,12 +1270,13 @@ uintptr_t X64Emitter::PlaceConstData() {
|
|||
|
||||
std::memcpy(mem, xmm_consts, sizeof(xmm_consts));
|
||||
/*
|
||||
set each 32-bit element of the constant XMMVRsqrteTableBase to be the address of the start of the constant XMMVRsqrteTableStart
|
||||
this
|
||||
set each 32-bit element of the constant XMMVRsqrteTableBase to be the
|
||||
address of the start of the constant XMMVRsqrteTableStart this
|
||||
*/
|
||||
vec128_t* deferred_constants = reinterpret_cast<vec128_t*>(mem);
|
||||
vec128_t* vrsqrte_table_base = &deferred_constants[XMMVRsqrteTableBase];
|
||||
uint32_t ptr_to_vrsqrte_table32 = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&deferred_constants[XMMVRsqrteTableStart]));
|
||||
uint32_t ptr_to_vrsqrte_table32 = static_cast<uint32_t>(
|
||||
reinterpret_cast<uintptr_t>(&deferred_constants[XMMVRsqrteTableStart]));
|
||||
*vrsqrte_table_base = vec128i(ptr_to_vrsqrte_table32);
|
||||
|
||||
memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr);
|
||||
|
@ -1288,8 +1292,10 @@ void X64Emitter::FreeConstData(uintptr_t data) {
|
|||
Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||
// Load through fixed constant table setup by PlaceConstData.
|
||||
// It's important that the pointer is not signed, as it will be sign-extended.
|
||||
void* emitter_data_ptr = backend_->LookupXMMConstantAddress(static_cast<unsigned>(id));
|
||||
xenia_assert(reinterpret_cast<uintptr_t>(emitter_data_ptr) < (1ULL << 31));//must not have signbit set
|
||||
void* emitter_data_ptr =
|
||||
backend_->LookupXMMConstantAddress(static_cast<unsigned>(id));
|
||||
xenia_assert(reinterpret_cast<uintptr_t>(emitter_data_ptr) <
|
||||
(1ULL << 31)); // must not have signbit set
|
||||
return ptr[emitter_data_ptr];
|
||||
}
|
||||
// Implies possible StashXmm(0, ...)!
|
||||
|
@ -1610,8 +1616,8 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
|
|||
|
||||
return SimdDomain::DONTCARE;
|
||||
}
|
||||
Xbyak::RegExp X64Emitter::GetLocalsBase() const {
|
||||
return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_;
|
||||
Xbyak::RegExp X64Emitter::GetLocalsBase() const {
|
||||
return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_;
|
||||
}
|
||||
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
|
||||
/*
|
||||
|
|
|
@ -176,7 +176,10 @@ enum XmmConst {
|
|||
XMMVSRShlByteshuf,
|
||||
XMMVSRMask,
|
||||
XMMVRsqrteTableStart,
|
||||
XMMVRsqrteTableBase = XMMVRsqrteTableStart + (32 / 4), //32 4-byte elements in table, 4 4-byte elements fit in each xmm
|
||||
XMMVRsqrteTableBase =
|
||||
XMMVRsqrteTableStart +
|
||||
(32 /
|
||||
4), // 32 4-byte elements in table, 4 4-byte elements fit in each xmm
|
||||
|
||||
};
|
||||
using amdfx::xopcompare_e;
|
||||
|
@ -311,7 +314,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
size_t stack_size() const { return stack_size_; }
|
||||
Xbyak::RegExp GetLocalsBase() const;
|
||||
SimdDomain DeduceSimdDomain(const hir::Value* for_value);
|
||||
|
||||
|
||||
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
|
||||
/*
|
||||
returns true if had to load mxcsr. DOT_PRODUCT can use this to skip
|
||||
|
@ -390,6 +393,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
void EmitGetCurrentThreadId();
|
||||
void EmitTraceUserCallReturn();
|
||||
static void HandleStackpointOverflowError(ppc::PPCContext* context);
|
||||
|
||||
protected:
|
||||
Processor* processor_ = nullptr;
|
||||
X64Backend* backend_ = nullptr;
|
||||
|
|
|
@ -398,8 +398,7 @@ struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
|
|||
};
|
||||
|
||||
template <typename T>
|
||||
XE_MAYBE_UNUSED
|
||||
static const T GetTempReg(X64Emitter& e);
|
||||
XE_MAYBE_UNUSED static const T GetTempReg(X64Emitter& e);
|
||||
template <>
|
||||
XE_MAYBE_UNUSED const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
|
||||
return e.al;
|
||||
|
|
|
@ -705,7 +705,8 @@ struct STORE_LOCAL_I16
|
|||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
if (LocalStoreMayUseMembaseLow(e, i)) {
|
||||
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt16());
|
||||
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()],
|
||||
e.GetMembaseReg().cvt16());
|
||||
} else {
|
||||
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
|
@ -716,7 +717,8 @@ struct STORE_LOCAL_I32
|
|||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
if (LocalStoreMayUseMembaseLow(e, i)) {
|
||||
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt32());
|
||||
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()],
|
||||
e.GetMembaseReg().cvt32());
|
||||
} else {
|
||||
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
|
|
|
@ -2120,9 +2120,9 @@ struct RSQRT_V128 : Sequence<RSQRT_V128, I<OPCODE_RSQRT, V128Op, V128Op>> {
|
|||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
|
||||
/*
|
||||
the vast majority of inputs to vrsqrte come from vmsum3 or vmsum4 as part
|
||||
of a vector normalization sequence. in fact, its difficult to find uses of vrsqrte in titles
|
||||
that have inputs which do not come from vmsum.
|
||||
the vast majority of inputs to vrsqrte come from vmsum3 or vmsum4 as
|
||||
part of a vector normalization sequence. in fact, its difficult to find
|
||||
uses of vrsqrte in titles that have inputs which do not come from vmsum.
|
||||
*/
|
||||
if (i.src1.value && i.src1.value->AllFloatVectorLanesSameValue()) {
|
||||
e.vmovss(e.xmm0, src1);
|
||||
|
@ -3193,8 +3193,7 @@ struct SET_ROUNDING_MODE_I32
|
|||
|
||||
if (constant_value & 4) {
|
||||
e.or_(flags_ptr, 1U << kX64BackendNonIEEEMode);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
e.btr(flags_ptr, kX64BackendNonIEEEMode);
|
||||
}
|
||||
e.mov(e.dword[e.rsp + StackLayout::GUEST_SCRATCH], e.eax);
|
||||
|
@ -3202,14 +3201,14 @@ struct SET_ROUNDING_MODE_I32
|
|||
e.vldmxcsr(e.dword[e.rsp + StackLayout::GUEST_SCRATCH]);
|
||||
|
||||
} else {
|
||||
//can andnot, but this is a very infrequently used opcode
|
||||
// can andnot, but this is a very infrequently used opcode
|
||||
e.mov(e.eax, 1U << kX64BackendNonIEEEMode);
|
||||
e.mov(e.edx, e.eax);
|
||||
e.not_(e.edx);
|
||||
e.mov(e.ecx, flags_ptr);
|
||||
//edx = flags w/ non ieee cleared
|
||||
// edx = flags w/ non ieee cleared
|
||||
e.and_(e.edx, e.ecx);
|
||||
//eax = flags w/ non ieee set
|
||||
// eax = flags w/ non ieee set
|
||||
e.or_(e.eax, e.ecx);
|
||||
e.bt(i.src1, 2);
|
||||
|
||||
|
|
|
@ -122,10 +122,12 @@ class StackLayout {
|
|||
*
|
||||
*/
|
||||
static const size_t GUEST_STACK_SIZE = 104;
|
||||
//was GUEST_CTX_HOME, can't remove because that'd throw stack alignment off. instead, can be used as a temporary in sequences
|
||||
// was GUEST_CTX_HOME, can't remove because that'd throw stack alignment off.
|
||||
// instead, can be used as a temporary in sequences
|
||||
static const size_t GUEST_SCRATCH = 0;
|
||||
|
||||
//when profiling is on, this stores the nanosecond time at the start of the function
|
||||
|
||||
// when profiling is on, this stores the nanosecond time at the start of the
|
||||
// function
|
||||
static const size_t GUEST_PROFILER_START = 80;
|
||||
static const size_t GUEST_RET_ADDR = 88;
|
||||
static const size_t GUEST_CALL_RET_ADDR = 96;
|
||||
|
|
|
@ -29,15 +29,14 @@ namespace x64 {
|
|||
|
||||
bool trace_enabled = true;
|
||||
|
||||
#define THREAD_MATCH \
|
||||
(!TARGET_THREAD || ppc_context->thread_id == TARGET_THREAD)
|
||||
#define THREAD_MATCH (!TARGET_THREAD || ppc_context->thread_id == TARGET_THREAD)
|
||||
#define IFLUSH()
|
||||
#define IPRINT(s) \
|
||||
if (trace_enabled && THREAD_MATCH) \
|
||||
xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s, xe::LogSrc::Cpu)
|
||||
#define DFLUSH()
|
||||
#define DPRINT(...) \
|
||||
if (trace_enabled && THREAD_MATCH) \
|
||||
#define DPRINT(...) \
|
||||
if (trace_enabled && THREAD_MATCH) \
|
||||
xe::logging::AppendLogLineFormat(xe::LogSrc::Cpu, xe::LogLevel::Debug, 't', \
|
||||
__VA_ARGS__)
|
||||
|
||||
|
|
|
@ -429,7 +429,7 @@ bool RegisterAllocationPass::SpillOneRegister(HIRBuilder* builder, Block* block,
|
|||
|
||||
// Set the local slot of the new value to our existing one. This way we will
|
||||
// reuse that same memory if needed.
|
||||
new_value->SetLocalSlot( spill_value->GetLocalSlot());
|
||||
new_value->SetLocalSlot(spill_value->GetLocalSlot());
|
||||
|
||||
// Rename all future uses of the SSA value to the new value as loaded
|
||||
// from the local.
|
||||
|
|
|
@ -1372,27 +1372,27 @@ bool SimplificationPass::SimplifyVectorOps(hir::Instr* i,
|
|||
}
|
||||
|
||||
/*
|
||||
splatting a 32-bit value extracted from a vector where all 4 32-bit values are the same should be eliminated and
|
||||
instead use the vector extracted from, which will be identical
|
||||
have seen this happen, some games vmsum and then splat the low float to all 4 floats, even though it already is there
|
||||
splatting a 32-bit value extracted from a vector where all 4 32-bit values
|
||||
are the same should be eliminated and instead use the vector extracted from,
|
||||
which will be identical have seen this happen, some games vmsum and then
|
||||
splat the low float to all 4 floats, even though it already is there
|
||||
*/
|
||||
if (opc == OPCODE_SPLAT) {
|
||||
if (i->dest->type == VEC128_TYPE) {
|
||||
auto splatted_value = i->src1.value;
|
||||
auto splat_type = splatted_value->type;
|
||||
if (splat_type == FLOAT32_TYPE || splat_type == INT32_TYPE) {
|
||||
//its a splat of a fourbyte value, check the definition
|
||||
// its a splat of a fourbyte value, check the definition
|
||||
auto splat_input_definition = splatted_value->GetDefSkipAssigns();
|
||||
if (splat_input_definition) {
|
||||
auto defining_opcode = splat_input_definition->GetOpcodeNum();
|
||||
if (defining_opcode == OPCODE_EXTRACT) {
|
||||
auto value_extracted_from = splat_input_definition->src1.value;
|
||||
if (value_extracted_from->type == VEC128_TYPE) {
|
||||
|
||||
xenia_assert(splat_input_definition->dest->type == splat_type);
|
||||
|
||||
if (value_extracted_from->AllFloatVectorLanesSameValue()) {
|
||||
i->Replace(&OPCODE_ASSIGN_info,0);
|
||||
i->Replace(&OPCODE_ASSIGN_info, 0);
|
||||
i->set_src1(value_extracted_from);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ class SimplificationPass : public ConditionalGroupSubpass {
|
|||
// handles simple multiplication/addition rules
|
||||
bool SimplifyBasicArith(hir::HIRBuilder* builder);
|
||||
|
||||
bool SimplifyVectorOps(hir::HIRBuilder* builder);
|
||||
bool SimplifyVectorOps(hir::HIRBuilder* builder);
|
||||
bool SimplifyVectorOps(hir::Instr* i, hir::HIRBuilder* builder);
|
||||
bool SimplifyBasicArith(hir::Instr* i, hir::HIRBuilder* builder);
|
||||
bool SimplifyAddWithSHL(hir::Instr* i, hir::HIRBuilder* builder);
|
||||
|
|
|
@ -49,7 +49,7 @@ class EntryTable {
|
|||
xe::global_critical_region global_critical_region_;
|
||||
// TODO(benvanik): replace with a better data structure.
|
||||
xe::split_map<uint32_t, Entry*> map_;
|
||||
//std::unordered_map<uint32_t, Entry*> map_;
|
||||
// std::unordered_map<uint32_t, Entry*> map_;
|
||||
};
|
||||
|
||||
} // namespace cpu
|
||||
|
|
|
@ -95,7 +95,6 @@ class Export {
|
|||
uint32_t variable_ptr;
|
||||
|
||||
struct {
|
||||
|
||||
// Trampoline that is called from the guest-to-host thunk.
|
||||
// Expects only PPC context as first arg.
|
||||
ExportTrampoline trampoline;
|
||||
|
|
|
@ -115,7 +115,6 @@ uintptr_t GuestFunction::MapGuestAddressToMachineCode(
|
|||
return reinterpret_cast<uintptr_t>(machine_code()) + entry->code_offset;
|
||||
} else {
|
||||
return 0;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -79,10 +79,11 @@ class Instr {
|
|||
void MoveBefore(Instr* other);
|
||||
void Replace(const OpcodeInfo* new_opcode, uint16_t new_flags);
|
||||
void UnlinkAndNOP();
|
||||
//chrispy: wanted to change this one to Remove, but i changed Remove's name to UnlinkAndNOP,
|
||||
//so if changes happened in master that we wanted to port over, and those changes used Remove, then we would have a lot of issues that the cause of would
|
||||
//be difficult to track
|
||||
//^todo: rework this comment, im frazzled
|
||||
// chrispy: wanted to change this one to Remove, but i changed Remove's name
|
||||
// to UnlinkAndNOP, so if changes happened in master that we wanted to port
|
||||
// over, and those changes used Remove, then we would have a lot of issues
|
||||
// that the cause of would be difficult to track ^todo: rework this comment,
|
||||
// im frazzled
|
||||
void Deallocate();
|
||||
const OpcodeInfo* GetOpcodeInfo() const { return opcode; }
|
||||
// if opcode is null, we have bigger problems
|
||||
|
|
|
@ -30,9 +30,7 @@ class Label {
|
|||
// this will later be used as an input to xbyak. xbyak only accepts
|
||||
// std::string as a value, not passed by reference, so precomputing the
|
||||
// stringification does not help
|
||||
std::string GetIdString() {
|
||||
return std::to_string(id);
|
||||
}
|
||||
std::string GetIdString() { return std::to_string(id); }
|
||||
};
|
||||
|
||||
} // namespace hir
|
||||
|
|
|
@ -43,7 +43,7 @@ void Value::RemoveUse(Use* use) {
|
|||
use->next->prev = use->prev;
|
||||
}
|
||||
|
||||
//HIRBuilder::GetCurrent()->DeallocateUse(use);
|
||||
// HIRBuilder::GetCurrent()->DeallocateUse(use);
|
||||
}
|
||||
|
||||
uint32_t Value::AsUint32() {
|
||||
|
@ -1805,7 +1805,7 @@ bool Value::AllUsesByOneInsn() const {
|
|||
return true;
|
||||
}
|
||||
bool Value::AllFloatVectorLanesSameValue(const hir::Value* for_value,
|
||||
uint32_t current_depth) {
|
||||
uint32_t current_depth) {
|
||||
// limit recursion, otherwise this function will slow down emission
|
||||
if (current_depth == 16) {
|
||||
return false;
|
||||
|
@ -1819,7 +1819,8 @@ re_enter:
|
|||
xenia_assert(for_value->IsConstant());
|
||||
|
||||
auto&& constant_value = for_value->constant.v128;
|
||||
for (unsigned constant_lane_index = 1; constant_lane_index < 4; ++constant_lane_index) {
|
||||
for (unsigned constant_lane_index = 1; constant_lane_index < 4;
|
||||
++constant_lane_index) {
|
||||
if (constant_value.u32[0] != constant_value.u32[constant_lane_index]) {
|
||||
return false;
|
||||
}
|
||||
|
@ -1844,9 +1845,10 @@ re_enter:
|
|||
definition_opcode_number == OPCODE_DOT_PRODUCT_3) {
|
||||
return true;
|
||||
}
|
||||
//if splat of 32-bit value type, return true
|
||||
//technically a splat of int16 or int8 would also produce the same "float" in all lanes
|
||||
//but i think its best to keep this function focused on specifically float data
|
||||
// if splat of 32-bit value type, return true
|
||||
// technically a splat of int16 or int8 would also produce the same "float" in
|
||||
// all lanes but i think its best to keep this function focused on
|
||||
// specifically float data
|
||||
if (definition_opcode_number == OPCODE_SPLAT) {
|
||||
if (definition->dest->type == VEC128_TYPE) {
|
||||
auto splat_src_value_type = definition->src1.value->type;
|
||||
|
@ -1857,33 +1859,32 @@ re_enter:
|
|||
}
|
||||
}
|
||||
|
||||
switch (definition_opcode_number) {
|
||||
//all of these opcodes produce the same value for the same input
|
||||
case OPCODE_RSQRT:
|
||||
case OPCODE_RECIP:
|
||||
case OPCODE_POW2:
|
||||
case OPCODE_LOG2:
|
||||
switch (definition_opcode_number) {
|
||||
// all of these opcodes produce the same value for the same input
|
||||
case OPCODE_RSQRT:
|
||||
case OPCODE_RECIP:
|
||||
case OPCODE_POW2:
|
||||
case OPCODE_LOG2:
|
||||
for_value = definition->src1.value;
|
||||
goto re_enter;
|
||||
|
||||
//binary opcodes
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_SUB:
|
||||
case OPCODE_MUL:
|
||||
// binary opcodes
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_SUB:
|
||||
case OPCODE_MUL:
|
||||
if (!AllFloatVectorLanesSameValue(definition->src1.value,
|
||||
current_depth + 1)) {
|
||||
return false;
|
||||
}
|
||||
for_value = definition->src2.value;
|
||||
goto re_enter;
|
||||
default:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
} // namespace hir
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -621,6 +621,7 @@ class Value {
|
|||
bool AllFloatVectorLanesSameValue() const {
|
||||
return Value::AllFloatVectorLanesSameValue(this);
|
||||
}
|
||||
|
||||
private:
|
||||
/*
|
||||
returns true if for_value (which must be VEC128_TYPE) has the same value in
|
||||
|
|
|
@ -48,7 +48,9 @@ class MMIOHandler {
|
|||
typedef uint32_t (*HostToGuestVirtual)(const void* context,
|
||||
const void* host_address);
|
||||
typedef bool (*AccessViolationCallback)(
|
||||
global_unique_lock_type global_lock_locked_once, //not passed by reference with const like the others?
|
||||
global_unique_lock_type
|
||||
global_lock_locked_once, // not passed by reference with const like
|
||||
// the others?
|
||||
void* context, void* host_address, bool is_write);
|
||||
|
||||
// access_violation_callback is called with global_critical_region locked once
|
||||
|
|
|
@ -55,6 +55,7 @@ class Module {
|
|||
bool ReadMap(const char* file_name);
|
||||
|
||||
virtual void Precompile() {}
|
||||
|
||||
protected:
|
||||
virtual std::unique_ptr<Function> CreateFunction(uint32_t address) = 0;
|
||||
|
||||
|
|
|
@ -375,11 +375,11 @@ typedef struct alignas(64) PPCContext_s {
|
|||
|
||||
// Most frequently used registers first.
|
||||
|
||||
uint64_t r[32]; // 0x20 General purpose registers
|
||||
uint64_t ctr; // 0x18 Count register
|
||||
uint64_t lr; // 0x10 Link register
|
||||
uint64_t r[32]; // 0x20 General purpose registers
|
||||
uint64_t ctr; // 0x18 Count register
|
||||
uint64_t lr; // 0x10 Link register
|
||||
|
||||
uint64_t msr; //machine state register
|
||||
uint64_t msr; // machine state register
|
||||
|
||||
double f[32]; // 0x120 Floating-point registers
|
||||
vec128_t v[128]; // 0x220 VMX128 vector registers
|
||||
|
|
|
@ -46,7 +46,7 @@ struct PPCDecodeData {
|
|||
uint32_t LEV() const { return bits_.LEV; }
|
||||
|
||||
private:
|
||||
XE_MAYBE_UNUSED
|
||||
XE_MAYBE_UNUSED
|
||||
uint32_t address_;
|
||||
union {
|
||||
uint32_t value_;
|
||||
|
|
|
@ -132,23 +132,23 @@ int InstrEmit_branch(PPCHIRBuilder& f, const char* src, uint64_t cia,
|
|||
#else
|
||||
{
|
||||
#endif
|
||||
// Jump to pointer.
|
||||
bool likely_return = !lk && nia_is_lr;
|
||||
if (likely_return) {
|
||||
call_flags |= CALL_POSSIBLE_RETURN;
|
||||
}
|
||||
if (cond) {
|
||||
if (!expect_true) {
|
||||
cond = f.IsFalse(cond);
|
||||
// Jump to pointer.
|
||||
bool likely_return = !lk && nia_is_lr;
|
||||
if (likely_return) {
|
||||
call_flags |= CALL_POSSIBLE_RETURN;
|
||||
}
|
||||
if (cond) {
|
||||
if (!expect_true) {
|
||||
cond = f.IsFalse(cond);
|
||||
}
|
||||
f.CallIndirectTrue(cond, nia, call_flags);
|
||||
} else {
|
||||
f.CallIndirect(nia, call_flags);
|
||||
}
|
||||
f.CallIndirectTrue(cond, nia, call_flags);
|
||||
} else {
|
||||
f.CallIndirect(nia, call_flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
} // namespace ppc
|
||||
|
||||
int InstrEmit_bx(PPCHIRBuilder& f, const InstrData& i) {
|
||||
|
@ -789,9 +789,8 @@ int InstrEmit_mtspr(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// code requires it. Sequences of mtmsr/lwar/stcw/mtmsr come up a lot, and
|
||||
// without the lock here threads can livelock.
|
||||
|
||||
|
||||
//0x400 = debug singlestep i think
|
||||
//ive seen 0x8000 used in kernel code
|
||||
// 0x400 = debug singlestep i think
|
||||
// ive seen 0x8000 used in kernel code
|
||||
int InstrEmit_mfmsr(PPCHIRBuilder& f, const InstrData& i) {
|
||||
// bit 48 = EE; interrupt enabled
|
||||
// bit 62 = RI; recoverable interrupt
|
||||
|
@ -806,7 +805,7 @@ int InstrEmit_mtmsr(PPCHIRBuilder& f, const InstrData& i) {
|
|||
}
|
||||
|
||||
int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) {
|
||||
//todo: this is moving msr under a mask, so only writing EE and RI
|
||||
// todo: this is moving msr under a mask, so only writing EE and RI
|
||||
|
||||
Value* from = f.LoadGPR(i.X.RT);
|
||||
Value* mtmsrd_mask = f.LoadConstantUint64((1ULL << 15));
|
||||
|
|
|
@ -106,16 +106,17 @@ bool PPCFrontend::Initialize() {
|
|||
}
|
||||
|
||||
bool PPCFrontend::DeclareFunction(GuestFunction* function) {
|
||||
|
||||
//chrispy: make sure we aren't declaring a function that is actually padding data, this will mess up PPCScanner and is hard to debug
|
||||
//wow, this halo reach actually has branches into 0 opcodes, look into further
|
||||
//xenia_assert(*reinterpret_cast<const uint32_t*>(
|
||||
// this->memory()->TranslateVirtual(function->address())) != 0);
|
||||
// Could scan or something here.
|
||||
// Could also check to see if it's a well-known function type and classify
|
||||
// for later.
|
||||
// Could also kick off a precompiler, since we know it's likely the function
|
||||
// will be demanded soon.
|
||||
// chrispy: make sure we aren't declaring a function that is actually padding
|
||||
// data, this will mess up PPCScanner and is hard to debug wow, this halo
|
||||
// reach actually has branches into 0 opcodes, look into further
|
||||
// xenia_assert(*reinterpret_cast<const uint32_t*>(
|
||||
// this->memory()->TranslateVirtual(function->address())) !=
|
||||
// 0);
|
||||
// Could scan or something here.
|
||||
// Could also check to see if it's a well-known function type and classify
|
||||
// for later.
|
||||
// Could also kick off a precompiler, since we know it's likely the function
|
||||
// will be demanded soon.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -80,8 +80,10 @@ class PPCHIRBuilder : public hir::HIRBuilder {
|
|||
|
||||
void StoreReserved(Value* val);
|
||||
Value* LoadReserved();
|
||||
//calls original impl in hirbuilder, but also records the is_return_site bit into flags in the guestmodule
|
||||
// calls original impl in hirbuilder, but also records the is_return_site bit
|
||||
// into flags in the guestmodule
|
||||
void SetReturnAddress(Value* value);
|
||||
|
||||
private:
|
||||
void MaybeBreakOnInstruction(uint32_t address);
|
||||
void AnnotateLabel(uint32_t address, Label* label);
|
||||
|
|
|
@ -267,7 +267,7 @@ Function* Processor::ResolveFunction(uint32_t address) {
|
|||
entry->status = Entry::STATUS_FAILED;
|
||||
return nullptr;
|
||||
}
|
||||
//only add it to the list of resolved functions if resolving succeeded
|
||||
// only add it to the list of resolved functions if resolving succeeded
|
||||
auto module_for = function->module();
|
||||
|
||||
auto xexmod = dynamic_cast<XexModule*>(module_for);
|
||||
|
@ -1300,7 +1300,7 @@ uint32_t Processor::GuestAtomicIncrement32(ppc::PPCContext* context,
|
|||
result = *host_address;
|
||||
// todo: should call a processor->backend function that acquires a
|
||||
// reservation instead of using host atomics
|
||||
if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result)+1),
|
||||
if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result) + 1),
|
||||
host_address)) {
|
||||
break;
|
||||
}
|
||||
|
@ -1316,7 +1316,7 @@ uint32_t Processor::GuestAtomicDecrement32(ppc::PPCContext* context,
|
|||
result = *host_address;
|
||||
// todo: should call a processor->backend function that acquires a
|
||||
// reservation instead of using host atomics
|
||||
if (xe::atomic_cas(result,xe::byte_swap( xe::byte_swap(result)-1),
|
||||
if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result) - 1),
|
||||
host_address)) {
|
||||
break;
|
||||
}
|
||||
|
@ -1326,9 +1326,9 @@ uint32_t Processor::GuestAtomicDecrement32(ppc::PPCContext* context,
|
|||
|
||||
uint32_t Processor::GuestAtomicOr32(ppc::PPCContext* context,
|
||||
uint32_t guest_address, uint32_t mask) {
|
||||
return xe::byte_swap(xe::atomic_or(
|
||||
context->TranslateVirtual<volatile int32_t*>(guest_address),
|
||||
xe::byte_swap(mask)));
|
||||
return xe::byte_swap(
|
||||
xe::atomic_or(context->TranslateVirtual<volatile int32_t*>(guest_address),
|
||||
xe::byte_swap(mask)));
|
||||
}
|
||||
uint32_t Processor::GuestAtomicXor32(ppc::PPCContext* context,
|
||||
uint32_t guest_address, uint32_t mask) {
|
||||
|
|
|
@ -189,11 +189,11 @@ class Processor {
|
|||
uint32_t GuestAtomicDecrement32(ppc::PPCContext* context,
|
||||
uint32_t guest_address);
|
||||
uint32_t GuestAtomicOr32(ppc::PPCContext* context, uint32_t guest_address,
|
||||
uint32_t mask);
|
||||
uint32_t mask);
|
||||
uint32_t GuestAtomicXor32(ppc::PPCContext* context, uint32_t guest_address,
|
||||
uint32_t mask);
|
||||
uint32_t mask);
|
||||
uint32_t GuestAtomicAnd32(ppc::PPCContext* context, uint32_t guest_address,
|
||||
uint32_t mask);
|
||||
uint32_t mask);
|
||||
bool GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value,
|
||||
uint32_t new_value, uint32_t guest_address);
|
||||
|
||||
|
|
|
@ -77,8 +77,7 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id,
|
|||
|
||||
// Allocate with 64b alignment.
|
||||
|
||||
context_ = reinterpret_cast<ppc::PPCContext*>(
|
||||
AllocateContext());
|
||||
context_ = reinterpret_cast<ppc::PPCContext*>(AllocateContext());
|
||||
processor->backend()->InitializeBackendContext(context_);
|
||||
assert_true(((uint64_t)context_ & 0x3F) == 0);
|
||||
std::memset(context_, 0, sizeof(ppc::PPCContext));
|
||||
|
@ -97,9 +96,11 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id,
|
|||
// fixme: VSCR must be set here!
|
||||
context_->msr = 0x9030; // dumped from a real 360, 0x8000
|
||||
|
||||
//this register can be used for arbitrary data according to the PPC docs
|
||||
//but the suggested use is to mark which vector registers are in use, for faster save/restore
|
||||
//it seems unlikely anything uses this, especially since we have way more than 32 vrs, but setting it to all ones seems closer to correct than 0
|
||||
// this register can be used for arbitrary data according to the PPC docs
|
||||
// but the suggested use is to mark which vector registers are in use, for
|
||||
// faster save/restore it seems unlikely anything uses this, especially since
|
||||
// we have way more than 32 vrs, but setting it to all ones seems closer to
|
||||
// correct than 0
|
||||
context_->vrsave = ~0u;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ namespace ui {
|
|||
|
||||
class DebugWindow : public cpu::DebugListener {
|
||||
public:
|
||||
virtual ~DebugWindow();
|
||||
virtual ~DebugWindow();
|
||||
|
||||
static std::unique_ptr<DebugWindow> Create(
|
||||
Emulator* emulator, xe::ui::WindowedAppContext& app_context);
|
||||
|
|
|
@ -398,13 +398,13 @@ X_STATUS Emulator::MountPath(const std::filesystem::path& path,
|
|||
return X_STATUS_NO_SUCH_FILE;
|
||||
}
|
||||
|
||||
file_system_->UnregisterSymbolicLink(kDefaultPartitonSymbolicLink);
|
||||
file_system_->UnregisterSymbolicLink(kDefaultPartitionSymbolicLink);
|
||||
file_system_->UnregisterSymbolicLink(kDefaultGameSymbolicLink);
|
||||
file_system_->UnregisterSymbolicLink("plugins:");
|
||||
|
||||
// Create symlinks to the device.
|
||||
file_system_->RegisterSymbolicLink(kDefaultGameSymbolicLink, mount_path);
|
||||
file_system_->RegisterSymbolicLink(kDefaultPartitonSymbolicLink, mount_path);
|
||||
file_system_->RegisterSymbolicLink(kDefaultPartitionSymbolicLink, mount_path);
|
||||
|
||||
return X_STATUS_SUCCESS;
|
||||
}
|
||||
|
@ -875,14 +875,14 @@ std::string Emulator::FindLaunchModule() {
|
|||
// Remove previous symbolic links.
|
||||
// Some titles can provide root within specific directory.
|
||||
kernel_state_->file_system()->UnregisterSymbolicLink(
|
||||
kDefaultPartitonSymbolicLink);
|
||||
kDefaultPartitionSymbolicLink);
|
||||
kernel_state_->file_system()->UnregisterSymbolicLink(
|
||||
kDefaultGameSymbolicLink);
|
||||
|
||||
file_path /= std::filesystem::path(xam->loader_data().launch_path);
|
||||
|
||||
kernel_state_->file_system()->RegisterSymbolicLink(
|
||||
kDefaultPartitonSymbolicLink,
|
||||
kDefaultPartitionSymbolicLink,
|
||||
xe::path_to_utf8(file_path.parent_path()));
|
||||
kernel_state_->file_system()->RegisterSymbolicLink(
|
||||
kDefaultGameSymbolicLink, xe::path_to_utf8(file_path.parent_path()));
|
||||
|
|
|
@ -53,7 +53,7 @@ namespace xe {
|
|||
|
||||
constexpr fourcc_t kEmulatorSaveSignature = make_fourcc("XSAV");
|
||||
static const std::string kDefaultGameSymbolicLink = "GAME:";
|
||||
static const std::string kDefaultPartitonSymbolicLink = "D:";
|
||||
static const std::string kDefaultPartitionSymbolicLink = "D:";
|
||||
|
||||
// The main type that runs the whole emulator.
|
||||
// This is responsible for initializing and managing all the various subsystems.
|
||||
|
@ -230,9 +230,7 @@ class Emulator {
|
|||
xe::Delegate<> on_exit;
|
||||
|
||||
private:
|
||||
enum : uint64_t {
|
||||
EmulatorFlagDisclaimerAcknowledged = 1ULL << 0
|
||||
};
|
||||
enum : uint64_t { EmulatorFlagDisclaimerAcknowledged = 1ULL << 0 };
|
||||
static uint64_t GetPersistentEmulatorFlags();
|
||||
static void SetPersistentEmulatorFlags(uint64_t new_flags);
|
||||
static std::string CanonicalizeFileExtension(
|
||||
|
|
|
@ -100,11 +100,14 @@ bool CommandProcessor::Initialize() {
|
|||
}
|
||||
|
||||
worker_running_ = true;
|
||||
worker_thread_ = kernel::object_ref<kernel::XHostThread>(
|
||||
new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() {
|
||||
WorkerThreadMain();
|
||||
return 0;
|
||||
}, kernel_state_->GetIdleProcess()));
|
||||
worker_thread_ =
|
||||
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
|
||||
kernel_state_, 128 * 1024, 0,
|
||||
[this]() {
|
||||
WorkerThreadMain();
|
||||
return 0;
|
||||
},
|
||||
kernel_state_->GetIdleProcess()));
|
||||
worker_thread_->set_name("GPU Commands");
|
||||
worker_thread_->Create();
|
||||
|
||||
|
@ -270,7 +273,8 @@ void CommandProcessor::WorkerThreadMain() {
|
|||
|
||||
// TODO(benvanik): use reader->Read_update_freq_ and only issue after moving
|
||||
// that many indices.
|
||||
// Keep in mind that the gpu also updates the cpu-side copy if the write pointer and read pointer would be equal
|
||||
// Keep in mind that the gpu also updates the cpu-side copy if the write
|
||||
// pointer and read pointer would be equal
|
||||
if (read_ptr_writeback_ptr_) {
|
||||
xe::store_and_swap<uint32_t>(
|
||||
memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_);
|
||||
|
@ -360,9 +364,8 @@ void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
|
|||
XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) {
|
||||
cpu::backend::GuestPseudoStackTrace st;
|
||||
|
||||
if (logging::internal::ShouldLog(LogLevel::Debug) && kernel_state_->processor()
|
||||
->backend()
|
||||
->PopulatePseudoStacktrace(&st)) {
|
||||
if (logging::internal::ShouldLog(LogLevel::Debug) &&
|
||||
kernel_state_->processor()->backend()->PopulatePseudoStacktrace(&st)) {
|
||||
logging::LoggerBatch<LogLevel::Debug> log_initiator{};
|
||||
|
||||
log_initiator("Updating read ptr to {}, initiator stacktrace below\n",
|
||||
|
@ -381,7 +384,7 @@ XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) {
|
|||
}
|
||||
|
||||
void CommandProcessor::UpdateWritePointer(uint32_t value) {
|
||||
XE_UNLIKELY_IF (cvars::log_ringbuffer_kickoff_initiator_bts) {
|
||||
XE_UNLIKELY_IF(cvars::log_ringbuffer_kickoff_initiator_bts) {
|
||||
LogKickoffInitator(value);
|
||||
}
|
||||
write_ptr_index_ = value;
|
||||
|
@ -390,7 +393,8 @@ void CommandProcessor::UpdateWritePointer(uint32_t value) {
|
|||
|
||||
void CommandProcessor::LogRegisterSet(uint32_t register_index, uint32_t value) {
|
||||
#if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1
|
||||
if (cvars::log_guest_driven_gpu_register_written_values && logging::internal::ShouldLog(LogLevel::Debug)) {
|
||||
if (cvars::log_guest_driven_gpu_register_written_values &&
|
||||
logging::internal::ShouldLog(LogLevel::Debug)) {
|
||||
const RegisterInfo* reginfo = RegisterFile::GetRegisterInfo(register_index);
|
||||
|
||||
if (!reginfo) {
|
||||
|
@ -734,7 +738,6 @@ void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
|
|||
|
||||
void CommandProcessor::ReturnFromWait() {}
|
||||
|
||||
|
||||
void CommandProcessor::InitializeTrace() {
|
||||
// Write the initial register values, to be loaded directly into the
|
||||
// RegisterFile since all registers, including those that may have side
|
||||
|
|
|
@ -225,7 +225,6 @@ class CommandProcessor {
|
|||
virtual void PrepareForWait();
|
||||
virtual void ReturnFromWait();
|
||||
|
||||
|
||||
virtual void OnPrimaryBufferEnd() {}
|
||||
|
||||
#include "pm4_command_processor_declare.h"
|
||||
|
|
|
@ -22,9 +22,9 @@
|
|||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/packet_disassembler.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/gpu/packet_disassembler.h"
|
||||
#include "xenia/ui/d3d12/d3d12_presenter.h"
|
||||
#include "xenia/ui/d3d12/d3d12_util.h"
|
||||
|
||||
|
@ -62,10 +62,9 @@ void D3D12SaveGPUSetting(D3D12GPUSetting setting, uint64_t value) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace d3d12 {
|
||||
|
||||
|
||||
// Generated with `xb buildshaders`.
|
||||
namespace shaders {
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_pwl_cs.h"
|
||||
|
@ -4992,7 +4991,8 @@ bool D3D12CommandProcessor::UpdateBindings_BindfulPath(
|
|||
}
|
||||
// Null SRV + UAV + EDRAM.
|
||||
gpu_handle_shared_memory_uav_and_edram_ = view_gpu_handle;
|
||||
ui::d3d12::util::CreateBufferRawSRV(provider.GetDevice(), view_cpu_handle, nullptr, 0);
|
||||
ui::d3d12::util::CreateBufferRawSRV(provider.GetDevice(), view_cpu_handle,
|
||||
nullptr, 0);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
shared_memory_->WriteRawUAVDescriptor(view_cpu_handle);
|
||||
|
|
|
@ -231,13 +231,13 @@ class D3D12CommandProcessor final : public CommandProcessor {
|
|||
XE_FORCEINLINE
|
||||
void WriteRegisterForceinline(uint32_t index, uint32_t value);
|
||||
void WriteRegister(uint32_t index, uint32_t value) override;
|
||||
|
||||
|
||||
virtual void WriteRegistersFromMem(uint32_t start_index, uint32_t* base,
|
||||
uint32_t num_registers) override;
|
||||
/*helper functions for WriteRegistersFromMem*/
|
||||
XE_FORCEINLINE
|
||||
void WriteShaderConstantsFromMem(uint32_t start_index, uint32_t* base,
|
||||
uint32_t num_registers);
|
||||
uint32_t num_registers);
|
||||
XE_FORCEINLINE
|
||||
void WriteBoolLoopFromMem(uint32_t start_index, uint32_t* base,
|
||||
uint32_t num_registers);
|
||||
|
@ -245,8 +245,9 @@ class D3D12CommandProcessor final : public CommandProcessor {
|
|||
void WriteFetchFromMem(uint32_t start_index, uint32_t* base,
|
||||
uint32_t num_registers);
|
||||
|
||||
void WritePossiblySpecialRegistersFromMem(uint32_t start_index, uint32_t* base,
|
||||
uint32_t num_registers);
|
||||
void WritePossiblySpecialRegistersFromMem(uint32_t start_index,
|
||||
uint32_t* base,
|
||||
uint32_t num_registers);
|
||||
template <uint32_t register_lower_bound, uint32_t register_upper_bound>
|
||||
XE_FORCEINLINE void WriteRegisterRangeFromMem_WithKnownBound(
|
||||
uint32_t start_index, uint32_t* base, uint32_t num_registers);
|
||||
|
@ -262,8 +263,7 @@ class D3D12CommandProcessor final : public CommandProcessor {
|
|||
uint32_t base,
|
||||
uint32_t num_registers);
|
||||
XE_NOINLINE
|
||||
void WriteOneRegisterFromRing(uint32_t base,
|
||||
uint32_t num_times);
|
||||
void WriteOneRegisterFromRing(uint32_t base, uint32_t num_times);
|
||||
|
||||
XE_FORCEINLINE
|
||||
void WriteALURangeFromRing(xe::RingBuffer* ring, uint32_t base,
|
||||
|
@ -795,7 +795,6 @@ class D3D12CommandProcessor final : public CommandProcessor {
|
|||
draw_util::GetViewportInfoArgs previous_viewport_info_args_;
|
||||
draw_util::ViewportInfo previous_viewport_info_;
|
||||
|
||||
|
||||
std::atomic<bool> pix_capture_requested_ = false;
|
||||
bool pix_capturing_;
|
||||
|
||||
|
|
|
@ -672,11 +672,11 @@ uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex(
|
|||
return descriptor_index;
|
||||
}
|
||||
void D3D12TextureCache::PrefetchSamplerParameters(
|
||||
const D3D12Shader::SamplerBinding& binding) const {
|
||||
const D3D12Shader::SamplerBinding& binding) const {
|
||||
swcache::PrefetchL1(®ister_file()[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||
binding.fetch_constant * 6]);
|
||||
}
|
||||
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
|
||||
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
|
||||
const D3D12Shader::SamplerBinding& binding) const {
|
||||
const auto& regs = register_file();
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
|
||||
|
@ -703,8 +703,8 @@ void D3D12TextureCache::PrefetchSamplerParameters(
|
|||
nullptr, nullptr, nullptr,
|
||||
&mip_min_level, nullptr);
|
||||
parameters.mip_min_level = mip_min_level;
|
||||
//high cache miss count here, prefetch fetch earlier
|
||||
// TODO(Triang3l): Disable filtering for texture formats not supporting it.
|
||||
// high cache miss count here, prefetch fetch earlier
|
||||
// TODO(Triang3l): Disable filtering for texture formats not supporting it.
|
||||
xenos::AnisoFilter aniso_filter =
|
||||
binding.aniso_filter == xenos::AnisoFilter::kUseFetchConst
|
||||
? fetch.aniso_filter
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/literals.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
@ -30,9 +30,10 @@ class D3D12CommandProcessor;
|
|||
|
||||
class DeferredCommandList {
|
||||
public:
|
||||
static constexpr size_t MAX_SIZEOF_COMMANDLIST = 65536 * 128; //around 8 mb
|
||||
static constexpr size_t MAX_SIZEOF_COMMANDLIST = 65536 * 128; // around 8 mb
|
||||
/*
|
||||
chrispy: upped from 1_MiB to 4_MiB, m:durandal hits frequent resizes in large open maps
|
||||
chrispy: upped from 1_MiB to 4_MiB, m:durandal hits frequent resizes in
|
||||
large open maps
|
||||
*/
|
||||
DeferredCommandList(const D3D12CommandProcessor& command_processor,
|
||||
size_t initial_size_bytes = MAX_SIZEOF_COMMANDLIST);
|
||||
|
@ -566,7 +567,7 @@ class DeferredCommandList {
|
|||
const D3D12CommandProcessor& command_processor_;
|
||||
|
||||
// uintmax_t to ensure uint64_t and pointer alignment of all structures.
|
||||
//std::vector<uintmax_t> command_stream_;
|
||||
// std::vector<uintmax_t> command_stream_;
|
||||
FixedVMemVector<MAX_SIZEOF_COMMANDLIST> command_stream_;
|
||||
};
|
||||
|
||||
|
|
|
@ -552,9 +552,8 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args,
|
|||
}
|
||||
}
|
||||
template <bool clamp_to_surface_pitch>
|
||||
static inline
|
||||
void GetScissorTmpl(const RegisterFile& XE_RESTRICT regs,
|
||||
Scissor& XE_RESTRICT scissor_out) {
|
||||
static inline void GetScissorTmpl(const RegisterFile& XE_RESTRICT regs,
|
||||
Scissor& XE_RESTRICT scissor_out) {
|
||||
#if XE_ARCH_AMD64 == 1
|
||||
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
|
||||
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
|
||||
|
@ -623,8 +622,7 @@ void GetScissorTmpl(const RegisterFile& XE_RESTRICT regs,
|
|||
// interlock-based custom RB implementations) and using conventional render
|
||||
// targets, but padded to EDRAM tiles.
|
||||
tmp1 = _mm_blend_epi16(
|
||||
tmp1, _mm_min_epi32(tmp1, _mm_set1_epi32(surface_pitch)),
|
||||
0b00110011);
|
||||
tmp1, _mm_min_epi32(tmp1, _mm_set1_epi32(surface_pitch)), 0b00110011);
|
||||
}
|
||||
|
||||
tmp1 = _mm_max_epi32(tmp1, _mm_setzero_si128());
|
||||
|
|
|
@ -20,7 +20,10 @@ DEFINE_path(
|
|||
|
||||
DEFINE_bool(vsync, true, "Enable VSYNC.", "GPU");
|
||||
|
||||
DEFINE_uint64(vsync_fps, 60, "VSYNC frames per second", "GPU");
|
||||
DEFINE_uint64(framerate_limit, 60,
|
||||
"Maximum frames per second. 0 = Unlimited frames.\n"
|
||||
"Defaults to 60, when set to 0, and VSYNC is enabled.",
|
||||
"GPU");
|
||||
|
||||
DEFINE_bool(
|
||||
gpu_allow_invalid_fetch_constants, true,
|
||||
|
|
|
@ -18,7 +18,7 @@ DECLARE_path(dump_shaders);
|
|||
|
||||
DECLARE_bool(vsync);
|
||||
|
||||
DECLARE_uint64(vsync_fps);
|
||||
DECLARE_uint64(framerate_limit);
|
||||
|
||||
DECLARE_bool(gpu_allow_invalid_fetch_constants);
|
||||
|
||||
|
|
|
@ -25,10 +25,10 @@
|
|||
#include "xenia/base/threading.h"
|
||||
#include "xenia/gpu/command_processor.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/ui/graphics_provider.h"
|
||||
#include "xenia/ui/window.h"
|
||||
#include "xenia/ui/windowed_app_context.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
DEFINE_bool(
|
||||
store_shaders, true,
|
||||
"Store shaders persistently and load them when loading games to avoid "
|
||||
|
@ -50,7 +50,7 @@ __declspec(dllexport) uint32_t AmdPowerXpressRequestHighPerformance = 1;
|
|||
} // extern "C"
|
||||
#endif // XE_PLATFORM_WIN32
|
||||
|
||||
GraphicsSystem::GraphicsSystem() : vsync_worker_running_(false) {
|
||||
GraphicsSystem::GraphicsSystem() : frame_limiter_worker_running_(false) {
|
||||
register_file_ = reinterpret_cast<RegisterFile*>(memory::AllocFixed(
|
||||
nullptr, sizeof(RegisterFile), memory::AllocationType::kReserveCommit,
|
||||
memory::PageAccess::kReadWrite));
|
||||
|
@ -100,50 +100,79 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor,
|
|||
reinterpret_cast<cpu::MMIOReadCallback>(ReadRegisterThunk),
|
||||
reinterpret_cast<cpu::MMIOWriteCallback>(WriteRegisterThunk));
|
||||
|
||||
// 60hz vsync timer.
|
||||
vsync_worker_running_ = true;
|
||||
vsync_worker_thread_ = kernel::object_ref<kernel::XHostThread>(
|
||||
new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() {
|
||||
const double vsync_duration_d =
|
||||
cvars::vsync
|
||||
? std::max<double>(
|
||||
5.0, 1000.0 / static_cast<double>(cvars::vsync_fps))
|
||||
: 1.0;
|
||||
uint64_t last_frame_time = Clock::QueryGuestTickCount();
|
||||
// Sleep for 90% of the vblank duration, spin for 10%
|
||||
const double duration_scalar = 0.90;
|
||||
// Frame limiter thread.
|
||||
frame_limiter_worker_running_ = true;
|
||||
frame_limiter_worker_thread_ =
|
||||
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
|
||||
kernel_state_, 128 * 1024, 0,
|
||||
[this]() {
|
||||
uint64_t normalized_framerate_limit =
|
||||
std::max<uint64_t>(0, cvars::framerate_limit);
|
||||
|
||||
while (vsync_worker_running_) {
|
||||
const uint64_t current_time = Clock::QueryGuestTickCount();
|
||||
const uint64_t tick_freq = Clock::guest_tick_frequency();
|
||||
const uint64_t time_delta = current_time - last_frame_time;
|
||||
const double elapsed_d = static_cast<double>(time_delta) /
|
||||
(static_cast<double>(tick_freq) / 1000.0);
|
||||
if (elapsed_d >= vsync_duration_d) {
|
||||
last_frame_time = current_time;
|
||||
// If VSYNC is enabled, but frames are not limited,
|
||||
// lock framerate at default value of 60
|
||||
if (normalized_framerate_limit == 0 && cvars::vsync)
|
||||
normalized_framerate_limit = 60;
|
||||
|
||||
// TODO(disjtqz): should recalculate the remaining time to a vblank
|
||||
// after MarkVblank, no idea how long the guest code normally takes
|
||||
MarkVblank();
|
||||
if (cvars::vsync) {
|
||||
const uint64_t estimated_nanoseconds = static_cast<uint64_t>(
|
||||
(vsync_duration_d * 1000000.0) *
|
||||
duration_scalar); // 1000 microseconds = 1 ms
|
||||
const double vsync_duration_d =
|
||||
cvars::vsync
|
||||
? std::max<double>(5.0,
|
||||
1000.0 / static_cast<double>(
|
||||
normalized_framerate_limit))
|
||||
: 1.0;
|
||||
uint64_t last_frame_time = Clock::QueryGuestTickCount();
|
||||
// Sleep for 90% of the vblank duration, spin for 10%
|
||||
const double duration_scalar = 0.90;
|
||||
|
||||
threading::NanoSleep(estimated_nanoseconds);
|
||||
while (frame_limiter_worker_running_) {
|
||||
if (cvars::vsync) {
|
||||
const uint64_t current_time = Clock::QueryGuestTickCount();
|
||||
const uint64_t tick_freq = Clock::guest_tick_frequency();
|
||||
const uint64_t time_delta = current_time - last_frame_time;
|
||||
const double elapsed_d =
|
||||
static_cast<double>(time_delta) /
|
||||
(static_cast<double>(tick_freq) / 1000.0);
|
||||
if (elapsed_d >= vsync_duration_d) {
|
||||
last_frame_time = current_time;
|
||||
|
||||
// TODO(disjtqz): should recalculate the remaining time to a
|
||||
// vblank after MarkVblank, no idea how long the guest code
|
||||
// normally takes
|
||||
MarkVblank();
|
||||
if (cvars::vsync) {
|
||||
const uint64_t estimated_nanoseconds =
|
||||
static_cast<uint64_t>(
|
||||
(vsync_duration_d * 1000000.0) *
|
||||
duration_scalar); // 1000 microseconds = 1 ms
|
||||
|
||||
threading::NanoSleep(estimated_nanoseconds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!cvars::vsync) {
|
||||
MarkVblank();
|
||||
if (normalized_framerate_limit > 0) {
|
||||
// framerate_limit is over 0, vsync disabled
|
||||
// - No VSYNC + limited frames defined by user
|
||||
uint64_t framerate_limited_sleep_time =
|
||||
1000000000 / normalized_framerate_limit;
|
||||
xe::threading::NanoSleep(framerate_limited_sleep_time);
|
||||
} else {
|
||||
// framerate_limit is 0, vsync disabled
|
||||
// - No VSYNC + unlimited frames
|
||||
xe::threading::Sleep(std::chrono::milliseconds(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!cvars::vsync) {
|
||||
xe::threading::Sleep(std::chrono::milliseconds(1));
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}, kernel_state->GetIdleProcess()));
|
||||
return 0;
|
||||
},
|
||||
kernel_state->GetIdleProcess()));
|
||||
// As we run vblank interrupts the debugger must be able to suspend us.
|
||||
vsync_worker_thread_->set_can_debugger_suspend(true);
|
||||
vsync_worker_thread_->set_name("GPU VSync");
|
||||
vsync_worker_thread_->Create();
|
||||
vsync_worker_thread_->thread()->set_priority(
|
||||
frame_limiter_worker_thread_->set_can_debugger_suspend(true);
|
||||
frame_limiter_worker_thread_->set_name("GPU Frame limiter");
|
||||
frame_limiter_worker_thread_->Create();
|
||||
frame_limiter_worker_thread_->thread()->set_priority(
|
||||
threading::ThreadPriority::kLowest);
|
||||
if (cvars::trace_gpu_stream) {
|
||||
BeginTracing();
|
||||
|
@ -159,10 +188,10 @@ void GraphicsSystem::Shutdown() {
|
|||
command_processor_.reset();
|
||||
}
|
||||
|
||||
if (vsync_worker_thread_) {
|
||||
vsync_worker_running_ = false;
|
||||
vsync_worker_thread_->Wait(0, 0, 0, nullptr);
|
||||
vsync_worker_thread_.reset();
|
||||
if (frame_limiter_worker_thread_) {
|
||||
frame_limiter_worker_running_ = false;
|
||||
frame_limiter_worker_thread_->Wait(0, 0, 0, nullptr);
|
||||
frame_limiter_worker_thread_.reset();
|
||||
}
|
||||
|
||||
if (presenter_) {
|
||||
|
@ -267,7 +296,8 @@ void GraphicsSystem::SetInterruptCallback(uint32_t callback,
|
|||
}
|
||||
|
||||
void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) {
|
||||
kernel_state()->EmulateCPInterruptDPC(interrupt_callback_,interrupt_callback_data_, source, cpu);
|
||||
kernel_state()->EmulateCPInterruptDPC(interrupt_callback_,
|
||||
interrupt_callback_data_, source, cpu);
|
||||
}
|
||||
|
||||
void GraphicsSystem::MarkVblank() {
|
||||
|
|
|
@ -109,8 +109,8 @@ class GraphicsSystem {
|
|||
uint32_t interrupt_callback_ = 0;
|
||||
uint32_t interrupt_callback_data_ = 0;
|
||||
|
||||
std::atomic<bool> vsync_worker_running_;
|
||||
kernel::object_ref<kernel::XHostThread> vsync_worker_thread_;
|
||||
std::atomic<bool> frame_limiter_worker_running_;
|
||||
kernel::object_ref<kernel::XHostThread> frame_limiter_worker_thread_;
|
||||
|
||||
RegisterFile* register_file_;
|
||||
std::unique_ptr<CommandProcessor> command_processor_;
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
|
||||
|
||||
#if defined(OVERRIDING_BASE_CMDPROCESSOR)
|
||||
#define PM4_OVERRIDE override
|
||||
#define PM4_OVERRIDE override
|
||||
#else
|
||||
#define PM4_OVERRIDE
|
||||
#endif
|
||||
void ExecuteIndirectBuffer(uint32_t ptr,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t count) XE_RESTRICT;
|
||||
virtual uint32_t ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index)
|
||||
XE_RESTRICT PM4_OVERRIDE;
|
||||
virtual bool ExecutePacket() PM4_OVERRIDE;
|
||||
|
@ -18,91 +17,77 @@ protected:
|
|||
XE_NOINLINE
|
||||
void DisassembleCurrentPacket() XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType0( uint32_t packet) XE_RESTRICT;
|
||||
bool ExecutePacketType0(uint32_t packet) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType1( uint32_t packet) XE_RESTRICT;
|
||||
bool ExecutePacketType1(uint32_t packet) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType2( uint32_t packet) XE_RESTRICT;
|
||||
bool ExecutePacketType2(uint32_t packet) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3( uint32_t packet) XE_RESTRICT;
|
||||
bool ExecutePacketType3(uint32_t packet) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_ME_INIT( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_NOP( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_ME_INIT(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_NOP(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_INTERRUPT( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_INTERRUPT(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_XE_SWAP( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_XE_SWAP(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_INDIRECT_BUFFER( uint32_t packet,
|
||||
bool ExecutePacketType3_INDIRECT_BUFFER(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_WAIT_REG_MEM( uint32_t packet,
|
||||
bool ExecutePacketType3_WAIT_REG_MEM(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_REG_RMW( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_REG_RMW(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_REG_TO_MEM( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_REG_TO_MEM(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_MEM_WRITE( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_MEM_WRITE(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_COND_WRITE( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_COND_WRITE(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_EVENT_WRITE( uint32_t packet,
|
||||
bool ExecutePacketType3_EVENT_WRITE(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_EVENT_WRITE_SHD( uint32_t packet,
|
||||
bool ExecutePacketType3_EVENT_WRITE_SHD(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_EVENT_WRITE_EXT( uint32_t packet,
|
||||
bool ExecutePacketType3_EVENT_WRITE_EXT(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_EVENT_WRITE_ZPD( uint32_t packet,
|
||||
bool ExecutePacketType3_EVENT_WRITE_ZPD(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3Draw( uint32_t packet,
|
||||
const char* opcode_name,
|
||||
bool ExecutePacketType3Draw(uint32_t packet, const char* opcode_name,
|
||||
uint32_t viz_query_condition,
|
||||
uint32_t count_remaining) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_DRAW_INDX( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_DRAW_INDX(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_DRAW_INDX_2( uint32_t packet,
|
||||
bool ExecutePacketType3_DRAW_INDX_2(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
XE_FORCEINLINE
|
||||
bool ExecutePacketType3_SET_CONSTANT( uint32_t packet,
|
||||
bool ExecutePacketType3_SET_CONSTANT(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
XE_NOINLINE
|
||||
bool ExecutePacketType3_SET_CONSTANT2( uint32_t packet,
|
||||
bool ExecutePacketType3_SET_CONSTANT2(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
XE_FORCEINLINE
|
||||
bool ExecutePacketType3_LOAD_ALU_CONSTANT( uint32_t packet,
|
||||
bool ExecutePacketType3_LOAD_ALU_CONSTANT(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_SET_SHADER_CONSTANTS(
|
||||
uint32_t packet,
|
||||
bool ExecutePacketType3_SET_SHADER_CONSTANTS(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_IM_LOAD( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
bool ExecutePacketType3_IM_LOAD(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_IM_LOAD_IMMEDIATE( uint32_t packet,
|
||||
bool ExecutePacketType3_IM_LOAD_IMMEDIATE(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_INVALIDATE_STATE( uint32_t packet,
|
||||
bool ExecutePacketType3_INVALIDATE_STATE(uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_VIZ_QUERY( uint32_t packet,
|
||||
uint32_t count) XE_RESTRICT;
|
||||
|
||||
bool ExecutePacketType3_VIZ_QUERY(uint32_t packet, uint32_t count) XE_RESTRICT;
|
||||
|
||||
XE_FORCEINLINE
|
||||
void WriteEventInitiator(uint32_t value) XE_RESTRICT;
|
||||
|
|
|
@ -683,13 +683,12 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_INDIRECT_BUFFER(
|
|||
to 54 bytes
|
||||
*/
|
||||
static bool MatchValueAndRef(uint32_t value, uint32_t ref, uint32_t wait_info) {
|
||||
// smaller code is generated than the #else path, although whether it is faster
|
||||
// i do not know. i don't think games do an enormous number of cond_write
|
||||
// though, so we have picked
|
||||
// the path with the smaller codegen.
|
||||
// we do technically have more instructions executed vs the switch case method,
|
||||
// but we have no mispredicts and most of our instructions are 0.25/0.3
|
||||
// throughput
|
||||
// smaller code is generated than the #else path, although whether it is
|
||||
// faster i do not know. i don't think games do an enormous number of
|
||||
// cond_write though, so we have picked the path with the smaller codegen. we
|
||||
// do technically have more instructions executed vs the switch case method,
|
||||
// but we have no mispredicts and most of our instructions are 0.25/0.3
|
||||
// throughput
|
||||
return ((((value < ref) << 1) | ((value <= ref) << 2) |
|
||||
((value == ref) << 3) | ((value != ref) << 4) |
|
||||
((value >= ref) << 5) | ((value > ref) << 6) | (1 << 7)) >>
|
||||
|
@ -899,13 +898,17 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_SHD(
|
|||
data_value = GpuSwap(data_value, endianness);
|
||||
uint8_t* write_destination = memory_->TranslatePhysical(address);
|
||||
if (address > 0x1FFFFFFF) {
|
||||
uint32_t writeback_base = register_file_->values[XE_GPU_REG_WRITEBACK_BASE].u32;
|
||||
uint32_t writeback_size = register_file_->values[XE_GPU_REG_WRITEBACK_SIZE].u32;
|
||||
uint32_t writeback_base =
|
||||
register_file_->values[XE_GPU_REG_WRITEBACK_BASE].u32;
|
||||
uint32_t writeback_size =
|
||||
register_file_->values[XE_GPU_REG_WRITEBACK_SIZE].u32;
|
||||
uint32_t writeback_offset = address - writeback_base;
|
||||
//check whether the guest has written writeback base. if they haven't, skip the offset check
|
||||
// check whether the guest has written writeback base. if they haven't, skip
|
||||
// the offset check
|
||||
if (writeback_base != 0 && writeback_offset < writeback_size) {
|
||||
write_destination = memory_->TranslateVirtual(0x7F000000 + writeback_offset);
|
||||
}
|
||||
write_destination =
|
||||
memory_->TranslateVirtual(0x7F000000 + writeback_offset);
|
||||
}
|
||||
}
|
||||
xe::store(write_destination, data_value);
|
||||
trace_writer_.WriteMemoryWrite(CpuToGpu(address), 4);
|
||||
|
|
|
@ -883,8 +883,7 @@ class PrimitiveProcessor {
|
|||
// Must be called in a global critical region.
|
||||
void UpdateCacheBucketsNonEmptyL2(
|
||||
uint32_t bucket_index_div_64,
|
||||
[[maybe_unused]] const global_unique_lock_type&
|
||||
global_lock) {
|
||||
[[maybe_unused]] const global_unique_lock_type& global_lock) {
|
||||
uint64_t& cache_buckets_non_empty_l2_ref =
|
||||
cache_buckets_non_empty_l2_[bucket_index_div_64 >> 6];
|
||||
uint64_t cache_buckets_non_empty_l2_bit = uint64_t(1)
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// Almost all of these values are taken directly from:
|
||||
// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/22/yamato_offset.h
|
||||
|
||||
//#define XE_GPU_REGISTER(index, type, name)
|
||||
// #define XE_GPU_REGISTER(index, type, name)
|
||||
|
||||
XE_GPU_REGISTER(0x0048, kDword, BIF_PERFCOUNTER0_SELECT)
|
||||
XE_GPU_REGISTER(0x0049, kDword, BIF_PERFCOUNTER0_HI)
|
||||
|
@ -36,35 +36,36 @@ XE_GPU_REGISTER(0x0398, kDword, RBBM_PERFCOUNTER0_HI)
|
|||
XE_GPU_REGISTER(0x0399, kDword, RBBM_PERFCOUNTER1_LOW)
|
||||
XE_GPU_REGISTER(0x039A, kDword, RBBM_PERFCOUNTER1_HI)
|
||||
|
||||
//XAM reads this directly and stores it to a struct, have not tracked where it goes from there
|
||||
//PM4 command PM4_MEM_WRITE_CNTR is supposed to write this to memory
|
||||
//XE_GPU_REGISTER(0x44b, kDword,CP_PROG_COUNTER )
|
||||
// XAM reads this directly and stores it to a struct, have not tracked where it
|
||||
// goes from there PM4 command PM4_MEM_WRITE_CNTR is supposed to write this to
|
||||
// memory XE_GPU_REGISTER(0x44b, kDword,CP_PROG_COUNTER )
|
||||
XE_GPU_REGISTER(0x045E, kDword, CALLBACK_ACK)
|
||||
|
||||
XE_GPU_REGISTER(0x0578, kDword, SCRATCH_REG0) // interrupt sync
|
||||
XE_GPU_REGISTER(0x0579, kDword, SCRATCH_REG1) // present interval
|
||||
XE_GPU_REGISTER(0x057A, kDword, SCRATCH_REG2)
|
||||
XE_GPU_REGISTER(0x057B, kDword, SCRATCH_REG3)
|
||||
XE_GPU_REGISTER(0x057C, kDword, SCRATCH_REG4) //originally this was named CALLBACK_ADDRESS, but that didnt make sense
|
||||
XE_GPU_REGISTER(0x057C, kDword,
|
||||
SCRATCH_REG4) // originally this was named CALLBACK_ADDRESS,
|
||||
// but that didnt make sense
|
||||
XE_GPU_REGISTER(0x057D, kDword, SCRATCH_REG5)
|
||||
XE_GPU_REGISTER(0x057E, kDword, SCRATCH_REG6)
|
||||
XE_GPU_REGISTER(0x057F, kDword, SCRATCH_REG7)
|
||||
|
||||
XE_GPU_REGISTER(0x05C8, kDword, WAIT_UNTIL)
|
||||
|
||||
//src is flash_xam.xex, i've seen it used by the kernel and aurora
|
||||
//seems to have a negative value while the gpu is busy
|
||||
//XE_GPU_REGISTER(0x05D0, kDword, RBBM_STATUS)
|
||||
// src is flash_xam.xex, i've seen it used by the kernel and aurora
|
||||
// seems to have a negative value while the gpu is busy
|
||||
// XE_GPU_REGISTER(0x05D0, kDword, RBBM_STATUS)
|
||||
|
||||
// update count = 6 bit field, bits 8- 14
|
||||
// there are several other fields here, they have an unknown purpose
|
||||
// XE_GPU_REGISTER(0x704, kDword, CP_RB_CNTL)
|
||||
|
||||
//update count = 6 bit field, bits 8- 14
|
||||
//there are several other fields here, they have an unknown purpose
|
||||
//XE_GPU_REGISTER(0x704, kDword, CP_RB_CNTL)
|
||||
|
||||
//address must be 4 byte aligned
|
||||
//low 2 bits encode two different fields?
|
||||
//xboxkrnl just does addr |2 when assigning
|
||||
//XE_GPU_REGISTER(0x70C, kDword, CP_RB_RPTR_ADDR)
|
||||
// address must be 4 byte aligned
|
||||
// low 2 bits encode two different fields?
|
||||
// xboxkrnl just does addr |2 when assigning
|
||||
// XE_GPU_REGISTER(0x70C, kDword, CP_RB_RPTR_ADDR)
|
||||
XE_GPU_REGISTER(0x0815, kDword, MC0_PERFCOUNTER0_SELECT)
|
||||
XE_GPU_REGISTER(0x0816, kDword, MC0_PERFCOUNTER0_HI)
|
||||
XE_GPU_REGISTER(0x0817, kDword, MC0_PERFCOUNTER0_LOW)
|
||||
|
@ -72,13 +73,13 @@ XE_GPU_REGISTER(0x0855, kDword, MC1_PERFCOUNTER0_SELECT)
|
|||
XE_GPU_REGISTER(0x0856, kDword, MC1_PERFCOUNTER0_HI)
|
||||
XE_GPU_REGISTER(0x0857, kDword, MC1_PERFCOUNTER0_LOW)
|
||||
|
||||
//base GPU virtual address of the xps region. Most guests write 0xC0100000 here
|
||||
// base GPU virtual address of the xps region. Most guests write 0xC0100000 here
|
||||
XE_GPU_REGISTER(0x0A02, kDword, XPS_BASE)
|
||||
//will usually be set higher, but is effectively 0x700000 bytes long
|
||||
// will usually be set higher, but is effectively 0x700000 bytes long
|
||||
XE_GPU_REGISTER(0x0A03, kDword, XPS_SIZE)
|
||||
//usually 0xC0000000
|
||||
// usually 0xC0000000
|
||||
XE_GPU_REGISTER(0x0A04, kDword, WRITEBACK_BASE)
|
||||
//usually 0x0100000
|
||||
// usually 0x0100000
|
||||
XE_GPU_REGISTER(0x0A05, kDword, WRITEBACK_SIZE)
|
||||
|
||||
XE_GPU_REGISTER(0x0A18, kDword, MH_PERFCOUNTER0_SELECT)
|
||||
|
@ -279,8 +280,8 @@ XE_GPU_REGISTER(0x0F0C, kDword, BC_PERFCOUNTER2_LOW)
|
|||
XE_GPU_REGISTER(0x0F0D, kDword, BC_PERFCOUNTER2_HI)
|
||||
XE_GPU_REGISTER(0x0F0E, kDword, BC_PERFCOUNTER3_LOW)
|
||||
XE_GPU_REGISTER(0x0F0F, kDword, BC_PERFCOUNTER3_HI)
|
||||
//src is flash_xam.xex
|
||||
//XE_GPU_REGISTER(0x0F12, RB_SIDEBAND_DATA,
|
||||
// src is flash_xam.xex
|
||||
// XE_GPU_REGISTER(0x0F12, RB_SIDEBAND_DATA,
|
||||
|
||||
XE_GPU_REGISTER(0x1004, kDword, HZ_PERFCOUNTER0_SELECT)
|
||||
XE_GPU_REGISTER(0x1005, kDword, HZ_PERFCOUNTER0_HI)
|
||||
|
|
|
@ -231,8 +231,7 @@ class RenderTargetCache {
|
|||
: register_file_(register_file),
|
||||
draw_resolution_scale_x_(draw_resolution_scale_x),
|
||||
draw_resolution_scale_y_(draw_resolution_scale_y),
|
||||
draw_extent_estimator_(register_file, memory, trace_writer)
|
||||
{
|
||||
draw_extent_estimator_(register_file, memory, trace_writer) {
|
||||
assert_not_zero(draw_resolution_scale_x);
|
||||
assert_not_zero(draw_resolution_scale_y);
|
||||
}
|
||||
|
|
|
@ -209,10 +209,10 @@ class SharedMemory {
|
|||
uint64_t valid_and_gpu_resolved;
|
||||
};
|
||||
|
||||
//chrispy: todo, systempageflagsblock should be 3 different arrays
|
||||
// Flags for each 64 system pages, interleaved as blocks, so bit scan can be
|
||||
// used to quickly extract ranges.
|
||||
// std::vector<SystemPageFlagsBlock> system_page_flags_;
|
||||
// chrispy: todo, systempageflagsblock should be 3 different arrays
|
||||
// Flags for each 64 system pages, interleaved as blocks, so bit scan can be
|
||||
// used to quickly extract ranges.
|
||||
// std::vector<SystemPageFlagsBlock> system_page_flags_;
|
||||
|
||||
uint64_t *system_page_flags_valid_ = nullptr,
|
||||
*system_page_flags_valid_and_gpu_written_ = nullptr,
|
||||
|
|
|
@ -108,7 +108,8 @@ class TextureCache {
|
|||
// generate a mask of all bits from before the first index, and xor it with
|
||||
// all bits before the last index this produces a mask covering only the
|
||||
// bits between first and last
|
||||
uint32_t res = ((1U << first_index) - 1) ^ static_cast<uint32_t>((1ULL << (last_index + 1)) - 1ULL);
|
||||
uint32_t res = ((1U << first_index) - 1) ^
|
||||
static_cast<uint32_t>((1ULL << (last_index + 1)) - 1ULL);
|
||||
// todo: check that this is right
|
||||
|
||||
texture_bindings_in_sync_ &= ~res;
|
||||
|
|
|
@ -1,64 +1,64 @@
|
|||
FORMAT_INFO(k_1_REVERSE, kUncompressed, 1, 1, 1),
|
||||
FORMAT_INFO(k_1, kUncompressed, 1, 1, 1),
|
||||
FORMAT_INFO(k_8, kResolvable, 1, 1, 8),
|
||||
FORMAT_INFO(k_1_5_5_5, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_5_6_5, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_6_5_5, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_8_8_8_8, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_2_10_10_10, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_8_A, kResolvable, 1, 1, 8),
|
||||
FORMAT_INFO(k_8_B, kResolvable, 1, 1, 8),
|
||||
FORMAT_INFO(k_8_8, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_Cr_Y1_Cb_Y0_REP, kCompressed, 2, 1, 16),
|
||||
FORMAT_INFO(k_Y1_Cr_Y0_Cb_REP, kCompressed, 2, 1, 16),
|
||||
FORMAT_INFO(k_16_16_EDRAM, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_8_8_8_8_A, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_4_4_4_4, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_10_11_11, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_11_11_10, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_DXT1, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT2_3, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_DXT4_5, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_16_16_16_16_EDRAM, kUncompressed, 1, 1, 64),
|
||||
FORMAT_INFO(k_24_8, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_24_8_FLOAT, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_16, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_16_16_16_16, kResolvable, 1, 1, 64),
|
||||
FORMAT_INFO(k_16_EXPAND, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_EXPAND, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_16_16_16_16_EXPAND, kUncompressed, 1, 1, 64),
|
||||
FORMAT_INFO(k_16_FLOAT, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_FLOAT, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_16_16_16_16_FLOAT, kResolvable, 1, 1, 64),
|
||||
FORMAT_INFO(k_32, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_32_32, kUncompressed, 1, 1, 64),
|
||||
FORMAT_INFO(k_32_32_32_32, kUncompressed, 1, 1, 128),
|
||||
FORMAT_INFO(k_32_FLOAT, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_32_32_FLOAT, kResolvable, 1, 1, 64),
|
||||
FORMAT_INFO(k_32_32_32_32_FLOAT, kResolvable, 1, 1, 128),
|
||||
FORMAT_INFO(k_32_AS_8, kCompressed, 4, 1, 8),
|
||||
FORMAT_INFO(k_32_AS_8_8, kCompressed, 2, 1, 16),
|
||||
FORMAT_INFO(k_16_MPEG, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_MPEG, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_8_INTERLACED, kUncompressed, 1, 1, 8),
|
||||
FORMAT_INFO(k_32_AS_8_INTERLACED, kCompressed, 4, 1, 8),
|
||||
FORMAT_INFO(k_32_AS_8_8_INTERLACED, kCompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_INTERLACED, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_MPEG_INTERLACED, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_MPEG_INTERLACED, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_DXN, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_8_8_8_8_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_DXT1_AS_16_16_16_16, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT2_3_AS_16_16_16_16, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_DXT4_5_AS_16_16_16_16, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_2_10_10_10_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_10_11_11_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_11_11_10_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_32_32_32_FLOAT, kUncompressed, 1, 1, 96),
|
||||
FORMAT_INFO(k_DXT3A, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT5A, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_CTX1, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT3A_AS_1_1_1_1, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_8_8_8_8_GAMMA_EDRAM, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_2_10_10_10_FLOAT_EDRAM, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_1, kUncompressed, 1, 1, 1),
|
||||
FORMAT_INFO(k_8, kResolvable, 1, 1, 8),
|
||||
FORMAT_INFO(k_1_5_5_5, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_5_6_5, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_6_5_5, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_8_8_8_8, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_2_10_10_10, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_8_A, kResolvable, 1, 1, 8),
|
||||
FORMAT_INFO(k_8_B, kResolvable, 1, 1, 8),
|
||||
FORMAT_INFO(k_8_8, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_Cr_Y1_Cb_Y0_REP, kCompressed, 2, 1, 16),
|
||||
FORMAT_INFO(k_Y1_Cr_Y0_Cb_REP, kCompressed, 2, 1, 16),
|
||||
FORMAT_INFO(k_16_16_EDRAM, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_8_8_8_8_A, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_4_4_4_4, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_10_11_11, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_11_11_10, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_DXT1, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT2_3, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_DXT4_5, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_16_16_16_16_EDRAM, kUncompressed, 1, 1, 64),
|
||||
FORMAT_INFO(k_24_8, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_24_8_FLOAT, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_16, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_16_16_16_16, kResolvable, 1, 1, 64),
|
||||
FORMAT_INFO(k_16_EXPAND, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_EXPAND, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_16_16_16_16_EXPAND, kUncompressed, 1, 1, 64),
|
||||
FORMAT_INFO(k_16_FLOAT, kResolvable, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_FLOAT, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_16_16_16_16_FLOAT, kResolvable, 1, 1, 64),
|
||||
FORMAT_INFO(k_32, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_32_32, kUncompressed, 1, 1, 64),
|
||||
FORMAT_INFO(k_32_32_32_32, kUncompressed, 1, 1, 128),
|
||||
FORMAT_INFO(k_32_FLOAT, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_32_32_FLOAT, kResolvable, 1, 1, 64),
|
||||
FORMAT_INFO(k_32_32_32_32_FLOAT, kResolvable, 1, 1, 128),
|
||||
FORMAT_INFO(k_32_AS_8, kCompressed, 4, 1, 8),
|
||||
FORMAT_INFO(k_32_AS_8_8, kCompressed, 2, 1, 16),
|
||||
FORMAT_INFO(k_16_MPEG, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_MPEG, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_8_INTERLACED, kUncompressed, 1, 1, 8),
|
||||
FORMAT_INFO(k_32_AS_8_INTERLACED, kCompressed, 4, 1, 8),
|
||||
FORMAT_INFO(k_32_AS_8_8_INTERLACED, kCompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_INTERLACED, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_MPEG_INTERLACED, kUncompressed, 1, 1, 16),
|
||||
FORMAT_INFO(k_16_16_MPEG_INTERLACED, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_DXN, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_8_8_8_8_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_DXT1_AS_16_16_16_16, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT2_3_AS_16_16_16_16, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_DXT4_5_AS_16_16_16_16, kCompressed, 4, 4, 8),
|
||||
FORMAT_INFO(k_2_10_10_10_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_10_11_11_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_11_11_10_AS_16_16_16_16, kResolvable, 1, 1, 32),
|
||||
FORMAT_INFO(k_32_32_32_FLOAT, kUncompressed, 1, 1, 96),
|
||||
FORMAT_INFO(k_DXT3A, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT5A, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_CTX1, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_DXT3A_AS_1_1_1_1, kCompressed, 4, 4, 4),
|
||||
FORMAT_INFO(k_8_8_8_8_GAMMA_EDRAM, kUncompressed, 1, 1, 32),
|
||||
FORMAT_INFO(k_2_10_10_10_FLOAT_EDRAM, kUncompressed, 1, 1, 32),
|
|
@ -24,6 +24,7 @@
|
|||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/packet_disassembler.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
|
@ -32,7 +33,6 @@
|
|||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/gpu/packet_disassembler.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/kernel/user_module.h"
|
||||
#include "xenia/ui/vulkan/vulkan_presenter.h"
|
||||
|
|
|
@ -10,10 +10,8 @@
|
|||
#ifndef XENIA_GPU_XENOS_H_
|
||||
#define XENIA_GPU_XENOS_H_
|
||||
|
||||
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
#include "xenia/base/memory.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -421,7 +419,7 @@ float Float7e3To32(uint32_t f10);
|
|||
// floating-point number.
|
||||
// Converts an IEEE-754 32-bit floating-point number to Xenos floating-point
|
||||
// depth, rounding to the nearest even or towards zero.
|
||||
XE_NOALIAS
|
||||
XE_NOALIAS
|
||||
uint32_t Float32To20e4(float f32, bool round_to_nearest_even) noexcept;
|
||||
// Converts Xenos floating-point depth in bits 0:23 (not clamping) to an
|
||||
// IEEE-754 32-bit floating-point number.
|
||||
|
|
|
@ -54,9 +54,7 @@ struct X_FILE_FS_ATTRIBUTE_INFORMATION {
|
|||
};
|
||||
static_assert_size(X_FILE_FS_ATTRIBUTE_INFORMATION, 16);
|
||||
|
||||
enum X_FILE_DEVICE_TYPE : uint32_t {
|
||||
FILE_DEVICE_UNKNOWN = 0x22
|
||||
};
|
||||
enum X_FILE_DEVICE_TYPE : uint32_t { FILE_DEVICE_UNKNOWN = 0x22 };
|
||||
|
||||
struct X_FILE_FS_DEVICE_INFORMATION {
|
||||
be<X_FILE_DEVICE_TYPE> device_type;
|
||||
|
|
|
@ -1322,7 +1322,7 @@ void KernelState::InitializeKernelGuestGlobals() {
|
|||
block->ObSymbolicLinkObjectType.delete_proc =
|
||||
kernel_trampoline_group_.NewLongtermTrampoline(DeleteSymlink);
|
||||
|
||||
#define offsetof32(s, m) static_cast<uint32_t>( offsetof(s, m) )
|
||||
#define offsetof32(s, m) static_cast<uint32_t>(offsetof(s, m))
|
||||
|
||||
host_object_type_enum_to_guest_object_type_ptr_ = {
|
||||
{XObject::Type::Event,
|
||||
|
|
|
@ -651,7 +651,8 @@ void UserModule::Dump() {
|
|||
|
||||
for (uint32_t i = 0; i < opt_alternate_title_id->count(); i++) {
|
||||
if (opt_alternate_title_id->values[i] != 0) {
|
||||
title_ids.append(fmt::format(" {:08X},", opt_alternate_title_id->values[i]));
|
||||
title_ids.append(
|
||||
fmt::format(" {:08X},", opt_alternate_title_id->values[i]));
|
||||
}
|
||||
}
|
||||
// Remove last character as it is not necessary
|
||||
|
|
|
@ -117,7 +117,7 @@ class UserModule : public XModule {
|
|||
bool is_dll_module_ = false;
|
||||
uint32_t entry_point_ = 0;
|
||||
uint32_t stack_size_ = 0;
|
||||
uint32_t workspace_size_ = 384*1024;
|
||||
uint32_t workspace_size_ = 384 * 1024;
|
||||
|
||||
uint32_t mod_checksum_ = 0;
|
||||
uint32_t time_date_stamp_ = 0;
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2023 Xenia Canary. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
* Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD
|
||||
*license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
|
@ -95,9 +95,10 @@ bool GrowHandleTable(uint32_t table_ptr, PPCContext* context) {
|
|||
/*
|
||||
copy old bucket list contents to new, larger bucket list
|
||||
*/
|
||||
memcpy(context->TranslateVirtual(new_dynamic_buckets),
|
||||
context->TranslateVirtual(table->table_dynamic_buckets),
|
||||
sizeof(uint32_t) * (new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET));
|
||||
memcpy(
|
||||
context->TranslateVirtual(new_dynamic_buckets),
|
||||
context->TranslateVirtual(table->table_dynamic_buckets),
|
||||
sizeof(uint32_t) * (new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET));
|
||||
|
||||
if (context->TranslateVirtualBE<uint32_t>(table->table_dynamic_buckets) !=
|
||||
&table->table_static_buckets[0]) {
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2023 Xenia Canary. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
* Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD
|
||||
*license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
|
@ -26,7 +26,7 @@ struct X_HANDLE_TABLE {
|
|||
xe::be<uint32_t> table_dynamic_buckets;
|
||||
xe::be<uint32_t> table_static_buckets[8];
|
||||
X_KSPINLOCK table_lock;
|
||||
//used as unknown arg 3 to pool allocations
|
||||
// used as unknown arg 3 to pool allocations
|
||||
uint8_t unk_pool_arg_34;
|
||||
uint8_t handle_high_byte;
|
||||
uint8_t unk_36;
|
||||
|
|
|
@ -25,5 +25,5 @@ struct X_OBJECT_CREATE_INFORMATION;
|
|||
namespace xe::kernel::util {
|
||||
class NativeList;
|
||||
class ObjectTable;
|
||||
}
|
||||
} // namespace xe::kernel::util
|
||||
#endif
|
|
@ -168,8 +168,8 @@ static void XeInsertHeadList(uint32_t list_head, X_LIST_ENTRY* entry,
|
|||
template <typename VirtualTranslator>
|
||||
static void XeInsertHeadList(X_LIST_ENTRY* list_head, X_LIST_ENTRY* entry,
|
||||
VirtualTranslator context) {
|
||||
XeInsertHeadList(list_head, XeGuestList(list_head, context),
|
||||
entry, XeGuestList(entry, context), context);
|
||||
XeInsertHeadList(list_head, XeGuestList(list_head, context), entry,
|
||||
XeGuestList(entry, context), context);
|
||||
}
|
||||
|
||||
template <typename TObject, size_t EntryListOffset>
|
||||
|
@ -216,7 +216,7 @@ struct X_TYPED_LIST : public X_LIST_ENTRY {
|
|||
|
||||
uint32_t end() { return vt->HostToGuestVirtual(thiz); }
|
||||
};
|
||||
template<typename VirtualTranslator>
|
||||
template <typename VirtualTranslator>
|
||||
ForwardIteratorBegin<VirtualTranslator> IterateForward(VirtualTranslator vt) {
|
||||
return ForwardIteratorBegin<VirtualTranslator>{vt, this};
|
||||
}
|
||||
|
@ -227,13 +227,14 @@ struct X_TYPED_LIST : public X_LIST_ENTRY {
|
|||
}
|
||||
template <typename VirtualTranslator>
|
||||
void InsertHead(TObject* entry, VirtualTranslator translator) {
|
||||
XeInsertHeadList(static_cast<X_LIST_ENTRY*>(this), ObjectListEntry(entry), translator);
|
||||
XeInsertHeadList(static_cast<X_LIST_ENTRY*>(this), ObjectListEntry(entry),
|
||||
translator);
|
||||
}
|
||||
template <typename VirtualTranslator>
|
||||
void InsertTail(TObject* entry, VirtualTranslator translator) {
|
||||
XeInsertTailList(this, ObjectListEntry(entry), translator);
|
||||
}
|
||||
template<typename VirtualTranslator>
|
||||
template <typename VirtualTranslator>
|
||||
bool empty(VirtualTranslator vt) const {
|
||||
return vt->TranslateVirtual<X_LIST_ENTRY*>(flink_ptr) == this;
|
||||
}
|
||||
|
|
|
@ -111,8 +111,8 @@ class ObjectTable {
|
|||
|
||||
// Generic lookup
|
||||
template <>
|
||||
object_ref<XObject> ObjectTable::LookupObject<XObject>(
|
||||
X_HANDLE handle, bool already_locked);
|
||||
object_ref<XObject> ObjectTable::LookupObject<XObject>(X_HANDLE handle,
|
||||
bool already_locked);
|
||||
|
||||
} // namespace util
|
||||
} // namespace kernel
|
||||
|
|
|
@ -35,8 +35,7 @@ using PPCContext = xe::cpu::ppc::PPCContext;
|
|||
library_name, ordinals::export_name, \
|
||||
(xe::cpu::xe_kernel_export_shim_fn)export_name##_entry);
|
||||
|
||||
#define SHIM_MEM_ADDR(a) \
|
||||
((a) ? ppc_context->TranslateVirtual(a) : nullptr)
|
||||
#define SHIM_MEM_ADDR(a) ((a) ? ppc_context->TranslateVirtual(a) : nullptr)
|
||||
|
||||
#define SHIM_MEM_8(a) xe::load_and_swap<uint8_t>(SHIM_MEM_ADDR(a))
|
||||
#define SHIM_MEM_16(a) xe::load_and_swap<uint16_t>(SHIM_MEM_ADDR(a))
|
||||
|
@ -158,9 +157,8 @@ class Param {
|
|||
} else {
|
||||
uint32_t stack_ptr =
|
||||
uint32_t(init.ppc_context->r[1]) + 0x54 + (ordinal_ - 8) * 8;
|
||||
*out_value = xe::load_and_swap<V>(
|
||||
init.ppc_context->TranslateVirtual(
|
||||
stack_ptr));
|
||||
*out_value =
|
||||
xe::load_and_swap<V>(init.ppc_context->TranslateVirtual(stack_ptr));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -216,6 +214,7 @@ class ContextParam : public Param {
|
|||
X_KPCR* GetPCR() const { return TranslateGPR<X_KPCR*>(13); }
|
||||
|
||||
XThread* CurrentXThread() const;
|
||||
|
||||
protected:
|
||||
PPCContext* XE_RESTRICT ctx_;
|
||||
};
|
||||
|
@ -223,10 +222,7 @@ class ContextParam : public Param {
|
|||
class PointerParam : public ParamBase<uint32_t> {
|
||||
public:
|
||||
PointerParam(Init& init) : ParamBase(init) {
|
||||
host_ptr_ =
|
||||
value_
|
||||
? init.ppc_context->TranslateVirtual(value_)
|
||||
: nullptr;
|
||||
host_ptr_ = value_ ? init.ppc_context->TranslateVirtual(value_) : nullptr;
|
||||
}
|
||||
PointerParam(void* host_ptr) : ParamBase(), host_ptr_(host_ptr) {}
|
||||
PointerParam& operator=(void*& other) {
|
||||
|
@ -296,10 +292,7 @@ class StringPointerParam : public ParamBase<uint32_t> {
|
|||
public:
|
||||
StringPointerParam(Init& init) : ParamBase(init) {
|
||||
host_ptr_ =
|
||||
value_
|
||||
? init.ppc_context->TranslateVirtual<CHAR*>(
|
||||
value_)
|
||||
: nullptr;
|
||||
value_ ? init.ppc_context->TranslateVirtual<CHAR*>(value_) : nullptr;
|
||||
}
|
||||
StringPointerParam(CHAR* host_ptr) : ParamBase(), host_ptr_(host_ptr) {}
|
||||
StringPointerParam& operator=(const CHAR*& other) {
|
||||
|
@ -323,9 +316,7 @@ class TypedPointerParam : public ParamBase<uint32_t> {
|
|||
public:
|
||||
TypedPointerParam(Init& init) : ParamBase(init) {
|
||||
host_ptr_ =
|
||||
value_ ? init.ppc_context->TranslateVirtual<T*>(
|
||||
value_)
|
||||
: nullptr;
|
||||
value_ ? init.ppc_context->TranslateVirtual<T*>(value_) : nullptr;
|
||||
}
|
||||
TypedPointerParam(T* host_ptr) : ParamBase(), host_ptr_(host_ptr) {}
|
||||
TypedPointerParam& operator=(const T*& other) {
|
||||
|
|
|
@ -216,7 +216,7 @@ std::vector<XdbfViewTable> XdbfWrapper::GetStatsView() const {
|
|||
}
|
||||
|
||||
const uint8_t* XdbfWrapper::ReadXLast(uint32_t& compressed_size,
|
||||
uint32_t& decompressed_size) const {
|
||||
uint32_t& decompressed_size) const {
|
||||
auto xlast_table = GetEntry(XdbfSection::kMetadata, kXdbfIdXsrc);
|
||||
if (!xlast_table) {
|
||||
return nullptr;
|
||||
|
|
|
@ -187,8 +187,7 @@ class XdbfWrapper {
|
|||
XdbfPropertyTableEntry GetProperty(const uint32_t id) const;
|
||||
XdbfContextTableEntry GetContext(const uint32_t id) const;
|
||||
std::vector<XdbfViewTable> GetStatsView() const;
|
||||
XdbfSharedView GetSharedView(const uint8_t* ptr,
|
||||
uint32_t& byte_count) const;
|
||||
XdbfSharedView GetSharedView(const uint8_t* ptr, uint32_t& byte_count) const;
|
||||
|
||||
void GetPropertyBagMetadata(const uint8_t* ptr, uint32_t& byte_count,
|
||||
std::vector<xe::be<uint32_t>>& contexts,
|
||||
|
@ -196,7 +195,8 @@ class XdbfWrapper {
|
|||
|
||||
XdbfPropertyBag GetMatchCollection() const;
|
||||
|
||||
const uint8_t* ReadXLast(uint32_t& compressed_size, uint32_t& decompressed_size) const;
|
||||
const uint8_t* ReadXLast(uint32_t& compressed_size,
|
||||
uint32_t& decompressed_size) const;
|
||||
|
||||
private:
|
||||
const uint8_t* data_ = nullptr;
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
#include "xenia/kernel/xam/content_manager.h"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/filesystem.h"
|
||||
|
|
|
@ -33,7 +33,18 @@
|
|||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
|
||||
DEFINE_int32(avpack, 8, "Video modes", "Video");
|
||||
DEFINE_int32(avpack, 8,
|
||||
"Video modes\n"
|
||||
" 0 = PAL-60 Component (SD)\n"
|
||||
" 1 = Unused\n"
|
||||
" 2 = PAL-60 SCART\n"
|
||||
" 3 = 480p Component (HD)\n"
|
||||
" 4 = HDMI+A\n"
|
||||
" 5 = PAL-60 Composite/S-Video\n"
|
||||
" 6 = VGA\n"
|
||||
" 7 = TV PAL-60\n"
|
||||
" 8 = HDMI (default)",
|
||||
"Video");
|
||||
DECLARE_int32(user_country);
|
||||
DECLARE_int32(user_language);
|
||||
|
||||
|
|
|
@ -431,7 +431,8 @@ dword_result_t XamGetLocaleEx_entry(dword_t max_country_id,
|
|||
static_cast<uint8_t>(max_locale_id));
|
||||
}
|
||||
DECLARE_XAM_EXPORT1(XamGetLocaleEx, kLocale, kImplemented);
|
||||
//originally a switch table, wrote a script to extract the values for all possible cases
|
||||
// originally a switch table, wrote a script to extract the values for all
|
||||
// possible cases
|
||||
|
||||
static constexpr uint8_t XamLocaleDateFmtTable[] = {
|
||||
2, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 1, 4, 2, 3, 1, 2, 2, 3,
|
||||
|
|
|
@ -110,10 +110,15 @@ void XamModule::SaveLoaderData() {
|
|||
|
||||
std::filesystem::path host_path = loader_data_.host_path;
|
||||
std::string launch_path = loader_data_.launch_path;
|
||||
const std::string launch_prefix = "game:\\";
|
||||
if (launch_path.compare(0, launch_prefix.length(), launch_prefix) == 0) {
|
||||
launch_path = launch_path.substr(launch_prefix.length());
|
||||
}
|
||||
|
||||
auto remove_prefix = [&launch_path](std::string& prefix) {
|
||||
if (launch_path.compare(0, prefix.length(), prefix) == 0) {
|
||||
launch_path = launch_path.substr(prefix.length());
|
||||
}
|
||||
};
|
||||
|
||||
remove_prefix(std::string("game:\\"));
|
||||
remove_prefix(std::string("d:\\"));
|
||||
|
||||
if (host_path.extension() == ".xex") {
|
||||
host_path.remove_filename();
|
||||
|
|
|
@ -1083,7 +1083,7 @@ dword_result_t NetDll_XNetRegisterKey_entry(dword_t caller, lpdword_t key_id,
|
|||
DECLARE_XAM_EXPORT1(NetDll_XNetRegisterKey, kNetworking, kStub);
|
||||
|
||||
dword_result_t NetDll_XNetUnregisterKey_entry(dword_t caller, lpdword_t key_id,
|
||||
lpdword_t exchange_key) {
|
||||
lpdword_t exchange_key) {
|
||||
return 0;
|
||||
}
|
||||
DECLARE_XAM_EXPORT1(NetDll_XNetUnregisterKey, kNetworking, kStub);
|
||||
|
|
|
@ -56,7 +56,7 @@ dword_result_t XamTaskSchedule_entry(lpvoid_t callback,
|
|||
auto option = ctx->TranslateVirtual<XAM_TASK_ARGS*>(optional_ptr);
|
||||
|
||||
auto v1 = option->value1;
|
||||
auto v2 = option->value2; //typically 0?
|
||||
auto v2 = option->value2; // typically 0?
|
||||
|
||||
XELOGI("Got xam task args: v1 = {:08X}, v2 = {:08X}", v1, v2);
|
||||
}
|
||||
|
@ -66,9 +66,9 @@ dword_result_t XamTaskSchedule_entry(lpvoid_t callback,
|
|||
// Stack must be aligned to 16kb pages
|
||||
stack_size = std::max((uint32_t)0x4000, ((stack_size + 0xFFF) & 0xFFFFF000));
|
||||
|
||||
auto thread =
|
||||
object_ref<XThread>(new XThread(kernel_state(), stack_size, 0, callback,
|
||||
message.guest_address(), 0, true, false, kernel_state()->GetSystemProcess()));
|
||||
auto thread = object_ref<XThread>(new XThread(
|
||||
kernel_state(), stack_size, 0, callback, message.guest_address(), 0, true,
|
||||
false, kernel_state()->GetSystemProcess()));
|
||||
|
||||
X_STATUS result = thread->Create();
|
||||
|
||||
|
|
|
@ -738,7 +738,7 @@ dword_result_t XamUserCreateStatsEnumerator_entry(
|
|||
}
|
||||
|
||||
if (buffer_size_ptr) {
|
||||
*buffer_size_ptr = 0; // sizeof(X_STATS_DETAILS) * stats_ptr->stats_amount;
|
||||
*buffer_size_ptr = 0; // sizeof(X_STATS_DETAILS) * stats_ptr->stats_amount;
|
||||
}
|
||||
|
||||
auto e = object_ref<XUserStatsEnumerator>(
|
||||
|
|
|
@ -686,12 +686,12 @@ dword_result_t XeKeysHmacShaUsingKey_entry(lpvoid_t obscured_key,
|
|||
}
|
||||
DECLARE_XBOXKRNL_EXPORT1(XeKeysHmacShaUsingKey, kNone, kImplemented);
|
||||
|
||||
//going off of usage in some hbrew xex
|
||||
//0 and 1 appear to be devkit, 2 is retail
|
||||
//we default to saying we're retail
|
||||
dword_result_t XeKeysGetConsoleType_entry(lpdword_t type_out) {
|
||||
*type_out = 2;
|
||||
return 0;
|
||||
// going off of usage in some hbrew xex
|
||||
// 0 and 1 appear to be devkit, 2 is retail
|
||||
// we default to saying we're retail
|
||||
dword_result_t XeKeysGetConsoleType_entry(lpdword_t type_out) {
|
||||
*type_out = 2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECLARE_XBOXKRNL_EXPORT1(XeKeysGetConsoleType, kNone, kImplemented);
|
||||
|
|
|
@ -120,7 +120,7 @@ void HandleCppException(pointer_t<X_EXCEPTION_RECORD> record) {
|
|||
kernel_memory()->TranslateVirtual<x_s__CatchableTypeArray*>(
|
||||
throw_info->catchable_type_array_ptr);
|
||||
|
||||
//xe::debugging::Break();
|
||||
// xe::debugging::Break();
|
||||
XELOGE("Guest attempted to throw a C++ exception!");
|
||||
}
|
||||
|
||||
|
@ -138,9 +138,10 @@ void RtlRaiseException_entry(pointer_t<X_EXCEPTION_RECORD> record) {
|
|||
|
||||
// TODO(benvanik): unwinding.
|
||||
// This is going to suck.
|
||||
// xe::debugging::Break();
|
||||
// xe::debugging::Break();
|
||||
|
||||
//RtlRaiseException definitely wasn't a noreturn function, we can return safe-ish
|
||||
// RtlRaiseException definitely wasn't a noreturn function, we can return
|
||||
// safe-ish
|
||||
XELOGE("Guest attempted to trigger a breakpoint!");
|
||||
}
|
||||
DECLARE_XBOXKRNL_EXPORT2(RtlRaiseException, kDebug, kStub, kImportant);
|
||||
|
|
|
@ -723,8 +723,9 @@ dword_result_t IoCreateDevice_entry(dword_t driver_object,
|
|||
}
|
||||
DECLARE_XBOXKRNL_EXPORT1(IoCreateDevice, kFileSystem, kStub);
|
||||
|
||||
//supposed to invoke a callback on the driver object! its some sort of destructor function
|
||||
//intended to be called for all devices created from the driver
|
||||
// supposed to invoke a callback on the driver object! its some sort of
|
||||
// destructor function intended to be called for all devices created from the
|
||||
// driver
|
||||
void IoDeleteDevice_entry(dword_t device_ptr, const ppc_context_t& ctx) {
|
||||
if (device_ptr) {
|
||||
auto kernel_mem = ctx->kernel_state->memory();
|
||||
|
|
|
@ -697,7 +697,7 @@ DECLARE_XBOXKRNL_EXPORT1(ExAllocatePool, kMemory, kImplemented);
|
|||
|
||||
void xeFreePool(PPCContext* context, uint32_t base_address) {
|
||||
auto memory = context->kernel_state->memory();
|
||||
//if 4kb aligned, there is no pool header!
|
||||
// if 4kb aligned, there is no pool header!
|
||||
if ((base_address & (4096 - 1)) == 0) {
|
||||
memory->SystemHeapFree(base_address);
|
||||
} else {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue