[LINT] Linted files + Added lint job to CI

This commit is contained in:
Gliniak 2024-03-12 08:59:10 +01:00 committed by Radosław Gliński
parent e8afad8f8a
commit b9061e6292
117 changed files with 1177 additions and 883 deletions

View File

@ -44,37 +44,51 @@ on:
workflow_dispatch: workflow_dispatch:
jobs: jobs:
lint:
name: Lint
runs-on: windows-2022
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Check Clang-Format Version
run: clang-format --version
- name: Lint
run: .\xb lint --all
build-windows: build-windows:
name: Build (Windows) # runner.os can't be used here name: Build (Windows) # runner.os can't be used here
runs-on: windows-2022 runs-on: windows-2022
env: env:
POWERSHELL_TELEMETRY_OPTOUT: 1 POWERSHELL_TELEMETRY_OPTOUT: 1
needs: lint
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Setup - name: Setup
run: .\xb setup run: .\xb setup
- name: Build - name: Build
run: .\xb build --config=Release --target=src\xenia-app --target=src\xenia-vfs-dump run: .\xb build --config=Release --target=src\xenia-app
- name: Prepare artifacts - name: Prepare artifacts
run: | run: |
robocopy . build\bin\${{ runner.os }}\Release LICENSE /r:0 /w:0 robocopy . build\bin\${{ runner.os }}\Release LICENSE /r:0 /w:0
robocopy build\bin\${{ runner.os }}\Release artifacts\xenia_canary xenia_canary.exe xenia_canary.pdb LICENSE /r:0 /w:0 robocopy build\bin\${{ runner.os }}\Release artifacts\xenia_canary xenia_canary.exe xenia_canary.pdb LICENSE /r:0 /w:0
robocopy build\bin\${{ runner.os }}\Release artifacts\xenia-vfs-dump xenia-vfs-dump.exe xenia-vfs-dump.pdb LICENSE /r:0 /w:0
If ($LastExitCode -le 7) { echo "LastExitCode = $LastExitCode";$LastExitCode = 0 } If ($LastExitCode -le 7) { echo "LastExitCode = $LastExitCode";$LastExitCode = 0 }
- name: Upload xenia-vfs-dump artifacts
uses: actions/upload-artifact@v4
with:
name: xenia-vfs-dump_canary
path: artifacts\xenia-vfs-dump
if-no-files-found: error
- name: Upload xenia canary artifacts - name: Upload xenia canary artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: xenia_canary name: xenia_canary
path: artifacts\xenia_canary path: artifacts\xenia_canary
if-no-files-found: error if-no-files-found: error
- name: Create release - name: Create release
if: | if: |
github.repository == 'xenia-canary/xenia-canary' && github.repository == 'xenia-canary/xenia-canary' &&

View File

@ -61,8 +61,8 @@ class EmulatorWindow {
int32_t selected_title_index = -1; int32_t selected_title_index = -1;
static constexpr int64_t diff_in_ms( static constexpr int64_t diff_in_ms(
const steady_clock::time_point t1, const steady_clock::time_point t1,
const steady_clock::time_point t2) noexcept { const steady_clock::time_point t2) noexcept {
using ms = std::chrono::milliseconds; using ms = std::chrono::milliseconds;
return std::chrono::duration_cast<ms>(t1 - t2).count(); return std::chrono::duration_cast<ms>(t1 - t2).count();
} }

View File

@ -35,9 +35,10 @@
// and let the normal AudioSystem handling take it, to prevent duplicate // and let the normal AudioSystem handling take it, to prevent duplicate
// implementations. They can be found in xboxkrnl_audio_xma.cc // implementations. They can be found in xboxkrnl_audio_xma.cc
DEFINE_uint32( DEFINE_uint32(apu_max_queued_frames, 64,
apu_max_queued_frames, 64, "Allows changing max buffered audio frames to reduce audio "
"Allows changing max buffered audio frames to reduce audio delay. Minimum is 16.", "APU"); "delay. Minimum is 16.",
"APU");
namespace xe { namespace xe {
namespace apu { namespace apu {
@ -76,11 +77,14 @@ X_STATUS AudioSystem::Setup(kernel::KernelState* kernel_state) {
} }
worker_running_ = true; worker_running_ = true;
worker_thread_ = kernel::object_ref<kernel::XHostThread>( worker_thread_ =
new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
WorkerThreadMain(); kernel_state, 128 * 1024, 0,
return 0; [this]() {
}, kernel_state->GetSystemProcess())); WorkerThreadMain();
return 0;
},
kernel_state->GetSystemProcess()));
// As we run audio callbacks the debugger must be able to suspend us. // As we run audio callbacks the debugger must be able to suspend us.
worker_thread_->set_can_debugger_suspend(true); worker_thread_->set_can_debugger_suspend(true);
worker_thread_->set_name("Audio Worker"); worker_thread_->set_name("Audio Worker");

View File

@ -21,7 +21,6 @@ namespace conversion {
#if XE_ARCH_AMD64 #if XE_ARCH_AMD64
XE_NOINLINE XE_NOINLINE
static void _generic_sequential_6_BE_to_interleaved_6_LE( static void _generic_sequential_6_BE_to_interleaved_6_LE(
float* XE_RESTRICT output, const float* XE_RESTRICT input, float* XE_RESTRICT output, const float* XE_RESTRICT input,

View File

@ -14,7 +14,7 @@
#include <atomic> #include <atomic>
#include <mutex> #include <mutex>
#include <queue> #include <queue>
//#include <vector> // #include <vector>
#include "xenia/memory.h" #include "xenia/memory.h"
#include "xenia/xbox.h" #include "xenia/xbox.h"
@ -79,8 +79,8 @@ struct XMA_CONTEXT_DATA {
uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?) uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?)
// DWORD 3 // DWORD 3
uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset
// frame offset in bits // frame offset in bits
uint32_t parser_error_status : 6; // ? ParserErrorStatus/ParserErrorSet(?) uint32_t parser_error_status : 6; // ? ParserErrorStatus/ParserErrorSet(?)
// DWORD 4 // DWORD 4

View File

@ -18,8 +18,8 @@
#include "xenia/base/string_buffer.h" #include "xenia/base/string_buffer.h"
#include "xenia/cpu/processor.h" #include "xenia/cpu/processor.h"
#include "xenia/cpu/thread_state.h" #include "xenia/cpu/thread_state.h"
#include "xenia/kernel/xthread.h"
#include "xenia/kernel/kernel_state.h" #include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/xthread.h"
extern "C" { extern "C" {
#include "third_party/FFmpeg/libavutil/log.h" #include "third_party/FFmpeg/libavutil/log.h"
} // extern "C" } // extern "C"
@ -102,8 +102,7 @@ void av_log_callback(void* avcl, int level, const char* fmt, va_list va) {
StringBuffer buff; StringBuffer buff;
buff.AppendVarargs(fmt, va); buff.AppendVarargs(fmt, va);
xe::logging::AppendLogLineFormat(LogSrc::Apu, log_level, level_char, xe::logging::AppendLogLineFormat(LogSrc::Apu, log_level, level_char,
"ffmpeg: {}", "ffmpeg: {}", buff.to_string_view());
buff.to_string_view());
} }
X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
@ -141,11 +140,16 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
worker_running_ = true; worker_running_ = true;
work_event_ = xe::threading::Event::CreateAutoResetEvent(false); work_event_ = xe::threading::Event::CreateAutoResetEvent(false);
assert_not_null(work_event_); assert_not_null(work_event_);
worker_thread_ = kernel::object_ref<kernel::XHostThread>( worker_thread_ =
new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
WorkerThreadMain(); kernel_state, 128 * 1024, 0,
return 0; [this]() {
}, kernel_state->GetIdleProcess()));//this one doesnt need any process actually. never calls any guest code WorkerThreadMain();
return 0;
},
kernel_state
->GetIdleProcess())); // this one doesnt need any process
// actually. never calls any guest code
worker_thread_->set_name("XMA Decoder"); worker_thread_->set_name("XMA Decoder");
worker_thread_->set_can_debugger_suspend(true); worker_thread_->set_can_debugger_suspend(true);
worker_thread_->Create(); worker_thread_->Create();

View File

@ -39,8 +39,8 @@ class Clock {
// Host tick count. Generally QueryHostTickCount() should be used. // Host tick count. Generally QueryHostTickCount() should be used.
static uint64_t host_tick_count_platform(); static uint64_t host_tick_count_platform();
#if XE_CLOCK_RAW_AVAILABLE #if XE_CLOCK_RAW_AVAILABLE
//chrispy: the way msvc was ordering the branches was causing rdtsc to be speculatively executed each time // chrispy: the way msvc was ordering the branches was causing rdtsc to be
//the branch history was lost // speculatively executed each time the branch history was lost
XE_NOINLINE XE_NOINLINE
static uint64_t host_tick_count_raw(); static uint64_t host_tick_count_raw();
#endif #endif

View File

@ -41,9 +41,6 @@
"\n" \ "\n" \
"Set the cvar 'clock_source_raw' to 'false'."); "Set the cvar 'clock_source_raw' to 'false'.");
namespace xe { namespace xe {
// Getting the TSC frequency can be a bit tricky. This method here only works on // Getting the TSC frequency can be a bit tricky. This method here only works on
// Intel as it seems. There is no easy way to get the frequency outside of ring0 // Intel as it seems. There is no easy way to get the frequency outside of ring0
@ -75,8 +72,6 @@ uint64_t Clock::host_tick_frequency_raw() {
return 0; return 0;
} }
if (max_cpuid >= 0x15) { if (max_cpuid >= 0x15) {
// 15H Get TSC/Crystal ratio and Crystal Hz. // 15H Get TSC/Crystal ratio and Crystal Hz.
xe_cpu_cpuid(0x15, eax, ebx, ecx, edx); xe_cpu_cpuid(0x15, eax, ebx, ecx, edx);
@ -98,7 +93,6 @@ uint64_t Clock::host_tick_frequency_raw() {
return cpu_base_freq; return cpu_base_freq;
} }
CLOCK_FATAL("The clock frequency could not be determined."); CLOCK_FATAL("The clock frequency could not be determined.");
return 0; return 0;
} }

View File

@ -35,15 +35,14 @@ static bool has_shell_environment_variable() {
} }
void AttachConsole() { void AttachConsole() {
bool has_console = ::AttachConsole(ATTACH_PARENT_PROCESS) == TRUE;
bool has_console = ::AttachConsole(ATTACH_PARENT_PROCESS) == TRUE;
#if 0 #if 0
if (!has_console || !has_shell_environment_variable()) { if (!has_console || !has_shell_environment_variable()) {
// We weren't launched from a console, so just return. // We weren't launched from a console, so just return.
has_console_attached_ = false; has_console_attached_ = false;
return; return;
} }
#endif #endif
AllocConsole(); AllocConsole();
has_console_attached_ = true; has_console_attached_ = true;

View File

@ -172,8 +172,7 @@ CommandVar<T>::CommandVar(const char* name, T* default_value,
default_value_(*default_value), default_value_(*default_value),
current_value_(default_value), current_value_(default_value),
commandline_value_(), commandline_value_(),
description_(description) description_(description) {}
{}
template <class T> template <class T>
ConfigVar<T>::ConfigVar(const char* name, T* default_value, ConfigVar<T>::ConfigVar(const char* name, T* default_value,

View File

@ -457,7 +457,7 @@ static ArchFloatMask ArchANDFloatMask(ArchFloatMask x, ArchFloatMask y) {
XE_FORCEINLINE XE_FORCEINLINE
static uint32_t ArchFloatMaskSignbit(ArchFloatMask x) { static uint32_t ArchFloatMaskSignbit(ArchFloatMask x) {
return static_cast<uint32_t>(_mm_movemask_ps(x) &1); return static_cast<uint32_t>(_mm_movemask_ps(x) & 1);
} }
constexpr ArchFloatMask floatmask_zero{.0f}; constexpr ArchFloatMask floatmask_zero{.0f};
@ -606,12 +606,13 @@ union IDivExtraInfo {
} info; } info;
}; };
// returns magicnum multiplier // returns magicnum multiplier
static constexpr uint32_t PregenerateUint32Div(uint32_t _denom, uint32_t& out_extra) { static constexpr uint32_t PregenerateUint32Div(uint32_t _denom,
uint32_t& out_extra) {
IDivExtraInfo extra{}; IDivExtraInfo extra{};
uint32_t d = _denom; uint32_t d = _denom;
int p=0; int p = 0;
uint32_t nc=0, delta=0, q1=0, r1=0, q2=0, r2=0; uint32_t nc = 0, delta = 0, q1 = 0, r1 = 0, q2 = 0, r2 = 0;
struct { struct {
unsigned M; unsigned M;
int a; int a;
@ -662,7 +663,8 @@ static constexpr uint32_t ApplyUint32Div(uint32_t num, uint32_t mul,
extra.value_ = extradata; extra.value_ = extradata;
uint32_t result = static_cast<uint32_t>((static_cast<uint64_t>(num) * static_cast<uint64_t>(mul)) >> 32); uint32_t result = static_cast<uint32_t>(
(static_cast<uint64_t>(num) * static_cast<uint64_t>(mul)) >> 32);
if (extra.info.add_) { if (extra.info.add_) {
uint32_t addend = result + num; uint32_t addend = result + num;
addend = ((addend < result ? 0x80000000 : 0) | addend); addend = ((addend < result ? 0x80000000 : 0) | addend);
@ -672,7 +674,8 @@ static constexpr uint32_t ApplyUint32Div(uint32_t num, uint32_t mul,
} }
static constexpr uint32_t ApplyUint32UMod(uint32_t num, uint32_t mul, static constexpr uint32_t ApplyUint32UMod(uint32_t num, uint32_t mul,
uint32_t extradata, uint32_t original) { uint32_t extradata,
uint32_t original) {
uint32_t dived = ApplyUint32Div(num, mul, extradata); uint32_t dived = ApplyUint32Div(num, mul, extradata);
unsigned result = num - (dived * original); unsigned result = num - (dived * original);
@ -701,8 +704,7 @@ struct MagicDiv {
return extra.info.shift_; return extra.info.shift_;
} }
constexpr uint32_t GetMultiplier() const { return multiplier_; constexpr uint32_t GetMultiplier() const { return multiplier_; }
}
constexpr uint32_t Apply(uint32_t numerator) const { constexpr uint32_t Apply(uint32_t numerator) const {
return ApplyUint32Div(numerator, multiplier_, extradata_); return ApplyUint32Div(numerator, multiplier_, extradata_);
} }

View File

@ -180,7 +180,8 @@ static void vastcpy_impl_repmovs(CacheLine* XE_RESTRICT physaddr,
__movsq((unsigned long long*)physaddr, (unsigned long long*)rdmapping, __movsq((unsigned long long*)physaddr, (unsigned long long*)rdmapping,
written_length / 8); written_length / 8);
#else #else
memcpy((unsigned char*)physaddr, (const unsigned char*)rdmapping, written_length); memcpy((unsigned char*)physaddr, (const unsigned char*)rdmapping,
written_length);
#endif #endif
} }
XE_COLD XE_COLD
@ -331,17 +332,17 @@ void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
__m256i output1 = _mm256_shuffle_epi8(input1, shufmask); __m256i output1 = _mm256_shuffle_epi8(input1, shufmask);
__m256i output2 = _mm256_shuffle_epi8(input2, shufmask); __m256i output2 = _mm256_shuffle_epi8(input2, shufmask);
//chrispy: todo, benchmark this w/ and w/out these prefetches here on multiple machines // chrispy: todo, benchmark this w/ and w/out these prefetches here on multiple
//finding a good distance for prefetchw in particular is probably important // machines finding a good distance for prefetchw in particular is probably
//for when we're writing across 2 cachelines // important for when we're writing across 2 cachelines
#if 0 #if 0
if (i + 48 <= count) { if (i + 48 <= count) {
swcache::PrefetchNTA(&src[i + 32]); swcache::PrefetchNTA(&src[i + 32]);
if (amd64::GetFeatureFlags() & amd64::kX64EmitPrefetchW) { if (amd64::GetFeatureFlags() & amd64::kX64EmitPrefetchW) {
swcache::PrefetchW(&dest[i + 32]); swcache::PrefetchW(&dest[i + 32]);
} }
} }
#endif #endif
_mm256_storeu_si256(reinterpret_cast<__m256i*>(&dest[i]), output1); _mm256_storeu_si256(reinterpret_cast<__m256i*>(&dest[i]), output1);
_mm256_storeu_si256(reinterpret_cast<__m256i*>(&dest[i + 8]), output2); _mm256_storeu_si256(reinterpret_cast<__m256i*>(&dest[i + 8]), output2);
} }

View File

@ -17,10 +17,8 @@
#include <string> #include <string>
#include <string_view> #include <string_view>
#include "xenia/base/byte_order.h" #include "xenia/base/byte_order.h"
namespace xe { namespace xe {
namespace memory { namespace memory {

View File

@ -10,8 +10,8 @@
#ifndef XENIA_BASE_MUTEX_H_ #ifndef XENIA_BASE_MUTEX_H_
#define XENIA_BASE_MUTEX_H_ #define XENIA_BASE_MUTEX_H_
#include <mutex> #include <mutex>
#include "platform.h"
#include "memory.h" #include "memory.h"
#include "platform.h"
#define XE_ENABLE_FAST_WIN32_MUTEX 1 #define XE_ENABLE_FAST_WIN32_MUTEX 1
namespace xe { namespace xe {
@ -25,7 +25,7 @@ namespace xe {
*/ */
class alignas(4096) xe_global_mutex { class alignas(4096) xe_global_mutex {
XE_MAYBE_UNUSED XE_MAYBE_UNUSED
char detail[64]; char detail[64];
public: public:
@ -39,7 +39,7 @@ class alignas(4096) xe_global_mutex {
using global_mutex_type = xe_global_mutex; using global_mutex_type = xe_global_mutex;
class alignas(64) xe_fast_mutex { class alignas(64) xe_fast_mutex {
XE_MAYBE_UNUSED XE_MAYBE_UNUSED
char detail[64]; char detail[64];
public: public:

View File

@ -148,7 +148,7 @@
#if XE_COMPILER_HAS_GNU_EXTENSIONS == 1 #if XE_COMPILER_HAS_GNU_EXTENSIONS == 1
#define XE_LIKELY_IF(...) if (XE_LIKELY(__VA_ARGS__)) #define XE_LIKELY_IF(...) if (XE_LIKELY(__VA_ARGS__))
#define XE_UNLIKELY_IF(...) if (XE_UNLIKELY(__VA_ARGS__)) #define XE_UNLIKELY_IF(...) if (XE_UNLIKELY(__VA_ARGS__))
#define XE_MAYBE_UNUSED __attribute__((unused)) #define XE_MAYBE_UNUSED __attribute__((unused))
#else #else
#if __cplusplus >= 202002 #if __cplusplus >= 202002
#define XE_LIKELY_IF(...) if (!!(__VA_ARGS__)) [[likely]] #define XE_LIKELY_IF(...) if (!!(__VA_ARGS__)) [[likely]]
@ -157,7 +157,7 @@
#define XE_LIKELY_IF(...) if (!!(__VA_ARGS__)) #define XE_LIKELY_IF(...) if (!!(__VA_ARGS__))
#define XE_UNLIKELY_IF(...) if (!!(__VA_ARGS__)) #define XE_UNLIKELY_IF(...) if (!!(__VA_ARGS__))
#endif #endif
#define XE_MAYBE_UNUSED #define XE_MAYBE_UNUSED
#endif #endif
// only use __restrict if MSVC, for clang/gcc we can use -fstrict-aliasing which // only use __restrict if MSVC, for clang/gcc we can use -fstrict-aliasing which
// acts as __restrict across the board todo: __restrict is part of the type // acts as __restrict across the board todo: __restrict is part of the type

View File

@ -44,21 +44,21 @@
ntdll versions of functions often skip through a lot of extra garbage in ntdll versions of functions often skip through a lot of extra garbage in
KernelBase KernelBase
*/ */
#define XE_NTDLL_IMPORT(name, cls, clsvar) \ #define XE_NTDLL_IMPORT(name, cls, clsvar) \
static class cls { \ static class cls { \
public: \ public: \
FARPROC fn; \ FARPROC fn; \
cls() : fn(nullptr) { \ cls() : fn(nullptr) { \
auto ntdll = GetModuleHandleA("ntdll.dll"); \ auto ntdll = GetModuleHandleA("ntdll.dll"); \
if (ntdll) { \ if (ntdll) { \
fn = GetProcAddress(ntdll, #name); \ fn = GetProcAddress(ntdll, #name); \
} \ } \
} \ } \
template <typename TRet = void, typename... TArgs> \ template <typename TRet = void, typename... TArgs> \
inline TRet invoke(TArgs... args) { \ inline TRet invoke(TArgs... args) { \
return reinterpret_cast<TRet(NTAPI*)(TArgs...)>(fn)(args...); \ return reinterpret_cast<TRet(NTAPI*)(TArgs...)>(fn)(args...); \
} \ } \
inline operator bool() const { return fn != nullptr; } \ inline operator bool() const { return fn != nullptr; } \
} clsvar } clsvar
#else #else
#define XE_NTDLL_IMPORT(name, cls, clsvar) static constexpr bool clsvar = false #define XE_NTDLL_IMPORT(name, cls, clsvar) static constexpr bool clsvar = false

View File

@ -68,7 +68,6 @@ class RingBuffer {
ring_size_t offset_delta = write_offs - read_offs; ring_size_t offset_delta = write_offs - read_offs;
ring_size_t wrap_read_count = (cap - read_offs) + write_offs; ring_size_t wrap_read_count = (cap - read_offs) + write_offs;
if (XE_LIKELY(read_offs <= write_offs)) { if (XE_LIKELY(read_offs <= write_offs)) {
return offset_delta; // will be 0 if they are equal, semantically return offset_delta; // will be 0 if they are equal, semantically
// identical to old code (i checked the asm, msvc // identical to old code (i checked the asm, msvc

View File

@ -34,7 +34,6 @@ struct SimpleFreelist {
node->next_ = head_; node->next_ = head_;
head_ = node; head_ = node;
} }
void Reset() { head_ = nullptr; void Reset() { head_ = nullptr; }
}
}; };
} // namespace xe } // namespace xe

View File

@ -906,9 +906,9 @@ class PosixEvent : public PosixConditionHandle<Event> {
void Set() override { handle_.Signal(); } void Set() override { handle_.Signal(); }
void Reset() override { handle_.Reset(); } void Reset() override { handle_.Reset(); }
EventInfo Query() { EventInfo Query() {
EventInfo result{}; EventInfo result{};
assert_always(); assert_always();
return result; return result;
} }
void Pulse() override { void Pulse() override {
using namespace std::chrono_literals; using namespace std::chrono_literals;

View File

@ -33,7 +33,9 @@ using WaitItem = TimerQueueWaitItem;
*/ */
/* /*
edit: actually had to change it back, when i was testing it only worked because i fixed disruptorplus' code to compile (it gives wrong args to condition_variable::wait_until) but now builds edit: actually had to change it back, when i was testing it only worked
because i fixed disruptorplus' code to compile (it gives wrong args to
condition_variable::wait_until) but now builds
*/ */
using WaitStrat = dp::blocking_wait_strategy; using WaitStrat = dp::blocking_wait_strategy;
@ -205,7 +207,7 @@ void TimerQueueWaitItem::Disarm() {
spinner.spin_once(); spinner.spin_once();
} }
} }
//unused // unused
std::weak_ptr<WaitItem> QueueTimerOnce(std::function<void(void*)> callback, std::weak_ptr<WaitItem> QueueTimerOnce(std::function<void(void*)> callback,
void* userdata, void* userdata,
WaitItem::clock::time_point due) { WaitItem::clock::time_point due) {

View File

@ -78,7 +78,8 @@ class Backend {
virtual void InitializeBackendContext(void* ctx) {} virtual void InitializeBackendContext(void* ctx) {}
/* /*
Free any dynamically allocated data/resources that the backendcontext uses Free any dynamically allocated data/resources that the backendcontext
uses
*/ */
virtual void DeinitializeBackendContext(void* ctx) {} virtual void DeinitializeBackendContext(void* ctx) {}
virtual void SetGuestRoundingMode(void* ctx, unsigned int mode){}; virtual void SetGuestRoundingMode(void* ctx, unsigned int mode){};

View File

@ -314,7 +314,6 @@ SIMPLE_THREEOPERAND(vpshaw, xop_VPSHAW)
SIMPLE_THREEOPERAND(vpshad, xop_VPSHAD) SIMPLE_THREEOPERAND(vpshad, xop_VPSHAD)
SIMPLE_THREEOPERAND(vpshaq, xop_VPSHAQ) SIMPLE_THREEOPERAND(vpshaq, xop_VPSHAQ)
SIMPLE_THREEOPERAND(vpshlb, xop_VPSHLB) SIMPLE_THREEOPERAND(vpshlb, xop_VPSHLB)
SIMPLE_THREEOPERAND(vpshlw, xop_VPSHLW) SIMPLE_THREEOPERAND(vpshlw, xop_VPSHLW)
SIMPLE_THREEOPERAND(vpshld, xop_VPSHLD) SIMPLE_THREEOPERAND(vpshld, xop_VPSHLD)

View File

@ -924,7 +924,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
Xbyak::Label L18, L2, L35, L4, L9, L8, L10, L11, L12, L13, L1; Xbyak::Label L18, L2, L35, L4, L9, L8, L10, L11, L12, L13, L1;
Xbyak::Label LC1, _LCPI3_1; Xbyak::Label LC1, _LCPI3_1;
Xbyak::Label handle_denormal_input; Xbyak::Label handle_denormal_input;
Xbyak::Label specialcheck_1, convert_to_signed_inf_and_ret, handle_oddball_denormal; Xbyak::Label specialcheck_1, convert_to_signed_inf_and_ret,
handle_oddball_denormal;
auto emulate_lzcnt_helper_unary_reg = [this](auto& reg, auto& scratch_reg) { auto emulate_lzcnt_helper_unary_reg = [this](auto& reg, auto& scratch_reg) {
inLocalLabel(); inLocalLabel();
@ -941,19 +942,19 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
vmovd(r8d, xmm0); vmovd(r8d, xmm0);
vmovaps(xmm1, xmm0); vmovaps(xmm1, xmm0);
mov(ecx, r8d); mov(ecx, r8d);
//extract mantissa // extract mantissa
and_(ecx, 0x7fffff); and_(ecx, 0x7fffff);
mov(edx, ecx); mov(edx, ecx);
cmp(r8d, 0xff800000); cmp(r8d, 0xff800000);
jz(specialcheck_1, CodeGenerator::T_NEAR); jz(specialcheck_1, CodeGenerator::T_NEAR);
//is exponent zero? // is exponent zero?
test(r8d, 0x7f800000); test(r8d, 0x7f800000);
jne(L18); jne(L18);
test(ecx, ecx); test(ecx, ecx);
jne(L2); jne(L2);
L(L18); L(L18);
//extract biased exponent and unbias // extract biased exponent and unbias
mov(r9d, r8d); mov(r9d, r8d);
shr(r9d, 23); shr(r9d, 23);
movzx(r9d, r9b); movzx(r9d, r9b);
@ -988,7 +989,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
vxorps(xmm0, xmm0, xmm0); vxorps(xmm0, xmm0, xmm0);
vcomiss(xmm0, xmm1); vcomiss(xmm0, xmm1);
jbe(L9); jbe(L9);
vmovss(xmm2, ptr[rip+LC1]); vmovss(xmm2, ptr[rip + LC1]);
vandps(xmm1, GetXmmConstPtr(XMMSignMaskF32)); vandps(xmm1, GetXmmConstPtr(XMMSignMaskF32));
test(edx, edx); test(edx, edx);
@ -1019,7 +1020,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
L(L11); L(L11);
vxorps(xmm2, xmm2, xmm2); vxorps(xmm2, xmm2, xmm2);
vmovss(xmm0, ptr[rip+LC1]); vmovss(xmm0, ptr[rip + LC1]);
vcomiss(xmm2, xmm1); vcomiss(xmm2, xmm1);
ja(L1, CodeGenerator::T_NEAR); ja(L1, CodeGenerator::T_NEAR);
mov(ecx, 127); mov(ecx, 127);
@ -1080,7 +1081,7 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
or_(ecx, r8d); or_(ecx, r8d);
or_(ecx, eax); or_(ecx, eax);
vmovd(xmm0, ecx); vmovd(xmm0, ecx);
vaddss(xmm0, xmm1);//apply DAZ behavior to output vaddss(xmm0, xmm1); // apply DAZ behavior to output
L(L1); L(L1);
ret(); ret();
@ -1107,7 +1108,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
xchg(ecx, edx); xchg(ecx, edx);
// esi is just the value of xmm0's low word, so we can restore it from there // esi is just the value of xmm0's low word, so we can restore it from there
shl(r8d, cl); shl(r8d, cl);
mov(ecx, edx); // restore ecx, dont xchg because we're going to spoil edx anyway mov(ecx,
edx); // restore ecx, dont xchg because we're going to spoil edx anyway
mov(edx, r8d); mov(edx, r8d);
vmovd(r8d, xmm0); vmovd(r8d, xmm0);
} }
@ -1115,8 +1117,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
jmp(L4); jmp(L4);
L(specialcheck_1); L(specialcheck_1);
//should be extremely rare // should be extremely rare
vmovss(xmm0, ptr[rip+LC1]); vmovss(xmm0, ptr[rip + LC1]);
ret(); ret();
L(handle_oddball_denormal); L(handle_oddball_denormal);
@ -1131,7 +1133,8 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
dd(0xFF800000); dd(0xFF800000);
dd(0x7F800000); dd(0x7F800000);
L(LC1); L(LC1);
//the position of 7FC00000 here matters, this address will be indexed in handle_oddball_denormal // the position of 7FC00000 here matters, this address will be indexed in
// handle_oddball_denormal
dd(0x7FC00000); dd(0x7FC00000);
dd(0x5F34FD00); dd(0x5F34FD00);
@ -1148,11 +1151,13 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
Xbyak::Label check_scalar_operation_in_vmx, actual_vector_version; Xbyak::Label check_scalar_operation_in_vmx, actual_vector_version;
auto result_ptr = auto result_ptr =
GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_xmms[0])); GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_xmms[0]));
auto counter_ptr = GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_u64s[2])); auto counter_ptr =
GetBackendCtxPtr(offsetof(X64BackendContext, helper_scratch_u64s[2]));
counter_ptr.setBit(64); counter_ptr.setBit(64);
//shuffle and xor to check whether all lanes are equal // shuffle and xor to check whether all lanes are equal
//sadly has to leave the float pipeline for the vptest, which is moderate yikes // sadly has to leave the float pipeline for the vptest, which is moderate
// yikes
vmovhlps(xmm2, xmm0, xmm0); vmovhlps(xmm2, xmm0, xmm0);
vmovsldup(xmm1, xmm0); vmovsldup(xmm1, xmm0);
vxorps(xmm1, xmm1, xmm0); vxorps(xmm1, xmm1, xmm0);
@ -1160,7 +1165,7 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
vorps(xmm2, xmm1, xmm2); vorps(xmm2, xmm1, xmm2);
vptest(xmm2, xmm2); vptest(xmm2, xmm2);
jnz(check_scalar_operation_in_vmx); jnz(check_scalar_operation_in_vmx);
//jmp(scalar_helper, CodeGenerator::T_NEAR); // jmp(scalar_helper, CodeGenerator::T_NEAR);
call(scalar_helper); call(scalar_helper);
vshufps(xmm0, xmm0, xmm0, 0); vshufps(xmm0, xmm0, xmm0, 0);
ret(); ret();
@ -1169,7 +1174,7 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
vptest(xmm0, ptr[backend()->LookupXMMConstantAddress(XMMThreeFloatMask)]); vptest(xmm0, ptr[backend()->LookupXMMConstantAddress(XMMThreeFloatMask)]);
jnz(actual_vector_version); jnz(actual_vector_version);
vshufps(xmm0, xmm0,xmm0, _MM_SHUFFLE(3, 3, 3, 3)); vshufps(xmm0, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
call(scalar_helper); call(scalar_helper);
// this->DebugBreak(); // this->DebugBreak();
vinsertps(xmm0, xmm0, (3 << 4)); vinsertps(xmm0, xmm0, (3 << 4));
@ -1189,11 +1194,11 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
L(loop); L(loop);
lea(rax, result_ptr); lea(rax, result_ptr);
vmovss(xmm0, ptr[rax+rcx*4]); vmovss(xmm0, ptr[rax + rcx * 4]);
call(scalar_helper); call(scalar_helper);
mov(rcx, counter_ptr); mov(rcx, counter_ptr);
lea(rax, result_ptr); lea(rax, result_ptr);
vmovss(ptr[rax+rcx*4], xmm0); vmovss(ptr[rax + rcx * 4], xmm0);
inc(ecx); inc(ecx);
cmp(ecx, 4); cmp(ecx, 4);
mov(counter_ptr, rcx); mov(counter_ptr, rcx);
@ -1274,7 +1279,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() {
xor_(eax, 8); xor_(eax, 8);
sub(edx, ecx); sub(edx, ecx);
lea(rcx, ptr[rip + frsqrte_table2]); lea(rcx, ptr[rip + frsqrte_table2]);
movzx(eax, byte[rax+rcx]); movzx(eax, byte[rax + rcx]);
sal(rdx, 52); sal(rdx, 52);
sal(rax, 44); sal(rax, 44);
or_(rax, rdx); or_(rax, rdx);

View File

@ -12,8 +12,8 @@
#include <memory> #include <memory>
#include "xenia/base/cvar.h"
#include "xenia/base/bit_map.h" #include "xenia/base/bit_map.h"
#include "xenia/base/cvar.h"
#include "xenia/cpu/backend/backend.h" #include "xenia/cpu/backend/backend.h"
#if XE_PLATFORM_WIN32 == 1 #if XE_PLATFORM_WIN32 == 1
@ -44,9 +44,10 @@ typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
typedef void (*ResolveFunctionThunk)(); typedef void (*ResolveFunctionThunk)();
/* /*
place guest trampolines in the memory range that the HV normally occupies. place guest trampolines in the memory range that the HV normally occupies.
This way guests can call in via the indirection table and we don't have to clobber/reuse an existing memory range This way guests can call in via the indirection table and we don't have to
The xboxkrnl range is already used by export trampolines (see kernel/kernel_module.cc) clobber/reuse an existing memory range The xboxkrnl range is already used by
export trampolines (see kernel/kernel_module.cc)
*/ */
static constexpr uint32_t GUEST_TRAMPOLINE_BASE = 0x80000000; static constexpr uint32_t GUEST_TRAMPOLINE_BASE = 0x80000000;
static constexpr uint32_t GUEST_TRAMPOLINE_END = 0x80040000; static constexpr uint32_t GUEST_TRAMPOLINE_END = 0x80040000;
@ -75,11 +76,13 @@ struct X64BackendStackpoint {
// use // use
unsigned guest_return_address_; unsigned guest_return_address_;
}; };
enum : uint32_t { enum : uint32_t {
kX64BackendMXCSRModeBit = 0, kX64BackendMXCSRModeBit = 0,
kX64BackendHasReserveBit = 1, kX64BackendHasReserveBit = 1,
kX64BackendNJMOn = 2, //non-java mode bit is currently set. for use in software fp routines kX64BackendNJMOn =
kX64BackendNonIEEEMode = 3, //non-ieee mode is currently enabled for scalar fpu. 2, // non-java mode bit is currently set. for use in software fp routines
kX64BackendNonIEEEMode =
3, // non-ieee mode is currently enabled for scalar fpu.
}; };
// located prior to the ctx register // located prior to the ctx register
// some things it would be nice to have be per-emulator instance instead of per // some things it would be nice to have be per-emulator instance instead of per
@ -170,8 +173,8 @@ class X64Backend : public Backend {
reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext)); reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext));
} }
virtual uint32_t CreateGuestTrampoline(GuestTrampolineProc proc, virtual uint32_t CreateGuestTrampoline(GuestTrampolineProc proc,
void* userdata1, void* userdata1, void* userdata2,
void* userdata2, bool long_term) override; bool long_term) override;
virtual void FreeGuestTrampoline(uint32_t trampoline_addr) override; virtual void FreeGuestTrampoline(uint32_t trampoline_addr) override;
virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override; virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override;
@ -213,6 +216,7 @@ class X64Backend : public Backend {
void* vrsqrtefp_vector_helper = nullptr; void* vrsqrtefp_vector_helper = nullptr;
void* vrsqrtefp_scalar_helper = nullptr; void* vrsqrtefp_scalar_helper = nullptr;
void* frsqrtefp_helper = nullptr; void* frsqrtefp_helper = nullptr;
private: private:
#if XE_X64_PROFILER_AVAILABLE == 1 #if XE_X64_PROFILER_AVAILABLE == 1
GuestProfilerData profiler_data_; GuestProfilerData profiler_data_;

View File

@ -93,7 +93,8 @@ class X64CodeCache : public CodeCache {
// This is picked to be high enough to cover whatever we can reasonably // This is picked to be high enough to cover whatever we can reasonably
// expect. If we hit issues with this it probably means some corner case // expect. If we hit issues with this it probably means some corner case
// in analysis triggering. // in analysis triggering.
//chrispy: raised this, some games that were compiled with low optimization levels can exceed this // chrispy: raised this, some games that were compiled with low optimization
// levels can exceed this
static const size_t kMaximumFunctionCount = 1000000; static const size_t kMaximumFunctionCount = 1000000;
struct UnwindReservation { struct UnwindReservation {

View File

@ -213,7 +213,8 @@ Win32X64CodeCache::RequestUnwindReservation(uint8_t* entry_address) {
if (unwind_table_count_ >= kMaximumFunctionCount) { if (unwind_table_count_ >= kMaximumFunctionCount) {
// we should not just be ignoring this in release if it happens // we should not just be ignoring this in release if it happens
xe::FatalError( xe::FatalError(
"Unwind table count (unwind_table_count_) exceeded maximum! Please report this to " "Unwind table count (unwind_table_count_) exceeded maximum! Please "
"report this to "
"Xenia/Canary developers"); "Xenia/Canary developers");
} }
#else #else

View File

@ -210,24 +210,27 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
// Adding or changing anything here must be matched! // Adding or changing anything here must be matched!
/* /*
pick a page to use as the local base as close to the commonly accessed page that contains most backend fields pick a page to use as the local base as close to the commonly accessed page
the sizes that are checked are chosen based on PTE coalescing sizes. zen does 16k or 32k that contains most backend fields the sizes that are checked are chosen
based on PTE coalescing sizes. zen does 16k or 32k
*/ */
size_t stack_size = StackLayout::GUEST_STACK_SIZE; size_t stack_size = StackLayout::GUEST_STACK_SIZE;
if (stack_offset < (4096 - sizeof(X64BackendContext))) { if (stack_offset < (4096 - sizeof(X64BackendContext))) {
locals_page_delta_ = 4096; locals_page_delta_ = 4096;
} else if (stack_offset < (16384 - sizeof(X64BackendContext))) {//16k PTE coalescing } else if (stack_offset <
(16384 - sizeof(X64BackendContext))) { // 16k PTE coalescing
locals_page_delta_ = 16384; locals_page_delta_ = 16384;
} else if (stack_offset < (32768 - sizeof(X64BackendContext))) { } else if (stack_offset < (32768 - sizeof(X64BackendContext))) {
locals_page_delta_ = 32768; locals_page_delta_ = 32768;
} else if (stack_offset < (65536 - sizeof(X64BackendContext))) { } else if (stack_offset < (65536 - sizeof(X64BackendContext))) {
locals_page_delta_ = 65536; locals_page_delta_ = 65536;
} else { } else {
//extremely unlikely, fall back to stack // extremely unlikely, fall back to stack
stack_size = xe::align<size_t>(StackLayout::GUEST_STACK_SIZE + stack_offset, 16); stack_size =
xe::align<size_t>(StackLayout::GUEST_STACK_SIZE + stack_offset, 16);
locals_page_delta_ = 0; locals_page_delta_ = 0;
} }
assert_true((stack_size + 8) % 16 == 0); assert_true((stack_size + 8) % 16 == 0);
func_info.stack_size = stack_size; func_info.stack_size = stack_size;
stack_size_ = stack_size; stack_size_ = stack_size;
@ -1002,7 +1005,7 @@ static inline vec128_t v128_setr_bytes(unsigned char v0, unsigned char v1,
} }
static inline vec128_t v128_setr_words(uint32_t v0, uint32_t v1, uint32_t v2, static inline vec128_t v128_setr_words(uint32_t v0, uint32_t v1, uint32_t v2,
uint32_t v3) { uint32_t v3) {
vec128_t result; vec128_t result;
result.u32[0] = v0; result.u32[0] = v0;
result.u32[1] = v1; result.u32[1] = v1;
@ -1181,7 +1184,7 @@ static const vec128_t xmm_consts[] = {
v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80), v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80),
// XMMVSRMask // XMMVSRMask
vec128b(1), vec128b(1),
//XMMVRsqrteTableStart // XMMVRsqrteTableStart
v128_setr_words(0x568B4FD, 0x4F3AF97, 0x48DAAA5, 0x435A618), v128_setr_words(0x568B4FD, 0x4F3AF97, 0x48DAAA5, 0x435A618),
v128_setr_words(0x3E7A1E4, 0x3A29DFE, 0x3659A5C, 0x32E96F8), v128_setr_words(0x3E7A1E4, 0x3A29DFE, 0x3659A5C, 0x32E96F8),
v128_setr_words(0x2FC93CA, 0x2D090CE, 0x2A88DFE, 0x2838B57), v128_setr_words(0x2FC93CA, 0x2D090CE, 0x2A88DFE, 0x2838B57),
@ -1190,8 +1193,8 @@ static const vec128_t xmm_consts[] = {
v128_setr_words(0x2C27279, 0x2926FB7, 0x2666D26, 0x23F6AC0), v128_setr_words(0x2C27279, 0x2926FB7, 0x2666D26, 0x23F6AC0),
v128_setr_words(0x21D6881, 0x1FD6665, 0x1E16468, 0x1C76287), v128_setr_words(0x21D6881, 0x1FD6665, 0x1E16468, 0x1C76287),
v128_setr_words(0x1AF60C1, 0x1995F12, 0x1855D79, 0x1735BF4), v128_setr_words(0x1AF60C1, 0x1995F12, 0x1855D79, 0x1735BF4),
//XMMVRsqrteTableBase // XMMVRsqrteTableBase
vec128i(0) //filled in later vec128i(0) // filled in later
}; };
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) { void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
@ -1267,12 +1270,13 @@ uintptr_t X64Emitter::PlaceConstData() {
std::memcpy(mem, xmm_consts, sizeof(xmm_consts)); std::memcpy(mem, xmm_consts, sizeof(xmm_consts));
/* /*
set each 32-bit element of the constant XMMVRsqrteTableBase to be the address of the start of the constant XMMVRsqrteTableStart set each 32-bit element of the constant XMMVRsqrteTableBase to be the
this address of the start of the constant XMMVRsqrteTableStart this
*/ */
vec128_t* deferred_constants = reinterpret_cast<vec128_t*>(mem); vec128_t* deferred_constants = reinterpret_cast<vec128_t*>(mem);
vec128_t* vrsqrte_table_base = &deferred_constants[XMMVRsqrteTableBase]; vec128_t* vrsqrte_table_base = &deferred_constants[XMMVRsqrteTableBase];
uint32_t ptr_to_vrsqrte_table32 = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&deferred_constants[XMMVRsqrteTableStart])); uint32_t ptr_to_vrsqrte_table32 = static_cast<uint32_t>(
reinterpret_cast<uintptr_t>(&deferred_constants[XMMVRsqrteTableStart]));
*vrsqrte_table_base = vec128i(ptr_to_vrsqrte_table32); *vrsqrte_table_base = vec128i(ptr_to_vrsqrte_table32);
memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr); memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr);
@ -1288,8 +1292,10 @@ void X64Emitter::FreeConstData(uintptr_t data) {
Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) { Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) {
// Load through fixed constant table setup by PlaceConstData. // Load through fixed constant table setup by PlaceConstData.
// It's important that the pointer is not signed, as it will be sign-extended. // It's important that the pointer is not signed, as it will be sign-extended.
void* emitter_data_ptr = backend_->LookupXMMConstantAddress(static_cast<unsigned>(id)); void* emitter_data_ptr =
xenia_assert(reinterpret_cast<uintptr_t>(emitter_data_ptr) < (1ULL << 31));//must not have signbit set backend_->LookupXMMConstantAddress(static_cast<unsigned>(id));
xenia_assert(reinterpret_cast<uintptr_t>(emitter_data_ptr) <
(1ULL << 31)); // must not have signbit set
return ptr[emitter_data_ptr]; return ptr[emitter_data_ptr];
} }
// Implies possible StashXmm(0, ...)! // Implies possible StashXmm(0, ...)!
@ -1610,8 +1616,8 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
return SimdDomain::DONTCARE; return SimdDomain::DONTCARE;
} }
Xbyak::RegExp X64Emitter::GetLocalsBase() const { Xbyak::RegExp X64Emitter::GetLocalsBase() const {
return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_; return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_;
} }
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const { Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
/* /*

View File

@ -176,7 +176,10 @@ enum XmmConst {
XMMVSRShlByteshuf, XMMVSRShlByteshuf,
XMMVSRMask, XMMVSRMask,
XMMVRsqrteTableStart, XMMVRsqrteTableStart,
XMMVRsqrteTableBase = XMMVRsqrteTableStart + (32 / 4), //32 4-byte elements in table, 4 4-byte elements fit in each xmm XMMVRsqrteTableBase =
XMMVRsqrteTableStart +
(32 /
4), // 32 4-byte elements in table, 4 4-byte elements fit in each xmm
}; };
using amdfx::xopcompare_e; using amdfx::xopcompare_e;
@ -311,7 +314,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
size_t stack_size() const { return stack_size_; } size_t stack_size() const { return stack_size_; }
Xbyak::RegExp GetLocalsBase() const; Xbyak::RegExp GetLocalsBase() const;
SimdDomain DeduceSimdDomain(const hir::Value* for_value); SimdDomain DeduceSimdDomain(const hir::Value* for_value);
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; } void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
/* /*
returns true if had to load mxcsr. DOT_PRODUCT can use this to skip returns true if had to load mxcsr. DOT_PRODUCT can use this to skip
@ -390,6 +393,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
void EmitGetCurrentThreadId(); void EmitGetCurrentThreadId();
void EmitTraceUserCallReturn(); void EmitTraceUserCallReturn();
static void HandleStackpointOverflowError(ppc::PPCContext* context); static void HandleStackpointOverflowError(ppc::PPCContext* context);
protected: protected:
Processor* processor_ = nullptr; Processor* processor_ = nullptr;
X64Backend* backend_ = nullptr; X64Backend* backend_ = nullptr;

View File

@ -398,8 +398,7 @@ struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
}; };
template <typename T> template <typename T>
XE_MAYBE_UNUSED XE_MAYBE_UNUSED static const T GetTempReg(X64Emitter& e);
static const T GetTempReg(X64Emitter& e);
template <> template <>
XE_MAYBE_UNUSED const Reg8 GetTempReg<Reg8>(X64Emitter& e) { XE_MAYBE_UNUSED const Reg8 GetTempReg<Reg8>(X64Emitter& e) {
return e.al; return e.al;

View File

@ -705,7 +705,8 @@ struct STORE_LOCAL_I16
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2); // e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2);
if (LocalStoreMayUseMembaseLow(e, i)) { if (LocalStoreMayUseMembaseLow(e, i)) {
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt16()); e.mov(e.word[e.GetLocalsBase() + i.src1.constant()],
e.GetMembaseReg().cvt16());
} else { } else {
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], i.src2); e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], i.src2);
} }
@ -716,7 +717,8 @@ struct STORE_LOCAL_I32
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2); // e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2);
if (LocalStoreMayUseMembaseLow(e, i)) { if (LocalStoreMayUseMembaseLow(e, i)) {
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt32()); e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()],
e.GetMembaseReg().cvt32());
} else { } else {
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2); e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
} }

View File

@ -2120,9 +2120,9 @@ struct RSQRT_V128 : Sequence<RSQRT_V128, I<OPCODE_RSQRT, V128Op, V128Op>> {
e.ChangeMxcsrMode(MXCSRMode::Vmx); e.ChangeMxcsrMode(MXCSRMode::Vmx);
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3); Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
/* /*
the vast majority of inputs to vrsqrte come from vmsum3 or vmsum4 as part the vast majority of inputs to vrsqrte come from vmsum3 or vmsum4 as
of a vector normalization sequence. in fact, its difficult to find uses of vrsqrte in titles part of a vector normalization sequence. in fact, its difficult to find
that have inputs which do not come from vmsum. uses of vrsqrte in titles that have inputs which do not come from vmsum.
*/ */
if (i.src1.value && i.src1.value->AllFloatVectorLanesSameValue()) { if (i.src1.value && i.src1.value->AllFloatVectorLanesSameValue()) {
e.vmovss(e.xmm0, src1); e.vmovss(e.xmm0, src1);
@ -3193,8 +3193,7 @@ struct SET_ROUNDING_MODE_I32
if (constant_value & 4) { if (constant_value & 4) {
e.or_(flags_ptr, 1U << kX64BackendNonIEEEMode); e.or_(flags_ptr, 1U << kX64BackendNonIEEEMode);
} } else {
else {
e.btr(flags_ptr, kX64BackendNonIEEEMode); e.btr(flags_ptr, kX64BackendNonIEEEMode);
} }
e.mov(e.dword[e.rsp + StackLayout::GUEST_SCRATCH], e.eax); e.mov(e.dword[e.rsp + StackLayout::GUEST_SCRATCH], e.eax);
@ -3202,14 +3201,14 @@ struct SET_ROUNDING_MODE_I32
e.vldmxcsr(e.dword[e.rsp + StackLayout::GUEST_SCRATCH]); e.vldmxcsr(e.dword[e.rsp + StackLayout::GUEST_SCRATCH]);
} else { } else {
//can andnot, but this is a very infrequently used opcode // can andnot, but this is a very infrequently used opcode
e.mov(e.eax, 1U << kX64BackendNonIEEEMode); e.mov(e.eax, 1U << kX64BackendNonIEEEMode);
e.mov(e.edx, e.eax); e.mov(e.edx, e.eax);
e.not_(e.edx); e.not_(e.edx);
e.mov(e.ecx, flags_ptr); e.mov(e.ecx, flags_ptr);
//edx = flags w/ non ieee cleared // edx = flags w/ non ieee cleared
e.and_(e.edx, e.ecx); e.and_(e.edx, e.ecx);
//eax = flags w/ non ieee set // eax = flags w/ non ieee set
e.or_(e.eax, e.ecx); e.or_(e.eax, e.ecx);
e.bt(i.src1, 2); e.bt(i.src1, 2);

View File

@ -122,10 +122,12 @@ class StackLayout {
* *
*/ */
static const size_t GUEST_STACK_SIZE = 104; static const size_t GUEST_STACK_SIZE = 104;
//was GUEST_CTX_HOME, can't remove because that'd throw stack alignment off. instead, can be used as a temporary in sequences // was GUEST_CTX_HOME, can't remove because that'd throw stack alignment off.
// instead, can be used as a temporary in sequences
static const size_t GUEST_SCRATCH = 0; static const size_t GUEST_SCRATCH = 0;
//when profiling is on, this stores the nanosecond time at the start of the function // when profiling is on, this stores the nanosecond time at the start of the
// function
static const size_t GUEST_PROFILER_START = 80; static const size_t GUEST_PROFILER_START = 80;
static const size_t GUEST_RET_ADDR = 88; static const size_t GUEST_RET_ADDR = 88;
static const size_t GUEST_CALL_RET_ADDR = 96; static const size_t GUEST_CALL_RET_ADDR = 96;

View File

@ -29,15 +29,14 @@ namespace x64 {
bool trace_enabled = true; bool trace_enabled = true;
#define THREAD_MATCH \ #define THREAD_MATCH (!TARGET_THREAD || ppc_context->thread_id == TARGET_THREAD)
(!TARGET_THREAD || ppc_context->thread_id == TARGET_THREAD)
#define IFLUSH() #define IFLUSH()
#define IPRINT(s) \ #define IPRINT(s) \
if (trace_enabled && THREAD_MATCH) \ if (trace_enabled && THREAD_MATCH) \
xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s, xe::LogSrc::Cpu) xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s, xe::LogSrc::Cpu)
#define DFLUSH() #define DFLUSH()
#define DPRINT(...) \ #define DPRINT(...) \
if (trace_enabled && THREAD_MATCH) \ if (trace_enabled && THREAD_MATCH) \
xe::logging::AppendLogLineFormat(xe::LogSrc::Cpu, xe::LogLevel::Debug, 't', \ xe::logging::AppendLogLineFormat(xe::LogSrc::Cpu, xe::LogLevel::Debug, 't', \
__VA_ARGS__) __VA_ARGS__)

View File

@ -429,7 +429,7 @@ bool RegisterAllocationPass::SpillOneRegister(HIRBuilder* builder, Block* block,
// Set the local slot of the new value to our existing one. This way we will // Set the local slot of the new value to our existing one. This way we will
// reuse that same memory if needed. // reuse that same memory if needed.
new_value->SetLocalSlot( spill_value->GetLocalSlot()); new_value->SetLocalSlot(spill_value->GetLocalSlot());
// Rename all future uses of the SSA value to the new value as loaded // Rename all future uses of the SSA value to the new value as loaded
// from the local. // from the local.

View File

@ -1372,27 +1372,27 @@ bool SimplificationPass::SimplifyVectorOps(hir::Instr* i,
} }
/* /*
splatting a 32-bit value extracted from a vector where all 4 32-bit values are the same should be eliminated and splatting a 32-bit value extracted from a vector where all 4 32-bit values
instead use the vector extracted from, which will be identical are the same should be eliminated and instead use the vector extracted from,
have seen this happen, some games vmsum and then splat the low float to all 4 floats, even though it already is there which will be identical have seen this happen, some games vmsum and then
splat the low float to all 4 floats, even though it already is there
*/ */
if (opc == OPCODE_SPLAT) { if (opc == OPCODE_SPLAT) {
if (i->dest->type == VEC128_TYPE) { if (i->dest->type == VEC128_TYPE) {
auto splatted_value = i->src1.value; auto splatted_value = i->src1.value;
auto splat_type = splatted_value->type; auto splat_type = splatted_value->type;
if (splat_type == FLOAT32_TYPE || splat_type == INT32_TYPE) { if (splat_type == FLOAT32_TYPE || splat_type == INT32_TYPE) {
//its a splat of a fourbyte value, check the definition // its a splat of a fourbyte value, check the definition
auto splat_input_definition = splatted_value->GetDefSkipAssigns(); auto splat_input_definition = splatted_value->GetDefSkipAssigns();
if (splat_input_definition) { if (splat_input_definition) {
auto defining_opcode = splat_input_definition->GetOpcodeNum(); auto defining_opcode = splat_input_definition->GetOpcodeNum();
if (defining_opcode == OPCODE_EXTRACT) { if (defining_opcode == OPCODE_EXTRACT) {
auto value_extracted_from = splat_input_definition->src1.value; auto value_extracted_from = splat_input_definition->src1.value;
if (value_extracted_from->type == VEC128_TYPE) { if (value_extracted_from->type == VEC128_TYPE) {
xenia_assert(splat_input_definition->dest->type == splat_type); xenia_assert(splat_input_definition->dest->type == splat_type);
if (value_extracted_from->AllFloatVectorLanesSameValue()) { if (value_extracted_from->AllFloatVectorLanesSameValue()) {
i->Replace(&OPCODE_ASSIGN_info,0); i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(value_extracted_from); i->set_src1(value_extracted_from);
return true; return true;
} }

View File

@ -36,7 +36,7 @@ class SimplificationPass : public ConditionalGroupSubpass {
// handles simple multiplication/addition rules // handles simple multiplication/addition rules
bool SimplifyBasicArith(hir::HIRBuilder* builder); bool SimplifyBasicArith(hir::HIRBuilder* builder);
bool SimplifyVectorOps(hir::HIRBuilder* builder); bool SimplifyVectorOps(hir::HIRBuilder* builder);
bool SimplifyVectorOps(hir::Instr* i, hir::HIRBuilder* builder); bool SimplifyVectorOps(hir::Instr* i, hir::HIRBuilder* builder);
bool SimplifyBasicArith(hir::Instr* i, hir::HIRBuilder* builder); bool SimplifyBasicArith(hir::Instr* i, hir::HIRBuilder* builder);
bool SimplifyAddWithSHL(hir::Instr* i, hir::HIRBuilder* builder); bool SimplifyAddWithSHL(hir::Instr* i, hir::HIRBuilder* builder);

View File

@ -49,7 +49,7 @@ class EntryTable {
xe::global_critical_region global_critical_region_; xe::global_critical_region global_critical_region_;
// TODO(benvanik): replace with a better data structure. // TODO(benvanik): replace with a better data structure.
xe::split_map<uint32_t, Entry*> map_; xe::split_map<uint32_t, Entry*> map_;
//std::unordered_map<uint32_t, Entry*> map_; // std::unordered_map<uint32_t, Entry*> map_;
}; };
} // namespace cpu } // namespace cpu

View File

@ -95,7 +95,6 @@ class Export {
uint32_t variable_ptr; uint32_t variable_ptr;
struct { struct {
// Trampoline that is called from the guest-to-host thunk. // Trampoline that is called from the guest-to-host thunk.
// Expects only PPC context as first arg. // Expects only PPC context as first arg.
ExportTrampoline trampoline; ExportTrampoline trampoline;

View File

@ -115,7 +115,6 @@ uintptr_t GuestFunction::MapGuestAddressToMachineCode(
return reinterpret_cast<uintptr_t>(machine_code()) + entry->code_offset; return reinterpret_cast<uintptr_t>(machine_code()) + entry->code_offset;
} else { } else {
return 0; return 0;
} }
} }

View File

@ -79,10 +79,11 @@ class Instr {
void MoveBefore(Instr* other); void MoveBefore(Instr* other);
void Replace(const OpcodeInfo* new_opcode, uint16_t new_flags); void Replace(const OpcodeInfo* new_opcode, uint16_t new_flags);
void UnlinkAndNOP(); void UnlinkAndNOP();
//chrispy: wanted to change this one to Remove, but i changed Remove's name to UnlinkAndNOP, // chrispy: wanted to change this one to Remove, but i changed Remove's name
//so if changes happened in master that we wanted to port over, and those changes used Remove, then we would have a lot of issues that the cause of would // to UnlinkAndNOP, so if changes happened in master that we wanted to port
//be difficult to track // over, and those changes used Remove, then we would have a lot of issues
//^todo: rework this comment, im frazzled // that the cause of would be difficult to track ^todo: rework this comment,
// im frazzled
void Deallocate(); void Deallocate();
const OpcodeInfo* GetOpcodeInfo() const { return opcode; } const OpcodeInfo* GetOpcodeInfo() const { return opcode; }
// if opcode is null, we have bigger problems // if opcode is null, we have bigger problems

View File

@ -30,9 +30,7 @@ class Label {
// this will later be used as an input to xbyak. xbyak only accepts // this will later be used as an input to xbyak. xbyak only accepts
// std::string as a value, not passed by reference, so precomputing the // std::string as a value, not passed by reference, so precomputing the
// stringification does not help // stringification does not help
std::string GetIdString() { std::string GetIdString() { return std::to_string(id); }
return std::to_string(id);
}
}; };
} // namespace hir } // namespace hir

View File

@ -43,7 +43,7 @@ void Value::RemoveUse(Use* use) {
use->next->prev = use->prev; use->next->prev = use->prev;
} }
//HIRBuilder::GetCurrent()->DeallocateUse(use); // HIRBuilder::GetCurrent()->DeallocateUse(use);
} }
uint32_t Value::AsUint32() { uint32_t Value::AsUint32() {
@ -1805,7 +1805,7 @@ bool Value::AllUsesByOneInsn() const {
return true; return true;
} }
bool Value::AllFloatVectorLanesSameValue(const hir::Value* for_value, bool Value::AllFloatVectorLanesSameValue(const hir::Value* for_value,
uint32_t current_depth) { uint32_t current_depth) {
// limit recursion, otherwise this function will slow down emission // limit recursion, otherwise this function will slow down emission
if (current_depth == 16) { if (current_depth == 16) {
return false; return false;
@ -1819,7 +1819,8 @@ re_enter:
xenia_assert(for_value->IsConstant()); xenia_assert(for_value->IsConstant());
auto&& constant_value = for_value->constant.v128; auto&& constant_value = for_value->constant.v128;
for (unsigned constant_lane_index = 1; constant_lane_index < 4; ++constant_lane_index) { for (unsigned constant_lane_index = 1; constant_lane_index < 4;
++constant_lane_index) {
if (constant_value.u32[0] != constant_value.u32[constant_lane_index]) { if (constant_value.u32[0] != constant_value.u32[constant_lane_index]) {
return false; return false;
} }
@ -1844,9 +1845,10 @@ re_enter:
definition_opcode_number == OPCODE_DOT_PRODUCT_3) { definition_opcode_number == OPCODE_DOT_PRODUCT_3) {
return true; return true;
} }
//if splat of 32-bit value type, return true // if splat of 32-bit value type, return true
//technically a splat of int16 or int8 would also produce the same "float" in all lanes // technically a splat of int16 or int8 would also produce the same "float" in
//but i think its best to keep this function focused on specifically float data // all lanes but i think its best to keep this function focused on
// specifically float data
if (definition_opcode_number == OPCODE_SPLAT) { if (definition_opcode_number == OPCODE_SPLAT) {
if (definition->dest->type == VEC128_TYPE) { if (definition->dest->type == VEC128_TYPE) {
auto splat_src_value_type = definition->src1.value->type; auto splat_src_value_type = definition->src1.value->type;
@ -1857,33 +1859,32 @@ re_enter:
} }
} }
switch (definition_opcode_number) { switch (definition_opcode_number) {
//all of these opcodes produce the same value for the same input // all of these opcodes produce the same value for the same input
case OPCODE_RSQRT: case OPCODE_RSQRT:
case OPCODE_RECIP: case OPCODE_RECIP:
case OPCODE_POW2: case OPCODE_POW2:
case OPCODE_LOG2: case OPCODE_LOG2:
for_value = definition->src1.value; for_value = definition->src1.value;
goto re_enter; goto re_enter;
//binary opcodes // binary opcodes
case OPCODE_ADD: case OPCODE_ADD:
case OPCODE_SUB: case OPCODE_SUB:
case OPCODE_MUL: case OPCODE_MUL:
if (!AllFloatVectorLanesSameValue(definition->src1.value, if (!AllFloatVectorLanesSameValue(definition->src1.value,
current_depth + 1)) { current_depth + 1)) {
return false; return false;
} }
for_value = definition->src2.value; for_value = definition->src2.value;
goto re_enter; goto re_enter;
default: default:
break; break;
} }
return false; return false;
} }
} // namespace hir } // namespace hir
} // namespace cpu } // namespace cpu
} // namespace xe } // namespace xe

View File

@ -621,6 +621,7 @@ class Value {
bool AllFloatVectorLanesSameValue() const { bool AllFloatVectorLanesSameValue() const {
return Value::AllFloatVectorLanesSameValue(this); return Value::AllFloatVectorLanesSameValue(this);
} }
private: private:
/* /*
returns true if for_value (which must be VEC128_TYPE) has the same value in returns true if for_value (which must be VEC128_TYPE) has the same value in

View File

@ -48,7 +48,9 @@ class MMIOHandler {
typedef uint32_t (*HostToGuestVirtual)(const void* context, typedef uint32_t (*HostToGuestVirtual)(const void* context,
const void* host_address); const void* host_address);
typedef bool (*AccessViolationCallback)( typedef bool (*AccessViolationCallback)(
global_unique_lock_type global_lock_locked_once, //not passed by reference with const like the others? global_unique_lock_type
global_lock_locked_once, // not passed by reference with const like
// the others?
void* context, void* host_address, bool is_write); void* context, void* host_address, bool is_write);
// access_violation_callback is called with global_critical_region locked once // access_violation_callback is called with global_critical_region locked once

View File

@ -55,6 +55,7 @@ class Module {
bool ReadMap(const char* file_name); bool ReadMap(const char* file_name);
virtual void Precompile() {} virtual void Precompile() {}
protected: protected:
virtual std::unique_ptr<Function> CreateFunction(uint32_t address) = 0; virtual std::unique_ptr<Function> CreateFunction(uint32_t address) = 0;

View File

@ -375,11 +375,11 @@ typedef struct alignas(64) PPCContext_s {
// Most frequently used registers first. // Most frequently used registers first.
uint64_t r[32]; // 0x20 General purpose registers uint64_t r[32]; // 0x20 General purpose registers
uint64_t ctr; // 0x18 Count register uint64_t ctr; // 0x18 Count register
uint64_t lr; // 0x10 Link register uint64_t lr; // 0x10 Link register
uint64_t msr; //machine state register uint64_t msr; // machine state register
double f[32]; // 0x120 Floating-point registers double f[32]; // 0x120 Floating-point registers
vec128_t v[128]; // 0x220 VMX128 vector registers vec128_t v[128]; // 0x220 VMX128 vector registers

View File

@ -46,7 +46,7 @@ struct PPCDecodeData {
uint32_t LEV() const { return bits_.LEV; } uint32_t LEV() const { return bits_.LEV; }
private: private:
XE_MAYBE_UNUSED XE_MAYBE_UNUSED
uint32_t address_; uint32_t address_;
union { union {
uint32_t value_; uint32_t value_;

View File

@ -132,23 +132,23 @@ int InstrEmit_branch(PPCHIRBuilder& f, const char* src, uint64_t cia,
#else #else
{ {
#endif #endif
// Jump to pointer. // Jump to pointer.
bool likely_return = !lk && nia_is_lr; bool likely_return = !lk && nia_is_lr;
if (likely_return) { if (likely_return) {
call_flags |= CALL_POSSIBLE_RETURN; call_flags |= CALL_POSSIBLE_RETURN;
} }
if (cond) { if (cond) {
if (!expect_true) { if (!expect_true) {
cond = f.IsFalse(cond); cond = f.IsFalse(cond);
}
f.CallIndirectTrue(cond, nia, call_flags);
} else {
f.CallIndirect(nia, call_flags);
} }
f.CallIndirectTrue(cond, nia, call_flags);
} else {
f.CallIndirect(nia, call_flags);
} }
} }
}
return 0; return 0;
} // namespace ppc } // namespace ppc
int InstrEmit_bx(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_bx(PPCHIRBuilder& f, const InstrData& i) {
@ -789,9 +789,8 @@ int InstrEmit_mtspr(PPCHIRBuilder& f, const InstrData& i) {
// code requires it. Sequences of mtmsr/lwar/stcw/mtmsr come up a lot, and // code requires it. Sequences of mtmsr/lwar/stcw/mtmsr come up a lot, and
// without the lock here threads can livelock. // without the lock here threads can livelock.
// 0x400 = debug singlestep i think
//0x400 = debug singlestep i think // ive seen 0x8000 used in kernel code
//ive seen 0x8000 used in kernel code
int InstrEmit_mfmsr(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_mfmsr(PPCHIRBuilder& f, const InstrData& i) {
// bit 48 = EE; interrupt enabled // bit 48 = EE; interrupt enabled
// bit 62 = RI; recoverable interrupt // bit 62 = RI; recoverable interrupt
@ -806,7 +805,7 @@ int InstrEmit_mtmsr(PPCHIRBuilder& f, const InstrData& i) {
} }
int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) {
//todo: this is moving msr under a mask, so only writing EE and RI // todo: this is moving msr under a mask, so only writing EE and RI
Value* from = f.LoadGPR(i.X.RT); Value* from = f.LoadGPR(i.X.RT);
Value* mtmsrd_mask = f.LoadConstantUint64((1ULL << 15)); Value* mtmsrd_mask = f.LoadConstantUint64((1ULL << 15));

View File

@ -106,16 +106,17 @@ bool PPCFrontend::Initialize() {
} }
bool PPCFrontend::DeclareFunction(GuestFunction* function) { bool PPCFrontend::DeclareFunction(GuestFunction* function) {
// chrispy: make sure we aren't declaring a function that is actually padding
//chrispy: make sure we aren't declaring a function that is actually padding data, this will mess up PPCScanner and is hard to debug // data, this will mess up PPCScanner and is hard to debug wow, this halo
//wow, this halo reach actually has branches into 0 opcodes, look into further // reach actually has branches into 0 opcodes, look into further
//xenia_assert(*reinterpret_cast<const uint32_t*>( // xenia_assert(*reinterpret_cast<const uint32_t*>(
// this->memory()->TranslateVirtual(function->address())) != 0); // this->memory()->TranslateVirtual(function->address())) !=
// Could scan or something here. // 0);
// Could also check to see if it's a well-known function type and classify // Could scan or something here.
// for later. // Could also check to see if it's a well-known function type and classify
// Could also kick off a precompiler, since we know it's likely the function // for later.
// will be demanded soon. // Could also kick off a precompiler, since we know it's likely the function
// will be demanded soon.
return true; return true;
} }

View File

@ -80,8 +80,10 @@ class PPCHIRBuilder : public hir::HIRBuilder {
void StoreReserved(Value* val); void StoreReserved(Value* val);
Value* LoadReserved(); Value* LoadReserved();
//calls original impl in hirbuilder, but also records the is_return_site bit into flags in the guestmodule // calls original impl in hirbuilder, but also records the is_return_site bit
// into flags in the guestmodule
void SetReturnAddress(Value* value); void SetReturnAddress(Value* value);
private: private:
void MaybeBreakOnInstruction(uint32_t address); void MaybeBreakOnInstruction(uint32_t address);
void AnnotateLabel(uint32_t address, Label* label); void AnnotateLabel(uint32_t address, Label* label);

View File

@ -267,7 +267,7 @@ Function* Processor::ResolveFunction(uint32_t address) {
entry->status = Entry::STATUS_FAILED; entry->status = Entry::STATUS_FAILED;
return nullptr; return nullptr;
} }
//only add it to the list of resolved functions if resolving succeeded // only add it to the list of resolved functions if resolving succeeded
auto module_for = function->module(); auto module_for = function->module();
auto xexmod = dynamic_cast<XexModule*>(module_for); auto xexmod = dynamic_cast<XexModule*>(module_for);
@ -1300,7 +1300,7 @@ uint32_t Processor::GuestAtomicIncrement32(ppc::PPCContext* context,
result = *host_address; result = *host_address;
// todo: should call a processor->backend function that acquires a // todo: should call a processor->backend function that acquires a
// reservation instead of using host atomics // reservation instead of using host atomics
if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result)+1), if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result) + 1),
host_address)) { host_address)) {
break; break;
} }
@ -1316,7 +1316,7 @@ uint32_t Processor::GuestAtomicDecrement32(ppc::PPCContext* context,
result = *host_address; result = *host_address;
// todo: should call a processor->backend function that acquires a // todo: should call a processor->backend function that acquires a
// reservation instead of using host atomics // reservation instead of using host atomics
if (xe::atomic_cas(result,xe::byte_swap( xe::byte_swap(result)-1), if (xe::atomic_cas(result, xe::byte_swap(xe::byte_swap(result) - 1),
host_address)) { host_address)) {
break; break;
} }
@ -1326,9 +1326,9 @@ uint32_t Processor::GuestAtomicDecrement32(ppc::PPCContext* context,
uint32_t Processor::GuestAtomicOr32(ppc::PPCContext* context, uint32_t Processor::GuestAtomicOr32(ppc::PPCContext* context,
uint32_t guest_address, uint32_t mask) { uint32_t guest_address, uint32_t mask) {
return xe::byte_swap(xe::atomic_or( return xe::byte_swap(
context->TranslateVirtual<volatile int32_t*>(guest_address), xe::atomic_or(context->TranslateVirtual<volatile int32_t*>(guest_address),
xe::byte_swap(mask))); xe::byte_swap(mask)));
} }
uint32_t Processor::GuestAtomicXor32(ppc::PPCContext* context, uint32_t Processor::GuestAtomicXor32(ppc::PPCContext* context,
uint32_t guest_address, uint32_t mask) { uint32_t guest_address, uint32_t mask) {

View File

@ -189,11 +189,11 @@ class Processor {
uint32_t GuestAtomicDecrement32(ppc::PPCContext* context, uint32_t GuestAtomicDecrement32(ppc::PPCContext* context,
uint32_t guest_address); uint32_t guest_address);
uint32_t GuestAtomicOr32(ppc::PPCContext* context, uint32_t guest_address, uint32_t GuestAtomicOr32(ppc::PPCContext* context, uint32_t guest_address,
uint32_t mask); uint32_t mask);
uint32_t GuestAtomicXor32(ppc::PPCContext* context, uint32_t guest_address, uint32_t GuestAtomicXor32(ppc::PPCContext* context, uint32_t guest_address,
uint32_t mask); uint32_t mask);
uint32_t GuestAtomicAnd32(ppc::PPCContext* context, uint32_t guest_address, uint32_t GuestAtomicAnd32(ppc::PPCContext* context, uint32_t guest_address,
uint32_t mask); uint32_t mask);
bool GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value, bool GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value,
uint32_t new_value, uint32_t guest_address); uint32_t new_value, uint32_t guest_address);

View File

@ -77,8 +77,7 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id,
// Allocate with 64b alignment. // Allocate with 64b alignment.
context_ = reinterpret_cast<ppc::PPCContext*>( context_ = reinterpret_cast<ppc::PPCContext*>(AllocateContext());
AllocateContext());
processor->backend()->InitializeBackendContext(context_); processor->backend()->InitializeBackendContext(context_);
assert_true(((uint64_t)context_ & 0x3F) == 0); assert_true(((uint64_t)context_ & 0x3F) == 0);
std::memset(context_, 0, sizeof(ppc::PPCContext)); std::memset(context_, 0, sizeof(ppc::PPCContext));
@ -97,9 +96,11 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id,
// fixme: VSCR must be set here! // fixme: VSCR must be set here!
context_->msr = 0x9030; // dumped from a real 360, 0x8000 context_->msr = 0x9030; // dumped from a real 360, 0x8000
//this register can be used for arbitrary data according to the PPC docs // this register can be used for arbitrary data according to the PPC docs
//but the suggested use is to mark which vector registers are in use, for faster save/restore // but the suggested use is to mark which vector registers are in use, for
//it seems unlikely anything uses this, especially since we have way more than 32 vrs, but setting it to all ones seems closer to correct than 0 // faster save/restore it seems unlikely anything uses this, especially since
// we have way more than 32 vrs, but setting it to all ones seems closer to
// correct than 0
context_->vrsave = ~0u; context_->vrsave = ~0u;
} }

View File

@ -33,7 +33,7 @@ namespace ui {
class DebugWindow : public cpu::DebugListener { class DebugWindow : public cpu::DebugListener {
public: public:
virtual ~DebugWindow(); virtual ~DebugWindow();
static std::unique_ptr<DebugWindow> Create( static std::unique_ptr<DebugWindow> Create(
Emulator* emulator, xe::ui::WindowedAppContext& app_context); Emulator* emulator, xe::ui::WindowedAppContext& app_context);

View File

@ -230,9 +230,7 @@ class Emulator {
xe::Delegate<> on_exit; xe::Delegate<> on_exit;
private: private:
enum : uint64_t { enum : uint64_t { EmulatorFlagDisclaimerAcknowledged = 1ULL << 0 };
EmulatorFlagDisclaimerAcknowledged = 1ULL << 0
};
static uint64_t GetPersistentEmulatorFlags(); static uint64_t GetPersistentEmulatorFlags();
static void SetPersistentEmulatorFlags(uint64_t new_flags); static void SetPersistentEmulatorFlags(uint64_t new_flags);
static std::string CanonicalizeFileExtension( static std::string CanonicalizeFileExtension(

View File

@ -100,11 +100,14 @@ bool CommandProcessor::Initialize() {
} }
worker_running_ = true; worker_running_ = true;
worker_thread_ = kernel::object_ref<kernel::XHostThread>( worker_thread_ =
new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
WorkerThreadMain(); kernel_state_, 128 * 1024, 0,
return 0; [this]() {
}, kernel_state_->GetIdleProcess())); WorkerThreadMain();
return 0;
},
kernel_state_->GetIdleProcess()));
worker_thread_->set_name("GPU Commands"); worker_thread_->set_name("GPU Commands");
worker_thread_->Create(); worker_thread_->Create();
@ -270,7 +273,8 @@ void CommandProcessor::WorkerThreadMain() {
// TODO(benvanik): use reader->Read_update_freq_ and only issue after moving // TODO(benvanik): use reader->Read_update_freq_ and only issue after moving
// that many indices. // that many indices.
// Keep in mind that the gpu also updates the cpu-side copy if the write pointer and read pointer would be equal // Keep in mind that the gpu also updates the cpu-side copy if the write
// pointer and read pointer would be equal
if (read_ptr_writeback_ptr_) { if (read_ptr_writeback_ptr_) {
xe::store_and_swap<uint32_t>( xe::store_and_swap<uint32_t>(
memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_); memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_);
@ -360,9 +364,8 @@ void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) { XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) {
cpu::backend::GuestPseudoStackTrace st; cpu::backend::GuestPseudoStackTrace st;
if (logging::internal::ShouldLog(LogLevel::Debug) && kernel_state_->processor() if (logging::internal::ShouldLog(LogLevel::Debug) &&
->backend() kernel_state_->processor()->backend()->PopulatePseudoStacktrace(&st)) {
->PopulatePseudoStacktrace(&st)) {
logging::LoggerBatch<LogLevel::Debug> log_initiator{}; logging::LoggerBatch<LogLevel::Debug> log_initiator{};
log_initiator("Updating read ptr to {}, initiator stacktrace below\n", log_initiator("Updating read ptr to {}, initiator stacktrace below\n",
@ -381,7 +384,7 @@ XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) {
} }
void CommandProcessor::UpdateWritePointer(uint32_t value) { void CommandProcessor::UpdateWritePointer(uint32_t value) {
XE_UNLIKELY_IF (cvars::log_ringbuffer_kickoff_initiator_bts) { XE_UNLIKELY_IF(cvars::log_ringbuffer_kickoff_initiator_bts) {
LogKickoffInitator(value); LogKickoffInitator(value);
} }
write_ptr_index_ = value; write_ptr_index_ = value;
@ -390,7 +393,8 @@ void CommandProcessor::UpdateWritePointer(uint32_t value) {
void CommandProcessor::LogRegisterSet(uint32_t register_index, uint32_t value) { void CommandProcessor::LogRegisterSet(uint32_t register_index, uint32_t value) {
#if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1 #if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1
if (cvars::log_guest_driven_gpu_register_written_values && logging::internal::ShouldLog(LogLevel::Debug)) { if (cvars::log_guest_driven_gpu_register_written_values &&
logging::internal::ShouldLog(LogLevel::Debug)) {
const RegisterInfo* reginfo = RegisterFile::GetRegisterInfo(register_index); const RegisterInfo* reginfo = RegisterFile::GetRegisterInfo(register_index);
if (!reginfo) { if (!reginfo) {
@ -734,7 +738,6 @@ void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
void CommandProcessor::ReturnFromWait() {} void CommandProcessor::ReturnFromWait() {}
void CommandProcessor::InitializeTrace() { void CommandProcessor::InitializeTrace() {
// Write the initial register values, to be loaded directly into the // Write the initial register values, to be loaded directly into the
// RegisterFile since all registers, including those that may have side // RegisterFile since all registers, including those that may have side

View File

@ -225,7 +225,6 @@ class CommandProcessor {
virtual void PrepareForWait(); virtual void PrepareForWait();
virtual void ReturnFromWait(); virtual void ReturnFromWait();
virtual void OnPrimaryBufferEnd() {} virtual void OnPrimaryBufferEnd() {}
#include "pm4_command_processor_declare.h" #include "pm4_command_processor_declare.h"

View File

@ -22,9 +22,9 @@
#include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/draw_util.h" #include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/packet_disassembler.h"
#include "xenia/gpu/registers.h" #include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/gpu/packet_disassembler.h"
#include "xenia/ui/d3d12/d3d12_presenter.h" #include "xenia/ui/d3d12/d3d12_presenter.h"
#include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/ui/d3d12/d3d12_util.h"
@ -62,10 +62,9 @@ void D3D12SaveGPUSetting(D3D12GPUSetting setting, uint64_t value) {
break; break;
} }
} }
namespace d3d12 { namespace d3d12 {
// Generated with `xb buildshaders`. // Generated with `xb buildshaders`.
namespace shaders { namespace shaders {
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_pwl_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_pwl_cs.h"
@ -4992,7 +4991,8 @@ bool D3D12CommandProcessor::UpdateBindings_BindfulPath(
} }
// Null SRV + UAV + EDRAM. // Null SRV + UAV + EDRAM.
gpu_handle_shared_memory_uav_and_edram_ = view_gpu_handle; gpu_handle_shared_memory_uav_and_edram_ = view_gpu_handle;
ui::d3d12::util::CreateBufferRawSRV(provider.GetDevice(), view_cpu_handle, nullptr, 0); ui::d3d12::util::CreateBufferRawSRV(provider.GetDevice(), view_cpu_handle,
nullptr, 0);
view_cpu_handle.ptr += descriptor_size_view; view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view;
shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); shared_memory_->WriteRawUAVDescriptor(view_cpu_handle);

View File

@ -231,13 +231,13 @@ class D3D12CommandProcessor final : public CommandProcessor {
XE_FORCEINLINE XE_FORCEINLINE
void WriteRegisterForceinline(uint32_t index, uint32_t value); void WriteRegisterForceinline(uint32_t index, uint32_t value);
void WriteRegister(uint32_t index, uint32_t value) override; void WriteRegister(uint32_t index, uint32_t value) override;
virtual void WriteRegistersFromMem(uint32_t start_index, uint32_t* base, virtual void WriteRegistersFromMem(uint32_t start_index, uint32_t* base,
uint32_t num_registers) override; uint32_t num_registers) override;
/*helper functions for WriteRegistersFromMem*/ /*helper functions for WriteRegistersFromMem*/
XE_FORCEINLINE XE_FORCEINLINE
void WriteShaderConstantsFromMem(uint32_t start_index, uint32_t* base, void WriteShaderConstantsFromMem(uint32_t start_index, uint32_t* base,
uint32_t num_registers); uint32_t num_registers);
XE_FORCEINLINE XE_FORCEINLINE
void WriteBoolLoopFromMem(uint32_t start_index, uint32_t* base, void WriteBoolLoopFromMem(uint32_t start_index, uint32_t* base,
uint32_t num_registers); uint32_t num_registers);
@ -245,8 +245,9 @@ class D3D12CommandProcessor final : public CommandProcessor {
void WriteFetchFromMem(uint32_t start_index, uint32_t* base, void WriteFetchFromMem(uint32_t start_index, uint32_t* base,
uint32_t num_registers); uint32_t num_registers);
void WritePossiblySpecialRegistersFromMem(uint32_t start_index, uint32_t* base, void WritePossiblySpecialRegistersFromMem(uint32_t start_index,
uint32_t num_registers); uint32_t* base,
uint32_t num_registers);
template <uint32_t register_lower_bound, uint32_t register_upper_bound> template <uint32_t register_lower_bound, uint32_t register_upper_bound>
XE_FORCEINLINE void WriteRegisterRangeFromMem_WithKnownBound( XE_FORCEINLINE void WriteRegisterRangeFromMem_WithKnownBound(
uint32_t start_index, uint32_t* base, uint32_t num_registers); uint32_t start_index, uint32_t* base, uint32_t num_registers);
@ -262,8 +263,7 @@ class D3D12CommandProcessor final : public CommandProcessor {
uint32_t base, uint32_t base,
uint32_t num_registers); uint32_t num_registers);
XE_NOINLINE XE_NOINLINE
void WriteOneRegisterFromRing(uint32_t base, void WriteOneRegisterFromRing(uint32_t base, uint32_t num_times);
uint32_t num_times);
XE_FORCEINLINE XE_FORCEINLINE
void WriteALURangeFromRing(xe::RingBuffer* ring, uint32_t base, void WriteALURangeFromRing(xe::RingBuffer* ring, uint32_t base,
@ -795,7 +795,6 @@ class D3D12CommandProcessor final : public CommandProcessor {
draw_util::GetViewportInfoArgs previous_viewport_info_args_; draw_util::GetViewportInfoArgs previous_viewport_info_args_;
draw_util::ViewportInfo previous_viewport_info_; draw_util::ViewportInfo previous_viewport_info_;
std::atomic<bool> pix_capture_requested_ = false; std::atomic<bool> pix_capture_requested_ = false;
bool pix_capturing_; bool pix_capturing_;

View File

@ -672,11 +672,11 @@ uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex(
return descriptor_index; return descriptor_index;
} }
void D3D12TextureCache::PrefetchSamplerParameters( void D3D12TextureCache::PrefetchSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const { const D3D12Shader::SamplerBinding& binding) const {
swcache::PrefetchL1(&register_file()[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + swcache::PrefetchL1(&register_file()[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
binding.fetch_constant * 6]); binding.fetch_constant * 6]);
} }
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters( D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const { const D3D12Shader::SamplerBinding& binding) const {
const auto& regs = register_file(); const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>( const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
@ -703,8 +703,8 @@ void D3D12TextureCache::PrefetchSamplerParameters(
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
&mip_min_level, nullptr); &mip_min_level, nullptr);
parameters.mip_min_level = mip_min_level; parameters.mip_min_level = mip_min_level;
//high cache miss count here, prefetch fetch earlier // high cache miss count here, prefetch fetch earlier
// TODO(Triang3l): Disable filtering for texture formats not supporting it. // TODO(Triang3l): Disable filtering for texture formats not supporting it.
xenos::AnisoFilter aniso_filter = xenos::AnisoFilter aniso_filter =
binding.aniso_filter == xenos::AnisoFilter::kUseFetchConst binding.aniso_filter == xenos::AnisoFilter::kUseFetchConst
? fetch.aniso_filter ? fetch.aniso_filter

View File

@ -18,8 +18,8 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/literals.h" #include "xenia/base/literals.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/ui/d3d12/d3d12_api.h"
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace d3d12 { namespace d3d12 {
@ -30,9 +30,10 @@ class D3D12CommandProcessor;
class DeferredCommandList { class DeferredCommandList {
public: public:
static constexpr size_t MAX_SIZEOF_COMMANDLIST = 65536 * 128; //around 8 mb static constexpr size_t MAX_SIZEOF_COMMANDLIST = 65536 * 128; // around 8 mb
/* /*
chrispy: upped from 1_MiB to 4_MiB, m:durandal hits frequent resizes in large open maps chrispy: upped from 1_MiB to 4_MiB, m:durandal hits frequent resizes in
large open maps
*/ */
DeferredCommandList(const D3D12CommandProcessor& command_processor, DeferredCommandList(const D3D12CommandProcessor& command_processor,
size_t initial_size_bytes = MAX_SIZEOF_COMMANDLIST); size_t initial_size_bytes = MAX_SIZEOF_COMMANDLIST);
@ -566,7 +567,7 @@ class DeferredCommandList {
const D3D12CommandProcessor& command_processor_; const D3D12CommandProcessor& command_processor_;
// uintmax_t to ensure uint64_t and pointer alignment of all structures. // uintmax_t to ensure uint64_t and pointer alignment of all structures.
//std::vector<uintmax_t> command_stream_; // std::vector<uintmax_t> command_stream_;
FixedVMemVector<MAX_SIZEOF_COMMANDLIST> command_stream_; FixedVMemVector<MAX_SIZEOF_COMMANDLIST> command_stream_;
}; };

View File

@ -552,9 +552,8 @@ void GetHostViewportInfo(GetViewportInfoArgs* XE_RESTRICT args,
} }
} }
template <bool clamp_to_surface_pitch> template <bool clamp_to_surface_pitch>
static inline static inline void GetScissorTmpl(const RegisterFile& XE_RESTRICT regs,
void GetScissorTmpl(const RegisterFile& XE_RESTRICT regs, Scissor& XE_RESTRICT scissor_out) {
Scissor& XE_RESTRICT scissor_out) {
#if XE_ARCH_AMD64 == 1 #if XE_ARCH_AMD64 == 1
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>(); auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>(); auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
@ -623,8 +622,7 @@ void GetScissorTmpl(const RegisterFile& XE_RESTRICT regs,
// interlock-based custom RB implementations) and using conventional render // interlock-based custom RB implementations) and using conventional render
// targets, but padded to EDRAM tiles. // targets, but padded to EDRAM tiles.
tmp1 = _mm_blend_epi16( tmp1 = _mm_blend_epi16(
tmp1, _mm_min_epi32(tmp1, _mm_set1_epi32(surface_pitch)), tmp1, _mm_min_epi32(tmp1, _mm_set1_epi32(surface_pitch)), 0b00110011);
0b00110011);
} }
tmp1 = _mm_max_epi32(tmp1, _mm_setzero_si128()); tmp1 = _mm_max_epi32(tmp1, _mm_setzero_si128());

View File

@ -25,10 +25,10 @@
#include "xenia/base/threading.h" #include "xenia/base/threading.h"
#include "xenia/gpu/command_processor.h" #include "xenia/gpu/command_processor.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/graphics_provider.h" #include "xenia/ui/graphics_provider.h"
#include "xenia/ui/window.h" #include "xenia/ui/window.h"
#include "xenia/ui/windowed_app_context.h" #include "xenia/ui/windowed_app_context.h"
#include "xenia/kernel/kernel_state.h"
DEFINE_bool( DEFINE_bool(
store_shaders, true, store_shaders, true,
"Store shaders persistently and load them when loading games to avoid " "Store shaders persistently and load them when loading games to avoid "
@ -102,43 +102,48 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor,
// 60hz vsync timer. // 60hz vsync timer.
vsync_worker_running_ = true; vsync_worker_running_ = true;
vsync_worker_thread_ = kernel::object_ref<kernel::XHostThread>( vsync_worker_thread_ =
new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
const double vsync_duration_d = kernel_state_, 128 * 1024, 0,
cvars::vsync [this]() {
? std::max<double>( const double vsync_duration_d =
5.0, 1000.0 / static_cast<double>(cvars::vsync_fps)) cvars::vsync
: 1.0; ? std::max<double>(
uint64_t last_frame_time = Clock::QueryGuestTickCount(); 5.0, 1000.0 / static_cast<double>(cvars::vsync_fps))
// Sleep for 90% of the vblank duration, spin for 10% : 1.0;
const double duration_scalar = 0.90; uint64_t last_frame_time = Clock::QueryGuestTickCount();
// Sleep for 90% of the vblank duration, spin for 10%
const double duration_scalar = 0.90;
while (vsync_worker_running_) { while (vsync_worker_running_) {
const uint64_t current_time = Clock::QueryGuestTickCount(); const uint64_t current_time = Clock::QueryGuestTickCount();
const uint64_t tick_freq = Clock::guest_tick_frequency(); const uint64_t tick_freq = Clock::guest_tick_frequency();
const uint64_t time_delta = current_time - last_frame_time; const uint64_t time_delta = current_time - last_frame_time;
const double elapsed_d = static_cast<double>(time_delta) / const double elapsed_d =
(static_cast<double>(tick_freq) / 1000.0); static_cast<double>(time_delta) /
if (elapsed_d >= vsync_duration_d) { (static_cast<double>(tick_freq) / 1000.0);
last_frame_time = current_time; if (elapsed_d >= vsync_duration_d) {
last_frame_time = current_time;
// TODO(disjtqz): should recalculate the remaining time to a vblank // TODO(disjtqz): should recalculate the remaining time to a
// after MarkVblank, no idea how long the guest code normally takes // vblank after MarkVblank, no idea how long the guest code
MarkVblank(); // normally takes
if (cvars::vsync) { MarkVblank();
const uint64_t estimated_nanoseconds = static_cast<uint64_t>( if (cvars::vsync) {
(vsync_duration_d * 1000000.0) * const uint64_t estimated_nanoseconds = static_cast<uint64_t>(
duration_scalar); // 1000 microseconds = 1 ms (vsync_duration_d * 1000000.0) *
duration_scalar); // 1000 microseconds = 1 ms
threading::NanoSleep(estimated_nanoseconds); threading::NanoSleep(estimated_nanoseconds);
}
}
if (!cvars::vsync) {
xe::threading::Sleep(std::chrono::milliseconds(1));
}
} }
} return 0;
if (!cvars::vsync) { },
xe::threading::Sleep(std::chrono::milliseconds(1)); kernel_state->GetIdleProcess()));
}
}
return 0;
}, kernel_state->GetIdleProcess()));
// As we run vblank interrupts the debugger must be able to suspend us. // As we run vblank interrupts the debugger must be able to suspend us.
vsync_worker_thread_->set_can_debugger_suspend(true); vsync_worker_thread_->set_can_debugger_suspend(true);
vsync_worker_thread_->set_name("GPU VSync"); vsync_worker_thread_->set_name("GPU VSync");
@ -267,7 +272,8 @@ void GraphicsSystem::SetInterruptCallback(uint32_t callback,
} }
void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) { void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) {
kernel_state()->EmulateCPInterruptDPC(interrupt_callback_,interrupt_callback_data_, source, cpu); kernel_state()->EmulateCPInterruptDPC(interrupt_callback_,
interrupt_callback_data_, source, cpu);
} }
void GraphicsSystem::MarkVblank() { void GraphicsSystem::MarkVblank() {

View File

@ -1,12 +1,11 @@
#if defined(OVERRIDING_BASE_CMDPROCESSOR) #if defined(OVERRIDING_BASE_CMDPROCESSOR)
#define PM4_OVERRIDE override #define PM4_OVERRIDE override
#else #else
#define PM4_OVERRIDE #define PM4_OVERRIDE
#endif #endif
void ExecuteIndirectBuffer(uint32_t ptr, void ExecuteIndirectBuffer(uint32_t ptr, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
virtual uint32_t ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index) virtual uint32_t ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index)
XE_RESTRICT PM4_OVERRIDE; XE_RESTRICT PM4_OVERRIDE;
virtual bool ExecutePacket() PM4_OVERRIDE; virtual bool ExecutePacket() PM4_OVERRIDE;
@ -18,91 +17,77 @@ protected:
XE_NOINLINE XE_NOINLINE
void DisassembleCurrentPacket() XE_RESTRICT; void DisassembleCurrentPacket() XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType0( uint32_t packet) XE_RESTRICT; bool ExecutePacketType0(uint32_t packet) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType1( uint32_t packet) XE_RESTRICT; bool ExecutePacketType1(uint32_t packet) XE_RESTRICT;
bool ExecutePacketType2( uint32_t packet) XE_RESTRICT; bool ExecutePacketType2(uint32_t packet) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3( uint32_t packet) XE_RESTRICT; bool ExecutePacketType3(uint32_t packet) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_ME_INIT( uint32_t packet, bool ExecutePacketType3_ME_INIT(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT; bool ExecutePacketType3_NOP(uint32_t packet, uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_NOP( uint32_t packet,
uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_INTERRUPT( uint32_t packet, bool ExecutePacketType3_INTERRUPT(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_XE_SWAP( uint32_t packet, bool ExecutePacketType3_XE_SWAP(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_INDIRECT_BUFFER( uint32_t packet, bool ExecutePacketType3_INDIRECT_BUFFER(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_WAIT_REG_MEM( uint32_t packet, bool ExecutePacketType3_WAIT_REG_MEM(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_REG_RMW( uint32_t packet, bool ExecutePacketType3_REG_RMW(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_REG_TO_MEM( uint32_t packet, bool ExecutePacketType3_REG_TO_MEM(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_MEM_WRITE( uint32_t packet, bool ExecutePacketType3_MEM_WRITE(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_COND_WRITE( uint32_t packet, bool ExecutePacketType3_COND_WRITE(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_EVENT_WRITE( uint32_t packet, bool ExecutePacketType3_EVENT_WRITE(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_EVENT_WRITE_SHD( uint32_t packet, bool ExecutePacketType3_EVENT_WRITE_SHD(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_EVENT_WRITE_EXT( uint32_t packet, bool ExecutePacketType3_EVENT_WRITE_EXT(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_EVENT_WRITE_ZPD( uint32_t packet, bool ExecutePacketType3_EVENT_WRITE_ZPD(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
bool ExecutePacketType3Draw( uint32_t packet, bool ExecutePacketType3Draw(uint32_t packet, const char* opcode_name,
const char* opcode_name,
uint32_t viz_query_condition, uint32_t viz_query_condition,
uint32_t count_remaining) XE_RESTRICT; uint32_t count_remaining) XE_RESTRICT;
bool ExecutePacketType3_DRAW_INDX( uint32_t packet, bool ExecutePacketType3_DRAW_INDX(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_DRAW_INDX_2( uint32_t packet, bool ExecutePacketType3_DRAW_INDX_2(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
XE_FORCEINLINE XE_FORCEINLINE
bool ExecutePacketType3_SET_CONSTANT( uint32_t packet, bool ExecutePacketType3_SET_CONSTANT(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
XE_NOINLINE XE_NOINLINE
bool ExecutePacketType3_SET_CONSTANT2( uint32_t packet, bool ExecutePacketType3_SET_CONSTANT2(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
XE_FORCEINLINE XE_FORCEINLINE
bool ExecutePacketType3_LOAD_ALU_CONSTANT( uint32_t packet, bool ExecutePacketType3_LOAD_ALU_CONSTANT(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_SET_SHADER_CONSTANTS( bool ExecutePacketType3_SET_SHADER_CONSTANTS(uint32_t packet,
uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_IM_LOAD( uint32_t packet, bool ExecutePacketType3_IM_LOAD(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_IM_LOAD_IMMEDIATE( uint32_t packet, bool ExecutePacketType3_IM_LOAD_IMMEDIATE(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_INVALIDATE_STATE( uint32_t packet, bool ExecutePacketType3_INVALIDATE_STATE(uint32_t packet,
uint32_t count) XE_RESTRICT; uint32_t count) XE_RESTRICT;
bool ExecutePacketType3_VIZ_QUERY( uint32_t packet, bool ExecutePacketType3_VIZ_QUERY(uint32_t packet, uint32_t count) XE_RESTRICT;
uint32_t count) XE_RESTRICT;
XE_FORCEINLINE XE_FORCEINLINE
void WriteEventInitiator(uint32_t value) XE_RESTRICT; void WriteEventInitiator(uint32_t value) XE_RESTRICT;

View File

@ -683,13 +683,12 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_INDIRECT_BUFFER(
to 54 bytes to 54 bytes
*/ */
static bool MatchValueAndRef(uint32_t value, uint32_t ref, uint32_t wait_info) { static bool MatchValueAndRef(uint32_t value, uint32_t ref, uint32_t wait_info) {
// smaller code is generated than the #else path, although whether it is faster // smaller code is generated than the #else path, although whether it is
// i do not know. i don't think games do an enormous number of cond_write // faster i do not know. i don't think games do an enormous number of
// though, so we have picked // cond_write though, so we have picked the path with the smaller codegen. we
// the path with the smaller codegen. // do technically have more instructions executed vs the switch case method,
// we do technically have more instructions executed vs the switch case method, // but we have no mispredicts and most of our instructions are 0.25/0.3
// but we have no mispredicts and most of our instructions are 0.25/0.3 // throughput
// throughput
return ((((value < ref) << 1) | ((value <= ref) << 2) | return ((((value < ref) << 1) | ((value <= ref) << 2) |
((value == ref) << 3) | ((value != ref) << 4) | ((value == ref) << 3) | ((value != ref) << 4) |
((value >= ref) << 5) | ((value > ref) << 6) | (1 << 7)) >> ((value >= ref) << 5) | ((value > ref) << 6) | (1 << 7)) >>
@ -899,13 +898,17 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_SHD(
data_value = GpuSwap(data_value, endianness); data_value = GpuSwap(data_value, endianness);
uint8_t* write_destination = memory_->TranslatePhysical(address); uint8_t* write_destination = memory_->TranslatePhysical(address);
if (address > 0x1FFFFFFF) { if (address > 0x1FFFFFFF) {
uint32_t writeback_base = register_file_->values[XE_GPU_REG_WRITEBACK_BASE].u32; uint32_t writeback_base =
uint32_t writeback_size = register_file_->values[XE_GPU_REG_WRITEBACK_SIZE].u32; register_file_->values[XE_GPU_REG_WRITEBACK_BASE].u32;
uint32_t writeback_size =
register_file_->values[XE_GPU_REG_WRITEBACK_SIZE].u32;
uint32_t writeback_offset = address - writeback_base; uint32_t writeback_offset = address - writeback_base;
//check whether the guest has written writeback base. if they haven't, skip the offset check // check whether the guest has written writeback base. if they haven't, skip
// the offset check
if (writeback_base != 0 && writeback_offset < writeback_size) { if (writeback_base != 0 && writeback_offset < writeback_size) {
write_destination = memory_->TranslateVirtual(0x7F000000 + writeback_offset); write_destination =
} memory_->TranslateVirtual(0x7F000000 + writeback_offset);
}
} }
xe::store(write_destination, data_value); xe::store(write_destination, data_value);
trace_writer_.WriteMemoryWrite(CpuToGpu(address), 4); trace_writer_.WriteMemoryWrite(CpuToGpu(address), 4);

View File

@ -883,8 +883,7 @@ class PrimitiveProcessor {
// Must be called in a global critical region. // Must be called in a global critical region.
void UpdateCacheBucketsNonEmptyL2( void UpdateCacheBucketsNonEmptyL2(
uint32_t bucket_index_div_64, uint32_t bucket_index_div_64,
[[maybe_unused]] const global_unique_lock_type& [[maybe_unused]] const global_unique_lock_type& global_lock) {
global_lock) {
uint64_t& cache_buckets_non_empty_l2_ref = uint64_t& cache_buckets_non_empty_l2_ref =
cache_buckets_non_empty_l2_[bucket_index_div_64 >> 6]; cache_buckets_non_empty_l2_[bucket_index_div_64 >> 6];
uint64_t cache_buckets_non_empty_l2_bit = uint64_t(1) uint64_t cache_buckets_non_empty_l2_bit = uint64_t(1)

View File

@ -13,7 +13,7 @@
// Almost all of these values are taken directly from: // Almost all of these values are taken directly from:
// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/22/yamato_offset.h // https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/22/yamato_offset.h
//#define XE_GPU_REGISTER(index, type, name) // #define XE_GPU_REGISTER(index, type, name)
XE_GPU_REGISTER(0x0048, kDword, BIF_PERFCOUNTER0_SELECT) XE_GPU_REGISTER(0x0048, kDword, BIF_PERFCOUNTER0_SELECT)
XE_GPU_REGISTER(0x0049, kDword, BIF_PERFCOUNTER0_HI) XE_GPU_REGISTER(0x0049, kDword, BIF_PERFCOUNTER0_HI)
@ -36,35 +36,36 @@ XE_GPU_REGISTER(0x0398, kDword, RBBM_PERFCOUNTER0_HI)
XE_GPU_REGISTER(0x0399, kDword, RBBM_PERFCOUNTER1_LOW) XE_GPU_REGISTER(0x0399, kDword, RBBM_PERFCOUNTER1_LOW)
XE_GPU_REGISTER(0x039A, kDword, RBBM_PERFCOUNTER1_HI) XE_GPU_REGISTER(0x039A, kDword, RBBM_PERFCOUNTER1_HI)
//XAM reads this directly and stores it to a struct, have not tracked where it goes from there // XAM reads this directly and stores it to a struct, have not tracked where it
//PM4 command PM4_MEM_WRITE_CNTR is supposed to write this to memory // goes from there PM4 command PM4_MEM_WRITE_CNTR is supposed to write this to
//XE_GPU_REGISTER(0x44b, kDword,CP_PROG_COUNTER ) // memory XE_GPU_REGISTER(0x44b, kDword,CP_PROG_COUNTER )
XE_GPU_REGISTER(0x045E, kDword, CALLBACK_ACK) XE_GPU_REGISTER(0x045E, kDword, CALLBACK_ACK)
XE_GPU_REGISTER(0x0578, kDword, SCRATCH_REG0) // interrupt sync XE_GPU_REGISTER(0x0578, kDword, SCRATCH_REG0) // interrupt sync
XE_GPU_REGISTER(0x0579, kDword, SCRATCH_REG1) // present interval XE_GPU_REGISTER(0x0579, kDword, SCRATCH_REG1) // present interval
XE_GPU_REGISTER(0x057A, kDword, SCRATCH_REG2) XE_GPU_REGISTER(0x057A, kDword, SCRATCH_REG2)
XE_GPU_REGISTER(0x057B, kDword, SCRATCH_REG3) XE_GPU_REGISTER(0x057B, kDword, SCRATCH_REG3)
XE_GPU_REGISTER(0x057C, kDword, SCRATCH_REG4) //originally this was named CALLBACK_ADDRESS, but that didnt make sense XE_GPU_REGISTER(0x057C, kDword,
SCRATCH_REG4) // originally this was named CALLBACK_ADDRESS,
// but that didnt make sense
XE_GPU_REGISTER(0x057D, kDword, SCRATCH_REG5) XE_GPU_REGISTER(0x057D, kDword, SCRATCH_REG5)
XE_GPU_REGISTER(0x057E, kDword, SCRATCH_REG6) XE_GPU_REGISTER(0x057E, kDword, SCRATCH_REG6)
XE_GPU_REGISTER(0x057F, kDword, SCRATCH_REG7) XE_GPU_REGISTER(0x057F, kDword, SCRATCH_REG7)
XE_GPU_REGISTER(0x05C8, kDword, WAIT_UNTIL) XE_GPU_REGISTER(0x05C8, kDword, WAIT_UNTIL)
//src is flash_xam.xex, i've seen it used by the kernel and aurora // src is flash_xam.xex, i've seen it used by the kernel and aurora
//seems to have a negative value while the gpu is busy // seems to have a negative value while the gpu is busy
//XE_GPU_REGISTER(0x05D0, kDword, RBBM_STATUS) // XE_GPU_REGISTER(0x05D0, kDword, RBBM_STATUS)
// update count = 6 bit field, bits 8- 14
// there are several other fields here, they have an unknown purpose
// XE_GPU_REGISTER(0x704, kDword, CP_RB_CNTL)
//update count = 6 bit field, bits 8- 14 // address must be 4 byte aligned
//there are several other fields here, they have an unknown purpose // low 2 bits encode two different fields?
//XE_GPU_REGISTER(0x704, kDword, CP_RB_CNTL) // xboxkrnl just does addr |2 when assigning
// XE_GPU_REGISTER(0x70C, kDword, CP_RB_RPTR_ADDR)
//address must be 4 byte aligned
//low 2 bits encode two different fields?
//xboxkrnl just does addr |2 when assigning
//XE_GPU_REGISTER(0x70C, kDword, CP_RB_RPTR_ADDR)
XE_GPU_REGISTER(0x0815, kDword, MC0_PERFCOUNTER0_SELECT) XE_GPU_REGISTER(0x0815, kDword, MC0_PERFCOUNTER0_SELECT)
XE_GPU_REGISTER(0x0816, kDword, MC0_PERFCOUNTER0_HI) XE_GPU_REGISTER(0x0816, kDword, MC0_PERFCOUNTER0_HI)
XE_GPU_REGISTER(0x0817, kDword, MC0_PERFCOUNTER0_LOW) XE_GPU_REGISTER(0x0817, kDword, MC0_PERFCOUNTER0_LOW)
@ -72,13 +73,13 @@ XE_GPU_REGISTER(0x0855, kDword, MC1_PERFCOUNTER0_SELECT)
XE_GPU_REGISTER(0x0856, kDword, MC1_PERFCOUNTER0_HI) XE_GPU_REGISTER(0x0856, kDword, MC1_PERFCOUNTER0_HI)
XE_GPU_REGISTER(0x0857, kDword, MC1_PERFCOUNTER0_LOW) XE_GPU_REGISTER(0x0857, kDword, MC1_PERFCOUNTER0_LOW)
//base GPU virtual address of the xps region. Most guests write 0xC0100000 here // base GPU virtual address of the xps region. Most guests write 0xC0100000 here
XE_GPU_REGISTER(0x0A02, kDword, XPS_BASE) XE_GPU_REGISTER(0x0A02, kDword, XPS_BASE)
//will usually be set higher, but is effectively 0x700000 bytes long // will usually be set higher, but is effectively 0x700000 bytes long
XE_GPU_REGISTER(0x0A03, kDword, XPS_SIZE) XE_GPU_REGISTER(0x0A03, kDword, XPS_SIZE)
//usually 0xC0000000 // usually 0xC0000000
XE_GPU_REGISTER(0x0A04, kDword, WRITEBACK_BASE) XE_GPU_REGISTER(0x0A04, kDword, WRITEBACK_BASE)
//usually 0x0100000 // usually 0x0100000
XE_GPU_REGISTER(0x0A05, kDword, WRITEBACK_SIZE) XE_GPU_REGISTER(0x0A05, kDword, WRITEBACK_SIZE)
XE_GPU_REGISTER(0x0A18, kDword, MH_PERFCOUNTER0_SELECT) XE_GPU_REGISTER(0x0A18, kDword, MH_PERFCOUNTER0_SELECT)
@ -279,8 +280,8 @@ XE_GPU_REGISTER(0x0F0C, kDword, BC_PERFCOUNTER2_LOW)
XE_GPU_REGISTER(0x0F0D, kDword, BC_PERFCOUNTER2_HI) XE_GPU_REGISTER(0x0F0D, kDword, BC_PERFCOUNTER2_HI)
XE_GPU_REGISTER(0x0F0E, kDword, BC_PERFCOUNTER3_LOW) XE_GPU_REGISTER(0x0F0E, kDword, BC_PERFCOUNTER3_LOW)
XE_GPU_REGISTER(0x0F0F, kDword, BC_PERFCOUNTER3_HI) XE_GPU_REGISTER(0x0F0F, kDword, BC_PERFCOUNTER3_HI)
//src is flash_xam.xex // src is flash_xam.xex
//XE_GPU_REGISTER(0x0F12, RB_SIDEBAND_DATA, // XE_GPU_REGISTER(0x0F12, RB_SIDEBAND_DATA,
XE_GPU_REGISTER(0x1004, kDword, HZ_PERFCOUNTER0_SELECT) XE_GPU_REGISTER(0x1004, kDword, HZ_PERFCOUNTER0_SELECT)
XE_GPU_REGISTER(0x1005, kDword, HZ_PERFCOUNTER0_HI) XE_GPU_REGISTER(0x1005, kDword, HZ_PERFCOUNTER0_HI)

View File

@ -231,8 +231,7 @@ class RenderTargetCache {
: register_file_(register_file), : register_file_(register_file),
draw_resolution_scale_x_(draw_resolution_scale_x), draw_resolution_scale_x_(draw_resolution_scale_x),
draw_resolution_scale_y_(draw_resolution_scale_y), draw_resolution_scale_y_(draw_resolution_scale_y),
draw_extent_estimator_(register_file, memory, trace_writer) draw_extent_estimator_(register_file, memory, trace_writer) {
{
assert_not_zero(draw_resolution_scale_x); assert_not_zero(draw_resolution_scale_x);
assert_not_zero(draw_resolution_scale_y); assert_not_zero(draw_resolution_scale_y);
} }

View File

@ -209,10 +209,10 @@ class SharedMemory {
uint64_t valid_and_gpu_resolved; uint64_t valid_and_gpu_resolved;
}; };
//chrispy: todo, systempageflagsblock should be 3 different arrays // chrispy: todo, systempageflagsblock should be 3 different arrays
// Flags for each 64 system pages, interleaved as blocks, so bit scan can be // Flags for each 64 system pages, interleaved as blocks, so bit scan can be
// used to quickly extract ranges. // used to quickly extract ranges.
// std::vector<SystemPageFlagsBlock> system_page_flags_; // std::vector<SystemPageFlagsBlock> system_page_flags_;
uint64_t *system_page_flags_valid_ = nullptr, uint64_t *system_page_flags_valid_ = nullptr,
*system_page_flags_valid_and_gpu_written_ = nullptr, *system_page_flags_valid_and_gpu_written_ = nullptr,

View File

@ -108,7 +108,8 @@ class TextureCache {
// generate a mask of all bits from before the first index, and xor it with // generate a mask of all bits from before the first index, and xor it with
// all bits before the last index this produces a mask covering only the // all bits before the last index this produces a mask covering only the
// bits between first and last // bits between first and last
uint32_t res = ((1U << first_index) - 1) ^ static_cast<uint32_t>((1ULL << (last_index + 1)) - 1ULL); uint32_t res = ((1U << first_index) - 1) ^
static_cast<uint32_t>((1ULL << (last_index + 1)) - 1ULL);
// todo: check that this is right // todo: check that this is right
texture_bindings_in_sync_ &= ~res; texture_bindings_in_sync_ &= ~res;

View File

@ -1,64 +1,64 @@
FORMAT_INFO(k_1_REVERSE, kUncompressed, 1, 1, 1), FORMAT_INFO(k_1_REVERSE, kUncompressed, 1, 1, 1),
FORMAT_INFO(k_1, kUncompressed, 1, 1, 1), FORMAT_INFO(k_1, kUncompressed, 1, 1, 1),
FORMAT_INFO(k_8, kResolvable, 1, 1, 8), FORMAT_INFO(k_8, kResolvable, 1, 1, 8),
FORMAT_INFO(k_1_5_5_5, kResolvable, 1, 1, 16), FORMAT_INFO(k_1_5_5_5, kResolvable, 1, 1, 16),
FORMAT_INFO(k_5_6_5, kResolvable, 1, 1, 16), FORMAT_INFO(k_5_6_5, kResolvable, 1, 1, 16),
FORMAT_INFO(k_6_5_5, kResolvable, 1, 1, 16), FORMAT_INFO(k_6_5_5, kResolvable, 1, 1, 16),
FORMAT_INFO(k_8_8_8_8, kResolvable, 1, 1, 32), FORMAT_INFO(k_8_8_8_8, kResolvable, 1, 1, 32),
FORMAT_INFO(k_2_10_10_10, kResolvable, 1, 1, 32), FORMAT_INFO(k_2_10_10_10, kResolvable, 1, 1, 32),
FORMAT_INFO(k_8_A, kResolvable, 1, 1, 8), FORMAT_INFO(k_8_A, kResolvable, 1, 1, 8),
FORMAT_INFO(k_8_B, kResolvable, 1, 1, 8), FORMAT_INFO(k_8_B, kResolvable, 1, 1, 8),
FORMAT_INFO(k_8_8, kResolvable, 1, 1, 16), FORMAT_INFO(k_8_8, kResolvable, 1, 1, 16),
FORMAT_INFO(k_Cr_Y1_Cb_Y0_REP, kCompressed, 2, 1, 16), FORMAT_INFO(k_Cr_Y1_Cb_Y0_REP, kCompressed, 2, 1, 16),
FORMAT_INFO(k_Y1_Cr_Y0_Cb_REP, kCompressed, 2, 1, 16), FORMAT_INFO(k_Y1_Cr_Y0_Cb_REP, kCompressed, 2, 1, 16),
FORMAT_INFO(k_16_16_EDRAM, kUncompressed, 1, 1, 32), FORMAT_INFO(k_16_16_EDRAM, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_8_8_8_8_A, kResolvable, 1, 1, 32), FORMAT_INFO(k_8_8_8_8_A, kResolvable, 1, 1, 32),
FORMAT_INFO(k_4_4_4_4, kResolvable, 1, 1, 16), FORMAT_INFO(k_4_4_4_4, kResolvable, 1, 1, 16),
FORMAT_INFO(k_10_11_11, kResolvable, 1, 1, 32), FORMAT_INFO(k_10_11_11, kResolvable, 1, 1, 32),
FORMAT_INFO(k_11_11_10, kResolvable, 1, 1, 32), FORMAT_INFO(k_11_11_10, kResolvable, 1, 1, 32),
FORMAT_INFO(k_DXT1, kCompressed, 4, 4, 4), FORMAT_INFO(k_DXT1, kCompressed, 4, 4, 4),
FORMAT_INFO(k_DXT2_3, kCompressed, 4, 4, 8), FORMAT_INFO(k_DXT2_3, kCompressed, 4, 4, 8),
FORMAT_INFO(k_DXT4_5, kCompressed, 4, 4, 8), FORMAT_INFO(k_DXT4_5, kCompressed, 4, 4, 8),
FORMAT_INFO(k_16_16_16_16_EDRAM, kUncompressed, 1, 1, 64), FORMAT_INFO(k_16_16_16_16_EDRAM, kUncompressed, 1, 1, 64),
FORMAT_INFO(k_24_8, kUncompressed, 1, 1, 32), FORMAT_INFO(k_24_8, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_24_8_FLOAT, kUncompressed, 1, 1, 32), FORMAT_INFO(k_24_8_FLOAT, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_16, kResolvable, 1, 1, 16), FORMAT_INFO(k_16, kResolvable, 1, 1, 16),
FORMAT_INFO(k_16_16, kResolvable, 1, 1, 32), FORMAT_INFO(k_16_16, kResolvable, 1, 1, 32),
FORMAT_INFO(k_16_16_16_16, kResolvable, 1, 1, 64), FORMAT_INFO(k_16_16_16_16, kResolvable, 1, 1, 64),
FORMAT_INFO(k_16_EXPAND, kUncompressed, 1, 1, 16), FORMAT_INFO(k_16_EXPAND, kUncompressed, 1, 1, 16),
FORMAT_INFO(k_16_16_EXPAND, kUncompressed, 1, 1, 32), FORMAT_INFO(k_16_16_EXPAND, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_16_16_16_16_EXPAND, kUncompressed, 1, 1, 64), FORMAT_INFO(k_16_16_16_16_EXPAND, kUncompressed, 1, 1, 64),
FORMAT_INFO(k_16_FLOAT, kResolvable, 1, 1, 16), FORMAT_INFO(k_16_FLOAT, kResolvable, 1, 1, 16),
FORMAT_INFO(k_16_16_FLOAT, kResolvable, 1, 1, 32), FORMAT_INFO(k_16_16_FLOAT, kResolvable, 1, 1, 32),
FORMAT_INFO(k_16_16_16_16_FLOAT, kResolvable, 1, 1, 64), FORMAT_INFO(k_16_16_16_16_FLOAT, kResolvable, 1, 1, 64),
FORMAT_INFO(k_32, kUncompressed, 1, 1, 32), FORMAT_INFO(k_32, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_32_32, kUncompressed, 1, 1, 64), FORMAT_INFO(k_32_32, kUncompressed, 1, 1, 64),
FORMAT_INFO(k_32_32_32_32, kUncompressed, 1, 1, 128), FORMAT_INFO(k_32_32_32_32, kUncompressed, 1, 1, 128),
FORMAT_INFO(k_32_FLOAT, kResolvable, 1, 1, 32), FORMAT_INFO(k_32_FLOAT, kResolvable, 1, 1, 32),
FORMAT_INFO(k_32_32_FLOAT, kResolvable, 1, 1, 64), FORMAT_INFO(k_32_32_FLOAT, kResolvable, 1, 1, 64),
FORMAT_INFO(k_32_32_32_32_FLOAT, kResolvable, 1, 1, 128), FORMAT_INFO(k_32_32_32_32_FLOAT, kResolvable, 1, 1, 128),
FORMAT_INFO(k_32_AS_8, kCompressed, 4, 1, 8), FORMAT_INFO(k_32_AS_8, kCompressed, 4, 1, 8),
FORMAT_INFO(k_32_AS_8_8, kCompressed, 2, 1, 16), FORMAT_INFO(k_32_AS_8_8, kCompressed, 2, 1, 16),
FORMAT_INFO(k_16_MPEG, kUncompressed, 1, 1, 16), FORMAT_INFO(k_16_MPEG, kUncompressed, 1, 1, 16),
FORMAT_INFO(k_16_16_MPEG, kUncompressed, 1, 1, 32), FORMAT_INFO(k_16_16_MPEG, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_8_INTERLACED, kUncompressed, 1, 1, 8), FORMAT_INFO(k_8_INTERLACED, kUncompressed, 1, 1, 8),
FORMAT_INFO(k_32_AS_8_INTERLACED, kCompressed, 4, 1, 8), FORMAT_INFO(k_32_AS_8_INTERLACED, kCompressed, 4, 1, 8),
FORMAT_INFO(k_32_AS_8_8_INTERLACED, kCompressed, 1, 1, 16), FORMAT_INFO(k_32_AS_8_8_INTERLACED, kCompressed, 1, 1, 16),
FORMAT_INFO(k_16_INTERLACED, kUncompressed, 1, 1, 16), FORMAT_INFO(k_16_INTERLACED, kUncompressed, 1, 1, 16),
FORMAT_INFO(k_16_MPEG_INTERLACED, kUncompressed, 1, 1, 16), FORMAT_INFO(k_16_MPEG_INTERLACED, kUncompressed, 1, 1, 16),
FORMAT_INFO(k_16_16_MPEG_INTERLACED, kUncompressed, 1, 1, 32), FORMAT_INFO(k_16_16_MPEG_INTERLACED, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_DXN, kCompressed, 4, 4, 8), FORMAT_INFO(k_DXN, kCompressed, 4, 4, 8),
FORMAT_INFO(k_8_8_8_8_AS_16_16_16_16, kResolvable, 1, 1, 32), FORMAT_INFO(k_8_8_8_8_AS_16_16_16_16, kResolvable, 1, 1, 32),
FORMAT_INFO(k_DXT1_AS_16_16_16_16, kCompressed, 4, 4, 4), FORMAT_INFO(k_DXT1_AS_16_16_16_16, kCompressed, 4, 4, 4),
FORMAT_INFO(k_DXT2_3_AS_16_16_16_16, kCompressed, 4, 4, 8), FORMAT_INFO(k_DXT2_3_AS_16_16_16_16, kCompressed, 4, 4, 8),
FORMAT_INFO(k_DXT4_5_AS_16_16_16_16, kCompressed, 4, 4, 8), FORMAT_INFO(k_DXT4_5_AS_16_16_16_16, kCompressed, 4, 4, 8),
FORMAT_INFO(k_2_10_10_10_AS_16_16_16_16, kResolvable, 1, 1, 32), FORMAT_INFO(k_2_10_10_10_AS_16_16_16_16, kResolvable, 1, 1, 32),
FORMAT_INFO(k_10_11_11_AS_16_16_16_16, kResolvable, 1, 1, 32), FORMAT_INFO(k_10_11_11_AS_16_16_16_16, kResolvable, 1, 1, 32),
FORMAT_INFO(k_11_11_10_AS_16_16_16_16, kResolvable, 1, 1, 32), FORMAT_INFO(k_11_11_10_AS_16_16_16_16, kResolvable, 1, 1, 32),
FORMAT_INFO(k_32_32_32_FLOAT, kUncompressed, 1, 1, 96), FORMAT_INFO(k_32_32_32_FLOAT, kUncompressed, 1, 1, 96),
FORMAT_INFO(k_DXT3A, kCompressed, 4, 4, 4), FORMAT_INFO(k_DXT3A, kCompressed, 4, 4, 4),
FORMAT_INFO(k_DXT5A, kCompressed, 4, 4, 4), FORMAT_INFO(k_DXT5A, kCompressed, 4, 4, 4),
FORMAT_INFO(k_CTX1, kCompressed, 4, 4, 4), FORMAT_INFO(k_CTX1, kCompressed, 4, 4, 4),
FORMAT_INFO(k_DXT3A_AS_1_1_1_1, kCompressed, 4, 4, 4), FORMAT_INFO(k_DXT3A_AS_1_1_1_1, kCompressed, 4, 4, 4),
FORMAT_INFO(k_8_8_8_8_GAMMA_EDRAM, kUncompressed, 1, 1, 32), FORMAT_INFO(k_8_8_8_8_GAMMA_EDRAM, kUncompressed, 1, 1, 32),
FORMAT_INFO(k_2_10_10_10_FLOAT_EDRAM, kUncompressed, 1, 1, 32), FORMAT_INFO(k_2_10_10_10_FLOAT_EDRAM, kUncompressed, 1, 1, 32),

View File

@ -24,6 +24,7 @@
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/gpu/draw_util.h" #include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/packet_disassembler.h"
#include "xenia/gpu/registers.h" #include "xenia/gpu/registers.h"
#include "xenia/gpu/shader.h" #include "xenia/gpu/shader.h"
#include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/spirv_shader_translator.h"
@ -32,7 +33,6 @@
#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/gpu/packet_disassembler.h"
#include "xenia/kernel/kernel_state.h" #include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/user_module.h" #include "xenia/kernel/user_module.h"
#include "xenia/ui/vulkan/vulkan_presenter.h" #include "xenia/ui/vulkan/vulkan_presenter.h"

View File

@ -10,10 +10,8 @@
#ifndef XENIA_GPU_XENOS_H_ #ifndef XENIA_GPU_XENOS_H_
#define XENIA_GPU_XENOS_H_ #define XENIA_GPU_XENOS_H_
#include "xenia/base/memory.h"
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -421,7 +419,7 @@ float Float7e3To32(uint32_t f10);
// floating-point number. // floating-point number.
// Converts an IEEE-754 32-bit floating-point number to Xenos floating-point // Converts an IEEE-754 32-bit floating-point number to Xenos floating-point
// depth, rounding to the nearest even or towards zero. // depth, rounding to the nearest even or towards zero.
XE_NOALIAS XE_NOALIAS
uint32_t Float32To20e4(float f32, bool round_to_nearest_even) noexcept; uint32_t Float32To20e4(float f32, bool round_to_nearest_even) noexcept;
// Converts Xenos floating-point depth in bits 0:23 (not clamping) to an // Converts Xenos floating-point depth in bits 0:23 (not clamping) to an
// IEEE-754 32-bit floating-point number. // IEEE-754 32-bit floating-point number.

View File

@ -54,9 +54,7 @@ struct X_FILE_FS_ATTRIBUTE_INFORMATION {
}; };
static_assert_size(X_FILE_FS_ATTRIBUTE_INFORMATION, 16); static_assert_size(X_FILE_FS_ATTRIBUTE_INFORMATION, 16);
enum X_FILE_DEVICE_TYPE : uint32_t { enum X_FILE_DEVICE_TYPE : uint32_t { FILE_DEVICE_UNKNOWN = 0x22 };
FILE_DEVICE_UNKNOWN = 0x22
};
struct X_FILE_FS_DEVICE_INFORMATION { struct X_FILE_FS_DEVICE_INFORMATION {
be<X_FILE_DEVICE_TYPE> device_type; be<X_FILE_DEVICE_TYPE> device_type;

View File

@ -1322,7 +1322,7 @@ void KernelState::InitializeKernelGuestGlobals() {
block->ObSymbolicLinkObjectType.delete_proc = block->ObSymbolicLinkObjectType.delete_proc =
kernel_trampoline_group_.NewLongtermTrampoline(DeleteSymlink); kernel_trampoline_group_.NewLongtermTrampoline(DeleteSymlink);
#define offsetof32(s, m) static_cast<uint32_t>( offsetof(s, m) ) #define offsetof32(s, m) static_cast<uint32_t>(offsetof(s, m))
host_object_type_enum_to_guest_object_type_ptr_ = { host_object_type_enum_to_guest_object_type_ptr_ = {
{XObject::Type::Event, {XObject::Type::Event,

View File

@ -651,7 +651,8 @@ void UserModule::Dump() {
for (uint32_t i = 0; i < opt_alternate_title_id->count(); i++) { for (uint32_t i = 0; i < opt_alternate_title_id->count(); i++) {
if (opt_alternate_title_id->values[i] != 0) { if (opt_alternate_title_id->values[i] != 0) {
title_ids.append(fmt::format(" {:08X},", opt_alternate_title_id->values[i])); title_ids.append(
fmt::format(" {:08X},", opt_alternate_title_id->values[i]));
} }
} }
// Remove last character as it is not necessary // Remove last character as it is not necessary

View File

@ -117,7 +117,7 @@ class UserModule : public XModule {
bool is_dll_module_ = false; bool is_dll_module_ = false;
uint32_t entry_point_ = 0; uint32_t entry_point_ = 0;
uint32_t stack_size_ = 0; uint32_t stack_size_ = 0;
uint32_t workspace_size_ = 384*1024; uint32_t workspace_size_ = 384 * 1024;
uint32_t mod_checksum_ = 0; uint32_t mod_checksum_ = 0;
uint32_t time_date_stamp_ = 0; uint32_t time_date_stamp_ = 0;

View File

@ -2,8 +2,8 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2023 Xenia Canary. All rights reserved. * * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD
* Released under the BSD license - see LICENSE in the root for more details. * *license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -95,9 +95,10 @@ bool GrowHandleTable(uint32_t table_ptr, PPCContext* context) {
/* /*
copy old bucket list contents to new, larger bucket list copy old bucket list contents to new, larger bucket list
*/ */
memcpy(context->TranslateVirtual(new_dynamic_buckets), memcpy(
context->TranslateVirtual(table->table_dynamic_buckets), context->TranslateVirtual(new_dynamic_buckets),
sizeof(uint32_t) * (new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET)); context->TranslateVirtual(table->table_dynamic_buckets),
sizeof(uint32_t) * (new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET));
if (context->TranslateVirtualBE<uint32_t>(table->table_dynamic_buckets) != if (context->TranslateVirtualBE<uint32_t>(table->table_dynamic_buckets) !=
&table->table_static_buckets[0]) { &table->table_static_buckets[0]) {

View File

@ -2,8 +2,8 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2023 Xenia Canary. All rights reserved. * * Copyright 2023 Xenia Canary. All rights reserved. * Released under the BSD
* Released under the BSD license - see LICENSE in the root for more details. * *license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -26,7 +26,7 @@ struct X_HANDLE_TABLE {
xe::be<uint32_t> table_dynamic_buckets; xe::be<uint32_t> table_dynamic_buckets;
xe::be<uint32_t> table_static_buckets[8]; xe::be<uint32_t> table_static_buckets[8];
X_KSPINLOCK table_lock; X_KSPINLOCK table_lock;
//used as unknown arg 3 to pool allocations // used as unknown arg 3 to pool allocations
uint8_t unk_pool_arg_34; uint8_t unk_pool_arg_34;
uint8_t handle_high_byte; uint8_t handle_high_byte;
uint8_t unk_36; uint8_t unk_36;

View File

@ -25,5 +25,5 @@ struct X_OBJECT_CREATE_INFORMATION;
namespace xe::kernel::util { namespace xe::kernel::util {
class NativeList; class NativeList;
class ObjectTable; class ObjectTable;
} } // namespace xe::kernel::util
#endif #endif

View File

@ -168,8 +168,8 @@ static void XeInsertHeadList(uint32_t list_head, X_LIST_ENTRY* entry,
template <typename VirtualTranslator> template <typename VirtualTranslator>
static void XeInsertHeadList(X_LIST_ENTRY* list_head, X_LIST_ENTRY* entry, static void XeInsertHeadList(X_LIST_ENTRY* list_head, X_LIST_ENTRY* entry,
VirtualTranslator context) { VirtualTranslator context) {
XeInsertHeadList(list_head, XeGuestList(list_head, context), XeInsertHeadList(list_head, XeGuestList(list_head, context), entry,
entry, XeGuestList(entry, context), context); XeGuestList(entry, context), context);
} }
template <typename TObject, size_t EntryListOffset> template <typename TObject, size_t EntryListOffset>
@ -216,7 +216,7 @@ struct X_TYPED_LIST : public X_LIST_ENTRY {
uint32_t end() { return vt->HostToGuestVirtual(thiz); } uint32_t end() { return vt->HostToGuestVirtual(thiz); }
}; };
template<typename VirtualTranslator> template <typename VirtualTranslator>
ForwardIteratorBegin<VirtualTranslator> IterateForward(VirtualTranslator vt) { ForwardIteratorBegin<VirtualTranslator> IterateForward(VirtualTranslator vt) {
return ForwardIteratorBegin<VirtualTranslator>{vt, this}; return ForwardIteratorBegin<VirtualTranslator>{vt, this};
} }
@ -227,13 +227,14 @@ struct X_TYPED_LIST : public X_LIST_ENTRY {
} }
template <typename VirtualTranslator> template <typename VirtualTranslator>
void InsertHead(TObject* entry, VirtualTranslator translator) { void InsertHead(TObject* entry, VirtualTranslator translator) {
XeInsertHeadList(static_cast<X_LIST_ENTRY*>(this), ObjectListEntry(entry), translator); XeInsertHeadList(static_cast<X_LIST_ENTRY*>(this), ObjectListEntry(entry),
translator);
} }
template <typename VirtualTranslator> template <typename VirtualTranslator>
void InsertTail(TObject* entry, VirtualTranslator translator) { void InsertTail(TObject* entry, VirtualTranslator translator) {
XeInsertTailList(this, ObjectListEntry(entry), translator); XeInsertTailList(this, ObjectListEntry(entry), translator);
} }
template<typename VirtualTranslator> template <typename VirtualTranslator>
bool empty(VirtualTranslator vt) const { bool empty(VirtualTranslator vt) const {
return vt->TranslateVirtual<X_LIST_ENTRY*>(flink_ptr) == this; return vt->TranslateVirtual<X_LIST_ENTRY*>(flink_ptr) == this;
} }

View File

@ -111,8 +111,8 @@ class ObjectTable {
// Generic lookup // Generic lookup
template <> template <>
object_ref<XObject> ObjectTable::LookupObject<XObject>( object_ref<XObject> ObjectTable::LookupObject<XObject>(X_HANDLE handle,
X_HANDLE handle, bool already_locked); bool already_locked);
} // namespace util } // namespace util
} // namespace kernel } // namespace kernel

View File

@ -35,8 +35,7 @@ using PPCContext = xe::cpu::ppc::PPCContext;
library_name, ordinals::export_name, \ library_name, ordinals::export_name, \
(xe::cpu::xe_kernel_export_shim_fn)export_name##_entry); (xe::cpu::xe_kernel_export_shim_fn)export_name##_entry);
#define SHIM_MEM_ADDR(a) \ #define SHIM_MEM_ADDR(a) ((a) ? ppc_context->TranslateVirtual(a) : nullptr)
((a) ? ppc_context->TranslateVirtual(a) : nullptr)
#define SHIM_MEM_8(a) xe::load_and_swap<uint8_t>(SHIM_MEM_ADDR(a)) #define SHIM_MEM_8(a) xe::load_and_swap<uint8_t>(SHIM_MEM_ADDR(a))
#define SHIM_MEM_16(a) xe::load_and_swap<uint16_t>(SHIM_MEM_ADDR(a)) #define SHIM_MEM_16(a) xe::load_and_swap<uint16_t>(SHIM_MEM_ADDR(a))
@ -158,9 +157,8 @@ class Param {
} else { } else {
uint32_t stack_ptr = uint32_t stack_ptr =
uint32_t(init.ppc_context->r[1]) + 0x54 + (ordinal_ - 8) * 8; uint32_t(init.ppc_context->r[1]) + 0x54 + (ordinal_ - 8) * 8;
*out_value = xe::load_and_swap<V>( *out_value =
init.ppc_context->TranslateVirtual( xe::load_and_swap<V>(init.ppc_context->TranslateVirtual(stack_ptr));
stack_ptr));
} }
} }
@ -216,6 +214,7 @@ class ContextParam : public Param {
X_KPCR* GetPCR() const { return TranslateGPR<X_KPCR*>(13); } X_KPCR* GetPCR() const { return TranslateGPR<X_KPCR*>(13); }
XThread* CurrentXThread() const; XThread* CurrentXThread() const;
protected: protected:
PPCContext* XE_RESTRICT ctx_; PPCContext* XE_RESTRICT ctx_;
}; };
@ -223,10 +222,7 @@ class ContextParam : public Param {
class PointerParam : public ParamBase<uint32_t> { class PointerParam : public ParamBase<uint32_t> {
public: public:
PointerParam(Init& init) : ParamBase(init) { PointerParam(Init& init) : ParamBase(init) {
host_ptr_ = host_ptr_ = value_ ? init.ppc_context->TranslateVirtual(value_) : nullptr;
value_
? init.ppc_context->TranslateVirtual(value_)
: nullptr;
} }
PointerParam(void* host_ptr) : ParamBase(), host_ptr_(host_ptr) {} PointerParam(void* host_ptr) : ParamBase(), host_ptr_(host_ptr) {}
PointerParam& operator=(void*& other) { PointerParam& operator=(void*& other) {
@ -296,10 +292,7 @@ class StringPointerParam : public ParamBase<uint32_t> {
public: public:
StringPointerParam(Init& init) : ParamBase(init) { StringPointerParam(Init& init) : ParamBase(init) {
host_ptr_ = host_ptr_ =
value_ value_ ? init.ppc_context->TranslateVirtual<CHAR*>(value_) : nullptr;
? init.ppc_context->TranslateVirtual<CHAR*>(
value_)
: nullptr;
} }
StringPointerParam(CHAR* host_ptr) : ParamBase(), host_ptr_(host_ptr) {} StringPointerParam(CHAR* host_ptr) : ParamBase(), host_ptr_(host_ptr) {}
StringPointerParam& operator=(const CHAR*& other) { StringPointerParam& operator=(const CHAR*& other) {
@ -323,9 +316,7 @@ class TypedPointerParam : public ParamBase<uint32_t> {
public: public:
TypedPointerParam(Init& init) : ParamBase(init) { TypedPointerParam(Init& init) : ParamBase(init) {
host_ptr_ = host_ptr_ =
value_ ? init.ppc_context->TranslateVirtual<T*>( value_ ? init.ppc_context->TranslateVirtual<T*>(value_) : nullptr;
value_)
: nullptr;
} }
TypedPointerParam(T* host_ptr) : ParamBase(), host_ptr_(host_ptr) {} TypedPointerParam(T* host_ptr) : ParamBase(), host_ptr_(host_ptr) {}
TypedPointerParam& operator=(const T*& other) { TypedPointerParam& operator=(const T*& other) {

View File

@ -216,7 +216,7 @@ std::vector<XdbfViewTable> XdbfWrapper::GetStatsView() const {
} }
const uint8_t* XdbfWrapper::ReadXLast(uint32_t& compressed_size, const uint8_t* XdbfWrapper::ReadXLast(uint32_t& compressed_size,
uint32_t& decompressed_size) const { uint32_t& decompressed_size) const {
auto xlast_table = GetEntry(XdbfSection::kMetadata, kXdbfIdXsrc); auto xlast_table = GetEntry(XdbfSection::kMetadata, kXdbfIdXsrc);
if (!xlast_table) { if (!xlast_table) {
return nullptr; return nullptr;

View File

@ -187,8 +187,7 @@ class XdbfWrapper {
XdbfPropertyTableEntry GetProperty(const uint32_t id) const; XdbfPropertyTableEntry GetProperty(const uint32_t id) const;
XdbfContextTableEntry GetContext(const uint32_t id) const; XdbfContextTableEntry GetContext(const uint32_t id) const;
std::vector<XdbfViewTable> GetStatsView() const; std::vector<XdbfViewTable> GetStatsView() const;
XdbfSharedView GetSharedView(const uint8_t* ptr, XdbfSharedView GetSharedView(const uint8_t* ptr, uint32_t& byte_count) const;
uint32_t& byte_count) const;
void GetPropertyBagMetadata(const uint8_t* ptr, uint32_t& byte_count, void GetPropertyBagMetadata(const uint8_t* ptr, uint32_t& byte_count,
std::vector<xe::be<uint32_t>>& contexts, std::vector<xe::be<uint32_t>>& contexts,
@ -196,7 +195,8 @@ class XdbfWrapper {
XdbfPropertyBag GetMatchCollection() const; XdbfPropertyBag GetMatchCollection() const;
const uint8_t* ReadXLast(uint32_t& compressed_size, uint32_t& decompressed_size) const; const uint8_t* ReadXLast(uint32_t& compressed_size,
uint32_t& decompressed_size) const;
private: private:
const uint8_t* data_ = nullptr; const uint8_t* data_ = nullptr;

View File

@ -10,8 +10,8 @@
#include "xenia/kernel/xam/content_manager.h" #include "xenia/kernel/xam/content_manager.h"
#include <array> #include <array>
#include <string>
#include <set> #include <set>
#include <string>
#include "third_party/fmt/include/fmt/format.h" #include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/filesystem.h" #include "xenia/base/filesystem.h"

View File

@ -33,19 +33,18 @@
#include "third_party/fmt/include/fmt/format.h" #include "third_party/fmt/include/fmt/format.h"
DEFINE_int32( DEFINE_int32(avpack, 8,
avpack, 8, "Video modes\n"
"Video modes\n" " 0 = PAL-60 Component (SD)\n"
" 0 = PAL-60 Component (SD)\n" " 1 = Unused\n"
" 1 = Unused\n" " 2 = PAL-60 SCART\n"
" 2 = PAL-60 SCART\n" " 3 = 480p Component (HD)\n"
" 3 = 480p Component (HD)\n" " 4 = HDMI+A\n"
" 4 = HDMI+A\n" " 5 = PAL-60 Composite/S-Video\n"
" 5 = PAL-60 Composite/S-Video\n" " 6 = VGA\n"
" 6 = VGA\n" " 7 = TV PAL-60\n"
" 7 = TV PAL-60\n" " 8 = HDMI (default)",
" 8 = HDMI (default)", "Video");
"Video");
DECLARE_int32(user_country); DECLARE_int32(user_country);
DECLARE_int32(user_language); DECLARE_int32(user_language);

View File

@ -431,7 +431,8 @@ dword_result_t XamGetLocaleEx_entry(dword_t max_country_id,
static_cast<uint8_t>(max_locale_id)); static_cast<uint8_t>(max_locale_id));
} }
DECLARE_XAM_EXPORT1(XamGetLocaleEx, kLocale, kImplemented); DECLARE_XAM_EXPORT1(XamGetLocaleEx, kLocale, kImplemented);
//originally a switch table, wrote a script to extract the values for all possible cases // originally a switch table, wrote a script to extract the values for all
// possible cases
static constexpr uint8_t XamLocaleDateFmtTable[] = { static constexpr uint8_t XamLocaleDateFmtTable[] = {
2, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 1, 4, 2, 3, 1, 2, 2, 3, 2, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 1, 4, 2, 3, 1, 2, 2, 3,

View File

@ -1083,7 +1083,7 @@ dword_result_t NetDll_XNetRegisterKey_entry(dword_t caller, lpdword_t key_id,
DECLARE_XAM_EXPORT1(NetDll_XNetRegisterKey, kNetworking, kStub); DECLARE_XAM_EXPORT1(NetDll_XNetRegisterKey, kNetworking, kStub);
dword_result_t NetDll_XNetUnregisterKey_entry(dword_t caller, lpdword_t key_id, dword_result_t NetDll_XNetUnregisterKey_entry(dword_t caller, lpdword_t key_id,
lpdword_t exchange_key) { lpdword_t exchange_key) {
return 0; return 0;
} }
DECLARE_XAM_EXPORT1(NetDll_XNetUnregisterKey, kNetworking, kStub); DECLARE_XAM_EXPORT1(NetDll_XNetUnregisterKey, kNetworking, kStub);

View File

@ -56,7 +56,7 @@ dword_result_t XamTaskSchedule_entry(lpvoid_t callback,
auto option = ctx->TranslateVirtual<XAM_TASK_ARGS*>(optional_ptr); auto option = ctx->TranslateVirtual<XAM_TASK_ARGS*>(optional_ptr);
auto v1 = option->value1; auto v1 = option->value1;
auto v2 = option->value2; //typically 0? auto v2 = option->value2; // typically 0?
XELOGI("Got xam task args: v1 = {:08X}, v2 = {:08X}", v1, v2); XELOGI("Got xam task args: v1 = {:08X}, v2 = {:08X}", v1, v2);
} }
@ -66,9 +66,9 @@ dword_result_t XamTaskSchedule_entry(lpvoid_t callback,
// Stack must be aligned to 16kb pages // Stack must be aligned to 16kb pages
stack_size = std::max((uint32_t)0x4000, ((stack_size + 0xFFF) & 0xFFFFF000)); stack_size = std::max((uint32_t)0x4000, ((stack_size + 0xFFF) & 0xFFFFF000));
auto thread = auto thread = object_ref<XThread>(new XThread(
object_ref<XThread>(new XThread(kernel_state(), stack_size, 0, callback, kernel_state(), stack_size, 0, callback, message.guest_address(), 0, true,
message.guest_address(), 0, true, false, kernel_state()->GetSystemProcess())); false, kernel_state()->GetSystemProcess()));
X_STATUS result = thread->Create(); X_STATUS result = thread->Create();

View File

@ -738,7 +738,7 @@ dword_result_t XamUserCreateStatsEnumerator_entry(
} }
if (buffer_size_ptr) { if (buffer_size_ptr) {
*buffer_size_ptr = 0; // sizeof(X_STATS_DETAILS) * stats_ptr->stats_amount; *buffer_size_ptr = 0; // sizeof(X_STATS_DETAILS) * stats_ptr->stats_amount;
} }
auto e = object_ref<XUserStatsEnumerator>( auto e = object_ref<XUserStatsEnumerator>(

View File

@ -686,12 +686,12 @@ dword_result_t XeKeysHmacShaUsingKey_entry(lpvoid_t obscured_key,
} }
DECLARE_XBOXKRNL_EXPORT1(XeKeysHmacShaUsingKey, kNone, kImplemented); DECLARE_XBOXKRNL_EXPORT1(XeKeysHmacShaUsingKey, kNone, kImplemented);
//going off of usage in some hbrew xex // going off of usage in some hbrew xex
//0 and 1 appear to be devkit, 2 is retail // 0 and 1 appear to be devkit, 2 is retail
//we default to saying we're retail // we default to saying we're retail
dword_result_t XeKeysGetConsoleType_entry(lpdword_t type_out) { dword_result_t XeKeysGetConsoleType_entry(lpdword_t type_out) {
*type_out = 2; *type_out = 2;
return 0; return 0;
} }
DECLARE_XBOXKRNL_EXPORT1(XeKeysGetConsoleType, kNone, kImplemented); DECLARE_XBOXKRNL_EXPORT1(XeKeysGetConsoleType, kNone, kImplemented);

View File

@ -120,7 +120,7 @@ void HandleCppException(pointer_t<X_EXCEPTION_RECORD> record) {
kernel_memory()->TranslateVirtual<x_s__CatchableTypeArray*>( kernel_memory()->TranslateVirtual<x_s__CatchableTypeArray*>(
throw_info->catchable_type_array_ptr); throw_info->catchable_type_array_ptr);
//xe::debugging::Break(); // xe::debugging::Break();
XELOGE("Guest attempted to throw a C++ exception!"); XELOGE("Guest attempted to throw a C++ exception!");
} }
@ -138,9 +138,10 @@ void RtlRaiseException_entry(pointer_t<X_EXCEPTION_RECORD> record) {
// TODO(benvanik): unwinding. // TODO(benvanik): unwinding.
// This is going to suck. // This is going to suck.
// xe::debugging::Break(); // xe::debugging::Break();
//RtlRaiseException definitely wasn't a noreturn function, we can return safe-ish // RtlRaiseException definitely wasn't a noreturn function, we can return
// safe-ish
XELOGE("Guest attempted to trigger a breakpoint!"); XELOGE("Guest attempted to trigger a breakpoint!");
} }
DECLARE_XBOXKRNL_EXPORT2(RtlRaiseException, kDebug, kStub, kImportant); DECLARE_XBOXKRNL_EXPORT2(RtlRaiseException, kDebug, kStub, kImportant);

View File

@ -723,8 +723,9 @@ dword_result_t IoCreateDevice_entry(dword_t driver_object,
} }
DECLARE_XBOXKRNL_EXPORT1(IoCreateDevice, kFileSystem, kStub); DECLARE_XBOXKRNL_EXPORT1(IoCreateDevice, kFileSystem, kStub);
//supposed to invoke a callback on the driver object! its some sort of destructor function // supposed to invoke a callback on the driver object! its some sort of
//intended to be called for all devices created from the driver // destructor function intended to be called for all devices created from the
// driver
void IoDeleteDevice_entry(dword_t device_ptr, const ppc_context_t& ctx) { void IoDeleteDevice_entry(dword_t device_ptr, const ppc_context_t& ctx) {
if (device_ptr) { if (device_ptr) {
auto kernel_mem = ctx->kernel_state->memory(); auto kernel_mem = ctx->kernel_state->memory();

View File

@ -697,7 +697,7 @@ DECLARE_XBOXKRNL_EXPORT1(ExAllocatePool, kMemory, kImplemented);
void xeFreePool(PPCContext* context, uint32_t base_address) { void xeFreePool(PPCContext* context, uint32_t base_address) {
auto memory = context->kernel_state->memory(); auto memory = context->kernel_state->memory();
//if 4kb aligned, there is no pool header! // if 4kb aligned, there is no pool header!
if ((base_address & (4096 - 1)) == 0) { if ((base_address & (4096 - 1)) == 0) {
memory->SystemHeapFree(base_address); memory->SystemHeapFree(base_address);
} else { } else {

View File

@ -16,7 +16,6 @@
namespace xe { namespace xe {
namespace kernel { namespace kernel {
namespace xboxkrnl { namespace xboxkrnl {
uint32_t xeMmAllocatePhysicalMemoryEx(uint32_t flags, uint32_t region_size, uint32_t xeMmAllocatePhysicalMemoryEx(uint32_t flags, uint32_t region_size,

View File

@ -132,7 +132,7 @@ uint32_t xeObCreateObject(X_OBJECT_TYPE* object_factory,
return X_STATUS_OBJECT_NAME_INVALID; return X_STATUS_OBJECT_NAME_INVALID;
} }
// the object and its name are all created in a single allocation // the object and its name are all created in a single allocation
unsigned int aligned_object_size = unsigned int aligned_object_size =
xe::align<uint32_t>(object_size_without_headers, 4); xe::align<uint32_t>(object_size_without_headers, 4);
{ {

View File

@ -434,9 +434,11 @@ pointer_result_t RtlImageNtHeader_entry(lpvoid_t module) {
} }
DECLARE_XBOXKRNL_EXPORT1(RtlImageNtHeader, kNone, kImplemented); DECLARE_XBOXKRNL_EXPORT1(RtlImageNtHeader, kNone, kImplemented);
// https://learn.microsoft.com/en-us/windows/win32/api/dbghelp/nf-dbghelp-imagedirectoryentrytodata // https://learn.microsoft.com/en-us/windows/win32/api/dbghelp/nf-dbghelp-imagedirectoryentrytodata
dword_result_t RtlImageDirectoryEntryToData_entry(dword_t Base, dword_t MappedAsImage_, dword_result_t RtlImageDirectoryEntryToData_entry(dword_t Base,
word_t DirectoryEntry, dword_t Size, dword_t MappedAsImage_,
const ppc_context_t& ctx) { word_t DirectoryEntry,
dword_t Size,
const ppc_context_t& ctx) {
bool MappedAsImage = static_cast<unsigned char>(MappedAsImage_); bool MappedAsImage = static_cast<unsigned char>(MappedAsImage_);
uint32_t aligned_base = Base; uint32_t aligned_base = Base;
if ((Base & 1) != 0) { if ((Base & 1) != 0) {
@ -830,7 +832,7 @@ static void RtlRip_entry(const ppc_context_t& ctx) {
XELOGE("RtlRip called, arg1 = {}, arg2 = {}\n", msg_str1, msg_str2); XELOGE("RtlRip called, arg1 = {}, arg2 = {}\n", msg_str1, msg_str2);
//we should break here... not sure what to do exactly // we should break here... not sure what to do exactly
} }
DECLARE_XBOXKRNL_EXPORT1(RtlRip, kNone, kImportant); DECLARE_XBOXKRNL_EXPORT1(RtlRip, kNone, kImportant);

View File

@ -63,7 +63,8 @@ uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine,
void xeKfLowerIrql(PPCContext* ctx, unsigned char new_irql); void xeKfLowerIrql(PPCContext* ctx, unsigned char new_irql);
unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql); unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql);
void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, uint32_t old_irql, bool change_irql=true); void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock,
uint32_t old_irql, bool change_irql = true);
uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock,
bool change_irql = true); bool change_irql = true);

View File

@ -388,7 +388,9 @@ object_ref<XObject> XObject::GetNativeObject(KernelState* kernel_state,
// Already initialized. // Already initialized.
// TODO: assert if the type of the object != as_type // TODO: assert if the type of the object != as_type
uint32_t handle = header->wait_list_blink; uint32_t handle = header->wait_list_blink;
result = kernel_state->object_table()->LookupObject<XObject>(handle, true).release(); result = kernel_state->object_table()
->LookupObject<XObject>(handle, true)
.release();
} else { } else {
// First use, create new. // First use, create new.
// https://www.nirsoft.net/kernel_struct/vista/KOBJECTS.html // https://www.nirsoft.net/kernel_struct/vista/KOBJECTS.html

Some files were not shown because too many files have changed in this diff Show More