Squash reallocs in command buffers by using a large preallocated buffer; back it directly with virtual memory so the OS allocates pages on demand.
Mark raw clock functions as noinline: the way MSVC was inlining them and ordering the branches meant that rdtsc would often be speculatively executed. Add an alternative clock implementation for Windows: instead of using QueryPerformanceCounter, we grab SystemTime from KUSER_SHARED_DATA. It does not have the same precision as QueryPerformanceCounter (we only get 100-nanosecond precision), but we round to milliseconds, so it never made sense to use the performance counter in the first place. Stubbed out the "guest clock mutex" (the entirety of clock.cc needs a rewrite). Added some helpers for minf/maxf without the NaN-handling behavior.
This commit is contained in:
parent
c9b2d10e17
commit
7cc364dcb8
|
@ -15,6 +15,13 @@
|
|||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/mutex.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#include "xenia/base/platform_win.h"
|
||||
|
||||
#endif
|
||||
|
||||
DEFINE_bool(clock_no_scaling, false,
|
||||
"Disable scaling code. Time management and locking is bypassed. "
|
||||
|
@ -42,8 +49,19 @@ std::pair<uint64_t, uint64_t> guest_tick_ratio_ = std::make_pair(1, 1);
|
|||
uint64_t last_guest_tick_count_ = 0;
|
||||
// Last sampled host tick count.
|
||||
uint64_t last_host_tick_count_ = Clock::QueryHostTickCount();
|
||||
|
||||
// No-op stand-in for a mutex. It exposes lock()/unlock()/try_lock() so it can
// be used with std::lock_guard and std::unique_lock, but it performs no
// synchronization whatsoever: lock() and unlock() do nothing and try_lock()
// always reports success.
// NOTE(review): any state "guarded" by this type is effectively unguarded;
// confirm that concurrent access to that state is acceptable.
struct null_lock {
  static void lock() noexcept {}
  static void unlock() noexcept {}
  static bool try_lock() noexcept { return true; }
};
|
||||
|
||||
using tick_mutex_type = null_lock; // xe::xe_mutex;
|
||||
|
||||
// Mutex to ensure last_host_tick_count_ and last_guest_tick_count_ are in sync
|
||||
std::mutex tick_mutex_;
|
||||
// std::mutex tick_mutex_;
|
||||
static tick_mutex_type tick_mutex_;
|
||||
|
||||
void RecomputeGuestTickScalar() {
|
||||
// Create a rational number with numerator (first) and denominator (second)
|
||||
|
@ -61,7 +79,7 @@ void RecomputeGuestTickScalar() {
|
|||
// Keep this a rational calculation and reduce the fraction
|
||||
reduce_fraction(frac);
|
||||
|
||||
std::lock_guard<std::mutex> lock(tick_mutex_);
|
||||
std::lock_guard<tick_mutex_type> lock(tick_mutex_);
|
||||
guest_tick_ratio_ = frac;
|
||||
}
|
||||
|
||||
|
@ -75,7 +93,7 @@ uint64_t UpdateGuestClock() {
|
|||
return host_tick_count * guest_tick_ratio_.first / guest_tick_ratio_.second;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> lock(tick_mutex_, std::defer_lock);
|
||||
std::unique_lock<tick_mutex_type> lock(tick_mutex_, std::defer_lock);
|
||||
if (lock.try_lock()) {
|
||||
// Translate host tick count to guest tick count.
|
||||
uint64_t host_tick_delta = host_tick_count > last_host_tick_count_
|
||||
|
@ -107,7 +125,6 @@ inline uint64_t QueryGuestSystemTimeOffset() {
|
|||
|
||||
return guest_tick_count * numerator / denominator;
|
||||
}
|
||||
|
||||
uint64_t Clock::QueryHostTickFrequency() {
|
||||
#if XE_CLOCK_RAW_AVAILABLE
|
||||
if (cvars::clock_source_raw) {
|
||||
|
@ -137,7 +154,7 @@ void Clock::set_guest_time_scalar(double scalar) {
|
|||
}
|
||||
|
||||
std::pair<uint64_t, uint64_t> Clock::guest_tick_ratio() {
|
||||
std::lock_guard<std::mutex> lock(tick_mutex_);
|
||||
std::lock_guard<tick_mutex_type> lock(tick_mutex_);
|
||||
return guest_tick_ratio_;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,11 +33,15 @@ class Clock {
|
|||
// Either from platform suplied time source or from hardware directly.
|
||||
static uint64_t host_tick_frequency_platform();
|
||||
#if XE_CLOCK_RAW_AVAILABLE
|
||||
XE_NOINLINE
|
||||
static uint64_t host_tick_frequency_raw();
|
||||
#endif
|
||||
// Host tick count. Generally QueryHostTickCount() should be used.
|
||||
static uint64_t host_tick_count_platform();
|
||||
#if XE_CLOCK_RAW_AVAILABLE
|
||||
// chrispy: the way MSVC was ordering the branches was causing rdtsc to be speculatively executed each time
|
||||
// the branch history was lost.
|
||||
XE_NOINLINE
|
||||
static uint64_t host_tick_count_raw();
|
||||
#endif
|
||||
|
||||
|
|
|
@ -12,7 +12,18 @@
|
|||
#include "xenia/base/platform_win.h"
|
||||
|
||||
namespace xe {
|
||||
#if XE_USE_KUSER_SHARED==1
|
||||
// Tick frequency reported for the KUSER_SHARED clock path: a fixed
// 10,000,000 ticks per second (one tick per 100 ns).
uint64_t Clock::host_tick_frequency_platform() {
  constexpr uint64_t kTicksPerSecond = 10000000ULL;
  return kTicksPerSecond;
}
|
||||
|
||||
uint64_t Clock::host_tick_count_platform() {
|
||||
return *reinterpret_cast<volatile uint64_t*>(&KUserShared()->SystemTime);
|
||||
}
|
||||
uint64_t Clock::QueryHostSystemTime() {
|
||||
return *reinterpret_cast<volatile uint64_t*>(&KUserShared()->SystemTime);
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
uint64_t Clock::host_tick_frequency_platform() {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
|
@ -27,7 +38,6 @@ uint64_t Clock::host_tick_count_platform() {
|
|||
}
|
||||
return time;
|
||||
}
|
||||
|
||||
uint64_t Clock::QueryHostSystemTime() {
|
||||
FILETIME t;
|
||||
GetSystemTimeAsFileTime(&t);
|
||||
|
@ -37,5 +47,10 @@ uint64_t Clock::QueryHostSystemTime() {
|
|||
uint64_t Clock::QueryHostUptimeMillis() {
|
||||
return host_tick_count_platform() * 1000 / host_tick_frequency_platform();
|
||||
}
|
||||
#endif
|
||||
uint64_t Clock::QueryHostUptimeMillis() {
|
||||
return host_tick_count_platform() * 1000 / host_tick_frequency_platform();
|
||||
}
|
||||
|
||||
|
||||
} // namespace xe
|
||||
|
|
|
@ -41,10 +41,14 @@
|
|||
"\n" \
|
||||
"Set the cvar 'clock_source_raw' to 'false'.");
|
||||
|
||||
|
||||
|
||||
|
||||
namespace xe {
|
||||
// Getting the TSC frequency can be a bit tricky. This method here only works on
|
||||
// Intel as it seems. There is no easy way to get the frequency outside of ring0
|
||||
// on AMD, so we fail gracefully if not possible.
|
||||
XE_NOINLINE
|
||||
uint64_t Clock::host_tick_frequency_raw() {
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
|
||||
|
@ -71,6 +75,8 @@ uint64_t Clock::host_tick_frequency_raw() {
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (max_cpuid >= 0x15) {
|
||||
// 15H Get TSC/Crystal ratio and Crystal Hz.
|
||||
xe_cpu_cpuid(0x15, eax, ebx, ecx, edx);
|
||||
|
@ -92,10 +98,11 @@ uint64_t Clock::host_tick_frequency_raw() {
|
|||
return cpu_base_freq;
|
||||
}
|
||||
|
||||
|
||||
CLOCK_FATAL("The clock frequency could not be determined.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Raw host tick count: the value returned by xe_cpu_rdtsc(). Marked
// XE_NOINLINE so the read stays in its own out-of-line function.
XE_NOINLINE
uint64_t Clock::host_tick_count_raw() {
  const uint64_t timestamp = xe_cpu_rdtsc();
  return timestamp;
}
|
||||
|
||||
} // namespace xe
|
||||
|
|
|
@ -376,6 +376,29 @@ template <int N>
|
|||
int64_t m128_i64(const __m128& v) {
|
||||
return m128_i64<N>(_mm_castps_pd(v));
|
||||
}
|
||||
/*
|
||||
|
||||
std::min/std::max on floats have well-defined NaN behavior: if either argument is NaN, the first argument is returned.
|
||||
|
||||
minss/maxss are different: if either argument is NaN, the second operand of the instruction is returned.
|
||||
This is problematic because we have no assurance from the compiler about the argument ordering,
|
||||
|
||||
so only use these helpers in places where NaN handling is not needed.
|
||||
*/
|
||||
// Returns the smaller of x and y using the scalar SSE min instruction.
// NaN inputs do not follow std::min semantics; use only where NaN handling is
// not required. Declared inline so including translation units that never
// call it do not carry an unused static definition.
static inline float xe_minf(float x, float y) {
  return _mm_cvtss_f32(_mm_min_ss(_mm_set_ss(x), _mm_set_ss(y)));
}
|
||||
// Returns the larger of x and y using the scalar SSE max instruction.
// NaN inputs do not follow std::max semantics; use only where NaN handling is
// not required. Declared inline so including translation units that never
// call it do not carry an unused static definition.
static inline float xe_maxf(float x, float y) {
  return _mm_cvtss_f32(_mm_max_ss(_mm_set_ss(x), _mm_set_ss(y)));
}
|
||||
// Returns an approximation of 1 / den using the scalar SSE reciprocal
// instruction. The result is NOT exactly 1.0f / den: the instruction trades
// precision for speed, so do not use this where an exact quotient is needed.
// Declared inline so including translation units that never call it do not
// carry an unused static definition.
static inline float xe_rcpf(float den) {
  return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(den)));
}
|
||||
|
||||
#else
|
||||
// Portable fallback: smaller of x and y via std::min. Declared inline so
// including translation units that never call it do not carry an unused
// static definition.
static inline float xe_minf(float x, float y) {
  return std::min<float>(x, y);
}
|
||||
// Portable fallback: larger of x and y via std::max. Declared inline so
// including translation units that never call it do not carry an unused
// static definition.
static inline float xe_maxf(float x, float y) {
  return std::max<float>(x, y);
}
|
||||
// Portable fallback: reciprocal of den computed with an ordinary division.
// Declared inline so including translation units that never call it do not
// carry an unused static definition.
static inline float xe_rcpf(float den) {
  return 1.0f / den;
}
|
||||
#endif
|
||||
|
||||
// Similar to the C++ implementation of XMConvertFloatToHalf and
|
||||
|
|
|
@ -478,12 +478,13 @@ class fixed_vmem_vector {
|
|||
|
||||
public:
|
||||
fixed_vmem_vector()
|
||||
: data_((uint8_t*)AllocFixed(nullptr, sz, AllocationType::kReserveCommit,
|
||||
PageAccess::kReadWrite)),
|
||||
: data_((uint8_t*)memory::AllocFixed(
|
||||
nullptr, sz, memory::AllocationType::kReserveCommit,
|
||||
memory::PageAccess::kReadWrite)),
|
||||
nbytes_(0) {}
|
||||
~fixed_vmem_vector() {
|
||||
if (data_) {
|
||||
DeallocFixed(data_, sz, DeallocationType::kRelease);
|
||||
memory::DeallocFixed(data_, sz, memory::DeallocationType::kRelease);
|
||||
data_ = nullptr;
|
||||
}
|
||||
nbytes_ = 0;
|
||||
|
|
|
@ -34,31 +34,169 @@
|
|||
#undef DeleteFile
|
||||
#undef GetFirstChild
|
||||
|
||||
#define XE_USE_NTDLL_FUNCTIONS 1
|
||||
#if XE_USE_NTDLL_FUNCTIONS==1
|
||||
#define XE_USE_NTDLL_FUNCTIONS 1
|
||||
#define XE_USE_KUSER_SHARED 1
|
||||
#if XE_USE_NTDLL_FUNCTIONS == 1
|
||||
/*
|
||||
ntdll versions of functions often skip through a lot of extra garbage in KernelBase
|
||||
ntdll versions of functions often skip through a lot of extra garbage in
|
||||
KernelBase
|
||||
*/
|
||||
#define XE_NTDLL_IMPORT(name, cls, clsvar) \
|
||||
static class cls { \
|
||||
public: \
|
||||
FARPROC fn;\
|
||||
cls() : fn(nullptr) {\
|
||||
auto ntdll = GetModuleHandleA("ntdll.dll");\
|
||||
if (ntdll) { \
|
||||
fn = GetProcAddress(ntdll, #name );\
|
||||
}\
|
||||
} \
|
||||
template <typename TRet = void, typename... TArgs> \
|
||||
inline TRet invoke(TArgs... args) {\
|
||||
return reinterpret_cast<NTSYSAPI TRet(NTAPI*)(TArgs...)>(fn)(args...);\
|
||||
}\
|
||||
inline operator bool() const {\
|
||||
return fn!=nullptr;\
|
||||
}\
|
||||
#define XE_NTDLL_IMPORT(name, cls, clsvar) \
|
||||
static class cls { \
|
||||
public: \
|
||||
FARPROC fn; \
|
||||
cls() : fn(nullptr) { \
|
||||
auto ntdll = GetModuleHandleA("ntdll.dll"); \
|
||||
if (ntdll) { \
|
||||
fn = GetProcAddress(ntdll, #name); \
|
||||
} \
|
||||
} \
|
||||
template <typename TRet = void, typename... TArgs> \
|
||||
inline TRet invoke(TArgs... args) { \
|
||||
return reinterpret_cast<NTSYSAPI TRet(NTAPI*)(TArgs...)>(fn)(args...); \
|
||||
} \
|
||||
inline operator bool() const { return fn != nullptr; } \
|
||||
} clsvar
|
||||
#else
|
||||
#define XE_NTDLL_IMPORT(name, cls, clsvar) static constexpr bool clsvar = false
|
||||
|
||||
#endif
|
||||
|
||||
// KUSER_SHARED
|
||||
// Three-field, 4-byte-aligned time value used for the InterruptTime,
// SystemTime and TimeZoneBias members of _KUSER_SHARED_DATA.
// NOTE(review): High1Time and High2Time presumably both carry the upper half
// so a reader can detect a torn update - confirm against the Windows docs.
struct __declspec(align(4)) _KSYSTEM_TIME {
|
||||
unsigned int LowPart;
|
||||
int High1Time;
|
||||
int High2Time;
|
||||
};
|
||||
// Value type of the NtProductType member of _KUSER_SHARED_DATA.
enum _NT_PRODUCT_TYPE {
|
||||
NtProductWinNt = 0x1,
|
||||
NtProductLanManNt = 0x2,
|
||||
NtProductServer = 0x3,
|
||||
};
|
||||
// Value type of the AlternativeArchitecture member of _KUSER_SHARED_DATA.
enum _ALTERNATIVE_ARCHITECTURE_TYPE {
|
||||
StandardDesign = 0x0,
|
||||
NEC98x86 = 0x1,
|
||||
EndAlternatives = 0x2,
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// One byte of raw storage, overlaid on MitigationPolicies by the union that
// follows.
// NOTE(review): the _bf_0 name suggests collapsed bitfields - confirm.
struct $3D940D5D03EF7F98CEE6737EDE752E57 {
|
||||
__int8 _bf_0;
|
||||
};
|
||||
|
||||
// Overlays the MitigationPolicies byte with a one-byte struct view. Used as
// member ___u33 of _KUSER_SHARED_DATA.
union $DA7A7E727E24E4DD62317E27558CCADA {
|
||||
unsigned __int8 MitigationPolicies;
|
||||
$3D940D5D03EF7F98CEE6737EDE752E57 __s1;
|
||||
};
|
||||
// Four bytes of raw, 4-byte-aligned storage, overlaid on SharedDataFlags by
// the union that follows.
// NOTE(review): the _bf_0 name suggests collapsed bitfields - confirm.
struct __declspec(align(4)) $4BF4056B39611650D41923F164DAFA52 {
|
||||
__int32 _bf_0;
|
||||
};
|
||||
|
||||
// Overlays the 32-bit SharedDataFlags value with a four-byte struct view.
// Used as member ___u43 of _KUSER_SHARED_DATA.
union __declspec(align(4)) $BB68545E345A5F8046EF3BC0FE928142 {
|
||||
unsigned int SharedDataFlags;
|
||||
$4BF4056B39611650D41923F164DAFA52 __s1;
|
||||
};
|
||||
// Three overlapping views of the tick count: a _KSYSTEM_TIME, a single
// 64-bit value, and three 32-bit words. Used as member ___u50 of
// _KUSER_SHARED_DATA.
union $5031D289C483414B89DA3F368D1FE62C {
|
||||
volatile _KSYSTEM_TIME TickCount;
|
||||
volatile unsigned __int64 TickCountQuad;
|
||||
unsigned int ReservedTickCountOverlay[3];
|
||||
};
|
||||
// Two-byte pair (QpcBypassEnabled, QpcShift), overlaid on QpcData by the
// union that follows.
struct $F91ACE6F13277DFC9425B9B8BBCB30F7 {
|
||||
volatile unsigned __int8 QpcBypassEnabled;
|
||||
unsigned __int8 QpcShift;
|
||||
};
|
||||
|
||||
// Overlays the 16-bit QpcData value with its two single-byte components.
// Used as member ___u74 of _KUSER_SHARED_DATA.
union __declspec(align(2)) $3C927F8BB7EAEE13CF0CFC3E60EDC8A9 {
|
||||
unsigned __int16 QpcData;
|
||||
$F91ACE6F13277DFC9425B9B8BBCB30F7 __s1;
|
||||
};
|
||||
|
||||
// Local mirror of the Windows shared user data layout, accessed through
// KUserShared(). Field order, types and sizes are layout-critical: the total
// size is pinned to 1808 bytes by the static_assert that follows this struct,
// so do not add, remove or reorder members.
struct __declspec(align(8)) _KUSER_SHARED_DATA {
|
||||
unsigned int TickCountLowDeprecated;
|
||||
unsigned int TickCountMultiplier;
|
||||
volatile _KSYSTEM_TIME InterruptTime;
|
||||
// Read as a raw 64-bit value by the KUSER_SHARED clock path
// (Clock::host_tick_count_platform and Clock::QueryHostSystemTime).
volatile _KSYSTEM_TIME SystemTime;
|
||||
volatile _KSYSTEM_TIME TimeZoneBias;
|
||||
unsigned __int16 ImageNumberLow;
|
||||
unsigned __int16 ImageNumberHigh;
|
||||
wchar_t NtSystemRoot[260];
|
||||
unsigned int MaxStackTraceDepth;
|
||||
unsigned int CryptoExponent;
|
||||
unsigned int TimeZoneId;
|
||||
unsigned int LargePageMinimum;
|
||||
unsigned int AitSamplingValue;
|
||||
unsigned int AppCompatFlag;
|
||||
unsigned __int64 RNGSeedVersion;
|
||||
unsigned int GlobalValidationRunlevel;
|
||||
volatile int TimeZoneBiasStamp;
|
||||
unsigned int NtBuildNumber;
|
||||
_NT_PRODUCT_TYPE NtProductType;
|
||||
unsigned __int8 ProductTypeIsValid;
|
||||
unsigned __int8 Reserved0[1];
|
||||
unsigned __int16 NativeProcessorArchitecture;
|
||||
unsigned int NtMajorVersion;
|
||||
unsigned int NtMinorVersion;
|
||||
unsigned __int8 ProcessorFeatures[64];
|
||||
unsigned int Reserved1;
|
||||
unsigned int Reserved3;
|
||||
volatile unsigned int TimeSlip;
|
||||
_ALTERNATIVE_ARCHITECTURE_TYPE AlternativeArchitecture;
|
||||
unsigned int BootId;
|
||||
_LARGE_INTEGER SystemExpirationDate;
|
||||
unsigned int SuiteMask;
|
||||
unsigned __int8 KdDebuggerEnabled;
|
||||
// Union view of the MitigationPolicies byte.
$DA7A7E727E24E4DD62317E27558CCADA ___u33;
|
||||
unsigned __int8 Reserved6[2];
|
||||
volatile unsigned int ActiveConsoleId;
|
||||
volatile unsigned int DismountCount;
|
||||
unsigned int ComPlusPackage;
|
||||
unsigned int LastSystemRITEventTickCount;
|
||||
unsigned int NumberOfPhysicalPages;
|
||||
unsigned __int8 SafeBootMode;
|
||||
unsigned __int8 VirtualizationFlags;
|
||||
unsigned __int8 Reserved12[2];
|
||||
// Union view of the SharedDataFlags word.
$BB68545E345A5F8046EF3BC0FE928142 ___u43;
|
||||
unsigned int DataFlagsPad[1];
|
||||
unsigned __int64 TestRetInstruction;
|
||||
__int64 QpcFrequency;
|
||||
unsigned int SystemCall;
|
||||
unsigned int SystemCallPad0;
|
||||
unsigned __int64 SystemCallPad[2];
|
||||
// Union view of the tick count (TickCount / TickCountQuad).
$5031D289C483414B89DA3F368D1FE62C ___u50;
|
||||
unsigned int TickCountPad[1];
|
||||
unsigned int Cookie;
|
||||
unsigned int CookiePad[1];
|
||||
__int64 ConsoleSessionForegroundProcessId;
|
||||
unsigned __int64 TimeUpdateLock;
|
||||
unsigned __int64 BaselineSystemTimeQpc;
|
||||
unsigned __int64 BaselineInterruptTimeQpc;
|
||||
unsigned __int64 QpcSystemTimeIncrement;
|
||||
unsigned __int64 QpcInterruptTimeIncrement;
|
||||
unsigned __int8 QpcSystemTimeIncrementShift;
|
||||
unsigned __int8 QpcInterruptTimeIncrementShift;
|
||||
unsigned __int16 UnparkedProcessorCount;
|
||||
unsigned int EnclaveFeatureMask[4];
|
||||
unsigned int TelemetryCoverageRound;
|
||||
unsigned __int16 UserModeGlobalLogger[16];
|
||||
unsigned int ImageFileExecutionOptions;
|
||||
unsigned int LangGenerationCount;
|
||||
unsigned __int64 Reserved4;
|
||||
volatile unsigned __int64 InterruptTimeBias;
|
||||
volatile unsigned __int64 QpcBias;
|
||||
unsigned int ActiveProcessorCount;
|
||||
volatile unsigned __int8 ActiveGroupCount;
|
||||
unsigned __int8 Reserved9;
|
||||
// Union view of QpcData (QpcBypassEnabled / QpcShift).
$3C927F8BB7EAEE13CF0CFC3E60EDC8A9 ___u74;
|
||||
_LARGE_INTEGER TimeZoneBiasEffectiveStart;
|
||||
_LARGE_INTEGER TimeZoneBiasEffectiveEnd;
|
||||
_XSTATE_CONFIGURATION XState;
|
||||
};
|
||||
static constexpr unsigned KUSER_SIZE = sizeof(_KUSER_SHARED_DATA);
|
||||
|
||||
static_assert(KUSER_SIZE == 1808, "yay");
|
||||
#pragma pack(pop)
|
||||
|
||||
// Returns a pointer to the shared user data structure at its fixed address
// (0x7FFE0000). No allocation or mapping is performed here; the pointer is
// simply the constant address reinterpreted as the structure type.
// NOTE(review): the structure is presumably read-only from user mode - callers
// should only read through this pointer; confirm before writing.
static inline _KUSER_SHARED_DATA* KUserShared() {
  constexpr unsigned long long kKUserSharedDataAddress = 0x7FFE0000ull;
  return reinterpret_cast<_KUSER_SHARED_DATA*>(kKUserSharedDataAddress);
}
|
||||
#endif // XENIA_BASE_PLATFORM_WIN_H_
|
||||
|
|
|
@ -148,6 +148,7 @@ bool SetTlsValue(TlsHandle handle, uintptr_t value);
|
|||
// be kept short or else all timers will be impacted. This is a simplified
|
||||
// wrapper around QueueTimerRecurring which automatically cancels the timer on
|
||||
// destruction.
|
||||
//only used by XboxkrnlModule::XboxkrnlModule
|
||||
class HighResolutionTimer {
|
||||
HighResolutionTimer(std::chrono::milliseconds interval,
|
||||
std::function<void()> callback) {
|
||||
|
|
|
@ -205,7 +205,7 @@ void TimerQueueWaitItem::Disarm() {
|
|||
spinner.spin_once();
|
||||
}
|
||||
}
|
||||
|
||||
//unused
|
||||
std::weak_ptr<WaitItem> QueueTimerOnce(std::function<void(void*)> callback,
|
||||
void* userdata,
|
||||
WaitItem::clock::time_point due) {
|
||||
|
@ -213,7 +213,7 @@ std::weak_ptr<WaitItem> QueueTimerOnce(std::function<void(void*)> callback,
|
|||
std::make_shared<WaitItem>(std::move(callback), userdata, &timer_queue_,
|
||||
due, WaitItem::clock::duration::zero()));
|
||||
}
|
||||
|
||||
// only used by HighResolutionTimer
|
||||
std::weak_ptr<WaitItem> QueueTimerRecurring(
|
||||
std::function<void(void*)> callback, void* userdata,
|
||||
WaitItem::clock::time_point due, WaitItem::clock::duration interval) {
|
||||
|
|
|
@ -31,8 +31,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
|
|||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
const uintmax_t* stream = command_stream_.data();
|
||||
size_t stream_remaining = command_stream_.size();
|
||||
const uintmax_t* stream = (const uintmax_t*)command_stream_.data();
|
||||
size_t stream_remaining = command_stream_.size() / sizeof(uintmax_t);
|
||||
ID3D12PipelineState* current_pipeline_state = nullptr;
|
||||
while (stream_remaining != 0) {
|
||||
const CommandHeader& header =
|
||||
|
@ -266,8 +266,12 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
|
|||
|
||||
void* DeferredCommandList::WriteCommand(Command command,
|
||||
size_t arguments_size_bytes) {
|
||||
|
||||
size_t arguments_size_elements =
|
||||
(arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
|
||||
round_up(arguments_size_bytes, sizeof(uintmax_t), false);
|
||||
|
||||
//(arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
|
||||
#if 0
|
||||
size_t offset = command_stream_.size();
|
||||
command_stream_.resize(offset + kCommandHeaderSizeElements +
|
||||
arguments_size_elements);
|
||||
|
@ -276,6 +280,19 @@ void* DeferredCommandList::WriteCommand(Command command,
|
|||
header.command = command;
|
||||
header.arguments_size_elements = uint32_t(arguments_size_elements);
|
||||
return command_stream_.data() + (offset + kCommandHeaderSizeElements);
|
||||
#else
|
||||
|
||||
size_t offset = command_stream_.size();
|
||||
constexpr size_t kCommandHeaderSizeBytes =
|
||||
kCommandHeaderSizeElements * sizeof(uintmax_t);
|
||||
command_stream_.resize(offset + kCommandHeaderSizeBytes +
|
||||
arguments_size_elements);
|
||||
CommandHeader& header =
|
||||
*reinterpret_cast<CommandHeader*>(command_stream_.data() + offset);
|
||||
header.command = command;
|
||||
header.arguments_size_elements = uint32_t(arguments_size_elements) / sizeof(uintmax_t);
|
||||
return command_stream_.data() + (offset + kCommandHeaderSizeBytes);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "xenia/base/literals.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
|
||||
#include "xenia/base/memory.h"
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
@ -30,11 +30,12 @@ class D3D12CommandProcessor;
|
|||
|
||||
class DeferredCommandList {
|
||||
public:
|
||||
static constexpr size_t MAX_SIZEOF_COMMANDLIST = 65536 * 128; //around 8 mb
|
||||
/*
|
||||
chrispy: upped from 1_MiB to 4_MiB, m:durandal hits frequent resizes in large open maps
|
||||
*/
|
||||
DeferredCommandList(const D3D12CommandProcessor& command_processor,
|
||||
size_t initial_size_bytes = 4_MiB);
|
||||
size_t initial_size_bytes = MAX_SIZEOF_COMMANDLIST);
|
||||
|
||||
void Reset();
|
||||
void Execute(ID3D12GraphicsCommandList* command_list,
|
||||
|
@ -565,7 +566,8 @@ class DeferredCommandList {
|
|||
const D3D12CommandProcessor& command_processor_;
|
||||
|
||||
// uintmax_t to ensure uint64_t and pointer alignment of all structures.
|
||||
std::vector<uintmax_t> command_stream_;
|
||||
//std::vector<uintmax_t> command_stream_;
|
||||
fixed_vmem_vector<MAX_SIZEOF_COMMANDLIST> command_stream_;
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
Loading…
Reference in New Issue