Alternative mutex implementation on Windows works, but I really can't tell if it helps much. Use a larger initial size in DeferredCommandList to cut down on resizes in big scenes on m:durandal.
This commit is contained in:
parent
a037bdb2e8
commit
c9b2d10e17
|
@ -466,6 +466,48 @@ constexpr inline fourcc_t make_fourcc(const std::string_view fourcc) {
|
|||
}
|
||||
return make_fourcc(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
|
||||
}
|
||||
// chrispy: TODO: use this for the command stream vector, where resize happens a ton and has to call memset
|
||||
template <size_t sz>
|
||||
class fixed_vmem_vector {
|
||||
static_assert((sz & 65535) == 0,
|
||||
"Always give fixed_vmem_vector a size divisible by 65536 to "
|
||||
"avoid wasting memory on windows");
|
||||
|
||||
uint8_t* data_;
|
||||
size_t nbytes_;
|
||||
|
||||
public:
|
||||
fixed_vmem_vector()
|
||||
: data_((uint8_t*)AllocFixed(nullptr, sz, AllocationType::kReserveCommit,
|
||||
PageAccess::kReadWrite)),
|
||||
nbytes_(0) {}
|
||||
~fixed_vmem_vector() {
|
||||
if (data_) {
|
||||
DeallocFixed(data_, sz, DeallocationType::kRelease);
|
||||
data_ = nullptr;
|
||||
}
|
||||
nbytes_ = 0;
|
||||
}
|
||||
|
||||
uint8_t* data() const { return data_; }
|
||||
size_t size() const { return nbytes_; }
|
||||
|
||||
void resize(size_t newsize) {
|
||||
nbytes_ = newsize;
|
||||
xenia_assert(newsize < sz);
|
||||
}
|
||||
size_t alloc() const { return sz; }
|
||||
|
||||
void clear() {
|
||||
resize(0); // todo:maybe zero out
|
||||
}
|
||||
void reserve(size_t size) { xenia_assert(size < sz); }
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace xe
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include <mutex>
|
||||
#include "platform.h"
|
||||
|
||||
//#define XE_ENABLE_FAST_WIN32_MUTEX 1
|
||||
#define XE_ENABLE_FAST_WIN32_MUTEX 1
|
||||
namespace xe {
|
||||
|
||||
#if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX==1
|
||||
|
|
|
@ -493,13 +493,18 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
// very unlikely. these ORS here are meant to be bitwise ors, so that we do
|
||||
// not do branching evaluation of the conditions (we will almost always take
|
||||
// all of the branches)
|
||||
if (XE_UNLIKELY(
|
||||
(index - XE_GPU_REG_SCRATCH_REG0 < 8) |
|
||||
(index == XE_GPU_REG_COHER_STATUS_HOST) |
|
||||
((index - XE_GPU_REG_DC_LUT_RW_INDEX) <=
|
||||
(XE_GPU_REG_DC_LUT_30_COLOR - XE_GPU_REG_DC_LUT_RW_INDEX)))) {
|
||||
|
||||
unsigned expr = (index - XE_GPU_REG_SCRATCH_REG0 < 8) |
|
||||
(index == XE_GPU_REG_COHER_STATUS_HOST) |
|
||||
((index - XE_GPU_REG_DC_LUT_RW_INDEX) <=
|
||||
(XE_GPU_REG_DC_LUT_30_COLOR - XE_GPU_REG_DC_LUT_RW_INDEX));
|
||||
//chrispy: reordered for msvc branch probability (assumes if is taken and else is not)
|
||||
if (XE_LIKELY(expr == 0)) {
|
||||
|
||||
} else {
|
||||
HandleSpecialRegisterWrite(index, value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void CommandProcessor::MakeCoherent() {
|
||||
|
|
|
@ -153,6 +153,7 @@ class CommandProcessor {
|
|||
// rarely needed, most register writes have no special logic here
|
||||
XE_NOINLINE
|
||||
void HandleSpecialRegisterWrite(uint32_t index, uint32_t value);
|
||||
XE_FORCEINLINE
|
||||
virtual void WriteRegister(uint32_t index, uint32_t value);
|
||||
|
||||
const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table() const {
|
||||
|
|
|
@ -30,8 +30,11 @@ class D3D12CommandProcessor;
|
|||
|
||||
class DeferredCommandList {
|
||||
public:
|
||||
/*
|
||||
chrispy: upped from 1_MiB to 4_MiB, m:durandal hits frequent resizes in large open maps
|
||||
*/
|
||||
DeferredCommandList(const D3D12CommandProcessor& command_processor,
|
||||
size_t initial_size_bytes = 1_MiB);
|
||||
size_t initial_size_bytes = 4_MiB);
|
||||
|
||||
void Reset();
|
||||
void Execute(ID3D12GraphicsCommandList* command_list,
|
||||
|
|
Loading…
Reference in New Issue