Alternative mutex impl on Windows works, but I really can't tell if it helps much. Use a larger size in deferred_command_list to cut down on resizes in big scenes on m:durandal

This commit is contained in:
chss95cs@gmail.com 2022-08-14 10:26:50 -07:00
parent a037bdb2e8
commit c9b2d10e17
5 changed files with 58 additions and 7 deletions

View File

@ -466,6 +466,48 @@ constexpr inline fourcc_t make_fourcc(const std::string_view fourcc) {
}
return make_fourcc(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
}
// chrispy::todo: use for command stream vector, resize happens a ton and has
// to call memset
//
// Fixed-capacity byte buffer backed by a single AllocFixed allocation of `sz`
// bytes. The allocation is made once in the constructor and released in the
// destructor; resize() only updates the logical size, so it never reallocates
// and never touches the buffer contents.
//
// `sz` must be a multiple of 65536 (enforced by the static_assert below).
template <size_t sz>
class fixed_vmem_vector {
  static_assert((sz & 65535) == 0,
                "Always give fixed_vmem_vector a size divisible by 65536 to "
                "avoid wasting memory on windows");
  // Start of the allocation.
  // NOTE(review): presumably null if AllocFixed fails - the destructor
  // tolerates that, but data() callers do not check; confirm.
  uint8_t* data_;
  // Logical size in bytes; always expected to be <= sz.
  size_t nbytes_;

 public:
  // Allocates the full `sz` bytes up front with read/write access.
  fixed_vmem_vector()
      : data_((uint8_t*)AllocFixed(nullptr, sz, AllocationType::kReserveCommit,
                                   PageAccess::kReadWrite)),
        nbytes_(0) {}

  // The object owns its allocation exclusively. An implicit copy would leave
  // two objects releasing the same pointer, so copying is disabled (which
  // also suppresses the implicit move operations).
  fixed_vmem_vector(const fixed_vmem_vector&) = delete;
  fixed_vmem_vector& operator=(const fixed_vmem_vector&) = delete;

  // Releases the allocation, if there is one.
  ~fixed_vmem_vector() {
    if (data_) {
      DeallocFixed(data_, sz, DeallocationType::kRelease);
      data_ = nullptr;
    }
    nbytes_ = 0;
  }

  // Pointer to the first byte of the buffer.
  uint8_t* data() const { return data_; }

  // Current logical size in bytes.
  size_t size() const { return nbytes_; }

  // Sets the logical size. Contents are neither zeroed nor moved. The request
  // is validated before it is stored, and may use the whole capacity
  // (newsize == sz is valid).
  void resize(size_t newsize) {
    xenia_assert(newsize <= sz);
    nbytes_ = newsize;
  }

  // Total capacity in bytes (always `sz`).
  size_t alloc() const { return sz; }

  // Resets the logical size to zero without touching the contents.
  void clear() {
    resize(0);  // todo:maybe zero out
  }

  // Capacity is fixed, so this only checks that the request fits.
  void reserve(size_t size) { xenia_assert(size <= sz); }
};
} // namespace xe

View File

@ -12,7 +12,7 @@
#include <mutex>
#include "platform.h"
//#define XE_ENABLE_FAST_WIN32_MUTEX 1
#define XE_ENABLE_FAST_WIN32_MUTEX 1
namespace xe {
#if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX==1

View File

@ -493,13 +493,18 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
// very unlikely. These ORs are deliberately bitwise rather than logical, so
// that the conditions are not evaluated with short-circuit branches (we would
// almost always take all of the branches).
if (XE_UNLIKELY(
(index - XE_GPU_REG_SCRATCH_REG0 < 8) |
(index == XE_GPU_REG_COHER_STATUS_HOST) |
((index - XE_GPU_REG_DC_LUT_RW_INDEX) <=
(XE_GPU_REG_DC_LUT_30_COLOR - XE_GPU_REG_DC_LUT_RW_INDEX)))) {
unsigned expr = (index - XE_GPU_REG_SCRATCH_REG0 < 8) |
(index == XE_GPU_REG_COHER_STATUS_HOST) |
((index - XE_GPU_REG_DC_LUT_RW_INDEX) <=
(XE_GPU_REG_DC_LUT_30_COLOR - XE_GPU_REG_DC_LUT_RW_INDEX));
// chrispy: reordered for MSVC branch probability (MSVC assumes the if branch is taken and the else branch is not)
if (XE_LIKELY(expr == 0)) {
} else {
HandleSpecialRegisterWrite(index, value);
}
}
void CommandProcessor::MakeCoherent() {

View File

@ -153,6 +153,7 @@ class CommandProcessor {
// rarely needed, most register writes have no special logic here
XE_NOINLINE
void HandleSpecialRegisterWrite(uint32_t index, uint32_t value);
// Register write hook; virtual. The base implementation hands writes to the
// scratch registers, COHER_STATUS_HOST and the DC_LUT register range over to
// HandleSpecialRegisterWrite.
// NOTE(review): force-inlining can only take effect on calls that are not
// dispatched virtually - confirm the annotation is actually useful here.
XE_FORCEINLINE
virtual void WriteRegister(uint32_t index, uint32_t value);
const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table() const {

View File

@ -30,8 +30,11 @@ class D3D12CommandProcessor;
class DeferredCommandList {
public:
/*
chrispy: upped from 1_MiB to 4_MiB, m:durandal hits frequent resizes in large open maps
*/
DeferredCommandList(const D3D12CommandProcessor& command_processor,
size_t initial_size_bytes = 1_MiB);
size_t initial_size_bytes = 4_MiB);
void Reset();
void Execute(ID3D12GraphicsCommandList* command_list,