[GPU] DC_LUT_RW_INDEX/WRITE_EN_MASK + gamma ramp and registers in traces
This commit is contained in:
parent
2d90d5940f
commit
c794d0d538
|
@ -12,6 +12,7 @@
|
|||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/byte_stream.h"
|
||||
|
@ -49,22 +50,23 @@ CommandProcessor::~CommandProcessor() = default;
|
|||
|
||||
bool CommandProcessor::Initialize() {
|
||||
// Initialize the gamma ramps to their default (linear) values - taken from
|
||||
// what games set when starting.
|
||||
// what games set when starting with the sRGB (return value 1)
|
||||
// VdGetCurrentDisplayGamma.
|
||||
for (uint32_t i = 0; i < 256; ++i) {
|
||||
uint32_t value = i * 1023 / 255;
|
||||
gamma_ramp_.table[i].value = value | (value << 10) | (value << 20);
|
||||
uint32_t value = i * 0x3FF / 0xFF;
|
||||
reg::DC_LUT_30_COLOR& gamma_ramp_entry = gamma_ramp_256_entry_table_[i];
|
||||
gamma_ramp_entry.color_10_blue = value;
|
||||
gamma_ramp_entry.color_10_green = value;
|
||||
gamma_ramp_entry.color_10_red = value;
|
||||
}
|
||||
for (uint32_t i = 0; i < 128; ++i) {
|
||||
uint32_t value = (i * 65535 / 127) & ~63;
|
||||
if (i < 127) {
|
||||
value |= 0x200 << 16;
|
||||
}
|
||||
reg::DC_LUT_PWL_DATA gamma_ramp_entry = {};
|
||||
gamma_ramp_entry.base = (i * 0xFFFF / 0x7F) & ~UINT32_C(0x3F);
|
||||
gamma_ramp_entry.delta = i < 0x7F ? 0x200 : 0;
|
||||
for (uint32_t j = 0; j < 3; ++j) {
|
||||
gamma_ramp_.pwl[i].values[j].value = value;
|
||||
gamma_ramp_pwl_rgb_[i][j] = gamma_ramp_entry;
|
||||
}
|
||||
}
|
||||
dirty_gamma_ramp_table_ = true;
|
||||
dirty_gamma_ramp_pwl_ = true;
|
||||
|
||||
worker_running_ = true;
|
||||
worker_thread_ = kernel::object_ref<kernel::XHostThread>(
|
||||
|
@ -128,6 +130,46 @@ void CommandProcessor::EndTracing() {
|
|||
trace_writer_.Close();
|
||||
}
|
||||
|
||||
void CommandProcessor::RestoreRegisters(uint32_t first_register,
|
||||
const uint32_t* register_values,
|
||||
uint32_t register_count,
|
||||
bool execute_callbacks) {
|
||||
if (first_register > RegisterFile::kRegisterCount ||
|
||||
RegisterFile::kRegisterCount - first_register < register_count) {
|
||||
XELOGW(
|
||||
"CommandProcessor::RestoreRegisters out of bounds (0x{:X} registers "
|
||||
"starting with 0x{:X}, while a total of 0x{:X} registers are stored)",
|
||||
register_count, first_register, RegisterFile::kRegisterCount);
|
||||
if (first_register > RegisterFile::kRegisterCount) {
|
||||
return;
|
||||
}
|
||||
register_count =
|
||||
std::min(uint32_t(RegisterFile::kRegisterCount) - first_register,
|
||||
register_count);
|
||||
}
|
||||
if (execute_callbacks) {
|
||||
for (uint32_t i = 0; i < register_count; ++i) {
|
||||
WriteRegister(first_register + i, register_values[i]);
|
||||
}
|
||||
} else {
|
||||
std::memcpy(register_file_->values + first_register, register_values,
|
||||
sizeof(uint32_t) * register_count);
|
||||
}
|
||||
}
|
||||
|
||||
void CommandProcessor::RestoreGammaRamp(
|
||||
const reg::DC_LUT_30_COLOR* new_gamma_ramp_256_entry_table,
|
||||
const reg::DC_LUT_PWL_DATA* new_gamma_ramp_pwl_rgb,
|
||||
uint32_t new_gamma_ramp_rw_component) {
|
||||
std::memcpy(gamma_ramp_256_entry_table_, new_gamma_ramp_256_entry_table,
|
||||
sizeof(reg::DC_LUT_30_COLOR) * 256);
|
||||
std::memcpy(gamma_ramp_pwl_rgb_, new_gamma_ramp_pwl_rgb,
|
||||
sizeof(reg::DC_LUT_PWL_DATA) * 3 * 128);
|
||||
gamma_ramp_rw_component_ = new_gamma_ramp_rw_component;
|
||||
OnGammaRamp256EntryTableValueWritten();
|
||||
OnGammaRampPWLValueWritten();
|
||||
}
|
||||
|
||||
void CommandProcessor::CallInThread(std::function<void()> fn) {
|
||||
if (pending_fns_.empty() &&
|
||||
kernel::XThread::IsInThread(worker_thread_.get())) {
|
||||
|
@ -286,68 +328,141 @@ void CommandProcessor::UpdateWritePointer(uint32_t value) {
|
|||
}
|
||||
|
||||
void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
||||
RegisterFile* regs = register_file_;
|
||||
RegisterFile& regs = *register_file_;
|
||||
if (index >= RegisterFile::kRegisterCount) {
|
||||
XELOGW("CommandProcessor::WriteRegister index out of bounds: {}", index);
|
||||
return;
|
||||
}
|
||||
|
||||
regs->values[index].u32 = value;
|
||||
if (!regs->GetRegisterInfo(index)) {
|
||||
regs.values[index].u32 = value;
|
||||
if (!regs.GetRegisterInfo(index)) {
|
||||
XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value);
|
||||
}
|
||||
|
||||
// If this is a COHER register, set the dirty flag.
|
||||
// This will block the command processor the next time it WAIT_MEM_REGs and
|
||||
// allow us to synchronize the memory.
|
||||
if (index == XE_GPU_REG_COHER_STATUS_HOST) {
|
||||
regs->values[index].u32 |= 0x80000000ul;
|
||||
}
|
||||
|
||||
// Scratch register writeback.
|
||||
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
|
||||
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
|
||||
if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) {
|
||||
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) {
|
||||
// Enabled - write to address.
|
||||
uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
|
||||
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32;
|
||||
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
|
||||
xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
|
||||
}
|
||||
} else {
|
||||
switch (index) {
|
||||
// If this is a COHER register, set the dirty flag.
|
||||
// This will block the command processor the next time it WAIT_MEM_REGs
|
||||
// and allow us to synchronize the memory.
|
||||
case XE_GPU_REG_COHER_STATUS_HOST: {
|
||||
regs.values[index].u32 |= UINT32_C(0x80000000);
|
||||
} break;
|
||||
|
||||
case XE_GPU_REG_DC_LUT_RW_INDEX: {
|
||||
// Reset the sequential read / write component index (see the M56
|
||||
// DC_LUT_SEQ_COLOR documentation).
|
||||
gamma_ramp_rw_component_ = 0;
|
||||
} break;
|
||||
|
||||
case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
|
||||
// Should be in the 256-entry table writing mode.
|
||||
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
|
||||
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
|
||||
// DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
|
||||
// enable mask is blue, green, red.
|
||||
bool write_gamma_ramp_component =
|
||||
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
|
||||
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
|
||||
if (write_gamma_ramp_component) {
|
||||
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
|
||||
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
|
||||
// Bits 0:5 are hardwired to zero.
|
||||
uint32_t gamma_ramp_seq_color =
|
||||
regs.Get<reg::DC_LUT_SEQ_COLOR>().seq_color >> 6;
|
||||
switch (gamma_ramp_rw_component_) {
|
||||
case 0:
|
||||
gamma_ramp_entry.color_10_red = gamma_ramp_seq_color;
|
||||
break;
|
||||
case 1:
|
||||
gamma_ramp_entry.color_10_green = gamma_ramp_seq_color;
|
||||
break;
|
||||
case 2:
|
||||
gamma_ramp_entry.color_10_blue = gamma_ramp_seq_color;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (++gamma_ramp_rw_component_ >= 3) {
|
||||
gamma_ramp_rw_component_ = 0;
|
||||
++gamma_ramp_rw_index.rw_index;
|
||||
}
|
||||
if (write_gamma_ramp_component) {
|
||||
OnGammaRamp256EntryTableValueWritten();
|
||||
}
|
||||
} break;
|
||||
|
||||
void CommandProcessor::UpdateGammaRampValue(GammaRampType type,
|
||||
uint32_t value) {
|
||||
RegisterFile* regs = register_file_;
|
||||
case XE_GPU_REG_DC_LUT_PWL_DATA: {
|
||||
// Should be in the PWL writing mode.
|
||||
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
|
||||
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
|
||||
// Bit 7 of the index is ignored for PWL.
|
||||
uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F;
|
||||
// DC_LUT_PWL_DATA is likely in the red, green, blue order because
|
||||
// DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
|
||||
bool write_gamma_ramp_component =
|
||||
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
|
||||
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
|
||||
if (write_gamma_ramp_component) {
|
||||
reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
|
||||
gamma_ramp_pwl_rgb_[gamma_ramp_rw_index_pwl]
|
||||
[gamma_ramp_rw_component_];
|
||||
auto gamma_ramp_value = regs.Get<reg::DC_LUT_PWL_DATA>();
|
||||
// Bits 0:5 are hardwired to zero.
|
||||
gamma_ramp_entry.base = gamma_ramp_value.base & ~UINT32_C(0x3F);
|
||||
gamma_ramp_entry.delta = gamma_ramp_value.delta & ~UINT32_C(0x3F);
|
||||
}
|
||||
if (++gamma_ramp_rw_component_ >= 3) {
|
||||
gamma_ramp_rw_component_ = 0;
|
||||
// TODO(Triang3l): Should this increase beyond 7 bits for PWL?
|
||||
// Direct3D 9 explicitly sets rw_index to 0x80 after writing the last
|
||||
// PWL entry. However, the DC_LUT_RW_INDEX documentation says that for
|
||||
// PWL, the bit 7 is ignored.
|
||||
gamma_ramp_rw_index.rw_index =
|
||||
(gamma_ramp_rw_index.rw_index & ~UINT32_C(0x7F)) |
|
||||
((gamma_ramp_rw_index_pwl + 1) & 0x7F);
|
||||
}
|
||||
if (write_gamma_ramp_component) {
|
||||
OnGammaRampPWLValueWritten();
|
||||
}
|
||||
} break;
|
||||
|
||||
auto index = regs->values[XE_GPU_REG_DC_LUT_RW_INDEX].u32;
|
||||
|
||||
auto mask = regs->values[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32;
|
||||
auto mask_lo = (mask >> 0) & 0x7;
|
||||
auto mask_hi = (mask >> 3) & 0x7;
|
||||
|
||||
// If games update individual components we're going to have a problem.
|
||||
assert_true(mask_lo == 0 || mask_lo == 7);
|
||||
assert_true(mask_hi == 0);
|
||||
|
||||
if (mask_lo) {
|
||||
switch (type) {
|
||||
case GammaRampType::kTable:
|
||||
assert_true(regs->values[XE_GPU_REG_DC_LUT_RW_MODE].u32 == 0);
|
||||
gamma_ramp_.table[index].value = value;
|
||||
dirty_gamma_ramp_table_ = true;
|
||||
break;
|
||||
case GammaRampType::kPWL:
|
||||
assert_true(regs->values[XE_GPU_REG_DC_LUT_RW_MODE].u32 == 1);
|
||||
// The lower 6 bits are hardwired to 0.
|
||||
// https://developer.amd.com/wordpress/media/2012/10/RRG-216M56-03oOEM.pdf
|
||||
gamma_ramp_.pwl[index].values[gamma_ramp_rw_subindex_].value =
|
||||
value & ~(uint32_t(63) | (uint32_t(63) << 16));
|
||||
gamma_ramp_rw_subindex_ = (gamma_ramp_rw_subindex_ + 1) % 3;
|
||||
dirty_gamma_ramp_pwl_ = true;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(type);
|
||||
case XE_GPU_REG_DC_LUT_30_COLOR: {
|
||||
// Should be in the 256-entry table writing mode.
|
||||
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
|
||||
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
|
||||
uint32_t gamma_ramp_write_enable_mask =
|
||||
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111;
|
||||
if (gamma_ramp_write_enable_mask) {
|
||||
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
|
||||
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
|
||||
auto gamma_ramp_value = regs.Get<reg::DC_LUT_30_COLOR>();
|
||||
if (gamma_ramp_write_enable_mask & 0b001) {
|
||||
gamma_ramp_entry.color_10_blue = gamma_ramp_value.color_10_blue;
|
||||
}
|
||||
if (gamma_ramp_write_enable_mask & 0b010) {
|
||||
gamma_ramp_entry.color_10_green = gamma_ramp_value.color_10_green;
|
||||
}
|
||||
if (gamma_ramp_write_enable_mask & 0b100) {
|
||||
gamma_ramp_entry.color_10_red = gamma_ramp_value.color_10_red;
|
||||
}
|
||||
}
|
||||
++gamma_ramp_rw_index.rw_index;
|
||||
// TODO(Triang3l): Should this reset the component write index? If this
|
||||
// increase is assumed to behave like a full DC_LUT_RW_INDEX write, it
|
||||
// probably should.
|
||||
gamma_ramp_rw_component_ = 0;
|
||||
if (gamma_ramp_write_enable_mask) {
|
||||
OnGammaRamp256EntryTableValueWritten();
|
||||
}
|
||||
} break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1493,5 +1608,17 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Writes the state shared by all backends at the start of a trace: the whole
// register file followed by the gamma ramp.
void CommandProcessor::InitializeTrace() {
  // Every register is saved, including those that may have side effects when
  // set, so the values are flagged to be loaded directly into the
  // RegisterFile (the final `false` argument).
  const uint32_t* initial_register_values =
      reinterpret_cast<const uint32_t*>(register_file_->values);
  trace_writer_.WriteRegisters(0, initial_register_values,
                               RegisterFile::kRegisterCount, false);

  // Both ramps plus the current sequential read / write component index.
  trace_writer_.WriteGammaRamp(gamma_ramp_256_entry_table(),
                               gamma_ramp_pwl_rgb(), gamma_ramp_rw_component_);
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "xenia/base/ring_buffer.h"
|
||||
#include "xenia/base/threading.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/kernel/xthread.h"
|
||||
|
@ -64,61 +65,6 @@ enum class GammaRampType {
|
|||
kPWL,
|
||||
};
|
||||
|
||||
struct GammaRamp {
|
||||
// A lot of gamma ramp (DC_LUT) documentation:
|
||||
// https://developer.amd.com/wordpress/media/2012/10/RRG-216M56-03oOEM.pdf
|
||||
// The ramps entries are BGR, not RGB.
|
||||
// For the 256-entry table (used by Direct3D 9 for a 8bpc front buffer),
|
||||
// 535107D4 has in-game settings allowing separate configuration.
|
||||
// The component order of the PWL table is untested, however, it's likely BGR
|
||||
// too, since DC_LUTA/B registers have values for blue first, and for red
|
||||
// last.
|
||||
struct TableEntry {
|
||||
union {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t b : 10;
|
||||
uint32_t g : 10;
|
||||
uint32_t r : 10;
|
||||
uint32_t : 2;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
struct PWLValue {
|
||||
union {
|
||||
uint32_t value;
|
||||
struct {
|
||||
// The lower 6 bits are always zero (these are 10-bit in the upper bits
|
||||
// thus, not fully 16-bit).
|
||||
// See DC_LUTA/B_CONTROL for information about the way they should be
|
||||
// interpreted (`output = base + (multiplier * delta) / 2^increment`,
|
||||
// where the increment is the value specified in DC_LUTA/B_CONTROL for
|
||||
// the specific color channel, the base is 7 bits of the front buffer
|
||||
// value above `increment` bits, the multiplier is the lower `increment`
|
||||
// bits of it; the increment is nonzero, otherwise the 256-entry table
|
||||
// should be used instead).
|
||||
uint16_t base;
|
||||
uint16_t delta;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
struct PWLEntry {
|
||||
union {
|
||||
PWLValue values[3];
|
||||
struct {
|
||||
PWLValue b;
|
||||
PWLValue g;
|
||||
PWLValue r;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
TableEntry table[256];
|
||||
PWLEntry pwl[128];
|
||||
};
|
||||
|
||||
class CommandProcessor {
|
||||
public:
|
||||
enum class SwapPostEffect {
|
||||
|
@ -170,6 +116,13 @@ class CommandProcessor {
|
|||
|
||||
virtual void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) = 0;
|
||||
|
||||
void RestoreRegisters(uint32_t first_register,
|
||||
const uint32_t* register_values,
|
||||
uint32_t register_count, bool execute_callbacks);
|
||||
void RestoreGammaRamp(
|
||||
const reg::DC_LUT_30_COLOR* new_gamma_ramp_256_entry_table,
|
||||
const reg::DC_LUT_PWL_DATA* new_gamma_ramp_pwl_rgb,
|
||||
uint32_t new_gamma_ramp_rw_component);
|
||||
virtual void RestoreEdramSnapshot(const void* snapshot) = 0;
|
||||
|
||||
void InitializeRingBuffer(uint32_t ptr, uint32_t size_log2);
|
||||
|
@ -201,7 +154,14 @@ class CommandProcessor {
|
|||
|
||||
virtual void WriteRegister(uint32_t index, uint32_t value);
|
||||
|
||||
void UpdateGammaRampValue(GammaRampType type, uint32_t value);
|
||||
const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table() const {
|
||||
return gamma_ramp_256_entry_table_;
|
||||
}
|
||||
const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl_rgb() const {
|
||||
return gamma_ramp_pwl_rgb_[0];
|
||||
}
|
||||
virtual void OnGammaRamp256EntryTableValueWritten() {}
|
||||
virtual void OnGammaRampPWLValueWritten() {}
|
||||
|
||||
virtual void MakeCoherent();
|
||||
virtual void PrepareForWait();
|
||||
|
@ -285,9 +245,7 @@ class CommandProcessor {
|
|||
return swap_post_effect_actual_;
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Write the gamma ramp (including the display controller
|
||||
// write pointers) in the common code.
|
||||
virtual void InitializeTrace() = 0;
|
||||
virtual void InitializeTrace();
|
||||
|
||||
Memory* memory_ = nullptr;
|
||||
kernel::KernelState* kernel_state_ = nullptr;
|
||||
|
@ -334,15 +292,15 @@ class CommandProcessor {
|
|||
|
||||
bool paused_ = false;
|
||||
|
||||
GammaRamp gamma_ramp_ = {};
|
||||
int gamma_ramp_rw_subindex_ = 0;
|
||||
bool dirty_gamma_ramp_table_ = true;
|
||||
bool dirty_gamma_ramp_pwl_ = true;
|
||||
|
||||
// By default (such as for tools), post-processing is disabled.
|
||||
// "Desired" is for the external thread managing the post-processing effect.
|
||||
SwapPostEffect swap_post_effect_desired_ = SwapPostEffect::kNone;
|
||||
SwapPostEffect swap_post_effect_actual_ = SwapPostEffect::kNone;
|
||||
|
||||
private:
|
||||
reg::DC_LUT_30_COLOR gamma_ramp_256_entry_table_[256] = {};
|
||||
reg::DC_LUT_PWL_DATA gamma_ramp_pwl_rgb_[128][3] = {};
|
||||
uint32_t gamma_ramp_rw_component_ = 0;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <utility>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/byte_order.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
@ -1161,8 +1162,8 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
provider.GetHeapFlagCreateNotZeroed();
|
||||
|
||||
// Create gamma ramp resources.
|
||||
dirty_gamma_ramp_table_ = true;
|
||||
dirty_gamma_ramp_pwl_ = true;
|
||||
gamma_ramp_256_entry_table_up_to_date_ = false;
|
||||
gamma_ramp_pwl_up_to_date_ = false;
|
||||
D3D12_RESOURCE_DESC gamma_ramp_buffer_desc;
|
||||
ui::d3d12::util::FillBufferResourceDesc(
|
||||
gamma_ramp_buffer_desc, (256 + 128 * 3) * 4, D3D12_RESOURCE_FLAG_NONE);
|
||||
|
@ -1699,15 +1700,17 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
texture_cache_->TextureFetchConstantWritten(
|
||||
(index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6);
|
||||
}
|
||||
} else if (index == XE_GPU_REG_DC_LUT_PWL_DATA) {
|
||||
UpdateGammaRampValue(GammaRampType::kPWL, value);
|
||||
} else if (index == XE_GPU_REG_DC_LUT_30_COLOR) {
|
||||
UpdateGammaRampValue(GammaRampType::kTable, value);
|
||||
} else if (index == XE_GPU_REG_DC_LUT_RW_MODE) {
|
||||
gamma_ramp_rw_subindex_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Hook called after a 256-entry gamma ramp table value has been written:
// clears the up-to-date flag for the table.
void D3D12CommandProcessor::OnGammaRamp256EntryTableValueWritten() {
  gamma_ramp_256_entry_table_up_to_date_ = false;
}
|
||||
|
||||
// Hook called after a PWL gamma ramp value has been written: clears the
// up-to-date flag for the PWL ramp.
void D3D12CommandProcessor::OnGammaRampPWLValueWritten() {
  gamma_ramp_pwl_up_to_date_ = false;
}
|
||||
|
||||
void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
|
||||
uint32_t frontbuffer_width,
|
||||
uint32_t frontbuffer_height) {
|
||||
|
@ -1801,6 +1804,9 @@ void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
|
|||
// This is according to D3D::InitializePresentationParameters from a
|
||||
// game executable, which initializes the 256-entry table gamma ramp for
|
||||
// 8_8_8_8 output and the PWL gamma ramp for 2_10_10_10.
|
||||
// TODO(Triang3l): Choose between the table and PWL based on
|
||||
// DC_LUTA_CONTROL, support both for all formats (and also different
|
||||
// increments for PWL).
|
||||
bool use_pwl_gamma_ramp =
|
||||
frontbuffer_format == xenos::TextureFormat::k_2_10_10_10 ||
|
||||
frontbuffer_format ==
|
||||
|
@ -1811,20 +1817,43 @@ void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
|
|||
// Upload the new gamma ramp, using the upload buffer for the current
|
||||
// frame (will close the frame after this anyway, so can't write
|
||||
// multiple times per frame).
|
||||
if (use_pwl_gamma_ramp ? dirty_gamma_ramp_pwl_
|
||||
: dirty_gamma_ramp_table_) {
|
||||
if (!(use_pwl_gamma_ramp ? gamma_ramp_pwl_up_to_date_
|
||||
: gamma_ramp_256_entry_table_up_to_date_)) {
|
||||
uint32_t gamma_ramp_offset_bytes = use_pwl_gamma_ramp ? 256 * 4 : 0;
|
||||
uint32_t gamma_ramp_upload_offset_bytes =
|
||||
uint32_t(frame_current_ % kQueueFrames) * ((256 + 128 * 3) * 4) +
|
||||
gamma_ramp_offset_bytes;
|
||||
uint32_t gamma_ramp_size_bytes =
|
||||
(use_pwl_gamma_ramp ? 128 * 3 : 256) * 4;
|
||||
std::memcpy(gamma_ramp_upload_buffer_mapping_ +
|
||||
if (std::endian::native != std::endian::little &&
|
||||
use_pwl_gamma_ramp) {
|
||||
// R16G16 is first R16, where the shader expects the base, and
|
||||
// second G16, where the delta should be, but gamma_ramp_pwl_rgb()
|
||||
// is an array of 32-bit DC_LUT_PWL_DATA registers - swap 16 bits in
|
||||
// each 32.
|
||||
auto gamma_ramp_pwl_upload_buffer =
|
||||
reinterpret_cast<reg::DC_LUT_PWL_DATA*>(
|
||||
gamma_ramp_upload_buffer_mapping_ +
|
||||
gamma_ramp_upload_offset_bytes);
|
||||
const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl = gamma_ramp_pwl_rgb();
|
||||
for (size_t i = 0; i < 128 * 3; ++i) {
|
||||
reg::DC_LUT_PWL_DATA& gamma_ramp_pwl_upload_buffer_entry =
|
||||
gamma_ramp_pwl_upload_buffer[i];
|
||||
reg::DC_LUT_PWL_DATA gamma_ramp_pwl_entry = gamma_ramp_pwl[i];
|
||||
gamma_ramp_pwl_upload_buffer_entry.base =
|
||||
gamma_ramp_pwl_entry.delta;
|
||||
gamma_ramp_pwl_upload_buffer_entry.delta =
|
||||
gamma_ramp_pwl_entry.base;
|
||||
}
|
||||
} else {
|
||||
std::memcpy(
|
||||
gamma_ramp_upload_buffer_mapping_ +
|
||||
gamma_ramp_upload_offset_bytes,
|
||||
use_pwl_gamma_ramp
|
||||
? static_cast<const void*>(gamma_ramp_.pwl)
|
||||
: static_cast<const void*>(gamma_ramp_.table),
|
||||
? static_cast<const void*>(gamma_ramp_pwl_rgb())
|
||||
: static_cast<const void*>(gamma_ramp_256_entry_table()),
|
||||
gamma_ramp_size_bytes);
|
||||
}
|
||||
PushTransitionBarrier(gamma_ramp_buffer_.Get(),
|
||||
gamma_ramp_buffer_state_,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
|
@ -1834,8 +1863,8 @@ void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
|
|||
gamma_ramp_buffer_.Get(), gamma_ramp_offset_bytes,
|
||||
gamma_ramp_upload_buffer_.Get(), gamma_ramp_upload_offset_bytes,
|
||||
gamma_ramp_size_bytes);
|
||||
(use_pwl_gamma_ramp ? dirty_gamma_ramp_pwl_
|
||||
: dirty_gamma_ramp_table_) = false;
|
||||
(use_pwl_gamma_ramp ? gamma_ramp_pwl_up_to_date_
|
||||
: gamma_ramp_256_entry_table_up_to_date_) = true;
|
||||
}
|
||||
|
||||
// Destination, source, and if bindful, gamma ramp.
|
||||
|
@ -2589,6 +2618,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
}
|
||||
|
||||
void D3D12CommandProcessor::InitializeTrace() {
|
||||
CommandProcessor::InitializeTrace();
|
||||
|
||||
if (!BeginSubmission(false)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -209,6 +209,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
|
||||
void WriteRegister(uint32_t index, uint32_t value) override;
|
||||
|
||||
void OnGammaRamp256EntryTableValueWritten() override;
|
||||
void OnGammaRampPWLValueWritten() override;
|
||||
|
||||
void IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
|
||||
uint32_t frontbuffer_height) override;
|
||||
|
||||
|
@ -496,17 +499,18 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
|
||||
std::unique_ptr<TextureCache> texture_cache_;
|
||||
|
||||
// Bytes 0x0...0x3FF - 256-entry R10G10B10X2 gamma ramp (red and blue must be
|
||||
// read as swapped - 535107D4 has settings allowing separate configuration).
|
||||
// Bytes 0x0...0x3FF - 256-entry gamma ramp table with B10G10R10X2 data (read
|
||||
// as R10G10B10X2 with swizzle).
|
||||
// Bytes 0x400...0x9FF - 128-entry PWL R16G16 gamma ramp (R - base, G - delta,
|
||||
// low 6 bits of each are zero, 3 elements per entry).
|
||||
// https://www.x.org/docs/AMD/old/42590_m76_rrg_1.01o.pdf
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> gamma_ramp_buffer_;
|
||||
D3D12_RESOURCE_STATES gamma_ramp_buffer_state_;
|
||||
// Upload buffer with the same layout as gamma_ramp_buffer_, but with
|
||||
// kQueueFrames consecutive copies (one per queued frame).
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> gamma_ramp_upload_buffer_;
|
||||
uint8_t* gamma_ramp_upload_buffer_mapping_ = nullptr;
|
||||
bool gamma_ramp_256_entry_table_up_to_date_ = false;
|
||||
bool gamma_ramp_pwl_up_to_date_ = false;
|
||||
|
||||
struct ApplyGammaConstants {
|
||||
uint32_t size[2];
|
||||
|
|
|
@ -275,14 +275,183 @@ XE_GPU_REGISTER(0x1844, kDword, D1GRPH_PRIMARY_SURFACE_ADDRESS)
|
|||
|
||||
XE_GPU_REGISTER(0x1852, kDword, D1GRPH_FLIP_CONTROL)
|
||||
|
||||
XE_GPU_REGISTER(0x1921, kDword, DC_LUT_RW_MODE)
|
||||
XE_GPU_REGISTER(0x1922, kDword, DC_LUT_RW_INDEX)
|
||||
// In 4B4F07FE, the 256-entry gamma ramp for the 8bpc framebuffer is set to
|
||||
// different values in multiple places in the game. For VdGetCurrentDisplayGamma
|
||||
// returning 1 (sRGB), it's set up in the beginning as:
|
||||
// DC_LUTA_CONTROL = 0x00000000 (256-entry unsigned fixed-point)
|
||||
// DC_LUT_RW_MODE = 0x00000000
|
||||
// DC_LUT_RW_INDEX = 0x00000000
|
||||
// DC_LUT_WRITE_EN_MASK = 0x00000007
|
||||
// DC_LUT_30_COLOR = 0x00000000
|
||||
// DC_LUT_RW_INDEX = 0x00000001
|
||||
// DC_LUT_30_COLOR = 0x04812048
|
||||
// DC_LUT_RW_INDEX = 0x00000002
|
||||
// DC_LUT_30_COLOR = 0x05916459
|
||||
// DC_LUT_RW_INDEX = 0x00000003
|
||||
// DC_LUT_30_COLOR = 0x06519465
|
||||
// ...
|
||||
// DC_LUT_RW_INDEX = 0x000000FE
|
||||
// DC_LUT_30_COLOR = 0x3FBFEFFB
|
||||
// DC_LUT_RW_INDEX = 0x000000FF
|
||||
// DC_LUT_30_COLOR = 0x3FFFFFFF
|
||||
// DC_LUT_RW_INDEX = 0x00000100
|
||||
//
|
||||
// Another possible setup in 4B4F07FE is:
|
||||
// DC_LUTA_CONTROL = 0x00000000 (256-entry unsigned fixed-point)
|
||||
// DC_LUT_RW_MODE = 0x00000000
|
||||
// DC_LUT_RW_INDEX = 0x00000000
|
||||
// DC_LUT_WRITE_EN_MASK = 0x00000007
|
||||
// DC_LUT_30_COLOR = 0x00000000
|
||||
// DC_LUT_RW_INDEX = 0x00000001
|
||||
// DC_LUT_30_COLOR = 0x01A0681A
|
||||
// DC_LUT_RW_INDEX = 0x00000002
|
||||
// DC_LUT_30_COLOR = 0x02709C27
|
||||
// ...
|
||||
// DC_LUT_RW_INDEX = 0x000000FE
|
||||
// DC_LUT_30_COLOR = 0x3FBFEFFB
|
||||
// DC_LUT_RW_INDEX = 0x000000FF
|
||||
// DC_LUT_30_COLOR = 0x3FFFFFFF
|
||||
// DC_LUT_RW_INDEX = 0x00000100
|
||||
//
|
||||
// In 4D5307E6, the 128-entry PWL gamma ramp for the 10bpc framebuffer, for
|
||||
// VdGetCurrentDisplayGamma returning 1 (sRGB), is set up right after launching
|
||||
// the game as:
|
||||
// DC_LUTA_CONTROL = 0x00000003 (8-increment unsigned fixed-point)
|
||||
// DC_LUT_RW_MODE = 0x00000001
|
||||
// DC_LUT_RW_INDEX = 0x00000000
|
||||
// DC_LUT_WRITE_EN_MASK = 0x00000007
|
||||
// DC_LUT_PWL_DATA = 0x02000000
|
||||
// DC_LUT_PWL_DATA = 0x02000000
|
||||
// DC_LUT_PWL_DATA = 0x02000000
|
||||
// DC_LUT_RW_INDEX = 0x00000001
|
||||
// DC_LUT_PWL_DATA = 0x02000200
|
||||
// DC_LUT_PWL_DATA = 0x02000200
|
||||
// DC_LUT_PWL_DATA = 0x02000200
|
||||
// DC_LUT_RW_INDEX = 0x00000002
|
||||
// DC_LUT_PWL_DATA = 0x02000400
|
||||
// DC_LUT_PWL_DATA = 0x02000400
|
||||
// DC_LUT_PWL_DATA = 0x02000400
|
||||
// ...
|
||||
// DC_LUT_RW_INDEX = 0x0000007D
|
||||
// DC_LUT_PWL_DATA = 0x0200FBC0
|
||||
// DC_LUT_PWL_DATA = 0x0200FBC0
|
||||
// DC_LUT_PWL_DATA = 0x0200FBC0
|
||||
// DC_LUT_RW_INDEX = 0x0000007E
|
||||
// DC_LUT_PWL_DATA = 0x0200FDC0
|
||||
// DC_LUT_PWL_DATA = 0x0200FDC0
|
||||
// DC_LUT_PWL_DATA = 0x0200FDC0
|
||||
// DC_LUT_RW_INDEX = 0x0000007F
|
||||
// DC_LUT_PWL_DATA = 0x0000FFC0
|
||||
// DC_LUT_PWL_DATA = 0x0000FFC0
|
||||
// DC_LUT_PWL_DATA = 0x0000FFC0
|
||||
// DC_LUT_RW_INDEX = 0x00000080
|
||||
//
|
||||
// Later in 4D5307E6, for the game itself (apparently for conversion of the bit
|
||||
// representation of 7e3 floating-point data in the front buffer to 10-bit fixed
|
||||
// point, as the game draws the final passes to a 7e3 framebuffer), with
|
||||
// VdGetCurrentDisplayGamma returning 1 (sRGB) and the normal brightness in the
|
||||
// game settings, it's:
|
||||
// DC_LUTA_CONTROL = 0x00000003 (8-increment unsigned fixed-point)
|
||||
// DC_LUT_RW_MODE = 0x00000001
|
||||
// DC_LUT_RW_INDEX = 0x00000000
|
||||
// DC_LUT_WRITE_EN_MASK = 0x00000007
|
||||
// DC_LUT_PWL_DATA = 0x05000000
|
||||
// DC_LUT_PWL_DATA = 0x05000000
|
||||
// DC_LUT_PWL_DATA = 0x05000000
|
||||
// DC_LUT_RW_INDEX = 0x00000001
|
||||
// DC_LUT_PWL_DATA = 0x02000500
|
||||
// DC_LUT_PWL_DATA = 0x02000500
|
||||
// DC_LUT_PWL_DATA = 0x02000500
|
||||
// DC_LUT_RW_INDEX = 0x00000002
|
||||
// DC_LUT_PWL_DATA = 0x01800740
|
||||
// DC_LUT_PWL_DATA = 0x01800740
|
||||
// DC_LUT_PWL_DATA = 0x01800740
|
||||
// ...
|
||||
// DC_LUT_RW_INDEX = 0x0000007D
|
||||
// DC_LUT_PWL_DATA = 0x0440F340
|
||||
// DC_LUT_PWL_DATA = 0x0440F340
|
||||
// DC_LUT_PWL_DATA = 0x0440F340
|
||||
// DC_LUT_RW_INDEX = 0x0000007E
|
||||
// DC_LUT_PWL_DATA = 0x0400F780
|
||||
// DC_LUT_PWL_DATA = 0x0400F780
|
||||
// DC_LUT_PWL_DATA = 0x0400F780
|
||||
// DC_LUT_RW_INDEX = 0x0000007F
|
||||
// DC_LUT_PWL_DATA = 0x0400FBC0
|
||||
// DC_LUT_PWL_DATA = 0x0400FBC0
|
||||
// DC_LUT_PWL_DATA = 0x0400FBC0
|
||||
// DC_LUT_RW_INDEX = 0x00000080
|
||||
//
|
||||
// In 535107D4, the 256-entry gamma ramp for the 8bpc framebuffer is
|
||||
// configurable from the game's settings menu for each channel independently.
|
||||
// For VdGetCurrentDisplayGamma returning 1 (sRGB), when in the settings, the
|
||||
// red gamma is at the maximum of 5.56, green is at 1.00, and blue is at the
|
||||
// minimum of 0.17, the setup is done as:
|
||||
// DC_LUT_RW_MODE = 0x00000000
|
||||
// DC_LUT_RW_INDEX = 0x00000000
|
||||
// DC_LUT_WRITE_EN_MASK = 0x00000007
|
||||
// DC_LUT_30_COLOR = 0x00000000
|
||||
// DC_LUT_RW_INDEX = 0x00000001
|
||||
// DC_LUT_30_COLOR = 0x17901000
|
||||
// DC_LUT_RW_INDEX = 0x00000002
|
||||
// DC_LUT_30_COLOR = 0x1AB02000
|
||||
// ...
|
||||
// DC_LUT_RW_INDEX = 0x000000FE
|
||||
// DC_LUT_30_COLOR = 0x3FEFE3D2
|
||||
// DC_LUT_RW_INDEX = 0x000000FF
|
||||
// DC_LUT_30_COLOR = 0x3FFFF3E9
|
||||
// DC_LUT_RW_INDEX = 0x00000100
|
||||
|
||||
// Read / write mode in bit 0: 0 - 256-entry table, 1 - PWL.
|
||||
// Default: 0x00000000.
|
||||
XE_GPU_REGISTER(0x1921, kDword, DC_LUT_RW_MODE)
|
||||
// Read / write index. No lower and upper halves on the Xenos apparently, for
|
||||
// the 256-entry table, the bits 0:7 are the index directly (unlike on the M56,
|
||||
// not split into the index in 1:7 and the lower or upper 10 bits selection in
|
||||
// 0:0, instead, on the Xenos, the index in 0:7 is just increased
|
||||
// monotonically). For some reason though Direct3D 9 writes an index that
|
||||
// overflows by one (0x100 for the 256-entry table, 0x80 for the 128-entry PWL
|
||||
// gamma ramp) after setting up all the values. However, the index is 8-bit, and
|
||||
// for PWL, according to the M56 documentation, the bit 7 is not used.
|
||||
// Default: 0x00000000.
|
||||
XE_GPU_REGISTER(0x1922, kDword, DC_LUT_RW_INDEX)
|
||||
// Sequential 10-bit R, G, B host read / write for the 256-entry table. After
|
||||
// reset or writing DC_LUT_RW_INDEX, the first access is for the red component,
|
||||
// the second is for green, the third is for blue, and after blue is accessed,
|
||||
// the LUT index is increased by 1 (without having to explicitly change
|
||||
// DC_LUT_RW_INDEX). Bits 0:5 are hardwired to zero.
|
||||
// Default: 0x00000000.
|
||||
XE_GPU_REGISTER(0x1923, kDword, DC_LUT_SEQ_COLOR)
|
||||
// Read / write, 0:15 - base, 16:31 - delta. Bits 0:5 of both the base and the
|
||||
// delta are hardwired to zero. The LUT index is increased by 1 when
|
||||
// DC_LUT_PWL_DATA is accessed, though three DC_LUT_PWL_DATA writes are done for
|
||||
// one entry (the order is likely R, G, B, similar to DC_LUT_SEQ_COLOR, but this
|
||||
// hasn't been verified yet as no games using the PWL gamma ramp with separate
|
||||
// settings for each channel have been found yet).
|
||||
// Default: 0x00000000.
|
||||
XE_GPU_REGISTER(0x1924, kDword, DC_LUT_PWL_DATA)
|
||||
// Read / write, 0:9 - blue, 10:19 - green, 20:29 - red. The LUT index is
|
||||
// increased by 1 when DC_LUT_30_COLOR is accessed.
|
||||
// Default: 0x00000000.
|
||||
XE_GPU_REGISTER(0x1925, kDword, DC_LUT_30_COLOR)
|
||||
|
||||
// Only LUT pipe 1 on the Xenos apparently (Direct3D 9 sets DC_LUT_WRITE_EN_MASK
|
||||
// to 0b111 before writing the gamma ramp), 3 bits set, rather than 6 on the
|
||||
// M56.
|
||||
// Bit 0 - blue write enable mask.
|
||||
// Bit 1 - green write enable mask.
|
||||
// Bit 2 - red write enable mask.
|
||||
// Default: 0x00000007 (though 0x0000003F on the M56 where there are two pipes).
|
||||
XE_GPU_REGISTER(0x1927, kDword, DC_LUT_WRITE_EN_MASK)
|
||||
|
||||
// Single set of parameters for all channels apparently unlike on the M56
|
||||
// (4D5307E6 sets DC_LUTA_CONTROL to 0x00000003 for the data increment of 8 in
|
||||
// the 128-entry PWL gamma ramp for a 10bpc framebuffer). Also set not only
|
||||
// during setup, but also apparently during every swap by Direct3D 9, though not
|
||||
// directly in all games (happens in 4B4F07FE and 4D5307E6 even without proper
|
||||
// VdSwap emulation, but in 535107D4, with a fake VdSwap packet rather than the
|
||||
// real ones, the register is not set at all, though the expected behavior is
|
||||
// that of the value of 0x00000000).
|
||||
// Default: 0x00000000.
|
||||
XE_GPU_REGISTER(0x1930, kDword, DC_LUTA_CONTROL)
|
||||
|
||||
XE_GPU_REGISTER(0x1961, kDword, AVIVO_D1MODE_VIEWPORT_SIZE)
|
||||
|
|
|
@ -825,6 +825,68 @@ union alignas(uint32_t) RB_COPY_DEST_PITCH {
|
|||
};
|
||||
static_assert_size(RB_COPY_DEST_PITCH, sizeof(uint32_t));
|
||||
|
||||
/*******************************************************************************
|
||||
___ ___ ___ ___ _ ___ __
|
||||
| \_ _/ __| _ \ | /_\ \ / /
|
||||
| |) | |\__ \ _/ |__ / _ \ V /
|
||||
|___/___|___/_| |____/_/ \_\_|
|
||||
|
||||
___ ___ _ _ _____ ___ ___ _ _ ___ ___
|
||||
/ __/ _ \| \| |_ _| _ \/ _ \| | | | | __| _ \
|
||||
| (_| (_) | .` | | | | / (_) | |__| |__| _|| /
|
||||
\___\___/|_|\_| |_| |_|_\\___/|____|____|___|_|_\
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
// Typed view of the DC_LUT_RW_INDEX register: `value` is the raw 32-bit word
// and the anonymous struct exposes the same bits as a named bitfield.
union alignas(uint32_t) DC_LUT_RW_INDEX {
|
||||
// Raw register value.
uint32_t value;
|
||||
struct {
|
||||
// Unlike in the M56 documentation, for the 256-entry table, this is the
|
||||
// absolute index, without the lower or upper 10 bits selection in the
|
||||
// bit 0. For PWL, the bit 7 is ignored.
|
||||
uint32_t rw_index : 8; // +0
|
||||
};
|
||||
// Register this layout corresponds to.
static constexpr Register register_index = XE_GPU_REG_DC_LUT_RW_INDEX;
|
||||
};
|
||||
// The union must be exactly one register dword in size.
static_assert_size(DC_LUT_RW_INDEX, sizeof(uint32_t));
|
||||
|
||||
// Typed view of the DC_LUT_SEQ_COLOR register: `value` is the raw 32-bit word
// and the anonymous struct exposes the 16-bit color field in its low half.
union alignas(uint32_t) DC_LUT_SEQ_COLOR {
|
||||
// Raw register value.
uint32_t value;
|
||||
struct {
|
||||
// Only the low 16 bits are named; the upper 16 bits have no field here.
uint32_t seq_color : 16; // +0, bits 0:5 are hardwired to zero
|
||||
};
|
||||
// Register this layout corresponds to.
static constexpr Register register_index = XE_GPU_REG_DC_LUT_SEQ_COLOR;
|
||||
};
|
||||
// The union must be exactly one register dword in size.
static_assert_size(DC_LUT_SEQ_COLOR, sizeof(uint32_t));
|
||||
|
||||
// Typed view of the DC_LUT_PWL_DATA register (one piecewise linear gamma ramp
// entry): `value` is the raw 32-bit word, split into a 16-bit base in the low
// half and a 16-bit delta in the high half.
union alignas(uint32_t) DC_LUT_PWL_DATA {
|
||||
// Raw register value.
uint32_t value;
|
||||
struct {
|
||||
// See the M56 DC_LUTA_CONTROL for information about the way these should be
|
||||
// interpreted (`output = base + (multiplier * delta) / 2^increment`, where
|
||||
// the increment is the value specified in DC_LUTA_CONTROL for the specific
|
||||
// color channel, the base is 7 bits of the front buffer value above
|
||||
// `increment` bits, the multiplier is the lower `increment` bits of it; the
|
||||
// increment is nonzero, otherwise the 256-entry table should be used
|
||||
// instead).
|
||||
uint32_t base : 16; // +0, bits 0:5 are hardwired to zero
|
||||
uint32_t delta : 16; // +16, bits 0:5 are hardwired to zero
|
||||
};
|
||||
// Register this layout corresponds to.
static constexpr Register register_index = XE_GPU_REG_DC_LUT_PWL_DATA;
|
||||
};
|
||||
// The union must be exactly one register dword in size.
static_assert_size(DC_LUT_PWL_DATA, sizeof(uint32_t));
|
||||
|
||||
// Typed view of the DC_LUT_30_COLOR register (one 256-entry table gamma ramp
// entry): `value` is the raw 32-bit word holding three 10-bit components, with
// blue in the lowest bits and red in the highest. The top two bits of the dword
// have no named field.
union alignas(uint32_t) DC_LUT_30_COLOR {
|
||||
// Raw register value.
uint32_t value;
|
||||
struct {
|
||||
uint32_t color_10_blue : 10; // +0
|
||||
uint32_t color_10_green : 10; // +10
|
||||
uint32_t color_10_red : 10; // +20
|
||||
};
|
||||
// Register this layout corresponds to.
static constexpr Register register_index = XE_GPU_REG_DC_LUT_30_COLOR;
|
||||
};
|
||||
// The union must be exactly one register dword in size.
static_assert_size(DC_LUT_30_COLOR, sizeof(uint32_t));
|
||||
|
||||
} // namespace reg
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -32,10 +32,9 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
|||
}
|
||||
// UNORM conversion according to the Direct3D 10+ rules.
|
||||
uint3 input = uint3(xe_apply_gamma_source[xe_thread_id.xy] * 1023.0f + 0.5f);
|
||||
// The ramp is BGR, not RGB.
|
||||
float3 output = float3(XeApplyPWLGamma(input.r, 2u),
|
||||
float3 output = float3(XeApplyPWLGamma(input.r, 0u),
|
||||
XeApplyPWLGamma(input.g, 1u),
|
||||
XeApplyPWLGamma(input.b, 0u));
|
||||
XeApplyPWLGamma(input.b, 2u));
|
||||
xe_apply_gamma_dest[xe_thread_id.xy] =
|
||||
float4(output, XeApplyGammaGetAlpha(output));
|
||||
}
|
||||
|
|
|
@ -14,7 +14,8 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
|||
}
|
||||
// UNORM conversion according to the Direct3D 10+ rules.
|
||||
uint3 input = uint3(xe_apply_gamma_source[xe_thread_id.xy] * 255.0f + 0.5f);
|
||||
// The ramp is BGR, not RGB.
|
||||
// The ramp has blue in bits 0:9, green in 10:19, red in 20:29 - BGR passed as
|
||||
// an R10G10B10A2 buffer.
|
||||
float3 output = float3(xe_apply_gamma_ramp[input.r].b,
|
||||
xe_apply_gamma_ramp[input.g].g,
|
||||
xe_apply_gamma_ramp[input.b].r);
|
||||
|
|
|
@ -55,17 +55,17 @@ ld r0.xyz, r0.xyzw, T0[0].xyzw
|
|||
mad r0.xyz, r0.xyzx, l(1023.000000, 1023.000000, 1023.000000, 0.000000), l(0.500000, 0.500000, 0.500000, 0.000000)
|
||||
ftou r0.xyz, r0.xyzx
|
||||
ushr r1.xyz, r0.xyzx, l(3, 3, 3, 0)
|
||||
imul null, r0.w, r1.z, l(3)
|
||||
imad r1.xy, r1.xyxx, l(3, 3, 0, 0), l(2, 1, 0, 0)
|
||||
ld r1.xz, r1.xxxx, T1[1].xzyw
|
||||
utof r1.x, r1.x
|
||||
imul null, r0.w, r1.x, l(3)
|
||||
ld r1.xw, r0.wwww, T1[1].xzwy
|
||||
utof r0.w, r1.x
|
||||
and r0.xyz, r0.xyzx, l(7, 7, 7, 0)
|
||||
imul null, r0.x, r1.z, r0.x
|
||||
imul null, r0.x, r1.w, r0.x
|
||||
utof r0.x, r0.x
|
||||
mad r0.x, r0.x, l(0.125000), r1.x
|
||||
mad r0.x, r0.x, l(0.125000), r0.w
|
||||
mul r0.x, r0.x, l(0.000015)
|
||||
min r2.x, r0.x, l(1.000000)
|
||||
ld r1.xy, r1.yyyy, T1[1].xyzw
|
||||
imad r0.xw, r1.yyyz, l(3, 0, 0, 3), l(1, 0, 0, 2)
|
||||
ld r1.xy, r0.xxxx, T1[1].xyzw
|
||||
utof r0.x, r1.x
|
||||
imul null, r0.y, r0.y, r1.y
|
||||
utof r0.y, r0.y
|
||||
|
@ -86,10 +86,10 @@ ret
|
|||
|
||||
const BYTE apply_gamma_pwl_cs[] =
|
||||
{
|
||||
68, 88, 66, 67, 180, 180,
|
||||
222, 28, 4, 138, 188, 113,
|
||||
52, 97, 214, 88, 116, 106,
|
||||
105, 240, 1, 0, 0, 0,
|
||||
68, 88, 66, 67, 134, 193,
|
||||
189, 188, 150, 246, 151, 78,
|
||||
29, 10, 33, 117, 212, 145,
|
||||
204, 130, 1, 0, 0, 0,
|
||||
128, 7, 0, 0, 5, 0,
|
||||
0, 0, 52, 0, 0, 0,
|
||||
24, 2, 0, 0, 40, 2,
|
||||
|
@ -257,26 +257,16 @@ const BYTE apply_gamma_pwl_cs[] =
|
|||
0, 0, 0, 0, 38, 0,
|
||||
0, 8, 0, 208, 0, 0,
|
||||
130, 0, 16, 0, 0, 0,
|
||||
0, 0, 42, 0, 16, 0,
|
||||
0, 0, 10, 0, 16, 0,
|
||||
1, 0, 0, 0, 1, 64,
|
||||
0, 0, 3, 0, 0, 0,
|
||||
35, 0, 0, 15, 50, 0,
|
||||
45, 0, 0, 8, 146, 0,
|
||||
16, 0, 1, 0, 0, 0,
|
||||
70, 0, 16, 0, 1, 0,
|
||||
0, 0, 2, 64, 0, 0,
|
||||
3, 0, 0, 0, 3, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 2, 64,
|
||||
0, 0, 2, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
45, 0, 0, 8, 82, 0,
|
||||
16, 0, 1, 0, 0, 0,
|
||||
6, 0, 16, 0, 1, 0,
|
||||
0, 0, 134, 125, 32, 0,
|
||||
246, 15, 16, 0, 0, 0,
|
||||
0, 0, 134, 119, 32, 0,
|
||||
1, 0, 0, 0, 1, 0,
|
||||
0, 0, 86, 0, 0, 5,
|
||||
18, 0, 16, 0, 1, 0,
|
||||
130, 0, 16, 0, 0, 0,
|
||||
0, 0, 10, 0, 16, 0,
|
||||
1, 0, 0, 0, 1, 0,
|
||||
0, 10, 114, 0, 16, 0,
|
||||
|
@ -288,7 +278,7 @@ const BYTE apply_gamma_pwl_cs[] =
|
|||
0, 0, 38, 0, 0, 8,
|
||||
0, 208, 0, 0, 18, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
42, 0, 16, 0, 1, 0,
|
||||
58, 0, 16, 0, 1, 0,
|
||||
0, 0, 10, 0, 16, 0,
|
||||
0, 0, 0, 0, 86, 0,
|
||||
0, 5, 18, 0, 16, 0,
|
||||
|
@ -298,8 +288,8 @@ const BYTE apply_gamma_pwl_cs[] =
|
|||
16, 0, 0, 0, 0, 0,
|
||||
10, 0, 16, 0, 0, 0,
|
||||
0, 0, 1, 64, 0, 0,
|
||||
0, 0, 0, 62, 10, 0,
|
||||
16, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 62, 58, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
56, 0, 0, 7, 18, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
10, 0, 16, 0, 0, 0,
|
||||
|
@ -309,10 +299,20 @@ const BYTE apply_gamma_pwl_cs[] =
|
|||
2, 0, 0, 0, 10, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
1, 64, 0, 0, 0, 0,
|
||||
128, 63, 45, 0, 0, 8,
|
||||
128, 63, 35, 0, 0, 15,
|
||||
146, 0, 16, 0, 0, 0,
|
||||
0, 0, 86, 9, 16, 0,
|
||||
1, 0, 0, 0, 2, 64,
|
||||
0, 0, 3, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 3, 0, 0, 0,
|
||||
2, 64, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 2, 0,
|
||||
0, 0, 45, 0, 0, 8,
|
||||
50, 0, 16, 0, 1, 0,
|
||||
0, 0, 86, 5, 16, 0,
|
||||
1, 0, 0, 0, 70, 126,
|
||||
0, 0, 6, 0, 16, 0,
|
||||
0, 0, 0, 0, 70, 126,
|
||||
32, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 86, 0,
|
||||
0, 5, 18, 0, 16, 0,
|
||||
|
|
|
@ -55,17 +55,17 @@ ld r0.xyz, r0.xyzw, T0[0].xyzw
|
|||
mad r0.xyz, r0.xyzx, l(1023.000000, 1023.000000, 1023.000000, 0.000000), l(0.500000, 0.500000, 0.500000, 0.000000)
|
||||
ftou r0.xyz, r0.xyzx
|
||||
ushr r1.xyz, r0.xyzx, l(3, 3, 3, 0)
|
||||
imul null, r0.w, r1.z, l(3)
|
||||
imad r1.xy, r1.xyxx, l(3, 3, 0, 0), l(2, 1, 0, 0)
|
||||
ld r1.xz, r1.xxxx, T1[1].xzyw
|
||||
utof r1.x, r1.x
|
||||
imul null, r0.w, r1.x, l(3)
|
||||
ld r1.xw, r0.wwww, T1[1].xzwy
|
||||
utof r0.w, r1.x
|
||||
and r0.xyz, r0.xyzx, l(7, 7, 7, 0)
|
||||
imul null, r0.x, r1.z, r0.x
|
||||
imul null, r0.x, r1.w, r0.x
|
||||
utof r0.x, r0.x
|
||||
mad r0.x, r0.x, l(0.125000), r1.x
|
||||
mad r0.x, r0.x, l(0.125000), r0.w
|
||||
mul r0.x, r0.x, l(0.000015)
|
||||
min r2.x, r0.x, l(1.000000)
|
||||
ld r1.xy, r1.yyyy, T1[1].xyzw
|
||||
imad r0.xw, r1.yyyz, l(3, 0, 0, 3), l(1, 0, 0, 2)
|
||||
ld r1.xy, r0.xxxx, T1[1].xyzw
|
||||
utof r0.x, r1.x
|
||||
imul null, r0.y, r0.y, r1.y
|
||||
utof r0.y, r0.y
|
||||
|
@ -86,10 +86,10 @@ ret
|
|||
|
||||
const BYTE apply_gamma_pwl_fxaa_luma_cs[] =
|
||||
{
|
||||
68, 88, 66, 67, 165, 122,
|
||||
242, 36, 160, 218, 193, 67,
|
||||
37, 43, 138, 45, 109, 219,
|
||||
226, 109, 1, 0, 0, 0,
|
||||
68, 88, 66, 67, 115, 68,
|
||||
69, 234, 116, 212, 118, 193,
|
||||
71, 10, 44, 165, 244, 209,
|
||||
63, 198, 1, 0, 0, 0,
|
||||
148, 7, 0, 0, 5, 0,
|
||||
0, 0, 52, 0, 0, 0,
|
||||
24, 2, 0, 0, 40, 2,
|
||||
|
@ -257,26 +257,16 @@ const BYTE apply_gamma_pwl_fxaa_luma_cs[] =
|
|||
0, 0, 0, 0, 38, 0,
|
||||
0, 8, 0, 208, 0, 0,
|
||||
130, 0, 16, 0, 0, 0,
|
||||
0, 0, 42, 0, 16, 0,
|
||||
0, 0, 10, 0, 16, 0,
|
||||
1, 0, 0, 0, 1, 64,
|
||||
0, 0, 3, 0, 0, 0,
|
||||
35, 0, 0, 15, 50, 0,
|
||||
45, 0, 0, 8, 146, 0,
|
||||
16, 0, 1, 0, 0, 0,
|
||||
70, 0, 16, 0, 1, 0,
|
||||
0, 0, 2, 64, 0, 0,
|
||||
3, 0, 0, 0, 3, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 2, 64,
|
||||
0, 0, 2, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
45, 0, 0, 8, 82, 0,
|
||||
16, 0, 1, 0, 0, 0,
|
||||
6, 0, 16, 0, 1, 0,
|
||||
0, 0, 134, 125, 32, 0,
|
||||
246, 15, 16, 0, 0, 0,
|
||||
0, 0, 134, 119, 32, 0,
|
||||
1, 0, 0, 0, 1, 0,
|
||||
0, 0, 86, 0, 0, 5,
|
||||
18, 0, 16, 0, 1, 0,
|
||||
130, 0, 16, 0, 0, 0,
|
||||
0, 0, 10, 0, 16, 0,
|
||||
1, 0, 0, 0, 1, 0,
|
||||
0, 10, 114, 0, 16, 0,
|
||||
|
@ -288,7 +278,7 @@ const BYTE apply_gamma_pwl_fxaa_luma_cs[] =
|
|||
0, 0, 38, 0, 0, 8,
|
||||
0, 208, 0, 0, 18, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
42, 0, 16, 0, 1, 0,
|
||||
58, 0, 16, 0, 1, 0,
|
||||
0, 0, 10, 0, 16, 0,
|
||||
0, 0, 0, 0, 86, 0,
|
||||
0, 5, 18, 0, 16, 0,
|
||||
|
@ -298,8 +288,8 @@ const BYTE apply_gamma_pwl_fxaa_luma_cs[] =
|
|||
16, 0, 0, 0, 0, 0,
|
||||
10, 0, 16, 0, 0, 0,
|
||||
0, 0, 1, 64, 0, 0,
|
||||
0, 0, 0, 62, 10, 0,
|
||||
16, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 62, 58, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
56, 0, 0, 7, 18, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
10, 0, 16, 0, 0, 0,
|
||||
|
@ -309,10 +299,20 @@ const BYTE apply_gamma_pwl_fxaa_luma_cs[] =
|
|||
2, 0, 0, 0, 10, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
1, 64, 0, 0, 0, 0,
|
||||
128, 63, 45, 0, 0, 8,
|
||||
128, 63, 35, 0, 0, 15,
|
||||
146, 0, 16, 0, 0, 0,
|
||||
0, 0, 86, 9, 16, 0,
|
||||
1, 0, 0, 0, 2, 64,
|
||||
0, 0, 3, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 3, 0, 0, 0,
|
||||
2, 64, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 2, 0,
|
||||
0, 0, 45, 0, 0, 8,
|
||||
50, 0, 16, 0, 1, 0,
|
||||
0, 0, 86, 5, 16, 0,
|
||||
1, 0, 0, 0, 70, 126,
|
||||
0, 0, 6, 0, 16, 0,
|
||||
0, 0, 0, 0, 70, 126,
|
||||
32, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 86, 0,
|
||||
0, 5, 18, 0, 16, 0,
|
||||
|
|
|
@ -9,8 +9,11 @@
|
|||
|
||||
#include "xenia/gpu/trace_player.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "xenia/gpu/command_processor.h"
|
||||
#include "xenia/gpu/graphics_system.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/memory.h"
|
||||
|
||||
|
@ -33,8 +36,6 @@ TracePlayer::TracePlayer(GraphicsSystem* graphics_system)
|
|||
assert_not_null(playback_event_);
|
||||
}
|
||||
|
||||
TracePlayer::~TracePlayer() { delete[] edram_snapshot_; }
|
||||
|
||||
const TraceReader::Frame* TracePlayer::current_frame() const {
|
||||
if (current_frame_index_ >= frame_count()) {
|
||||
return nullptr;
|
||||
|
@ -197,13 +198,12 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
|
|||
case TraceCommandType::kEdramSnapshot: {
|
||||
auto cmd = reinterpret_cast<const EdramSnapshotCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
if (!edram_snapshot_) {
|
||||
edram_snapshot_ = new uint8_t[xenos::kEdramSizeBytes];
|
||||
}
|
||||
std::unique_ptr<uint8_t[]> edram_snapshot(
|
||||
new uint8_t[xenos::kEdramSizeBytes]);
|
||||
DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length,
|
||||
edram_snapshot_, xenos::kEdramSizeBytes);
|
||||
edram_snapshot.get(), xenos::kEdramSizeBytes);
|
||||
trace_ptr += cmd->encoded_length;
|
||||
command_processor->RestoreEdramSnapshot(edram_snapshot_);
|
||||
command_processor->RestoreEdramSnapshot(edram_snapshot.get());
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kEvent: {
|
||||
|
@ -219,6 +219,34 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
|
|||
}
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kRegisters: {
|
||||
auto cmd = reinterpret_cast<const RegistersCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
std::unique_ptr<uint32_t[]> register_values(
|
||||
new uint32_t[cmd->register_count]);
|
||||
DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length,
|
||||
register_values.get(),
|
||||
sizeof(uint32_t) * cmd->register_count);
|
||||
trace_ptr += cmd->encoded_length;
|
||||
command_processor->RestoreRegisters(
|
||||
cmd->first_register, register_values.get(), cmd->register_count,
|
||||
cmd->execute_callbacks);
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kGammaRamp: {
|
||||
auto cmd = reinterpret_cast<const GammaRampCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
std::unique_ptr<uint32_t[]> gamma_ramps(new uint32_t[256 + 3 * 128]);
|
||||
DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length,
|
||||
gamma_ramps.get(), sizeof(uint32_t) * (256 + 3 * 128));
|
||||
trace_ptr += cmd->encoded_length;
|
||||
command_processor->RestoreGammaRamp(
|
||||
reinterpret_cast<const reg::DC_LUT_30_COLOR*>(gamma_ramps.get()),
|
||||
reinterpret_cast<const reg::DC_LUT_PWL_DATA*>(gamma_ramps.get() +
|
||||
256),
|
||||
cmd->rw_component);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,6 @@ enum class TracePlaybackMode {
|
|||
class TracePlayer : public TraceReader {
|
||||
public:
|
||||
TracePlayer(GraphicsSystem* graphics_system);
|
||||
~TracePlayer() override;
|
||||
|
||||
GraphicsSystem* graphics_system() const { return graphics_system_; }
|
||||
void SetPresentLastCopy(bool present_last_copy) {
|
||||
|
@ -66,7 +65,6 @@ class TracePlayer : public TraceReader {
|
|||
bool playing_trace_ = false;
|
||||
std::atomic<uint32_t> playback_percent_ = {0};
|
||||
std::unique_ptr<xe::threading::Event> playback_event_;
|
||||
uint8_t* edram_snapshot_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -53,6 +53,8 @@ enum class TraceCommandType : uint32_t {
|
|||
kMemoryWrite,
|
||||
kEdramSnapshot,
|
||||
kEvent,
|
||||
kRegisters,
|
||||
kGammaRamp,
|
||||
};
|
||||
|
||||
struct PrimaryBufferStartCommand {
|
||||
|
@ -134,6 +136,40 @@ struct EventCommand {
|
|||
Type event_type;
|
||||
};
|
||||
|
||||
// Represents a range of registers.
// Trace command header for a contiguous run of `register_count` 32-bit register
// values starting at `first_register`; the (possibly encoded) values follow the
// header in the trace file.
// NOTE(review): the writer stores this struct with a raw fwrite, so its exact
// in-memory layout (including the `bool` member and any padding) is part of
// the trace file format - confirm before reordering or resizing members.
|
||||
struct RegistersCommand {
|
||||
// Command discriminator; TraceCommandType::kRegisters for this command.
TraceCommandType type;
|
||||
|
||||
// Index of the first register in the range.
uint32_t first_register;
|
||||
// Number of consecutive registers stored.
uint32_t register_count;
|
||||
// Whether to set the registers via WriteRegister, which may have side
|
||||
// effects, rather than by copying them directly to the register file.
|
||||
bool execute_callbacks;
|
||||
|
||||
// Encoding format of the values in the trace file.
|
||||
MemoryEncodingFormat encoding_format;
|
||||
// Number of bytes the values occupy in the trace file in their encoded form.
|
||||
// If no encoding is used, this will be sizeof(uint32_t) * register_count.
|
||||
uint32_t encoded_length;
|
||||
};
|
||||
|
||||
// Represents a gamma ramp - encoded 256 DC_LUT_30_COLOR values and 128
|
||||
// interleaved RGB DC_LUT_PWL_DATA values.
|
||||
// Assuming that all other gamma ramp state is saved as plain registers.
// The payload that follows the header is the 256-entry table first, then the
// PWL ramp, (256 + 3 * 128) dwords in total before encoding.
// NOTE(review): the writer stores this struct with a raw fwrite, so its exact
// in-memory layout (including padding after `rw_component`) is part of the
// trace file format - confirm before reordering or resizing members.
|
||||
struct GammaRampCommand {
|
||||
// Command discriminator; TraceCommandType::kGammaRamp for this command.
TraceCommandType type;
|
||||
|
||||
// The component index (0 = red, 1 = green, 2 = blue) for the next
|
||||
// DC_LUT_SEQ_COLOR or DC_LUT_PWL_DATA read or write.
|
||||
uint8_t rw_component;
|
||||
|
||||
// Encoding format of the ramps in the trace file.
|
||||
MemoryEncodingFormat encoding_format;
|
||||
// Number of bytes the ramps occupy in the trace file in their encoded form.
|
||||
// If no encoding is used, this will be sizeof(uint32_t) * (256 + 3 * 128).
|
||||
uint32_t encoded_length;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
|
|
@ -205,6 +205,16 @@ void TraceReader::ParseTrace() {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kRegisters: {
|
||||
auto cmd = reinterpret_cast<const RegistersCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kGammaRamp: {
|
||||
auto cmd = reinterpret_cast<const GammaRampCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Broken trace file?
|
||||
assert_unhandled_case(type);
|
||||
|
@ -218,8 +228,8 @@ void TraceReader::ParseTrace() {
|
|||
}
|
||||
|
||||
bool TraceReader::DecompressMemory(MemoryEncodingFormat encoding_format,
|
||||
const uint8_t* src, size_t src_size,
|
||||
uint8_t* dest, size_t dest_size) {
|
||||
const void* src, size_t src_size, void* dest,
|
||||
size_t dest_size) {
|
||||
switch (encoding_format) {
|
||||
case MemoryEncodingFormat::kNone:
|
||||
assert_true(src_size == dest_size);
|
||||
|
|
|
@ -135,9 +135,8 @@ class TraceReader {
|
|||
|
||||
protected:
|
||||
void ParseTrace();
|
||||
bool DecompressMemory(MemoryEncodingFormat encoding_format,
|
||||
const uint8_t* src, size_t src_size, uint8_t* dest,
|
||||
size_t dest_size);
|
||||
bool DecompressMemory(MemoryEncodingFormat encoding_format, const void* src,
|
||||
size_t src_size, void* dest, size_t dest_size);
|
||||
|
||||
std::unique_ptr<MappedMemory> mmap_;
|
||||
const uint8_t* trace_data_ = nullptr;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "xenia/gpu/trace_writer.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include "third_party/snappy/snappy-sinksource.h"
|
||||
#include "third_party/snappy/snappy.h"
|
||||
|
@ -19,6 +20,7 @@
|
|||
#include "xenia/base/filesystem.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
namespace xe {
|
||||
|
@ -194,7 +196,7 @@ class SnappySink : public snappy::Sink {
|
|||
|
||||
void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
||||
size_t length, const void* host_ptr) {
|
||||
MemoryCommand cmd;
|
||||
MemoryCommand cmd = {};
|
||||
cmd.type = type;
|
||||
cmd.base_ptr = base_ptr;
|
||||
cmd.encoding_format = MemoryEncodingFormat::kNone;
|
||||
|
@ -232,8 +234,9 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
|||
}
|
||||
|
||||
void TraceWriter::WriteEdramSnapshot(const void* snapshot) {
|
||||
EdramSnapshotCommand cmd;
|
||||
EdramSnapshotCommand cmd = {};
|
||||
cmd.type = TraceCommandType::kEdramSnapshot;
|
||||
|
||||
if (compress_output_) {
|
||||
// Write the header now so we reserve space in the buffer.
|
||||
long header_position = std::ftell(file_);
|
||||
|
@ -272,5 +275,93 @@ void TraceWriter::WriteEvent(EventCommand::Type event_type) {
|
|||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
}
|
||||
|
||||
void TraceWriter::WriteRegisters(uint32_t first_register,
|
||||
const uint32_t* register_values,
|
||||
uint32_t register_count,
|
||||
bool execute_callbacks_on_play) {
|
||||
RegistersCommand cmd = {};
|
||||
cmd.type = TraceCommandType::kRegisters;
|
||||
cmd.first_register = first_register;
|
||||
cmd.register_count = register_count;
|
||||
cmd.execute_callbacks = execute_callbacks_on_play;
|
||||
|
||||
uint32_t uncompressed_length = uint32_t(sizeof(uint32_t) * register_count);
|
||||
if (compress_output_) {
|
||||
// Write the header now so we reserve space in the buffer.
|
||||
long header_position = std::ftell(file_);
|
||||
cmd.encoding_format = MemoryEncodingFormat::kSnappy;
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
|
||||
// Stream the content right to the buffer.
|
||||
snappy::ByteArraySource snappy_source(
|
||||
reinterpret_cast<const char*>(register_values), uncompressed_length);
|
||||
SnappySink snappy_sink(file_);
|
||||
cmd.encoded_length =
|
||||
static_cast<uint32_t>(snappy::Compress(&snappy_source, &snappy_sink));
|
||||
|
||||
// Seek back and overwrite the header with our final size.
|
||||
std::fseek(file_, header_position, SEEK_SET);
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
std::fseek(file_, header_position + sizeof(cmd) + cmd.encoded_length,
|
||||
SEEK_SET);
|
||||
} else {
|
||||
// Uncompressed - write the values directly to the file.
|
||||
cmd.encoding_format = MemoryEncodingFormat::kNone;
|
||||
cmd.encoded_length = uncompressed_length;
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(register_values, 1, uncompressed_length, file_);
|
||||
}
|
||||
}
|
||||
|
||||
void TraceWriter::WriteGammaRamp(
|
||||
const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table,
|
||||
const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl_rgb,
|
||||
uint32_t gamma_ramp_rw_component) {
|
||||
GammaRampCommand cmd = {};
|
||||
cmd.type = TraceCommandType::kGammaRamp;
|
||||
cmd.rw_component = uint8_t(gamma_ramp_rw_component);
|
||||
|
||||
constexpr uint32_t k256EntryTableUncompressedLength =
|
||||
sizeof(reg::DC_LUT_30_COLOR) * 256;
|
||||
constexpr uint32_t kPWLUncompressedLength =
|
||||
sizeof(reg::DC_LUT_PWL_DATA) * 3 * 128;
|
||||
constexpr uint32_t kUncompressedLength =
|
||||
k256EntryTableUncompressedLength + kPWLUncompressedLength;
|
||||
if (compress_output_) {
|
||||
// Write the header now so we reserve space in the buffer.
|
||||
long header_position = std::ftell(file_);
|
||||
cmd.encoding_format = MemoryEncodingFormat::kSnappy;
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
|
||||
// Stream the content right to the buffer.
|
||||
{
|
||||
std::unique_ptr<char[]> gamma_ramps(new char[kUncompressedLength]);
|
||||
std::memcpy(gamma_ramps.get(), gamma_ramp_256_entry_table,
|
||||
k256EntryTableUncompressedLength);
|
||||
std::memcpy(gamma_ramps.get() + k256EntryTableUncompressedLength,
|
||||
gamma_ramp_pwl_rgb, kPWLUncompressedLength);
|
||||
snappy::ByteArraySource snappy_source(gamma_ramps.get(),
|
||||
kUncompressedLength);
|
||||
SnappySink snappy_sink(file_);
|
||||
cmd.encoded_length =
|
||||
static_cast<uint32_t>(snappy::Compress(&snappy_source, &snappy_sink));
|
||||
}
|
||||
|
||||
// Seek back and overwrite the header with our final size.
|
||||
std::fseek(file_, header_position, SEEK_SET);
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
std::fseek(file_, header_position + sizeof(cmd) + cmd.encoded_length,
|
||||
SEEK_SET);
|
||||
} else {
|
||||
// Uncompressed - write the values directly to the file.
|
||||
cmd.encoding_format = MemoryEncodingFormat::kNone;
|
||||
cmd.encoded_length = kUncompressedLength;
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(gamma_ramp_256_entry_table, 1, k256EntryTableUncompressedLength,
|
||||
file_);
|
||||
fwrite(gamma_ramp_pwl_rgb, 1, kPWLUncompressedLength, file_);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/trace_protocol.h"
|
||||
|
||||
namespace xe {
|
||||
|
@ -44,6 +45,11 @@ class TraceWriter {
|
|||
const void* host_ptr = nullptr);
|
||||
void WriteEdramSnapshot(const void* snapshot);
|
||||
void WriteEvent(EventCommand::Type event_type);
|
||||
void WriteRegisters(uint32_t first_register, const uint32_t* register_values,
|
||||
uint32_t register_count, bool execute_callbacks_on_play);
|
||||
void WriteGammaRamp(const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table,
|
||||
const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl_rgb,
|
||||
uint32_t gamma_ramp_rw_component);
|
||||
|
||||
private:
|
||||
void WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
||||
|
|
|
@ -191,20 +191,6 @@ void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
offset ^= 0x1F;
|
||||
|
||||
dirty_loop_constants_ |= (1 << offset);
|
||||
} else if (index == XE_GPU_REG_DC_LUT_PWL_DATA) {
|
||||
UpdateGammaRampValue(GammaRampType::kPWL, value);
|
||||
} else if (index == XE_GPU_REG_DC_LUT_30_COLOR) {
|
||||
UpdateGammaRampValue(GammaRampType::kTable, value);
|
||||
} else if (index >= XE_GPU_REG_DC_LUT_RW_MODE &&
|
||||
index <= XE_GPU_REG_DC_LUTA_CONTROL) {
|
||||
uint32_t offset = index - XE_GPU_REG_DC_LUT_RW_MODE;
|
||||
offset ^= 0x05;
|
||||
|
||||
dirty_gamma_constants_ |= (1 << offset);
|
||||
|
||||
if (index == XE_GPU_REG_DC_LUT_RW_INDEX) {
|
||||
gamma_ramp_rw_subindex_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1400,8 +1386,6 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void VulkanCommandProcessor::InitializeTrace() {}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -98,8 +98,6 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
VulkanShader* pixel_shader);
|
||||
bool IssueCopy() override;
|
||||
|
||||
void InitializeTrace() override;
|
||||
|
||||
uint64_t dirty_float_constants_ = 0; // Dirty float constants in blocks of 4
|
||||
uint8_t dirty_bool_constants_ = 0;
|
||||
uint32_t dirty_loop_constants_ = 0;
|
||||
|
|
Loading…
Reference in New Issue