Merge remote-tracking branch 'upstream/master' into canary
This commit is contained in:
commit
d0e3281741
|
@ -72,7 +72,7 @@ xb genspirv
|
|||
|
||||
#### Debugging
|
||||
|
||||
VS behaves oddly with the debug paths. Open the xenia project properties
|
||||
VS behaves oddly with the debug paths. Open the 'xenia-app' project properties
|
||||
and set the 'Command' to `$(SolutionDir)$(TargetPath)` and the
|
||||
'Working Directory' to `$(SolutionDir)..\..`. You can specify flags and
|
||||
the file to run in the 'Command Arguments' field (or use `--flagfile=flags.txt`).
|
||||
|
|
|
@ -1,51 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2017 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_BASE_BIT_FIELD_H_
|
||||
#define XENIA_BASE_BIT_FIELD_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <type_traits>
|
||||
|
||||
namespace xe {
|
||||
|
||||
// Bitfield view over an integer (or enum) storage word, where position starts
// at the LSB.
//
// Template parameters:
//   T        - type the field is read as. Enum types are stored as their
//              underlying integer type.
//   position - index of the field's lowest bit within the storage word.
//   n_bits   - width of the field in bits.
//
// The struct holds the whole storage word itself; value() extracts only the
// bits [position, position + n_bits) from it.
template <typename T, size_t position, size_t n_bits>
struct bf {
  // For enum values, we strip them down to an underlying type.
  typedef
      typename std::conditional<std::is_enum<T>::value, std::underlying_type<T>,
                                std::remove_reference<T>>::type::type
          value_type;

  static_assert(n_bits > 0, "a bitfield must be at least one bit wide");
  static_assert(position + n_bits <= sizeof(value_type) * 8,
                "the bitfield must fit inside its storage type");

  bf() = default;
  inline operator T() const { return value(); }

  // Returns the field's value, sign-extended from n_bits when the storage type
  // is signed.
  inline T value() const {
    // All bit manipulation is done on an unsigned 64-bit copy. Shifting the
    // signed storage directly would sign-extend a field that reaches the top
    // bit, and the sign extension below would then be applied a second time.
    const uint64_t raw =
        (static_cast<uint64_t>(storage) >> position) & low_bits_mask();
    if (std::is_signed<value_type>::value) {
      // If the value is signed, sign-extend it. The arithmetic wraps modulo
      // 2^64, so the narrowing conversion back to value_type yields the
      // two's-complement result.
      const uint64_t sign_bit = uint64_t(1) << (n_bits - 1);
      return static_cast<T>(
          static_cast<value_type>((raw ^ sign_bit) - sign_bit));
    }

    return static_cast<T>(static_cast<value_type>(raw));
  }

  // Returns the mask selecting the field's bits in place within the storage.
  inline value_type mask() const {
    return static_cast<value_type>(low_bits_mask() << position);
  }

  value_type storage;

 private:
  // n_bits set bits starting at bit 0. Written as a right shift of all-ones so
  // that a field as wide as the storage never shifts by the full type width.
  static constexpr uint64_t low_bits_mask() {
    return ~uint64_t(0) >> (64 - n_bits);
  }
};
|
||||
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_BASE_BIT_FIELD_H_
|
|
@ -20,7 +20,6 @@
|
|||
#include "xenia/base/ring_buffer.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/graphics_system.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/sampler_info.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
@ -351,20 +350,20 @@ void CommandProcessor::MakeCoherent() {
|
|||
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
|
||||
|
||||
RegisterFile* regs = register_file_;
|
||||
auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32;
|
||||
auto& status_host = regs->Get<reg::COHER_STATUS_HOST>();
|
||||
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
|
||||
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
|
||||
|
||||
if (!(status_host & 0x80000000ul)) {
|
||||
if (!status_host.status) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char* action = "N/A";
|
||||
if ((status_host & 0x03000000) == 0x03000000) {
|
||||
if (status_host.vc_action_ena && status_host.tc_action_ena) {
|
||||
action = "VC | TC";
|
||||
} else if (status_host & 0x02000000) {
|
||||
} else if (status_host.tc_action_ena) {
|
||||
action = "TC";
|
||||
} else if (status_host & 0x01000000) {
|
||||
} else if (status_host.vc_action_ena) {
|
||||
action = "VC";
|
||||
}
|
||||
|
||||
|
@ -373,8 +372,7 @@ void CommandProcessor::MakeCoherent() {
|
|||
base_host + size_host, size_host, action);
|
||||
|
||||
// Mark coherent.
|
||||
status_host &= ~0x80000000ul;
|
||||
regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host;
|
||||
status_host.status = 0;
|
||||
}
|
||||
|
||||
void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
|
||||
|
|
|
@ -151,7 +151,7 @@ class CommandProcessor {
|
|||
protected:
|
||||
struct IndexBufferInfo {
|
||||
IndexFormat format = IndexFormat::kInt16;
|
||||
Endian endianness = Endian::kUnspecified;
|
||||
Endian endianness = Endian::kNone;
|
||||
uint32_t count = 0;
|
||||
uint32_t guest_base = 0;
|
||||
size_t length = 0;
|
||||
|
|
|
@ -1142,8 +1142,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
auto enable_mode = static_cast<xenos::ModeControl>(
|
||||
regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
|
||||
xenos::ModeControl enable_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
|
||||
if (enable_mode == xenos::ModeControl::kIgnore) {
|
||||
// Ignored.
|
||||
return true;
|
||||
|
@ -1153,7 +1152,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
return IssueCopy();
|
||||
}
|
||||
|
||||
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
|
||||
if (regs.Get<reg::RB_SURFACE_INFO>().surface_pitch == 0) {
|
||||
// Doesn't actually draw.
|
||||
// TODO(Triang3l): Do something so memexport still works in this case maybe?
|
||||
// Unlikely that zero would even really be legal though.
|
||||
|
@ -1164,7 +1163,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
bool tessellated;
|
||||
if (uint32_t(primitive_type) >=
|
||||
uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) {
|
||||
tessellated = (regs[XE_GPU_REG_VGT_OUTPUT_PATH_CNTL].u32 & 0x3) == 0x1;
|
||||
tessellated = regs.Get<reg::VGT_OUTPUT_PATH_CNTL>().path_select ==
|
||||
xenos::VGTOutputPath::kTessellationEnable;
|
||||
} else {
|
||||
tessellated = false;
|
||||
}
|
||||
|
@ -1202,8 +1202,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
|
||||
|
||||
bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type);
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
if (!memexport_used_vertex && primitive_two_faced &&
|
||||
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3) {
|
||||
pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) {
|
||||
// Both sides are culled - can't be expressed in the pipeline state.
|
||||
return true;
|
||||
}
|
||||
|
@ -1223,9 +1224,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// tessellation factors (as floats) instead of control point indices.
|
||||
bool adaptive_tessellation;
|
||||
if (tessellated) {
|
||||
TessellationMode tessellation_mode =
|
||||
TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3);
|
||||
adaptive_tessellation = tessellation_mode == TessellationMode::kAdaptive;
|
||||
xenos::TessellationMode tessellation_mode =
|
||||
regs.Get<reg::VGT_HOS_CNTL>().tess_mode;
|
||||
adaptive_tessellation =
|
||||
tessellation_mode == xenos::TessellationMode::kAdaptive;
|
||||
if (adaptive_tessellation &&
|
||||
(!indexed || index_buffer_info->format != IndexFormat::kInt32)) {
|
||||
return false;
|
||||
|
@ -1235,7 +1237,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// passed to vertex shader registers, especially if patches are drawn with
|
||||
// an index buffer.
|
||||
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
|
||||
if (tessellation_mode != TessellationMode::kAdaptive) {
|
||||
if (tessellation_mode != xenos::TessellationMode::kAdaptive) {
|
||||
XELOGE(
|
||||
"Tessellation mode %u is not implemented yet, only adaptive is "
|
||||
"partially available now - report the game to Xenia developers!",
|
||||
|
@ -1309,20 +1311,16 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
vertex_shader->GetUsedTextureMask(),
|
||||
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||
|
||||
// Check if early depth/stencil can be enabled explicitly by RB_DEPTHCONTROL
|
||||
// or implicitly when alpha test and alpha to coverage are disabled.
|
||||
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
|
||||
bool early_z = false;
|
||||
if (pixel_shader == nullptr) {
|
||||
// Check if early depth/stencil can be enabled.
|
||||
bool early_z;
|
||||
if (pixel_shader) {
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
early_z = pixel_shader->implicit_early_z_allowed() &&
|
||||
(!rb_colorcontrol.alpha_test_enable ||
|
||||
rb_colorcontrol.alpha_func == CompareFunction::kAlways) &&
|
||||
!rb_colorcontrol.alpha_to_mask_enable;
|
||||
} else {
|
||||
early_z = true;
|
||||
} else if (!pixel_shader->writes_depth()) {
|
||||
if (rb_depthcontrol & 0x8) {
|
||||
early_z = true;
|
||||
} else if (pixel_shader->implicit_early_z_allowed()) {
|
||||
early_z = (!(rb_colorcontrol & 0x8) || (rb_colorcontrol & 0x7) == 0x7) &&
|
||||
!(rb_colorcontrol & 0x10);
|
||||
}
|
||||
}
|
||||
|
||||
// Create the pipeline if needed and bind it.
|
||||
|
@ -1347,7 +1345,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Update system constants before uploading them.
|
||||
UpdateSystemConstantValues(
|
||||
memexport_used, primitive_two_faced, line_loop_closing_index,
|
||||
indexed ? index_buffer_info->endianness : Endian::kUnspecified,
|
||||
indexed ? index_buffer_info->endianness : Endian::kNone,
|
||||
adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0,
|
||||
early_z, GetCurrentColorMask(pixel_shader), pipeline_render_targets);
|
||||
|
||||
|
@ -1366,22 +1364,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
(1ull << (vfetch_index & 63))) {
|
||||
continue;
|
||||
}
|
||||
uint32_t vfetch_constant_index =
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2;
|
||||
if ((regs[vfetch_constant_index].u32 & 0x3) != 3) {
|
||||
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
|
||||
if (vfetch_constant.type != 3) {
|
||||
XELOGW("Vertex fetch type is not 3 (fetch constant %u is %.8X %.8X)!",
|
||||
vfetch_index, regs[vfetch_constant_index].u32,
|
||||
regs[vfetch_constant_index + 1].u32);
|
||||
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
|
||||
return false;
|
||||
}
|
||||
if (!shared_memory_->RequestRange(
|
||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC)) {
|
||||
if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
|
||||
vfetch_constant.size << 2)) {
|
||||
XELOGE(
|
||||
"Failed to request vertex buffer at 0x%.8X (size %u) in the shared "
|
||||
"memory",
|
||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
|
||||
vfetch_constant.address << 2, vfetch_constant.size << 2);
|
||||
return false;
|
||||
}
|
||||
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
||||
|
@ -1400,31 +1395,29 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
const std::vector<uint32_t>& memexport_stream_constants_vertex =
|
||||
vertex_shader->memexport_stream_constants();
|
||||
for (uint32_t constant_index : memexport_stream_constants_vertex) {
|
||||
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
|
||||
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
|
||||
®s[XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4]);
|
||||
if (memexport_stream->index_count == 0) {
|
||||
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4);
|
||||
if (memexport_stream.index_count == 0) {
|
||||
continue;
|
||||
}
|
||||
uint32_t memexport_format_size =
|
||||
GetSupportedMemExportFormatSize(memexport_stream->format);
|
||||
GetSupportedMemExportFormatSize(memexport_stream.format);
|
||||
if (memexport_format_size == 0) {
|
||||
XELOGE(
|
||||
"Unsupported memexport format %s",
|
||||
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
|
||||
->name);
|
||||
XELOGE("Unsupported memexport format %s",
|
||||
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format)))
|
||||
->name);
|
||||
return false;
|
||||
}
|
||||
uint32_t memexport_base_address = memexport_stream->base_address;
|
||||
uint32_t memexport_size_dwords =
|
||||
memexport_stream->index_count * memexport_format_size;
|
||||
memexport_stream.index_count * memexport_format_size;
|
||||
// Try to reduce the number of shared memory operations when writing
|
||||
// different elements into the same buffer through different exports
|
||||
// (happens in Halo 3).
|
||||
bool memexport_range_reused = false;
|
||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||
MemExportRange& memexport_range = memexport_ranges[i];
|
||||
if (memexport_range.base_address_dwords == memexport_base_address) {
|
||||
if (memexport_range.base_address_dwords ==
|
||||
memexport_stream.base_address) {
|
||||
memexport_range.size_dwords =
|
||||
std::max(memexport_range.size_dwords, memexport_size_dwords);
|
||||
memexport_range_reused = true;
|
||||
|
@ -1435,7 +1428,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
if (!memexport_range_reused) {
|
||||
MemExportRange& memexport_range =
|
||||
memexport_ranges[memexport_range_count++];
|
||||
memexport_range.base_address_dwords = memexport_base_address;
|
||||
memexport_range.base_address_dwords = memexport_stream.base_address;
|
||||
memexport_range.size_dwords = memexport_size_dwords;
|
||||
}
|
||||
}
|
||||
|
@ -1444,28 +1437,26 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
const std::vector<uint32_t>& memexport_stream_constants_pixel =
|
||||
pixel_shader->memexport_stream_constants();
|
||||
for (uint32_t constant_index : memexport_stream_constants_pixel) {
|
||||
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
|
||||
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
|
||||
®s[XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4]);
|
||||
if (memexport_stream->index_count == 0) {
|
||||
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4);
|
||||
if (memexport_stream.index_count == 0) {
|
||||
continue;
|
||||
}
|
||||
uint32_t memexport_format_size =
|
||||
GetSupportedMemExportFormatSize(memexport_stream->format);
|
||||
GetSupportedMemExportFormatSize(memexport_stream.format);
|
||||
if (memexport_format_size == 0) {
|
||||
XELOGE(
|
||||
"Unsupported memexport format %s",
|
||||
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
|
||||
->name);
|
||||
XELOGE("Unsupported memexport format %s",
|
||||
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format)))
|
||||
->name);
|
||||
return false;
|
||||
}
|
||||
uint32_t memexport_base_address = memexport_stream->base_address;
|
||||
uint32_t memexport_size_dwords =
|
||||
memexport_stream->index_count * memexport_format_size;
|
||||
memexport_stream.index_count * memexport_format_size;
|
||||
bool memexport_range_reused = false;
|
||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||
MemExportRange& memexport_range = memexport_ranges[i];
|
||||
if (memexport_range.base_address_dwords == memexport_base_address) {
|
||||
if (memexport_range.base_address_dwords ==
|
||||
memexport_stream.base_address) {
|
||||
memexport_range.size_dwords =
|
||||
std::max(memexport_range.size_dwords, memexport_size_dwords);
|
||||
memexport_range_reused = true;
|
||||
|
@ -1475,7 +1466,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
if (!memexport_range_reused) {
|
||||
MemExportRange& memexport_range =
|
||||
memexport_ranges[memexport_range_count++];
|
||||
memexport_range.base_address_dwords = memexport_base_address;
|
||||
memexport_range.base_address_dwords = memexport_stream.base_address;
|
||||
memexport_range.size_dwords = memexport_size_dwords;
|
||||
}
|
||||
}
|
||||
|
@ -1850,15 +1841,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
|||
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
|
||||
// See r200UpdateWindow:
|
||||
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
|
||||
uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
|
||||
int16_t window_offset_x = pa_sc_window_offset & 0x7FFF;
|
||||
int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF;
|
||||
if (window_offset_x & 0x4000) {
|
||||
window_offset_x |= 0x8000;
|
||||
}
|
||||
if (window_offset_y & 0x4000) {
|
||||
window_offset_y |= 0x8000;
|
||||
}
|
||||
auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
|
||||
|
||||
// Supersampling replacing multisampling due to difficulties of emulating
|
||||
// EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also
|
||||
|
@ -1868,8 +1851,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
|||
pixel_size_x = 1;
|
||||
pixel_size_y = 1;
|
||||
} else {
|
||||
MsaaSamples msaa_samples =
|
||||
MsaaSamples((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3);
|
||||
MsaaSamples msaa_samples = regs.Get<reg::RB_SURFACE_INFO>().msaa_samples;
|
||||
pixel_size_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
|
||||
pixel_size_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1;
|
||||
}
|
||||
|
@ -1889,30 +1871,30 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
|||
// box. If it's not, the position is in screen space. Since we can only use
|
||||
// the NDC in PC APIs, we use a viewport of the largest possible size, and
|
||||
// divide the position by it in translated shaders.
|
||||
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
float viewport_scale_x =
|
||||
(pa_cl_vte_cntl & (1 << 0))
|
||||
pa_cl_vte_cntl.vport_x_scale_ena
|
||||
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
|
||||
: 1280.0f;
|
||||
float viewport_scale_y =
|
||||
(pa_cl_vte_cntl & (1 << 2))
|
||||
pa_cl_vte_cntl.vport_y_scale_ena
|
||||
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
|
||||
: 1280.0f;
|
||||
float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4))
|
||||
float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
|
||||
: 1.0f;
|
||||
float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1))
|
||||
float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
|
||||
: std::abs(viewport_scale_x);
|
||||
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3))
|
||||
float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||
: std::abs(viewport_scale_y);
|
||||
float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5))
|
||||
float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
|
||||
: 0.0f;
|
||||
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) {
|
||||
viewport_offset_x += float(window_offset_x);
|
||||
viewport_offset_y += float(window_offset_y);
|
||||
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
|
||||
viewport_offset_x += float(pa_sc_window_offset.window_x_offset);
|
||||
viewport_offset_y += float(pa_sc_window_offset.window_y_offset);
|
||||
}
|
||||
D3D12_VIEWPORT viewport;
|
||||
viewport.TopLeftX =
|
||||
|
@ -1941,21 +1923,22 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
|||
}
|
||||
|
||||
// Scissor.
|
||||
uint32_t pa_sc_window_scissor_tl =
|
||||
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
|
||||
uint32_t pa_sc_window_scissor_br =
|
||||
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
|
||||
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
|
||||
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
|
||||
D3D12_RECT scissor;
|
||||
scissor.left = pa_sc_window_scissor_tl & 0x7FFF;
|
||||
scissor.top = (pa_sc_window_scissor_tl >> 16) & 0x7FFF;
|
||||
scissor.right = pa_sc_window_scissor_br & 0x7FFF;
|
||||
scissor.bottom = (pa_sc_window_scissor_br >> 16) & 0x7FFF;
|
||||
if (!(pa_sc_window_scissor_tl & (1u << 31))) {
|
||||
// !WINDOW_OFFSET_DISABLE.
|
||||
scissor.left = std::max(scissor.left + window_offset_x, LONG(0));
|
||||
scissor.top = std::max(scissor.top + window_offset_y, LONG(0));
|
||||
scissor.right = std::max(scissor.right + window_offset_x, LONG(0));
|
||||
scissor.bottom = std::max(scissor.bottom + window_offset_y, LONG(0));
|
||||
scissor.left = pa_sc_window_scissor_tl.tl_x;
|
||||
scissor.top = pa_sc_window_scissor_tl.tl_y;
|
||||
scissor.right = pa_sc_window_scissor_br.br_x;
|
||||
scissor.bottom = pa_sc_window_scissor_br.br_y;
|
||||
if (!pa_sc_window_scissor_tl.window_offset_disable) {
|
||||
scissor.left =
|
||||
std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
|
||||
scissor.top =
|
||||
std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
|
||||
scissor.right =
|
||||
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
|
||||
scissor.bottom =
|
||||
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
|
||||
}
|
||||
scissor.left *= pixel_size_x;
|
||||
scissor.top *= pixel_size_y;
|
||||
|
@ -1992,13 +1975,17 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
|||
|
||||
// Stencil reference value. Per-face reference not supported by Direct3D 12,
|
||||
// choose the back face one only if drawing only back faces.
|
||||
uint32_t stencil_ref;
|
||||
if (primitive_two_faced && (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & 0x80) &&
|
||||
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 1) {
|
||||
stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32 & 0xFF;
|
||||
Register stencil_ref_mask_reg;
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
if (primitive_two_faced &&
|
||||
regs.Get<reg::RB_DEPTHCONTROL>().backface_enable &&
|
||||
pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
|
||||
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
|
||||
} else {
|
||||
stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK].u32 & 0xFF;
|
||||
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK;
|
||||
}
|
||||
uint32_t stencil_ref =
|
||||
regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_reg).stencilref;
|
||||
ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
|
||||
if (ff_stencil_ref_update_needed_) {
|
||||
ff_stencil_ref_ = stencil_ref;
|
||||
|
@ -2019,64 +2006,50 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
|
||||
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||
uint32_t pa_su_point_minmax = regs[XE_GPU_REG_PA_SU_POINT_MINMAX].u32;
|
||||
uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32;
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
|
||||
uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32;
|
||||
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
||||
auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
auto pa_su_vtx_cntl = regs.Get<reg::PA_SU_VTX_CNTL>();
|
||||
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
||||
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
|
||||
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
||||
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||
uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
|
||||
uint32_t rb_stencilrefmask_bf = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32;
|
||||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32;
|
||||
uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
||||
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
|
||||
auto rb_stencilrefmask_bf =
|
||||
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
|
||||
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
|
||||
auto sq_context_misc = regs.Get<reg::SQ_CONTEXT_MISC>();
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
|
||||
|
||||
// Get the color info register values for each render target, and also put
|
||||
// some safety measures for the ROV path - disable fully aliased render
|
||||
// targets. Also, for ROV, exclude components that don't exist in the format
|
||||
// from the write mask.
|
||||
uint32_t color_infos[4];
|
||||
ColorRenderTargetFormat color_formats[4];
|
||||
reg::RB_COLOR_INFO color_infos[4];
|
||||
float rt_clamp[4][4];
|
||||
uint32_t rt_keep_masks[4][2];
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t color_info;
|
||||
switch (i) {
|
||||
case 1:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
|
||||
break;
|
||||
case 2:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
|
||||
break;
|
||||
case 3:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
|
||||
break;
|
||||
default:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
|
||||
}
|
||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
||||
color_infos[i] = color_info;
|
||||
color_formats[i] = ColorRenderTargetFormat((color_info >> 16) & 0xF);
|
||||
|
||||
if (IsROVUsedForEDRAM()) {
|
||||
// Get the mask for keeping previous color's components unmodified,
|
||||
// or two UINT32_MAX if no colors actually existing in the RT are written.
|
||||
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
|
||||
color_formats[i], (color_mask >> (i * 4)) & 0b1111, rt_clamp[i][0],
|
||||
rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3], rt_keep_masks[i][0],
|
||||
rt_keep_masks[i][1]);
|
||||
color_info.color_format, (color_mask >> (i * 4)) & 0b1111,
|
||||
rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
|
||||
rt_keep_masks[i][0], rt_keep_masks[i][1]);
|
||||
|
||||
// Disable the render target if it has the same EDRAM base as another one
|
||||
// (with a smaller index - assume it's more important).
|
||||
if (rt_keep_masks[i][0] == UINT32_MAX &&
|
||||
rt_keep_masks[i][1] == UINT32_MAX) {
|
||||
uint32_t edram_base = color_info & 0xFFF;
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
if (edram_base == (color_infos[j] & 0xFFF) &&
|
||||
if (color_info.color_base == color_infos[j].color_base &&
|
||||
(rt_keep_masks[j][0] != UINT32_MAX ||
|
||||
rt_keep_masks[j][1] != UINT32_MAX)) {
|
||||
rt_keep_masks[i][0] = UINT32_MAX;
|
||||
|
@ -2091,20 +2064,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// Disable depth and stencil if it aliases a color render target (for
|
||||
// instance, during the XBLA logo in Banjo-Kazooie, though depth writing is
|
||||
// already disabled there).
|
||||
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) {
|
||||
uint32_t edram_base_depth = rb_depth_info & 0xFFF;
|
||||
bool depth_stencil_enabled =
|
||||
rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
|
||||
if (IsROVUsedForEDRAM() && depth_stencil_enabled) {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (edram_base_depth == (color_infos[i] & 0xFFF) &&
|
||||
if (rb_depth_info.depth_base == color_infos[i].color_base &&
|
||||
(rt_keep_masks[i][0] != UINT32_MAX ||
|
||||
rt_keep_masks[i][1] != UINT32_MAX)) {
|
||||
rb_depthcontrol &= ~(uint32_t(0x1 | 0x2));
|
||||
depth_stencil_enabled = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get viewport Z scale - needed for flags and ROV output.
|
||||
float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4))
|
||||
float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
|
||||
: 1.0f;
|
||||
|
||||
|
@ -2126,18 +2100,18 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// = false: multiply the Z coordinate by 1/W0.
|
||||
// 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal
|
||||
// to get 1/W0.
|
||||
if (pa_cl_vte_cntl & (1 << 8)) {
|
||||
if (pa_cl_vte_cntl.vtx_xy_fmt) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW;
|
||||
}
|
||||
if (pa_cl_vte_cntl & (1 << 9)) {
|
||||
if (pa_cl_vte_cntl.vtx_z_fmt) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW;
|
||||
}
|
||||
if (pa_cl_vte_cntl & (1 << 10)) {
|
||||
if (pa_cl_vte_cntl.vtx_w0_fmt) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal;
|
||||
}
|
||||
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
|
||||
if (!(pa_cl_clip_cntl & (1 << 16))) {
|
||||
flags |= (pa_cl_clip_cntl & 0b111111)
|
||||
if (!pa_cl_clip_cntl.clip_disable) {
|
||||
flags |= (pa_cl_clip_cntl.value & 0b111111)
|
||||
<< DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift;
|
||||
}
|
||||
// Reversed depth.
|
||||
|
@ -2145,8 +2119,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
flags |= DxbcShaderTranslator::kSysFlag_ReverseZ;
|
||||
}
|
||||
// Alpha test.
|
||||
if (rb_colorcontrol & 0x8) {
|
||||
flags |= (rb_colorcontrol & 0x7)
|
||||
if (rb_colorcontrol.alpha_test_enable) {
|
||||
flags |= uint32_t(rb_colorcontrol.alpha_func)
|
||||
<< DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
|
||||
} else {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess |
|
||||
|
@ -2154,25 +2128,25 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
|
||||
}
|
||||
// Alpha to coverage.
|
||||
if (rb_colorcontrol & 0x10) {
|
||||
if (rb_colorcontrol.alpha_to_mask_enable) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage;
|
||||
}
|
||||
// Gamma writing.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (color_formats[i] == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
|
||||
if (color_infos[i].color_format ==
|
||||
ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i;
|
||||
}
|
||||
}
|
||||
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) {
|
||||
if (IsROVUsedForEDRAM() && depth_stencil_enabled) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil;
|
||||
if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) ==
|
||||
DepthRenderTargetFormat::kD24FS8) {
|
||||
if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24;
|
||||
}
|
||||
if (rb_depthcontrol & 0x2) {
|
||||
flags |= ((rb_depthcontrol >> 4) & 0x7)
|
||||
if (rb_depthcontrol.z_enable) {
|
||||
flags |= uint32_t(rb_depthcontrol.zfunc)
|
||||
<< DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift;
|
||||
if (rb_depthcontrol & 0x4) {
|
||||
if (rb_depthcontrol.z_write_enable) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite;
|
||||
}
|
||||
} else {
|
||||
|
@ -2182,7 +2156,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual |
|
||||
DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater;
|
||||
}
|
||||
if (rb_depthcontrol & 0x1) {
|
||||
if (rb_depthcontrol.stencil_enable) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest;
|
||||
}
|
||||
if (early_z) {
|
||||
|
@ -2223,9 +2197,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
index_endian_and_edge_factors;
|
||||
|
||||
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
|
||||
if (!(pa_cl_clip_cntl & (1 << 16))) {
|
||||
if (!pa_cl_clip_cntl.clip_disable) {
|
||||
for (uint32_t i = 0; i < 6; ++i) {
|
||||
if (!(pa_cl_clip_cntl & (1 << i))) {
|
||||
if (!(pa_cl_clip_cntl.value & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
const float* ucp = &regs[XE_GPU_REG_PA_CL_UCP_0_X + i * 4].f32;
|
||||
|
@ -2249,45 +2223,49 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// different register (and if there's such register at all).
|
||||
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
|
||||
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
|
||||
// When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to be
|
||||
// written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF isn't
|
||||
// set in this case in draws in games.
|
||||
bool gl_clip_space_def =
|
||||
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4));
|
||||
!pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena;
|
||||
float ndc_scale_x, ndc_scale_y, ndc_scale_z;
|
||||
if (primitive_two_faced && (pa_su_sc_mode_cntl & 0x3) == 0x3) {
|
||||
if (primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
|
||||
pa_su_sc_mode_cntl.cull_back) {
|
||||
// Kill all primitives if both faces are culled, but the vertex shader still
|
||||
// needs to do memexport (not NaN because of comparison for setting the
|
||||
// dirty flag).
|
||||
ndc_scale_x = ndc_scale_y = ndc_scale_z = 0;
|
||||
} else {
|
||||
if (pa_cl_vte_cntl & (1 << 0)) {
|
||||
if (pa_cl_vte_cntl.vport_x_scale_ena) {
|
||||
ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f;
|
||||
} else {
|
||||
ndc_scale_x = 1.0f / 1280.0f;
|
||||
}
|
||||
if (pa_cl_vte_cntl & (1 << 2)) {
|
||||
if (pa_cl_vte_cntl.vport_y_scale_ena) {
|
||||
ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f;
|
||||
} else {
|
||||
ndc_scale_y = -1.0f / 1280.0f;
|
||||
}
|
||||
ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f;
|
||||
}
|
||||
float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f;
|
||||
float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f;
|
||||
float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f;
|
||||
float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f;
|
||||
float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f;
|
||||
// Like in OpenGL - VPOS giving pixel centers.
|
||||
// TODO(Triang3l): Check if ps_param_gen should give center positions in
|
||||
// OpenGL mode on the Xbox 360.
|
||||
float pixel_half_pixel_offset = 0.5f;
|
||||
if (cvars::d3d12_half_pixel_offset && !(pa_su_vtx_cntl & (1 << 0))) {
|
||||
if (cvars::d3d12_half_pixel_offset && !pa_su_vtx_cntl.pix_center) {
|
||||
// Signs are hopefully correct here, tested in GTA IV on both clearing
|
||||
// (without a viewport) and drawing things near the edges of the screen.
|
||||
if (pa_cl_vte_cntl & (1 << 0)) {
|
||||
if (pa_cl_vte_cntl.vport_x_scale_ena) {
|
||||
if (viewport_scale_x != 0.0f) {
|
||||
ndc_offset_x += 0.5f / viewport_scale_x;
|
||||
}
|
||||
} else {
|
||||
ndc_offset_x += 1.0f / 2560.0f;
|
||||
}
|
||||
if (pa_cl_vte_cntl & (1 << 2)) {
|
||||
if (pa_cl_vte_cntl.vport_y_scale_ena) {
|
||||
if (viewport_scale_y != 0.0f) {
|
||||
ndc_offset_y += 0.5f / viewport_scale_y;
|
||||
}
|
||||
|
@ -2313,10 +2291,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset;
|
||||
|
||||
// Point size.
|
||||
float point_size_x = float(pa_su_point_size >> 16) * 0.125f;
|
||||
float point_size_y = float(pa_su_point_size & 0xFFFF) * 0.125f;
|
||||
float point_size_min = float(pa_su_point_minmax & 0xFFFF) * 0.125f;
|
||||
float point_size_max = float(pa_su_point_minmax >> 16) * 0.125f;
|
||||
float point_size_x = float(pa_su_point_size.width) * 0.125f;
|
||||
float point_size_y = float(pa_su_point_size.height) * 0.125f;
|
||||
float point_size_min = float(pa_su_point_minmax.min_size) * 0.125f;
|
||||
float point_size_max = float(pa_su_point_minmax.max_size) * 0.125f;
|
||||
dirty |= system_constants_.point_size[0] != point_size_x;
|
||||
dirty |= system_constants_.point_size[1] != point_size_y;
|
||||
dirty |= system_constants_.point_size_min_max[0] != point_size_min;
|
||||
|
@ -2326,13 +2304,13 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
system_constants_.point_size_min_max[0] = point_size_min;
|
||||
system_constants_.point_size_min_max[1] = point_size_max;
|
||||
float point_screen_to_ndc_x, point_screen_to_ndc_y;
|
||||
if (pa_cl_vte_cntl & (1 << 0)) {
|
||||
if (pa_cl_vte_cntl.vport_x_scale_ena) {
|
||||
point_screen_to_ndc_x =
|
||||
(viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f;
|
||||
} else {
|
||||
point_screen_to_ndc_x = 1.0f / 2560.0f;
|
||||
}
|
||||
if (pa_cl_vte_cntl & (1 << 2)) {
|
||||
if (pa_cl_vte_cntl.vport_y_scale_ena) {
|
||||
point_screen_to_ndc_y =
|
||||
(viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f;
|
||||
} else {
|
||||
|
@ -2345,15 +2323,16 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
|
||||
// Pixel position register.
|
||||
uint32_t pixel_pos_reg =
|
||||
(sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX;
|
||||
sq_program_cntl.param_gen ? sq_context_misc.param_gen_pos : UINT_MAX;
|
||||
dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg;
|
||||
system_constants_.pixel_pos_reg = pixel_pos_reg;
|
||||
|
||||
// Log2 of sample count, for scaling VPOS with SSAA (without ROV) and for
|
||||
// EDRAM address calculation with MSAA (with ROV).
|
||||
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||
uint32_t sample_count_log2_x = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
|
||||
uint32_t sample_count_log2_y = msaa_samples >= MsaaSamples::k2X ? 1 : 0;
|
||||
uint32_t sample_count_log2_x =
|
||||
rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 1 : 0;
|
||||
uint32_t sample_count_log2_y =
|
||||
rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 1 : 0;
|
||||
dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x;
|
||||
dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y;
|
||||
system_constants_.sample_count_log2[0] = sample_count_log2_x;
|
||||
|
@ -2365,43 +2344,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
|
||||
// EDRAM pitch for ROV writing.
|
||||
if (IsROVUsedForEDRAM()) {
|
||||
uint32_t edram_pitch_tiles = ((std::min(rb_surface_info & 0x3FFFu, 2560u) *
|
||||
(msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
|
||||
79) /
|
||||
80;
|
||||
uint32_t edram_pitch_tiles =
|
||||
((std::min(rb_surface_info.surface_pitch, 2560u) *
|
||||
(rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
|
||||
79) /
|
||||
80;
|
||||
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
|
||||
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
|
||||
}
|
||||
|
||||
// Color exponent bias and output index mapping or ROV render target writing.
|
||||
bool colorcontrol_blend_enable = (rb_colorcontrol & 0x20) == 0;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t color_info = color_infos[i];
|
||||
uint32_t blend_factors_ops;
|
||||
if (colorcontrol_blend_enable) {
|
||||
switch (i) {
|
||||
case 1:
|
||||
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32;
|
||||
break;
|
||||
case 2:
|
||||
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32;
|
||||
break;
|
||||
case 3:
|
||||
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32;
|
||||
break;
|
||||
default:
|
||||
blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32;
|
||||
break;
|
||||
}
|
||||
blend_factors_ops &= 0x1FFF1FFF;
|
||||
} else {
|
||||
blend_factors_ops = 0x00010001;
|
||||
}
|
||||
reg::RB_COLOR_INFO color_info = color_infos[i];
|
||||
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
|
||||
int32_t color_exp_bias = int32_t(color_info << 6) >> 26;
|
||||
ColorRenderTargetFormat color_format = color_formats[i];
|
||||
if (color_format == ColorRenderTargetFormat::k_16_16 ||
|
||||
color_format == ColorRenderTargetFormat::k_16_16_16_16) {
|
||||
int32_t color_exp_bias = color_info.color_exp_bias;
|
||||
if (color_info.color_format == ColorRenderTargetFormat::k_16_16 ||
|
||||
color_info.color_format == ColorRenderTargetFormat::k_16_16_16_16) {
|
||||
// On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have
|
||||
// -32...32 range and expect shaders to give -32...32 values, but they're
|
||||
// emulated using normalized RG16/RGBA16 when not using the ROV, so the
|
||||
|
@ -2427,7 +2385,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1];
|
||||
if (rt_keep_masks[i][0] != UINT32_MAX ||
|
||||
rt_keep_masks[i][1] != UINT32_MAX) {
|
||||
uint32_t rt_base_dwords_scaled = (color_info & 0xFFF) * 1280;
|
||||
uint32_t rt_base_dwords_scaled = color_info.color_base * 1280;
|
||||
if (texture_cache_->IsResolutionScale2X()) {
|
||||
rt_base_dwords_scaled <<= 2;
|
||||
}
|
||||
|
@ -2435,8 +2393,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
rt_base_dwords_scaled;
|
||||
system_constants_.edram_rt_base_dwords_scaled[i] =
|
||||
rt_base_dwords_scaled;
|
||||
uint32_t format_flags =
|
||||
DxbcShaderTranslator::ROV_AddColorFormatFlags(color_format);
|
||||
uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags(
|
||||
color_info.color_format);
|
||||
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
|
||||
system_constants_.edram_rt_format_flags[i] = format_flags;
|
||||
// Can't do float comparisons here because NaNs would result in always
|
||||
|
@ -2445,6 +2403,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
4 * sizeof(float)) != 0;
|
||||
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
|
||||
4 * sizeof(float));
|
||||
uint32_t blend_factors_ops =
|
||||
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
|
||||
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
|
||||
blend_factors_ops;
|
||||
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
|
||||
|
@ -2465,7 +2425,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
resolution_square_scale;
|
||||
system_constants_.edram_resolution_square_scale = resolution_square_scale;
|
||||
|
||||
uint32_t depth_base_dwords = (rb_depth_info & 0xFFF) * 1280;
|
||||
uint32_t depth_base_dwords = rb_depth_info.depth_base * 1280;
|
||||
dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords;
|
||||
system_constants_.edram_depth_base_dwords = depth_base_dwords;
|
||||
|
||||
|
@ -2474,7 +2434,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
float depth_range_scale = std::abs(viewport_scale_z);
|
||||
dirty |= system_constants_.edram_depth_range_scale != depth_range_scale;
|
||||
system_constants_.edram_depth_range_scale = depth_range_scale;
|
||||
float depth_range_offset = (pa_cl_vte_cntl & (1 << 5))
|
||||
float depth_range_offset = pa_cl_vte_cntl.vport_z_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
|
||||
: 0.0f;
|
||||
if (viewport_scale_z < 0.0f) {
|
||||
|
@ -2490,20 +2450,20 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f;
|
||||
float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f;
|
||||
if (primitive_two_faced) {
|
||||
if (pa_su_sc_mode_cntl & (1 << 11)) {
|
||||
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
|
||||
poly_offset_front_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
poly_offset_front_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
}
|
||||
if (pa_su_sc_mode_cntl & (1 << 12)) {
|
||||
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
|
||||
poly_offset_back_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
|
||||
poly_offset_back_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
|
||||
}
|
||||
} else {
|
||||
if (pa_su_sc_mode_cntl & (1 << 13)) {
|
||||
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
|
||||
poly_offset_front_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
poly_offset_front_offset =
|
||||
|
@ -2533,39 +2493,43 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
poly_offset_back_offset;
|
||||
system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset;
|
||||
|
||||
if (rb_depthcontrol & 0x1) {
|
||||
uint32_t stencil_value;
|
||||
|
||||
stencil_value = rb_stencilrefmask & 0xFF;
|
||||
dirty |= system_constants_.edram_stencil_front_reference != stencil_value;
|
||||
system_constants_.edram_stencil_front_reference = stencil_value;
|
||||
stencil_value = (rb_stencilrefmask >> 8) & 0xFF;
|
||||
dirty |= system_constants_.edram_stencil_front_read_mask != stencil_value;
|
||||
system_constants_.edram_stencil_front_read_mask = stencil_value;
|
||||
stencil_value = (rb_stencilrefmask >> 16) & 0xFF;
|
||||
if (depth_stencil_enabled && rb_depthcontrol.stencil_enable) {
|
||||
dirty |= system_constants_.edram_stencil_front_reference !=
|
||||
rb_stencilrefmask.stencilref;
|
||||
system_constants_.edram_stencil_front_reference =
|
||||
rb_stencilrefmask.stencilref;
|
||||
dirty |= system_constants_.edram_stencil_front_read_mask !=
|
||||
rb_stencilrefmask.stencilmask;
|
||||
system_constants_.edram_stencil_front_read_mask =
|
||||
rb_stencilrefmask.stencilmask;
|
||||
dirty |= system_constants_.edram_stencil_front_write_mask !=
|
||||
rb_stencilrefmask.stencilwritemask;
|
||||
system_constants_.edram_stencil_front_write_mask =
|
||||
rb_stencilrefmask.stencilwritemask;
|
||||
uint32_t stencil_func_ops =
|
||||
(rb_depthcontrol.value >> 8) & ((1 << 12) - 1);
|
||||
dirty |=
|
||||
system_constants_.edram_stencil_front_write_mask != stencil_value;
|
||||
system_constants_.edram_stencil_front_write_mask = stencil_value;
|
||||
stencil_value = (rb_depthcontrol >> 8) & ((1 << 12) - 1);
|
||||
dirty |= system_constants_.edram_stencil_front_func_ops != stencil_value;
|
||||
system_constants_.edram_stencil_front_func_ops = stencil_value;
|
||||
system_constants_.edram_stencil_front_func_ops != stencil_func_ops;
|
||||
system_constants_.edram_stencil_front_func_ops = stencil_func_ops;
|
||||
|
||||
if (primitive_two_faced && (rb_depthcontrol & 0x80)) {
|
||||
stencil_value = rb_stencilrefmask_bf & 0xFF;
|
||||
dirty |=
|
||||
system_constants_.edram_stencil_back_reference != stencil_value;
|
||||
system_constants_.edram_stencil_back_reference = stencil_value;
|
||||
stencil_value = (rb_stencilrefmask_bf >> 8) & 0xFF;
|
||||
dirty |=
|
||||
system_constants_.edram_stencil_back_read_mask != stencil_value;
|
||||
system_constants_.edram_stencil_back_read_mask = stencil_value;
|
||||
stencil_value = (rb_stencilrefmask_bf >> 16) & 0xFF;
|
||||
dirty |=
|
||||
system_constants_.edram_stencil_back_write_mask != stencil_value;
|
||||
system_constants_.edram_stencil_back_write_mask = stencil_value;
|
||||
stencil_value = (rb_depthcontrol >> 20) & ((1 << 12) - 1);
|
||||
dirty |= system_constants_.edram_stencil_back_func_ops != stencil_value;
|
||||
system_constants_.edram_stencil_back_func_ops = stencil_value;
|
||||
if (primitive_two_faced && rb_depthcontrol.backface_enable) {
|
||||
dirty |= system_constants_.edram_stencil_back_reference !=
|
||||
rb_stencilrefmask_bf.stencilref;
|
||||
system_constants_.edram_stencil_back_reference =
|
||||
rb_stencilrefmask_bf.stencilref;
|
||||
dirty |= system_constants_.edram_stencil_back_read_mask !=
|
||||
rb_stencilrefmask_bf.stencilmask;
|
||||
system_constants_.edram_stencil_back_read_mask =
|
||||
rb_stencilrefmask_bf.stencilmask;
|
||||
dirty |= system_constants_.edram_stencil_back_write_mask !=
|
||||
rb_stencilrefmask_bf.stencilwritemask;
|
||||
system_constants_.edram_stencil_back_write_mask =
|
||||
rb_stencilrefmask_bf.stencilwritemask;
|
||||
uint32_t stencil_func_ops_bf =
|
||||
(rb_depthcontrol.value >> 20) & ((1 << 12) - 1);
|
||||
dirty |= system_constants_.edram_stencil_back_func_ops !=
|
||||
stencil_func_ops_bf;
|
||||
system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf;
|
||||
} else {
|
||||
dirty |= std::memcmp(system_constants_.edram_stencil_back,
|
||||
system_constants_.edram_stencil_front,
|
||||
|
|
|
@ -207,8 +207,17 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader,
|
|||
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
|
||||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
|
||||
|
||||
xenos::xe_gpu_program_cntl_t sq_program_cntl;
|
||||
sq_program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
|
||||
// Normal vertex shaders only, for now.
|
||||
assert_true(sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kPosition1Vector ||
|
||||
sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kPosition2VectorsSprite ||
|
||||
sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kMultipass);
|
||||
assert_false(sq_program_cntl.gen_index_vtx);
|
||||
|
||||
if (!vertex_shader->is_translated() &&
|
||||
!TranslateShader(vertex_shader, sq_program_cntl, tessellated,
|
||||
primitive_type)) {
|
||||
|
@ -294,8 +303,7 @@ bool PipelineCache::ConfigurePipeline(
|
|||
}
|
||||
|
||||
bool PipelineCache::TranslateShader(D3D12Shader* shader,
|
||||
xenos::xe_gpu_program_cntl_t cntl,
|
||||
bool tessellated,
|
||||
reg::SQ_PROGRAM_CNTL cntl, bool tessellated,
|
||||
PrimitiveType primitive_type) {
|
||||
// Perform translation.
|
||||
// If this fails the shader will be marked as invalid and ignored later.
|
||||
|
@ -355,7 +363,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
PipelineDescription& description_out) {
|
||||
auto& regs = *register_file_;
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type);
|
||||
|
||||
// Initialize all unused fields to zero for comparison/hashing.
|
||||
|
@ -373,7 +381,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
description_out.pixel_shader = pixel_shader;
|
||||
|
||||
// Index buffer strip cut value.
|
||||
if (pa_su_sc_mode_cntl & (1 << 21)) {
|
||||
if (pa_su_sc_mode_cntl.multi_prim_ib_ena) {
|
||||
// Not using 0xFFFF with 32-bit indices because in index buffers it will be
|
||||
// 0xFFFF0000 anyway due to endianness.
|
||||
description_out.strip_cut_index = index_format == IndexFormat::kInt32
|
||||
|
@ -385,12 +393,12 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
|
||||
// Primitive topology type, tessellation mode and geometry shader.
|
||||
if (tessellated) {
|
||||
switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) {
|
||||
case TessellationMode::kContinuous:
|
||||
switch (regs.Get<reg::VGT_HOS_CNTL>().tess_mode) {
|
||||
case xenos::TessellationMode::kContinuous:
|
||||
description_out.tessellation_mode =
|
||||
PipelineTessellationMode::kContinuous;
|
||||
break;
|
||||
case TessellationMode::kAdaptive:
|
||||
case xenos::TessellationMode::kAdaptive:
|
||||
description_out.tessellation_mode =
|
||||
cvars::d3d12_tessellation_adaptive
|
||||
? PipelineTessellationMode::kAdaptive
|
||||
|
@ -471,53 +479,60 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
// Xenos fill mode 1).
|
||||
// Here we also assume that only one side is culled - if two sides are culled,
|
||||
// the D3D12 command processor will drop such draw early.
|
||||
uint32_t cull_mode = primitive_two_faced ? (pa_su_sc_mode_cntl & 0x3) : 0;
|
||||
bool cull_front, cull_back;
|
||||
if (primitive_two_faced) {
|
||||
cull_front = pa_su_sc_mode_cntl.cull_front != 0;
|
||||
cull_back = pa_su_sc_mode_cntl.cull_back != 0;
|
||||
} else {
|
||||
cull_front = false;
|
||||
cull_back = false;
|
||||
}
|
||||
float poly_offset = 0.0f, poly_offset_scale = 0.0f;
|
||||
if (primitive_two_faced) {
|
||||
description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0;
|
||||
if (cull_mode == 1) {
|
||||
description_out.front_counter_clockwise = pa_su_sc_mode_cntl.face == 0;
|
||||
if (cull_front) {
|
||||
description_out.cull_mode = PipelineCullMode::kFront;
|
||||
} else if (cull_mode == 2) {
|
||||
} else if (cull_back) {
|
||||
description_out.cull_mode = PipelineCullMode::kBack;
|
||||
} else {
|
||||
description_out.cull_mode = PipelineCullMode::kNone;
|
||||
}
|
||||
// With ROV, the depth bias is applied in the pixel shader because
|
||||
// per-sample depth is needed for MSAA.
|
||||
if (cull_mode != 1) {
|
||||
if (!cull_front) {
|
||||
// Front faces aren't culled.
|
||||
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7;
|
||||
if (fill_mode == 0 || fill_mode == 1) {
|
||||
// Direct3D 12, unfortunately, doesn't support point fill mode.
|
||||
if (pa_su_sc_mode_cntl.polymode_front_ptype !=
|
||||
xenos::PolygonType::kTriangles) {
|
||||
description_out.fill_mode_wireframe = 1;
|
||||
}
|
||||
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 11))) {
|
||||
if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_front_enable) {
|
||||
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
}
|
||||
}
|
||||
if (cull_mode != 2) {
|
||||
if (!cull_back) {
|
||||
// Back faces aren't culled.
|
||||
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7;
|
||||
if (fill_mode == 0 || fill_mode == 1) {
|
||||
if (pa_su_sc_mode_cntl.polymode_back_ptype !=
|
||||
xenos::PolygonType::kTriangles) {
|
||||
description_out.fill_mode_wireframe = 1;
|
||||
}
|
||||
// Prefer front depth bias because in general, front faces are the ones
|
||||
// that are rendered (except for shadow volumes).
|
||||
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 12)) &&
|
||||
if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_back_enable &&
|
||||
poly_offset == 0.0f && poly_offset_scale == 0.0f) {
|
||||
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
|
||||
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
|
||||
}
|
||||
}
|
||||
if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) {
|
||||
// Fill mode is disabled.
|
||||
if (pa_su_sc_mode_cntl.poly_mode == xenos::PolygonModeEnable::kDisabled) {
|
||||
description_out.fill_mode_wireframe = 0;
|
||||
}
|
||||
} else {
|
||||
// Filled front faces only.
|
||||
// Use front depth bias if POLY_OFFSET_PARA_ENABLED
|
||||
// (POLY_OFFSET_FRONT_ENABLED is for two-sided primitives).
|
||||
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 13))) {
|
||||
if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_para_enable) {
|
||||
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
}
|
||||
|
@ -535,8 +550,8 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
// of Duty 4 (vehicledamage map explosion decals) and Red Dead Redemption
|
||||
// (shadows - 2^17 is not enough, 2^18 hasn't been tested, but 2^19
|
||||
// eliminates the acne).
|
||||
if (((register_file_->values[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 0x1) ==
|
||||
uint32_t(DepthRenderTargetFormat::kD24FS8)) {
|
||||
if (regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
|
||||
DepthRenderTargetFormat::kD24FS8) {
|
||||
poly_offset *= float(1 << 19);
|
||||
} else {
|
||||
poly_offset *= float(1 << 23);
|
||||
|
@ -556,58 +571,49 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
primitive_type == PrimitiveType::kQuadPatch)) {
|
||||
description_out.fill_mode_wireframe = 1;
|
||||
}
|
||||
// CLIP_DISABLE
|
||||
description_out.depth_clip =
|
||||
(regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0;
|
||||
// TODO(DrChat): This seem to differ. Need to examine this.
|
||||
// https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11
|
||||
// ZCLIP_NEAR_DISABLE
|
||||
// description_out.depth_clip = (PA_CL_CLIP_CNTL & (1 << 26)) == 0;
|
||||
// RASTERIZER_DISABLE
|
||||
// Disable rendering in command processor if PA_CL_CLIP_CNTL & (1 << 22)?
|
||||
description_out.depth_clip = !regs.Get<reg::PA_CL_CLIP_CNTL>().clip_disable;
|
||||
if (edram_rov_used_) {
|
||||
description_out.rov_msaa =
|
||||
((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0;
|
||||
}
|
||||
|
||||
if (!edram_rov_used_) {
|
||||
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
|
||||
|
||||
regs.Get<reg::RB_SURFACE_INFO>().msaa_samples != MsaaSamples::k1X;
|
||||
} else {
|
||||
// Depth/stencil. No stencil, always passing depth test and no depth writing
|
||||
// means depth disabled.
|
||||
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) {
|
||||
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||
if (rb_depthcontrol & 0x2) {
|
||||
description_out.depth_func = (rb_depthcontrol >> 4) & 0x7;
|
||||
description_out.depth_write = (rb_depthcontrol & 0x4) != 0;
|
||||
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
||||
if (rb_depthcontrol.z_enable) {
|
||||
description_out.depth_func = rb_depthcontrol.zfunc;
|
||||
description_out.depth_write = rb_depthcontrol.z_write_enable;
|
||||
} else {
|
||||
description_out.depth_func = 0b111;
|
||||
description_out.depth_func = CompareFunction::kAlways;
|
||||
}
|
||||
if (rb_depthcontrol & 0x1) {
|
||||
if (rb_depthcontrol.stencil_enable) {
|
||||
description_out.stencil_enable = 1;
|
||||
bool stencil_backface_enable =
|
||||
primitive_two_faced && (rb_depthcontrol & 0x80);
|
||||
uint32_t stencil_masks;
|
||||
primitive_two_faced && rb_depthcontrol.backface_enable;
|
||||
// Per-face masks not supported by Direct3D 12, choose the back face
|
||||
// ones only if drawing only back faces.
|
||||
if (stencil_backface_enable && cull_mode == 1) {
|
||||
stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32;
|
||||
Register stencil_ref_mask_reg;
|
||||
if (stencil_backface_enable && cull_front) {
|
||||
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
|
||||
} else {
|
||||
stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
|
||||
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK;
|
||||
}
|
||||
description_out.stencil_read_mask = (stencil_masks >> 8) & 0xFF;
|
||||
description_out.stencil_write_mask = (stencil_masks >> 16) & 0xFF;
|
||||
description_out.stencil_front_fail_op = (rb_depthcontrol >> 11) & 0x7;
|
||||
auto stencil_ref_mask =
|
||||
regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_reg);
|
||||
description_out.stencil_read_mask = stencil_ref_mask.stencilmask;
|
||||
description_out.stencil_write_mask = stencil_ref_mask.stencilwritemask;
|
||||
description_out.stencil_front_fail_op = rb_depthcontrol.stencilfail;
|
||||
description_out.stencil_front_depth_fail_op =
|
||||
(rb_depthcontrol >> 17) & 0x7;
|
||||
description_out.stencil_front_pass_op = (rb_depthcontrol >> 14) & 0x7;
|
||||
description_out.stencil_front_func = (rb_depthcontrol >> 8) & 0x7;
|
||||
rb_depthcontrol.stencilzfail;
|
||||
description_out.stencil_front_pass_op = rb_depthcontrol.stencilzpass;
|
||||
description_out.stencil_front_func = rb_depthcontrol.stencilfunc;
|
||||
if (stencil_backface_enable) {
|
||||
description_out.stencil_back_fail_op = (rb_depthcontrol >> 23) & 0x7;
|
||||
description_out.stencil_back_fail_op = rb_depthcontrol.stencilfail_bf;
|
||||
description_out.stencil_back_depth_fail_op =
|
||||
(rb_depthcontrol >> 29) & 0x7;
|
||||
description_out.stencil_back_pass_op = (rb_depthcontrol >> 26) & 0x7;
|
||||
description_out.stencil_back_func = (rb_depthcontrol >> 20) & 0x7;
|
||||
rb_depthcontrol.stencilzfail_bf;
|
||||
description_out.stencil_back_pass_op =
|
||||
rb_depthcontrol.stencilzpass_bf;
|
||||
description_out.stencil_back_func = rb_depthcontrol.stencilfunc_bf;
|
||||
} else {
|
||||
description_out.stencil_back_fail_op =
|
||||
description_out.stencil_front_fail_op;
|
||||
|
@ -620,13 +626,13 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
}
|
||||
}
|
||||
// If not binding the DSV, ignore the format in the hash.
|
||||
if (description_out.depth_func != 0b111 || description_out.depth_write ||
|
||||
description_out.stencil_enable) {
|
||||
description_out.depth_format = DepthRenderTargetFormat(
|
||||
(regs[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 1);
|
||||
if (description_out.depth_func != CompareFunction::kAlways ||
|
||||
description_out.depth_write || description_out.stencil_enable) {
|
||||
description_out.depth_format =
|
||||
regs.Get<reg::RB_DEPTH_INFO>().depth_format;
|
||||
}
|
||||
} else {
|
||||
description_out.depth_func = 0b111;
|
||||
description_out.depth_func = CompareFunction::kAlways;
|
||||
}
|
||||
if (early_z) {
|
||||
description_out.force_early_z = 1;
|
||||
|
@ -686,38 +692,25 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
if (render_targets[i].format == DXGI_FORMAT_UNKNOWN) {
|
||||
break;
|
||||
}
|
||||
uint32_t guest_rt_index = render_targets[i].guest_render_target;
|
||||
uint32_t color_info, blendcontrol;
|
||||
switch (guest_rt_index) {
|
||||
case 1:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
|
||||
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32;
|
||||
break;
|
||||
case 2:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
|
||||
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32;
|
||||
break;
|
||||
case 3:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
|
||||
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32;
|
||||
break;
|
||||
default:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
|
||||
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32;
|
||||
break;
|
||||
}
|
||||
PipelineRenderTarget& rt = description_out.render_targets[i];
|
||||
rt.used = 1;
|
||||
rt.format = RenderTargetCache::GetBaseColorFormat(
|
||||
ColorRenderTargetFormat((color_info >> 16) & 0xF));
|
||||
uint32_t guest_rt_index = render_targets[i].guest_render_target;
|
||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[guest_rt_index]);
|
||||
rt.format =
|
||||
RenderTargetCache::GetBaseColorFormat(color_info.color_format);
|
||||
rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF;
|
||||
if (!(rb_colorcontrol & 0x20) && rt.write_mask) {
|
||||
rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F];
|
||||
rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F];
|
||||
rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7);
|
||||
rt.src_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 16) & 0x1F];
|
||||
rt.dest_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 24) & 0x1F];
|
||||
rt.blend_op_alpha = BlendOp((blendcontrol >> 21) & 0x7);
|
||||
if (rt.write_mask) {
|
||||
auto blendcontrol = regs.Get<reg::RB_BLENDCONTROL>(
|
||||
reg::RB_BLENDCONTROL::rt_register_indices[guest_rt_index]);
|
||||
rt.src_blend = kBlendFactorMap[uint32_t(blendcontrol.color_srcblend)];
|
||||
rt.dest_blend = kBlendFactorMap[uint32_t(blendcontrol.color_destblend)];
|
||||
rt.blend_op = blendcontrol.color_comb_fcn;
|
||||
rt.src_blend_alpha =
|
||||
kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_srcblend)];
|
||||
rt.dest_blend_alpha =
|
||||
kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_destblend)];
|
||||
rt.blend_op_alpha = blendcontrol.alpha_comb_fcn;
|
||||
} else {
|
||||
rt.src_blend = PipelineBlendFactor::kOne;
|
||||
rt.dest_blend = PipelineBlendFactor::kZero;
|
||||
|
@ -943,15 +936,17 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
|
|||
|
||||
if (!edram_rov_used_) {
|
||||
// Depth/stencil.
|
||||
if (description.depth_func != 0b111 || description.depth_write) {
|
||||
if (description.depth_func != CompareFunction::kAlways ||
|
||||
description.depth_write) {
|
||||
state_desc.DepthStencilState.DepthEnable = TRUE;
|
||||
state_desc.DepthStencilState.DepthWriteMask =
|
||||
description.depth_write ? D3D12_DEPTH_WRITE_MASK_ALL
|
||||
: D3D12_DEPTH_WRITE_MASK_ZERO;
|
||||
// Comparison functions are the same in Direct3D 12 but plus one (minus
|
||||
// one, bit 0 for less, bit 1 for equal, bit 2 for greater).
|
||||
state_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC(
|
||||
uint32_t(D3D12_COMPARISON_FUNC_NEVER) + description.depth_func);
|
||||
state_desc.DepthStencilState.DepthFunc =
|
||||
D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) +
|
||||
uint32_t(description.depth_func));
|
||||
}
|
||||
if (description.stencil_enable) {
|
||||
state_desc.DepthStencilState.StencilEnable = TRUE;
|
||||
|
@ -960,26 +955,30 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
|
|||
state_desc.DepthStencilState.StencilWriteMask =
|
||||
description.stencil_write_mask;
|
||||
// Stencil operations are the same in Direct3D 12 too but plus one.
|
||||
state_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP(
|
||||
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_fail_op);
|
||||
state_desc.DepthStencilState.FrontFace.StencilFailOp =
|
||||
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
|
||||
uint32_t(description.stencil_front_fail_op));
|
||||
state_desc.DepthStencilState.FrontFace.StencilDepthFailOp =
|
||||
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
|
||||
description.stencil_front_depth_fail_op);
|
||||
state_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP(
|
||||
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_pass_op);
|
||||
uint32_t(description.stencil_front_depth_fail_op));
|
||||
state_desc.DepthStencilState.FrontFace.StencilPassOp =
|
||||
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
|
||||
uint32_t(description.stencil_front_pass_op));
|
||||
state_desc.DepthStencilState.FrontFace.StencilFunc =
|
||||
D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) +
|
||||
description.stencil_front_func);
|
||||
state_desc.DepthStencilState.BackFace.StencilFailOp = D3D12_STENCIL_OP(
|
||||
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_fail_op);
|
||||
uint32_t(description.stencil_front_func));
|
||||
state_desc.DepthStencilState.BackFace.StencilFailOp =
|
||||
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
|
||||
uint32_t(description.stencil_back_fail_op));
|
||||
state_desc.DepthStencilState.BackFace.StencilDepthFailOp =
|
||||
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
|
||||
description.stencil_back_depth_fail_op);
|
||||
state_desc.DepthStencilState.BackFace.StencilPassOp = D3D12_STENCIL_OP(
|
||||
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_pass_op);
|
||||
uint32_t(description.stencil_back_depth_fail_op));
|
||||
state_desc.DepthStencilState.BackFace.StencilPassOp =
|
||||
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
|
||||
uint32_t(description.stencil_back_pass_op));
|
||||
state_desc.DepthStencilState.BackFace.StencilFunc =
|
||||
D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) +
|
||||
description.stencil_back_func);
|
||||
uint32_t(description.stencil_back_func));
|
||||
}
|
||||
if (state_desc.DepthStencilState.DepthEnable ||
|
||||
state_desc.DepthStencilState.StencilEnable) {
|
||||
|
|
|
@ -152,26 +152,26 @@ class PipelineCache {
|
|||
uint32_t depth_clip : 1; // 15
|
||||
uint32_t rov_msaa : 1; // 16
|
||||
DepthRenderTargetFormat depth_format : 1; // 17
|
||||
uint32_t depth_func : 3; // 20
|
||||
CompareFunction depth_func : 3; // 20
|
||||
uint32_t depth_write : 1; // 21
|
||||
uint32_t stencil_enable : 1; // 22
|
||||
uint32_t stencil_read_mask : 8; // 30
|
||||
uint32_t force_early_z : 1; // 31
|
||||
|
||||
uint32_t stencil_write_mask : 8; // 8
|
||||
uint32_t stencil_front_fail_op : 3; // 11
|
||||
uint32_t stencil_front_depth_fail_op : 3; // 14
|
||||
uint32_t stencil_front_pass_op : 3; // 17
|
||||
uint32_t stencil_front_func : 3; // 20
|
||||
uint32_t stencil_back_fail_op : 3; // 23
|
||||
uint32_t stencil_back_depth_fail_op : 3; // 26
|
||||
uint32_t stencil_back_pass_op : 3; // 29
|
||||
uint32_t stencil_back_func : 3; // 32
|
||||
uint32_t stencil_write_mask : 8; // 8
|
||||
StencilOp stencil_front_fail_op : 3; // 11
|
||||
StencilOp stencil_front_depth_fail_op : 3; // 14
|
||||
StencilOp stencil_front_pass_op : 3; // 17
|
||||
CompareFunction stencil_front_func : 3; // 20
|
||||
StencilOp stencil_back_fail_op : 3; // 23
|
||||
StencilOp stencil_back_depth_fail_op : 3; // 26
|
||||
StencilOp stencil_back_pass_op : 3; // 29
|
||||
CompareFunction stencil_back_func : 3; // 32
|
||||
|
||||
PipelineRenderTarget render_targets[4];
|
||||
};
|
||||
|
||||
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl,
|
||||
bool TranslateShader(D3D12Shader* shader, reg::SQ_PROGRAM_CNTL cntl,
|
||||
bool tessellated, PrimitiveType primitive_type);
|
||||
|
||||
bool GetCurrentStateDescription(
|
||||
|
|
|
@ -192,7 +192,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) {
|
||||
bool index_32bit = index_format == IndexFormat::kInt32;
|
||||
auto& regs = *register_file_;
|
||||
bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0;
|
||||
bool reset = regs.Get<reg::PA_SU_SC_MODE_CNTL>().multi_prim_ib_ena;
|
||||
// Swap the reset index because we will be comparing unswapped values to it.
|
||||
uint32_t reset_index = xenos::GpuSwap(
|
||||
regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32, index_endianness);
|
||||
|
|
|
@ -541,16 +541,17 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
|
||||
bool rov_used = command_processor_->IsROVUsedForEDRAM();
|
||||
|
||||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
|
||||
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
|
||||
uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u);
|
||||
if (surface_pitch == 0) {
|
||||
// TODO(Triang3l): Do something if a memexport-only draw has 0 surface
|
||||
// pitch (never seen in any game so far, not sure if even legal).
|
||||
return false;
|
||||
}
|
||||
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||
uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
|
||||
uint32_t msaa_samples_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1;
|
||||
uint32_t msaa_samples_x =
|
||||
rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1;
|
||||
uint32_t msaa_samples_y =
|
||||
rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 2 : 1;
|
||||
|
||||
// Extract color/depth info in an unified way.
|
||||
bool enabled[5];
|
||||
|
@ -558,26 +559,27 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
uint32_t formats[5];
|
||||
bool formats_are_64bpp[5];
|
||||
uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader);
|
||||
uint32_t rb_color_info[4] = {
|
||||
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32};
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
enabled[i] = (color_mask & (0xF << (i * 4))) != 0;
|
||||
edram_bases[i] = std::min(rb_color_info[i] & 0xFFF, 2048u);
|
||||
formats[i] = uint32_t(GetBaseColorFormat(
|
||||
ColorRenderTargetFormat((rb_color_info[i] >> 16) & 0xF)));
|
||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
||||
edram_bases[i] = std::min(color_info.color_base, 2048u);
|
||||
formats[i] = uint32_t(GetBaseColorFormat(color_info.color_format));
|
||||
formats_are_64bpp[i] =
|
||||
IsColorFormat64bpp(ColorRenderTargetFormat(formats[i]));
|
||||
}
|
||||
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
||||
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
// 0x1 = stencil test, 0x2 = depth test.
|
||||
enabled[4] = (rb_depthcontrol & (0x1 | 0x2)) != 0;
|
||||
edram_bases[4] = std::min(rb_depth_info & 0xFFF, 2048u);
|
||||
formats[4] = (rb_depth_info >> 16) & 0x1;
|
||||
enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
|
||||
edram_bases[4] = std::min(rb_depth_info.depth_base, 2048u);
|
||||
formats[4] = uint32_t(rb_depth_info.depth_format);
|
||||
formats_are_64bpp[4] = false;
|
||||
// Don't mark depth regions as dirty if not writing the depth.
|
||||
bool depth_readonly = (rb_depthcontrol & (0x1 | 0x4)) == 0;
|
||||
// TODO(Triang3l): Make a common function for checking if stencil writing is
|
||||
// really done?
|
||||
bool depth_readonly =
|
||||
!rb_depthcontrol.stencil_enable && !rb_depthcontrol.z_write_enable;
|
||||
|
||||
bool full_update = false;
|
||||
|
||||
|
@ -590,7 +592,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
// in the beginning of the frame or after resolves by setting the current
|
||||
// pitch to 0.
|
||||
if (current_surface_pitch_ != surface_pitch ||
|
||||
current_msaa_samples_ != msaa_samples) {
|
||||
current_msaa_samples_ != rb_surface_info.msaa_samples) {
|
||||
full_update = true;
|
||||
}
|
||||
|
||||
|
@ -632,26 +634,22 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
|
||||
// Get EDRAM usage of the current draw so dirty regions can be calculated.
|
||||
// See D3D12CommandProcessor::UpdateFixedFunctionState for more info.
|
||||
int16_t window_offset_y =
|
||||
(regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32 >> 16) & 0x7FFF;
|
||||
if (window_offset_y & 0x4000) {
|
||||
window_offset_y |= 0x8000;
|
||||
}
|
||||
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||
float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2))
|
||||
int32_t window_offset_y =
|
||||
regs.Get<reg::PA_SC_WINDOW_OFFSET>().window_y_offset;
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
float viewport_scale_y = pa_cl_vte_cntl.vport_y_scale_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
|
||||
: 1280.0f;
|
||||
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3))
|
||||
float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||
: std::abs(viewport_scale_y);
|
||||
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) {
|
||||
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
|
||||
viewport_offset_y += float(window_offset_y);
|
||||
}
|
||||
uint32_t viewport_bottom = uint32_t(std::max(
|
||||
0.0f, std::ceil(viewport_offset_y + std::abs(viewport_scale_y))));
|
||||
uint32_t scissor_bottom =
|
||||
(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32 >> 16) & 0x7FFF;
|
||||
if (!(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32 & (1u << 31))) {
|
||||
uint32_t scissor_bottom = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>().br_y;
|
||||
if (!regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>().window_offset_disable) {
|
||||
scissor_bottom = std::max(int32_t(scissor_bottom) + window_offset_y, 0);
|
||||
}
|
||||
uint32_t dirty_bottom =
|
||||
|
@ -769,7 +767,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
|
||||
ClearBindings();
|
||||
current_surface_pitch_ = surface_pitch;
|
||||
current_msaa_samples_ = msaa_samples;
|
||||
current_msaa_samples_ = rb_surface_info.msaa_samples;
|
||||
if (!rov_used) {
|
||||
current_edram_max_rows_ = edram_max_rows;
|
||||
}
|
||||
|
@ -801,8 +799,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
#endif
|
||||
}
|
||||
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
|
||||
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
|
||||
render_targets_to_attach);
|
||||
full_update ? "Full" : "Partial", surface_pitch,
|
||||
rb_surface_info.msaa_samples, render_targets_to_attach);
|
||||
|
||||
#if 0
|
||||
auto device =
|
||||
|
@ -891,7 +889,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
if (!rov_used) {
|
||||
// Sample positions when loading depth must match sample positions when
|
||||
// drawing.
|
||||
command_processor_->SetSamplePositions(msaa_samples);
|
||||
command_processor_->SetSamplePositions(rb_surface_info.msaa_samples);
|
||||
|
||||
// Load the contents of the new render targets from the EDRAM buffer (will
|
||||
// change the state of the render targets to copy destination).
|
||||
|
@ -1007,18 +1005,14 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
|||
auto& regs = *register_file_;
|
||||
|
||||
// Get the render target properties.
|
||||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
|
||||
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
|
||||
uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u);
|
||||
if (surface_pitch == 0) {
|
||||
return true;
|
||||
}
|
||||
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
||||
// Depth info is always needed because color resolve may also clear depth.
|
||||
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
||||
uint32_t depth_edram_base = rb_depth_info & 0xFFF;
|
||||
uint32_t depth_format = (rb_depth_info >> 16) & 0x1;
|
||||
uint32_t surface_index = rb_copy_control & 0x7;
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
uint32_t surface_index = regs.Get<reg::RB_COPY_CONTROL>().copy_src_select;
|
||||
if (surface_index > 4) {
|
||||
assert_always();
|
||||
return false;
|
||||
|
@ -1027,43 +1021,28 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
|||
uint32_t surface_edram_base;
|
||||
uint32_t surface_format;
|
||||
if (surface_is_depth) {
|
||||
surface_edram_base = depth_edram_base;
|
||||
surface_format = depth_format;
|
||||
surface_edram_base = rb_depth_info.depth_base;
|
||||
surface_format = uint32_t(rb_depth_info.depth_format);
|
||||
} else {
|
||||
uint32_t rb_color_info;
|
||||
switch (surface_index) {
|
||||
case 1:
|
||||
rb_color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
|
||||
break;
|
||||
case 2:
|
||||
rb_color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
|
||||
break;
|
||||
case 3:
|
||||
rb_color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
|
||||
break;
|
||||
default:
|
||||
rb_color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
|
||||
break;
|
||||
}
|
||||
surface_edram_base = rb_color_info & 0xFFF;
|
||||
surface_format = uint32_t(GetBaseColorFormat(
|
||||
ColorRenderTargetFormat((rb_color_info >> 16) & 0xF)));
|
||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[surface_index]);
|
||||
surface_edram_base = color_info.color_base;
|
||||
surface_format = uint32_t(GetBaseColorFormat(color_info.color_format));
|
||||
}
|
||||
|
||||
// Get the resolve region since both copying and clearing need it.
|
||||
// HACK: Vertices to use are always in vf0.
|
||||
auto fetch_group = reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(
|
||||
&regs.values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]);
|
||||
const auto& fetch = fetch_group->vertex_fetch_0;
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
|
||||
assert_true(fetch.type == 3);
|
||||
assert_true(fetch.endian == 2);
|
||||
assert_true(fetch.endian == Endian::k8in32);
|
||||
assert_true(fetch.size == 6);
|
||||
const uint8_t* src_vertex_address =
|
||||
memory->TranslatePhysical(fetch.address << 2);
|
||||
float vertices[6];
|
||||
// Most vertices have a negative half pixel offset applied, which we reverse.
|
||||
float vertex_offset =
|
||||
(regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1) ? 0.0f : 0.5f;
|
||||
regs.Get<reg::PA_SU_VTX_CNTL>().pix_center ? 0.0f : 0.5f;
|
||||
for (uint32_t i = 0; i < 6; ++i) {
|
||||
vertices[i] =
|
||||
xenos::GpuSwap(xe::load<float>(src_vertex_address + i * sizeof(float)),
|
||||
|
@ -1097,39 +1076,34 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
|||
// vertices (-640,0)->(640,720), however, the destination texture pointer is
|
||||
// adjusted properly to the right half of the texture, and the source render
|
||||
// target has a pitch of 800).
|
||||
auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
|
||||
D3D12_RECT rect;
|
||||
rect.left = LONG(std::min(std::min(vertices[0], vertices[2]), vertices[4]));
|
||||
rect.right = LONG(std::max(std::max(vertices[0], vertices[2]), vertices[4]));
|
||||
rect.top = LONG(std::min(std::min(vertices[1], vertices[3]), vertices[5]));
|
||||
rect.bottom = LONG(std::max(std::max(vertices[1], vertices[3]), vertices[5]));
|
||||
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
|
||||
rect.left += pa_sc_window_offset.window_x_offset;
|
||||
rect.right += pa_sc_window_offset.window_x_offset;
|
||||
rect.top += pa_sc_window_offset.window_y_offset;
|
||||
rect.bottom += pa_sc_window_offset.window_y_offset;
|
||||
}
|
||||
D3D12_RECT scissor;
|
||||
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
|
||||
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
|
||||
scissor.left = LONG(window_scissor_tl & 0x7FFF);
|
||||
scissor.right = LONG(window_scissor_br & 0x7FFF);
|
||||
scissor.top = LONG((window_scissor_tl >> 16) & 0x7FFF);
|
||||
scissor.bottom = LONG((window_scissor_br >> 16) & 0x7FFF);
|
||||
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) {
|
||||
uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
|
||||
int16_t window_offset_x = pa_sc_window_offset & 0x7FFF;
|
||||
int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF;
|
||||
if (window_offset_x & 0x4000) {
|
||||
window_offset_x |= 0x8000;
|
||||
}
|
||||
if (window_offset_y & 0x4000) {
|
||||
window_offset_y |= 0x8000;
|
||||
}
|
||||
rect.left += window_offset_x;
|
||||
rect.right += window_offset_x;
|
||||
rect.top += window_offset_y;
|
||||
rect.bottom += window_offset_y;
|
||||
if (!(window_scissor_tl & (1u << 31))) {
|
||||
scissor.left = std::max(LONG(scissor.left + window_offset_x), LONG(0));
|
||||
scissor.right = std::max(LONG(scissor.right + window_offset_x), LONG(0));
|
||||
scissor.top = std::max(LONG(scissor.top + window_offset_y), LONG(0));
|
||||
scissor.bottom =
|
||||
std::max(LONG(scissor.bottom + window_offset_y), LONG(0));
|
||||
}
|
||||
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
|
||||
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
|
||||
scissor.left = pa_sc_window_scissor_tl.tl_x;
|
||||
scissor.right = pa_sc_window_scissor_br.br_x;
|
||||
scissor.top = pa_sc_window_scissor_tl.tl_y;
|
||||
scissor.bottom = pa_sc_window_scissor_br.br_y;
|
||||
if (!pa_sc_window_scissor_tl.window_offset_disable) {
|
||||
scissor.left = std::max(
|
||||
LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0));
|
||||
scissor.right = std::max(
|
||||
LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0));
|
||||
scissor.top = std::max(
|
||||
LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0));
|
||||
scissor.bottom = std::max(
|
||||
LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0));
|
||||
}
|
||||
rect.left = std::max(rect.left, scissor.left);
|
||||
rect.right = std::min(rect.right, scissor.right);
|
||||
|
@ -1140,9 +1114,9 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
|||
"Resolve: (%d,%d)->(%d,%d) of RT %u (pitch %u, %u sample%s, format %u) "
|
||||
"at %u",
|
||||
rect.left, rect.top, rect.right, rect.bottom, surface_index,
|
||||
surface_pitch, 1 << uint32_t(msaa_samples),
|
||||
msaa_samples != MsaaSamples::k1X ? "s" : "", surface_format,
|
||||
surface_edram_base);
|
||||
surface_pitch, 1 << uint32_t(rb_surface_info.msaa_samples),
|
||||
rb_surface_info.msaa_samples != MsaaSamples::k1X ? "s" : "",
|
||||
surface_format, surface_edram_base);
|
||||
|
||||
if (rect.left >= rect.right || rect.top >= rect.bottom) {
|
||||
// Nothing to copy.
|
||||
|
@ -1157,18 +1131,20 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
|||
// GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed
|
||||
// clamping to the source render target size.
|
||||
|
||||
bool result =
|
||||
ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
||||
surface_pitch, msaa_samples, surface_is_depth, surface_format,
|
||||
rect, written_address_out, written_length_out);
|
||||
bool result = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
||||
surface_pitch, rb_surface_info.msaa_samples,
|
||||
surface_is_depth, surface_format, rect,
|
||||
written_address_out, written_length_out);
|
||||
// Clear the color RT if needed.
|
||||
if (!surface_is_depth) {
|
||||
result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples,
|
||||
false, surface_format, rect);
|
||||
result &=
|
||||
ResolveClear(surface_edram_base, surface_pitch,
|
||||
rb_surface_info.msaa_samples, false, surface_format, rect);
|
||||
}
|
||||
// Clear the depth RT if needed (may be cleared alongside color).
|
||||
result &= ResolveClear(depth_edram_base, surface_pitch, msaa_samples, true,
|
||||
depth_format, rect);
|
||||
result &= ResolveClear(rb_depth_info.depth_base, surface_pitch,
|
||||
rb_surface_info.msaa_samples, true,
|
||||
uint32_t(rb_depth_info.depth_format), rect);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -1183,19 +1159,18 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
|
||||
auto& regs = *register_file_;
|
||||
|
||||
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
||||
xenos::CopyCommand copy_command =
|
||||
xenos::CopyCommand((rb_copy_control >> 20) & 0x3);
|
||||
if (copy_command != xenos::CopyCommand::kRaw &&
|
||||
copy_command != xenos::CopyCommand::kConvert) {
|
||||
auto rb_copy_control = regs.Get<reg::RB_COPY_CONTROL>();
|
||||
if (rb_copy_control.copy_command != xenos::CopyCommand::kRaw &&
|
||||
rb_copy_control.copy_command != xenos::CopyCommand::kConvert) {
|
||||
// TODO(Triang3l): Handle kConstantOne and kNull.
|
||||
assert_always();
|
||||
return false;
|
||||
}
|
||||
|
||||
auto command_list = command_processor_->GetDeferredCommandList();
|
||||
|
||||
// Get format info.
|
||||
uint32_t rb_copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
|
||||
auto rb_copy_dest_info = regs.Get<reg::RB_COPY_DEST_INFO>();
|
||||
TextureFormat src_texture_format;
|
||||
bool src_64bpp;
|
||||
if (is_depth) {
|
||||
|
@ -1222,14 +1197,15 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
// The destination format is specified as k_8_8_8_8 when resolving depth, but
|
||||
// no format conversion is done for depth, so ignore it.
|
||||
TextureFormat dest_format =
|
||||
is_depth ? src_texture_format
|
||||
: GetBaseFormat(TextureFormat((rb_copy_dest_info >> 7) & 0x3F));
|
||||
is_depth
|
||||
? src_texture_format
|
||||
: GetBaseFormat(TextureFormat(rb_copy_dest_info.copy_dest_format));
|
||||
const FormatInfo* dest_format_info = FormatInfo::Get(dest_format);
|
||||
|
||||
// Get the destination region and clamp the source region to it.
|
||||
uint32_t rb_copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
|
||||
uint32_t dest_pitch = rb_copy_dest_pitch & 0x3FFF;
|
||||
uint32_t dest_height = (rb_copy_dest_pitch >> 16) & 0x3FFF;
|
||||
auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>();
|
||||
uint32_t dest_pitch = rb_copy_dest_pitch.copy_dest_pitch;
|
||||
uint32_t dest_height = rb_copy_dest_pitch.copy_dest_height;
|
||||
if (dest_pitch == 0 || dest_height == 0) {
|
||||
// Nothing to copy.
|
||||
return true;
|
||||
|
@ -1263,8 +1239,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
|
||||
// An example of a 3D resolve destination is the color grading LUT (used
|
||||
// starting from the developer/publisher intro) in Dead Space 3.
|
||||
bool dest_3d = (rb_copy_dest_info & (1 << 3)) != 0;
|
||||
if (dest_3d) {
|
||||
if (rb_copy_dest_info.copy_dest_array) {
|
||||
dest_address += texture_util::GetTiledOffset3D(
|
||||
int(rect.left & ~LONG(31)), int(rect.top & ~LONG(31)), 0, dest_pitch,
|
||||
dest_height, xe::log2_floor(dest_format_info->bits_per_pixel >> 3));
|
||||
|
@ -1279,21 +1254,20 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
// resolve to 8bpp or 16bpp textures at very odd locations.
|
||||
return false;
|
||||
}
|
||||
uint32_t dest_z = dest_3d ? ((rb_copy_dest_info >> 4) & 0x7) : 0;
|
||||
uint32_t dest_z =
|
||||
rb_copy_dest_info.copy_dest_array ? rb_copy_dest_info.copy_dest_slice : 0;
|
||||
|
||||
// See what samples we need and what we should do with them.
|
||||
xenos::CopySampleSelect sample_select =
|
||||
xenos::CopySampleSelect((rb_copy_control >> 4) & 0x7);
|
||||
xenos::CopySampleSelect sample_select = rb_copy_control.copy_sample_select;
|
||||
if (is_depth && sample_select > xenos::CopySampleSelect::k3) {
|
||||
assert_always();
|
||||
return false;
|
||||
}
|
||||
Endian128 dest_endian = Endian128(rb_copy_dest_info & 0x7);
|
||||
int32_t dest_exp_bias;
|
||||
if (is_depth) {
|
||||
dest_exp_bias = 0;
|
||||
} else {
|
||||
dest_exp_bias = int32_t((rb_copy_dest_info >> 16) << 26) >> 26;
|
||||
dest_exp_bias = rb_copy_dest_info.copy_dest_exp_bias;
|
||||
if (ColorRenderTargetFormat(src_format) ==
|
||||
ColorRenderTargetFormat::k_16_16 ||
|
||||
ColorRenderTargetFormat(src_format) ==
|
||||
|
@ -1309,14 +1283,14 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
}
|
||||
}
|
||||
}
|
||||
bool dest_swap = !is_depth && ((rb_copy_dest_info >> 24) & 0x1);
|
||||
bool dest_swap = !is_depth && rb_copy_dest_info.copy_dest_swap;
|
||||
|
||||
XELOGGPU(
|
||||
"Resolve: Copying samples %u to 0x%.8X (%ux%u, %cD), destination Z %u, "
|
||||
"destination format %s, exponent bias %d, red and blue %sswapped",
|
||||
uint32_t(sample_select), dest_address, dest_pitch, dest_height,
|
||||
dest_3d ? '3' : '2', dest_z, dest_format_info->name, dest_exp_bias,
|
||||
dest_swap ? "" : "not ");
|
||||
rb_copy_dest_info.copy_dest_array ? '3' : '2', dest_z,
|
||||
dest_format_info->name, dest_exp_bias, dest_swap ? "" : "not ");
|
||||
|
||||
// There are 2 paths for resolving in this function - they don't necessarily
|
||||
// have to map directly to kRaw and kConvert CopyCommands.
|
||||
|
@ -1344,7 +1318,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
resolution_scale_2x_ &&
|
||||
cvars::d3d12_resolution_scale_resolve_edge_clamp &&
|
||||
cvars::d3d12_half_pixel_offset &&
|
||||
!(regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1);
|
||||
!regs.Get<reg::PA_SU_VTX_CNTL>().pix_center;
|
||||
if (sample_select <= xenos::CopySampleSelect::k3 &&
|
||||
src_texture_format == dest_format && dest_exp_bias == 0) {
|
||||
// *************************************************************************
|
||||
|
@ -1363,7 +1337,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
uint32_t dest_size;
|
||||
uint32_t dest_modified_start = dest_address;
|
||||
uint32_t dest_modified_length;
|
||||
if (dest_3d) {
|
||||
if (rb_copy_dest_info.copy_dest_array) {
|
||||
// Depth granularity is 4 (though TiledAddress chaining is possible with 8
|
||||
// granularity).
|
||||
dest_size = texture_util::GetGuestMipSliceStorageSize(
|
||||
|
@ -1442,8 +1416,10 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
assert_true(dest_pitch <= 8192);
|
||||
root_constants.tile_sample_dest_info =
|
||||
((dest_pitch + 31) >> 5) |
|
||||
(dest_3d ? (((dest_height + 31) >> 5) << 9) : 0) |
|
||||
(uint32_t(sample_select) << 18) | (uint32_t(dest_endian) << 20);
|
||||
(rb_copy_dest_info.copy_dest_array ? (((dest_height + 31) >> 5) << 9)
|
||||
: 0) |
|
||||
(uint32_t(sample_select) << 18) |
|
||||
(uint32_t(rb_copy_dest_info.copy_dest_endian) << 20);
|
||||
if (dest_swap) {
|
||||
root_constants.tile_sample_dest_info |= (1 << 23) | (src_format << 24);
|
||||
}
|
||||
|
@ -1797,10 +1773,12 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
||||
// dest_address already adjusted, so offsets are & 31.
|
||||
texture_cache->TileResolvedTexture(
|
||||
dest_format, dest_address, dest_pitch, dest_height, dest_3d,
|
||||
uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width,
|
||||
copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size,
|
||||
resolve_target->footprint, &written_address_out, &written_length_out);
|
||||
dest_format, dest_address, dest_pitch, dest_height,
|
||||
rb_copy_dest_info.copy_dest_array != 0, uint32_t(rect.left) & 31,
|
||||
uint32_t(rect.top) & 31, dest_z, copy_width, copy_height,
|
||||
rb_copy_dest_info.copy_dest_endian, copy_buffer,
|
||||
resolve_target->copy_buffer_size, resolve_target->footprint,
|
||||
&written_address_out, &written_length_out);
|
||||
|
||||
// Done with the copy buffer.
|
||||
|
||||
|
@ -1817,9 +1795,15 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
|
|||
auto& regs = *register_file_;
|
||||
|
||||
// Check if clearing is enabled.
|
||||
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
||||
if (!(rb_copy_control & (is_depth ? (1 << 9) : (1 << 8)))) {
|
||||
return true;
|
||||
auto rb_copy_control = regs.Get<reg::RB_COPY_CONTROL>();
|
||||
if (is_depth) {
|
||||
if (!rb_copy_control.depth_clear_enable) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (!rb_copy_control.color_clear_enable) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
XELOGGPU("Resolve: Clearing the %s render target",
|
||||
|
@ -1886,7 +1870,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
|
|||
} else if (is_64bpp) {
|
||||
// TODO(Triang3l): Check which 32-bit portion is in which register.
|
||||
root_constants.clear_color_high = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
|
||||
root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
|
||||
root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32;
|
||||
command_processor_->SetComputePipeline(edram_clear_64bpp_pipeline_);
|
||||
} else {
|
||||
Register reg =
|
||||
|
|
|
@ -34,7 +34,6 @@ namespace d3d12 {
|
|||
|
||||
constexpr uint32_t SharedMemory::kBufferSizeLog2;
|
||||
constexpr uint32_t SharedMemory::kBufferSize;
|
||||
constexpr uint32_t SharedMemory::kAddressMask;
|
||||
constexpr uint32_t SharedMemory::kHeapSizeLog2;
|
||||
constexpr uint32_t SharedMemory::kHeapSize;
|
||||
constexpr uint32_t SharedMemory::kWatchBucketSizeLog2;
|
||||
|
@ -198,10 +197,9 @@ void SharedMemory::UnregisterGlobalWatch(GlobalWatchHandle handle) {
|
|||
SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
|
||||
uint32_t start, uint32_t length, WatchCallback callback,
|
||||
void* callback_context, void* callback_data, uint64_t callback_argument) {
|
||||
if (length == 0) {
|
||||
if (length == 0 || start >= kBufferSize) {
|
||||
return nullptr;
|
||||
}
|
||||
start &= kAddressMask;
|
||||
length = std::min(length, kBufferSize - start);
|
||||
uint32_t watch_page_first = start >> page_size_log2_;
|
||||
uint32_t watch_page_last = (start + length - 1) >> page_size_log2_;
|
||||
|
@ -278,9 +276,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
|
|||
// Some texture is empty, for example - safe to draw in this case.
|
||||
return true;
|
||||
}
|
||||
start &= kAddressMask;
|
||||
if ((kBufferSize - start) < length) {
|
||||
// Exceeds the physical address space.
|
||||
if (start > kBufferSize || (kBufferSize - start) < length) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -343,9 +339,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
|
|||
// Some texture is empty, for example - safe to draw in this case.
|
||||
return true;
|
||||
}
|
||||
start &= kAddressMask;
|
||||
if ((kBufferSize - start) < length) {
|
||||
// Exceeds the physical address space.
|
||||
if (start > kBufferSize || (kBufferSize - start) < length) {
|
||||
return false;
|
||||
}
|
||||
uint32_t last = start + length - 1;
|
||||
|
@ -433,8 +427,7 @@ void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last,
|
|||
}
|
||||
|
||||
void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) {
|
||||
start &= kAddressMask;
|
||||
if (length == 0) {
|
||||
if (length == 0 || start >= kBufferSize) {
|
||||
return;
|
||||
}
|
||||
length = std::min(length, kBufferSize - start);
|
||||
|
|
|
@ -138,7 +138,6 @@ class SharedMemory {
|
|||
// The 512 MB tiled buffer.
|
||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
|
||||
static constexpr uint32_t kAddressMask = kBufferSize - 1;
|
||||
ID3D12Resource* buffer_ = nullptr;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0;
|
||||
D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -266,13 +266,8 @@ class TextureCache {
|
|||
DXGI_FORMAT dxgi_format_resolve_tile;
|
||||
ResolveTileMode resolve_tile_mode;
|
||||
|
||||
// Whether the red component must be replicated in the SRV swizzle, for
|
||||
// single-component formats. At least for DXT3A/DXT5A, this is according to
|
||||
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||
// k_8 is also used with RGBA swizzle, but assumes replicated components, in
|
||||
// Halo 3 sprites, thus it appears that all single-component formats should
|
||||
// have RRRR swizzle.
|
||||
bool replicate_component;
|
||||
// Mapping of Xenos swizzle components to DXGI format components.
|
||||
uint8_t swizzle[4];
|
||||
};
|
||||
|
||||
union TextureKey {
|
||||
|
|
|
@ -416,9 +416,13 @@ void DxbcShaderTranslator::ConvertPWLGamma(
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
||||
if (register_count() < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Vertex index is in an input bound to SV_VertexID, byte swapped according to
|
||||
// xe_vertex_index_endian_and_edge_factors system constant and written to GPR
|
||||
// 0 (which is always present because register_count includes +1).
|
||||
// xe_vertex_index_endian_and_edge_factors system constant and written to
|
||||
// GPR 0.
|
||||
|
||||
// xe_vertex_index_endian_and_edge_factors & 0b11 is:
|
||||
// - 00 for no swap.
|
||||
|
@ -756,157 +760,161 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
// Write the vertex index to GPR 0.
|
||||
StartVertexShader_LoadVertexIndex();
|
||||
} else if (IsDxbcDomainShader()) {
|
||||
uint32_t temp_register_operand_length =
|
||||
uses_register_dynamic_addressing() ? 3 : 2;
|
||||
|
||||
// Copy the domain location to r0.yz (for quad patches) or r0.xyz (for
|
||||
// triangle patches), and also set the domain in STAT.
|
||||
uint32_t domain_location_mask, domain_location_swizzle;
|
||||
if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
|
||||
domain_location_mask = 0b0111;
|
||||
// ZYX swizzle with r1.y == 0, according to the water shader in
|
||||
// Banjo-Kazooie: Nuts & Bolts.
|
||||
domain_location_swizzle = 0b00000110;
|
||||
stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI;
|
||||
} else {
|
||||
// TODO(Triang3l): Support line patches.
|
||||
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
|
||||
// According to the ground shader in Viva Pinata, though it's impossible
|
||||
// (as of December 12th, 2018) to test there since it possibly requires
|
||||
// memexport for ground control points (the memory region with them is
|
||||
// filled with zeros).
|
||||
domain_location_mask = 0b0110;
|
||||
domain_location_swizzle = 0b00000100;
|
||||
stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD;
|
||||
}
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||
2 + temp_register_operand_length));
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2));
|
||||
shader_code_.push_back(0);
|
||||
} else {
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1));
|
||||
}
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, domain_location_swizzle, 0));
|
||||
++stat_.instruction_count;
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
++stat_.array_instruction_count;
|
||||
} else {
|
||||
++stat_.mov_instruction_count;
|
||||
}
|
||||
|
||||
assert_true(register_count() >= 2);
|
||||
if (register_count() != 0) {
|
||||
uint32_t temp_register_operand_length =
|
||||
uses_register_dynamic_addressing() ? 3 : 2;
|
||||
|
||||
// Copy the primitive index to r0.x (for quad patches) or r1.x (for
|
||||
// triangle patches) as a float.
|
||||
// When using indexable temps, copy through a r# because x# are apparently
|
||||
// only accessible via mov.
|
||||
// TODO(Triang3l): Investigate what should be written for primitives (or
|
||||
// even control points) for non-adaptive tessellation modes (they may
|
||||
// possibly have an index buffer).
|
||||
// TODO(Triang3l): Support line patches.
|
||||
uint32_t primitive_id_gpr_index =
|
||||
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0;
|
||||
|
||||
if (register_count() > primitive_id_gpr_index) {
|
||||
uint32_t primitive_id_temp = uses_register_dynamic_addressing()
|
||||
? PushSystemTemp()
|
||||
: primitive_id_gpr_index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(primitive_id_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0));
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(primitive_id_gpr_index);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(primitive_id_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.array_instruction_count;
|
||||
// Release primitive_id_temp.
|
||||
PopSystemTemp();
|
||||
// Copy the domain location to r0.yz (for quad patches) or r0.xyz (for
|
||||
// triangle patches), and also set the domain in STAT.
|
||||
uint32_t domain_location_mask, domain_location_swizzle;
|
||||
if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
|
||||
domain_location_mask = 0b0111;
|
||||
// ZYX swizzle with r1.y == 0, according to the water shader in
|
||||
// Banjo-Kazooie: Nuts & Bolts.
|
||||
domain_location_swizzle = 0b00000110;
|
||||
stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI;
|
||||
} else {
|
||||
// TODO(Triang3l): Support line patches.
|
||||
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
|
||||
// According to the ground shader in Viva Pinata, though it's impossible
|
||||
// (as of December 12th, 2018) to test there since it possibly requires
|
||||
// memexport for ground control points (the memory region with them is
|
||||
// filled with zeros).
|
||||
domain_location_mask = 0b0110;
|
||||
domain_location_swizzle = 0b00000100;
|
||||
stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD;
|
||||
}
|
||||
}
|
||||
|
||||
if (register_count() >= 2) {
|
||||
// Write the swizzle of the barycentric/UV coordinates to r1.x (for quad
|
||||
// patches) or r1.y (for triangle patches). It appears that the
|
||||
// tessellator offloads the reordering of coordinates for edges to game
|
||||
// shaders.
|
||||
//
|
||||
// In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge
|
||||
// factors), the shader multiplies the first control point's position by
|
||||
// r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before
|
||||
// doing that it swizzles r0.xyz the following way depending on the value
|
||||
// in r1.y:
|
||||
// - ZXY for 1.0.
|
||||
// - YZX for 2.0.
|
||||
// - XZY for 4.0.
|
||||
// - YXZ for 5.0.
|
||||
// - ZYX for 6.0.
|
||||
// Possibly, the logic here is that the value itself is the amount of
|
||||
// rotation of the swizzle to the right, and 1 << 2 is set when the
|
||||
// swizzle needs to be flipped before rotating.
|
||||
//
|
||||
// In Viva Pinata (quad patches with per-edge factors - not possible to
|
||||
// test however as of December 12th, 2018), if we assume that r0.y is V
|
||||
// and r0.z is U, the factors each control point value is multiplied by
|
||||
// are the following:
|
||||
// - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle).
|
||||
// - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ).
|
||||
// - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX).
|
||||
// - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY).
|
||||
// According to the control point order at
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt
|
||||
// the first is located at (0,0), the second at (0,1), the third at (1,0)
|
||||
// and the fourth at (1,1). So, swizzle index 0 appears to be the correct
|
||||
// one. But, this hasn't been tested yet.
|
||||
//
|
||||
// Direct3D 12 appears to be passing the coordinates in a consistent
|
||||
// order, so we can just use ZYX for triangle patches.
|
||||
//
|
||||
// TODO(Triang3l): Support line patches.
|
||||
uint32_t domain_location_swizzle_mask =
|
||||
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010
|
||||
: 0b0001;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||
3 + temp_register_operand_length));
|
||||
2 + temp_register_operand_length));
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP,
|
||||
domain_location_swizzle_mask, 2));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2));
|
||||
shader_code_.push_back(0);
|
||||
} else {
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1));
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1));
|
||||
}
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT,
|
||||
domain_location_swizzle, 0));
|
||||
++stat_.instruction_count;
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
++stat_.array_instruction_count;
|
||||
} else {
|
||||
++stat_.mov_instruction_count;
|
||||
}
|
||||
|
||||
// Copy the primitive index to r0.x (for quad patches) or r1.x (for
|
||||
// triangle patches) as a float.
|
||||
// When using indexable temps, copy through a r# because x# are apparently
|
||||
// only accessible via mov.
|
||||
// TODO(Triang3l): Investigate what should be written for primitives (or
|
||||
// even control points) for non-adaptive tessellation modes (they may
|
||||
// possibly have an index buffer).
|
||||
// TODO(Triang3l): Support line patches.
|
||||
uint32_t primitive_id_gpr_index =
|
||||
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0;
|
||||
|
||||
if (register_count() > primitive_id_gpr_index) {
|
||||
uint32_t primitive_id_temp = uses_register_dynamic_addressing()
|
||||
? PushSystemTemp()
|
||||
: primitive_id_gpr_index;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(primitive_id_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0));
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(primitive_id_gpr_index);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(primitive_id_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.array_instruction_count;
|
||||
// Release primitive_id_temp.
|
||||
PopSystemTemp();
|
||||
}
|
||||
}
|
||||
|
||||
if (register_count() >= 2) {
|
||||
// Write the swizzle of the barycentric/UV coordinates to r1.x (for quad
|
||||
// patches) or r1.y (for triangle patches). It appears that the
|
||||
// tessellator offloads the reordering of coordinates for edges to game
|
||||
// shaders.
|
||||
//
|
||||
// In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge
|
||||
// factors), the shader multiplies the first control point's position by
|
||||
// r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before
|
||||
// doing that it swizzles r0.xyz the following way depending on the
|
||||
// value in r1.y:
|
||||
// - ZXY for 1.0.
|
||||
// - YZX for 2.0.
|
||||
// - XZY for 4.0.
|
||||
// - YXZ for 5.0.
|
||||
// - ZYX for 6.0.
|
||||
// Possibly, the logic here is that the value itself is the amount of
|
||||
// rotation of the swizzle to the right, and 1 << 2 is set when the
|
||||
// swizzle needs to be flipped before rotating.
|
||||
//
|
||||
// In Viva Pinata (quad patches with per-edge factors - not possible to
|
||||
// test however as of December 12th, 2018), if we assume that r0.y is V
|
||||
// and r0.z is U, the factors each control point value is multiplied by
|
||||
// are the following:
|
||||
// - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle).
|
||||
// - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ).
|
||||
// - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX).
|
||||
// - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY).
|
||||
// According to the control point order at
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt
|
||||
// the first is located at (0,0), the second at (0,1), the third at
|
||||
// (1,0) and the fourth at (1,1). So, swizzle index 0 appears to be the
|
||||
// correct one. But, this hasn't been tested yet.
|
||||
//
|
||||
// Direct3D 12 appears to be passing the coordinates in a consistent
|
||||
// order, so we can just use ZYX for triangle patches.
|
||||
//
|
||||
// TODO(Triang3l): Support line patches.
|
||||
uint32_t domain_location_swizzle_mask =
|
||||
patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010
|
||||
: 0b0001;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
|
||||
3 + temp_register_operand_length));
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP,
|
||||
domain_location_swizzle_mask, 2));
|
||||
shader_code_.push_back(0);
|
||||
} else {
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1));
|
||||
}
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(0);
|
||||
++stat_.instruction_count;
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
++stat_.array_instruction_count;
|
||||
} else {
|
||||
++stat_.mov_instruction_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4796,6 +4804,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
|
||||
// General-purpose registers if using dynamic indexing (x0).
|
||||
if (!is_depth_only_pixel_shader_ && uses_register_dynamic_addressing()) {
|
||||
assert_true(register_count() != 0);
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
||||
|
|
|
@ -503,6 +503,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kVSOutPosition,
|
||||
kVSOutClipDistance0123,
|
||||
kVSOutClipDistance45,
|
||||
// TODO(Triang3l): Use SV_CullDistance instead for
|
||||
// PA_CL_CLIP_CNTL::UCP_CULL_ONLY_ENA, but can't have more than 8 clip and
|
||||
// cull distances in total.
|
||||
|
||||
kPSInInterpolators = 0,
|
||||
kPSInPointParameters = kPSInInterpolators + kInterpolatorCount,
|
||||
|
|
|
@ -13,15 +13,11 @@
|
|||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "xenia/gpu/registers.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
enum Register {
|
||||
#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index,
|
||||
#include "xenia/gpu/register_table.inc"
|
||||
#undef XE_GPU_REGISTER
|
||||
};
|
||||
|
||||
struct RegisterInfo {
|
||||
enum class Type {
|
||||
kDword,
|
||||
|
@ -44,8 +40,20 @@ class RegisterFile {
|
|||
};
|
||||
RegisterValue values[kRegisterCount];
|
||||
|
||||
RegisterValue& operator[](int reg) { return values[reg]; }
|
||||
RegisterValue& operator[](uint32_t reg) { return values[reg]; }
|
||||
RegisterValue& operator[](Register reg) { return values[reg]; }
|
||||
template <typename T>
|
||||
T& Get(uint32_t reg) {
|
||||
return *reinterpret_cast<T*>(&values[reg]);
|
||||
}
|
||||
template <typename T>
|
||||
T& Get(Register reg) {
|
||||
return *reinterpret_cast<T*>(&values[reg]);
|
||||
}
|
||||
template <typename T>
|
||||
T& Get() {
|
||||
return *reinterpret_cast<T*>(&values[T::register_index]);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -134,7 +134,7 @@ XE_GPU_REGISTER(0x2184, kDword, SQ_WRAPPING_1)
|
|||
XE_GPU_REGISTER(0x21F9, kDword, VGT_EVENT_INITIATOR)
|
||||
|
||||
XE_GPU_REGISTER(0x2200, kDword, RB_DEPTHCONTROL)
|
||||
XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL_0)
|
||||
XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL0)
|
||||
XE_GPU_REGISTER(0x2202, kDword, RB_COLORCONTROL)
|
||||
XE_GPU_REGISTER(0x2203, kDword, RB_HIZCONTROL)
|
||||
XE_GPU_REGISTER(0x2204, kDword, PA_CL_CLIP_CNTL)
|
||||
|
@ -142,9 +142,9 @@ XE_GPU_REGISTER(0x2205, kDword, PA_SU_SC_MODE_CNTL)
|
|||
XE_GPU_REGISTER(0x2206, kDword, PA_CL_VTE_CNTL)
|
||||
XE_GPU_REGISTER(0x2207, kDword, VGT_CURRENT_BIN_ID_MIN)
|
||||
XE_GPU_REGISTER(0x2208, kDword, RB_MODECONTROL)
|
||||
XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL_1)
|
||||
XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL_2)
|
||||
XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL_3)
|
||||
XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL1)
|
||||
XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL2)
|
||||
XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL3)
|
||||
|
||||
XE_GPU_REGISTER(0x2280, kDword, PA_SU_POINT_SIZE)
|
||||
XE_GPU_REGISTER(0x2281, kDword, PA_SU_POINT_MINMAX)
|
||||
|
@ -199,7 +199,7 @@ XE_GPU_REGISTER(0x231B, kDword, RB_COPY_DEST_INFO)
|
|||
XE_GPU_REGISTER(0x231C, kDword, RB_HIZ_CLEAR)
|
||||
XE_GPU_REGISTER(0x231D, kDword, RB_DEPTH_CLEAR)
|
||||
XE_GPU_REGISTER(0x231E, kDword, RB_COLOR_CLEAR)
|
||||
XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LOW)
|
||||
XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LO)
|
||||
XE_GPU_REGISTER(0x2320, kDword, RB_COPY_FUNC)
|
||||
XE_GPU_REGISTER(0x2321, kDword, RB_COPY_REF)
|
||||
XE_GPU_REGISTER(0x2322, kDword, RB_COPY_MASK)
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2019 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/registers.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace reg {
|
||||
|
||||
constexpr Register COHER_STATUS_HOST::register_index;
|
||||
constexpr Register WAIT_UNTIL::register_index;
|
||||
|
||||
constexpr Register SQ_PROGRAM_CNTL::register_index;
|
||||
constexpr Register SQ_CONTEXT_MISC::register_index;
|
||||
|
||||
constexpr Register VGT_OUTPUT_PATH_CNTL::register_index;
|
||||
constexpr Register VGT_HOS_CNTL::register_index;
|
||||
|
||||
constexpr Register PA_SU_POINT_MINMAX::register_index;
|
||||
constexpr Register PA_SU_POINT_SIZE::register_index;
|
||||
constexpr Register PA_SU_SC_MODE_CNTL::register_index;
|
||||
constexpr Register PA_SU_VTX_CNTL::register_index;
|
||||
constexpr Register PA_SC_MPASS_PS_CNTL::register_index;
|
||||
constexpr Register PA_SC_VIZ_QUERY::register_index;
|
||||
constexpr Register PA_CL_CLIP_CNTL::register_index;
|
||||
constexpr Register PA_CL_VTE_CNTL::register_index;
|
||||
constexpr Register PA_SC_WINDOW_OFFSET::register_index;
|
||||
constexpr Register PA_SC_WINDOW_SCISSOR_TL::register_index;
|
||||
constexpr Register PA_SC_WINDOW_SCISSOR_BR::register_index;
|
||||
|
||||
constexpr Register RB_MODECONTROL::register_index;
|
||||
constexpr Register RB_SURFACE_INFO::register_index;
|
||||
constexpr Register RB_COLORCONTROL::register_index;
|
||||
constexpr Register RB_COLOR_INFO::register_index;
|
||||
const Register RB_COLOR_INFO::rt_register_indices[4] = {
|
||||
XE_GPU_REG_RB_COLOR_INFO,
|
||||
XE_GPU_REG_RB_COLOR1_INFO,
|
||||
XE_GPU_REG_RB_COLOR2_INFO,
|
||||
XE_GPU_REG_RB_COLOR3_INFO,
|
||||
};
|
||||
constexpr Register RB_COLOR_MASK::register_index;
|
||||
constexpr Register RB_BLENDCONTROL::register_index;
|
||||
const Register RB_BLENDCONTROL::rt_register_indices[4] = {
|
||||
XE_GPU_REG_RB_BLENDCONTROL0,
|
||||
XE_GPU_REG_RB_BLENDCONTROL1,
|
||||
XE_GPU_REG_RB_BLENDCONTROL2,
|
||||
XE_GPU_REG_RB_BLENDCONTROL3,
|
||||
};
|
||||
constexpr Register RB_DEPTHCONTROL::register_index;
|
||||
constexpr Register RB_STENCILREFMASK::register_index;
|
||||
constexpr Register RB_DEPTH_INFO::register_index;
|
||||
constexpr Register RB_COPY_CONTROL::register_index;
|
||||
constexpr Register RB_COPY_DEST_INFO::register_index;
|
||||
constexpr Register RB_COPY_DEST_PITCH::register_index;
|
||||
|
||||
} // namespace reg
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -13,65 +13,155 @@
|
|||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "xenia/base/bit_field.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
// Most registers can be found from:
|
||||
// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h
|
||||
// Some registers were added on Adreno specifically and are not referenced in
|
||||
// game .pdb files and never set by games.
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
enum Register {
|
||||
#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index,
|
||||
#include "xenia/gpu/register_table.inc"
|
||||
#undef XE_GPU_REGISTER
|
||||
};
|
||||
|
||||
namespace reg {
|
||||
|
||||
/**************************************************
|
||||
/*******************************************************************************
|
||||
___ ___ _ _ _____ ___ ___ _
|
||||
/ __/ _ \| \| |_ _| _ \/ _ \| |
|
||||
| (_| (_) | .` | | | | / (_) | |__
|
||||
\___\___/|_|\_| |_| |_|_\\___/|____|
|
||||
|
||||
***************************************************/
|
||||
*******************************************************************************/
|
||||
|
||||
union COHER_STATUS_HOST {
|
||||
xe::bf<uint32_t, 0, 8> matching_contexts;
|
||||
xe::bf<uint32_t, 8, 1> rb_copy_dest_base_ena;
|
||||
xe::bf<uint32_t, 9, 1> dest_base_0_ena;
|
||||
xe::bf<uint32_t, 10, 1> dest_base_1_ena;
|
||||
xe::bf<uint32_t, 11, 1> dest_base_2_ena;
|
||||
xe::bf<uint32_t, 12, 1> dest_base_3_ena;
|
||||
xe::bf<uint32_t, 13, 1> dest_base_4_ena;
|
||||
xe::bf<uint32_t, 14, 1> dest_base_5_ena;
|
||||
xe::bf<uint32_t, 15, 1> dest_base_6_ena;
|
||||
xe::bf<uint32_t, 16, 1> dest_base_7_ena;
|
||||
|
||||
xe::bf<uint32_t, 24, 1> vc_action_ena;
|
||||
xe::bf<uint32_t, 25, 1> tc_action_ena;
|
||||
xe::bf<uint32_t, 26, 1> pglb_action_ena;
|
||||
|
||||
xe::bf<uint32_t, 31, 1> status;
|
||||
|
||||
struct {
|
||||
uint32_t matching_contexts : 8; // +0
|
||||
uint32_t rb_copy_dest_base_ena : 1; // +8
|
||||
uint32_t dest_base_0_ena : 1; // +9
|
||||
uint32_t dest_base_1_ena : 1; // +10
|
||||
uint32_t dest_base_2_ena : 1; // +11
|
||||
uint32_t dest_base_3_ena : 1; // +12
|
||||
uint32_t dest_base_4_ena : 1; // +13
|
||||
uint32_t dest_base_5_ena : 1; // +14
|
||||
uint32_t dest_base_6_ena : 1; // +15
|
||||
uint32_t dest_base_7_ena : 1; // +16
|
||||
uint32_t : 7; // +17
|
||||
uint32_t vc_action_ena : 1; // +24
|
||||
uint32_t tc_action_ena : 1; // +25
|
||||
uint32_t pglb_action_ena : 1; // +26
|
||||
uint32_t : 4; // +27
|
||||
uint32_t status : 1; // +31
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST;
|
||||
};
|
||||
|
||||
union WAIT_UNTIL {
|
||||
xe::bf<uint32_t, 1, 1> wait_re_vsync;
|
||||
xe::bf<uint32_t, 2, 1> wait_fe_vsync;
|
||||
xe::bf<uint32_t, 3, 1> wait_vsync;
|
||||
xe::bf<uint32_t, 4, 1> wait_dsply_id0;
|
||||
xe::bf<uint32_t, 5, 1> wait_dsply_id1;
|
||||
xe::bf<uint32_t, 6, 1> wait_dsply_id2;
|
||||
|
||||
xe::bf<uint32_t, 10, 1> wait_cmdfifo;
|
||||
|
||||
xe::bf<uint32_t, 14, 1> wait_2d_idle;
|
||||
xe::bf<uint32_t, 15, 1> wait_3d_idle;
|
||||
xe::bf<uint32_t, 16, 1> wait_2d_idleclean;
|
||||
xe::bf<uint32_t, 17, 1> wait_3d_idleclean;
|
||||
|
||||
xe::bf<uint32_t, 20, 4> cmdfifo_entries;
|
||||
|
||||
struct {
|
||||
uint32_t : 1; // +0
|
||||
uint32_t wait_re_vsync : 1; // +1
|
||||
uint32_t wait_fe_vsync : 1; // +2
|
||||
uint32_t wait_vsync : 1; // +3
|
||||
uint32_t wait_dsply_id0 : 1; // +4
|
||||
uint32_t wait_dsply_id1 : 1; // +5
|
||||
uint32_t wait_dsply_id2 : 1; // +6
|
||||
uint32_t : 3; // +7
|
||||
uint32_t wait_cmdfifo : 1; // +10
|
||||
uint32_t : 3; // +11
|
||||
uint32_t wait_2d_idle : 1; // +14
|
||||
uint32_t wait_3d_idle : 1; // +15
|
||||
uint32_t wait_2d_idleclean : 1; // +16
|
||||
uint32_t wait_3d_idleclean : 1; // +17
|
||||
uint32_t : 2; // +18
|
||||
uint32_t cmdfifo_entries : 4; // +20
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL;
|
||||
};
|
||||
|
||||
/**************************************************
|
||||
/*******************************************************************************
|
||||
___ ___ ___ _ _ ___ _ _ ___ ___ ___
|
||||
/ __| __/ _ \| | | | __| \| |/ __| __| _ \
|
||||
\__ \ _| (_) | |_| | _|| .` | (__| _|| /
|
||||
|___/___\__\_\\___/|___|_|\_|\___|___|_|_\
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
union SQ_PROGRAM_CNTL {
|
||||
struct {
|
||||
// Note from a2xx.xml:
|
||||
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG,
|
||||
// but high bit is set to indicate "0 registers used".
|
||||
uint32_t vs_num_reg : 8; // +0
|
||||
uint32_t ps_num_reg : 8; // +8
|
||||
uint32_t vs_resource : 1; // +16
|
||||
uint32_t ps_resource : 1; // +17
|
||||
uint32_t param_gen : 1; // +18
|
||||
uint32_t gen_index_pix : 1; // +19
|
||||
uint32_t vs_export_count : 4; // +20
|
||||
xenos::VertexShaderExportMode vs_export_mode : 3; // +24
|
||||
uint32_t ps_export_mode : 4; // +27
|
||||
uint32_t gen_index_vtx : 1; // +31
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_SQ_PROGRAM_CNTL;
|
||||
};
|
||||
|
||||
union SQ_CONTEXT_MISC {
|
||||
struct {
|
||||
uint32_t inst_pred_optimize : 1; // +0
|
||||
uint32_t sc_output_screen_xy : 1; // +1
|
||||
xenos::SampleControl sc_sample_cntl : 2; // +2
|
||||
uint32_t : 4; // +4
|
||||
uint32_t param_gen_pos : 8; // +8
|
||||
uint32_t perfcounter_ref : 1; // +16
|
||||
uint32_t yeild_optimize : 1; // +17 sic
|
||||
uint32_t tx_cache_sel : 1; // +18
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC;
|
||||
};
|
||||
|
||||
/*******************************************************************************
|
||||
__ _____ ___ _____ _____ __
|
||||
\ \ / / __| _ \_ _| __\ \/ /
|
||||
\ V /| _|| / | | | _| > <
|
||||
\_/ |___|_|_\ |_| |___/_/\_\
|
||||
|
||||
___ ___ ___ _ _ ___ ___ ___ _ _ _ ___
|
||||
/ __| _ \/ _ \| | | | _ \ __| _ \ /_\ | \| | \
|
||||
| (_ | / (_) | |_| | _/ _|| / / _ \| .` | |) |
|
||||
\___|_|_\\___/ \___/|_| |___|_|_\ /_/ \_\_|\_|___/
|
||||
|
||||
_____ ___ ___ ___ ___ _ _ _ _____ ___ ___
|
||||
|_ _| __/ __/ __| __| | | | /_\_ _/ _ \| _ \
|
||||
| | | _|\__ \__ \ _|| |__| |__ / _ \| || (_) | /
|
||||
|_| |___|___/___/___|____|____/_/ \_\_| \___/|_|_\
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
union VGT_OUTPUT_PATH_CNTL {
|
||||
struct {
|
||||
xenos::VGTOutputPath path_select : 2; // +0
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL;
|
||||
};
|
||||
|
||||
union VGT_HOS_CNTL {
|
||||
struct {
|
||||
xenos::TessellationMode tess_mode : 2; // +0
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_VGT_HOS_CNTL;
|
||||
};
|
||||
|
||||
/*******************************************************************************
|
||||
___ ___ ___ __ __ ___ _____ _____ _____
|
||||
| _ \ _ \_ _| \/ |_ _|_ _|_ _\ \ / / __|
|
||||
| _/ /| || |\/| || | | | | | \ V /| _|
|
||||
|
@ -82,206 +172,348 @@ union WAIT_UNTIL {
|
|||
/ _ \\__ \__ \ _|| |\/| | _ \ |__| _|| /
|
||||
/_/ \_\___/___/___|_| |_|___/____|___|_|_\
|
||||
|
||||
***************************************************/
|
||||
*******************************************************************************/
|
||||
|
||||
// Layout of the PA_SU_POINT_MINMAX register: two 16-bit fields that together
// fill the 32-bit `value`. `min_size` starts at bit 0 and `max_size` at
// bit 16.
union PA_SU_POINT_MINMAX {
|
||||
struct {
|
||||
// Radius, 12.4 fixed point.
|
||||
uint32_t min_size : 16; // +0
|
||||
uint32_t max_size : 16; // +16
|
||||
};
|
||||
uint32_t value;  // Whole register as a single 32-bit word.
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_MINMAX;  // Register this layout is tied to.
|
||||
};
|
||||
|
||||
// Layout of the PA_SU_POINT_SIZE register: two 16-bit fields that together
// fill the 32-bit `value`. Note the order: `height` starts at bit 0 and
// `width` at bit 16.
union PA_SU_POINT_SIZE {
|
||||
struct {
|
||||
// 1/2 width or height, 12.4 fixed point.
|
||||
uint32_t height : 16; // +0
|
||||
uint32_t width : 16; // +16
|
||||
};
|
||||
uint32_t value;  // Whole register as a single 32-bit word.
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_SIZE;  // Register this layout is tied to.
|
||||
};
|
||||
|
||||
// Setup Unit / Scanline Converter mode cntl
|
||||
union PA_SU_SC_MODE_CNTL {
|
||||
xe::bf<uint32_t, 0, 1> cull_front;
|
||||
xe::bf<uint32_t, 1, 1> cull_back;
|
||||
xe::bf<uint32_t, 2, 1> face;
|
||||
xe::bf<uint32_t, 3, 2> poly_mode;
|
||||
xe::bf<uint32_t, 5, 3> polymode_front_ptype;
|
||||
xe::bf<uint32_t, 8, 3> polymode_back_ptype;
|
||||
xe::bf<uint32_t, 11, 1> poly_offset_front_enable;
|
||||
xe::bf<uint32_t, 12, 1> poly_offset_back_enable;
|
||||
xe::bf<uint32_t, 13, 1> poly_offset_para_enable;
|
||||
|
||||
xe::bf<uint32_t, 15, 1> msaa_enable;
|
||||
xe::bf<uint32_t, 16, 1> vtx_window_offset_enable;
|
||||
|
||||
xe::bf<uint32_t, 18, 1> line_stipple_enable;
|
||||
xe::bf<uint32_t, 19, 1> provoking_vtx_last;
|
||||
xe::bf<uint32_t, 20, 1> persp_corr_dis;
|
||||
xe::bf<uint32_t, 21, 1> multi_prim_ib_ena;
|
||||
|
||||
xe::bf<uint32_t, 23, 1> quad_order_enable;
|
||||
|
||||
xe::bf<uint32_t, 25, 1> wait_rb_idle_all_tri;
|
||||
xe::bf<uint32_t, 26, 1> wait_rb_idle_first_tri_new_state;
|
||||
|
||||
struct {
|
||||
uint32_t cull_front : 1; // +0
|
||||
uint32_t cull_back : 1; // +1
|
||||
// 0 - front is CCW, 1 - front is CW.
|
||||
uint32_t face : 1; // +2
|
||||
xenos::PolygonModeEnable poly_mode : 2; // +3
|
||||
xenos::PolygonType polymode_front_ptype : 3; // +5
|
||||
xenos::PolygonType polymode_back_ptype : 3; // +8
|
||||
uint32_t poly_offset_front_enable : 1; // +11
|
||||
uint32_t poly_offset_back_enable : 1; // +12
|
||||
uint32_t poly_offset_para_enable : 1; // +13
|
||||
uint32_t : 1; // +14
|
||||
uint32_t msaa_enable : 1; // +15
|
||||
uint32_t vtx_window_offset_enable : 1; // +16
|
||||
// LINE_STIPPLE_ENABLE was added on Adreno.
|
||||
uint32_t : 2; // +17
|
||||
uint32_t provoking_vtx_last : 1; // +19
|
||||
uint32_t persp_corr_dis : 1; // +20
|
||||
uint32_t multi_prim_ib_ena : 1; // +21
|
||||
uint32_t : 1; // +22
|
||||
uint32_t quad_order_enable : 1; // +23
|
||||
// WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on
|
||||
// Adreno.
|
||||
// TODO(Triang3l): Find SC_ONE_QUAD_PER_CLOCK offset.
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL;
|
||||
};
|
||||
|
||||
// Setup Unit Vertex Control
|
||||
union PA_SU_VTX_CNTL {
|
||||
xe::bf<uint32_t, 0, 1> pix_center; // 1 = half pixel offset
|
||||
xe::bf<uint32_t, 1, 2> round_mode;
|
||||
xe::bf<uint32_t, 3, 3> quant_mode;
|
||||
|
||||
struct {
|
||||
uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL).
|
||||
uint32_t round_mode : 2; // +1
|
||||
uint32_t quant_mode : 3; // +3
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL;
|
||||
};
|
||||
|
||||
union PA_SC_MPASS_PS_CNTL {
|
||||
xe::bf<uint32_t, 0, 20> mpass_pix_vec_per_pass;
|
||||
xe::bf<uint32_t, 31, 1> mpass_ps_ena;
|
||||
|
||||
struct {
|
||||
uint32_t mpass_pix_vec_per_pass : 20; // +0
|
||||
uint32_t : 11; // +20
|
||||
uint32_t mpass_ps_ena : 1; // +31
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL;
|
||||
};
|
||||
|
||||
// Scanline converter viz query
|
||||
union PA_SC_VIZ_QUERY {
|
||||
xe::bf<uint32_t, 0, 1> viz_query_ena;
|
||||
xe::bf<uint32_t, 1, 6> viz_query_id;
|
||||
xe::bf<uint32_t, 7, 1> kill_pix_post_early_z;
|
||||
|
||||
struct {
|
||||
uint32_t viz_query_ena : 1; // +0
|
||||
uint32_t viz_query_id : 6; // +1
|
||||
uint32_t kill_pix_post_early_z : 1; // +7
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY;
|
||||
};
|
||||
|
||||
// Clipper clip control
|
||||
// TODO(DrChat): This seems to differ. Need to examine this.
|
||||
// https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11
|
||||
union PA_CL_CLIP_CNTL {
|
||||
xe::bf<uint32_t, 0, 1> ucp_ena_0;
|
||||
xe::bf<uint32_t, 1, 1> ucp_ena_1;
|
||||
xe::bf<uint32_t, 2, 1> ucp_ena_2;
|
||||
xe::bf<uint32_t, 3, 1> ucp_ena_3;
|
||||
xe::bf<uint32_t, 4, 1> ucp_ena_4;
|
||||
xe::bf<uint32_t, 5, 1> ucp_ena_5;
|
||||
|
||||
xe::bf<uint32_t, 14, 2> ps_ucp_mode;
|
||||
xe::bf<uint32_t, 16, 1> clip_disable;
|
||||
xe::bf<uint32_t, 17, 1> ucp_cull_only_ena;
|
||||
xe::bf<uint32_t, 18, 1> boundary_edge_flag_ena;
|
||||
xe::bf<uint32_t, 19, 1> dx_clip_space_def;
|
||||
xe::bf<uint32_t, 20, 1> dis_clip_err_detect;
|
||||
xe::bf<uint32_t, 21, 1> vtx_kill_or;
|
||||
xe::bf<uint32_t, 22, 1> xy_nan_retain;
|
||||
xe::bf<uint32_t, 23, 1> z_nan_retain;
|
||||
xe::bf<uint32_t, 24, 1> w_nan_retain;
|
||||
|
||||
struct {
|
||||
uint32_t ucp_ena_0 : 1; // +0
|
||||
uint32_t ucp_ena_1 : 1; // +1
|
||||
uint32_t ucp_ena_2 : 1; // +2
|
||||
uint32_t ucp_ena_3 : 1; // +3
|
||||
uint32_t ucp_ena_4 : 1; // +4
|
||||
uint32_t ucp_ena_5 : 1; // +5
|
||||
uint32_t : 8; // +6
|
||||
uint32_t ps_ucp_mode : 2; // +14
|
||||
uint32_t clip_disable : 1; // +16
|
||||
uint32_t ucp_cull_only_ena : 1; // +17
|
||||
uint32_t boundary_edge_flag_ena : 1; // +18
|
||||
uint32_t dx_clip_space_def : 1; // +19
|
||||
uint32_t dis_clip_err_detect : 1; // +20
|
||||
uint32_t vtx_kill_or : 1; // +21
|
||||
uint32_t xy_nan_retain : 1; // +22
|
||||
uint32_t z_nan_retain : 1; // +23
|
||||
uint32_t w_nan_retain : 1; // +24
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_CL_CLIP_CNTL;
|
||||
};
|
||||
|
||||
// Viewport transform engine control
|
||||
union PA_CL_VTE_CNTL {
|
||||
xe::bf<uint32_t, 0, 1> vport_x_scale_ena;
|
||||
xe::bf<uint32_t, 1, 1> vport_x_offset_ena;
|
||||
xe::bf<uint32_t, 2, 1> vport_y_scale_ena;
|
||||
xe::bf<uint32_t, 3, 1> vport_y_offset_ena;
|
||||
xe::bf<uint32_t, 4, 1> vport_z_scale_ena;
|
||||
xe::bf<uint32_t, 5, 1> vport_z_offset_ena;
|
||||
|
||||
xe::bf<uint32_t, 8, 1> vtx_xy_fmt;
|
||||
xe::bf<uint32_t, 9, 1> vtx_z_fmt;
|
||||
xe::bf<uint32_t, 10, 1> vtx_w0_fmt;
|
||||
xe::bf<uint32_t, 11, 1> perfcounter_ref;
|
||||
|
||||
struct {
|
||||
uint32_t vport_x_scale_ena : 1; // +0
|
||||
uint32_t vport_x_offset_ena : 1; // +1
|
||||
uint32_t vport_y_scale_ena : 1; // +2
|
||||
uint32_t vport_y_offset_ena : 1; // +3
|
||||
uint32_t vport_z_scale_ena : 1; // +4
|
||||
uint32_t vport_z_offset_ena : 1; // +5
|
||||
uint32_t : 2; // +6
|
||||
uint32_t vtx_xy_fmt : 1; // +8
|
||||
uint32_t vtx_z_fmt : 1; // +9
|
||||
uint32_t vtx_w0_fmt : 1; // +10
|
||||
uint32_t perfcounter_ref : 1; // +11
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL;
|
||||
};
|
||||
|
||||
union PA_SC_WINDOW_OFFSET {
|
||||
xe::bf<int32_t, 0, 15> window_x_offset;
|
||||
xe::bf<int32_t, 16, 15> window_y_offset;
|
||||
|
||||
struct {
|
||||
int32_t window_x_offset : 15; // +0
|
||||
uint32_t : 1; // +15
|
||||
int32_t window_y_offset : 15; // +16
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET;
|
||||
};
|
||||
|
||||
union PA_SC_WINDOW_SCISSOR_TL {
|
||||
xe::bf<uint32_t, 0, 14> tl_x;
|
||||
xe::bf<uint32_t, 16, 14> tl_y;
|
||||
xe::bf<uint32_t, 31, 1> window_offset_disable;
|
||||
|
||||
struct {
|
||||
uint32_t tl_x : 14; // +0
|
||||
uint32_t : 2; // +14
|
||||
uint32_t tl_y : 14; // +16
|
||||
uint32_t : 1; // +30
|
||||
uint32_t window_offset_disable : 1; // +31
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL;
|
||||
};
|
||||
|
||||
union PA_SC_WINDOW_SCISSOR_BR {
|
||||
xe::bf<uint32_t, 0, 14> br_x;
|
||||
xe::bf<uint32_t, 16, 14> br_y;
|
||||
|
||||
struct {
|
||||
uint32_t br_x : 14; // +0
|
||||
uint32_t : 2; // +14
|
||||
uint32_t br_y : 14; // +16
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR;
|
||||
};
|
||||
|
||||
/**************************************************
|
||||
/*******************************************************************************
|
||||
___ ___
|
||||
| _ \ _ )
|
||||
| / _ \
|
||||
|_|_\___/
|
||||
|
||||
***************************************************/
|
||||
*******************************************************************************/
|
||||
|
||||
union RB_MODECONTROL {
|
||||
xe::bf<xenos::ModeControl, 0, 3> edram_mode;
|
||||
|
||||
struct {
|
||||
xenos::ModeControl edram_mode : 3; // +0
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL;
|
||||
};
|
||||
|
||||
union RB_SURFACE_INFO {
|
||||
xe::bf<uint32_t, 0, 14> surface_pitch;
|
||||
xe::bf<MsaaSamples, 16, 2> msaa_samples;
|
||||
xe::bf<uint32_t, 18, 14> hiz_pitch;
|
||||
|
||||
struct {
|
||||
uint32_t surface_pitch : 14; // +0
|
||||
uint32_t : 2; // +14
|
||||
MsaaSamples msaa_samples : 2; // +16
|
||||
uint32_t hiz_pitch : 14; // +18
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_SURFACE_INFO;
|
||||
};
|
||||
|
||||
union RB_COLORCONTROL {
|
||||
xe::bf<uint32_t, 0, 3> alpha_func;
|
||||
xe::bf<uint32_t, 3, 1> alpha_test_enable;
|
||||
xe::bf<uint32_t, 4, 1> alpha_to_mask_enable;
|
||||
|
||||
xe::bf<uint32_t, 24, 2> alpha_to_mask_offset0;
|
||||
xe::bf<uint32_t, 26, 2> alpha_to_mask_offset1;
|
||||
xe::bf<uint32_t, 28, 2> alpha_to_mask_offset2;
|
||||
xe::bf<uint32_t, 30, 2> alpha_to_mask_offset3;
|
||||
|
||||
struct {
|
||||
CompareFunction alpha_func : 3; // +0
|
||||
uint32_t alpha_test_enable : 1; // +3
|
||||
uint32_t alpha_to_mask_enable : 1; // +4
|
||||
// Everything in between was added on Adreno.
|
||||
uint32_t : 19; // +5
|
||||
uint32_t alpha_to_mask_offset0 : 2; // +24
|
||||
uint32_t alpha_to_mask_offset1 : 2; // +26
|
||||
uint32_t alpha_to_mask_offset2 : 2; // +28
|
||||
uint32_t alpha_to_mask_offset3 : 2; // +30
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_COLORCONTROL;
|
||||
};
|
||||
|
||||
union RB_COLOR_INFO {
|
||||
xe::bf<uint32_t, 0, 12> color_base;
|
||||
xe::bf<ColorRenderTargetFormat, 16, 4> color_format;
|
||||
xe::bf<uint32_t, 20, 6> color_exp_bias;
|
||||
|
||||
struct {
|
||||
uint32_t color_base : 12; // +0
|
||||
uint32_t : 4; // +12
|
||||
ColorRenderTargetFormat color_format : 4; // +16
|
||||
int32_t color_exp_bias : 6; // +20
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_COLOR_INFO;
|
||||
// RB_COLOR[1-3]_INFO also use this format.
|
||||
static const Register rt_register_indices[4];
|
||||
};
|
||||
|
||||
// Layout of the RB_COLOR_MASK register: sixteen 1-bit write flags in the low
// half of the 32-bit `value`, grouped as red/green/blue/alpha for each of the
// suffixes 0 through 3 (four consecutive bits per suffix).
// NOTE(review): the numeric suffix presumably selects the render target -
// confirm against the users of this register.
union RB_COLOR_MASK {
|
||||
struct {
|
||||
uint32_t write_red0 : 1; // +0
|
||||
uint32_t write_green0 : 1; // +1
|
||||
uint32_t write_blue0 : 1; // +2
|
||||
uint32_t write_alpha0 : 1; // +3
|
||||
uint32_t write_red1 : 1; // +4
|
||||
uint32_t write_green1 : 1; // +5
|
||||
uint32_t write_blue1 : 1; // +6
|
||||
uint32_t write_alpha1 : 1; // +7
|
||||
uint32_t write_red2 : 1; // +8
|
||||
uint32_t write_green2 : 1; // +9
|
||||
uint32_t write_blue2 : 1; // +10
|
||||
uint32_t write_alpha2 : 1; // +11
|
||||
uint32_t write_red3 : 1; // +12
|
||||
uint32_t write_green3 : 1; // +13
|
||||
uint32_t write_blue3 : 1; // +14
|
||||
uint32_t write_alpha3 : 1; // +15
|
||||
};
|
||||
uint32_t value;  // Whole register as a single 32-bit word.
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK;  // Register this layout is tied to.
|
||||
};
|
||||
|
||||
// Layout shared by the RB_BLENDCONTROL registers: a 32-bit word readable as
// the raw `value` or as two parallel groups of source factor / combine op /
// destination factor, one group for color (bits 0-12) and one for alpha
// (bits 16-28). Bits 13-15 are unnamed padding and bits 29-31 have no named
// field in this definition.
union RB_BLENDCONTROL {
|
||||
struct {
|
||||
BlendFactor color_srcblend : 5; // +0
|
||||
BlendOp color_comb_fcn : 3; // +5
|
||||
BlendFactor color_destblend : 5; // +8
|
||||
uint32_t : 3; // +13 (unnamed padding bits)
|
||||
BlendFactor alpha_srcblend : 5; // +16
|
||||
BlendOp alpha_comb_fcn : 3; // +21
|
||||
BlendFactor alpha_destblend : 5; // +24
|
||||
// BLEND_FORCE_ENABLE and BLEND_FORCE were added on Adreno.
|
||||
};
|
||||
uint32_t value;  // Whole register as a single 32-bit word.
|
||||
// RB_BLENDCONTROL[0-3] use this format.
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0;  // Index of the first of the four registers.
|
||||
static const Register rt_register_indices[4];  // Only declared here; the four entries are defined elsewhere.
|
||||
};
|
||||
|
||||
// Layout of the RB_DEPTHCONTROL register: a 32-bit word readable as the raw
// `value` or through the depth/stencil bit-fields below. The field widths
// add up to exactly 32 bits, so the struct covers the whole word.
// NOTE(review): the `_bf` suffix presumably marks the back-face variants that
// apply when `backface_enable` is set - confirm against the users of this
// register.
union RB_DEPTHCONTROL {
|
||||
struct {
|
||||
uint32_t stencil_enable : 1; // +0
|
||||
uint32_t z_enable : 1; // +1
|
||||
uint32_t z_write_enable : 1; // +2
|
||||
// EARLY_Z_ENABLE was added on Adreno.
|
||||
uint32_t : 1; // +3 (unnamed padding bit)
|
||||
CompareFunction zfunc : 3; // +4
|
||||
uint32_t backface_enable : 1; // +7
|
||||
CompareFunction stencilfunc : 3; // +8
|
||||
StencilOp stencilfail : 3; // +11
|
||||
StencilOp stencilzpass : 3; // +14
|
||||
StencilOp stencilzfail : 3; // +17
|
||||
CompareFunction stencilfunc_bf : 3; // +20
|
||||
StencilOp stencilfail_bf : 3; // +23
|
||||
StencilOp stencilzpass_bf : 3; // +26
|
||||
StencilOp stencilzfail_bf : 3; // +29
|
||||
};
|
||||
uint32_t value;  // Whole register as a single 32-bit word.
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_DEPTHCONTROL;  // Register this layout is tied to.
|
||||
};
|
||||
|
||||
// Layout of the RB_STENCILREFMASK register: three 8-bit fields (reference
// value, mask, write mask) in the low 24 bits of the 32-bit `value`. The top
// 8 bits have no named field in this definition.
union RB_STENCILREFMASK {
|
||||
struct {
|
||||
uint32_t stencilref : 8; // +0
|
||||
uint32_t stencilmask : 8; // +8
|
||||
uint32_t stencilwritemask : 8; // +16
|
||||
};
|
||||
uint32_t value;  // Whole register as a single 32-bit word.
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK;  // Index of the non-_BF register.
|
||||
// RB_STENCILREFMASK_BF also uses this format.
|
||||
};
|
||||
|
||||
union RB_DEPTH_INFO {
|
||||
xe::bf<uint32_t, 0, 12> depth_base;
|
||||
xe::bf<DepthRenderTargetFormat, 16, 1> depth_format;
|
||||
|
||||
struct {
|
||||
uint32_t depth_base : 12; // +0
|
||||
uint32_t : 4; // +12
|
||||
DepthRenderTargetFormat depth_format : 1; // +16
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO;
|
||||
};
|
||||
|
||||
union RB_COPY_CONTROL {
|
||||
xe::bf<uint32_t, 0, 3> copy_src_select;
|
||||
xe::bf<xenos::CopySampleSelect, 4, 3> copy_sample_select;
|
||||
xe::bf<uint32_t, 8, 1> color_clear_enable;
|
||||
xe::bf<uint32_t, 9, 1> depth_clear_enable;
|
||||
|
||||
xe::bf<xenos::CopyCommand, 20, 2> copy_command;
|
||||
|
||||
struct {
|
||||
uint32_t copy_src_select : 3; // +0 Depth is 4.
|
||||
uint32_t : 1; // +3
|
||||
xenos::CopySampleSelect copy_sample_select : 3; // +4
|
||||
uint32_t : 1; // +7
|
||||
uint32_t color_clear_enable : 1; // +8
|
||||
uint32_t depth_clear_enable : 1; // +9
|
||||
uint32_t : 10; // +10
|
||||
xenos::CopyCommand copy_command : 2; // +20
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL;
|
||||
};
|
||||
|
||||
union RB_COPY_DEST_INFO {
|
||||
xe::bf<Endian128, 0, 3> copy_dest_endian;
|
||||
xe::bf<uint32_t, 3, 1> copy_dest_array;
|
||||
xe::bf<uint32_t, 4, 3> copy_dest_slice;
|
||||
xe::bf<ColorFormat, 7, 6> copy_dest_format;
|
||||
xe::bf<uint32_t, 13, 3> copy_dest_number;
|
||||
xe::bf<uint32_t, 16, 6> copy_dest_exp_bias;
|
||||
xe::bf<uint32_t, 24, 1> copy_dest_swap;
|
||||
|
||||
struct {
|
||||
Endian128 copy_dest_endian : 3; // +0
|
||||
uint32_t copy_dest_array : 1; // +3
|
||||
uint32_t copy_dest_slice : 3; // +4
|
||||
ColorFormat copy_dest_format : 6; // +7
|
||||
uint32_t copy_dest_number : 3; // +13
|
||||
int32_t copy_dest_exp_bias : 6; // +16
|
||||
uint32_t : 2; // +22
|
||||
uint32_t copy_dest_swap : 1; // +24
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO;
|
||||
};
|
||||
|
||||
union RB_COPY_DEST_PITCH {
|
||||
xe::bf<uint32_t, 0, 14> copy_dest_pitch;
|
||||
xe::bf<uint32_t, 16, 14> copy_dest_height;
|
||||
|
||||
struct {
|
||||
uint32_t copy_dest_pitch : 14; // +0
|
||||
uint32_t : 2; // +14
|
||||
uint32_t copy_dest_height : 14; // +16
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH;
|
||||
};
|
||||
|
||||
} // namespace reg
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
|
|
@ -24,25 +24,25 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
|||
|
||||
out_info->min_filter =
|
||||
fetch_instr.attributes.min_filter == TextureFilter::kUseFetchConst
|
||||
? static_cast<TextureFilter>(fetch.min_filter)
|
||||
? fetch.min_filter
|
||||
: fetch_instr.attributes.min_filter;
|
||||
out_info->mag_filter =
|
||||
fetch_instr.attributes.mag_filter == TextureFilter::kUseFetchConst
|
||||
? static_cast<TextureFilter>(fetch.mag_filter)
|
||||
? fetch.mag_filter
|
||||
: fetch_instr.attributes.mag_filter;
|
||||
out_info->mip_filter =
|
||||
fetch_instr.attributes.mip_filter == TextureFilter::kUseFetchConst
|
||||
? static_cast<TextureFilter>(fetch.mip_filter)
|
||||
? fetch.mip_filter
|
||||
: fetch_instr.attributes.mip_filter;
|
||||
out_info->clamp_u = static_cast<ClampMode>(fetch.clamp_x);
|
||||
out_info->clamp_v = static_cast<ClampMode>(fetch.clamp_y);
|
||||
out_info->clamp_w = static_cast<ClampMode>(fetch.clamp_z);
|
||||
out_info->clamp_u = fetch.clamp_x;
|
||||
out_info->clamp_v = fetch.clamp_y;
|
||||
out_info->clamp_w = fetch.clamp_z;
|
||||
out_info->aniso_filter =
|
||||
fetch_instr.attributes.aniso_filter == AnisoFilter::kUseFetchConst
|
||||
? static_cast<AnisoFilter>(fetch.aniso_filter)
|
||||
? fetch.aniso_filter
|
||||
: fetch_instr.attributes.aniso_filter;
|
||||
|
||||
out_info->border_color = static_cast<BorderColor>(fetch.border_color);
|
||||
out_info->border_color = fetch.border_color;
|
||||
out_info->lod_bias = (fetch.lod_bias) / 32.f;
|
||||
out_info->mip_min_level = fetch.mip_min_level;
|
||||
out_info->mip_max_level = fetch.mip_max_level;
|
||||
|
|
|
@ -108,10 +108,11 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) {
|
|||
}
|
||||
|
||||
bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type,
|
||||
xenos::xe_gpu_program_cntl_t cntl) {
|
||||
reg::SQ_PROGRAM_CNTL cntl) {
|
||||
Reset();
|
||||
register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1
|
||||
: cntl.ps_regs + 1;
|
||||
uint32_t cntl_num_reg =
|
||||
shader->type() == ShaderType::kVertex ? cntl.vs_num_reg : cntl.ps_num_reg;
|
||||
register_count_ = (cntl_num_reg & 0x80) ? 0 : (cntl_num_reg + 1);
|
||||
|
||||
return TranslateInternal(shader, patch_type);
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/ucode.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
@ -33,7 +34,7 @@ class ShaderTranslator {
|
|||
bool GatherAllBindingInformation(Shader* shader);
|
||||
|
||||
bool Translate(Shader* shader, PrimitiveType patch_type,
|
||||
xenos::xe_gpu_program_cntl_t cntl);
|
||||
reg::SQ_PROGRAM_CNTL cntl);
|
||||
bool Translate(Shader* shader, PrimitiveType patch_type);
|
||||
|
||||
protected:
|
||||
|
@ -232,7 +233,7 @@ class ShaderTranslator {
|
|||
PrimitiveType patch_primitive_type_;
|
||||
const uint32_t* ucode_dwords_;
|
||||
size_t ucode_dword_count_;
|
||||
xenos::xe_gpu_program_cntl_t program_cntl_;
|
||||
reg::SQ_PROGRAM_CNTL program_cntl_;
|
||||
uint32_t register_count_;
|
||||
|
||||
// Accumulated translation errors.
|
||||
|
|
|
@ -93,6 +93,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main",
|
||||
{}, {}, &function_block);
|
||||
|
||||
assert_not_zero(register_count());
|
||||
registers_type_ = b.makeArrayType(vec4_float_type_,
|
||||
b.makeUintConstant(register_count()), 0);
|
||||
registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction,
|
||||
|
|
|
@ -40,7 +40,7 @@ void CopySwapBlock(Endian endian, void* output, const void* input,
|
|||
xe::copy_and_swap_16_in_32_unaligned(output, input, length);
|
||||
break;
|
||||
default:
|
||||
case Endian::kUnspecified:
|
||||
case Endian::kNone:
|
||||
std::memcpy(output, input, length);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -33,8 +33,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
|||
|
||||
auto& info = *out_info;
|
||||
|
||||
info.format = static_cast<TextureFormat>(fetch.format);
|
||||
info.endianness = static_cast<Endian>(fetch.endianness);
|
||||
info.format = fetch.format;
|
||||
info.endianness = fetch.endianness;
|
||||
|
||||
info.dimension = static_cast<Dimension>(fetch.dimension);
|
||||
info.width = info.height = info.depth = 0;
|
||||
|
|
|
@ -19,77 +19,6 @@
|
|||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
// Texture format identifiers.
// Explicit values run contiguously from 0 to 63 (so they fit in 6 bits), plus
// the all-ones `kUnknown` sentinel, which lies outside that range. The
// underlying type is uint32_t.
// NOTE(review): the values are presumably the raw codes found in the texture
// fetch constant's format field - confirm against the code that converts it.
// Sources for the names:
// a2xx_sq_surfaceformat +
|
||||
// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas
|
||||
enum class TextureFormat : uint32_t {
|
||||
k_1_REVERSE = 0,
|
||||
k_1 = 1,
|
||||
k_8 = 2,
|
||||
k_1_5_5_5 = 3,
|
||||
k_5_6_5 = 4,
|
||||
k_6_5_5 = 5,
|
||||
k_8_8_8_8 = 6,
|
||||
k_2_10_10_10 = 7,
|
||||
k_8_A = 8,
|
||||
k_8_B = 9,
|
||||
k_8_8 = 10,
|
||||
k_Cr_Y1_Cb_Y0_REP = 11,
|
||||
k_Y1_Cr_Y0_Cb_REP = 12,
|
||||
k_16_16_EDRAM = 13,
|
||||
k_8_8_8_8_A = 14,
|
||||
k_4_4_4_4 = 15,
|
||||
k_10_11_11 = 16,
|
||||
k_11_11_10 = 17,
|
||||
k_DXT1 = 18,
|
||||
k_DXT2_3 = 19,
|
||||
k_DXT4_5 = 20,
|
||||
k_16_16_16_16_EDRAM = 21,
|
||||
k_24_8 = 22,
|
||||
k_24_8_FLOAT = 23,
|
||||
k_16 = 24,
|
||||
k_16_16 = 25,
|
||||
k_16_16_16_16 = 26,
|
||||
k_16_EXPAND = 27,
|
||||
k_16_16_EXPAND = 28,
|
||||
k_16_16_16_16_EXPAND = 29,
|
||||
k_16_FLOAT = 30,
|
||||
k_16_16_FLOAT = 31,
|
||||
k_16_16_16_16_FLOAT = 32,
|
||||
k_32 = 33,
|
||||
k_32_32 = 34,
|
||||
k_32_32_32_32 = 35,
|
||||
k_32_FLOAT = 36,
|
||||
k_32_32_FLOAT = 37,
|
||||
k_32_32_32_32_FLOAT = 38,
|
||||
k_32_AS_8 = 39,
|
||||
k_32_AS_8_8 = 40,
|
||||
k_16_MPEG = 41,
|
||||
k_16_16_MPEG = 42,
|
||||
k_8_INTERLACED = 43,
|
||||
k_32_AS_8_INTERLACED = 44,
|
||||
k_32_AS_8_8_INTERLACED = 45,
|
||||
k_16_INTERLACED = 46,
|
||||
k_16_MPEG_INTERLACED = 47,
|
||||
k_16_16_MPEG_INTERLACED = 48,
|
||||
k_DXN = 49,
|
||||
k_8_8_8_8_AS_16_16_16_16 = 50,
|
||||
k_DXT1_AS_16_16_16_16 = 51,
|
||||
k_DXT2_3_AS_16_16_16_16 = 52,
|
||||
k_DXT4_5_AS_16_16_16_16 = 53,
|
||||
k_2_10_10_10_AS_16_16_16_16 = 54,
|
||||
k_10_11_11_AS_16_16_16_16 = 55,
|
||||
k_11_11_10_AS_16_16_16_16 = 56,
|
||||
k_32_32_32_FLOAT = 57,
|
||||
k_DXT3A = 58,
|
||||
k_DXT5A = 59,
|
||||
k_CTX1 = 60,
|
||||
k_DXT3A_AS_1_1_1_1 = 61,
|
||||
k_8_8_8_8_GAMMA_EDRAM = 62,
|
||||
k_2_10_10_10_FLOAT_EDRAM = 63,
|
||||
|
||||
kUnknown = 0xFFFFFFFFu,  // Sentinel outside the 0-63 range of real formats.
|
||||
};
|
||||
|
||||
inline TextureFormat GetBaseFormat(TextureFormat texture_format) {
|
||||
// These formats are used for resampling textures / gamma control.
|
||||
switch (texture_format) {
|
||||
|
|
|
@ -824,7 +824,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
|
|||
#define LOADEL(type, wo) \
|
||||
GpuSwap(xe::load<type>(vstart + \
|
||||
(attrib.fetch_instr.attributes.offset + wo) * 4), \
|
||||
Endian(fetch->endian))
|
||||
fetch->endian)
|
||||
switch (attrib.fetch_instr.attributes.data_format) {
|
||||
case VertexFormat::k_32:
|
||||
ImGui::Text("%.8X", LOADEL(uint32_t, 0));
|
||||
|
@ -1334,10 +1334,10 @@ void TraceViewer::DrawStateUI() {
|
|||
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
|
||||
};
|
||||
uint32_t rb_blendcontrol[4] = {
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL0].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL2].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL3].u32,
|
||||
};
|
||||
ImGui::Columns(2);
|
||||
for (int i = 0; i < xe::countof(color_info); ++i) {
|
||||
|
@ -1713,7 +1713,7 @@ void TraceViewer::DrawStateUI() {
|
|||
fetch = &group->vertex_fetch_2;
|
||||
break;
|
||||
}
|
||||
assert_true(fetch->endian == 2);
|
||||
assert_true(fetch->endian == Endian::k8in32);
|
||||
char tree_root_id[32];
|
||||
sprintf(tree_root_id, "#vertices_root_%d",
|
||||
vertex_binding.fetch_constant);
|
||||
|
|
|
@ -146,12 +146,8 @@ enum class AllocType : uint32_t {
|
|||
|
||||
// Instruction data for ControlFlowOpcode::kExec and kExecEnd.
|
||||
struct ControlFlowExecInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
// Address of the instructions to execute.
|
||||
uint32_t address() const { return address_; }
|
||||
// Number of instructions being executed.
|
||||
|
@ -176,19 +172,15 @@ struct ControlFlowExecInstruction {
|
|||
uint32_t : 7;
|
||||
uint32_t clean_ : 1;
|
||||
uint32_t : 1;
|
||||
uint32_t address_mode_ : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
AddressingMode address_mode_ : 1;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowExecInstruction, 8);
|
||||
|
||||
// Instruction data for ControlFlowOpcode::kCondExec and kCondExecEnd.
|
||||
struct ControlFlowCondExecInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
// Address of the instructions to execute.
|
||||
uint32_t address() const { return address_; }
|
||||
// Number of instructions being executed.
|
||||
|
@ -214,20 +206,16 @@ struct ControlFlowCondExecInstruction {
|
|||
uint32_t vc_lo_ : 2;
|
||||
uint32_t bool_address_ : 8;
|
||||
uint32_t condition_ : 1;
|
||||
uint32_t address_mode_ : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
AddressingMode address_mode_ : 1;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowCondExecInstruction, 8);
|
||||
|
||||
// Instruction data for ControlFlowOpcode::kCondExecPred, kCondExecPredEnd,
|
||||
// kCondExecPredClean, kCondExecPredCleanEnd.
|
||||
struct ControlFlowCondExecPredInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
// Address of the instructions to execute.
|
||||
uint32_t address() const { return address_; }
|
||||
// Number of instructions being executed.
|
||||
|
@ -254,19 +242,15 @@ struct ControlFlowCondExecPredInstruction {
|
|||
uint32_t : 7;
|
||||
uint32_t clean_ : 1;
|
||||
uint32_t condition_ : 1;
|
||||
uint32_t address_mode_ : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
AddressingMode address_mode_ : 1;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowCondExecPredInstruction, 8);
|
||||
|
||||
// Instruction data for ControlFlowOpcode::kLoopStart.
|
||||
struct ControlFlowLoopStartInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
// Target address to jump to when skipping the loop.
|
||||
uint32_t address() const { return address_; }
|
||||
// Whether to reuse the current aL instead of reset it to loop start.
|
||||
|
@ -285,19 +269,15 @@ struct ControlFlowLoopStartInstruction {
|
|||
|
||||
// Word 1: (16 bits)
|
||||
uint32_t : 11;
|
||||
uint32_t address_mode_ : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
AddressingMode address_mode_ : 1;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowLoopStartInstruction, 8);
|
||||
|
||||
// Instruction data for ControlFlowOpcode::kLoopEnd.
|
||||
struct ControlFlowLoopEndInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
// Target address of the start of the loop body.
|
||||
uint32_t address() const { return address_; }
|
||||
// Integer constant register that holds the loop parameters.
|
||||
|
@ -319,19 +299,15 @@ struct ControlFlowLoopEndInstruction {
|
|||
// Word 1: (16 bits)
|
||||
uint32_t : 10;
|
||||
uint32_t condition_ : 1;
|
||||
uint32_t address_mode_ : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
AddressingMode address_mode_ : 1;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowLoopEndInstruction, 8);
|
||||
|
||||
// Instruction data for ControlFlowOpcode::kCondCall.
|
||||
struct ControlFlowCondCallInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
// Target address.
|
||||
uint32_t address() const { return address_; }
|
||||
// Unconditional call - ignores condition/predication.
|
||||
|
@ -354,19 +330,15 @@ struct ControlFlowCondCallInstruction {
|
|||
uint32_t : 2;
|
||||
uint32_t bool_address_ : 8;
|
||||
uint32_t condition_ : 1;
|
||||
uint32_t address_mode_ : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
AddressingMode address_mode_ : 1;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowCondCallInstruction, 8);
|
||||
|
||||
// Instruction data for ControlFlowOpcode::kReturn.
|
||||
struct ControlFlowReturnInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
|
||||
private:
|
||||
// Word 0: (32 bits)
|
||||
|
@ -381,12 +353,8 @@ static_assert_size(ControlFlowReturnInstruction, 8);
|
|||
|
||||
// Instruction data for ControlFlowOpcode::kCondJmp.
|
||||
struct ControlFlowCondJmpInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
AddressingMode addressing_mode() const {
|
||||
return static_cast<AddressingMode>(address_mode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
AddressingMode addressing_mode() const { return address_mode_; }
|
||||
// Target address.
|
||||
uint32_t address() const { return address_; }
|
||||
// Unconditional jump - ignores condition/predication.
|
||||
|
@ -410,20 +378,18 @@ struct ControlFlowCondJmpInstruction {
|
|||
uint32_t direction_ : 1;
|
||||
uint32_t bool_address_ : 8;
|
||||
uint32_t condition_ : 1;
|
||||
uint32_t address_mode_ : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
AddressingMode address_mode_ : 1;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowCondJmpInstruction, 8);
|
||||
|
||||
// Instruction data for ControlFlowOpcode::kAlloc.
|
||||
struct ControlFlowAllocInstruction {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_; }
|
||||
// The total number of the given type allocated by this instruction.
|
||||
uint32_t size() const { return size_; }
|
||||
// The type of resource allocated by this instruction.
|
||||
AllocType alloc_type() const { return static_cast<AllocType>(alloc_type_); }
|
||||
AllocType alloc_type() const { return alloc_type_; }
|
||||
|
||||
private:
|
||||
// Word 0: (32 bits)
|
||||
|
@ -433,16 +399,14 @@ struct ControlFlowAllocInstruction {
|
|||
// Word 1: (16 bits)
|
||||
uint32_t : 8;
|
||||
uint32_t is_unserialized_ : 1;
|
||||
uint32_t alloc_type_ : 2;
|
||||
AllocType alloc_type_ : 2;
|
||||
uint32_t : 1;
|
||||
uint32_t opcode_ : 4;
|
||||
ControlFlowOpcode opcode_ : 4;
|
||||
};
|
||||
static_assert_size(ControlFlowAllocInstruction, 8);
|
||||
|
||||
XEPACKEDUNION(ControlFlowInstruction, {
|
||||
ControlFlowOpcode opcode() const {
|
||||
return static_cast<ControlFlowOpcode>(opcode_value);
|
||||
}
|
||||
ControlFlowOpcode opcode() const { return opcode_value; }
|
||||
|
||||
ControlFlowExecInstruction exec; // kExec*
|
||||
ControlFlowCondExecInstruction cond_exec; // kCondExec*
|
||||
|
@ -457,7 +421,7 @@ XEPACKEDUNION(ControlFlowInstruction, {
|
|||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t unused_0 : 32;
|
||||
uint32_t unused_1 : 12;
|
||||
uint32_t opcode_value : 4;
|
||||
ControlFlowOpcode opcode_value : 4;
|
||||
});
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t dword_0;
|
||||
|
@ -478,7 +442,7 @@ inline void UnpackControlFlowInstructions(const uint32_t* dwords,
|
|||
out_b->dword_1 = dword_2 >> 16;
|
||||
}
|
||||
|
||||
enum class FetchOpcode {
|
||||
enum class FetchOpcode : uint32_t {
|
||||
kVertexFetch = 0,
|
||||
kTextureFetch = 1,
|
||||
kGetTextureBorderColorFrac = 16,
|
||||
|
@ -492,9 +456,7 @@ enum class FetchOpcode {
|
|||
};
|
||||
|
||||
struct VertexFetchInstruction {
|
||||
FetchOpcode opcode() const {
|
||||
return static_cast<FetchOpcode>(data_.opcode_value);
|
||||
}
|
||||
FetchOpcode opcode() const { return data_.opcode_value; }
|
||||
|
||||
// Whether the fetch is predicated (or conditional).
|
||||
bool is_predicated() const { return data_.is_predicated; }
|
||||
|
@ -538,13 +500,9 @@ struct VertexFetchInstruction {
|
|||
uint32_t prefetch_count() const { return data_.prefetch_count; }
|
||||
bool is_mini_fetch() const { return data_.is_mini_fetch == 1; }
|
||||
|
||||
VertexFormat data_format() const {
|
||||
return static_cast<VertexFormat>(data_.format);
|
||||
}
|
||||
VertexFormat data_format() const { return data_.format; }
|
||||
// [-32, 31]
|
||||
int exp_adjust() const {
|
||||
return ((static_cast<int>(data_.exp_adjust) << 26) >> 26);
|
||||
}
|
||||
int exp_adjust() const { return data_.exp_adjust; }
|
||||
bool is_signed() const { return data_.fomat_comp_all == 1; }
|
||||
bool is_normalized() const { return data_.num_format_all == 0; }
|
||||
bool is_index_rounded() const { return data_.is_index_rounded == 1; }
|
||||
|
@ -562,7 +520,7 @@ struct VertexFetchInstruction {
|
|||
private:
|
||||
XEPACKEDSTRUCT(Data, {
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t opcode_value : 5;
|
||||
FetchOpcode opcode_value : 5;
|
||||
uint32_t src_reg : 6;
|
||||
uint32_t src_reg_am : 1;
|
||||
uint32_t dst_reg : 6;
|
||||
|
@ -579,9 +537,9 @@ struct VertexFetchInstruction {
|
|||
uint32_t num_format_all : 1;
|
||||
uint32_t signed_rf_mode_all : 1;
|
||||
uint32_t is_index_rounded : 1;
|
||||
uint32_t format : 6;
|
||||
VertexFormat format : 6;
|
||||
uint32_t reserved2 : 2;
|
||||
uint32_t exp_adjust : 6;
|
||||
int32_t exp_adjust : 6;
|
||||
uint32_t is_mini_fetch : 1;
|
||||
uint32_t is_predicated : 1;
|
||||
});
|
||||
|
@ -595,9 +553,7 @@ struct VertexFetchInstruction {
|
|||
};
|
||||
|
||||
struct TextureFetchInstruction {
|
||||
FetchOpcode opcode() const {
|
||||
return static_cast<FetchOpcode>(data_.opcode_value);
|
||||
}
|
||||
FetchOpcode opcode() const { return data_.opcode_value; }
|
||||
|
||||
// Whether the fetch is predicated (or conditional).
|
||||
bool is_predicated() const { return data_.is_predicated; }
|
||||
|
@ -613,59 +569,49 @@ struct TextureFetchInstruction {
|
|||
uint32_t src_swizzle() const { return data_.src_swiz; }
|
||||
bool is_src_relative() const { return data_.src_reg_am; }
|
||||
|
||||
TextureDimension dimension() const {
|
||||
return static_cast<TextureDimension>(data_.dimension);
|
||||
}
|
||||
TextureDimension dimension() const { return data_.dimension; }
|
||||
bool fetch_valid_only() const { return data_.fetch_valid_only == 1; }
|
||||
bool unnormalized_coordinates() const { return data_.tx_coord_denorm == 1; }
|
||||
bool has_mag_filter() const { return data_.mag_filter != 0x3; }
|
||||
TextureFilter mag_filter() const {
|
||||
return static_cast<TextureFilter>(data_.mag_filter);
|
||||
bool has_mag_filter() const {
|
||||
return data_.mag_filter != TextureFilter::kUseFetchConst;
|
||||
}
|
||||
bool has_min_filter() const { return data_.min_filter != 0x3; }
|
||||
TextureFilter min_filter() const {
|
||||
return static_cast<TextureFilter>(data_.min_filter);
|
||||
TextureFilter mag_filter() const { return data_.mag_filter; }
|
||||
bool has_min_filter() const {
|
||||
return data_.min_filter != TextureFilter::kUseFetchConst;
|
||||
}
|
||||
bool has_mip_filter() const { return data_.mip_filter != 0x3; }
|
||||
TextureFilter mip_filter() const {
|
||||
return static_cast<TextureFilter>(data_.mip_filter);
|
||||
TextureFilter min_filter() const { return data_.min_filter; }
|
||||
bool has_mip_filter() const {
|
||||
return data_.mip_filter != TextureFilter::kUseFetchConst;
|
||||
}
|
||||
bool has_aniso_filter() const { return data_.aniso_filter != 0x7; }
|
||||
AnisoFilter aniso_filter() const {
|
||||
return static_cast<AnisoFilter>(data_.aniso_filter);
|
||||
TextureFilter mip_filter() const { return data_.mip_filter; }
|
||||
bool has_aniso_filter() const {
|
||||
return data_.aniso_filter != AnisoFilter::kUseFetchConst;
|
||||
}
|
||||
bool has_vol_mag_filter() const { return data_.vol_mag_filter != 0x3; }
|
||||
TextureFilter vol_mag_filter() const {
|
||||
return static_cast<TextureFilter>(data_.vol_mag_filter);
|
||||
AnisoFilter aniso_filter() const { return data_.aniso_filter; }
|
||||
bool has_vol_mag_filter() const {
|
||||
return data_.vol_mag_filter != TextureFilter::kUseFetchConst;
|
||||
}
|
||||
bool has_vol_min_filter() const { return data_.vol_min_filter != 0x3; }
|
||||
TextureFilter vol_min_filter() const {
|
||||
return static_cast<TextureFilter>(data_.vol_min_filter);
|
||||
TextureFilter vol_mag_filter() const { return data_.vol_mag_filter; }
|
||||
bool has_vol_min_filter() const {
|
||||
return data_.vol_min_filter != TextureFilter::kUseFetchConst;
|
||||
}
|
||||
TextureFilter vol_min_filter() const { return data_.vol_min_filter; }
|
||||
bool use_computed_lod() const { return data_.use_comp_lod == 1; }
|
||||
bool use_register_lod() const { return data_.use_reg_lod == 1; }
|
||||
bool use_register_gradients() const { return data_.use_reg_gradients == 1; }
|
||||
SampleLocation sample_location() const {
|
||||
return static_cast<SampleLocation>(data_.sample_location);
|
||||
}
|
||||
SampleLocation sample_location() const { return data_.sample_location; }
|
||||
float lod_bias() const {
|
||||
// http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx
|
||||
return ((static_cast<int>(data_.lod_bias) << 25) >> 25) / 16.0f;
|
||||
}
|
||||
float offset_x() const {
|
||||
return ((static_cast<int>(data_.offset_x) << 27) >> 27) / 2.0f;
|
||||
}
|
||||
float offset_y() const {
|
||||
return ((static_cast<int>(data_.offset_y) << 27) >> 27) / 2.0f;
|
||||
}
|
||||
float offset_z() const {
|
||||
return ((static_cast<int>(data_.offset_z) << 27) >> 27) / 2.0f;
|
||||
return data_.lod_bias * (1.0f / 16.0f);
|
||||
}
|
||||
float offset_x() const { return data_.offset_x * 0.5f; }
|
||||
float offset_y() const { return data_.offset_y * 0.5f; }
|
||||
float offset_z() const { return data_.offset_z * 0.5f; }
|
||||
|
||||
private:
|
||||
XEPACKEDSTRUCT(Data, {
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t opcode_value : 5;
|
||||
FetchOpcode opcode_value : 5;
|
||||
uint32_t src_reg : 6;
|
||||
uint32_t src_reg_am : 1;
|
||||
uint32_t dst_reg : 6;
|
||||
|
@ -676,14 +622,14 @@ struct TextureFetchInstruction {
|
|||
uint32_t src_swiz : 6; // xyz
|
||||
});
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t dst_swiz : 12; // xyzw
|
||||
uint32_t mag_filter : 2; // instr_tex_filter_t
|
||||
uint32_t min_filter : 2; // instr_tex_filter_t
|
||||
uint32_t mip_filter : 2; // instr_tex_filter_t
|
||||
uint32_t aniso_filter : 3; // instr_aniso_filter_t
|
||||
uint32_t arbitrary_filter : 3; // instr_arbitrary_filter_t
|
||||
uint32_t vol_mag_filter : 2; // instr_tex_filter_t
|
||||
uint32_t vol_min_filter : 2; // instr_tex_filter_t
|
||||
uint32_t dst_swiz : 12; // xyzw
|
||||
TextureFilter mag_filter : 2;
|
||||
TextureFilter min_filter : 2;
|
||||
TextureFilter mip_filter : 2;
|
||||
AnisoFilter aniso_filter : 3;
|
||||
xenos::ArbitraryFilter arbitrary_filter : 3;
|
||||
TextureFilter vol_mag_filter : 2;
|
||||
TextureFilter vol_min_filter : 2;
|
||||
uint32_t use_comp_lod : 1;
|
||||
uint32_t use_reg_lod : 1;
|
||||
uint32_t unk : 1;
|
||||
|
@ -691,13 +637,13 @@ struct TextureFetchInstruction {
|
|||
});
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t use_reg_gradients : 1;
|
||||
uint32_t sample_location : 1;
|
||||
uint32_t lod_bias : 7;
|
||||
SampleLocation sample_location : 1;
|
||||
int32_t lod_bias : 7;
|
||||
uint32_t unused : 5;
|
||||
uint32_t dimension : 2;
|
||||
uint32_t offset_x : 5;
|
||||
uint32_t offset_y : 5;
|
||||
uint32_t offset_z : 5;
|
||||
TextureDimension dimension : 2;
|
||||
int32_t offset_x : 5;
|
||||
int32_t offset_y : 5;
|
||||
int32_t offset_z : 5;
|
||||
uint32_t pred_condition : 1;
|
||||
});
|
||||
});
|
||||
|
@ -722,7 +668,7 @@ static_assert_size(TextureFetchInstruction, 12);
|
|||
// when write masks are disabled or the instruction that would write them
|
||||
// fails its predication check.
|
||||
|
||||
enum class AluScalarOpcode {
|
||||
enum class AluScalarOpcode : uint32_t {
|
||||
// Floating-Point Add
|
||||
// adds dest, src0.ab
|
||||
// dest.xyzw = src0.a + src0.b;
|
||||
|
@ -1049,7 +995,7 @@ enum class AluScalarOpcode {
|
|||
kRetainPrev = 50,
|
||||
};
|
||||
|
||||
enum class AluVectorOpcode {
|
||||
enum class AluVectorOpcode : uint32_t {
|
||||
// Per-Component Floating-Point Add
|
||||
// add dest, src0, src1
|
||||
// dest.x = src0.x + src1.x;
|
||||
|
@ -1373,9 +1319,7 @@ struct AluInstruction {
|
|||
return vector_write_mask() || is_export() ||
|
||||
AluVectorOpcodeHasSideEffects(vector_opcode());
|
||||
}
|
||||
AluVectorOpcode vector_opcode() const {
|
||||
return static_cast<AluVectorOpcode>(data_.vector_opc);
|
||||
}
|
||||
AluVectorOpcode vector_opcode() const { return data_.vector_opc; }
|
||||
uint32_t vector_write_mask() const { return data_.vector_write_mask; }
|
||||
uint32_t vector_dest() const { return data_.vector_dest; }
|
||||
bool is_vector_dest_relative() const { return data_.vector_dest_rel == 1; }
|
||||
|
@ -1385,9 +1329,7 @@ struct AluInstruction {
|
|||
return scalar_opcode() != AluScalarOpcode::kRetainPrev ||
|
||||
(!is_export() && scalar_write_mask() != 0);
|
||||
}
|
||||
AluScalarOpcode scalar_opcode() const {
|
||||
return static_cast<AluScalarOpcode>(data_.scalar_opc);
|
||||
}
|
||||
AluScalarOpcode scalar_opcode() const { return data_.scalar_opc; }
|
||||
uint32_t scalar_write_mask() const { return data_.scalar_write_mask; }
|
||||
uint32_t scalar_dest() const { return data_.scalar_dest; }
|
||||
bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; }
|
||||
|
@ -1459,7 +1401,7 @@ struct AluInstruction {
|
|||
uint32_t scalar_write_mask : 4;
|
||||
uint32_t vector_clamp : 1;
|
||||
uint32_t scalar_clamp : 1;
|
||||
uint32_t scalar_opc : 6; // instr_scalar_opc_t
|
||||
AluScalarOpcode scalar_opc : 6;
|
||||
});
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t src3_swiz : 8;
|
||||
|
@ -1478,7 +1420,7 @@ struct AluInstruction {
|
|||
uint32_t src3_reg : 8;
|
||||
uint32_t src2_reg : 8;
|
||||
uint32_t src1_reg : 8;
|
||||
uint32_t vector_opc : 5; // instr_vector_opc_t
|
||||
AluVectorOpcode vector_opc : 5;
|
||||
uint32_t src3_sel : 1;
|
||||
uint32_t src2_sel : 1;
|
||||
uint32_t src1_sel : 1;
|
||||
|
|
|
@ -654,9 +654,8 @@ VkDescriptorSet BufferCache::PrepareVertexSet(
|
|||
// trace_writer_.WriteMemoryRead(physical_address, source_length);
|
||||
|
||||
// Upload (or get a cached copy of) the buffer.
|
||||
auto buffer_ref =
|
||||
UploadVertexBuffer(command_buffer, physical_address, source_length,
|
||||
static_cast<Endian>(fetch->endian), fence);
|
||||
auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address,
|
||||
source_length, fetch->endian, fence);
|
||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||
// Failed to upload buffer.
|
||||
XELOGW("Failed to upload vertex buffer!");
|
||||
|
|
|
@ -364,7 +364,7 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state,
|
|||
}
|
||||
|
||||
bool PipelineCache::TranslateShader(VulkanShader* shader,
|
||||
xenos::xe_gpu_program_cntl_t cntl) {
|
||||
reg::SQ_PROGRAM_CNTL cntl) {
|
||||
// Perform translation.
|
||||
// If this fails the shader will be marked as invalid and ignored later.
|
||||
if (!shader_translator_->Translate(shader, PrimitiveType::kNone, cntl)) {
|
||||
|
@ -808,44 +808,33 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
|
|||
}
|
||||
|
||||
bool push_constants_dirty = full_update || viewport_state_dirty;
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
|
||||
push_constants_dirty |= SetShadowRegister(&regs.sq_program_cntl.value,
|
||||
XE_GPU_REG_SQ_PROGRAM_CNTL);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
|
||||
SetShadowRegister(&regs.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
|
||||
SetShadowRegister(&regs.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
|
||||
SetShadowRegister(&regs.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
|
||||
SetShadowRegister(&regs.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF);
|
||||
push_constants_dirty |=
|
||||
SetShadowRegister(&regs.pa_su_point_size, XE_GPU_REG_PA_SU_POINT_SIZE);
|
||||
if (push_constants_dirty) {
|
||||
xenos::xe_gpu_program_cntl_t program_cntl;
|
||||
program_cntl.dword_0 = regs.sq_program_cntl;
|
||||
|
||||
// Normal vertex shaders only, for now.
|
||||
// TODO(benvanik): transform feedback/memexport.
|
||||
// https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h
|
||||
// Draw calls skipped if they have unsupported export modes.
|
||||
// 0 = positionOnly
|
||||
// 1 = unused
|
||||
// 2 = sprite
|
||||
// 3 = edge
|
||||
// 4 = kill
|
||||
// 5 = spriteKill
|
||||
// 6 = edgeKill
|
||||
// 7 = multipass
|
||||
assert_true(program_cntl.vs_export_mode == 0 ||
|
||||
program_cntl.vs_export_mode == 2 ||
|
||||
program_cntl.vs_export_mode == 7);
|
||||
assert_false(program_cntl.gen_index_vtx);
|
||||
assert_true(regs.sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kPosition1Vector ||
|
||||
regs.sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kPosition2VectorsSprite ||
|
||||
regs.sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kMultipass);
|
||||
assert_false(regs.sq_program_cntl.gen_index_vtx);
|
||||
|
||||
SpirvPushConstants push_constants = {};
|
||||
|
||||
|
@ -909,7 +898,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
|
|||
|
||||
// Whether to populate a register in the pixel shader with frag coord.
|
||||
int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF;
|
||||
push_constants.ps_param_gen = program_cntl.param_gen ? ps_param_gen : -1;
|
||||
push_constants.ps_param_gen =
|
||||
regs.sq_program_cntl.param_gen ? ps_param_gen : -1;
|
||||
|
||||
vkCmdPushConstants(command_buffer, pipeline_layout_,
|
||||
VK_SHADER_STAGE_VERTEX_BIT |
|
||||
|
@ -1061,7 +1051,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
bool dirty = false;
|
||||
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
|
||||
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
|
||||
dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
|
||||
dirty |= SetShadowRegister(&regs.sq_program_cntl.value,
|
||||
XE_GPU_REG_SQ_PROGRAM_CNTL);
|
||||
dirty |= regs.vertex_shader != vertex_shader;
|
||||
dirty |= regs.pixel_shader != pixel_shader;
|
||||
dirty |= regs.primitive_type != primitive_type;
|
||||
|
@ -1073,17 +1064,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
return UpdateStatus::kCompatible;
|
||||
}
|
||||
|
||||
xenos::xe_gpu_program_cntl_t sq_program_cntl;
|
||||
sq_program_cntl.dword_0 = regs.sq_program_cntl;
|
||||
|
||||
if (!vertex_shader->is_translated() &&
|
||||
!TranslateShader(vertex_shader, sq_program_cntl)) {
|
||||
!TranslateShader(vertex_shader, regs.sq_program_cntl)) {
|
||||
XELOGE("Failed to translate the vertex shader!");
|
||||
return UpdateStatus::kError;
|
||||
}
|
||||
|
||||
if (pixel_shader && !pixel_shader->is_translated() &&
|
||||
!TranslateShader(pixel_shader, sq_program_cntl)) {
|
||||
!TranslateShader(pixel_shader, regs.sq_program_cntl)) {
|
||||
XELOGE("Failed to translate the pixel shader!");
|
||||
return UpdateStatus::kError;
|
||||
}
|
||||
|
@ -1513,16 +1501,15 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
|
|||
auto& state_info = update_color_blend_state_info_;
|
||||
|
||||
bool dirty = false;
|
||||
dirty |= SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL);
|
||||
dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
|
||||
dirty |=
|
||||
SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
|
||||
SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL0);
|
||||
dirty |=
|
||||
SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
|
||||
SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL1);
|
||||
dirty |=
|
||||
SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
|
||||
SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL2);
|
||||
dirty |=
|
||||
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
|
||||
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL3);
|
||||
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
|
||||
XXH64_update(&hash_state_, &regs, sizeof(regs));
|
||||
if (!dirty) {
|
||||
|
@ -1568,7 +1555,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
|
|||
for (int i = 0; i < 4; ++i) {
|
||||
uint32_t blend_control = regs.rb_blendcontrol[i];
|
||||
auto& attachment_state = attachment_states[i];
|
||||
attachment_state.blendEnable = !(regs.rb_colorcontrol & 0x20);
|
||||
attachment_state.blendEnable = (blend_control & 0x1FFF1FFF) != 0x00010001;
|
||||
// A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
|
||||
attachment_state.srcColorBlendFactor =
|
||||
kBlendFactorMap[(blend_control & 0x0000001F) >> 0];
|
||||
|
|
|
@ -79,7 +79,7 @@ class PipelineCache {
|
|||
// state.
|
||||
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
|
||||
|
||||
bool TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl);
|
||||
bool TranslateShader(VulkanShader* shader, reg::SQ_PROGRAM_CNTL cntl);
|
||||
|
||||
void DumpShaderDisasmAMD(VkPipeline pipeline);
|
||||
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
|
||||
|
@ -170,7 +170,7 @@ class PipelineCache {
|
|||
struct UpdateShaderStagesRegisters {
|
||||
PrimitiveType primitive_type;
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
uint32_t sq_program_cntl;
|
||||
reg::SQ_PROGRAM_CNTL sq_program_cntl;
|
||||
VulkanShader* vertex_shader;
|
||||
VulkanShader* pixel_shader;
|
||||
|
||||
|
@ -256,7 +256,6 @@ class PipelineCache {
|
|||
VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_;
|
||||
|
||||
struct UpdateColorBlendStateRegisters {
|
||||
uint32_t rb_colorcontrol;
|
||||
uint32_t rb_color_mask;
|
||||
uint32_t rb_blendcontrol[4];
|
||||
uint32_t rb_modecontrol;
|
||||
|
@ -290,13 +289,13 @@ class PipelineCache {
|
|||
float rb_blend_rgba[4];
|
||||
uint32_t rb_stencilrefmask;
|
||||
|
||||
uint32_t sq_program_cntl;
|
||||
reg::SQ_PROGRAM_CNTL sq_program_cntl;
|
||||
uint32_t sq_context_misc;
|
||||
uint32_t rb_colorcontrol;
|
||||
uint32_t rb_color_info;
|
||||
uint32_t rb_color1_info;
|
||||
uint32_t rb_color2_info;
|
||||
uint32_t rb_color3_info;
|
||||
reg::RB_COLOR_INFO rb_color_info;
|
||||
reg::RB_COLOR_INFO rb_color1_info;
|
||||
reg::RB_COLOR_INFO rb_color2_info;
|
||||
reg::RB_COLOR_INFO rb_color3_info;
|
||||
float rb_alpha_ref;
|
||||
uint32_t pa_su_point_size;
|
||||
|
||||
|
|
|
@ -962,7 +962,7 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
break;
|
||||
}
|
||||
assert_true(fetch->type == 3);
|
||||
assert_true(fetch->endian == 2);
|
||||
assert_true(fetch->endian == Endian::k8in32);
|
||||
assert_true(fetch->size == 6);
|
||||
const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2);
|
||||
trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4);
|
||||
|
@ -974,7 +974,7 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
float dest_points[6];
|
||||
for (int i = 0; i < 6; i++) {
|
||||
dest_points[i] =
|
||||
GpuSwap(xe::load<float>(vertex_addr + i * 4), Endian(fetch->endian)) +
|
||||
GpuSwap(xe::load<float>(vertex_addr + i * 4), fetch->endian) +
|
||||
vtx_offset;
|
||||
}
|
||||
|
||||
|
@ -1000,10 +1000,10 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
if (is_color_source) {
|
||||
// Source from a color target.
|
||||
reg::RB_COLOR_INFO color_info[4] = {
|
||||
regs[XE_GPU_REG_RB_COLOR_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
|
||||
regs.Get<reg::RB_COLOR_INFO>(),
|
||||
regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR1_INFO),
|
||||
regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR2_INFO),
|
||||
regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR3_INFO),
|
||||
};
|
||||
color_edram_base = color_info[copy_src_select].color_base;
|
||||
color_format = color_info[copy_src_select].color_format;
|
||||
|
@ -1023,7 +1023,7 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
Endian resolve_endian = Endian::k8in32;
|
||||
if (copy_regs->copy_dest_info.copy_dest_endian <= Endian128::k16in32) {
|
||||
resolve_endian =
|
||||
static_cast<Endian>(copy_regs->copy_dest_info.copy_dest_endian.value());
|
||||
static_cast<Endian>(copy_regs->copy_dest_info.copy_dest_endian);
|
||||
}
|
||||
|
||||
// Demand a resolve texture from the texture cache.
|
||||
|
@ -1289,7 +1289,7 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
// Perform any requested clears.
|
||||
uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
|
||||
uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
|
||||
uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
|
||||
uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32;
|
||||
assert_true(copy_color_clear == copy_color_clear_low);
|
||||
|
||||
if (color_clear_enabled) {
|
||||
|
|
|
@ -80,12 +80,6 @@ inline bool IsPrimitiveTwoFaced(bool tessellated, PrimitiveType type) {
|
|||
return false;
|
||||
}
|
||||
|
||||
enum class TessellationMode : uint32_t {
|
||||
kDiscrete = 0,
|
||||
kContinuous = 1,
|
||||
kAdaptive = 2,
|
||||
};
|
||||
|
||||
enum class Dimension : uint32_t {
|
||||
k1D = 0,
|
||||
k2D = 1,
|
||||
|
@ -167,14 +161,14 @@ enum class SampleLocation : uint32_t {
|
|||
};
|
||||
|
||||
enum class Endian : uint32_t {
|
||||
kUnspecified = 0,
|
||||
kNone = 0,
|
||||
k8in16 = 1,
|
||||
k8in32 = 2,
|
||||
k16in32 = 3,
|
||||
};
|
||||
|
||||
enum class Endian128 : uint32_t {
|
||||
kUnspecified = 0,
|
||||
kNone = 0,
|
||||
k8in16 = 1,
|
||||
k8in32 = 2,
|
||||
k16in32 = 3,
|
||||
|
@ -231,6 +225,77 @@ enum class DepthRenderTargetFormat : uint32_t {
|
|||
kD24FS8 = 1,
|
||||
};
|
||||
|
||||
// a2xx_sq_surfaceformat +
|
||||
// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas
|
||||
// Texture data formats. Every value from 0 to 63 has an enumerator, which is
// exactly the range of the 6-bit `format` field of xe_gpu_texture_fetch_t.
// kUnknown (0xFFFFFFFF) cannot be represented in that 6-bit field, so it can
// only appear as a value produced by code, never as one read from the field.
enum class TextureFormat : uint32_t {
|
||||
k_1_REVERSE = 0,
|
||||
k_1 = 1,
|
||||
k_8 = 2,
|
||||
k_1_5_5_5 = 3,
|
||||
k_5_6_5 = 4,
|
||||
k_6_5_5 = 5,
|
||||
k_8_8_8_8 = 6,
|
||||
k_2_10_10_10 = 7,
|
||||
k_8_A = 8,
|
||||
k_8_B = 9,
|
||||
k_8_8 = 10,
|
||||
k_Cr_Y1_Cb_Y0_REP = 11,
|
||||
k_Y1_Cr_Y0_Cb_REP = 12,
|
||||
k_16_16_EDRAM = 13,
|
||||
k_8_8_8_8_A = 14,
|
||||
k_4_4_4_4 = 15,
|
||||
k_10_11_11 = 16,
|
||||
k_11_11_10 = 17,
|
||||
k_DXT1 = 18,
|
||||
k_DXT2_3 = 19,
|
||||
k_DXT4_5 = 20,
|
||||
k_16_16_16_16_EDRAM = 21,
|
||||
k_24_8 = 22,
|
||||
k_24_8_FLOAT = 23,
|
||||
k_16 = 24,
|
||||
k_16_16 = 25,
|
||||
k_16_16_16_16 = 26,
|
||||
k_16_EXPAND = 27,
|
||||
k_16_16_EXPAND = 28,
|
||||
k_16_16_16_16_EXPAND = 29,
|
||||
k_16_FLOAT = 30,
|
||||
k_16_16_FLOAT = 31,
|
||||
k_16_16_16_16_FLOAT = 32,
|
||||
k_32 = 33,
|
||||
k_32_32 = 34,
|
||||
k_32_32_32_32 = 35,
|
||||
k_32_FLOAT = 36,
|
||||
k_32_32_FLOAT = 37,
|
||||
k_32_32_32_32_FLOAT = 38,
|
||||
k_32_AS_8 = 39,
|
||||
k_32_AS_8_8 = 40,
|
||||
k_16_MPEG = 41,
|
||||
k_16_16_MPEG = 42,
|
||||
k_8_INTERLACED = 43,
|
||||
k_32_AS_8_INTERLACED = 44,
|
||||
k_32_AS_8_8_INTERLACED = 45,
|
||||
k_16_INTERLACED = 46,
|
||||
k_16_MPEG_INTERLACED = 47,
|
||||
k_16_16_MPEG_INTERLACED = 48,
|
||||
k_DXN = 49,
|
||||
k_8_8_8_8_AS_16_16_16_16 = 50,
|
||||
k_DXT1_AS_16_16_16_16 = 51,
|
||||
k_DXT2_3_AS_16_16_16_16 = 52,
|
||||
k_DXT4_5_AS_16_16_16_16 = 53,
|
||||
k_2_10_10_10_AS_16_16_16_16 = 54,
|
||||
k_10_11_11_AS_16_16_16_16 = 55,
|
||||
k_11_11_10_AS_16_16_16_16 = 56,
|
||||
k_32_32_32_FLOAT = 57,
|
||||
k_DXT3A = 58,
|
||||
k_DXT5A = 59,
|
||||
k_CTX1 = 60,
|
||||
k_DXT3A_AS_1_1_1_1 = 61,
|
||||
k_8_8_8_8_GAMMA_EDRAM = 62,
|
||||
k_2_10_10_10_FLOAT_EDRAM = 63,
|
||||
|
||||
kUnknown = 0xFFFFFFFFu,
|
||||
};
|
||||
|
||||
// Subset of a2xx_sq_surfaceformat - formats that RTs can be resolved to.
|
||||
enum class ColorFormat : uint32_t {
|
||||
k_8 = 2,
|
||||
|
@ -334,6 +399,28 @@ inline int GetVertexFormatSizeInWords(VertexFormat format) {
|
|||
}
|
||||
}
|
||||
|
||||
// Comparison functions encoded as a 3-bit mask of accepted outcomes: bit 0 is
// "less", bit 1 is "equal" and bit 2 is "greater". Compound functions are the
// bitwise OR of those bits, e.g. kLessEqual (0b011) == kLess | kEqual and
// kNotEqual (0b101) == kLess | kGreater; kNever sets no bit, kAlways sets all.
enum class CompareFunction : uint32_t {
|
||||
kNever = 0b000,
|
||||
kLess = 0b001,
|
||||
kEqual = 0b010,
|
||||
kLessEqual = 0b011,
|
||||
kGreater = 0b100,
|
||||
kNotEqual = 0b101,
|
||||
kGreaterEqual = 0b110,
|
||||
kAlways = 0b111,
|
||||
};
|
||||
|
||||
// Stencil operations. The eight enumerators cover every value from 0 to 7,
// i.e. the full range of a 3-bit field.
// NOTE(review): the register field that stores this value is not visible
// here - confirm its width against the register definitions.
enum class StencilOp : uint32_t {
|
||||
kKeep = 0,
|
||||
kZero = 1,
|
||||
kReplace = 2,
|
||||
kIncrementClamp = 3,
|
||||
kDecrementClamp = 4,
|
||||
kInvert = 5,
|
||||
kIncrementWrap = 6,
|
||||
kDecrementWrap = 7,
|
||||
};
|
||||
|
||||
// adreno_rb_blend_factor
|
||||
enum class BlendFactor : uint32_t {
|
||||
kZero = 0,
|
||||
|
@ -351,11 +438,7 @@ enum class BlendFactor : uint32_t {
|
|||
kConstantAlpha = 14,
|
||||
kOneMinusConstantAlpha = 15,
|
||||
kSrcAlphaSaturate = 16,
|
||||
// SRC1 likely not used on the Xbox 360 - only available in Direct3D 9Ex.
|
||||
kSrc1Color = 20,
|
||||
kOneMinusSrc1Color = 21,
|
||||
kSrc1Alpha = 22,
|
||||
kOneMinusSrc1Alpha = 23,
|
||||
// SRC1 added on Adreno.
|
||||
};
|
||||
|
||||
enum class BlendOp : uint32_t {
|
||||
|
@ -375,6 +458,57 @@ typedef enum {
|
|||
XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF,
|
||||
} XE_GPU_INVALIDATE_MASK;
|
||||
|
||||
// instr_arbitrary_filter_t
|
||||
// Arbitrary filter selection, stored in the 3-bit `arbitrary_filter` field of
// TextureFetchInstruction's packed data. Value 6 has no enumerator.
// NOTE(review): kUseFetchConst presumably defers the choice to the texture
// fetch constant, like the same-named TextureFilter/AnisoFilter values -
// confirm.
enum class ArbitraryFilter : uint32_t {
|
||||
k2x4Sym = 0,
|
||||
k2x4Asym = 1,
|
||||
k4x2Sym = 2,
|
||||
k4x2Asym = 3,
|
||||
k4x4Sym = 4,
|
||||
k4x4Asym = 5,
|
||||
kUseFetchConst = 7,
|
||||
};
|
||||
|
||||
// a2xx_sq_ps_vtx_mode
|
||||
// Vertex shader export mode, read from SQ_PROGRAM_CNTL's vs_export_mode.
// Value 1 has no enumerator. The Vulkan pipeline cache currently asserts that
// only kPosition1Vector, kPosition2VectorsSprite or kMultipass is in use.
enum class VertexShaderExportMode : uint32_t {
|
||||
kPosition1Vector = 0,
|
||||
kPosition2VectorsSprite = 2,
|
||||
kPosition2VectorsEdge = 3,
|
||||
kPosition2VectorsKill = 4,
|
||||
kPosition2VectorsSpriteKill = 5,
|
||||
kPosition2VectorsEdgeKill = 6,
|
||||
kMultipass = 7,
|
||||
};
|
||||
|
||||
// Selects which sample positions are used: centroids, pixel centers, or both.
// Only values 0-2 have enumerators.
// NOTE(review): the register field holding this value is not visible here -
// confirm where it is read.
enum class SampleControl : uint32_t {
|
||||
kCentroidsOnly = 0,
|
||||
kCentersOnly = 1,
|
||||
kCentroidsAndCenters = 2,
|
||||
};
|
||||
|
||||
// Output path selection with three defined values (0-2).
// NOTE(review): presumably the path select of a VGT output path control
// register - the consuming register is not visible here, confirm.
enum class VGTOutputPath : uint32_t {
|
||||
kVertexReuse = 0,
|
||||
kTessellationEnable = 1,
|
||||
kPassthru = 2,
|
||||
};
|
||||
|
||||
// Tessellation mode with three defined values (0-2). This definition keeps
// the same enumerators and values as the one that previously sat next to
// IsPrimitiveTwoFaced; only its position in the header changed.
enum class TessellationMode : uint32_t {
|
||||
kDiscrete = 0,
|
||||
kContinuous = 1,
|
||||
kAdaptive = 2,
|
||||
};
|
||||
|
||||
// Whether polygons are drawn as plain triangles or re-emitted using a
// separately specified polygon type (see the per-enumerator notes below).
enum class PolygonModeEnable : uint32_t {
|
||||
kDisabled = 0, // Render triangles.
|
||||
kDualMode = 1, // Send 2 sets of 3 polygons with the specified polygon type.
|
||||
};
|
||||
|
||||
// Primitive kind used to draw a polygon: points, lines or filled triangles.
// NOTE(review): presumably the "specified polygon type" referred to by
// PolygonModeEnable::kDualMode - confirm against the register layout.
enum class PolygonType : uint32_t {
|
||||
kPoints = 0,
|
||||
kLines = 1,
|
||||
kTriangles = 2,
|
||||
};
|
||||
|
||||
enum class ModeControl : uint32_t {
|
||||
kIgnore = 0,
|
||||
kColorDepth = 4,
|
||||
|
@ -426,7 +560,7 @@ typedef enum {
|
|||
|
||||
inline uint16_t GpuSwap(uint16_t value, Endian endianness) {
|
||||
switch (endianness) {
|
||||
case Endian::kUnspecified:
|
||||
case Endian::kNone:
|
||||
// No swap.
|
||||
return value;
|
||||
case Endian::k8in16:
|
||||
|
@ -441,7 +575,7 @@ inline uint16_t GpuSwap(uint16_t value, Endian endianness) {
|
|||
inline uint32_t GpuSwap(uint32_t value, Endian endianness) {
|
||||
switch (endianness) {
|
||||
default:
|
||||
case Endian::kUnspecified:
|
||||
case Endian::kNone:
|
||||
// No swap.
|
||||
return value;
|
||||
case Endian::k8in16:
|
||||
|
@ -471,35 +605,15 @@ inline uint32_t GpuToCpu(uint32_t p) { return p; }
|
|||
|
||||
inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; }
|
||||
|
||||
// XE_GPU_REG_SQ_PROGRAM_CNTL
|
||||
typedef union {
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t vs_regs : 6;
|
||||
uint32_t unk_0 : 2;
|
||||
uint32_t ps_regs : 6;
|
||||
uint32_t unk_1 : 2;
|
||||
uint32_t vs_resource : 1;
|
||||
uint32_t ps_resource : 1;
|
||||
uint32_t param_gen : 1;
|
||||
uint32_t gen_index_pix : 1;
|
||||
uint32_t vs_export_count : 4;
|
||||
uint32_t vs_export_mode : 3;
|
||||
uint32_t ps_export_depth : 1;
|
||||
uint32_t ps_export_count : 3;
|
||||
uint32_t gen_index_vtx : 1;
|
||||
});
|
||||
XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; });
|
||||
} xe_gpu_program_cntl_t;
|
||||
|
||||
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
|
||||
XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t type : 2; // +0
|
||||
uint32_t address : 30; // +2
|
||||
uint32_t address : 30; // +2 address in dwords
|
||||
|
||||
uint32_t endian : 2; // +0
|
||||
uint32_t size : 24; // +2 size in words
|
||||
uint32_t unk1 : 6; // +26
|
||||
Endian endian : 2; // +0
|
||||
uint32_t size : 24; // +2 size in words
|
||||
uint32_t unk1 : 6; // +26
|
||||
});
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t dword_0;
|
||||
|
@ -510,34 +624,36 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
|
|||
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
|
||||
XEPACKEDUNION(xe_gpu_texture_fetch_t, {
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t type : 2; // +0 dword_0
|
||||
uint32_t sign_x : 2; // +2
|
||||
uint32_t sign_y : 2; // +4
|
||||
uint32_t sign_z : 2; // +6
|
||||
uint32_t sign_w : 2; // +8
|
||||
uint32_t clamp_x : 3; // +10
|
||||
uint32_t clamp_y : 3; // +13
|
||||
uint32_t clamp_z : 3; // +16
|
||||
uint32_t unused_0 : 3; // +19
|
||||
uint32_t pitch : 9; // +22 byte_pitch >> 5
|
||||
uint32_t tiled : 1; // +31
|
||||
uint32_t type : 2; // +0 dword_0
|
||||
TextureSign sign_x : 2; // +2
|
||||
TextureSign sign_y : 2; // +4
|
||||
TextureSign sign_z : 2; // +6
|
||||
TextureSign sign_w : 2; // +8
|
||||
ClampMode clamp_x : 3; // +10
|
||||
ClampMode clamp_y : 3; // +13
|
||||
ClampMode clamp_z : 3; // +16
|
||||
uint32_t signed_rf_mode_all : 1; // +19
|
||||
// TODO(Triang3l): 1 or 2 dim_tbd bits?
|
||||
uint32_t unk_0 : 2; // +20
|
||||
uint32_t pitch : 9; // +22 byte_pitch >> 5
|
||||
uint32_t tiled : 1; // +31
|
||||
|
||||
uint32_t format : 6; // +0 dword_1
|
||||
uint32_t endianness : 2; // +6
|
||||
uint32_t request_size : 2; // +8
|
||||
uint32_t stacked : 1; // +10
|
||||
uint32_t clamp_policy : 1; // +11 d3d/opengl
|
||||
uint32_t base_address : 20; // +12
|
||||
TextureFormat format : 6; // +0 dword_1
|
||||
Endian endianness : 2; // +6
|
||||
uint32_t request_size : 2; // +8
|
||||
uint32_t stacked : 1; // +10
|
||||
uint32_t nearest_clamp_policy : 1; // +11 d3d/opengl
|
||||
uint32_t base_address : 20; // +12 base address >> 12
|
||||
|
||||
union { // dword_2
|
||||
struct {
|
||||
uint32_t width : 24;
|
||||
uint32_t unused : 8;
|
||||
uint32_t : 8;
|
||||
} size_1d;
|
||||
struct {
|
||||
uint32_t width : 13;
|
||||
uint32_t height : 13;
|
||||
uint32_t unused : 6;
|
||||
uint32_t : 6;
|
||||
} size_2d;
|
||||
struct {
|
||||
uint32_t width : 13;
|
||||
|
@ -551,15 +667,16 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
|
|||
} size_3d;
|
||||
};
|
||||
|
||||
uint32_t num_format : 1; // +0 dword_3 frac/int
|
||||
uint32_t swizzle : 12; // +1 xyzw, 3b each (XE_GPU_SWIZZLE)
|
||||
int32_t exp_adjust : 6; // +13
|
||||
uint32_t mag_filter : 2; // +19
|
||||
uint32_t min_filter : 2; // +21
|
||||
uint32_t mip_filter : 2; // +23
|
||||
uint32_t aniso_filter : 3; // +25
|
||||
uint32_t unused_3 : 3; // +28
|
||||
uint32_t border_size : 1; // +31
|
||||
uint32_t num_format : 1; // +0 dword_3 frac/int
|
||||
// xyzw, 3b each (XE_GPU_SWIZZLE)
|
||||
uint32_t swizzle : 12; // +1
|
||||
int32_t exp_adjust : 6; // +13
|
||||
TextureFilter mag_filter : 2; // +19
|
||||
TextureFilter min_filter : 2; // +21
|
||||
TextureFilter mip_filter : 2; // +23
|
||||
AnisoFilter aniso_filter : 3; // +25
|
||||
xenos::ArbitraryFilter arbitrary_filter : 3; // +28
|
||||
uint32_t border_size : 1; // +31
|
||||
|
||||
uint32_t vol_mag_filter : 1; // +0 dword_4
|
||||
uint32_t vol_min_filter : 1; // +1
|
||||
|
@ -571,13 +688,13 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
|
|||
int32_t grad_exp_adjust_h : 5; // +22
|
||||
int32_t grad_exp_adjust_v : 5; // +27
|
||||
|
||||
uint32_t border_color : 2; // +0 dword_5
|
||||
uint32_t force_bcw_max : 1; // +2
|
||||
uint32_t tri_clamp : 2; // +3
|
||||
int32_t aniso_bias : 4; // +5
|
||||
uint32_t dimension : 2; // +9
|
||||
uint32_t packed_mips : 1; // +11
|
||||
uint32_t mip_address : 20; // +12
|
||||
BorderColor border_color : 2; // +0 dword_5
|
||||
uint32_t force_bc_w_to_max : 1; // +2
|
||||
uint32_t tri_clamp : 2; // +3
|
||||
int32_t aniso_bias : 4; // +5
|
||||
uint32_t dimension : 2; // +9
|
||||
uint32_t packed_mips : 1; // +11
|
||||
uint32_t mip_address : 20; // +12 mip address >> 12
|
||||
});
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
uint32_t dword_0;
|
||||
|
|
Loading…
Reference in New Issue