[GPU] Remove most hardcoded register/instruction layouts from common and D3D12 code

This commit is contained in:
Triang3l 2019-10-20 19:40:37 +03:00
parent f83269cf8c
commit a9ed73bdd1
24 changed files with 896 additions and 942 deletions

View File

@ -1,51 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2017 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_BASE_BIT_FIELD_H_
#define XENIA_BASE_BIT_FIELD_H_
#include <cstdint>
#include <cstdlib>
#include <type_traits>
namespace xe {
// Bitfield, where position starts at the LSB.
// Read-only view of an n_bits-wide field located `position` bits above the LSB
// of an integer storage word.
//
// T        - type the field is read as (an integer type or an enum).
// position - index of the field's lowest bit, counted from the LSB.
// n_bits   - width of the field in bits; must be at least 1 and the field must
//            fit entirely inside the storage word.
//
// Fields whose underlying type is signed are sign-extended from their top bit.
template <typename T, size_t position, size_t n_bits>
struct bf {
  // For enum values, we strip them down to an underlying type.
  typedef
      typename std::conditional<std::is_enum<T>::value, std::underlying_type<T>,
                                std::remove_reference<T>>::type::type
          value_type;

  static_assert(n_bits > 0, "A bit field must be at least one bit wide");
  static_assert(position + n_bits <= sizeof(value_type) * 8,
                "The bit field must fit inside its storage type");

  bf() = default;

  inline operator T() const { return value(); }

  // Extracts the field. All arithmetic is done in uint64_t so that:
  //  - shifting by the full storage width is never required (that is undefined
  //    behavior for a field spanning the whole word),
  //  - a negative signed storage word is not arithmetically shifted, which
  //    would smear sign bits into the field and then be sign-extended a second
  //    time by the xor/subtract step below.
  inline T value() const {
    uint64_t bits =
        (static_cast<uint64_t>(storage) >> position) & low_bits_mask();
    value_type result;
    if (std::is_signed<value_type>::value) {
      // Sign-extend from bit (n_bits - 1); unsigned wrap-around is
      // well-defined, so this is exact for every field width.
      const uint64_t sign_mask = uint64_t(1) << (n_bits - 1);
      bits = (bits ^ sign_mask) - sign_mask;
      result = static_cast<value_type>(static_cast<int64_t>(bits));
    } else {
      result = static_cast<value_type>(bits);
    }
    return static_cast<T>(result);
  }

  // Mask of the bits occupied by the field within the storage word.
  inline value_type mask() const {
    return static_cast<value_type>(low_bits_mask() << position);
  }

  // n_bits ones in the low bits. The `& 63` only keeps the shift count in
  // range in the branch that is not taken when n_bits is 64.
  static constexpr uint64_t low_bits_mask() {
    return n_bits >= 64 ? ~uint64_t(0)
                        : ((uint64_t(1) << (n_bits & 63)) - 1);
  }

  value_type storage;
};
} // namespace xe
#endif // XENIA_BASE_BIT_FIELD_H_

View File

@ -350,20 +350,20 @@ void CommandProcessor::MakeCoherent() {
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
RegisterFile* regs = register_file_; RegisterFile* regs = register_file_;
auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; auto& status_host = regs->Get<reg::COHER_STATUS_HOST>();
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
if (!(status_host & 0x80000000ul)) { if (!status_host.status) {
return; return;
} }
const char* action = "N/A"; const char* action = "N/A";
if ((status_host & 0x03000000) == 0x03000000) { if (status_host.vc_action_ena && status_host.tc_action_ena) {
action = "VC | TC"; action = "VC | TC";
} else if (status_host & 0x02000000) { } else if (status_host.tc_action_ena) {
action = "TC"; action = "TC";
} else if (status_host & 0x01000000) { } else if (status_host.vc_action_ena) {
action = "VC"; action = "VC";
} }
@ -372,8 +372,7 @@ void CommandProcessor::MakeCoherent() {
base_host + size_host, size_host, action); base_host + size_host, size_host, action);
// Mark coherent. // Mark coherent.
status_host &= ~0x80000000ul; status_host.status = 0;
regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host;
} }
void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }

View File

@ -151,7 +151,7 @@ class CommandProcessor {
protected: protected:
struct IndexBufferInfo { struct IndexBufferInfo {
IndexFormat format = IndexFormat::kInt16; IndexFormat format = IndexFormat::kInt16;
Endian endianness = Endian::kUnspecified; Endian endianness = Endian::kNone;
uint32_t count = 0; uint32_t count = 0;
uint32_t guest_base = 0; uint32_t guest_base = 0;
size_t length = 0; size_t length = 0;

View File

@ -1345,7 +1345,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Update system constants before uploading them. // Update system constants before uploading them.
UpdateSystemConstantValues( UpdateSystemConstantValues(
memexport_used, primitive_two_faced, line_loop_closing_index, memexport_used, primitive_two_faced, line_loop_closing_index,
indexed ? index_buffer_info->endianness : Endian::kUnspecified, indexed ? index_buffer_info->endianness : Endian::kNone,
adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0, adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0,
early_z, GetCurrentColorMask(pixel_shader), pipeline_render_targets); early_z, GetCurrentColorMask(pixel_shader), pipeline_render_targets);
@ -1975,7 +1975,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
// Stencil reference value. Per-face reference not supported by Direct3D 12, // Stencil reference value. Per-face reference not supported by Direct3D 12,
// choose the back face one only if drawing only back faces. // choose the back face one only if drawing only back faces.
uint32_t stencil_ref_mask_reg; Register stencil_ref_mask_reg;
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
if (primitive_two_faced && if (primitive_two_faced &&
regs.Get<reg::RB_DEPTHCONTROL>().backface_enable && regs.Get<reg::RB_DEPTHCONTROL>().backface_enable &&
@ -2032,13 +2032,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
float rt_clamp[4][4]; float rt_clamp[4][4];
uint32_t rt_keep_masks[4][2]; uint32_t rt_keep_masks[4][2];
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
static const uint32_t kColorInfoRegs[] = { auto color_info = regs.Get<reg::RB_COLOR_INFO>(
XE_GPU_REG_RB_COLOR_INFO, reg::RB_COLOR_INFO::rt_register_indices[i]);
XE_GPU_REG_RB_COLOR1_INFO,
XE_GPU_REG_RB_COLOR2_INFO,
XE_GPU_REG_RB_COLOR3_INFO,
};
auto color_info = regs.Get<reg::RB_COLOR_INFO>(kColorInfoRegs[i]);
color_infos[i] = color_info; color_infos[i] = color_info;
if (IsROVUsedForEDRAM()) { if (IsROVUsedForEDRAM()) {
@ -2125,7 +2120,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
} }
// Alpha test. // Alpha test.
if (rb_colorcontrol.alpha_test_enable) { if (rb_colorcontrol.alpha_test_enable) {
flags |= uint32_t(rb_colorcontrol.alpha_func.value()) flags |= uint32_t(rb_colorcontrol.alpha_func)
<< DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
} else { } else {
flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess | flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess |
@ -2149,7 +2144,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24; flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24;
} }
if (rb_depthcontrol.z_enable) { if (rb_depthcontrol.z_enable) {
flags |= uint32_t(rb_depthcontrol.zfunc.value()) flags |= uint32_t(rb_depthcontrol.zfunc)
<< DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift; << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift;
if (rb_depthcontrol.z_write_enable) { if (rb_depthcontrol.z_write_enable) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite; flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite;
@ -2350,7 +2345,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// EDRAM pitch for ROV writing. // EDRAM pitch for ROV writing.
if (IsROVUsedForEDRAM()) { if (IsROVUsedForEDRAM()) {
uint32_t edram_pitch_tiles = uint32_t edram_pitch_tiles =
((std::min(rb_surface_info.surface_pitch.value(), 2560u) * ((std::min(rb_surface_info.surface_pitch, 2560u) *
(rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) + (rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
79) / 79) /
80; 80;
@ -2408,14 +2403,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
4 * sizeof(float)) != 0; 4 * sizeof(float)) != 0;
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
4 * sizeof(float)); 4 * sizeof(float));
static const uint32_t kBlendControlRegs[] = {
XE_GPU_REG_RB_BLENDCONTROL_0,
XE_GPU_REG_RB_BLENDCONTROL_1,
XE_GPU_REG_RB_BLENDCONTROL_2,
XE_GPU_REG_RB_BLENDCONTROL_3,
};
uint32_t blend_factors_ops = uint32_t blend_factors_ops =
regs[kBlendControlRegs[i]].u32 & 0x1FFF1FFF; regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] != dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops; blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -2537,7 +2526,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.edram_stencil_back_write_mask = system_constants_.edram_stencil_back_write_mask =
rb_stencilrefmask_bf.stencilwritemask; rb_stencilrefmask_bf.stencilwritemask;
uint32_t stencil_func_ops_bf = uint32_t stencil_func_ops_bf =
(rb_depthcontrol.value >> 8) & ((1 << 12) - 1); (rb_depthcontrol.value >> 20) & ((1 << 12) - 1);
dirty |= system_constants_.edram_stencil_back_func_ops != dirty |= system_constants_.edram_stencil_back_func_ops !=
stencil_func_ops_bf; stencil_func_ops_bf;
system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf; system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf;

View File

@ -363,7 +363,7 @@ bool PipelineCache::GetCurrentStateDescription(
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineDescription& description_out) { PipelineDescription& description_out) {
auto& regs = *register_file_; auto& regs = *register_file_;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type);
// Initialize all unused fields to zero for comparison/hashing. // Initialize all unused fields to zero for comparison/hashing.
@ -381,7 +381,7 @@ bool PipelineCache::GetCurrentStateDescription(
description_out.pixel_shader = pixel_shader; description_out.pixel_shader = pixel_shader;
// Index buffer strip cut value. // Index buffer strip cut value.
if (pa_su_sc_mode_cntl & (1 << 21)) { if (pa_su_sc_mode_cntl.multi_prim_ib_ena) {
// Not using 0xFFFF with 32-bit indices because in index buffers it will be // Not using 0xFFFF with 32-bit indices because in index buffers it will be
// 0xFFFF0000 anyway due to endianness. // 0xFFFF0000 anyway due to endianness.
description_out.strip_cut_index = index_format == IndexFormat::kInt32 description_out.strip_cut_index = index_format == IndexFormat::kInt32
@ -479,53 +479,60 @@ bool PipelineCache::GetCurrentStateDescription(
// Xenos fill mode 1). // Xenos fill mode 1).
// Here we also assume that only one side is culled - if two sides are culled, // Here we also assume that only one side is culled - if two sides are culled,
// the D3D12 command processor will drop such draw early. // the D3D12 command processor will drop such draw early.
uint32_t cull_mode = primitive_two_faced ? (pa_su_sc_mode_cntl & 0x3) : 0; bool cull_front, cull_back;
if (primitive_two_faced) {
cull_front = pa_su_sc_mode_cntl.cull_front != 0;
cull_back = pa_su_sc_mode_cntl.cull_back != 0;
} else {
cull_front = false;
cull_back = false;
}
float poly_offset = 0.0f, poly_offset_scale = 0.0f; float poly_offset = 0.0f, poly_offset_scale = 0.0f;
if (primitive_two_faced) { if (primitive_two_faced) {
description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0; description_out.front_counter_clockwise = pa_su_sc_mode_cntl.face == 0;
if (cull_mode == 1) { if (cull_front) {
description_out.cull_mode = PipelineCullMode::kFront; description_out.cull_mode = PipelineCullMode::kFront;
} else if (cull_mode == 2) { } else if (cull_back) {
description_out.cull_mode = PipelineCullMode::kBack; description_out.cull_mode = PipelineCullMode::kBack;
} else { } else {
description_out.cull_mode = PipelineCullMode::kNone; description_out.cull_mode = PipelineCullMode::kNone;
} }
// With ROV, the depth bias is applied in the pixel shader because // With ROV, the depth bias is applied in the pixel shader because
// per-sample depth is needed for MSAA. // per-sample depth is needed for MSAA.
if (cull_mode != 1) { if (!cull_front) {
// Front faces aren't culled. // Front faces aren't culled.
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7; // Direct3D 12, unfortunately, doesn't support point fill mode.
if (fill_mode == 0 || fill_mode == 1) { if (pa_su_sc_mode_cntl.polymode_front_ptype !=
xenos::PolygonType::kTriangles) {
description_out.fill_mode_wireframe = 1; description_out.fill_mode_wireframe = 1;
} }
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 11))) { if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
} }
} }
if (cull_mode != 2) { if (!cull_back) {
// Back faces aren't culled. // Back faces aren't culled.
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7; if (pa_su_sc_mode_cntl.polymode_back_ptype !=
if (fill_mode == 0 || fill_mode == 1) { xenos::PolygonType::kTriangles) {
description_out.fill_mode_wireframe = 1; description_out.fill_mode_wireframe = 1;
} }
// Prefer front depth bias because in general, front faces are the ones // Prefer front depth bias because in general, front faces are the ones
// that are rendered (except for shadow volumes). // that are rendered (except for shadow volumes).
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 12)) && if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_back_enable &&
poly_offset == 0.0f && poly_offset_scale == 0.0f) { poly_offset == 0.0f && poly_offset_scale == 0.0f) {
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
} }
} }
if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) { if (pa_su_sc_mode_cntl.poly_mode == xenos::PolygonModeEnable::kDisabled) {
// Fill mode is disabled.
description_out.fill_mode_wireframe = 0; description_out.fill_mode_wireframe = 0;
} }
} else { } else {
// Filled front faces only. // Filled front faces only.
// Use front depth bias if POLY_OFFSET_PARA_ENABLED // Use front depth bias if POLY_OFFSET_PARA_ENABLED
// (POLY_OFFSET_FRONT_ENABLED is for two-sided primitives). // (POLY_OFFSET_FRONT_ENABLED is for two-sided primitives).
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 13))) { if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
} }
@ -543,8 +550,8 @@ bool PipelineCache::GetCurrentStateDescription(
// of Duty 4 (vehicledamage map explosion decals) and Red Dead Redemption // of Duty 4 (vehicledamage map explosion decals) and Red Dead Redemption
// (shadows - 2^17 is not enough, 2^18 hasn't been tested, but 2^19 // (shadows - 2^17 is not enough, 2^18 hasn't been tested, but 2^19
// eliminates the acne). // eliminates the acne).
if (((register_file_->values[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 0x1) == if (regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
uint32_t(DepthRenderTargetFormat::kD24FS8)) { DepthRenderTargetFormat::kD24FS8) {
poly_offset *= float(1 << 19); poly_offset *= float(1 << 19);
} else { } else {
poly_offset *= float(1 << 23); poly_offset *= float(1 << 23);
@ -564,48 +571,49 @@ bool PipelineCache::GetCurrentStateDescription(
primitive_type == PrimitiveType::kQuadPatch)) { primitive_type == PrimitiveType::kQuadPatch)) {
description_out.fill_mode_wireframe = 1; description_out.fill_mode_wireframe = 1;
} }
// CLIP_DISABLE description_out.depth_clip = !regs.Get<reg::PA_CL_CLIP_CNTL>().clip_disable;
description_out.depth_clip =
(regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0;
if (edram_rov_used_) { if (edram_rov_used_) {
description_out.rov_msaa = description_out.rov_msaa =
((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0; regs.Get<reg::RB_SURFACE_INFO>().msaa_samples != MsaaSamples::k1X;
} else { } else {
// Depth/stencil. No stencil, always passing depth test and no depth writing // Depth/stencil. No stencil, always passing depth test and no depth writing
// means depth disabled. // means depth disabled.
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) { if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) {
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
if (rb_depthcontrol & 0x2) { if (rb_depthcontrol.z_enable) {
description_out.depth_func = (rb_depthcontrol >> 4) & 0x7; description_out.depth_func = rb_depthcontrol.zfunc;
description_out.depth_write = (rb_depthcontrol & 0x4) != 0; description_out.depth_write = rb_depthcontrol.z_write_enable;
} else { } else {
description_out.depth_func = 0b111; description_out.depth_func = CompareFunction::kAlways;
} }
if (rb_depthcontrol & 0x1) { if (rb_depthcontrol.stencil_enable) {
description_out.stencil_enable = 1; description_out.stencil_enable = 1;
bool stencil_backface_enable = bool stencil_backface_enable =
primitive_two_faced && (rb_depthcontrol & 0x80); primitive_two_faced && rb_depthcontrol.backface_enable;
uint32_t stencil_masks;
// Per-face masks not supported by Direct3D 12, choose the back face // Per-face masks not supported by Direct3D 12, choose the back face
// ones only if drawing only back faces. // ones only if drawing only back faces.
if (stencil_backface_enable && cull_mode == 1) { Register stencil_ref_mask_reg;
stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32; if (stencil_backface_enable && cull_front) {
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
} else { } else {
stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK;
} }
description_out.stencil_read_mask = (stencil_masks >> 8) & 0xFF; auto stencil_ref_mask =
description_out.stencil_write_mask = (stencil_masks >> 16) & 0xFF; regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_reg);
description_out.stencil_front_fail_op = (rb_depthcontrol >> 11) & 0x7; description_out.stencil_read_mask = stencil_ref_mask.stencilmask;
description_out.stencil_write_mask = stencil_ref_mask.stencilwritemask;
description_out.stencil_front_fail_op = rb_depthcontrol.stencilfail;
description_out.stencil_front_depth_fail_op = description_out.stencil_front_depth_fail_op =
(rb_depthcontrol >> 17) & 0x7; rb_depthcontrol.stencilzfail;
description_out.stencil_front_pass_op = (rb_depthcontrol >> 14) & 0x7; description_out.stencil_front_pass_op = rb_depthcontrol.stencilzpass;
description_out.stencil_front_func = (rb_depthcontrol >> 8) & 0x7; description_out.stencil_front_func = rb_depthcontrol.stencilfunc;
if (stencil_backface_enable) { if (stencil_backface_enable) {
description_out.stencil_back_fail_op = (rb_depthcontrol >> 23) & 0x7; description_out.stencil_back_fail_op = rb_depthcontrol.stencilfail_bf;
description_out.stencil_back_depth_fail_op = description_out.stencil_back_depth_fail_op =
(rb_depthcontrol >> 29) & 0x7; rb_depthcontrol.stencilzfail_bf;
description_out.stencil_back_pass_op = (rb_depthcontrol >> 26) & 0x7; description_out.stencil_back_pass_op =
description_out.stencil_back_func = (rb_depthcontrol >> 20) & 0x7; rb_depthcontrol.stencilzpass_bf;
description_out.stencil_back_func = rb_depthcontrol.stencilfunc_bf;
} else { } else {
description_out.stencil_back_fail_op = description_out.stencil_back_fail_op =
description_out.stencil_front_fail_op; description_out.stencil_front_fail_op;
@ -618,13 +626,13 @@ bool PipelineCache::GetCurrentStateDescription(
} }
} }
// If not binding the DSV, ignore the format in the hash. // If not binding the DSV, ignore the format in the hash.
if (description_out.depth_func != 0b111 || description_out.depth_write || if (description_out.depth_func != CompareFunction::kAlways ||
description_out.stencil_enable) { description_out.depth_write || description_out.stencil_enable) {
description_out.depth_format = DepthRenderTargetFormat( description_out.depth_format =
(regs[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 1); regs.Get<reg::RB_DEPTH_INFO>().depth_format;
} }
} else { } else {
description_out.depth_func = 0b111; description_out.depth_func = CompareFunction::kAlways;
} }
if (early_z) { if (early_z) {
description_out.force_early_z = 1; description_out.force_early_z = 1;
@ -684,38 +692,25 @@ bool PipelineCache::GetCurrentStateDescription(
if (render_targets[i].format == DXGI_FORMAT_UNKNOWN) { if (render_targets[i].format == DXGI_FORMAT_UNKNOWN) {
break; break;
} }
uint32_t guest_rt_index = render_targets[i].guest_render_target;
uint32_t color_info, blendcontrol;
switch (guest_rt_index) {
case 1:
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32;
break;
case 2:
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32;
break;
case 3:
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32;
break;
default:
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32;
break;
}
PipelineRenderTarget& rt = description_out.render_targets[i]; PipelineRenderTarget& rt = description_out.render_targets[i];
rt.used = 1; rt.used = 1;
rt.format = RenderTargetCache::GetBaseColorFormat( uint32_t guest_rt_index = render_targets[i].guest_render_target;
ColorRenderTargetFormat((color_info >> 16) & 0xF)); auto color_info = regs.Get<reg::RB_COLOR_INFO>(
reg::RB_COLOR_INFO::rt_register_indices[guest_rt_index]);
rt.format =
RenderTargetCache::GetBaseColorFormat(color_info.color_format);
rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF; rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF;
if (rt.write_mask) { if (rt.write_mask) {
rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F]; auto blendcontrol = regs.Get<reg::RB_BLENDCONTROL>(
rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F]; reg::RB_BLENDCONTROL::rt_register_indices[guest_rt_index]);
rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7); rt.src_blend = kBlendFactorMap[uint32_t(blendcontrol.color_srcblend)];
rt.src_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 16) & 0x1F]; rt.dest_blend = kBlendFactorMap[uint32_t(blendcontrol.color_destblend)];
rt.dest_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 24) & 0x1F]; rt.blend_op = blendcontrol.color_comb_fcn;
rt.blend_op_alpha = BlendOp((blendcontrol >> 21) & 0x7); rt.src_blend_alpha =
kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_srcblend)];
rt.dest_blend_alpha =
kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_destblend)];
rt.blend_op_alpha = blendcontrol.alpha_comb_fcn;
} else { } else {
rt.src_blend = PipelineBlendFactor::kOne; rt.src_blend = PipelineBlendFactor::kOne;
rt.dest_blend = PipelineBlendFactor::kZero; rt.dest_blend = PipelineBlendFactor::kZero;
@ -941,15 +936,17 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
if (!edram_rov_used_) { if (!edram_rov_used_) {
// Depth/stencil. // Depth/stencil.
if (description.depth_func != 0b111 || description.depth_write) { if (description.depth_func != CompareFunction::kAlways ||
description.depth_write) {
state_desc.DepthStencilState.DepthEnable = TRUE; state_desc.DepthStencilState.DepthEnable = TRUE;
state_desc.DepthStencilState.DepthWriteMask = state_desc.DepthStencilState.DepthWriteMask =
description.depth_write ? D3D12_DEPTH_WRITE_MASK_ALL description.depth_write ? D3D12_DEPTH_WRITE_MASK_ALL
: D3D12_DEPTH_WRITE_MASK_ZERO; : D3D12_DEPTH_WRITE_MASK_ZERO;
// Comparison functions are the same in Direct3D 12 but plus one (minus // Comparison functions are the same in Direct3D 12 but plus one (minus
// one, bit 0 for less, bit 1 for equal, bit 2 for greater). // one, bit 0 for less, bit 1 for equal, bit 2 for greater).
state_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC( state_desc.DepthStencilState.DepthFunc =
uint32_t(D3D12_COMPARISON_FUNC_NEVER) + description.depth_func); D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) +
uint32_t(description.depth_func));
} }
if (description.stencil_enable) { if (description.stencil_enable) {
state_desc.DepthStencilState.StencilEnable = TRUE; state_desc.DepthStencilState.StencilEnable = TRUE;
@ -958,26 +955,30 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
state_desc.DepthStencilState.StencilWriteMask = state_desc.DepthStencilState.StencilWriteMask =
description.stencil_write_mask; description.stencil_write_mask;
// Stencil operations are the same in Direct3D 12 too but plus one. // Stencil operations are the same in Direct3D 12 too but plus one.
state_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP( state_desc.DepthStencilState.FrontFace.StencilFailOp =
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_fail_op); D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
uint32_t(description.stencil_front_fail_op));
state_desc.DepthStencilState.FrontFace.StencilDepthFailOp = state_desc.DepthStencilState.FrontFace.StencilDepthFailOp =
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
description.stencil_front_depth_fail_op); uint32_t(description.stencil_front_depth_fail_op));
state_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP( state_desc.DepthStencilState.FrontFace.StencilPassOp =
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_pass_op); D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
uint32_t(description.stencil_front_pass_op));
state_desc.DepthStencilState.FrontFace.StencilFunc = state_desc.DepthStencilState.FrontFace.StencilFunc =
D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) +
description.stencil_front_func); uint32_t(description.stencil_front_func));
state_desc.DepthStencilState.BackFace.StencilFailOp = D3D12_STENCIL_OP( state_desc.DepthStencilState.BackFace.StencilFailOp =
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_fail_op); D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
uint32_t(description.stencil_back_fail_op));
state_desc.DepthStencilState.BackFace.StencilDepthFailOp = state_desc.DepthStencilState.BackFace.StencilDepthFailOp =
D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
description.stencil_back_depth_fail_op); uint32_t(description.stencil_back_depth_fail_op));
state_desc.DepthStencilState.BackFace.StencilPassOp = D3D12_STENCIL_OP( state_desc.DepthStencilState.BackFace.StencilPassOp =
uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_pass_op); D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) +
uint32_t(description.stencil_back_pass_op));
state_desc.DepthStencilState.BackFace.StencilFunc = state_desc.DepthStencilState.BackFace.StencilFunc =
D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) +
description.stencil_back_func); uint32_t(description.stencil_back_func));
} }
if (state_desc.DepthStencilState.DepthEnable || if (state_desc.DepthStencilState.DepthEnable ||
state_desc.DepthStencilState.StencilEnable) { state_desc.DepthStencilState.StencilEnable) {

View File

@ -152,21 +152,21 @@ class PipelineCache {
uint32_t depth_clip : 1; // 15 uint32_t depth_clip : 1; // 15
uint32_t rov_msaa : 1; // 16 uint32_t rov_msaa : 1; // 16
DepthRenderTargetFormat depth_format : 1; // 17 DepthRenderTargetFormat depth_format : 1; // 17
uint32_t depth_func : 3; // 20 CompareFunction depth_func : 3; // 20
uint32_t depth_write : 1; // 21 uint32_t depth_write : 1; // 21
uint32_t stencil_enable : 1; // 22 uint32_t stencil_enable : 1; // 22
uint32_t stencil_read_mask : 8; // 30 uint32_t stencil_read_mask : 8; // 30
uint32_t force_early_z : 1; // 31 uint32_t force_early_z : 1; // 31
uint32_t stencil_write_mask : 8; // 8 uint32_t stencil_write_mask : 8; // 8
uint32_t stencil_front_fail_op : 3; // 11 StencilOp stencil_front_fail_op : 3; // 11
uint32_t stencil_front_depth_fail_op : 3; // 14 StencilOp stencil_front_depth_fail_op : 3; // 14
uint32_t stencil_front_pass_op : 3; // 17 StencilOp stencil_front_pass_op : 3; // 17
uint32_t stencil_front_func : 3; // 20 CompareFunction stencil_front_func : 3; // 20
uint32_t stencil_back_fail_op : 3; // 23 StencilOp stencil_back_fail_op : 3; // 23
uint32_t stencil_back_depth_fail_op : 3; // 26 StencilOp stencil_back_depth_fail_op : 3; // 26
uint32_t stencil_back_pass_op : 3; // 29 StencilOp stencil_back_pass_op : 3; // 29
uint32_t stencil_back_func : 3; // 32 CompareFunction stencil_back_func : 3; // 32
PipelineRenderTarget render_targets[4]; PipelineRenderTarget render_targets[4];
}; };

View File

@ -192,7 +192,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) { D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) {
bool index_32bit = index_format == IndexFormat::kInt32; bool index_32bit = index_format == IndexFormat::kInt32;
auto& regs = *register_file_; auto& regs = *register_file_;
bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0; bool reset = regs.Get<reg::PA_SU_SC_MODE_CNTL>().multi_prim_ib_ena;
// Swap the reset index because we will be comparing unswapped values to it. // Swap the reset index because we will be comparing unswapped values to it.
uint32_t reset_index = xenos::GpuSwap( uint32_t reset_index = xenos::GpuSwap(
regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32, index_endianness); regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32, index_endianness);

View File

@ -541,16 +541,17 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
bool rov_used = command_processor_->IsROVUsedForEDRAM(); bool rov_used = command_processor_->IsROVUsedForEDRAM();
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u);
if (surface_pitch == 0) { if (surface_pitch == 0) {
// TODO(Triang3l): Do something if a memexport-only draw has 0 surface // TODO(Triang3l): Do something if a memexport-only draw has 0 surface
// pitch (never seen in any game so far, not sure if even legal). // pitch (never seen in any game so far, not sure if even legal).
return false; return false;
} }
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); uint32_t msaa_samples_x =
uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1;
uint32_t msaa_samples_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; uint32_t msaa_samples_y =
rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 2 : 1;
// Extract color/depth info in an unified way. // Extract color/depth info in an unified way.
bool enabled[5]; bool enabled[5];
@ -558,26 +559,27 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
uint32_t formats[5]; uint32_t formats[5];
bool formats_are_64bpp[5]; bool formats_are_64bpp[5];
uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader); uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader);
uint32_t rb_color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32};
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
enabled[i] = (color_mask & (0xF << (i * 4))) != 0; enabled[i] = (color_mask & (0xF << (i * 4))) != 0;
edram_bases[i] = std::min(rb_color_info[i] & 0xFFF, 2048u); auto color_info = regs.Get<reg::RB_COLOR_INFO>(
formats[i] = uint32_t(GetBaseColorFormat( reg::RB_COLOR_INFO::rt_register_indices[i]);
ColorRenderTargetFormat((rb_color_info[i] >> 16) & 0xF))); edram_bases[i] = std::min(color_info.color_base, 2048u);
formats[i] = uint32_t(GetBaseColorFormat(color_info.color_format));
formats_are_64bpp[i] = formats_are_64bpp[i] =
IsColorFormat64bpp(ColorRenderTargetFormat(formats[i])); IsColorFormat64bpp(ColorRenderTargetFormat(formats[i]));
} }
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
// 0x1 = stencil test, 0x2 = depth test. // 0x1 = stencil test, 0x2 = depth test.
enabled[4] = (rb_depthcontrol & (0x1 | 0x2)) != 0; enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
edram_bases[4] = std::min(rb_depth_info & 0xFFF, 2048u); edram_bases[4] = std::min(rb_depth_info.depth_base, 2048u);
formats[4] = (rb_depth_info >> 16) & 0x1; formats[4] = uint32_t(rb_depth_info.depth_format);
formats_are_64bpp[4] = false; formats_are_64bpp[4] = false;
// Don't mark depth regions as dirty if not writing the depth. // Don't mark depth regions as dirty if not writing the depth.
bool depth_readonly = (rb_depthcontrol & (0x1 | 0x4)) == 0; // TODO(Triang3l): Make a common function for checking if stencil writing is
// really done?
bool depth_readonly =
!rb_depthcontrol.stencil_enable && !rb_depthcontrol.z_write_enable;
bool full_update = false; bool full_update = false;
@ -590,7 +592,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// in the beginning of the frame or after resolves by setting the current // in the beginning of the frame or after resolves by setting the current
// pitch to 0. // pitch to 0.
if (current_surface_pitch_ != surface_pitch || if (current_surface_pitch_ != surface_pitch ||
current_msaa_samples_ != msaa_samples) { current_msaa_samples_ != rb_surface_info.msaa_samples) {
full_update = true; full_update = true;
} }
@ -632,26 +634,22 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// Get EDRAM usage of the current draw so dirty regions can be calculated. // Get EDRAM usage of the current draw so dirty regions can be calculated.
// See D3D12CommandProcessor::UpdateFixedFunctionState for more info. // See D3D12CommandProcessor::UpdateFixedFunctionState for more info.
int16_t window_offset_y = int32_t window_offset_y =
(regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32 >> 16) & 0x7FFF; regs.Get<reg::PA_SC_WINDOW_OFFSET>().window_y_offset;
if (window_offset_y & 0x4000) { auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
window_offset_y |= 0x8000; float viewport_scale_y = pa_cl_vte_cntl.vport_y_scale_ena
}
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2))
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
: 1280.0f; : 1280.0f;
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: std::abs(viewport_scale_y); : std::abs(viewport_scale_y);
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
viewport_offset_y += float(window_offset_y); viewport_offset_y += float(window_offset_y);
} }
uint32_t viewport_bottom = uint32_t(std::max( uint32_t viewport_bottom = uint32_t(std::max(
0.0f, std::ceil(viewport_offset_y + std::abs(viewport_scale_y)))); 0.0f, std::ceil(viewport_offset_y + std::abs(viewport_scale_y))));
uint32_t scissor_bottom = uint32_t scissor_bottom = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>().br_y;
(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32 >> 16) & 0x7FFF; if (!regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>().window_offset_disable) {
if (!(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32 & (1u << 31))) {
scissor_bottom = std::max(int32_t(scissor_bottom) + window_offset_y, 0); scissor_bottom = std::max(int32_t(scissor_bottom) + window_offset_y, 0);
} }
uint32_t dirty_bottom = uint32_t dirty_bottom =
@ -769,7 +767,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
ClearBindings(); ClearBindings();
current_surface_pitch_ = surface_pitch; current_surface_pitch_ = surface_pitch;
current_msaa_samples_ = msaa_samples; current_msaa_samples_ = rb_surface_info.msaa_samples;
if (!rov_used) { if (!rov_used) {
current_edram_max_rows_ = edram_max_rows; current_edram_max_rows_ = edram_max_rows;
} }
@ -801,8 +799,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
#endif #endif
} }
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u", XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
full_update ? "Full" : "Partial", surface_pitch, msaa_samples, full_update ? "Full" : "Partial", surface_pitch,
render_targets_to_attach); rb_surface_info.msaa_samples, render_targets_to_attach);
#if 0 #if 0
auto device = auto device =
@ -891,7 +889,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
if (!rov_used) { if (!rov_used) {
// Sample positions when loading depth must match sample positions when // Sample positions when loading depth must match sample positions when
// drawing. // drawing.
command_processor_->SetSamplePositions(msaa_samples); command_processor_->SetSamplePositions(rb_surface_info.msaa_samples);
// Load the contents of the new render targets from the EDRAM buffer (will // Load the contents of the new render targets from the EDRAM buffer (will
// change the state of the render targets to copy destination). // change the state of the render targets to copy destination).
@ -1007,18 +1005,14 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
auto& regs = *register_file_; auto& regs = *register_file_;
// Get the render target properties. // Get the render target properties.
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u);
if (surface_pitch == 0) { if (surface_pitch == 0) {
return true; return true;
} }
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
// Depth info is always needed because color resolve may also clear depth. // Depth info is always needed because color resolve may also clear depth.
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
uint32_t depth_edram_base = rb_depth_info & 0xFFF; uint32_t surface_index = regs.Get<reg::RB_COPY_CONTROL>().copy_src_select;
uint32_t depth_format = (rb_depth_info >> 16) & 0x1;
uint32_t surface_index = rb_copy_control & 0x7;
if (surface_index > 4) { if (surface_index > 4) {
assert_always(); assert_always();
return false; return false;
@ -1027,43 +1021,28 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
uint32_t surface_edram_base; uint32_t surface_edram_base;
uint32_t surface_format; uint32_t surface_format;
if (surface_is_depth) { if (surface_is_depth) {
surface_edram_base = depth_edram_base; surface_edram_base = rb_depth_info.depth_base;
surface_format = depth_format; surface_format = uint32_t(rb_depth_info.depth_format);
} else { } else {
uint32_t rb_color_info; auto color_info = regs.Get<reg::RB_COLOR_INFO>(
switch (surface_index) { reg::RB_COLOR_INFO::rt_register_indices[surface_index]);
case 1: surface_edram_base = color_info.color_base;
rb_color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; surface_format = uint32_t(GetBaseColorFormat(color_info.color_format));
break;
case 2:
rb_color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
break;
case 3:
rb_color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
break;
default:
rb_color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
break;
}
surface_edram_base = rb_color_info & 0xFFF;
surface_format = uint32_t(GetBaseColorFormat(
ColorRenderTargetFormat((rb_color_info >> 16) & 0xF)));
} }
// Get the resolve region since both copying and clearing need it. // Get the resolve region since both copying and clearing need it.
// HACK: Vertices to use are always in vf0. // HACK: Vertices to use are always in vf0.
auto fetch_group = reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>( const auto& fetch = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
&regs.values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
const auto& fetch = fetch_group->vertex_fetch_0;
assert_true(fetch.type == 3); assert_true(fetch.type == 3);
assert_true(fetch.endian == 2); assert_true(fetch.endian == Endian::k8in32);
assert_true(fetch.size == 6); assert_true(fetch.size == 6);
const uint8_t* src_vertex_address = const uint8_t* src_vertex_address =
memory->TranslatePhysical(fetch.address << 2); memory->TranslatePhysical(fetch.address << 2);
float vertices[6]; float vertices[6];
// Most vertices have a negative half pixel offset applied, which we reverse. // Most vertices have a negative half pixel offset applied, which we reverse.
float vertex_offset = float vertex_offset =
(regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1) ? 0.0f : 0.5f; regs.Get<reg::PA_SU_VTX_CNTL>().pix_center ? 0.0f : 0.5f;
for (uint32_t i = 0; i < 6; ++i) { for (uint32_t i = 0; i < 6; ++i) {
vertices[i] = vertices[i] =
xenos::GpuSwap(xe::load<float>(src_vertex_address + i * sizeof(float)), xenos::GpuSwap(xe::load<float>(src_vertex_address + i * sizeof(float)),
@ -1097,39 +1076,34 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
// vertices (-640,0)->(640,720), however, the destination texture pointer is // vertices (-640,0)->(640,720), however, the destination texture pointer is
// adjusted properly to the right half of the texture, and the source render // adjusted properly to the right half of the texture, and the source render
// target has a pitch of 800). // target has a pitch of 800).
auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
D3D12_RECT rect; D3D12_RECT rect;
rect.left = LONG(std::min(std::min(vertices[0], vertices[2]), vertices[4])); rect.left = LONG(std::min(std::min(vertices[0], vertices[2]), vertices[4]));
rect.right = LONG(std::max(std::max(vertices[0], vertices[2]), vertices[4])); rect.right = LONG(std::max(std::max(vertices[0], vertices[2]), vertices[4]));
rect.top = LONG(std::min(std::min(vertices[1], vertices[3]), vertices[5])); rect.top = LONG(std::min(std::min(vertices[1], vertices[3]), vertices[5]));
rect.bottom = LONG(std::max(std::max(vertices[1], vertices[3]), vertices[5])); rect.bottom = LONG(std::max(std::max(vertices[1], vertices[3]), vertices[5]));
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
rect.left += pa_sc_window_offset.window_x_offset;
rect.right += pa_sc_window_offset.window_x_offset;
rect.top += pa_sc_window_offset.window_y_offset;
rect.bottom += pa_sc_window_offset.window_y_offset;
}
D3D12_RECT scissor; D3D12_RECT scissor;
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
scissor.left = LONG(window_scissor_tl & 0x7FFF); scissor.left = pa_sc_window_scissor_tl.tl_x;
scissor.right = LONG(window_scissor_br & 0x7FFF); scissor.right = pa_sc_window_scissor_br.br_x;
scissor.top = LONG((window_scissor_tl >> 16) & 0x7FFF); scissor.top = pa_sc_window_scissor_tl.tl_y;
scissor.bottom = LONG((window_scissor_br >> 16) & 0x7FFF); scissor.bottom = pa_sc_window_scissor_br.br_y;
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { if (!pa_sc_window_scissor_tl.window_offset_disable) {
uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; scissor.left = std::max(
int16_t window_offset_x = pa_sc_window_offset & 0x7FFF; LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0));
int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF; scissor.right = std::max(
if (window_offset_x & 0x4000) { LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0));
window_offset_x |= 0x8000; scissor.top = std::max(
} LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0));
if (window_offset_y & 0x4000) { scissor.bottom = std::max(
window_offset_y |= 0x8000; LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0));
}
rect.left += window_offset_x;
rect.right += window_offset_x;
rect.top += window_offset_y;
rect.bottom += window_offset_y;
if (!(window_scissor_tl & (1u << 31))) {
scissor.left = std::max(LONG(scissor.left + window_offset_x), LONG(0));
scissor.right = std::max(LONG(scissor.right + window_offset_x), LONG(0));
scissor.top = std::max(LONG(scissor.top + window_offset_y), LONG(0));
scissor.bottom =
std::max(LONG(scissor.bottom + window_offset_y), LONG(0));
}
} }
rect.left = std::max(rect.left, scissor.left); rect.left = std::max(rect.left, scissor.left);
rect.right = std::min(rect.right, scissor.right); rect.right = std::min(rect.right, scissor.right);
@ -1140,9 +1114,9 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
"Resolve: (%d,%d)->(%d,%d) of RT %u (pitch %u, %u sample%s, format %u) " "Resolve: (%d,%d)->(%d,%d) of RT %u (pitch %u, %u sample%s, format %u) "
"at %u", "at %u",
rect.left, rect.top, rect.right, rect.bottom, surface_index, rect.left, rect.top, rect.right, rect.bottom, surface_index,
surface_pitch, 1 << uint32_t(msaa_samples), surface_pitch, 1 << uint32_t(rb_surface_info.msaa_samples),
msaa_samples != MsaaSamples::k1X ? "s" : "", surface_format, rb_surface_info.msaa_samples != MsaaSamples::k1X ? "s" : "",
surface_edram_base); surface_format, surface_edram_base);
if (rect.left >= rect.right || rect.top >= rect.bottom) { if (rect.left >= rect.right || rect.top >= rect.bottom) {
// Nothing to copy. // Nothing to copy.
@ -1157,18 +1131,20 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
// GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed // GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed
// clamping to the source render target size. // clamping to the source render target size.
bool result = bool result = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
ResolveCopy(shared_memory, texture_cache, surface_edram_base, surface_pitch, rb_surface_info.msaa_samples,
surface_pitch, msaa_samples, surface_is_depth, surface_format, surface_is_depth, surface_format, rect,
rect, written_address_out, written_length_out); written_address_out, written_length_out);
// Clear the color RT if needed. // Clear the color RT if needed.
if (!surface_is_depth) { if (!surface_is_depth) {
result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples, result &=
false, surface_format, rect); ResolveClear(surface_edram_base, surface_pitch,
rb_surface_info.msaa_samples, false, surface_format, rect);
} }
// Clear the depth RT if needed (may be cleared alongside color). // Clear the depth RT if needed (may be cleared alongside color).
result &= ResolveClear(depth_edram_base, surface_pitch, msaa_samples, true, result &= ResolveClear(rb_depth_info.depth_base, surface_pitch,
depth_format, rect); rb_surface_info.msaa_samples, true,
uint32_t(rb_depth_info.depth_format), rect);
return result; return result;
} }
@ -1183,19 +1159,18 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
auto& regs = *register_file_; auto& regs = *register_file_;
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; auto rb_copy_control = regs.Get<reg::RB_COPY_CONTROL>();
xenos::CopyCommand copy_command = if (rb_copy_control.copy_command != xenos::CopyCommand::kRaw &&
xenos::CopyCommand((rb_copy_control >> 20) & 0x3); rb_copy_control.copy_command != xenos::CopyCommand::kConvert) {
if (copy_command != xenos::CopyCommand::kRaw &&
copy_command != xenos::CopyCommand::kConvert) {
// TODO(Triang3l): Handle kConstantOne and kNull. // TODO(Triang3l): Handle kConstantOne and kNull.
assert_always();
return false; return false;
} }
auto command_list = command_processor_->GetDeferredCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
// Get format info. // Get format info.
uint32_t rb_copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32; auto rb_copy_dest_info = regs.Get<reg::RB_COPY_DEST_INFO>();
TextureFormat src_texture_format; TextureFormat src_texture_format;
bool src_64bpp; bool src_64bpp;
if (is_depth) { if (is_depth) {
@ -1222,14 +1197,15 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// The destination format is specified as k_8_8_8_8 when resolving depth, but // The destination format is specified as k_8_8_8_8 when resolving depth, but
// no format conversion is done for depth, so ignore it. // no format conversion is done for depth, so ignore it.
TextureFormat dest_format = TextureFormat dest_format =
is_depth ? src_texture_format is_depth
: GetBaseFormat(TextureFormat((rb_copy_dest_info >> 7) & 0x3F)); ? src_texture_format
: GetBaseFormat(TextureFormat(rb_copy_dest_info.copy_dest_format));
const FormatInfo* dest_format_info = FormatInfo::Get(dest_format); const FormatInfo* dest_format_info = FormatInfo::Get(dest_format);
// Get the destination region and clamp the source region to it. // Get the destination region and clamp the source region to it.
uint32_t rb_copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32; auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>();
uint32_t dest_pitch = rb_copy_dest_pitch & 0x3FFF; uint32_t dest_pitch = rb_copy_dest_pitch.copy_dest_pitch;
uint32_t dest_height = (rb_copy_dest_pitch >> 16) & 0x3FFF; uint32_t dest_height = rb_copy_dest_pitch.copy_dest_height;
if (dest_pitch == 0 || dest_height == 0) { if (dest_pitch == 0 || dest_height == 0) {
// Nothing to copy. // Nothing to copy.
return true; return true;
@ -1263,8 +1239,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF; uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
// An example of a 3D resolve destination is the color grading LUT (used // An example of a 3D resolve destination is the color grading LUT (used
// starting from the developer/publisher intro) in Dead Space 3. // starting from the developer/publisher intro) in Dead Space 3.
bool dest_3d = (rb_copy_dest_info & (1 << 3)) != 0; if (rb_copy_dest_info.copy_dest_array) {
if (dest_3d) {
dest_address += texture_util::GetTiledOffset3D( dest_address += texture_util::GetTiledOffset3D(
int(rect.left & ~LONG(31)), int(rect.top & ~LONG(31)), 0, dest_pitch, int(rect.left & ~LONG(31)), int(rect.top & ~LONG(31)), 0, dest_pitch,
dest_height, xe::log2_floor(dest_format_info->bits_per_pixel >> 3)); dest_height, xe::log2_floor(dest_format_info->bits_per_pixel >> 3));
@ -1279,21 +1254,20 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// resolve to 8bpp or 16bpp textures at very odd locations. // resolve to 8bpp or 16bpp textures at very odd locations.
return false; return false;
} }
uint32_t dest_z = dest_3d ? ((rb_copy_dest_info >> 4) & 0x7) : 0; uint32_t dest_z =
rb_copy_dest_info.copy_dest_array ? rb_copy_dest_info.copy_dest_slice : 0;
// See what samples we need and what we should do with them. // See what samples we need and what we should do with them.
xenos::CopySampleSelect sample_select = xenos::CopySampleSelect sample_select = rb_copy_control.copy_sample_select;
xenos::CopySampleSelect((rb_copy_control >> 4) & 0x7);
if (is_depth && sample_select > xenos::CopySampleSelect::k3) { if (is_depth && sample_select > xenos::CopySampleSelect::k3) {
assert_always(); assert_always();
return false; return false;
} }
Endian128 dest_endian = Endian128(rb_copy_dest_info & 0x7);
int32_t dest_exp_bias; int32_t dest_exp_bias;
if (is_depth) { if (is_depth) {
dest_exp_bias = 0; dest_exp_bias = 0;
} else { } else {
dest_exp_bias = int32_t((rb_copy_dest_info >> 16) << 26) >> 26; dest_exp_bias = rb_copy_dest_info.copy_dest_exp_bias;
if (ColorRenderTargetFormat(src_format) == if (ColorRenderTargetFormat(src_format) ==
ColorRenderTargetFormat::k_16_16 || ColorRenderTargetFormat::k_16_16 ||
ColorRenderTargetFormat(src_format) == ColorRenderTargetFormat(src_format) ==
@ -1309,14 +1283,14 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
} }
} }
} }
bool dest_swap = !is_depth && ((rb_copy_dest_info >> 24) & 0x1); bool dest_swap = !is_depth && rb_copy_dest_info.copy_dest_swap;
XELOGGPU( XELOGGPU(
"Resolve: Copying samples %u to 0x%.8X (%ux%u, %cD), destination Z %u, " "Resolve: Copying samples %u to 0x%.8X (%ux%u, %cD), destination Z %u, "
"destination format %s, exponent bias %d, red and blue %sswapped", "destination format %s, exponent bias %d, red and blue %sswapped",
uint32_t(sample_select), dest_address, dest_pitch, dest_height, uint32_t(sample_select), dest_address, dest_pitch, dest_height,
dest_3d ? '3' : '2', dest_z, dest_format_info->name, dest_exp_bias, rb_copy_dest_info.copy_dest_array ? '3' : '2', dest_z,
dest_swap ? "" : "not "); dest_format_info->name, dest_exp_bias, dest_swap ? "" : "not ");
// There are 2 paths for resolving in this function - they don't necessarily // There are 2 paths for resolving in this function - they don't necessarily
// have to map directly to kRaw and kConvert CopyCommands. // have to map directly to kRaw and kConvert CopyCommands.
@ -1344,7 +1318,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
resolution_scale_2x_ && resolution_scale_2x_ &&
cvars::d3d12_resolution_scale_resolve_edge_clamp && cvars::d3d12_resolution_scale_resolve_edge_clamp &&
cvars::d3d12_half_pixel_offset && cvars::d3d12_half_pixel_offset &&
!(regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1); !regs.Get<reg::PA_SU_VTX_CNTL>().pix_center;
if (sample_select <= xenos::CopySampleSelect::k3 && if (sample_select <= xenos::CopySampleSelect::k3 &&
src_texture_format == dest_format && dest_exp_bias == 0) { src_texture_format == dest_format && dest_exp_bias == 0) {
// ************************************************************************* // *************************************************************************
@ -1363,7 +1337,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
uint32_t dest_size; uint32_t dest_size;
uint32_t dest_modified_start = dest_address; uint32_t dest_modified_start = dest_address;
uint32_t dest_modified_length; uint32_t dest_modified_length;
if (dest_3d) { if (rb_copy_dest_info.copy_dest_array) {
// Depth granularity is 4 (though TiledAddress chaining is possible with 8 // Depth granularity is 4 (though TiledAddress chaining is possible with 8
// granularity). // granularity).
dest_size = texture_util::GetGuestMipSliceStorageSize( dest_size = texture_util::GetGuestMipSliceStorageSize(
@ -1442,8 +1416,10 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
assert_true(dest_pitch <= 8192); assert_true(dest_pitch <= 8192);
root_constants.tile_sample_dest_info = root_constants.tile_sample_dest_info =
((dest_pitch + 31) >> 5) | ((dest_pitch + 31) >> 5) |
(dest_3d ? (((dest_height + 31) >> 5) << 9) : 0) | (rb_copy_dest_info.copy_dest_array ? (((dest_height + 31) >> 5) << 9)
(uint32_t(sample_select) << 18) | (uint32_t(dest_endian) << 20); : 0) |
(uint32_t(sample_select) << 18) |
(uint32_t(rb_copy_dest_info.copy_dest_endian) << 20);
if (dest_swap) { if (dest_swap) {
root_constants.tile_sample_dest_info |= (1 << 23) | (src_format << 24); root_constants.tile_sample_dest_info |= (1 << 23) | (src_format << 24);
} }
@ -1797,10 +1773,12 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
// dest_address already adjusted, so offsets are & 31. // dest_address already adjusted, so offsets are & 31.
texture_cache->TileResolvedTexture( texture_cache->TileResolvedTexture(
dest_format, dest_address, dest_pitch, dest_height, dest_3d, dest_format, dest_address, dest_pitch, dest_height,
uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width, rb_copy_dest_info.copy_dest_array != 0, uint32_t(rect.left) & 31,
copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size, uint32_t(rect.top) & 31, dest_z, copy_width, copy_height,
resolve_target->footprint, &written_address_out, &written_length_out); rb_copy_dest_info.copy_dest_endian, copy_buffer,
resolve_target->copy_buffer_size, resolve_target->footprint,
&written_address_out, &written_length_out);
// Done with the copy buffer. // Done with the copy buffer.
@ -1817,9 +1795,15 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
auto& regs = *register_file_; auto& regs = *register_file_;
// Check if clearing is enabled. // Check if clearing is enabled.
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; auto rb_copy_control = regs.Get<reg::RB_COPY_CONTROL>();
if (!(rb_copy_control & (is_depth ? (1 << 9) : (1 << 8)))) { if (is_depth) {
return true; if (!rb_copy_control.depth_clear_enable) {
return true;
}
} else {
if (!rb_copy_control.color_clear_enable) {
return true;
}
} }
XELOGGPU("Resolve: Clearing the %s render target", XELOGGPU("Resolve: Clearing the %s render target",
@ -1886,7 +1870,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
} else if (is_64bpp) { } else if (is_64bpp) {
// TODO(Triang3l): Check which 32-bit portion is in which register. // TODO(Triang3l): Check which 32-bit portion is in which register.
root_constants.clear_color_high = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; root_constants.clear_color_high = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32;
command_processor_->SetComputePipeline(edram_clear_64bpp_pipeline_); command_processor_->SetComputePipeline(edram_clear_64bpp_pipeline_);
} else { } else {
Register reg = Register reg =

View File

@ -848,15 +848,13 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
continue; continue;
} }
TextureBinding& binding = texture_bindings_[index]; TextureBinding& binding = texture_bindings_[index];
uint32_t r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6; const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
auto group = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(&regs.values[r]);
TextureKey old_key = binding.key; TextureKey old_key = binding.key;
bool old_has_unsigned = binding.has_unsigned; bool old_has_unsigned = binding.has_unsigned;
bool old_has_signed = binding.has_signed; bool old_has_signed = binding.has_signed;
BindingInfoFromFetchConstant(group->texture_fetch, binding.key, BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzle,
&binding.swizzle, &binding.has_unsigned, &binding.has_unsigned, &binding.has_signed);
&binding.has_signed);
texture_keys_in_sync_ |= index_bit; texture_keys_in_sync_ |= index_bit;
if (binding.key.IsInvalid()) { if (binding.key.IsInvalid()) {
binding.texture = nullptr; binding.texture = nullptr;
@ -1142,18 +1140,15 @@ void TextureCache::WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv,
TextureCache::SamplerParameters TextureCache::GetSamplerParameters( TextureCache::SamplerParameters TextureCache::GetSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const { const D3D12Shader::SamplerBinding& binding) const {
auto& regs = *register_file_; auto& regs = *register_file_;
uint32_t r = const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
auto group =
reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(&regs.values[r]);
auto& fetch = group->texture_fetch;
SamplerParameters parameters; SamplerParameters parameters;
parameters.clamp_x = ClampMode(fetch.clamp_x); parameters.clamp_x = fetch.clamp_x;
parameters.clamp_y = ClampMode(fetch.clamp_y); parameters.clamp_y = fetch.clamp_y;
parameters.clamp_z = ClampMode(fetch.clamp_z); parameters.clamp_z = fetch.clamp_z;
parameters.border_color = BorderColor(fetch.border_color); parameters.border_color = fetch.border_color;
uint32_t mip_min_level = fetch.mip_min_level; uint32_t mip_min_level = fetch.mip_min_level;
uint32_t mip_max_level = fetch.mip_max_level; uint32_t mip_max_level = fetch.mip_max_level;
@ -1171,7 +1166,7 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters(
parameters.lod_bias = fetch.lod_bias; parameters.lod_bias = fetch.lod_bias;
AnisoFilter aniso_filter = binding.aniso_filter == AnisoFilter::kUseFetchConst AnisoFilter aniso_filter = binding.aniso_filter == AnisoFilter::kUseFetchConst
? AnisoFilter(fetch.aniso_filter) ? fetch.aniso_filter
: binding.aniso_filter; : binding.aniso_filter;
aniso_filter = std::min(aniso_filter, AnisoFilter::kMax_16_1); aniso_filter = std::min(aniso_filter, AnisoFilter::kMax_16_1);
parameters.aniso_filter = aniso_filter; parameters.aniso_filter = aniso_filter;
@ -1182,17 +1177,17 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters(
} else { } else {
TextureFilter mag_filter = TextureFilter mag_filter =
binding.mag_filter == TextureFilter::kUseFetchConst binding.mag_filter == TextureFilter::kUseFetchConst
? TextureFilter(fetch.mag_filter) ? fetch.mag_filter
: binding.mag_filter; : binding.mag_filter;
parameters.mag_linear = mag_filter == TextureFilter::kLinear; parameters.mag_linear = mag_filter == TextureFilter::kLinear;
TextureFilter min_filter = TextureFilter min_filter =
binding.min_filter == TextureFilter::kUseFetchConst binding.min_filter == TextureFilter::kUseFetchConst
? TextureFilter(fetch.min_filter) ? fetch.min_filter
: binding.min_filter; : binding.min_filter;
parameters.min_linear = min_filter == TextureFilter::kLinear; parameters.min_linear = min_filter == TextureFilter::kLinear;
TextureFilter mip_filter = TextureFilter mip_filter =
binding.mip_filter == TextureFilter::kUseFetchConst binding.mip_filter == TextureFilter::kUseFetchConst
? TextureFilter(fetch.mip_filter) ? fetch.mip_filter
: binding.mip_filter; : binding.mip_filter;
parameters.mip_linear = mip_filter == TextureFilter::kLinear; parameters.mip_linear = mip_filter == TextureFilter::kLinear;
// TODO(Triang3l): Investigate mip_filter TextureFilter::kBaseMap. // TODO(Triang3l): Investigate mip_filter TextureFilter::kBaseMap.
@ -1586,13 +1581,12 @@ void TextureCache::CreateScaledResolveBufferRawUAV(
bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle, bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle,
TextureFormat& format_out) { TextureFormat& format_out) {
auto group = reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>( auto& regs = *register_file_;
&register_file_->values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
auto& fetch = group->texture_fetch; XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
TextureKey key; TextureKey key;
uint32_t swizzle; uint32_t swizzle;
BindingInfoFromFetchConstant(group->texture_fetch, key, &swizzle, nullptr, BindingInfoFromFetchConstant(fetch, key, &swizzle, nullptr, nullptr);
nullptr);
if (key.base_page == 0 || key.dimension != Dimension::k2D) { if (key.base_page == 0 || key.dimension != Dimension::k2D) {
return false; return false;
} }
@ -1733,7 +1727,7 @@ void TextureCache::BindingInfoFromFetchConstant(
return; return;
} }
TextureFormat format = GetBaseFormat(TextureFormat(fetch.format)); TextureFormat format = GetBaseFormat(fetch.format);
key_out.base_page = base_page; key_out.base_page = base_page;
key_out.mip_page = mip_page; key_out.mip_page = mip_page;
@ -1745,7 +1739,7 @@ void TextureCache::BindingInfoFromFetchConstant(
key_out.tiled = fetch.tiled; key_out.tiled = fetch.tiled;
key_out.packed_mips = fetch.packed_mips; key_out.packed_mips = fetch.packed_mips;
key_out.format = format; key_out.format = format;
key_out.endianness = Endian(fetch.endianness); key_out.endianness = fetch.endianness;
if (swizzle_out != nullptr) { if (swizzle_out != nullptr) {
uint32_t swizzle = fetch.swizzle; uint32_t swizzle = fetch.swizzle;
@ -1783,16 +1777,16 @@ void TextureCache::BindingInfoFromFetchConstant(
} }
if (has_unsigned_out != nullptr) { if (has_unsigned_out != nullptr) {
*has_unsigned_out = TextureSign(fetch.sign_x) != TextureSign::kSigned || *has_unsigned_out = fetch.sign_x != TextureSign::kSigned ||
TextureSign(fetch.sign_y) != TextureSign::kSigned || fetch.sign_y != TextureSign::kSigned ||
TextureSign(fetch.sign_z) != TextureSign::kSigned || fetch.sign_z != TextureSign::kSigned ||
TextureSign(fetch.sign_w) != TextureSign::kSigned; fetch.sign_w != TextureSign::kSigned;
} }
if (has_signed_out != nullptr) { if (has_signed_out != nullptr) {
*has_signed_out = TextureSign(fetch.sign_x) == TextureSign::kSigned || *has_signed_out = fetch.sign_x == TextureSign::kSigned ||
TextureSign(fetch.sign_y) == TextureSign::kSigned || fetch.sign_y == TextureSign::kSigned ||
TextureSign(fetch.sign_z) == TextureSign::kSigned || fetch.sign_z == TextureSign::kSigned ||
TextureSign(fetch.sign_w) == TextureSign::kSigned; fetch.sign_w == TextureSign::kSigned;
} }
} }

View File

@ -134,7 +134,7 @@ XE_GPU_REGISTER(0x2184, kDword, SQ_WRAPPING_1)
XE_GPU_REGISTER(0x21F9, kDword, VGT_EVENT_INITIATOR) XE_GPU_REGISTER(0x21F9, kDword, VGT_EVENT_INITIATOR)
XE_GPU_REGISTER(0x2200, kDword, RB_DEPTHCONTROL) XE_GPU_REGISTER(0x2200, kDword, RB_DEPTHCONTROL)
XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL_0) XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL0)
XE_GPU_REGISTER(0x2202, kDword, RB_COLORCONTROL) XE_GPU_REGISTER(0x2202, kDword, RB_COLORCONTROL)
XE_GPU_REGISTER(0x2203, kDword, RB_HIZCONTROL) XE_GPU_REGISTER(0x2203, kDword, RB_HIZCONTROL)
XE_GPU_REGISTER(0x2204, kDword, PA_CL_CLIP_CNTL) XE_GPU_REGISTER(0x2204, kDword, PA_CL_CLIP_CNTL)
@ -142,9 +142,9 @@ XE_GPU_REGISTER(0x2205, kDword, PA_SU_SC_MODE_CNTL)
XE_GPU_REGISTER(0x2206, kDword, PA_CL_VTE_CNTL) XE_GPU_REGISTER(0x2206, kDword, PA_CL_VTE_CNTL)
XE_GPU_REGISTER(0x2207, kDword, VGT_CURRENT_BIN_ID_MIN) XE_GPU_REGISTER(0x2207, kDword, VGT_CURRENT_BIN_ID_MIN)
XE_GPU_REGISTER(0x2208, kDword, RB_MODECONTROL) XE_GPU_REGISTER(0x2208, kDword, RB_MODECONTROL)
XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL_1) XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL1)
XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL_2) XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL2)
XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL_3) XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL3)
XE_GPU_REGISTER(0x2280, kDword, PA_SU_POINT_SIZE) XE_GPU_REGISTER(0x2280, kDword, PA_SU_POINT_SIZE)
XE_GPU_REGISTER(0x2281, kDword, PA_SU_POINT_MINMAX) XE_GPU_REGISTER(0x2281, kDword, PA_SU_POINT_MINMAX)
@ -199,7 +199,7 @@ XE_GPU_REGISTER(0x231B, kDword, RB_COPY_DEST_INFO)
XE_GPU_REGISTER(0x231C, kDword, RB_HIZ_CLEAR) XE_GPU_REGISTER(0x231C, kDword, RB_HIZ_CLEAR)
XE_GPU_REGISTER(0x231D, kDword, RB_DEPTH_CLEAR) XE_GPU_REGISTER(0x231D, kDword, RB_DEPTH_CLEAR)
XE_GPU_REGISTER(0x231E, kDword, RB_COLOR_CLEAR) XE_GPU_REGISTER(0x231E, kDword, RB_COLOR_CLEAR)
XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LOW) XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LO)
XE_GPU_REGISTER(0x2320, kDword, RB_COPY_FUNC) XE_GPU_REGISTER(0x2320, kDword, RB_COPY_FUNC)
XE_GPU_REGISTER(0x2321, kDword, RB_COPY_REF) XE_GPU_REGISTER(0x2321, kDword, RB_COPY_REF)
XE_GPU_REGISTER(0x2322, kDword, RB_COPY_MASK) XE_GPU_REGISTER(0x2322, kDword, RB_COPY_MASK)

View File

@ -13,38 +13,51 @@ namespace xe {
namespace gpu { namespace gpu {
namespace reg { namespace reg {
constexpr uint32_t COHER_STATUS_HOST::register_index; constexpr Register COHER_STATUS_HOST::register_index;
constexpr uint32_t WAIT_UNTIL::register_index; constexpr Register WAIT_UNTIL::register_index;
constexpr uint32_t SQ_PROGRAM_CNTL::register_index; constexpr Register SQ_PROGRAM_CNTL::register_index;
constexpr uint32_t SQ_CONTEXT_MISC::register_index; constexpr Register SQ_CONTEXT_MISC::register_index;
constexpr uint32_t VGT_OUTPUT_PATH_CNTL::register_index; constexpr Register VGT_OUTPUT_PATH_CNTL::register_index;
constexpr uint32_t VGT_HOS_CNTL::register_index; constexpr Register VGT_HOS_CNTL::register_index;
constexpr uint32_t PA_SU_POINT_MINMAX::register_index; constexpr Register PA_SU_POINT_MINMAX::register_index;
constexpr uint32_t PA_SU_POINT_SIZE::register_index; constexpr Register PA_SU_POINT_SIZE::register_index;
constexpr uint32_t PA_SU_SC_MODE_CNTL::register_index; constexpr Register PA_SU_SC_MODE_CNTL::register_index;
constexpr uint32_t PA_SU_VTX_CNTL::register_index; constexpr Register PA_SU_VTX_CNTL::register_index;
constexpr uint32_t PA_SC_MPASS_PS_CNTL::register_index; constexpr Register PA_SC_MPASS_PS_CNTL::register_index;
constexpr uint32_t PA_SC_VIZ_QUERY::register_index; constexpr Register PA_SC_VIZ_QUERY::register_index;
constexpr uint32_t PA_CL_CLIP_CNTL::register_index; constexpr Register PA_CL_CLIP_CNTL::register_index;
constexpr uint32_t PA_CL_VTE_CNTL::register_index; constexpr Register PA_CL_VTE_CNTL::register_index;
constexpr uint32_t PA_SC_WINDOW_OFFSET::register_index; constexpr Register PA_SC_WINDOW_OFFSET::register_index;
constexpr uint32_t PA_SC_WINDOW_SCISSOR_TL::register_index; constexpr Register PA_SC_WINDOW_SCISSOR_TL::register_index;
constexpr uint32_t PA_SC_WINDOW_SCISSOR_BR::register_index; constexpr Register PA_SC_WINDOW_SCISSOR_BR::register_index;
constexpr uint32_t RB_MODECONTROL::register_index; constexpr Register RB_MODECONTROL::register_index;
constexpr uint32_t RB_SURFACE_INFO::register_index; constexpr Register RB_SURFACE_INFO::register_index;
constexpr uint32_t RB_COLORCONTROL::register_index; constexpr Register RB_COLORCONTROL::register_index;
constexpr uint32_t RB_COLOR_INFO::register_index; constexpr Register RB_COLOR_INFO::register_index;
constexpr uint32_t RB_COLOR_MASK::register_index; const Register RB_COLOR_INFO::rt_register_indices[4] = {
constexpr uint32_t RB_DEPTHCONTROL::register_index; XE_GPU_REG_RB_COLOR_INFO,
constexpr uint32_t RB_STENCILREFMASK::register_index; XE_GPU_REG_RB_COLOR1_INFO,
constexpr uint32_t RB_DEPTH_INFO::register_index; XE_GPU_REG_RB_COLOR2_INFO,
constexpr uint32_t RB_COPY_CONTROL::register_index; XE_GPU_REG_RB_COLOR3_INFO,
constexpr uint32_t RB_COPY_DEST_INFO::register_index; };
constexpr uint32_t RB_COPY_DEST_PITCH::register_index; constexpr Register RB_COLOR_MASK::register_index;
constexpr Register RB_BLENDCONTROL::register_index;
const Register RB_BLENDCONTROL::rt_register_indices[4] = {
XE_GPU_REG_RB_BLENDCONTROL0,
XE_GPU_REG_RB_BLENDCONTROL1,
XE_GPU_REG_RB_BLENDCONTROL2,
XE_GPU_REG_RB_BLENDCONTROL3,
};
constexpr Register RB_DEPTHCONTROL::register_index;
constexpr Register RB_STENCILREFMASK::register_index;
constexpr Register RB_DEPTH_INFO::register_index;
constexpr Register RB_COPY_CONTROL::register_index;
constexpr Register RB_COPY_DEST_INFO::register_index;
constexpr Register RB_COPY_DEST_PITCH::register_index;
} // namespace reg } // namespace reg
} // namespace gpu } // namespace gpu

View File

@ -13,11 +13,12 @@
#include <cstdint> #include <cstdint>
#include <cstdlib> #include <cstdlib>
#include "xenia/base/bit_field.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
// Most registers can be found from: // Most registers can be found from:
// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h // https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h
// Some registers were added on Adreno specifically and are not referenced in
// game .pdb files and never set by games.
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -38,46 +39,49 @@ namespace reg {
*******************************************************************************/ *******************************************************************************/
union COHER_STATUS_HOST { union COHER_STATUS_HOST {
xe::bf<uint32_t, 0, 8> matching_contexts; struct {
xe::bf<uint32_t, 8, 1> rb_copy_dest_base_ena; uint32_t matching_contexts : 8; // +0
xe::bf<uint32_t, 9, 1> dest_base_0_ena; uint32_t rb_copy_dest_base_ena : 1; // +8
xe::bf<uint32_t, 10, 1> dest_base_1_ena; uint32_t dest_base_0_ena : 1; // +9
xe::bf<uint32_t, 11, 1> dest_base_2_ena; uint32_t dest_base_1_ena : 1; // +10
xe::bf<uint32_t, 12, 1> dest_base_3_ena; uint32_t dest_base_2_ena : 1; // +11
xe::bf<uint32_t, 13, 1> dest_base_4_ena; uint32_t dest_base_3_ena : 1; // +12
xe::bf<uint32_t, 14, 1> dest_base_5_ena; uint32_t dest_base_4_ena : 1; // +13
xe::bf<uint32_t, 15, 1> dest_base_6_ena; uint32_t dest_base_5_ena : 1; // +14
xe::bf<uint32_t, 16, 1> dest_base_7_ena; uint32_t dest_base_6_ena : 1; // +15
uint32_t dest_base_7_ena : 1; // +16
xe::bf<uint32_t, 24, 1> vc_action_ena; uint32_t : 7; // +17
xe::bf<uint32_t, 25, 1> tc_action_ena; uint32_t vc_action_ena : 1; // +24
xe::bf<uint32_t, 26, 1> pglb_action_ena; uint32_t tc_action_ena : 1; // +25
uint32_t pglb_action_ena : 1; // +26
xe::bf<uint32_t, 31, 1> status; uint32_t : 4; // +27
uint32_t status : 1; // +31
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_COHER_STATUS_HOST; static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST;
}; };
union WAIT_UNTIL { union WAIT_UNTIL {
xe::bf<uint32_t, 1, 1> wait_re_vsync; struct {
xe::bf<uint32_t, 2, 1> wait_fe_vsync; uint32_t : 1; // +0
xe::bf<uint32_t, 3, 1> wait_vsync; uint32_t wait_re_vsync : 1; // +1
xe::bf<uint32_t, 4, 1> wait_dsply_id0; uint32_t wait_fe_vsync : 1; // +2
xe::bf<uint32_t, 5, 1> wait_dsply_id1; uint32_t wait_vsync : 1; // +3
xe::bf<uint32_t, 6, 1> wait_dsply_id2; uint32_t wait_dsply_id0 : 1; // +4
uint32_t wait_dsply_id1 : 1; // +5
xe::bf<uint32_t, 10, 1> wait_cmdfifo; uint32_t wait_dsply_id2 : 1; // +6
uint32_t : 3; // +7
xe::bf<uint32_t, 14, 1> wait_2d_idle; uint32_t wait_cmdfifo : 1; // +10
xe::bf<uint32_t, 15, 1> wait_3d_idle; uint32_t : 3; // +11
xe::bf<uint32_t, 16, 1> wait_2d_idleclean; uint32_t wait_2d_idle : 1; // +14
xe::bf<uint32_t, 17, 1> wait_3d_idleclean; uint32_t wait_3d_idle : 1; // +15
uint32_t wait_2d_idleclean : 1; // +16
xe::bf<uint32_t, 20, 4> cmdfifo_entries; uint32_t wait_3d_idleclean : 1; // +17
uint32_t : 2; // +18
uint32_t cmdfifo_entries : 4; // +20
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_WAIT_UNTIL; static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL;
}; };
/******************************************************************************* /*******************************************************************************
@ -89,35 +93,38 @@ union WAIT_UNTIL {
*******************************************************************************/ *******************************************************************************/
union SQ_PROGRAM_CNTL { union SQ_PROGRAM_CNTL {
// Note from a2xx.xml: struct {
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, but // Note from a2xx.xml:
// high bit is set to indicate "0 registers used". // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG,
xe::bf<uint32_t, 0, 8> vs_num_reg; // but high bit is set to indicate "0 registers used".
xe::bf<uint32_t, 8, 8> ps_num_reg; uint32_t vs_num_reg : 8; // +0
xe::bf<uint32_t, 16, 1> vs_resource; uint32_t ps_num_reg : 8; // +8
xe::bf<uint32_t, 17, 1> ps_resource; uint32_t vs_resource : 1; // +16
xe::bf<uint32_t, 18, 1> param_gen; uint32_t ps_resource : 1; // +17
xe::bf<uint32_t, 19, 1> gen_index_pix; uint32_t param_gen : 1; // +18
xe::bf<uint32_t, 20, 4> vs_export_count; uint32_t gen_index_pix : 1; // +19
xe::bf<xenos::VertexShaderExportMode, 24, 3> vs_export_mode; uint32_t vs_export_count : 4; // +20
xe::bf<uint32_t, 27, 4> ps_export_mode; xenos::VertexShaderExportMode vs_export_mode : 3; // +24
xe::bf<uint32_t, 31, 1> gen_index_vtx; uint32_t ps_export_mode : 4; // +27
uint32_t gen_index_vtx : 1; // +31
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; static constexpr Register register_index = XE_GPU_REG_SQ_PROGRAM_CNTL;
}; };
union SQ_CONTEXT_MISC { union SQ_CONTEXT_MISC {
xe::bf<uint32_t, 0, 1> inst_pred_optimize; struct {
xe::bf<uint32_t, 1, 1> sc_output_screen_xy; uint32_t inst_pred_optimize : 1; // +0
xe::bf<xenos::SampleControl, 2, 2> sc_sample_cntl; uint32_t sc_output_screen_xy : 1; // +1
xe::bf<uint32_t, 8, 8> param_gen_pos; xenos::SampleControl sc_sample_cntl : 2; // +2
xe::bf<uint32_t, 16, 1> perfcounter_ref; uint32_t : 4; // +4
xe::bf<uint32_t, 17, 1> yeild_optimize; // sic uint32_t param_gen_pos : 8; // +8
xe::bf<uint32_t, 18, 1> tx_cache_sel; uint32_t perfcounter_ref : 1; // +16
uint32_t yeild_optimize : 1; // +17 sic
uint32_t tx_cache_sel : 1; // +18
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_SQ_CONTEXT_MISC; static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC;
}; };
/******************************************************************************* /*******************************************************************************
@ -139,17 +146,19 @@ union SQ_CONTEXT_MISC {
*******************************************************************************/ *******************************************************************************/
union VGT_OUTPUT_PATH_CNTL { union VGT_OUTPUT_PATH_CNTL {
xe::bf<xenos::VGTOutputPath, 0, 2> path_select; struct {
xenos::VGTOutputPath path_select : 2; // +0
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL;
}; };
union VGT_HOS_CNTL { union VGT_HOS_CNTL {
xe::bf<xenos::TessellationMode, 0, 2> tess_mode; struct {
xenos::TessellationMode tess_mode : 2; // +0
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_VGT_HOS_CNTL; static constexpr Register register_index = XE_GPU_REG_VGT_HOS_CNTL;
}; };
/******************************************************************************* /*******************************************************************************
@ -166,145 +175,162 @@ union VGT_HOS_CNTL {
*******************************************************************************/ *******************************************************************************/
union PA_SU_POINT_MINMAX { union PA_SU_POINT_MINMAX {
// Radius, 12.4 fixed point. struct {
xe::bf<uint32_t, 0, 16> min_size; // Radius, 12.4 fixed point.
xe::bf<uint32_t, 16, 16> max_size; uint32_t min_size : 16; // +0
uint32_t max_size : 16; // +16
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_MINMAX;
}; };
union PA_SU_POINT_SIZE { union PA_SU_POINT_SIZE {
// 1/2 width or height, 12.4 fixed point. struct {
xe::bf<uint32_t, 0, 16> height; // 1/2 width or height, 12.4 fixed point.
xe::bf<uint32_t, 16, 16> width; uint32_t height : 16; // +0
uint32_t width : 16; // +16
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_SIZE; static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_SIZE;
}; };
// Setup Unit / Scanline Converter mode cntl // Setup Unit / Scanline Converter mode cntl
union PA_SU_SC_MODE_CNTL { union PA_SU_SC_MODE_CNTL {
xe::bf<uint32_t, 0, 1> cull_front; struct {
xe::bf<uint32_t, 1, 1> cull_back; uint32_t cull_front : 1; // +0
xe::bf<uint32_t, 2, 1> face; uint32_t cull_back : 1; // +1
xe::bf<uint32_t, 3, 2> poly_mode; // 0 - front is CCW, 1 - front is CW.
xe::bf<uint32_t, 5, 3> polymode_front_ptype; uint32_t face : 1; // +2
xe::bf<uint32_t, 8, 3> polymode_back_ptype; xenos::PolygonModeEnable poly_mode : 2; // +3
xe::bf<uint32_t, 11, 1> poly_offset_front_enable; xenos::PolygonType polymode_front_ptype : 3; // +5
xe::bf<uint32_t, 12, 1> poly_offset_back_enable; xenos::PolygonType polymode_back_ptype : 3; // +8
xe::bf<uint32_t, 13, 1> poly_offset_para_enable; uint32_t poly_offset_front_enable : 1; // +11
uint32_t poly_offset_back_enable : 1; // +12
xe::bf<uint32_t, 15, 1> msaa_enable; uint32_t poly_offset_para_enable : 1; // +13
xe::bf<uint32_t, 16, 1> vtx_window_offset_enable; uint32_t : 1; // +14
uint32_t msaa_enable : 1; // +15
xe::bf<uint32_t, 18, 1> line_stipple_enable; uint32_t vtx_window_offset_enable : 1; // +16
xe::bf<uint32_t, 19, 1> provoking_vtx_last; // LINE_STIPPLE_ENABLE was added on Adreno.
xe::bf<uint32_t, 20, 1> persp_corr_dis; uint32_t : 2; // +17
xe::bf<uint32_t, 21, 1> multi_prim_ib_ena; uint32_t provoking_vtx_last : 1; // +19
uint32_t persp_corr_dis : 1; // +20
xe::bf<uint32_t, 23, 1> quad_order_enable; uint32_t multi_prim_ib_ena : 1; // +21
uint32_t : 1; // +22
xe::bf<uint32_t, 25, 1> wait_rb_idle_all_tri; uint32_t quad_order_enable : 1; // +23
xe::bf<uint32_t, 26, 1> wait_rb_idle_first_tri_new_state; // WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on
// Adreno.
// TODO(Triang3l): Find SC_ONE_QUAD_PER_CLOCK offset.
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL;
}; };
// Setup Unit Vertex Control // Setup Unit Vertex Control
union PA_SU_VTX_CNTL { union PA_SU_VTX_CNTL {
xe::bf<uint32_t, 0, 1> pix_center; // 1 = half pixel offset struct {
xe::bf<uint32_t, 1, 2> round_mode; uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL).
xe::bf<uint32_t, 3, 3> quant_mode; uint32_t round_mode : 2; // +1
uint32_t quant_mode : 3; // +3
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_VTX_CNTL; static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL;
}; };
union PA_SC_MPASS_PS_CNTL { union PA_SC_MPASS_PS_CNTL {
xe::bf<uint32_t, 0, 20> mpass_pix_vec_per_pass; struct {
xe::bf<uint32_t, 31, 1> mpass_ps_ena; uint32_t mpass_pix_vec_per_pass : 20; // +0
uint32_t : 11; // +20
uint32_t mpass_ps_ena : 1; // +31
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL;
}; };
// Scanline converter viz query // Scanline converter viz query
union PA_SC_VIZ_QUERY { union PA_SC_VIZ_QUERY {
xe::bf<uint32_t, 0, 1> viz_query_ena; struct {
xe::bf<uint32_t, 1, 6> viz_query_id; uint32_t viz_query_ena : 1; // +0
xe::bf<uint32_t, 7, 1> kill_pix_post_early_z; uint32_t viz_query_id : 6; // +1
uint32_t kill_pix_post_early_z : 1; // +7
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY;
}; };
// Clipper clip control // Clipper clip control
union PA_CL_CLIP_CNTL { union PA_CL_CLIP_CNTL {
xe::bf<uint32_t, 0, 1> ucp_ena_0; struct {
xe::bf<uint32_t, 1, 1> ucp_ena_1; uint32_t ucp_ena_0 : 1; // +0
xe::bf<uint32_t, 2, 1> ucp_ena_2; uint32_t ucp_ena_1 : 1; // +1
xe::bf<uint32_t, 3, 1> ucp_ena_3; uint32_t ucp_ena_2 : 1; // +2
xe::bf<uint32_t, 4, 1> ucp_ena_4; uint32_t ucp_ena_3 : 1; // +3
xe::bf<uint32_t, 5, 1> ucp_ena_5; uint32_t ucp_ena_4 : 1; // +4
uint32_t ucp_ena_5 : 1; // +5
xe::bf<uint32_t, 14, 2> ps_ucp_mode; uint32_t : 8; // +6
xe::bf<uint32_t, 16, 1> clip_disable; uint32_t ps_ucp_mode : 2; // +14
xe::bf<uint32_t, 17, 1> ucp_cull_only_ena; uint32_t clip_disable : 1; // +16
xe::bf<uint32_t, 18, 1> boundary_edge_flag_ena; uint32_t ucp_cull_only_ena : 1; // +17
xe::bf<uint32_t, 19, 1> dx_clip_space_def; uint32_t boundary_edge_flag_ena : 1; // +18
xe::bf<uint32_t, 20, 1> dis_clip_err_detect; uint32_t dx_clip_space_def : 1; // +19
xe::bf<uint32_t, 21, 1> vtx_kill_or; uint32_t dis_clip_err_detect : 1; // +20
xe::bf<uint32_t, 22, 1> xy_nan_retain; uint32_t vtx_kill_or : 1; // +21
xe::bf<uint32_t, 23, 1> z_nan_retain; uint32_t xy_nan_retain : 1; // +22
xe::bf<uint32_t, 24, 1> w_nan_retain; uint32_t z_nan_retain : 1; // +23
uint32_t w_nan_retain : 1; // +24
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; static constexpr Register register_index = XE_GPU_REG_PA_CL_CLIP_CNTL;
}; };
// Viewport transform engine control // Viewport transform engine control
union PA_CL_VTE_CNTL { union PA_CL_VTE_CNTL {
xe::bf<uint32_t, 0, 1> vport_x_scale_ena; struct {
xe::bf<uint32_t, 1, 1> vport_x_offset_ena; uint32_t vport_x_scale_ena : 1; // +0
xe::bf<uint32_t, 2, 1> vport_y_scale_ena; uint32_t vport_x_offset_ena : 1; // +1
xe::bf<uint32_t, 3, 1> vport_y_offset_ena; uint32_t vport_y_scale_ena : 1; // +2
xe::bf<uint32_t, 4, 1> vport_z_scale_ena; uint32_t vport_y_offset_ena : 1; // +3
xe::bf<uint32_t, 5, 1> vport_z_offset_ena; uint32_t vport_z_scale_ena : 1; // +4
uint32_t vport_z_offset_ena : 1; // +5
xe::bf<uint32_t, 8, 1> vtx_xy_fmt; uint32_t : 2; // +6
xe::bf<uint32_t, 9, 1> vtx_z_fmt; uint32_t vtx_xy_fmt : 1; // +8
xe::bf<uint32_t, 10, 1> vtx_w0_fmt; uint32_t vtx_z_fmt : 1; // +9
xe::bf<uint32_t, 11, 1> perfcounter_ref; uint32_t vtx_w0_fmt : 1; // +10
uint32_t perfcounter_ref : 1; // +11
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_VTE_CNTL; static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL;
}; };
union PA_SC_WINDOW_OFFSET { union PA_SC_WINDOW_OFFSET {
xe::bf<int32_t, 0, 15> window_x_offset; struct {
xe::bf<int32_t, 16, 15> window_y_offset; int32_t window_x_offset : 15; // +0
uint32_t : 1; // +15
int32_t window_y_offset : 15; // +16
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET;
}; };
union PA_SC_WINDOW_SCISSOR_TL { union PA_SC_WINDOW_SCISSOR_TL {
xe::bf<uint32_t, 0, 14> tl_x; struct {
xe::bf<uint32_t, 16, 14> tl_y; uint32_t tl_x : 14; // +0
xe::bf<uint32_t, 31, 1> window_offset_disable; uint32_t : 2; // +14
uint32_t tl_y : 14; // +16
uint32_t : 1; // +30
uint32_t window_offset_disable : 1; // +31
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL;
}; };
union PA_SC_WINDOW_SCISSOR_BR { union PA_SC_WINDOW_SCISSOR_BR {
xe::bf<uint32_t, 0, 14> br_x; struct {
xe::bf<uint32_t, 16, 14> br_y; uint32_t br_x : 14; // +0
uint32_t : 2; // +14
uint32_t br_y : 14; // +16
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR;
}; };
/******************************************************************************* /*******************************************************************************
@ -316,136 +342,174 @@ union PA_SC_WINDOW_SCISSOR_BR {
*******************************************************************************/ *******************************************************************************/
union RB_MODECONTROL { union RB_MODECONTROL {
xe::bf<xenos::ModeControl, 0, 3> edram_mode; struct {
xenos::ModeControl edram_mode : 3; // +0
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_MODECONTROL; static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL;
}; };
union RB_SURFACE_INFO { union RB_SURFACE_INFO {
xe::bf<uint32_t, 0, 14> surface_pitch; struct {
xe::bf<MsaaSamples, 16, 2> msaa_samples; uint32_t surface_pitch : 14; // +0
xe::bf<uint32_t, 18, 14> hiz_pitch; uint32_t : 2; // +14
MsaaSamples msaa_samples : 2; // +16
uint32_t hiz_pitch : 14; // +18
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_SURFACE_INFO; static constexpr Register register_index = XE_GPU_REG_RB_SURFACE_INFO;
}; };
union RB_COLORCONTROL { union RB_COLORCONTROL {
xe::bf<CompareFunction, 0, 3> alpha_func; struct {
xe::bf<uint32_t, 3, 1> alpha_test_enable; CompareFunction alpha_func : 3; // +0
xe::bf<uint32_t, 4, 1> alpha_to_mask_enable; uint32_t alpha_test_enable : 1; // +3
// Everything in between was added on Adreno, not in game PDBs and never set. uint32_t alpha_to_mask_enable : 1; // +4
xe::bf<uint32_t, 24, 2> alpha_to_mask_offset0; // Everything in between was added on Adreno.
xe::bf<uint32_t, 26, 2> alpha_to_mask_offset1; uint32_t : 19; // +5
xe::bf<uint32_t, 28, 2> alpha_to_mask_offset2; uint32_t alpha_to_mask_offset0 : 2; // +24
xe::bf<uint32_t, 30, 2> alpha_to_mask_offset3; uint32_t alpha_to_mask_offset1 : 2; // +26
uint32_t alpha_to_mask_offset2 : 2; // +28
uint32_t alpha_to_mask_offset3 : 2; // +30
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COLORCONTROL; static constexpr Register register_index = XE_GPU_REG_RB_COLORCONTROL;
}; };
union RB_COLOR_INFO { union RB_COLOR_INFO {
xe::bf<uint32_t, 0, 12> color_base; struct {
xe::bf<ColorRenderTargetFormat, 16, 4> color_format; uint32_t color_base : 12; // +0
xe::bf<int32_t, 20, 6> color_exp_bias; uint32_t : 4; // +12
ColorRenderTargetFormat color_format : 4; // +16
int32_t color_exp_bias : 6; // +20
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_INFO; static constexpr Register register_index = XE_GPU_REG_RB_COLOR_INFO;
// RB_COLOR[1-3]_INFO also use this format. // RB_COLOR[1-3]_INFO also use this format.
static const Register rt_register_indices[4];
}; };
union RB_COLOR_MASK { union RB_COLOR_MASK {
xe::bf<uint32_t, 0, 1> write_red0; struct {
xe::bf<uint32_t, 1, 1> write_green0; uint32_t write_red0 : 1; // +0
xe::bf<uint32_t, 2, 1> write_blue0; uint32_t write_green0 : 1; // +1
xe::bf<uint32_t, 3, 1> write_alpha0; uint32_t write_blue0 : 1; // +2
xe::bf<uint32_t, 4, 1> write_red1; uint32_t write_alpha0 : 1; // +3
xe::bf<uint32_t, 5, 1> write_green1; uint32_t write_red1 : 1; // +4
xe::bf<uint32_t, 6, 1> write_blue1; uint32_t write_green1 : 1; // +5
xe::bf<uint32_t, 7, 1> write_alpha1; uint32_t write_blue1 : 1; // +6
xe::bf<uint32_t, 8, 1> write_red2; uint32_t write_alpha1 : 1; // +7
xe::bf<uint32_t, 9, 1> write_green2; uint32_t write_red2 : 1; // +8
xe::bf<uint32_t, 10, 1> write_blue2; uint32_t write_green2 : 1; // +9
xe::bf<uint32_t, 11, 1> write_alpha2; uint32_t write_blue2 : 1; // +10
xe::bf<uint32_t, 12, 1> write_red3; uint32_t write_alpha2 : 1; // +11
xe::bf<uint32_t, 13, 1> write_green3; uint32_t write_red3 : 1; // +12
xe::bf<uint32_t, 14, 1> write_blue3; uint32_t write_green3 : 1; // +13
xe::bf<uint32_t, 15, 1> write_alpha3; uint32_t write_blue3 : 1; // +14
uint32_t write_alpha3 : 1; // +15
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_MASK; static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK;
};
union RB_BLENDCONTROL {
struct {
BlendFactor color_srcblend : 5; // +0
BlendOp color_comb_fcn : 3; // +5
BlendFactor color_destblend : 5; // +8
uint32_t : 3; // +13
BlendFactor alpha_srcblend : 5; // +16
BlendOp alpha_comb_fcn : 3; // +21
BlendFactor alpha_destblend : 5; // +24
// BLEND_FORCE_ENABLE and BLEND_FORCE were added on Adreno.
};
uint32_t value;
// RB_BLENDCONTROL[0-3] use this format.
static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0;
static const Register rt_register_indices[4];
}; };
union RB_DEPTHCONTROL { union RB_DEPTHCONTROL {
xe::bf<uint32_t, 0, 1> stencil_enable; struct {
xe::bf<uint32_t, 1, 1> z_enable; uint32_t stencil_enable : 1; // +0
xe::bf<uint32_t, 2, 1> z_write_enable; uint32_t z_enable : 1; // +1
// EARLY_Z_ENABLE was added on Adreno. uint32_t z_write_enable : 1; // +2
xe::bf<CompareFunction, 4, 3> zfunc; // EARLY_Z_ENABLE was added on Adreno.
xe::bf<uint32_t, 7, 1> backface_enable; uint32_t : 1; // +3
xe::bf<CompareFunction, 8, 3> stencilfunc; CompareFunction zfunc : 3; // +4
xe::bf<StencilOp, 11, 3> stencilfail; uint32_t backface_enable : 1; // +7
xe::bf<StencilOp, 14, 3> stencilzpass; CompareFunction stencilfunc : 3; // +8
xe::bf<StencilOp, 17, 3> stencilzfail; StencilOp stencilfail : 3; // +11
xe::bf<CompareFunction, 20, 3> stencilfunc_bf; StencilOp stencilzpass : 3; // +14
xe::bf<StencilOp, 23, 3> stencilfail_bf; StencilOp stencilzfail : 3; // +17
xe::bf<StencilOp, 26, 3> stencilzpass_bf; CompareFunction stencilfunc_bf : 3; // +20
xe::bf<StencilOp, 29, 3> stencilzfail_bf; StencilOp stencilfail_bf : 3; // +23
StencilOp stencilzpass_bf : 3; // +26
StencilOp stencilzfail_bf : 3; // +29
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTHCONTROL; static constexpr Register register_index = XE_GPU_REG_RB_DEPTHCONTROL;
}; };
union RB_STENCILREFMASK { union RB_STENCILREFMASK {
xe::bf<uint32_t, 0, 8> stencilref; struct {
xe::bf<uint32_t, 8, 8> stencilmask; uint32_t stencilref : 8; // +0
xe::bf<uint32_t, 16, 8> stencilwritemask; uint32_t stencilmask : 8; // +8
uint32_t stencilwritemask : 8; // +16
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_STENCILREFMASK; static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK;
// RB_STENCILREFMASK_BF also uses this format. // RB_STENCILREFMASK_BF also uses this format.
}; };
union RB_DEPTH_INFO { union RB_DEPTH_INFO {
xe::bf<uint32_t, 0, 12> depth_base; struct {
xe::bf<DepthRenderTargetFormat, 16, 1> depth_format; uint32_t depth_base : 12; // +0
uint32_t : 4; // +12
DepthRenderTargetFormat depth_format : 1; // +16
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTH_INFO; static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO;
}; };
union RB_COPY_CONTROL { union RB_COPY_CONTROL {
xe::bf<uint32_t, 0, 3> copy_src_select; struct {
xe::bf<xenos::CopySampleSelect, 4, 3> copy_sample_select; uint32_t copy_src_select : 3; // +0 Depth is 4.
xe::bf<uint32_t, 8, 1> color_clear_enable; uint32_t : 1; // +3
xe::bf<uint32_t, 9, 1> depth_clear_enable; xenos::CopySampleSelect copy_sample_select : 3; // +4
uint32_t : 1; // +7
xe::bf<xenos::CopyCommand, 20, 2> copy_command; uint32_t color_clear_enable : 1; // +8
uint32_t depth_clear_enable : 1; // +9
uint32_t : 10; // +10
xenos::CopyCommand copy_command : 2; // +20
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_CONTROL; static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL;
}; };
union RB_COPY_DEST_INFO { union RB_COPY_DEST_INFO {
xe::bf<Endian128, 0, 3> copy_dest_endian; struct {
xe::bf<uint32_t, 3, 1> copy_dest_array; Endian128 copy_dest_endian : 3; // +0
xe::bf<uint32_t, 4, 3> copy_dest_slice; uint32_t copy_dest_array : 1; // +3
xe::bf<ColorFormat, 7, 6> copy_dest_format; uint32_t copy_dest_slice : 3; // +4
xe::bf<uint32_t, 13, 3> copy_dest_number; ColorFormat copy_dest_format : 6; // +7
xe::bf<int32_t, 16, 6> copy_dest_exp_bias; uint32_t copy_dest_number : 3; // +13
xe::bf<uint32_t, 24, 1> copy_dest_swap; int32_t copy_dest_exp_bias : 6; // +16
uint32_t : 2; // +22
uint32_t copy_dest_swap : 1; // +24
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_INFO; static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO;
}; };
union RB_COPY_DEST_PITCH { union RB_COPY_DEST_PITCH {
xe::bf<uint32_t, 0, 14> copy_dest_pitch; struct {
xe::bf<uint32_t, 16, 14> copy_dest_height; uint32_t copy_dest_pitch : 14; // +0
uint32_t : 2; // +14
uint32_t copy_dest_height : 14; // +16
};
uint32_t value; uint32_t value;
static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH;
}; };
} // namespace reg } // namespace reg

View File

@ -24,25 +24,25 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
out_info->min_filter = out_info->min_filter =
fetch_instr.attributes.min_filter == TextureFilter::kUseFetchConst fetch_instr.attributes.min_filter == TextureFilter::kUseFetchConst
? static_cast<TextureFilter>(fetch.min_filter) ? fetch.min_filter
: fetch_instr.attributes.min_filter; : fetch_instr.attributes.min_filter;
out_info->mag_filter = out_info->mag_filter =
fetch_instr.attributes.mag_filter == TextureFilter::kUseFetchConst fetch_instr.attributes.mag_filter == TextureFilter::kUseFetchConst
? static_cast<TextureFilter>(fetch.mag_filter) ? fetch.mag_filter
: fetch_instr.attributes.mag_filter; : fetch_instr.attributes.mag_filter;
out_info->mip_filter = out_info->mip_filter =
fetch_instr.attributes.mip_filter == TextureFilter::kUseFetchConst fetch_instr.attributes.mip_filter == TextureFilter::kUseFetchConst
? static_cast<TextureFilter>(fetch.mip_filter) ? fetch.mip_filter
: fetch_instr.attributes.mip_filter; : fetch_instr.attributes.mip_filter;
out_info->clamp_u = static_cast<ClampMode>(fetch.clamp_x); out_info->clamp_u = fetch.clamp_x;
out_info->clamp_v = static_cast<ClampMode>(fetch.clamp_y); out_info->clamp_v = fetch.clamp_y;
out_info->clamp_w = static_cast<ClampMode>(fetch.clamp_z); out_info->clamp_w = fetch.clamp_z;
out_info->aniso_filter = out_info->aniso_filter =
fetch_instr.attributes.aniso_filter == AnisoFilter::kUseFetchConst fetch_instr.attributes.aniso_filter == AnisoFilter::kUseFetchConst
? static_cast<AnisoFilter>(fetch.aniso_filter) ? fetch.aniso_filter
: fetch_instr.attributes.aniso_filter; : fetch_instr.attributes.aniso_filter;
out_info->border_color = static_cast<BorderColor>(fetch.border_color); out_info->border_color = fetch.border_color;
out_info->lod_bias = (fetch.lod_bias) / 32.f; out_info->lod_bias = (fetch.lod_bias) / 32.f;
out_info->mip_min_level = fetch.mip_min_level; out_info->mip_min_level = fetch.mip_min_level;
out_info->mip_max_level = fetch.mip_max_level; out_info->mip_max_level = fetch.mip_max_level;

View File

@ -110,9 +110,8 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) {
bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type, bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type,
reg::SQ_PROGRAM_CNTL cntl) { reg::SQ_PROGRAM_CNTL cntl) {
Reset(); Reset();
uint32_t cntl_num_reg = shader->type() == ShaderType::kVertex uint32_t cntl_num_reg =
? cntl.vs_num_reg.value() shader->type() == ShaderType::kVertex ? cntl.vs_num_reg : cntl.ps_num_reg;
: cntl.ps_num_reg.value();
register_count_ = (cntl_num_reg & 0x80) ? 0 : (cntl_num_reg + 1); register_count_ = (cntl_num_reg & 0x80) ? 0 : (cntl_num_reg + 1);
return TranslateInternal(shader, patch_type); return TranslateInternal(shader, patch_type);

View File

@ -40,7 +40,7 @@ void CopySwapBlock(Endian endian, void* output, const void* input,
xe::copy_and_swap_16_in_32_unaligned(output, input, length); xe::copy_and_swap_16_in_32_unaligned(output, input, length);
break; break;
default: default:
case Endian::kUnspecified: case Endian::kNone:
std::memcpy(output, input, length); std::memcpy(output, input, length);
break; break;
} }

View File

@ -33,8 +33,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
auto& info = *out_info; auto& info = *out_info;
info.format = static_cast<TextureFormat>(fetch.format); info.format = fetch.format;
info.endianness = static_cast<Endian>(fetch.endianness); info.endianness = fetch.endianness;
info.dimension = static_cast<Dimension>(fetch.dimension); info.dimension = static_cast<Dimension>(fetch.dimension);
info.width = info.height = info.depth = 0; info.width = info.height = info.depth = 0;

View File

@ -19,77 +19,6 @@
namespace xe { namespace xe {
namespace gpu { namespace gpu {
// a2xx_sq_surfaceformat +
// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas
enum class TextureFormat : uint32_t {
k_1_REVERSE = 0,
k_1 = 1,
k_8 = 2,
k_1_5_5_5 = 3,
k_5_6_5 = 4,
k_6_5_5 = 5,
k_8_8_8_8 = 6,
k_2_10_10_10 = 7,
k_8_A = 8,
k_8_B = 9,
k_8_8 = 10,
k_Cr_Y1_Cb_Y0_REP = 11,
k_Y1_Cr_Y0_Cb_REP = 12,
k_16_16_EDRAM = 13,
k_8_8_8_8_A = 14,
k_4_4_4_4 = 15,
k_10_11_11 = 16,
k_11_11_10 = 17,
k_DXT1 = 18,
k_DXT2_3 = 19,
k_DXT4_5 = 20,
k_16_16_16_16_EDRAM = 21,
k_24_8 = 22,
k_24_8_FLOAT = 23,
k_16 = 24,
k_16_16 = 25,
k_16_16_16_16 = 26,
k_16_EXPAND = 27,
k_16_16_EXPAND = 28,
k_16_16_16_16_EXPAND = 29,
k_16_FLOAT = 30,
k_16_16_FLOAT = 31,
k_16_16_16_16_FLOAT = 32,
k_32 = 33,
k_32_32 = 34,
k_32_32_32_32 = 35,
k_32_FLOAT = 36,
k_32_32_FLOAT = 37,
k_32_32_32_32_FLOAT = 38,
k_32_AS_8 = 39,
k_32_AS_8_8 = 40,
k_16_MPEG = 41,
k_16_16_MPEG = 42,
k_8_INTERLACED = 43,
k_32_AS_8_INTERLACED = 44,
k_32_AS_8_8_INTERLACED = 45,
k_16_INTERLACED = 46,
k_16_MPEG_INTERLACED = 47,
k_16_16_MPEG_INTERLACED = 48,
k_DXN = 49,
k_8_8_8_8_AS_16_16_16_16 = 50,
k_DXT1_AS_16_16_16_16 = 51,
k_DXT2_3_AS_16_16_16_16 = 52,
k_DXT4_5_AS_16_16_16_16 = 53,
k_2_10_10_10_AS_16_16_16_16 = 54,
k_10_11_11_AS_16_16_16_16 = 55,
k_11_11_10_AS_16_16_16_16 = 56,
k_32_32_32_FLOAT = 57,
k_DXT3A = 58,
k_DXT5A = 59,
k_CTX1 = 60,
k_DXT3A_AS_1_1_1_1 = 61,
k_8_8_8_8_GAMMA_EDRAM = 62,
k_2_10_10_10_FLOAT_EDRAM = 63,
kUnknown = 0xFFFFFFFFu,
};
inline TextureFormat GetBaseFormat(TextureFormat texture_format) { inline TextureFormat GetBaseFormat(TextureFormat texture_format) {
// These formats are used for resampling textures / gamma control. // These formats are used for resampling textures / gamma control.
switch (texture_format) { switch (texture_format) {

View File

@ -824,7 +824,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
#define LOADEL(type, wo) \ #define LOADEL(type, wo) \
GpuSwap(xe::load<type>(vstart + \ GpuSwap(xe::load<type>(vstart + \
(attrib.fetch_instr.attributes.offset + wo) * 4), \ (attrib.fetch_instr.attributes.offset + wo) * 4), \
Endian(fetch->endian)) fetch->endian)
switch (attrib.fetch_instr.attributes.data_format) { switch (attrib.fetch_instr.attributes.data_format) {
case VertexFormat::k_32: case VertexFormat::k_32:
ImGui::Text("%.8X", LOADEL(uint32_t, 0)); ImGui::Text("%.8X", LOADEL(uint32_t, 0));
@ -1334,10 +1334,10 @@ void TraceViewer::DrawStateUI() {
regs[XE_GPU_REG_RB_COLOR3_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
}; };
uint32_t rb_blendcontrol[4] = { uint32_t rb_blendcontrol[4] = {
regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32, regs[XE_GPU_REG_RB_BLENDCONTROL0].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32, regs[XE_GPU_REG_RB_BLENDCONTROL1].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32, regs[XE_GPU_REG_RB_BLENDCONTROL2].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32, regs[XE_GPU_REG_RB_BLENDCONTROL3].u32,
}; };
ImGui::Columns(2); ImGui::Columns(2);
for (int i = 0; i < xe::countof(color_info); ++i) { for (int i = 0; i < xe::countof(color_info); ++i) {
@ -1713,7 +1713,7 @@ void TraceViewer::DrawStateUI() {
fetch = &group->vertex_fetch_2; fetch = &group->vertex_fetch_2;
break; break;
} }
assert_true(fetch->endian == 2); assert_true(fetch->endian == Endian::k8in32);
char tree_root_id[32]; char tree_root_id[32];
sprintf(tree_root_id, "#vertices_root_%d", sprintf(tree_root_id, "#vertices_root_%d",
vertex_binding.fetch_constant); vertex_binding.fetch_constant);

View File

@ -146,12 +146,8 @@ enum class AllocType : uint32_t {
// Instruction data for ControlFlowOpcode::kExec and kExecEnd. // Instruction data for ControlFlowOpcode::kExec and kExecEnd.
struct ControlFlowExecInstruction { struct ControlFlowExecInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
// Address of the instructions to execute. // Address of the instructions to execute.
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Number of instructions being executed. // Number of instructions being executed.
@ -176,19 +172,15 @@ struct ControlFlowExecInstruction {
uint32_t : 7; uint32_t : 7;
uint32_t clean_ : 1; uint32_t clean_ : 1;
uint32_t : 1; uint32_t : 1;
uint32_t address_mode_ : 1; AddressingMode address_mode_ : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowExecInstruction, 8); static_assert_size(ControlFlowExecInstruction, 8);
// Instruction data for ControlFlowOpcode::kCondExec and kCondExecEnd. // Instruction data for ControlFlowOpcode::kCondExec and kCondExecEnd.
struct ControlFlowCondExecInstruction { struct ControlFlowCondExecInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
// Address of the instructions to execute. // Address of the instructions to execute.
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Number of instructions being executed. // Number of instructions being executed.
@ -214,20 +206,16 @@ struct ControlFlowCondExecInstruction {
uint32_t vc_lo_ : 2; uint32_t vc_lo_ : 2;
uint32_t bool_address_ : 8; uint32_t bool_address_ : 8;
uint32_t condition_ : 1; uint32_t condition_ : 1;
uint32_t address_mode_ : 1; AddressingMode address_mode_ : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowCondExecInstruction, 8); static_assert_size(ControlFlowCondExecInstruction, 8);
// Instruction data for ControlFlowOpcode::kCondExecPred, kCondExecPredEnd, // Instruction data for ControlFlowOpcode::kCondExecPred, kCondExecPredEnd,
// kCondExecPredClean, kCondExecPredCleanEnd. // kCondExecPredClean, kCondExecPredCleanEnd.
struct ControlFlowCondExecPredInstruction { struct ControlFlowCondExecPredInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
// Address of the instructions to execute. // Address of the instructions to execute.
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Number of instructions being executed. // Number of instructions being executed.
@ -254,19 +242,15 @@ struct ControlFlowCondExecPredInstruction {
uint32_t : 7; uint32_t : 7;
uint32_t clean_ : 1; uint32_t clean_ : 1;
uint32_t condition_ : 1; uint32_t condition_ : 1;
uint32_t address_mode_ : 1; AddressingMode address_mode_ : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowCondExecPredInstruction, 8); static_assert_size(ControlFlowCondExecPredInstruction, 8);
// Instruction data for ControlFlowOpcode::kLoopStart. // Instruction data for ControlFlowOpcode::kLoopStart.
struct ControlFlowLoopStartInstruction { struct ControlFlowLoopStartInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
// Target address to jump to when skipping the loop. // Target address to jump to when skipping the loop.
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Whether to reuse the current aL instead of reset it to loop start. // Whether to reuse the current aL instead of reset it to loop start.
@ -285,19 +269,15 @@ struct ControlFlowLoopStartInstruction {
// Word 1: (16 bits) // Word 1: (16 bits)
uint32_t : 11; uint32_t : 11;
uint32_t address_mode_ : 1; AddressingMode address_mode_ : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowLoopStartInstruction, 8); static_assert_size(ControlFlowLoopStartInstruction, 8);
// Instruction data for ControlFlowOpcode::kLoopEnd. // Instruction data for ControlFlowOpcode::kLoopEnd.
struct ControlFlowLoopEndInstruction { struct ControlFlowLoopEndInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
// Target address of the start of the loop body. // Target address of the start of the loop body.
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Integer constant register that holds the loop parameters. // Integer constant register that holds the loop parameters.
@ -319,19 +299,15 @@ struct ControlFlowLoopEndInstruction {
// Word 1: (16 bits) // Word 1: (16 bits)
uint32_t : 10; uint32_t : 10;
uint32_t condition_ : 1; uint32_t condition_ : 1;
uint32_t address_mode_ : 1; AddressingMode address_mode_ : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowLoopEndInstruction, 8); static_assert_size(ControlFlowLoopEndInstruction, 8);
// Instruction data for ControlFlowOpcode::kCondCall. // Instruction data for ControlFlowOpcode::kCondCall.
struct ControlFlowCondCallInstruction { struct ControlFlowCondCallInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
// Target address. // Target address.
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Unconditional call - ignores condition/predication. // Unconditional call - ignores condition/predication.
@ -354,19 +330,15 @@ struct ControlFlowCondCallInstruction {
uint32_t : 2; uint32_t : 2;
uint32_t bool_address_ : 8; uint32_t bool_address_ : 8;
uint32_t condition_ : 1; uint32_t condition_ : 1;
uint32_t address_mode_ : 1; AddressingMode address_mode_ : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowCondCallInstruction, 8); static_assert_size(ControlFlowCondCallInstruction, 8);
// Instruction data for ControlFlowOpcode::kReturn. // Instruction data for ControlFlowOpcode::kReturn.
struct ControlFlowReturnInstruction { struct ControlFlowReturnInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
private: private:
// Word 0: (32 bits) // Word 0: (32 bits)
@ -381,12 +353,8 @@ static_assert_size(ControlFlowReturnInstruction, 8);
// Instruction data for ControlFlowOpcode::kCondJmp. // Instruction data for ControlFlowOpcode::kCondJmp.
struct ControlFlowCondJmpInstruction { struct ControlFlowCondJmpInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_); AddressingMode addressing_mode() const { return address_mode_; }
}
AddressingMode addressing_mode() const {
return static_cast<AddressingMode>(address_mode_);
}
// Target address. // Target address.
uint32_t address() const { return address_; } uint32_t address() const { return address_; }
// Unconditional jump - ignores condition/predication. // Unconditional jump - ignores condition/predication.
@ -410,20 +378,18 @@ struct ControlFlowCondJmpInstruction {
uint32_t direction_ : 1; uint32_t direction_ : 1;
uint32_t bool_address_ : 8; uint32_t bool_address_ : 8;
uint32_t condition_ : 1; uint32_t condition_ : 1;
uint32_t address_mode_ : 1; AddressingMode address_mode_ : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowCondJmpInstruction, 8); static_assert_size(ControlFlowCondJmpInstruction, 8);
// Instruction data for ControlFlowOpcode::kAlloc. // Instruction data for ControlFlowOpcode::kAlloc.
struct ControlFlowAllocInstruction { struct ControlFlowAllocInstruction {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_; }
return static_cast<ControlFlowOpcode>(opcode_);
}
// The total number of the given type allocated by this instruction. // The total number of the given type allocated by this instruction.
uint32_t size() const { return size_; } uint32_t size() const { return size_; }
// Unconditional jump - ignores condition/predication. // Unconditional jump - ignores condition/predication.
AllocType alloc_type() const { return static_cast<AllocType>(alloc_type_); } AllocType alloc_type() const { return alloc_type_; }
private: private:
// Word 0: (32 bits) // Word 0: (32 bits)
@ -433,16 +399,14 @@ struct ControlFlowAllocInstruction {
// Word 1: (16 bits) // Word 1: (16 bits)
uint32_t : 8; uint32_t : 8;
uint32_t is_unserialized_ : 1; uint32_t is_unserialized_ : 1;
uint32_t alloc_type_ : 2; AllocType alloc_type_ : 2;
uint32_t : 1; uint32_t : 1;
uint32_t opcode_ : 4; ControlFlowOpcode opcode_ : 4;
}; };
static_assert_size(ControlFlowAllocInstruction, 8); static_assert_size(ControlFlowAllocInstruction, 8);
XEPACKEDUNION(ControlFlowInstruction, { XEPACKEDUNION(ControlFlowInstruction, {
ControlFlowOpcode opcode() const { ControlFlowOpcode opcode() const { return opcode_value; }
return static_cast<ControlFlowOpcode>(opcode_value);
}
ControlFlowExecInstruction exec; // kExec* ControlFlowExecInstruction exec; // kExec*
ControlFlowCondExecInstruction cond_exec; // kCondExec* ControlFlowCondExecInstruction cond_exec; // kCondExec*
@ -457,7 +421,7 @@ XEPACKEDUNION(ControlFlowInstruction, {
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t unused_0 : 32; uint32_t unused_0 : 32;
uint32_t unused_1 : 12; uint32_t unused_1 : 12;
uint32_t opcode_value : 4; ControlFlowOpcode opcode_value : 4;
}); });
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t dword_0; uint32_t dword_0;
@ -478,7 +442,7 @@ inline void UnpackControlFlowInstructions(const uint32_t* dwords,
out_b->dword_1 = dword_2 >> 16; out_b->dword_1 = dword_2 >> 16;
} }
enum class FetchOpcode { enum class FetchOpcode : uint32_t {
kVertexFetch = 0, kVertexFetch = 0,
kTextureFetch = 1, kTextureFetch = 1,
kGetTextureBorderColorFrac = 16, kGetTextureBorderColorFrac = 16,
@ -492,9 +456,7 @@ enum class FetchOpcode {
}; };
struct VertexFetchInstruction { struct VertexFetchInstruction {
FetchOpcode opcode() const { FetchOpcode opcode() const { return data_.opcode_value; }
return static_cast<FetchOpcode>(data_.opcode_value);
}
// Whether the jump is predicated (or conditional). // Whether the jump is predicated (or conditional).
bool is_predicated() const { return data_.is_predicated; } bool is_predicated() const { return data_.is_predicated; }
@ -538,13 +500,9 @@ struct VertexFetchInstruction {
uint32_t prefetch_count() const { return data_.prefetch_count; } uint32_t prefetch_count() const { return data_.prefetch_count; }
bool is_mini_fetch() const { return data_.is_mini_fetch == 1; } bool is_mini_fetch() const { return data_.is_mini_fetch == 1; }
VertexFormat data_format() const { VertexFormat data_format() const { return data_.format; }
return static_cast<VertexFormat>(data_.format);
}
// [-32, 31] // [-32, 31]
int exp_adjust() const { int exp_adjust() const { return data_.exp_adjust; }
return ((static_cast<int>(data_.exp_adjust) << 26) >> 26);
}
bool is_signed() const { return data_.fomat_comp_all == 1; } bool is_signed() const { return data_.fomat_comp_all == 1; }
bool is_normalized() const { return data_.num_format_all == 0; } bool is_normalized() const { return data_.num_format_all == 0; }
bool is_index_rounded() const { return data_.is_index_rounded == 1; } bool is_index_rounded() const { return data_.is_index_rounded == 1; }
@ -562,7 +520,7 @@ struct VertexFetchInstruction {
private: private:
XEPACKEDSTRUCT(Data, { XEPACKEDSTRUCT(Data, {
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t opcode_value : 5; FetchOpcode opcode_value : 5;
uint32_t src_reg : 6; uint32_t src_reg : 6;
uint32_t src_reg_am : 1; uint32_t src_reg_am : 1;
uint32_t dst_reg : 6; uint32_t dst_reg : 6;
@ -579,9 +537,9 @@ struct VertexFetchInstruction {
uint32_t num_format_all : 1; uint32_t num_format_all : 1;
uint32_t signed_rf_mode_all : 1; uint32_t signed_rf_mode_all : 1;
uint32_t is_index_rounded : 1; uint32_t is_index_rounded : 1;
uint32_t format : 6; VertexFormat format : 6;
uint32_t reserved2 : 2; uint32_t reserved2 : 2;
uint32_t exp_adjust : 6; int32_t exp_adjust : 6;
uint32_t is_mini_fetch : 1; uint32_t is_mini_fetch : 1;
uint32_t is_predicated : 1; uint32_t is_predicated : 1;
}); });
@ -595,9 +553,7 @@ struct VertexFetchInstruction {
}; };
struct TextureFetchInstruction { struct TextureFetchInstruction {
FetchOpcode opcode() const { FetchOpcode opcode() const { return data_.opcode_value; }
return static_cast<FetchOpcode>(data_.opcode_value);
}
// Whether the jump is predicated (or conditional). // Whether the jump is predicated (or conditional).
bool is_predicated() const { return data_.is_predicated; } bool is_predicated() const { return data_.is_predicated; }
@ -613,59 +569,49 @@ struct TextureFetchInstruction {
uint32_t src_swizzle() const { return data_.src_swiz; } uint32_t src_swizzle() const { return data_.src_swiz; }
bool is_src_relative() const { return data_.src_reg_am; } bool is_src_relative() const { return data_.src_reg_am; }
TextureDimension dimension() const { TextureDimension dimension() const { return data_.dimension; }
return static_cast<TextureDimension>(data_.dimension);
}
bool fetch_valid_only() const { return data_.fetch_valid_only == 1; } bool fetch_valid_only() const { return data_.fetch_valid_only == 1; }
bool unnormalized_coordinates() const { return data_.tx_coord_denorm == 1; } bool unnormalized_coordinates() const { return data_.tx_coord_denorm == 1; }
bool has_mag_filter() const { return data_.mag_filter != 0x3; } bool has_mag_filter() const {
TextureFilter mag_filter() const { return data_.mag_filter != TextureFilter::kUseFetchConst;
return static_cast<TextureFilter>(data_.mag_filter);
} }
bool has_min_filter() const { return data_.min_filter != 0x3; } TextureFilter mag_filter() const { return data_.mag_filter; }
TextureFilter min_filter() const { bool has_min_filter() const {
return static_cast<TextureFilter>(data_.min_filter); return data_.min_filter != TextureFilter::kUseFetchConst;
} }
bool has_mip_filter() const { return data_.mip_filter != 0x3; } TextureFilter min_filter() const { return data_.min_filter; }
TextureFilter mip_filter() const { bool has_mip_filter() const {
return static_cast<TextureFilter>(data_.mip_filter); return data_.mip_filter != TextureFilter::kUseFetchConst;
} }
bool has_aniso_filter() const { return data_.aniso_filter != 0x7; } TextureFilter mip_filter() const { return data_.mip_filter; }
AnisoFilter aniso_filter() const { bool has_aniso_filter() const {
return static_cast<AnisoFilter>(data_.aniso_filter); return data_.aniso_filter != AnisoFilter::kUseFetchConst;
} }
bool has_vol_mag_filter() const { return data_.vol_mag_filter != 0x3; } AnisoFilter aniso_filter() const { return data_.aniso_filter; }
TextureFilter vol_mag_filter() const { bool has_vol_mag_filter() const {
return static_cast<TextureFilter>(data_.vol_mag_filter); return data_.vol_mag_filter != TextureFilter::kUseFetchConst;
} }
bool has_vol_min_filter() const { return data_.vol_min_filter != 0x3; } TextureFilter vol_mag_filter() const { return data_.vol_mag_filter; }
TextureFilter vol_min_filter() const { bool has_vol_min_filter() const {
return static_cast<TextureFilter>(data_.vol_min_filter); return data_.vol_min_filter != TextureFilter::kUseFetchConst;
} }
TextureFilter vol_min_filter() const { return data_.vol_min_filter; }
bool use_computed_lod() const { return data_.use_comp_lod == 1; } bool use_computed_lod() const { return data_.use_comp_lod == 1; }
bool use_register_lod() const { return data_.use_reg_lod == 1; } bool use_register_lod() const { return data_.use_reg_lod == 1; }
bool use_register_gradients() const { return data_.use_reg_gradients == 1; } bool use_register_gradients() const { return data_.use_reg_gradients == 1; }
SampleLocation sample_location() const { SampleLocation sample_location() const { return data_.sample_location; }
return static_cast<SampleLocation>(data_.sample_location);
}
float lod_bias() const { float lod_bias() const {
// http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx // http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx
return ((static_cast<int>(data_.lod_bias) << 25) >> 25) / 16.0f; return data_.lod_bias * (1.0f / 16.0f);
}
float offset_x() const {
return ((static_cast<int>(data_.offset_x) << 27) >> 27) / 2.0f;
}
float offset_y() const {
return ((static_cast<int>(data_.offset_y) << 27) >> 27) / 2.0f;
}
float offset_z() const {
return ((static_cast<int>(data_.offset_z) << 27) >> 27) / 2.0f;
} }
float offset_x() const { return data_.offset_x * 0.5f; }
float offset_y() const { return data_.offset_y * 0.5f; }
float offset_z() const { return data_.offset_z * 0.5f; }
private: private:
XEPACKEDSTRUCT(Data, { XEPACKEDSTRUCT(Data, {
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t opcode_value : 5; FetchOpcode opcode_value : 5;
uint32_t src_reg : 6; uint32_t src_reg : 6;
uint32_t src_reg_am : 1; uint32_t src_reg_am : 1;
uint32_t dst_reg : 6; uint32_t dst_reg : 6;
@ -676,14 +622,14 @@ struct TextureFetchInstruction {
uint32_t src_swiz : 6; // xyz uint32_t src_swiz : 6; // xyz
}); });
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t dst_swiz : 12; // xyzw uint32_t dst_swiz : 12; // xyzw
uint32_t mag_filter : 2; // instr_tex_filter_t TextureFilter mag_filter : 2;
uint32_t min_filter : 2; // instr_tex_filter_t TextureFilter min_filter : 2;
uint32_t mip_filter : 2; // instr_tex_filter_t TextureFilter mip_filter : 2;
uint32_t aniso_filter : 3; // instr_aniso_filter_t AnisoFilter aniso_filter : 3;
uint32_t arbitrary_filter : 3; // instr_arbitrary_filter_t xenos::ArbitraryFilter arbitrary_filter : 3;
uint32_t vol_mag_filter : 2; // instr_tex_filter_t TextureFilter vol_mag_filter : 2;
uint32_t vol_min_filter : 2; // instr_tex_filter_t TextureFilter vol_min_filter : 2;
uint32_t use_comp_lod : 1; uint32_t use_comp_lod : 1;
uint32_t use_reg_lod : 1; uint32_t use_reg_lod : 1;
uint32_t unk : 1; uint32_t unk : 1;
@ -691,13 +637,13 @@ struct TextureFetchInstruction {
}); });
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t use_reg_gradients : 1; uint32_t use_reg_gradients : 1;
uint32_t sample_location : 1; SampleLocation sample_location : 1;
uint32_t lod_bias : 7; int32_t lod_bias : 7;
uint32_t unused : 5; uint32_t unused : 5;
uint32_t dimension : 2; TextureDimension dimension : 2;
uint32_t offset_x : 5; int32_t offset_x : 5;
uint32_t offset_y : 5; int32_t offset_y : 5;
uint32_t offset_z : 5; int32_t offset_z : 5;
uint32_t pred_condition : 1; uint32_t pred_condition : 1;
}); });
}); });
@ -722,7 +668,7 @@ static_assert_size(TextureFetchInstruction, 12);
// when write masks are disabled or the instruction that would write them // when write masks are disabled or the instruction that would write them
// fails its predication check. // fails its predication check.
enum class AluScalarOpcode { enum class AluScalarOpcode : uint32_t {
// Floating-Point Add // Floating-Point Add
// adds dest, src0.ab // adds dest, src0.ab
// dest.xyzw = src0.a + src0.b; // dest.xyzw = src0.a + src0.b;
@ -1049,7 +995,7 @@ enum class AluScalarOpcode {
kRetainPrev = 50, kRetainPrev = 50,
}; };
enum class AluVectorOpcode { enum class AluVectorOpcode : uint32_t {
// Per-Component Floating-Point Add // Per-Component Floating-Point Add
// add dest, src0, src1 // add dest, src0, src1
// dest.x = src0.x + src1.x; // dest.x = src0.x + src1.x;
@ -1373,9 +1319,7 @@ struct AluInstruction {
return vector_write_mask() || is_export() || return vector_write_mask() || is_export() ||
AluVectorOpcodeHasSideEffects(vector_opcode()); AluVectorOpcodeHasSideEffects(vector_opcode());
} }
AluVectorOpcode vector_opcode() const { AluVectorOpcode vector_opcode() const { return data_.vector_opc; }
return static_cast<AluVectorOpcode>(data_.vector_opc);
}
uint32_t vector_write_mask() const { return data_.vector_write_mask; } uint32_t vector_write_mask() const { return data_.vector_write_mask; }
uint32_t vector_dest() const { return data_.vector_dest; } uint32_t vector_dest() const { return data_.vector_dest; }
bool is_vector_dest_relative() const { return data_.vector_dest_rel == 1; } bool is_vector_dest_relative() const { return data_.vector_dest_rel == 1; }
@ -1385,9 +1329,7 @@ struct AluInstruction {
return scalar_opcode() != AluScalarOpcode::kRetainPrev || return scalar_opcode() != AluScalarOpcode::kRetainPrev ||
(!is_export() && scalar_write_mask() != 0); (!is_export() && scalar_write_mask() != 0);
} }
AluScalarOpcode scalar_opcode() const { AluScalarOpcode scalar_opcode() const { return data_.scalar_opc; }
return static_cast<AluScalarOpcode>(data_.scalar_opc);
}
uint32_t scalar_write_mask() const { return data_.scalar_write_mask; } uint32_t scalar_write_mask() const { return data_.scalar_write_mask; }
uint32_t scalar_dest() const { return data_.scalar_dest; } uint32_t scalar_dest() const { return data_.scalar_dest; }
bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; } bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; }
@ -1459,7 +1401,7 @@ struct AluInstruction {
uint32_t scalar_write_mask : 4; uint32_t scalar_write_mask : 4;
uint32_t vector_clamp : 1; uint32_t vector_clamp : 1;
uint32_t scalar_clamp : 1; uint32_t scalar_clamp : 1;
uint32_t scalar_opc : 6; // instr_scalar_opc_t AluScalarOpcode scalar_opc : 6;
}); });
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t src3_swiz : 8; uint32_t src3_swiz : 8;
@ -1478,7 +1420,7 @@ struct AluInstruction {
uint32_t src3_reg : 8; uint32_t src3_reg : 8;
uint32_t src2_reg : 8; uint32_t src2_reg : 8;
uint32_t src1_reg : 8; uint32_t src1_reg : 8;
uint32_t vector_opc : 5; // instr_vector_opc_t AluVectorOpcode vector_opc : 5;
uint32_t src3_sel : 1; uint32_t src3_sel : 1;
uint32_t src2_sel : 1; uint32_t src2_sel : 1;
uint32_t src1_sel : 1; uint32_t src1_sel : 1;

View File

@ -654,9 +654,8 @@ VkDescriptorSet BufferCache::PrepareVertexSet(
// trace_writer_.WriteMemoryRead(physical_address, source_length); // trace_writer_.WriteMemoryRead(physical_address, source_length);
// Upload (or get a cached copy of) the buffer. // Upload (or get a cached copy of) the buffer.
auto buffer_ref = auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address,
UploadVertexBuffer(command_buffer, physical_address, source_length, source_length, fetch->endian, fence);
static_cast<Endian>(fetch->endian), fence);
if (buffer_ref.second == VK_WHOLE_SIZE) { if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer. // Failed to upload buffer.
XELOGW("Failed to upload vertex buffer!"); XELOGW("Failed to upload vertex buffer!");

View File

@ -815,13 +815,13 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL);
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); SetShadowRegister(&regs.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO);
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); SetShadowRegister(&regs.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO);
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); SetShadowRegister(&regs.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO);
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); SetShadowRegister(&regs.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO);
push_constants_dirty |= push_constants_dirty |=
SetShadowRegister(&regs.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF); SetShadowRegister(&regs.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF);
push_constants_dirty |= push_constants_dirty |=
@ -1503,13 +1503,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
bool dirty = false; bool dirty = false;
dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
dirty |= dirty |=
SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL0);
dirty |= dirty |=
SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1); SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL1);
dirty |= dirty |=
SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL2);
dirty |= dirty |=
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL3);
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
XXH64_update(&hash_state_, &regs, sizeof(regs)); XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) { if (!dirty) {

View File

@ -292,10 +292,10 @@ class PipelineCache {
reg::SQ_PROGRAM_CNTL sq_program_cntl; reg::SQ_PROGRAM_CNTL sq_program_cntl;
uint32_t sq_context_misc; uint32_t sq_context_misc;
uint32_t rb_colorcontrol; uint32_t rb_colorcontrol;
uint32_t rb_color_info; reg::RB_COLOR_INFO rb_color_info;
uint32_t rb_color1_info; reg::RB_COLOR_INFO rb_color1_info;
uint32_t rb_color2_info; reg::RB_COLOR_INFO rb_color2_info;
uint32_t rb_color3_info; reg::RB_COLOR_INFO rb_color3_info;
float rb_alpha_ref; float rb_alpha_ref;
uint32_t pa_su_point_size; uint32_t pa_su_point_size;

View File

@ -962,7 +962,7 @@ bool VulkanCommandProcessor::IssueCopy() {
break; break;
} }
assert_true(fetch->type == 3); assert_true(fetch->type == 3);
assert_true(fetch->endian == 2); assert_true(fetch->endian == Endian::k8in32);
assert_true(fetch->size == 6); assert_true(fetch->size == 6);
const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2); const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2);
trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4); trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4);
@ -974,7 +974,7 @@ bool VulkanCommandProcessor::IssueCopy() {
float dest_points[6]; float dest_points[6];
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
dest_points[i] = dest_points[i] =
GpuSwap(xe::load<float>(vertex_addr + i * 4), Endian(fetch->endian)) + GpuSwap(xe::load<float>(vertex_addr + i * 4), fetch->endian) +
vtx_offset; vtx_offset;
} }
@ -1000,10 +1000,10 @@ bool VulkanCommandProcessor::IssueCopy() {
if (is_color_source) { if (is_color_source) {
// Source from a color target. // Source from a color target.
reg::RB_COLOR_INFO color_info[4] = { reg::RB_COLOR_INFO color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs.Get<reg::RB_COLOR_INFO>(),
regs[XE_GPU_REG_RB_COLOR1_INFO].u32, regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR1_INFO),
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR2_INFO),
regs[XE_GPU_REG_RB_COLOR3_INFO].u32, regs.Get<reg::RB_COLOR_INFO>(XE_GPU_REG_RB_COLOR3_INFO),
}; };
color_edram_base = color_info[copy_src_select].color_base; color_edram_base = color_info[copy_src_select].color_base;
color_format = color_info[copy_src_select].color_format; color_format = color_info[copy_src_select].color_format;
@ -1023,7 +1023,7 @@ bool VulkanCommandProcessor::IssueCopy() {
Endian resolve_endian = Endian::k8in32; Endian resolve_endian = Endian::k8in32;
if (copy_regs->copy_dest_info.copy_dest_endian <= Endian128::k16in32) { if (copy_regs->copy_dest_info.copy_dest_endian <= Endian128::k16in32) {
resolve_endian = resolve_endian =
static_cast<Endian>(copy_regs->copy_dest_info.copy_dest_endian.value()); static_cast<Endian>(copy_regs->copy_dest_info.copy_dest_endian);
} }
// Demand a resolve texture from the texture cache. // Demand a resolve texture from the texture cache.
@ -1289,7 +1289,7 @@ bool VulkanCommandProcessor::IssueCopy() {
// Perform any requested clears. // Perform any requested clears.
uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32;
assert_true(copy_color_clear == copy_color_clear_low); assert_true(copy_color_clear == copy_color_clear_low);
if (color_clear_enabled) { if (color_clear_enabled) {

View File

@ -161,14 +161,14 @@ enum class SampleLocation : uint32_t {
}; };
enum class Endian : uint32_t { enum class Endian : uint32_t {
kUnspecified = 0, kNone = 0,
k8in16 = 1, k8in16 = 1,
k8in32 = 2, k8in32 = 2,
k16in32 = 3, k16in32 = 3,
}; };
enum class Endian128 : uint32_t { enum class Endian128 : uint32_t {
kUnspecified = 0, kNone = 0,
k8in16 = 1, k8in16 = 1,
k8in32 = 2, k8in32 = 2,
k16in32 = 3, k16in32 = 3,
@ -225,6 +225,77 @@ enum class DepthRenderTargetFormat : uint32_t {
kD24FS8 = 1, kD24FS8 = 1,
}; };
// Texture data formats.
// a2xx_sq_surfaceformat +
// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas
// The named formats cover every value from 0 to 63 without gaps, which is the
// full range of a 6-bit field (xe_gpu_texture_fetch_t declares
// `TextureFormat format : 6`).
enum class TextureFormat : uint32_t {
k_1_REVERSE = 0,
k_1 = 1,
k_8 = 2,
k_1_5_5_5 = 3,
k_5_6_5 = 4,
k_6_5_5 = 5,
k_8_8_8_8 = 6,
k_2_10_10_10 = 7,
k_8_A = 8,
k_8_B = 9,
k_8_8 = 10,
k_Cr_Y1_Cb_Y0_REP = 11,
k_Y1_Cr_Y0_Cb_REP = 12,
k_16_16_EDRAM = 13,
k_8_8_8_8_A = 14,
k_4_4_4_4 = 15,
k_10_11_11 = 16,
k_11_11_10 = 17,
k_DXT1 = 18,
k_DXT2_3 = 19,
k_DXT4_5 = 20,
k_16_16_16_16_EDRAM = 21,
k_24_8 = 22,
k_24_8_FLOAT = 23,
k_16 = 24,
k_16_16 = 25,
k_16_16_16_16 = 26,
k_16_EXPAND = 27,
k_16_16_EXPAND = 28,
k_16_16_16_16_EXPAND = 29,
k_16_FLOAT = 30,
k_16_16_FLOAT = 31,
k_16_16_16_16_FLOAT = 32,
k_32 = 33,
k_32_32 = 34,
k_32_32_32_32 = 35,
k_32_FLOAT = 36,
k_32_32_FLOAT = 37,
k_32_32_32_32_FLOAT = 38,
k_32_AS_8 = 39,
k_32_AS_8_8 = 40,
k_16_MPEG = 41,
k_16_16_MPEG = 42,
k_8_INTERLACED = 43,
k_32_AS_8_INTERLACED = 44,
k_32_AS_8_8_INTERLACED = 45,
k_16_INTERLACED = 46,
k_16_MPEG_INTERLACED = 47,
k_16_16_MPEG_INTERLACED = 48,
k_DXN = 49,
k_8_8_8_8_AS_16_16_16_16 = 50,
k_DXT1_AS_16_16_16_16 = 51,
k_DXT2_3_AS_16_16_16_16 = 52,
k_DXT4_5_AS_16_16_16_16 = 53,
k_2_10_10_10_AS_16_16_16_16 = 54,
k_10_11_11_AS_16_16_16_16 = 55,
k_11_11_10_AS_16_16_16_16 = 56,
k_32_32_32_FLOAT = 57,
k_DXT3A = 58,
k_DXT5A = 59,
k_CTX1 = 60,
k_DXT3A_AS_1_1_1_1 = 61,
k_8_8_8_8_GAMMA_EDRAM = 62,
k_2_10_10_10_FLOAT_EDRAM = 63,
// Lies outside the 0-63 range above, so it can never be read back from a
// 6-bit bitfield. NOTE(review): presumably a host-side "no/invalid format"
// sentinel - confirm against the code that uses it.
kUnknown = 0xFFFFFFFFu,
};
// Subset of a2xx_sq_surfaceformat - formats that RTs can be resolved to. // Subset of a2xx_sq_surfaceformat - formats that RTs can be resolved to.
enum class ColorFormat : uint32_t { enum class ColorFormat : uint32_t {
k_8 = 2, k_8 = 2,
@ -367,11 +438,7 @@ enum class BlendFactor : uint32_t {
kConstantAlpha = 14, kConstantAlpha = 14,
kOneMinusConstantAlpha = 15, kOneMinusConstantAlpha = 15,
kSrcAlphaSaturate = 16, kSrcAlphaSaturate = 16,
// SRC1 likely not used on the Xbox 360 - only available in Direct3D 9Ex. // SRC1 added on Adreno.
kSrc1Color = 20,
kOneMinusSrc1Color = 21,
kSrc1Alpha = 22,
kOneMinusSrc1Alpha = 23,
}; };
enum class BlendOp : uint32_t { enum class BlendOp : uint32_t {
@ -391,6 +458,17 @@ typedef enum {
XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF,
} XE_GPU_INVALIDATE_MASK; } XE_GPU_INVALIDATE_MASK;
// instr_arbitrary_filter_t
// All enumerators fit in 3 bits (xe_gpu_texture_fetch_t declares
// `xenos::ArbitraryFilter arbitrary_filter : 3`). Value 6 has no enumerator.
enum class ArbitraryFilter : uint32_t {
k2x4Sym = 0,
k2x4Asym = 1,
k4x2Sym = 2,
k4x2Asym = 3,
k4x4Sym = 4,
k4x4Asym = 5,
// NOTE(review): presumably "use the filter from the fetch constant instead of
// the one encoded in the instruction" - confirm against the shader translator.
kUseFetchConst = 7,
};
// a2xx_sq_ps_vtx_mode // a2xx_sq_ps_vtx_mode
enum class VertexShaderExportMode : uint32_t { enum class VertexShaderExportMode : uint32_t {
kPosition1Vector = 0, kPosition1Vector = 0,
@ -420,6 +498,17 @@ enum class TessellationMode : uint32_t {
kAdaptive = 2, kAdaptive = 2,
}; };
// Whether polygons are drawn as plain triangles or with the dual polygon mode
// described on kDualMode.
// NOTE(review): presumably the type of a rasterizer mode-control register
// field - confirm which register uses it.
enum class PolygonModeEnable : uint32_t {
kDisabled = 0, // Render triangles.
kDualMode = 1, // Send 2 sets of 3 polygons with the specified polygon type.
};
// How a polygon is drawn: as points, lines, or triangles.
// NOTE(review): presumably the "specified polygon type" referred to by
// PolygonModeEnable::kDualMode - confirm against the register definition.
enum class PolygonType : uint32_t {
kPoints = 0,
kLines = 1,
kTriangles = 2,
};
enum class ModeControl : uint32_t { enum class ModeControl : uint32_t {
kIgnore = 0, kIgnore = 0,
kColorDepth = 4, kColorDepth = 4,
@ -471,7 +560,7 @@ typedef enum {
inline uint16_t GpuSwap(uint16_t value, Endian endianness) { inline uint16_t GpuSwap(uint16_t value, Endian endianness) {
switch (endianness) { switch (endianness) {
case Endian::kUnspecified: case Endian::kNone:
// No swap. // No swap.
return value; return value;
case Endian::k8in16: case Endian::k8in16:
@ -486,7 +575,7 @@ inline uint16_t GpuSwap(uint16_t value, Endian endianness) {
inline uint32_t GpuSwap(uint32_t value, Endian endianness) { inline uint32_t GpuSwap(uint32_t value, Endian endianness) {
switch (endianness) { switch (endianness) {
default: default:
case Endian::kUnspecified: case Endian::kNone:
// No swap. // No swap.
return value; return value;
case Endian::k8in16: case Endian::k8in16:
@ -520,11 +609,11 @@ inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; }
XEPACKEDUNION(xe_gpu_vertex_fetch_t, { XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t type : 2; // +0 uint32_t type : 2; // +0
uint32_t address : 30; // +2 uint32_t address : 30; // +2 address in dwords
uint32_t endian : 2; // +0 Endian endian : 2; // +0
uint32_t size : 24; // +2 size in words uint32_t size : 24; // +2 size in words
uint32_t unk1 : 6; // +26 uint32_t unk1 : 6; // +26
}); });
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t dword_0; uint32_t dword_0;
@ -535,34 +624,36 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
// XE_GPU_REG_SHADER_CONSTANT_FETCH_* // XE_GPU_REG_SHADER_CONSTANT_FETCH_*
XEPACKEDUNION(xe_gpu_texture_fetch_t, { XEPACKEDUNION(xe_gpu_texture_fetch_t, {
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t type : 2; // +0 dword_0 uint32_t type : 2; // +0 dword_0
uint32_t sign_x : 2; // +2 TextureSign sign_x : 2; // +2
uint32_t sign_y : 2; // +4 TextureSign sign_y : 2; // +4
uint32_t sign_z : 2; // +6 TextureSign sign_z : 2; // +6
uint32_t sign_w : 2; // +8 TextureSign sign_w : 2; // +8
uint32_t clamp_x : 3; // +10 ClampMode clamp_x : 3; // +10
uint32_t clamp_y : 3; // +13 ClampMode clamp_y : 3; // +13
uint32_t clamp_z : 3; // +16 ClampMode clamp_z : 3; // +16
uint32_t unused_0 : 3; // +19 uint32_t signed_rf_mode_all : 1; // +19
uint32_t pitch : 9; // +22 byte_pitch >> 5 // TODO(Triang3l): 1 or 2 dim_tbd bits?
uint32_t tiled : 1; // +31 uint32_t unk_0 : 2; // +20
uint32_t pitch : 9; // +22 byte_pitch >> 5
uint32_t tiled : 1; // +31
uint32_t format : 6; // +0 dword_1 TextureFormat format : 6; // +0 dword_1
uint32_t endianness : 2; // +6 Endian endianness : 2; // +6
uint32_t request_size : 2; // +8 uint32_t request_size : 2; // +8
uint32_t stacked : 1; // +10 uint32_t stacked : 1; // +10
uint32_t clamp_policy : 1; // +11 d3d/opengl uint32_t nearest_clamp_policy : 1; // +11 d3d/opengl
uint32_t base_address : 20; // +12 uint32_t base_address : 20; // +12 base address >> 12
union { // dword_2 union { // dword_2
struct { struct {
uint32_t width : 24; uint32_t width : 24;
uint32_t unused : 8; uint32_t : 8;
} size_1d; } size_1d;
struct { struct {
uint32_t width : 13; uint32_t width : 13;
uint32_t height : 13; uint32_t height : 13;
uint32_t unused : 6; uint32_t : 6;
} size_2d; } size_2d;
struct { struct {
uint32_t width : 13; uint32_t width : 13;
@ -576,15 +667,16 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
} size_3d; } size_3d;
}; };
uint32_t num_format : 1; // +0 dword_3 frac/int uint32_t num_format : 1; // +0 dword_3 frac/int
uint32_t swizzle : 12; // +1 xyzw, 3b each (XE_GPU_SWIZZLE) // xyzw, 3b each (XE_GPU_SWIZZLE)
int32_t exp_adjust : 6; // +13 uint32_t swizzle : 12; // +1
uint32_t mag_filter : 2; // +19 int32_t exp_adjust : 6; // +13
uint32_t min_filter : 2; // +21 TextureFilter mag_filter : 2; // +19
uint32_t mip_filter : 2; // +23 TextureFilter min_filter : 2; // +21
uint32_t aniso_filter : 3; // +25 TextureFilter mip_filter : 2; // +23
uint32_t unused_3 : 3; // +28 AnisoFilter aniso_filter : 3; // +25
uint32_t border_size : 1; // +31 xenos::ArbitraryFilter arbitrary_filter : 3; // +28
uint32_t border_size : 1; // +31
uint32_t vol_mag_filter : 1; // +0 dword_4 uint32_t vol_mag_filter : 1; // +0 dword_4
uint32_t vol_min_filter : 1; // +1 uint32_t vol_min_filter : 1; // +1
@ -596,13 +688,13 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
int32_t grad_exp_adjust_h : 5; // +22 int32_t grad_exp_adjust_h : 5; // +22
int32_t grad_exp_adjust_v : 5; // +27 int32_t grad_exp_adjust_v : 5; // +27
uint32_t border_color : 2; // +0 dword_5 BorderColor border_color : 2; // +0 dword_5
uint32_t force_bcw_max : 1; // +2 uint32_t force_bc_w_to_max : 1; // +2
uint32_t tri_clamp : 2; // +3 uint32_t tri_clamp : 2; // +3
int32_t aniso_bias : 4; // +5 int32_t aniso_bias : 4; // +5
uint32_t dimension : 2; // +9 uint32_t dimension : 2; // +9
uint32_t packed_mips : 1; // +11 uint32_t packed_mips : 1; // +11
uint32_t mip_address : 20; // +12 uint32_t mip_address : 20; // +12 mip address >> 12
}); });
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
uint32_t dword_0; uint32_t dword_0;