[GPU] Cleanup: XEPACKED -> static_assert_size/alignas

This commit is contained in:
Triang3l 2021-05-16 14:03:36 +03:00
parent 165c6f0e4d
commit f39020700a
4 changed files with 187 additions and 130 deletions

View File

@ -138,7 +138,7 @@ constexpr uint32_t MakeFourCC(uint32_t ch0, uint32_t ch1, uint32_t ch2,
(uint32_t(ch3) << 24);
}
struct ContainerHeader {
struct alignas(uint32_t) ContainerHeader {
static constexpr uint32_t kFourCC = MakeFourCC('D', 'X', 'B', 'C');
static constexpr uint16_t kVersionMajor = 1;
static constexpr uint16_t kVersionMinor = 0;
@ -159,9 +159,9 @@ struct ContainerHeader {
// Followed by uint32_t[blob_count] offsets from the start of the container in
// bytes to the start of each blob's header.
};
static_assert(alignof(ContainerHeader) <= sizeof(uint32_t));
static_assert_size(ContainerHeader, sizeof(uint32_t) * 8);
struct BlobHeader {
struct alignas(uint32_t) BlobHeader {
enum class FourCC : uint32_t {
// In order of appearance in a container.
kResourceDefinition = MakeFourCC('R', 'D', 'E', 'F'),
@ -175,7 +175,7 @@ struct BlobHeader {
FourCC fourcc;
uint32_t size_bytes;
};
static_assert(alignof(BlobHeader) <= sizeof(uint32_t));
static_assert_size(BlobHeader, sizeof(uint32_t) * 2);
// Appends a string to a DWORD stream, returns the DWORD-aligned length.
inline uint32_t AppendAlignedString(std::vector<uint32_t>& dest,
@ -325,7 +325,7 @@ enum class RdefShaderModel : uint32_t {
};
// D3D12_SHADER_TYPE_DESC with some differences.
struct RdefType {
struct alignas(uint32_t) RdefType {
RdefVariableClass variable_class;
RdefVariableType variable_type;
// Matrix rows, 1 for other numeric, 0 if not applicable.
@ -343,18 +343,18 @@ struct RdefType {
// uint is called dword when it's scalar (but uint vectors are still uintN).
uint32_t name_ptr;
};
static_assert(alignof(RdefType) <= sizeof(uint32_t));
static_assert_size(RdefType, sizeof(uint32_t) * 9);
struct RdefStructureMember {
struct alignas(uint32_t) RdefStructureMember {
uint32_t name_ptr;
uint32_t type_ptr;
uint32_t offset_bytes;
};
static_assert(alignof(RdefStructureMember) <= sizeof(uint32_t));
static_assert_size(RdefStructureMember, sizeof(uint32_t) * 3);
// D3D12_SHADER_VARIABLE_DESC with some differences.
// Used for constants in constant buffers primarily.
struct RdefVariable {
struct alignas(uint32_t) RdefVariable {
uint32_t name_ptr;
uint32_t start_offset_bytes;
uint32_t size_bytes;
@ -371,10 +371,10 @@ struct RdefVariable {
// Number of sampler slots possibly used, 0 if no textures used.
uint32_t sampler_size;
};
static_assert(alignof(RdefVariable) <= sizeof(uint32_t));
static_assert_size(RdefVariable, sizeof(uint32_t) * 10);
// Sorted by ID.
struct RdefCbuffer {
struct alignas(uint32_t) RdefCbuffer {
uint32_t name_ptr;
uint32_t variable_count;
uint32_t variables_ptr;
@ -384,11 +384,11 @@ struct RdefCbuffer {
// RdefCbufferFlags.
uint32_t flags;
};
static_assert(alignof(RdefCbuffer) <= sizeof(uint32_t));
static_assert_size(RdefCbuffer, sizeof(uint32_t) * 6);
// D3D12_SHADER_INPUT_BIND_DESC with some differences.
// Placed in samplers, SRVs, UAVs, CBVs order, sorted by ID.
struct RdefInputBind {
struct alignas(uint32_t) RdefInputBind {
uint32_t name_ptr;
RdefInputType type;
ResourceReturnType return_type;
@ -406,9 +406,9 @@ struct RdefInputBind {
uint32_t bind_point_space;
uint32_t id;
};
static_assert(alignof(RdefInputBind) <= sizeof(uint32_t));
static_assert_size(RdefInputBind, sizeof(uint32_t) * 10);
struct RdefHeader {
struct alignas(uint32_t) RdefHeader {
enum class FourCC : uint32_t {
// RD11 in Shader Model 5_0 shaders.
k5_0 = MakeFourCC('R', 'D', '1', '1'),
@ -441,7 +441,7 @@ struct RdefHeader {
sizeof_structure_member_bytes = sizeof(RdefStructureMember);
}
};
static_assert(alignof(RdefHeader) <= sizeof(uint32_t));
static_assert_size(RdefHeader, sizeof(uint32_t) * 15);
// D3D_NAME subset
enum class Name : uint32_t {
@ -467,6 +467,7 @@ enum class SignatureRegisterComponentType : uint32_t {
};
// D3D_MIN_PRECISION
// uint8_t as it's used as one byte in SignatureParameter.
enum class MinPrecision : uint8_t {
kDefault,
kFloat16,
@ -478,7 +479,7 @@ enum class MinPrecision : uint8_t {
};
// D3D11_INTERNALSHADER_PARAMETER_11_1
struct SignatureParameter {
struct alignas(uint32_t) SignatureParameter {
uint32_t semantic_name_ptr;
uint32_t semantic_index;
// kUndefined for pixel shader outputs - inferred from the component type and
@ -497,15 +498,15 @@ struct SignatureParameter {
};
MinPrecision min_precision;
};
static_assert(alignof(SignatureParameter) <= sizeof(uint32_t));
static_assert_size(SignatureParameter, sizeof(uint32_t) * 6);
// D3D10_INTERNALSHADER_SIGNATURE
struct Signature {
struct alignas(uint32_t) Signature {
uint32_t parameter_count;
// If the signature is empty, this still points after the header.
uint32_t parameter_info_ptr;
};
static_assert(alignof(Signature) <= sizeof(uint32_t));
static_assert_size(Signature, sizeof(uint32_t) * 2);
// SHADER_FEATURE
// Low 32 bits.
@ -528,11 +529,11 @@ enum ShaderFeature0 : uint32_t {
<< 13,
};
struct ShaderFeatureInfo {
struct alignas(uint32_t) ShaderFeatureInfo {
// UINT64 originally, but aligned to 4 rather than 8.
uint32_t feature_flags[2];
};
static_assert(alignof(ShaderFeatureInfo) <= sizeof(uint32_t));
static_assert_size(ShaderFeatureInfo, sizeof(uint32_t) * 2);
// D3D11_SB_TESSELLATOR_DOMAIN
enum class TessellatorDomain : uint32_t {
@ -543,7 +544,7 @@ enum class TessellatorDomain : uint32_t {
};
// The STAT blob (based on Wine d3dcompiler_parse_stat).
struct Statistics {
struct alignas(uint32_t) Statistics {
// Not increased by declarations and labels.
uint32_t instruction_count; // +0
uint32_t temp_register_count; // +4
@ -595,7 +596,7 @@ struct Statistics {
// Unknown in Wine, but confirmed by testing.
uint32_t c_texture_store_instructions; // +90
};
static_assert(alignof(Statistics) <= sizeof(uint32_t));
static_assert_size(Statistics, sizeof(uint32_t) * 37);
// A shader blob begins with a version token and the shader length in dwords
// (including the version token and the length token itself).

View File

@ -13,12 +13,19 @@
#include <cstdint>
#include <cstdlib>
#include "xenia/base/assert.h"
#include "xenia/gpu/xenos.h"
// Most registers can be found from:
// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h
// Some registers were added on Adreno specifically and are not referenced in
// game .pdb files and never set by games.
// Only 32-bit types (uint32_t, int32_t, float or enums with uint32_t / int32_t
// as the underlying type) are allowed in the bit fields here, as Visual C++
// restarts packing when a field requires different alignment than the previous
// one.
namespace xe {
namespace gpu {
@ -38,7 +45,7 @@ namespace reg {
*******************************************************************************/
union COHER_STATUS_HOST {
union alignas(uint32_t) COHER_STATUS_HOST {
struct {
uint32_t matching_contexts : 8; // +0
uint32_t rb_copy_dest_base_ena : 1; // +8
@ -60,8 +67,9 @@ union COHER_STATUS_HOST {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST;
};
static_assert_size(COHER_STATUS_HOST, sizeof(uint32_t));
union WAIT_UNTIL {
union alignas(uint32_t) WAIT_UNTIL {
struct {
uint32_t : 1; // +0
uint32_t wait_re_vsync : 1; // +1
@ -83,6 +91,7 @@ union WAIT_UNTIL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL;
};
static_assert_size(WAIT_UNTIL, sizeof(uint32_t));
/*******************************************************************************
___ ___ ___ _ _ ___ _ _ ___ ___ ___
@ -92,7 +101,7 @@ union WAIT_UNTIL {
*******************************************************************************/
union SQ_PROGRAM_CNTL {
union alignas(uint32_t) SQ_PROGRAM_CNTL {
struct {
// Note from a2xx.xml:
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG,
@ -112,8 +121,9 @@ union SQ_PROGRAM_CNTL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_SQ_PROGRAM_CNTL;
};
static_assert_size(SQ_PROGRAM_CNTL, sizeof(uint32_t));
union SQ_CONTEXT_MISC {
union alignas(uint32_t) SQ_CONTEXT_MISC {
struct {
uint32_t inst_pred_optimize : 1; // +0
uint32_t sc_output_screen_xy : 1; // +1
@ -143,8 +153,9 @@ union SQ_CONTEXT_MISC {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC;
};
static_assert_size(SQ_CONTEXT_MISC, sizeof(uint32_t));
union SQ_INTERPOLATOR_CNTL {
union alignas(uint32_t) SQ_INTERPOLATOR_CNTL {
struct {
uint32_t param_shade : 16; // +0
// SampleLocation bits - 0 for centroid, 1 for center, if
@ -154,6 +165,7 @@ union SQ_INTERPOLATOR_CNTL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_SQ_INTERPOLATOR_CNTL;
};
static_assert_size(SQ_INTERPOLATOR_CNTL, sizeof(uint32_t));
/*******************************************************************************
__ _____ ___ _____ _____ __
@ -173,7 +185,7 @@ union SQ_INTERPOLATOR_CNTL {
*******************************************************************************/
union VGT_DRAW_INITIATOR {
union alignas(uint32_t) VGT_DRAW_INITIATOR {
// Different than on A2xx and R6xx/R7xx.
struct {
xenos::PrimitiveType prim_type : 6; // +0
@ -188,22 +200,25 @@ union VGT_DRAW_INITIATOR {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_VGT_DRAW_INITIATOR;
};
static_assert_size(VGT_DRAW_INITIATOR, sizeof(uint32_t));
union VGT_OUTPUT_PATH_CNTL {
union alignas(uint32_t) VGT_OUTPUT_PATH_CNTL {
struct {
xenos::VGTOutputPath path_select : 2; // +0
};
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL;
};
static_assert_size(VGT_OUTPUT_PATH_CNTL, sizeof(uint32_t));
union VGT_HOS_CNTL {
union alignas(uint32_t) VGT_HOS_CNTL {
struct {
xenos::TessellationMode tess_mode : 2; // +0
};
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_VGT_HOS_CNTL;
};
static_assert_size(VGT_HOS_CNTL, sizeof(uint32_t));
/*******************************************************************************
___ ___ ___ __ __ ___ _____ _____ _____
@ -218,7 +233,7 @@ union VGT_HOS_CNTL {
*******************************************************************************/
union PA_SU_POINT_MINMAX {
union alignas(uint32_t) PA_SU_POINT_MINMAX {
struct {
// Radius, 12.4 fixed point.
uint32_t min_size : 16; // +0
@ -227,8 +242,9 @@ union PA_SU_POINT_MINMAX {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_MINMAX;
};
static_assert_size(PA_SU_POINT_MINMAX, sizeof(uint32_t));
union PA_SU_POINT_SIZE {
union alignas(uint32_t) PA_SU_POINT_SIZE {
struct {
// 1/2 width or height, 12.4 fixed point.
uint32_t height : 16; // +0
@ -237,9 +253,10 @@ union PA_SU_POINT_SIZE {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_SIZE;
};
static_assert_size(PA_SU_POINT_SIZE, sizeof(uint32_t));
// Setup Unit / Scanline Converter mode cntl
union PA_SU_SC_MODE_CNTL {
union alignas(uint32_t) PA_SU_SC_MODE_CNTL {
struct {
uint32_t cull_front : 1; // +0
uint32_t cull_back : 1; // +1
@ -268,9 +285,10 @@ union PA_SU_SC_MODE_CNTL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL;
};
static_assert_size(PA_SU_SC_MODE_CNTL, sizeof(uint32_t));
// Setup Unit Vertex Control
union PA_SU_VTX_CNTL {
union alignas(uint32_t) PA_SU_VTX_CNTL {
struct {
uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL).
uint32_t round_mode : 2; // +1
@ -279,8 +297,9 @@ union PA_SU_VTX_CNTL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL;
};
static_assert_size(PA_SU_VTX_CNTL, sizeof(uint32_t));
union PA_SC_MPASS_PS_CNTL {
union alignas(uint32_t) PA_SC_MPASS_PS_CNTL {
struct {
uint32_t mpass_pix_vec_per_pass : 20; // +0
uint32_t : 11; // +20
@ -289,9 +308,10 @@ union PA_SC_MPASS_PS_CNTL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL;
};
static_assert_size(PA_SC_MPASS_PS_CNTL, sizeof(uint32_t));
// Scanline converter viz query, used by D3D for gpu side conditional rendering
union PA_SC_VIZ_QUERY {
union alignas(uint32_t) PA_SC_VIZ_QUERY {
struct {
// the visibility of draws should be evaluated
uint32_t viz_query_ena : 1; // +0
@ -304,9 +324,10 @@ union PA_SC_VIZ_QUERY {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY;
};
static_assert_size(PA_SC_VIZ_QUERY, sizeof(uint32_t));
// Clipper clip control
union PA_CL_CLIP_CNTL {
union alignas(uint32_t) PA_CL_CLIP_CNTL {
struct {
uint32_t ucp_ena_0 : 1; // +0
uint32_t ucp_ena_1 : 1; // +1
@ -329,9 +350,10 @@ union PA_CL_CLIP_CNTL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_CL_CLIP_CNTL;
};
static_assert_size(PA_CL_CLIP_CNTL, sizeof(uint32_t));
// Viewport transform engine control
union PA_CL_VTE_CNTL {
union alignas(uint32_t) PA_CL_VTE_CNTL {
struct {
uint32_t vport_x_scale_ena : 1; // +0
uint32_t vport_x_offset_ena : 1; // +1
@ -348,8 +370,9 @@ union PA_CL_VTE_CNTL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL;
};
static_assert_size(PA_CL_VTE_CNTL, sizeof(uint32_t));
union PA_SC_SCREEN_SCISSOR_TL {
union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_TL {
struct {
int32_t tl_x : 15; // +0
uint32_t : 1; // +15
@ -358,8 +381,9 @@ union PA_SC_SCREEN_SCISSOR_TL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL;
};
static_assert_size(PA_SC_SCREEN_SCISSOR_TL, sizeof(uint32_t));
union PA_SC_SCREEN_SCISSOR_BR {
union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_BR {
struct {
int32_t br_x : 15; // +0
uint32_t : 1; // +15
@ -368,8 +392,9 @@ union PA_SC_SCREEN_SCISSOR_BR {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR;
};
static_assert_size(PA_SC_SCREEN_SCISSOR_BR, sizeof(uint32_t));
union PA_SC_WINDOW_OFFSET {
union alignas(uint32_t) PA_SC_WINDOW_OFFSET {
struct {
int32_t window_x_offset : 15; // +0
uint32_t : 1; // +15
@ -378,8 +403,9 @@ union PA_SC_WINDOW_OFFSET {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET;
};
static_assert_size(PA_SC_WINDOW_OFFSET, sizeof(uint32_t));
union PA_SC_WINDOW_SCISSOR_TL {
union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_TL {
struct {
uint32_t tl_x : 14; // +0
uint32_t : 2; // +14
@ -390,8 +416,9 @@ union PA_SC_WINDOW_SCISSOR_TL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL;
};
static_assert_size(PA_SC_WINDOW_SCISSOR_TL, sizeof(uint32_t));
union PA_SC_WINDOW_SCISSOR_BR {
union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_BR {
struct {
uint32_t br_x : 14; // +0
uint32_t : 2; // +14
@ -400,6 +427,7 @@ union PA_SC_WINDOW_SCISSOR_BR {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR;
};
static_assert_size(PA_SC_WINDOW_SCISSOR_BR, sizeof(uint32_t));
/*******************************************************************************
___ ___
@ -409,15 +437,16 @@ union PA_SC_WINDOW_SCISSOR_BR {
*******************************************************************************/
union RB_MODECONTROL {
union alignas(uint32_t) RB_MODECONTROL {
struct {
xenos::ModeControl edram_mode : 3; // +0
};
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL;
};
static_assert_size(RB_MODECONTROL, sizeof(uint32_t));
union RB_SURFACE_INFO {
union alignas(uint32_t) RB_SURFACE_INFO {
struct {
uint32_t surface_pitch : 14; // +0 in pixels.
uint32_t : 2; // +14
@ -427,8 +456,9 @@ union RB_SURFACE_INFO {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_SURFACE_INFO;
};
static_assert_size(RB_SURFACE_INFO, sizeof(uint32_t));
union RB_COLORCONTROL {
union alignas(uint32_t) RB_COLORCONTROL {
struct {
xenos::CompareFunction alpha_func : 3; // +0
uint32_t alpha_test_enable : 1; // +3
@ -476,8 +506,9 @@ union RB_COLORCONTROL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_COLORCONTROL;
};
static_assert_size(RB_COLORCONTROL, sizeof(uint32_t));
union RB_COLOR_INFO {
union alignas(uint32_t) RB_COLOR_INFO {
struct {
uint32_t color_base : 12; // +0 in tiles.
uint32_t : 4; // +12
@ -489,8 +520,9 @@ union RB_COLOR_INFO {
// RB_COLOR[1-3]_INFO also use this format.
static const Register rt_register_indices[4];
};
static_assert_size(RB_COLOR_INFO, sizeof(uint32_t));
union RB_COLOR_MASK {
union alignas(uint32_t) RB_COLOR_MASK {
struct {
uint32_t write_red0 : 1; // +0
uint32_t write_green0 : 1; // +1
@ -512,8 +544,9 @@ union RB_COLOR_MASK {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK;
};
static_assert_size(RB_COLOR_MASK, sizeof(uint32_t));
union RB_BLENDCONTROL {
union alignas(uint32_t) RB_BLENDCONTROL {
struct {
xenos::BlendFactor color_srcblend : 5; // +0
xenos::BlendOp color_comb_fcn : 3; // +5
@ -529,8 +562,9 @@ union RB_BLENDCONTROL {
static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0;
static const Register rt_register_indices[4];
};
static_assert_size(RB_BLENDCONTROL, sizeof(uint32_t));
union RB_DEPTHCONTROL {
union alignas(uint32_t) RB_DEPTHCONTROL {
struct {
uint32_t stencil_enable : 1; // +0
uint32_t z_enable : 1; // +1
@ -551,8 +585,9 @@ union RB_DEPTHCONTROL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_DEPTHCONTROL;
};
static_assert_size(RB_DEPTHCONTROL, sizeof(uint32_t));
union RB_STENCILREFMASK {
union alignas(uint32_t) RB_STENCILREFMASK {
struct {
uint32_t stencilref : 8; // +0
uint32_t stencilmask : 8; // +8
@ -562,8 +597,9 @@ union RB_STENCILREFMASK {
static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK;
// RB_STENCILREFMASK_BF also uses this format.
};
static_assert_size(RB_STENCILREFMASK, sizeof(uint32_t));
union RB_DEPTH_INFO {
union alignas(uint32_t) RB_DEPTH_INFO {
struct {
uint32_t depth_base : 12; // +0 in tiles.
uint32_t : 4; // +12
@ -572,10 +608,11 @@ union RB_DEPTH_INFO {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO;
};
static_assert_size(RB_DEPTH_INFO, sizeof(uint32_t));
// Copy registers are very different than on Adreno.
union RB_COPY_CONTROL {
union alignas(uint32_t) RB_COPY_CONTROL {
struct {
uint32_t copy_src_select : 3; // +0 Depth is 4.
uint32_t : 1; // +3
@ -589,8 +626,9 @@ union RB_COPY_CONTROL {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL;
};
static_assert_size(RB_COPY_CONTROL, sizeof(uint32_t));
union RB_COPY_DEST_INFO {
union alignas(uint32_t) RB_COPY_DEST_INFO {
struct {
xenos::Endian128 copy_dest_endian : 3; // +0
uint32_t copy_dest_array : 1; // +3
@ -604,8 +642,9 @@ union RB_COPY_DEST_INFO {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO;
};
static_assert_size(RB_COPY_DEST_INFO, sizeof(uint32_t));
union RB_COPY_DEST_PITCH {
union alignas(uint32_t) RB_COPY_DEST_PITCH {
struct {
uint32_t copy_dest_pitch : 14; // +0
uint32_t : 2; // +14
@ -614,6 +653,7 @@ union RB_COPY_DEST_PITCH {
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH;
};
static_assert_size(RB_COPY_DEST_PITCH, sizeof(uint32_t));
} // namespace reg

View File

@ -45,6 +45,10 @@
* SOFTWARE.
*/
// Only 32-bit types (uint32_t, int32_t or enums with uint32_t / int32_t as the
// underlying type) are allowed in the bit fields here, as Visual C++ restarts
// packing when a field requires different alignment than the previous one.
namespace xe {
namespace gpu {
namespace ucode {
@ -175,7 +179,7 @@ struct ControlFlowExecInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowExecInstruction, 8);
static_assert_size(ControlFlowExecInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kCondExec and kCondExecEnd.
struct ControlFlowCondExecInstruction {
@ -209,7 +213,7 @@ struct ControlFlowCondExecInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowCondExecInstruction, 8);
static_assert_size(ControlFlowCondExecInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kCondExecPred, kCondExecPredEnd,
// kCondExecPredClean, kCondExecPredCleanEnd.
@ -245,7 +249,7 @@ struct ControlFlowCondExecPredInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowCondExecPredInstruction, 8);
static_assert_size(ControlFlowCondExecPredInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kLoopStart.
struct ControlFlowLoopStartInstruction {
@ -272,7 +276,7 @@ struct ControlFlowLoopStartInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowLoopStartInstruction, 8);
static_assert_size(ControlFlowLoopStartInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kLoopEnd.
struct ControlFlowLoopEndInstruction {
@ -302,7 +306,7 @@ struct ControlFlowLoopEndInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowLoopEndInstruction, 8);
static_assert_size(ControlFlowLoopEndInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kCondCall.
struct ControlFlowCondCallInstruction {
@ -333,7 +337,7 @@ struct ControlFlowCondCallInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowCondCallInstruction, 8);
static_assert_size(ControlFlowCondCallInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kReturn.
struct ControlFlowReturnInstruction {
@ -349,7 +353,7 @@ struct ControlFlowReturnInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowReturnInstruction, 8);
static_assert_size(ControlFlowReturnInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kCondJmp.
struct ControlFlowCondJmpInstruction {
@ -381,7 +385,7 @@ struct ControlFlowCondJmpInstruction {
AddressingMode address_mode_ : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowCondJmpInstruction, 8);
static_assert_size(ControlFlowCondJmpInstruction, sizeof(uint32_t) * 2);
// Instruction data for ControlFlowOpcode::kAlloc.
struct ControlFlowAllocInstruction {
@ -403,9 +407,9 @@ struct ControlFlowAllocInstruction {
uint32_t : 1;
ControlFlowOpcode opcode_ : 4;
};
static_assert_size(ControlFlowAllocInstruction, 8);
static_assert_size(ControlFlowAllocInstruction, sizeof(uint32_t) * 2);
XEPACKEDUNION(ControlFlowInstruction, {
union ControlFlowInstruction {
ControlFlowOpcode opcode() const { return opcode_value; }
ControlFlowExecInstruction exec; // kExec*
@ -418,17 +422,17 @@ XEPACKEDUNION(ControlFlowInstruction, {
ControlFlowCondJmpInstruction cond_jmp; // kCondJmp
ControlFlowAllocInstruction alloc; // kAlloc
XEPACKEDSTRUCTANONYMOUS({
struct {
uint32_t unused_0 : 32;
uint32_t unused_1 : 12;
ControlFlowOpcode opcode_value : 4;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t dword_0;
uint32_t dword_1;
});
});
static_assert_size(ControlFlowInstruction, 8);
};
};
static_assert_size(ControlFlowInstruction, sizeof(uint32_t) * 2);
inline void UnpackControlFlowInstructions(const uint32_t* dwords,
ControlFlowInstruction* out_ab) {
@ -587,7 +591,7 @@ enum class FetchOpcode : uint32_t {
kSetTextureGradientsVert = 26,
};
struct VertexFetchInstruction {
struct alignas(uint32_t) VertexFetchInstruction {
FetchOpcode opcode() const { return data_.opcode_value; }
// Whether the jump is predicated (or conditional).
@ -653,8 +657,8 @@ struct VertexFetchInstruction {
}
private:
XEPACKEDSTRUCT(Data, {
XEPACKEDSTRUCTANONYMOUS({
struct Data {
struct {
FetchOpcode opcode_value : 5;
uint32_t src_reg : 6;
uint32_t src_reg_am : 1;
@ -666,8 +670,8 @@ struct VertexFetchInstruction {
// Prefetch count minus 1.
uint32_t prefetch_count : 3;
uint32_t src_swiz : 2;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t dst_swiz : 12;
uint32_t fomat_comp_all : 1;
uint32_t num_format_all : 1;
@ -678,17 +682,18 @@ struct VertexFetchInstruction {
int32_t exp_adjust : 6;
uint32_t is_mini_fetch : 1;
uint32_t is_predicated : 1;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t stride : 8;
int32_t offset : 23;
uint32_t pred_condition : 1;
});
});
};
};
Data data_;
};
static_assert_size(VertexFetchInstruction, sizeof(uint32_t) * 3);
struct TextureFetchInstruction {
struct alignas(uint32_t) TextureFetchInstruction {
FetchOpcode opcode() const { return data_.opcode_value; }
// Whether the jump is predicated (or conditional).
@ -747,8 +752,8 @@ struct TextureFetchInstruction {
float offset_z() const { return data_.offset_z * 0.5f; }
private:
XEPACKEDSTRUCT(Data, {
XEPACKEDSTRUCTANONYMOUS({
struct Data {
struct {
FetchOpcode opcode_value : 5;
uint32_t src_reg : 6;
uint32_t src_reg_am : 1;
@ -758,8 +763,8 @@ struct TextureFetchInstruction {
uint32_t const_index : 5;
uint32_t tx_coord_denorm : 1;
uint32_t src_swiz : 6; // xyz
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t dst_swiz : 12; // xyzw
xenos::TextureFilter mag_filter : 2;
xenos::TextureFilter min_filter : 2;
@ -772,8 +777,8 @@ struct TextureFetchInstruction {
uint32_t use_reg_lod : 1;
uint32_t unk : 1;
uint32_t is_predicated : 1;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t use_reg_gradients : 1;
xenos::SampleLocation sample_location : 1;
int32_t lod_bias : 7;
@ -783,11 +788,11 @@ struct TextureFetchInstruction {
int32_t offset_y : 5;
int32_t offset_z : 5;
uint32_t pred_condition : 1;
});
});
};
};
Data data_;
};
static_assert_size(TextureFetchInstruction, 12);
static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3);
// What follows is largely a mash up of the microcode assembly naming and the
// R600 docs that have a near 1:1 with the instructions available in the xenos
@ -1645,7 +1650,7 @@ enum class ExportRegister : uint32_t {
kExportData4,
};
struct AluInstruction {
struct alignas(uint32_t) AluInstruction {
// Raw accessors.
// Whether data is being exported (or written to local registers).
@ -1762,8 +1767,8 @@ struct AluInstruction {
}
private:
XEPACKEDSTRUCT(Data, {
XEPACKEDSTRUCTANONYMOUS({
struct Data {
struct {
// If exporting, both vector and scalar operations use the vector
// destination (which can't be relative in this case).
// Not very important note: If both scalar and vector operations exporting
@ -1789,8 +1794,8 @@ struct AluInstruction {
uint32_t vector_clamp : 1;
uint32_t scalar_clamp : 1;
AluScalarOpcode scalar_opc : 6;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t src3_swiz : 8;
uint32_t src2_swiz : 8;
uint32_t src1_swiz : 8;
@ -1802,8 +1807,8 @@ struct AluInstruction {
uint32_t address_absolute : 1;
uint32_t const_1_rel_abs : 1;
uint32_t const_0_rel_abs : 1;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t src3_reg : 8;
uint32_t src2_reg : 8;
uint32_t src1_reg : 8;
@ -1811,11 +1816,11 @@ struct AluInstruction {
uint32_t src3_sel : 1;
uint32_t src2_sel : 1;
uint32_t src1_sel : 1;
});
});
};
};
Data data_;
};
static_assert_size(AluInstruction, 12);
static_assert_size(AluInstruction, sizeof(uint32_t) * 3);
} // namespace ucode
} // namespace gpu

View File

@ -21,6 +21,12 @@ namespace xe {
namespace gpu {
namespace xenos {
// Enum types used in the GPU registers or the microcode must have uint32_t or
// int32_t as the underlying type: Visual C++ restarts bit field packing when a
// field requires a different alignment than the previous one, so only 32-bit
// types may be used in bit fields (registers are 32-bit, and the microcode
// consists of triples of 32-bit words).
enum class ShaderType : uint32_t {
kVertex = 0,
kPixel = 1,
@ -991,20 +997,21 @@ enum class FetchConstantType : uint32_t {
};
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
XEPACKEDSTRUCTANONYMOUS({
union alignas(uint32_t) xe_gpu_vertex_fetch_t {
struct {
FetchConstantType type : 2; // +0
uint32_t address : 30; // +2 address in dwords
Endian endian : 2; // +0
uint32_t size : 24; // +2 size in words
uint32_t unk1 : 6; // +26
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t dword_0;
uint32_t dword_1;
});
});
};
};
static_assert_size(xe_gpu_vertex_fetch_t, sizeof(uint32_t) * 2);
// Byte alignment of texture subresources in memory - of each mip and stack
// slice / cube face (and of textures themselves), this number of bits is also
@ -1049,8 +1056,8 @@ constexpr uint32_t kTextureLinearRowAlignmentBytes =
1 << kTextureLinearRowAlignmentBytesLog2;
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
XEPACKEDUNION(xe_gpu_texture_fetch_t, {
XEPACKEDSTRUCTANONYMOUS({
union alignas(uint32_t) xe_gpu_texture_fetch_t {
struct {
FetchConstantType type : 2; // +0 dword_0
// Likely before the swizzle, seems logical from R5xx (SIGNED_COMP0/1/2/3
// set the signedness of components 0/1/2/3, while SEL_ALPHA/RED/GREEN/BLUE
@ -1140,34 +1147,35 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
DataDimension dimension : 2; // +9
uint32_t packed_mips : 1; // +11
uint32_t mip_address : 20; // +12 mip address >> 12
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t dword_0;
uint32_t dword_1;
uint32_t dword_2;
uint32_t dword_3;
uint32_t dword_4;
uint32_t dword_5;
});
});
};
};
static_assert_size(xe_gpu_texture_fetch_t, sizeof(uint32_t) * 6);
// XE_GPU_REG_SHADER_CONSTANT_FETCH_*
XEPACKEDUNION(xe_gpu_fetch_group_t, {
union alignas(uint32_t) xe_gpu_fetch_group_t {
xe_gpu_texture_fetch_t texture_fetch;
XEPACKEDSTRUCTANONYMOUS({
struct {
xe_gpu_vertex_fetch_t vertex_fetch_0;
xe_gpu_vertex_fetch_t vertex_fetch_1;
xe_gpu_vertex_fetch_t vertex_fetch_2;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t dword_0;
uint32_t dword_1;
uint32_t dword_2;
uint32_t dword_3;
uint32_t dword_4;
uint32_t dword_5;
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t type_0 : 2;
uint32_t data_0_a : 30;
uint32_t data_0_b : 32;
@ -1177,8 +1185,9 @@ XEPACKEDUNION(xe_gpu_fetch_group_t, {
uint32_t type_2 : 2;
uint32_t data_2_a : 30;
uint32_t data_2_b : 32;
});
});
};
};
static_assert_size(xe_gpu_fetch_group_t, sizeof(uint32_t) * 6);
// GPU_MEMEXPORT_STREAM_CONSTANT from a game .pdb - float constant for memexport
// stream configuration.
@ -1188,8 +1197,8 @@ XEPACKEDUNION(xe_gpu_fetch_group_t, {
// integers. dword_1 specifically is 2^23 because
// powf(2.0f, 23.0f) + float(i) == 0x4B000000 | i
// so mad can pack indices as integers in the lower bits.
XEPACKEDUNION(xe_gpu_memexport_stream_t, {
XEPACKEDSTRUCTANONYMOUS({
union alignas(uint32_t) xe_gpu_memexport_stream_t {
struct {
uint32_t base_address : 30; // +0 dword_0 physical address >> 2
uint32_t const_0x1 : 2; // +30
@ -1205,16 +1214,17 @@ XEPACKEDUNION(xe_gpu_memexport_stream_t, {
uint32_t index_count : 23; // +0 dword_3
uint32_t const_0x96 : 9; // +23
});
XEPACKEDSTRUCTANONYMOUS({
};
struct {
uint32_t dword_0;
uint32_t dword_1;
uint32_t dword_2;
uint32_t dword_3;
});
});
};
};
static_assert_size(xe_gpu_memexport_stream_t, sizeof(uint32_t) * 4);
XEPACKEDSTRUCT(xe_gpu_depth_sample_counts, {
struct alignas(uint32_t) xe_gpu_depth_sample_counts {
// This is little endian as it is swapped in D3D code.
// Corresponding A and B values are summed up by D3D.
// Occlusion there is calculated by subtracting begin from end struct.
@ -1226,7 +1236,8 @@ XEPACKEDSTRUCT(xe_gpu_depth_sample_counts, {
uint32_t ZPass_B;
uint32_t StencilFail_A;
uint32_t StencilFail_B;
});
};
static_assert_size(xe_gpu_depth_sample_counts, sizeof(uint32_t) * 8);
// Enum of event values used for VGT_EVENT_INITIATOR
enum Event {