diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index c51076cbc..bf2fc8b86 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -138,7 +138,7 @@ constexpr uint32_t MakeFourCC(uint32_t ch0, uint32_t ch1, uint32_t ch2, (uint32_t(ch3) << 24); } -struct ContainerHeader { +struct alignas(uint32_t) ContainerHeader { static constexpr uint32_t kFourCC = MakeFourCC('D', 'X', 'B', 'C'); static constexpr uint16_t kVersionMajor = 1; static constexpr uint16_t kVersionMinor = 0; @@ -159,9 +159,9 @@ struct ContainerHeader { // Followed by uint32_t[blob_count] offsets from the start of the container in // bytes to the start of each blob's header. }; -static_assert(alignof(ContainerHeader) <= sizeof(uint32_t)); +static_assert_size(ContainerHeader, sizeof(uint32_t) * 8); -struct BlobHeader { +struct alignas(uint32_t) BlobHeader { enum class FourCC : uint32_t { // In order of appearance in a container. kResourceDefinition = MakeFourCC('R', 'D', 'E', 'F'), @@ -175,7 +175,7 @@ struct BlobHeader { FourCC fourcc; uint32_t size_bytes; }; -static_assert(alignof(BlobHeader) <= sizeof(uint32_t)); +static_assert_size(BlobHeader, sizeof(uint32_t) * 2); // Appends a string to a DWORD stream, returns the DWORD-aligned length. inline uint32_t AppendAlignedString(std::vector& dest, @@ -325,7 +325,7 @@ enum class RdefShaderModel : uint32_t { }; // D3D12_SHADER_TYPE_DESC with some differences. -struct RdefType { +struct alignas(uint32_t) RdefType { RdefVariableClass variable_class; RdefVariableType variable_type; // Matrix rows, 1 for other numeric, 0 if not applicable. @@ -343,18 +343,18 @@ struct RdefType { // uint is called dword when it's scalar (but uint vectors are still uintN). uint32_t name_ptr; }; -static_assert(alignof(RdefType) <= sizeof(uint32_t)); +static_assert_size(RdefType, sizeof(uint32_t) * 9); -struct RdefStructureMember { +struct alignas(uint32_t) RdefStructureMember { uint32_t name_ptr; uint32_t type_ptr; uint32_t offset_bytes; }; -static_assert(alignof(RdefStructureMember) <= sizeof(uint32_t)); +static_assert_size(RdefStructureMember, sizeof(uint32_t) * 3); // D3D12_SHADER_VARIABLE_DESC with some differences. // Used for constants in constant buffers primarily. -struct RdefVariable { +struct alignas(uint32_t) RdefVariable { uint32_t name_ptr; uint32_t start_offset_bytes; uint32_t size_bytes; @@ -371,10 +371,10 @@ struct RdefVariable { // Number of sampler slots possibly used, 0 if no textures used. uint32_t sampler_size; }; -static_assert(alignof(RdefVariable) <= sizeof(uint32_t)); +static_assert_size(RdefVariable, sizeof(uint32_t) * 10); // Sorted by ID. -struct RdefCbuffer { +struct alignas(uint32_t) RdefCbuffer { uint32_t name_ptr; uint32_t variable_count; uint32_t variables_ptr; @@ -384,11 +384,11 @@ struct RdefCbuffer { // RdefCbufferFlags. uint32_t flags; }; -static_assert(alignof(RdefCbuffer) <= sizeof(uint32_t)); +static_assert_size(RdefCbuffer, sizeof(uint32_t) * 6); // D3D12_SHADER_INPUT_BIND_DESC with some differences. // Placed in samplers, SRVs, UAVs, CBVs order, sorted by ID. -struct RdefInputBind { +struct alignas(uint32_t) RdefInputBind { uint32_t name_ptr; RdefInputType type; ResourceReturnType return_type; @@ -406,9 +406,9 @@ struct RdefInputBind { uint32_t bind_point_space; uint32_t id; }; -static_assert(alignof(RdefInputBind) <= sizeof(uint32_t)); +static_assert_size(RdefInputBind, sizeof(uint32_t) * 10); -struct RdefHeader { +struct alignas(uint32_t) RdefHeader { enum class FourCC : uint32_t { // RD11 in Shader Model 5_0 shaders. k5_0 = MakeFourCC('R', 'D', '1', '1'), @@ -441,7 +441,7 @@ struct RdefHeader { sizeof_structure_member_bytes = sizeof(RdefStructureMember); } }; -static_assert(alignof(RdefHeader) <= sizeof(uint32_t)); +static_assert_size(RdefHeader, sizeof(uint32_t) * 15); // D3D_NAME subset enum class Name : uint32_t { @@ -467,6 +467,7 @@ enum class SignatureRegisterComponentType : uint32_t { }; // D3D_MIN_PRECISION +// uint8_t as it's used as one byte in SignatureParameter. enum class MinPrecision : uint8_t { kDefault, kFloat16, @@ -478,7 +479,7 @@ enum class MinPrecision : uint8_t { }; // D3D11_INTERNALSHADER_PARAMETER_11_1 -struct SignatureParameter { +struct alignas(uint32_t) SignatureParameter { uint32_t semantic_name_ptr; uint32_t semantic_index; // kUndefined for pixel shader outputs - inferred from the component type and @@ -497,15 +498,15 @@ struct SignatureParameter { }; MinPrecision min_precision; }; -static_assert(alignof(SignatureParameter) <= sizeof(uint32_t)); +static_assert_size(SignatureParameter, sizeof(uint32_t) * 6); // D3D10_INTERNALSHADER_SIGNATURE -struct Signature { +struct alignas(uint32_t) Signature { uint32_t parameter_count; // If the signature is empty, this still points after the header. uint32_t parameter_info_ptr; }; -static_assert(alignof(Signature) <= sizeof(uint32_t)); +static_assert_size(Signature, sizeof(uint32_t) * 2); // SHADER_FEATURE // Low 32 bits. @@ -528,11 +529,11 @@ enum ShaderFeature0 : uint32_t { << 13, }; -struct ShaderFeatureInfo { +struct alignas(uint32_t) ShaderFeatureInfo { // UINT64 originally, but aligned to 4 rather than 8. uint32_t feature_flags[2]; }; -static_assert(alignof(ShaderFeatureInfo) <= sizeof(uint32_t)); +static_assert_size(ShaderFeatureInfo, sizeof(uint32_t) * 2); // D3D11_SB_TESSELLATOR_DOMAIN enum class TessellatorDomain : uint32_t { @@ -543,7 +544,7 @@ enum class TessellatorDomain : uint32_t { }; // The STAT blob (based on Wine d3dcompiler_parse_stat). -struct Statistics { +struct alignas(uint32_t) Statistics { // Not increased by declarations and labels. uint32_t instruction_count; // +0 uint32_t temp_register_count; // +4 @@ -595,7 +596,7 @@ struct Statistics { // Unknown in Wine, but confirmed by testing. uint32_t c_texture_store_instructions; // +90 }; -static_assert(alignof(Statistics) <= sizeof(uint32_t)); +static_assert_size(Statistics, sizeof(uint32_t) * 37); // A shader blob begins with a version token and the shader length in dwords // (including the version token and the length token itself). diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index a193800b7..9d1ce4886 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -13,12 +13,19 @@ #include #include +#include "xenia/base/assert.h" #include "xenia/gpu/xenos.h" // Most registers can be found from: // https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h // Some registers were added on Adreno specifically and are not referenced in // game .pdb files and never set by games. + +// Only 32-bit types (uint32_t, int32_t, float or enums with uint32_t / int32_t +// as the underlying type) are allowed in the bit fields here, as Visual C++ +// restarts packing when a field requires different alignment than the previous +// one. + namespace xe { namespace gpu { @@ -38,7 +45,7 @@ namespace reg { *******************************************************************************/ -union COHER_STATUS_HOST { +union alignas(uint32_t) COHER_STATUS_HOST { struct { uint32_t matching_contexts : 8; // +0 uint32_t rb_copy_dest_base_ena : 1; // +8 @@ -60,8 +67,9 @@ union COHER_STATUS_HOST { uint32_t value; static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST; }; +static_assert_size(COHER_STATUS_HOST, sizeof(uint32_t)); -union WAIT_UNTIL { +union alignas(uint32_t) WAIT_UNTIL { struct { uint32_t : 1; // +0 uint32_t wait_re_vsync : 1; // +1 @@ -83,6 +91,7 @@ union WAIT_UNTIL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL; }; +static_assert_size(WAIT_UNTIL, sizeof(uint32_t)); /******************************************************************************* ___ ___ ___ _ _ ___ _ _ ___ ___ ___ @@ -92,7 +101,7 @@ union WAIT_UNTIL { *******************************************************************************/ -union SQ_PROGRAM_CNTL { +union alignas(uint32_t) SQ_PROGRAM_CNTL { struct { // Note from a2xx.xml: // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, @@ -112,8 +121,9 @@ union SQ_PROGRAM_CNTL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; }; +static_assert_size(SQ_PROGRAM_CNTL, sizeof(uint32_t)); -union SQ_CONTEXT_MISC { +union alignas(uint32_t) SQ_CONTEXT_MISC { struct { uint32_t inst_pred_optimize : 1; // +0 uint32_t sc_output_screen_xy : 1; // +1 @@ -143,8 +153,9 @@ union SQ_CONTEXT_MISC { uint32_t value; static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC; }; +static_assert_size(SQ_CONTEXT_MISC, sizeof(uint32_t)); -union SQ_INTERPOLATOR_CNTL { +union alignas(uint32_t) SQ_INTERPOLATOR_CNTL { struct { uint32_t param_shade : 16; // +0 // SampleLocation bits - 0 for centroid, 1 for center, if @@ -154,6 +165,7 @@ union SQ_INTERPOLATOR_CNTL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_SQ_INTERPOLATOR_CNTL; }; +static_assert_size(SQ_INTERPOLATOR_CNTL, sizeof(uint32_t)); /******************************************************************************* __ _____ ___ _____ _____ __ @@ -173,7 +185,7 @@ union SQ_INTERPOLATOR_CNTL { *******************************************************************************/ -union VGT_DRAW_INITIATOR { +union alignas(uint32_t) VGT_DRAW_INITIATOR { // Different than on A2xx and R6xx/R7xx. struct { xenos::PrimitiveType prim_type : 6; // +0 @@ -188,22 +200,25 @@ union VGT_DRAW_INITIATOR { uint32_t value; static constexpr Register register_index = XE_GPU_REG_VGT_DRAW_INITIATOR; }; +static_assert_size(VGT_DRAW_INITIATOR, sizeof(uint32_t)); -union VGT_OUTPUT_PATH_CNTL { +union alignas(uint32_t) VGT_OUTPUT_PATH_CNTL { struct { xenos::VGTOutputPath path_select : 2; // +0 }; uint32_t value; static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; }; +static_assert_size(VGT_OUTPUT_PATH_CNTL, sizeof(uint32_t)); -union VGT_HOS_CNTL { +union alignas(uint32_t) VGT_HOS_CNTL { struct { xenos::TessellationMode tess_mode : 2; // +0 }; uint32_t value; static constexpr Register register_index = XE_GPU_REG_VGT_HOS_CNTL; }; +static_assert_size(VGT_HOS_CNTL, sizeof(uint32_t)); /******************************************************************************* ___ ___ ___ __ __ ___ _____ _____ _____ @@ -218,7 +233,7 @@ union VGT_HOS_CNTL { *******************************************************************************/ -union PA_SU_POINT_MINMAX { +union alignas(uint32_t) PA_SU_POINT_MINMAX { struct { // Radius, 12.4 fixed point. uint32_t min_size : 16; // +0 @@ -227,8 +242,9 @@ union PA_SU_POINT_MINMAX { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; }; +static_assert_size(PA_SU_POINT_MINMAX, sizeof(uint32_t)); -union PA_SU_POINT_SIZE { +union alignas(uint32_t) PA_SU_POINT_SIZE { struct { // 1/2 width or height, 12.4 fixed point. uint32_t height : 16; // +0 @@ -237,9 +253,10 @@ union PA_SU_POINT_SIZE { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_SIZE; }; +static_assert_size(PA_SU_POINT_SIZE, sizeof(uint32_t)); // Setup Unit / Scanline Converter mode cntl -union PA_SU_SC_MODE_CNTL { +union alignas(uint32_t) PA_SU_SC_MODE_CNTL { struct { uint32_t cull_front : 1; // +0 uint32_t cull_back : 1; // +1 @@ -268,9 +285,10 @@ union PA_SU_SC_MODE_CNTL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; }; +static_assert_size(PA_SU_SC_MODE_CNTL, sizeof(uint32_t)); // Setup Unit Vertex Control -union PA_SU_VTX_CNTL { +union alignas(uint32_t) PA_SU_VTX_CNTL { struct { uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL). uint32_t round_mode : 2; // +1 @@ -279,8 +297,9 @@ union PA_SU_VTX_CNTL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL; }; +static_assert_size(PA_SU_VTX_CNTL, sizeof(uint32_t)); -union PA_SC_MPASS_PS_CNTL { +union alignas(uint32_t) PA_SC_MPASS_PS_CNTL { struct { uint32_t mpass_pix_vec_per_pass : 20; // +0 uint32_t : 11; // +20 @@ -289,9 +308,10 @@ union PA_SC_MPASS_PS_CNTL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; }; +static_assert_size(PA_SC_MPASS_PS_CNTL, sizeof(uint32_t)); // Scanline converter viz query, used by D3D for gpu side conditional rendering -union PA_SC_VIZ_QUERY { +union alignas(uint32_t) PA_SC_VIZ_QUERY { struct { // the visibility of draws should be evaluated uint32_t viz_query_ena : 1; // +0 @@ -304,9 +324,10 @@ union PA_SC_VIZ_QUERY { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; }; +static_assert_size(PA_SC_VIZ_QUERY, sizeof(uint32_t)); // Clipper clip control -union PA_CL_CLIP_CNTL { +union alignas(uint32_t) PA_CL_CLIP_CNTL { struct { uint32_t ucp_ena_0 : 1; // +0 uint32_t ucp_ena_1 : 1; // +1 @@ -329,9 +350,10 @@ union PA_CL_CLIP_CNTL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; }; +static_assert_size(PA_CL_CLIP_CNTL, sizeof(uint32_t)); // Viewport transform engine control -union PA_CL_VTE_CNTL { +union alignas(uint32_t) PA_CL_VTE_CNTL { struct { uint32_t vport_x_scale_ena : 1; // +0 uint32_t vport_x_offset_ena : 1; // +1 @@ -348,8 +370,9 @@ union PA_CL_VTE_CNTL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL; }; +static_assert_size(PA_CL_VTE_CNTL, sizeof(uint32_t)); -union PA_SC_SCREEN_SCISSOR_TL { +union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_TL { struct { int32_t tl_x : 15; // +0 uint32_t : 1; // +15 @@ -358,8 +381,9 @@ union PA_SC_SCREEN_SCISSOR_TL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL; }; +static_assert_size(PA_SC_SCREEN_SCISSOR_TL, sizeof(uint32_t)); -union PA_SC_SCREEN_SCISSOR_BR { +union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_BR { struct { int32_t br_x : 15; // +0 uint32_t : 1; // +15 @@ -368,8 +392,9 @@ union PA_SC_SCREEN_SCISSOR_BR { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR; }; +static_assert_size(PA_SC_SCREEN_SCISSOR_BR, sizeof(uint32_t)); -union PA_SC_WINDOW_OFFSET { +union alignas(uint32_t) PA_SC_WINDOW_OFFSET { struct { int32_t window_x_offset : 15; // +0 uint32_t : 1; // +15 @@ -378,8 +403,9 @@ union PA_SC_WINDOW_OFFSET { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; }; +static_assert_size(PA_SC_WINDOW_OFFSET, sizeof(uint32_t)); -union PA_SC_WINDOW_SCISSOR_TL { +union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_TL { struct { uint32_t tl_x : 14; // +0 uint32_t : 2; // +14 @@ -390,8 +416,9 @@ union PA_SC_WINDOW_SCISSOR_TL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; }; +static_assert_size(PA_SC_WINDOW_SCISSOR_TL, sizeof(uint32_t)); -union PA_SC_WINDOW_SCISSOR_BR { +union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_BR { struct { uint32_t br_x : 14; // +0 uint32_t : 2; // +14 @@ -400,6 +427,7 @@ union PA_SC_WINDOW_SCISSOR_BR { uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; }; +static_assert_size(PA_SC_WINDOW_SCISSOR_BR, sizeof(uint32_t)); /******************************************************************************* ___ ___ @@ -409,15 +437,16 @@ union PA_SC_WINDOW_SCISSOR_BR { *******************************************************************************/ -union RB_MODECONTROL { +union alignas(uint32_t) RB_MODECONTROL { struct { xenos::ModeControl edram_mode : 3; // +0 }; uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL; }; +static_assert_size(RB_MODECONTROL, sizeof(uint32_t)); -union RB_SURFACE_INFO { +union alignas(uint32_t) RB_SURFACE_INFO { struct { uint32_t surface_pitch : 14; // +0 in pixels. uint32_t : 2; // +14 @@ -427,8 +456,9 @@ union RB_SURFACE_INFO { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_SURFACE_INFO; }; +static_assert_size(RB_SURFACE_INFO, sizeof(uint32_t)); -union RB_COLORCONTROL { +union alignas(uint32_t) RB_COLORCONTROL { struct { xenos::CompareFunction alpha_func : 3; // +0 uint32_t alpha_test_enable : 1; // +3 @@ -476,8 +506,9 @@ union RB_COLORCONTROL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_COLORCONTROL; }; +static_assert_size(RB_COLORCONTROL, sizeof(uint32_t)); -union RB_COLOR_INFO { +union alignas(uint32_t) RB_COLOR_INFO { struct { uint32_t color_base : 12; // +0 in tiles. uint32_t : 4; // +12 @@ -489,8 +520,9 @@ union RB_COLOR_INFO { // RB_COLOR[1-3]_INFO also use this format. static const Register rt_register_indices[4]; }; +static_assert_size(RB_COLOR_INFO, sizeof(uint32_t)); -union RB_COLOR_MASK { +union alignas(uint32_t) RB_COLOR_MASK { struct { uint32_t write_red0 : 1; // +0 uint32_t write_green0 : 1; // +1 @@ -512,8 +544,9 @@ union RB_COLOR_MASK { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK; }; +static_assert_size(RB_COLOR_MASK, sizeof(uint32_t)); -union RB_BLENDCONTROL { +union alignas(uint32_t) RB_BLENDCONTROL { struct { xenos::BlendFactor color_srcblend : 5; // +0 xenos::BlendOp color_comb_fcn : 3; // +5 @@ -529,8 +562,9 @@ union RB_BLENDCONTROL { static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0; static const Register rt_register_indices[4]; }; +static_assert_size(RB_BLENDCONTROL, sizeof(uint32_t)); -union RB_DEPTHCONTROL { +union alignas(uint32_t) RB_DEPTHCONTROL { struct { uint32_t stencil_enable : 1; // +0 uint32_t z_enable : 1; // +1 @@ -551,8 +585,9 @@ union RB_DEPTHCONTROL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_DEPTHCONTROL; }; +static_assert_size(RB_DEPTHCONTROL, sizeof(uint32_t)); -union RB_STENCILREFMASK { +union alignas(uint32_t) RB_STENCILREFMASK { struct { uint32_t stencilref : 8; // +0 uint32_t stencilmask : 8; // +8 @@ -562,8 +597,9 @@ union RB_STENCILREFMASK { static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK; // RB_STENCILREFMASK_BF also uses this format. }; +static_assert_size(RB_STENCILREFMASK, sizeof(uint32_t)); -union RB_DEPTH_INFO { +union alignas(uint32_t) RB_DEPTH_INFO { struct { uint32_t depth_base : 12; // +0 in tiles. uint32_t : 4; // +12 @@ -572,10 +608,11 @@ union RB_DEPTH_INFO { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO; }; +static_assert_size(RB_DEPTH_INFO, sizeof(uint32_t)); // Copy registers are very different than on Adreno. -union RB_COPY_CONTROL { +union alignas(uint32_t) RB_COPY_CONTROL { struct { uint32_t copy_src_select : 3; // +0 Depth is 4. uint32_t : 1; // +3 @@ -589,8 +626,9 @@ union RB_COPY_CONTROL { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL; }; +static_assert_size(RB_COPY_CONTROL, sizeof(uint32_t)); -union RB_COPY_DEST_INFO { +union alignas(uint32_t) RB_COPY_DEST_INFO { struct { xenos::Endian128 copy_dest_endian : 3; // +0 uint32_t copy_dest_array : 1; // +3 @@ -604,8 +642,9 @@ union RB_COPY_DEST_INFO { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO; }; +static_assert_size(RB_COPY_DEST_INFO, sizeof(uint32_t)); -union RB_COPY_DEST_PITCH { +union alignas(uint32_t) RB_COPY_DEST_PITCH { struct { uint32_t copy_dest_pitch : 14; // +0 uint32_t : 2; // +14 @@ -614,6 +653,7 @@ union RB_COPY_DEST_PITCH { uint32_t value; static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; }; +static_assert_size(RB_COPY_DEST_PITCH, sizeof(uint32_t)); } // namespace reg diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index f60b5c6ed..a4a169eb0 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -45,6 +45,10 @@ * SOFTWARE. */ +// Only 32-bit types (uint32_t, int32_t or enums with uint32_t / int32_t as the +// underlying type) are allowed in the bit fields here, as Visual C++ restarts +// packing when a field requires different alignment than the previous one. + namespace xe { namespace gpu { namespace ucode { @@ -175,7 +179,7 @@ struct ControlFlowExecInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowExecInstruction, 8); +static_assert_size(ControlFlowExecInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kCondExec and kCondExecEnd. struct ControlFlowCondExecInstruction { @@ -209,7 +213,7 @@ struct ControlFlowCondExecInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowCondExecInstruction, 8); +static_assert_size(ControlFlowCondExecInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kCondExecPred, kCondExecPredEnd, // kCondExecPredClean, kCondExecPredCleanEnd. @@ -245,7 +249,7 @@ struct ControlFlowCondExecPredInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowCondExecPredInstruction, 8); +static_assert_size(ControlFlowCondExecPredInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kLoopStart. struct ControlFlowLoopStartInstruction { @@ -272,7 +276,7 @@ struct ControlFlowLoopStartInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowLoopStartInstruction, 8); +static_assert_size(ControlFlowLoopStartInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kLoopEnd. struct ControlFlowLoopEndInstruction { @@ -302,7 +306,7 @@ struct ControlFlowLoopEndInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowLoopEndInstruction, 8); +static_assert_size(ControlFlowLoopEndInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kCondCall. struct ControlFlowCondCallInstruction { @@ -333,7 +337,7 @@ struct ControlFlowCondCallInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowCondCallInstruction, 8); +static_assert_size(ControlFlowCondCallInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kReturn. struct ControlFlowReturnInstruction { @@ -349,7 +353,7 @@ struct ControlFlowReturnInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowReturnInstruction, 8); +static_assert_size(ControlFlowReturnInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kCondJmp. struct ControlFlowCondJmpInstruction { @@ -381,7 +385,7 @@ struct ControlFlowCondJmpInstruction { AddressingMode address_mode_ : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowCondJmpInstruction, 8); +static_assert_size(ControlFlowCondJmpInstruction, sizeof(uint32_t) * 2); // Instruction data for ControlFlowOpcode::kAlloc. struct ControlFlowAllocInstruction { @@ -403,9 +407,9 @@ struct ControlFlowAllocInstruction { uint32_t : 1; ControlFlowOpcode opcode_ : 4; }; -static_assert_size(ControlFlowAllocInstruction, 8); +static_assert_size(ControlFlowAllocInstruction, sizeof(uint32_t) * 2); -XEPACKEDUNION(ControlFlowInstruction, { +union ControlFlowInstruction { ControlFlowOpcode opcode() const { return opcode_value; } ControlFlowExecInstruction exec; // kExec* @@ -418,17 +422,17 @@ XEPACKEDUNION(ControlFlowInstruction, { ControlFlowCondJmpInstruction cond_jmp; // kCondJmp ControlFlowAllocInstruction alloc; // kAlloc - XEPACKEDSTRUCTANONYMOUS({ + struct { uint32_t unused_0 : 32; uint32_t unused_1 : 12; ControlFlowOpcode opcode_value : 4; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t dword_0; uint32_t dword_1; - }); -}); -static_assert_size(ControlFlowInstruction, 8); + }; +}; +static_assert_size(ControlFlowInstruction, sizeof(uint32_t) * 2); inline void UnpackControlFlowInstructions(const uint32_t* dwords, ControlFlowInstruction* out_ab) { @@ -587,7 +591,7 @@ enum class FetchOpcode : uint32_t { kSetTextureGradientsVert = 26, }; -struct VertexFetchInstruction { +struct alignas(uint32_t) VertexFetchInstruction { FetchOpcode opcode() const { return data_.opcode_value; } // Whether the jump is predicated (or conditional). @@ -653,8 +657,8 @@ struct VertexFetchInstruction { } private: - XEPACKEDSTRUCT(Data, { - XEPACKEDSTRUCTANONYMOUS({ + struct Data { + struct { FetchOpcode opcode_value : 5; uint32_t src_reg : 6; uint32_t src_reg_am : 1; @@ -666,8 +670,8 @@ struct VertexFetchInstruction { // Prefetch count minus 1. uint32_t prefetch_count : 3; uint32_t src_swiz : 2; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t dst_swiz : 12; uint32_t fomat_comp_all : 1; uint32_t num_format_all : 1; @@ -678,17 +682,18 @@ struct VertexFetchInstruction { int32_t exp_adjust : 6; uint32_t is_mini_fetch : 1; uint32_t is_predicated : 1; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t stride : 8; int32_t offset : 23; uint32_t pred_condition : 1; - }); - }); + }; + }; Data data_; }; +static_assert_size(VertexFetchInstruction, sizeof(uint32_t) * 3); -struct TextureFetchInstruction { +struct alignas(uint32_t) TextureFetchInstruction { FetchOpcode opcode() const { return data_.opcode_value; } // Whether the jump is predicated (or conditional). @@ -747,8 +752,8 @@ struct TextureFetchInstruction { float offset_z() const { return data_.offset_z * 0.5f; } private: - XEPACKEDSTRUCT(Data, { - XEPACKEDSTRUCTANONYMOUS({ + struct Data { + struct { FetchOpcode opcode_value : 5; uint32_t src_reg : 6; uint32_t src_reg_am : 1; @@ -758,8 +763,8 @@ struct TextureFetchInstruction { uint32_t const_index : 5; uint32_t tx_coord_denorm : 1; uint32_t src_swiz : 6; // xyz - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t dst_swiz : 12; // xyzw xenos::TextureFilter mag_filter : 2; xenos::TextureFilter min_filter : 2; @@ -772,8 +777,8 @@ struct TextureFetchInstruction { uint32_t use_reg_lod : 1; uint32_t unk : 1; uint32_t is_predicated : 1; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t use_reg_gradients : 1; xenos::SampleLocation sample_location : 1; int32_t lod_bias : 7; @@ -783,11 +788,11 @@ struct TextureFetchInstruction { int32_t offset_y : 5; int32_t offset_z : 5; uint32_t pred_condition : 1; - }); - }); + }; + }; Data data_; }; -static_assert_size(TextureFetchInstruction, 12); +static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3); // What follows is largely a mash up of the microcode assembly naming and the // R600 docs that have a near 1:1 with the instructions available in the xenos @@ -1645,7 +1650,7 @@ enum class ExportRegister : uint32_t { kExportData4, }; -struct AluInstruction { +struct alignas(uint32_t) AluInstruction { // Raw accessors. // Whether data is being exported (or written to local registers). @@ -1762,8 +1767,8 @@ struct AluInstruction { } private: - XEPACKEDSTRUCT(Data, { - XEPACKEDSTRUCTANONYMOUS({ + struct Data { + struct { // If exporting, both vector and scalar operations use the vector // destination (which can't be relative in this case). // Not very important note: If both scalar and vector operations exporting @@ -1789,8 +1794,8 @@ struct AluInstruction { uint32_t vector_clamp : 1; uint32_t scalar_clamp : 1; AluScalarOpcode scalar_opc : 6; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t src3_swiz : 8; uint32_t src2_swiz : 8; uint32_t src1_swiz : 8; @@ -1802,8 +1807,8 @@ struct AluInstruction { uint32_t address_absolute : 1; uint32_t const_1_rel_abs : 1; uint32_t const_0_rel_abs : 1; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t src3_reg : 8; uint32_t src2_reg : 8; uint32_t src1_reg : 8; @@ -1811,11 +1816,11 @@ struct AluInstruction { uint32_t src3_sel : 1; uint32_t src2_sel : 1; uint32_t src1_sel : 1; - }); - }); + }; + }; Data data_; }; -static_assert_size(AluInstruction, 12); +static_assert_size(AluInstruction, sizeof(uint32_t) * 3); } // namespace ucode } // namespace gpu diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 933584f64..24d7a9a98 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -21,6 +21,12 @@ namespace xe { namespace gpu { namespace xenos { +// enum types used in the GPU registers or the microcode must be : uint32_t or +// : int32_t, as Visual C++ restarts bit field packing when a field requires +// different alignment than the previous one, so only 32-bit types must be used +// in bit fields (registers are 32-bit, and the microcode consists of triples of +// 32-bit words). + enum class ShaderType : uint32_t { kVertex = 0, kPixel = 1, @@ -991,20 +997,21 @@ enum class FetchConstantType : uint32_t { }; // XE_GPU_REG_SHADER_CONSTANT_FETCH_* -XEPACKEDUNION(xe_gpu_vertex_fetch_t, { - XEPACKEDSTRUCTANONYMOUS({ +union alignas(uint32_t) xe_gpu_vertex_fetch_t { + struct { FetchConstantType type : 2; // +0 uint32_t address : 30; // +2 address in dwords Endian endian : 2; // +0 uint32_t size : 24; // +2 size in words uint32_t unk1 : 6; // +26 - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t dword_0; uint32_t dword_1; - }); -}); + }; +}; +static_assert_size(xe_gpu_vertex_fetch_t, sizeof(uint32_t) * 2); // Byte alignment of texture subresources in memory - of each mip and stack // slice / cube face (and of textures themselves), this number of bits is also @@ -1049,8 +1056,8 @@ constexpr uint32_t kTextureLinearRowAlignmentBytes = 1 << kTextureLinearRowAlignmentBytesLog2; // XE_GPU_REG_SHADER_CONSTANT_FETCH_* -XEPACKEDUNION(xe_gpu_texture_fetch_t, { - XEPACKEDSTRUCTANONYMOUS({ +union alignas(uint32_t) xe_gpu_texture_fetch_t { + struct { FetchConstantType type : 2; // +0 dword_0 // Likely before the swizzle, seems logical from R5xx (SIGNED_COMP0/1/2/3 // set the signedness of components 0/1/2/3, while SEL_ALPHA/RED/GREEN/BLUE @@ -1140,34 +1147,35 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { DataDimension dimension : 2; // +9 uint32_t packed_mips : 1; // +11 uint32_t mip_address : 20; // +12 mip address >> 12 - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t dword_0; uint32_t dword_1; uint32_t dword_2; uint32_t dword_3; uint32_t dword_4; uint32_t dword_5; - }); -}); + }; +}; +static_assert_size(xe_gpu_texture_fetch_t, sizeof(uint32_t) * 6); // XE_GPU_REG_SHADER_CONSTANT_FETCH_* -XEPACKEDUNION(xe_gpu_fetch_group_t, { +union alignas(uint32_t) xe_gpu_fetch_group_t { xe_gpu_texture_fetch_t texture_fetch; - XEPACKEDSTRUCTANONYMOUS({ + struct { xe_gpu_vertex_fetch_t vertex_fetch_0; xe_gpu_vertex_fetch_t vertex_fetch_1; xe_gpu_vertex_fetch_t vertex_fetch_2; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t dword_0; uint32_t dword_1; uint32_t dword_2; uint32_t dword_3; uint32_t dword_4; uint32_t dword_5; - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t type_0 : 2; uint32_t data_0_a : 30; uint32_t data_0_b : 32; @@ -1177,8 +1185,9 @@ XEPACKEDUNION(xe_gpu_fetch_group_t, { uint32_t type_2 : 2; uint32_t data_2_a : 30; uint32_t data_2_b : 32; - }); -}); + }; +}; +static_assert_size(xe_gpu_fetch_group_t, sizeof(uint32_t) * 6); // GPU_MEMEXPORT_STREAM_CONSTANT from a game .pdb - float constant for memexport // stream configuration. @@ -1188,8 +1197,8 @@ XEPACKEDUNION(xe_gpu_fetch_group_t, { // integers. dword_1 specifically is 2^23 because // powf(2.0f, 23.0f) + float(i) == 0x4B000000 | i // so mad can pack indices as integers in the lower bits. -XEPACKEDUNION(xe_gpu_memexport_stream_t, { - XEPACKEDSTRUCTANONYMOUS({ +union alignas(uint32_t) xe_gpu_memexport_stream_t { + struct { uint32_t base_address : 30; // +0 dword_0 physical address >> 2 uint32_t const_0x1 : 2; // +30 @@ -1205,16 +1214,17 @@ XEPACKEDUNION(xe_gpu_memexport_stream_t, { uint32_t index_count : 23; // +0 dword_3 uint32_t const_0x96 : 9; // +23 - }); - XEPACKEDSTRUCTANONYMOUS({ + }; + struct { uint32_t dword_0; uint32_t dword_1; uint32_t dword_2; uint32_t dword_3; - }); -}); + }; +}; +static_assert_size(xe_gpu_memexport_stream_t, sizeof(uint32_t) * 4); -XEPACKEDSTRUCT(xe_gpu_depth_sample_counts, { +struct alignas(uint32_t) xe_gpu_depth_sample_counts { // This is little endian as it is swapped in D3D code. // Corresponding A and B values are summed up by D3D. // Occlusion there is calculated by substracting begin from end struct. @@ -1226,7 +1236,8 @@ XEPACKEDSTRUCT(xe_gpu_depth_sample_counts, { uint32_t ZPass_B; uint32_t StencilFail_A; uint32_t StencilFail_B; -}); +}; +static_assert_size(xe_gpu_depth_sample_counts, sizeof(uint32_t) * 8); // Enum of event values used for VGT_EVENT_INITIATOR enum Event {