xenia/src/xenia/gpu/dxbc_shader_translator.h

1147 lines
49 KiB
C++

/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_DXBC_SHADER_TRANSLATOR_H_
#define XENIA_GPU_DXBC_SHADER_TRANSLATOR_H_
#include <cstring>
#include <string>
#include <vector>
#include "xenia/base/math.h"
#include "xenia/base/string_buffer.h"
#include "xenia/gpu/shader_translator.h"
namespace xe {
namespace gpu {
// Generates shader model 5_1 byte code (for Direct3D 12).
class DxbcShaderTranslator : public ShaderTranslator {
public:
// NOTE(review): edram_rov_used presumably selects the pixel shader output path
// that writes to the EDRAM via CompletePixelShader_WriteToROV instead of
// CompletePixelShader_WriteToRTVs - confirm in the implementation file.
DxbcShaderTranslator(bool edram_rov_used);
~DxbcShaderTranslator() override;
// Constant buffer bindings in space 0, numbered consecutively from 0 in the
// order they are listed here.
enum class CbufferRegister {
  kSystemConstants = 0,
  kFloatConstants = 1,
  kBoolLoopConstants = 2,
  kFetchConstants = 3,
};
// System constant flag bits.
// The _Shift enumerators are bit indices that take their values from the
// declaration order (starting at 0), so inserting or reordering entries changes
// every bit that follows. Each mask in the second half of the enum is
// 1u << the matching _Shift.
// NOTE(review): presumably these are the bits of SystemConstants::flags -
// confirm against the code that fills the constants.
enum : uint32_t {
kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal_Shift,
kSysFlag_ReverseZ_Shift,
kSysFlag_DepthStencil_Shift,
kSysFlag_DepthFloat24_Shift,
// Depth/stencil testing not done if DepthStencilRead is disabled, but
// writing may still be done.
// NOTE(review): there is no DepthStencilRead flag in this enum - the comment
// above possibly refers to kSysFlag_DepthStencil; confirm.
kSysFlag_DepthPassIfLess_Shift,
kSysFlag_DepthPassIfEqual_Shift,
kSysFlag_DepthPassIfGreater_Shift,
// 1 to write new depth to the depth buffer, 0 to keep the old one if the
// depth test passes.
kSysFlag_DepthWriteMask_Shift,
kSysFlag_StencilTest_Shift,
// This doesn't include depth/stencil masks - only reflects the fact that
// the new value must be written.
kSysFlag_DepthStencilWrite_Shift,
kSysFlag_Color0Gamma_Shift,
kSysFlag_Color1Gamma_Shift,
kSysFlag_Color2Gamma_Shift,
kSysFlag_Color3Gamma_Shift,
// Single-bit masks for the bit indices above.
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
kSysFlag_ReverseZ = 1u << kSysFlag_ReverseZ_Shift,
kSysFlag_DepthStencil = 1u << kSysFlag_DepthStencil_Shift,
kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift,
kSysFlag_DepthPassIfLess = 1u << kSysFlag_DepthPassIfLess_Shift,
kSysFlag_DepthPassIfEqual = 1u << kSysFlag_DepthPassIfEqual_Shift,
kSysFlag_DepthPassIfGreater = 1u << kSysFlag_DepthPassIfGreater_Shift,
kSysFlag_DepthWriteMask = 1u << kSysFlag_DepthWriteMask_Shift,
kSysFlag_StencilTest = 1u << kSysFlag_StencilTest_Shift,
kSysFlag_DepthStencilWrite = 1u << kSysFlag_DepthStencilWrite_Shift,
kSysFlag_Color0Gamma = 1u << kSysFlag_Color0Gamma_Shift,
kSysFlag_Color1Gamma = 1u << kSysFlag_Color1Gamma_Shift,
kSysFlag_Color2Gamma = 1u << kSysFlag_Color2Gamma_Shift,
kSysFlag_Color3Gamma = 1u << kSysFlag_Color3Gamma_Shift,
};
// Stencil operations expressed as combinations of sub-operation flags rather
// than as a plain operation index. The _Shift enumerators get their values from
// the declaration order (except where assigned explicitly), so the order of the
// first five entries defines the bit layout.
enum : uint32_t {
kStencilOp_Flag_CurrentMask_Shift,
// 0, 1 or 3 expanded to 0 or 1 or 0xFF - the value to add.
kStencilOp_Flag_Add_Shift,
// The add value occupies two bits (Increment is 1, Decrement is 3), so the
// next flag starts two bits further.
kStencilOp_Flag_Saturate_Shift = kStencilOp_Flag_Add_Shift + 2,
kStencilOp_Flag_Invert_Shift,
kStencilOp_Flag_NewMask_Shift,
// Masks and field values for the bit positions above.
kStencilOp_Flag_CurrentMask = 1u << kStencilOp_Flag_CurrentMask_Shift,
kStencilOp_Flag_Increment = 1u << kStencilOp_Flag_Add_Shift,
kStencilOp_Flag_Decrement = 3u << kStencilOp_Flag_Add_Shift,
kStencilOp_Flag_Saturate = 1u << kStencilOp_Flag_Saturate_Shift,
kStencilOp_Flag_Invert = 1u << kStencilOp_Flag_Invert_Shift,
kStencilOp_Flag_NewMask = 1u << kStencilOp_Flag_NewMask_Shift,
// Complete operations built from the flags.
kStencilOp_Keep = kStencilOp_Flag_CurrentMask,
kStencilOp_Zero = 0,
kStencilOp_Replace = kStencilOp_Flag_NewMask,
kStencilOp_IncrementSaturate = kStencilOp_Flag_CurrentMask |
kStencilOp_Flag_Increment |
kStencilOp_Flag_Saturate,
kStencilOp_DecrementSaturate = kStencilOp_Flag_CurrentMask |
kStencilOp_Flag_Decrement |
kStencilOp_Flag_Saturate,
kStencilOp_Invert = kStencilOp_Flag_CurrentMask | kStencilOp_Flag_Invert,
kStencilOp_Increment =
kStencilOp_Flag_CurrentMask | kStencilOp_Flag_Increment,
kStencilOp_Decrement =
kStencilOp_Flag_CurrentMask | kStencilOp_Flag_Decrement,
};
// Per-render-target flag bits. The _Shift enumerators are bit indices assigned
// by declaration order starting at 0; each mask in the second half of the enum
// is 1u << the matching _Shift.
// NOTE(review): presumably stored in SystemConstants::edram_rt_flags - confirm.
enum : uint32_t {
// Whether the render target needs to be merged with another (if the write
// mask is not 1111, or 11 for 16_16, or 1 for 32_FLOAT, or blending is
// enabled and it's not no-op).
kRTFlag_WriteR_Shift,
kRTFlag_WriteG_Shift,
kRTFlag_WriteB_Shift,
kRTFlag_WriteA_Shift,
kRTFlag_Blend_Shift,
// Whether the component does not exist in the render target format.
kRTFlag_FormatUnusedR_Shift,
kRTFlag_FormatUnusedG_Shift,
kRTFlag_FormatUnusedB_Shift,
kRTFlag_FormatUnusedA_Shift,
// Whether the format is fixed-point and needs to be converted to integer
// (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16).
kRTFlag_FormatFixed_Shift,
// Whether the format is k_2_10_10_10_FLOAT and 7e3 conversion is needed.
kRTFlag_FormatFloat10_Shift,
// Whether the format is k_16_16_FLOAT or k_16_16_16_16_FLOAT and
// f16tof32/f32tof16 is needed.
kRTFlag_FormatFloat16_Shift,
// Single-bit masks for the bit indices above.
kRTFlag_WriteR = 1u << kRTFlag_WriteR_Shift,
kRTFlag_WriteG = 1u << kRTFlag_WriteG_Shift,
kRTFlag_WriteB = 1u << kRTFlag_WriteB_Shift,
kRTFlag_WriteA = 1u << kRTFlag_WriteA_Shift,
kRTFlag_Blend = 1u << kRTFlag_Blend_Shift,
kRTFlag_FormatUnusedR = 1u << kRTFlag_FormatUnusedR_Shift,
kRTFlag_FormatUnusedG = 1u << kRTFlag_FormatUnusedG_Shift,
kRTFlag_FormatUnusedB = 1u << kRTFlag_FormatUnusedB_Shift,
kRTFlag_FormatUnusedA = 1u << kRTFlag_FormatUnusedA_Shift,
kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift,
kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift,
kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift,
};
// Bit layout of the two packed blend option dwords of a render target.
// Factors that can be used both directly and as ONE_MINUS occupy two bits:
// _Pos is 1 << shift and _Neg is 3 << shift (both bits set). Factors that have
// no negated form (One, SrcAlphaSaturate, OpMin, OpMax) occupy a single bit.
// kBlendX uses bits 0-29 and kBlendY uses bits 0-23.
enum : uint32_t {
// X/Z of the blend constant for the render target.
// For ONE_MINUS modes, enable both One and the needed factor with _Neg.
kBlendX_Src_One_Shift = 0,
kBlendX_Src_One = 1u << kBlendX_Src_One_Shift,
kBlendX_Src_SrcColor_Shift = 1,
kBlendX_Src_SrcColor_Pos = 1u << kBlendX_Src_SrcColor_Shift,
kBlendX_Src_SrcColor_Neg = 3u << kBlendX_Src_SrcColor_Shift,
kBlendX_Src_SrcAlpha_Shift = 3,
kBlendX_Src_SrcAlpha_Pos = 1u << kBlendX_Src_SrcAlpha_Shift,
kBlendX_Src_SrcAlpha_Neg = 3u << kBlendX_Src_SrcAlpha_Shift,
kBlendX_Src_DestColor_Shift = 5,
kBlendX_Src_DestColor_Pos = 1u << kBlendX_Src_DestColor_Shift,
kBlendX_Src_DestColor_Neg = 3u << kBlendX_Src_DestColor_Shift,
kBlendX_Src_DestAlpha_Shift = 7,
kBlendX_Src_DestAlpha_Pos = 1u << kBlendX_Src_DestAlpha_Shift,
kBlendX_Src_DestAlpha_Neg = 3u << kBlendX_Src_DestAlpha_Shift,
kBlendX_Src_SrcAlphaSaturate_Shift = 9,
kBlendX_Src_SrcAlphaSaturate = 1u << kBlendX_Src_SrcAlphaSaturate_Shift,
kBlendX_SrcAlpha_One_Shift = 10,
kBlendX_SrcAlpha_One = 1u << kBlendX_SrcAlpha_One_Shift,
kBlendX_SrcAlpha_SrcAlpha_Shift = 11,
kBlendX_SrcAlpha_SrcAlpha_Pos = 1u << kBlendX_SrcAlpha_SrcAlpha_Shift,
kBlendX_SrcAlpha_SrcAlpha_Neg = 3u << kBlendX_SrcAlpha_SrcAlpha_Shift,
kBlendX_SrcAlpha_DestAlpha_Shift = 13,
kBlendX_SrcAlpha_DestAlpha_Pos = 1u << kBlendX_SrcAlpha_DestAlpha_Shift,
kBlendX_SrcAlpha_DestAlpha_Neg = 3u << kBlendX_SrcAlpha_DestAlpha_Shift,
// For ONE_MINUS modes, enable both One and the needed factor with _Neg.
kBlendX_Dest_One_Shift = 15,
kBlendX_Dest_One = 1u << kBlendX_Dest_One_Shift,
kBlendX_Dest_SrcColor_Shift = 16,
kBlendX_Dest_SrcColor_Pos = 1u << kBlendX_Dest_SrcColor_Shift,
kBlendX_Dest_SrcColor_Neg = 3u << kBlendX_Dest_SrcColor_Shift,
kBlendX_Dest_SrcAlpha_Shift = 18,
kBlendX_Dest_SrcAlpha_Pos = 1u << kBlendX_Dest_SrcAlpha_Shift,
kBlendX_Dest_SrcAlpha_Neg = 3u << kBlendX_Dest_SrcAlpha_Shift,
kBlendX_Dest_DestColor_Shift = 20,
kBlendX_Dest_DestColor_Pos = 1u << kBlendX_Dest_DestColor_Shift,
kBlendX_Dest_DestColor_Neg = 3u << kBlendX_Dest_DestColor_Shift,
kBlendX_Dest_DestAlpha_Shift = 22,
kBlendX_Dest_DestAlpha_Pos = 1u << kBlendX_Dest_DestAlpha_Shift,
kBlendX_Dest_DestAlpha_Neg = 3u << kBlendX_Dest_DestAlpha_Shift,
kBlendX_Dest_SrcAlphaSaturate_Shift = 24,
kBlendX_Dest_SrcAlphaSaturate = 1u << kBlendX_Dest_SrcAlphaSaturate_Shift,
kBlendX_DestAlpha_One_Shift = 25,
kBlendX_DestAlpha_One = 1u << kBlendX_DestAlpha_One_Shift,
kBlendX_DestAlpha_SrcAlpha_Shift = 26,
kBlendX_DestAlpha_SrcAlpha_Pos = 1u << kBlendX_DestAlpha_SrcAlpha_Shift,
kBlendX_DestAlpha_SrcAlpha_Neg = 3u << kBlendX_DestAlpha_SrcAlpha_Shift,
kBlendX_DestAlpha_DestAlpha_Shift = 28,
kBlendX_DestAlpha_DestAlpha_Pos = 1u << kBlendX_DestAlpha_DestAlpha_Shift,
kBlendX_DestAlpha_DestAlpha_Neg = 3u << kBlendX_DestAlpha_DestAlpha_Shift,
// Y/W of the blend constant for the render target.
kBlendY_Src_ConstantColor_Shift = 0,
kBlendY_Src_ConstantColor_Pos = 1u << kBlendY_Src_ConstantColor_Shift,
kBlendY_Src_ConstantColor_Neg = 3u << kBlendY_Src_ConstantColor_Shift,
kBlendY_Src_ConstantAlpha_Shift = 2,
kBlendY_Src_ConstantAlpha_Pos = 1u << kBlendY_Src_ConstantAlpha_Shift,
kBlendY_Src_ConstantAlpha_Neg = 3u << kBlendY_Src_ConstantAlpha_Shift,
kBlendY_SrcAlpha_ConstantAlpha_Shift = 4,
kBlendY_SrcAlpha_ConstantAlpha_Pos =
1u << kBlendY_SrcAlpha_ConstantAlpha_Shift,
kBlendY_SrcAlpha_ConstantAlpha_Neg =
3u << kBlendY_SrcAlpha_ConstantAlpha_Shift,
kBlendY_Dest_ConstantColor_Shift = 6,
kBlendY_Dest_ConstantColor_Pos = 1u << kBlendY_Dest_ConstantColor_Shift,
kBlendY_Dest_ConstantColor_Neg = 3u << kBlendY_Dest_ConstantColor_Shift,
kBlendY_Dest_ConstantAlpha_Shift = 8,
kBlendY_Dest_ConstantAlpha_Pos = 1u << kBlendY_Dest_ConstantAlpha_Shift,
kBlendY_Dest_ConstantAlpha_Neg = 3u << kBlendY_Dest_ConstantAlpha_Shift,
kBlendY_DestAlpha_ConstantAlpha_Shift = 10,
kBlendY_DestAlpha_ConstantAlpha_Pos =
1u << kBlendY_DestAlpha_ConstantAlpha_Shift,
kBlendY_DestAlpha_ConstantAlpha_Neg =
3u << kBlendY_DestAlpha_ConstantAlpha_Shift,
// For addition/subtraction/inverse subtraction, but must be positive for
// min/max.
kBlendY_Src_OpSign_Shift = 12,
kBlendY_Src_OpSign_Pos = 1u << kBlendY_Src_OpSign_Shift,
kBlendY_Src_OpSign_Neg = 3u << kBlendY_Src_OpSign_Shift,
kBlendY_SrcAlpha_OpSign_Shift = 14,
kBlendY_SrcAlpha_OpSign_Pos = 1u << kBlendY_SrcAlpha_OpSign_Shift,
kBlendY_SrcAlpha_OpSign_Neg = 3u << kBlendY_SrcAlpha_OpSign_Shift,
kBlendY_Dest_OpSign_Shift = 16,
kBlendY_Dest_OpSign_Pos = 1u << kBlendY_Dest_OpSign_Shift,
kBlendY_Dest_OpSign_Neg = 3u << kBlendY_Dest_OpSign_Shift,
kBlendY_DestAlpha_OpSign_Shift = 18,
kBlendY_DestAlpha_OpSign_Pos = 1u << kBlendY_DestAlpha_OpSign_Shift,
kBlendY_DestAlpha_OpSign_Neg = 3u << kBlendY_DestAlpha_OpSign_Shift,
// Single-bit selectors of the min/max blend operations for color and alpha.
kBlendY_Color_OpMin_Shift = 20,
kBlendY_Color_OpMin = 1u << kBlendY_Color_OpMin_Shift,
kBlendY_Color_OpMax_Shift = 21,
kBlendY_Color_OpMax = 1u << kBlendY_Color_OpMax_Shift,
kBlendY_Alpha_OpMin_Shift = 22,
kBlendY_Alpha_OpMin = 1u << kBlendY_Alpha_OpMin_Shift,
kBlendY_Alpha_OpMax_Shift = 23,
kBlendY_Alpha_OpMax = 1u << kBlendY_Alpha_OpMax_Shift,
};
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - kSysConst enum (indices, registers and first components).
// - system_constant_rdef_.
// - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
//
// Layout of the system constants. The "vec4 N" comments give the index of the
// 4-dword vector that the members following them occupy; every group adds up
// to exactly four 32-bit values, so the C++ layout has no implicit padding
// between the vectors.
struct SystemConstants {
// vec4 0
uint32_t flags;
uint32_t vertex_index_endian;
uint32_t vertex_base_index;
uint32_t pixel_pos_reg;
// vec4 1
float ndc_scale[3];
float pixel_half_pixel_offset;
// vec4 2
float ndc_offset[3];
// 0 - disabled, 1 - passes if in range, -1 - fails if in range.
int32_t alpha_test;
// vec4 3
float point_size[2];
float point_size_min_max[2];
// vec4 4
// Inverse scale of the host viewport (but not supersampled), with signs
// pre-applied.
float point_screen_to_ndc[2];
float ssaa_inv_scale[2];
// vec4 5
// The range is floats as uints so it's easier to pass infinity.
uint32_t alpha_test_range[2];
uint32_t edram_pitch_tiles;
uint32_t edram_depth_base_dwords;
// vec4 6
float color_exp_bias[4];
// vec4 7
uint32_t color_output_map[4];
// vec4 8
uint32_t edram_stencil_reference;
uint32_t edram_stencil_read_mask;
uint32_t edram_stencil_write_mask;
// Fills the last component of vec4 8 so the next member starts a new vector.
uint32_t padding_8;
// vec4 9
// The named members and the array alias the same four dwords.
union {
struct {
// kStencilOp, separated into sub-operations - not the Xenos enum.
uint32_t edram_stencil_front_fail;
uint32_t edram_stencil_front_depth_fail;
uint32_t edram_stencil_front_pass;
uint32_t edram_stencil_front_comparison;
};
uint32_t edram_stencil_front[4];
};
// vec4 10
// The named members and the array alias the same four dwords.
union {
struct {
// kStencilOp, separated into sub-operations - not the Xenos enum.
uint32_t edram_stencil_back_fail;
uint32_t edram_stencil_back_depth_fail;
uint32_t edram_stencil_back_pass;
uint32_t edram_stencil_back_comparison;
};
uint32_t edram_stencil_back[4];
};
// vec4 11
uint32_t edram_base_dwords[4];
// vec4 12
// Binding and format info flags.
uint32_t edram_rt_flags[4];
// vec4 13
// Format info - widths of components in the lower 32 bits (for ibfe/bfi),
// packed as 8:8:8:8 for each render target.
uint32_t edram_rt_pack_width_low[4];
// vec4 14
// Format info - offsets of components in the lower 32 bits (for ibfe/bfi),
// packed as 8:8:8:8 for each render target.
uint32_t edram_rt_pack_offset_low[4];
// vec4 15
// Format info - widths of components in the upper 32 bits (for ibfe/bfi),
// packed as 8:8:8:8 for each render target.
uint32_t edram_rt_pack_width_high[4];
// vec4 16
// Format info - offsets of components in the upper 32 bits (for ibfe/bfi),
// packed as 8:8:8:8 for each render target.
uint32_t edram_rt_pack_offset_high[4];
// vec4 17:18
// Format info - mask of color and alpha after unpacking, but before float
// conversion. Primarily to differentiate between signed and unsigned
// formats because ibfe is used for both since k_16_16 and k_16_16_16_16 are
// signed.
uint32_t edram_load_mask_rt01_rt23[2][4];
// vec4 19:20
// Format info - scale to apply to the color and the alpha of each render
// target after unpacking and converting.
float edram_load_scale_rt01_rt23[2][4];
// vec4 21:22
// Render target blending options.
uint32_t edram_blend_rt01_rt23[2][4];
// vec4 23
// The constant blend factor for the respective modes.
float edram_blend_constant[4];
// vec4 24:25
// Format info - minimum color and alpha values (as float, before
// conversion) writable to each render target. Integer so it's easier to
// write infinity.
uint32_t edram_store_min_rt01_rt23[2][4];
// vec4 26:27
// Format info - maximum color and alpha values (as float, before
// conversion) writable to each render target. Integer so it's easier to
// write infinity.
uint32_t edram_store_max_rt01_rt23[2][4];
// vec4 28:29
// Format info - scale to apply to the color and the alpha of each render
// target before converting and packing.
float edram_store_scale_rt01_rt23[2][4];
};
// There are 32 fetch constants, and each can be bound as a 2D array, 3D or cube
// texture, both signed and unsigned - 192 textures at most, so a texture SRV
// index fits in 8 bits.
static constexpr uint32_t kMaxTextureSRVIndexBits = 8;
static constexpr uint32_t kMaxTextureSRVs =
    (uint32_t(1) << kMaxTextureSRVIndexBits) - 1;
// Description of one texture shader resource view used by the translated
// shader (see GetTextureSRVs).
struct TextureSRV {
// NOTE(review): presumably the index of the texture fetch constant the SRV is
// created from - confirm.
uint32_t fetch_constant;
TextureDimension dimension;
bool is_signed;
// Whether this SRV must be bound even if it's signed and all components are
// unsigned and vice versa (for kGetTextureComputedLod).
bool is_sign_required;
std::string name;
};
// Returns the array of texture SRVs and stores its length in count_out.
// The first binding returned is at t1 because t0 is shared memory.
const TextureSRV* GetTextureSRVs(uint32_t& count_out) const {
  const auto& srvs = texture_srvs_;
  count_out = static_cast<uint32_t>(srvs.size());
  return srvs.data();
}
// Arbitrary limit on the number of sampler bindings. A shader-visible
// descriptor heap can't contain more than 2048 samplers, and some older
// hardware (tier 1 resource binding - Nvidia Fermi) doesn't support more than
// 16 samplers bound at once (nothing can really be done if a game uses more
// than 16). However, shaders can specify overrides for filtering modes, and the
// number of possible combinations is huge, so the count is limited to something
// sane that can also be easily packed into 32-bit map keys (for instance, for
// root signatures).
static constexpr uint32_t kMaxSamplerBindingIndexBits = 7;
static constexpr uint32_t kMaxSamplerBindings =
    (uint32_t(1) << kMaxSamplerBindingIndexBits) - 1;
// Description of one sampler used by the translated shader (see
// GetSamplerBindings).
struct SamplerBinding {
// NOTE(review): presumably the index of the texture fetch constant the sampler
// state is taken from - confirm.
uint32_t fetch_constant;
// NOTE(review): the filters below are presumably the per-instruction filtering
// mode overrides - confirm how "no override" is represented.
TextureFilter mag_filter;
TextureFilter min_filter;
TextureFilter mip_filter;
AnisoFilter aniso_filter;
std::string name;
};
// Returns the array of sampler bindings and stores its length in count_out.
const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const {
  const auto& bindings = sampler_bindings_;
  count_out = static_cast<uint32_t>(bindings.size());
  return bindings.data();
}
// Returns the bits that need to be added to the RT flags constant - needs to
// be done externally, not in SetColorFormatSystemConstants, because the flags
// contain other state.
static uint32_t GetColorFormatRTFlags(ColorRenderTargetFormat format);
// NOTE(review): presumably fills the format-dependent members of constants for
// the render target rt_index, except for the RT flags (see
// GetColorFormatRTFlags) - confirm in the implementation.
static void SetColorFormatSystemConstants(SystemConstants& constants,
uint32_t rt_index,
ColorRenderTargetFormat format);
// Returns whether blending should be done at all (not 1 * src + 0 * dest).
// NOTE(review): blend_x_out and blend_y_out presumably receive combinations of
// the kBlendX and kBlendY bits - confirm.
static bool GetBlendConstants(uint32_t blend_control, uint32_t& blend_x_out,
uint32_t& blend_y_out);
// Creates a special pixel shader without color outputs - this resets the
// state of the translator.
std::vector<uint8_t> CreateDepthOnlyPixelShader();
protected:
// Overrides of the ShaderTranslator virtual functions.
void Reset() override;
void StartTranslation() override;
// Returns the translated shader as a byte vector.
std::vector<uint8_t> CompleteTranslation() override;
// Handlers for the individual parsed instructions.
void ProcessLabel(uint32_t cf_index) override;
void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;
void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
void ProcessLoopStartInstruction(
const ParsedLoopStartInstruction& instr) override;
void ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) override;
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
void ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) override;
void ProcessTextureFetchInstruction(
const ParsedTextureFetchInstruction& instr) override;
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
private:
// Locations of the SystemConstants members. For each member:
// - _Index is the ordinal of the member, in declaration order (padding_8 has
//   no index).
// - _Vec is the index of the vec4 containing the member (or its first vec4).
// - _Comp is the first component of the member within that vec4 (omitted for
//   members that occupy whole vec4s).
// Every entry is defined relative to the previous one, so when a member is
// inserted, only the entries adjacent to it need to be changed.
enum : uint32_t {
kSysConst_Flags_Index = 0,
kSysConst_Flags_Vec = 0,
kSysConst_Flags_Comp = 0,
kSysConst_VertexIndexEndian_Index = kSysConst_Flags_Index + 1,
kSysConst_VertexIndexEndian_Vec = kSysConst_Flags_Vec,
kSysConst_VertexIndexEndian_Comp = 1,
kSysConst_VertexBaseIndex_Index = kSysConst_VertexIndexEndian_Index + 1,
kSysConst_VertexBaseIndex_Vec = kSysConst_Flags_Vec,
kSysConst_VertexBaseIndex_Comp = 2,
kSysConst_PixelPosReg_Index = kSysConst_VertexBaseIndex_Index + 1,
kSysConst_PixelPosReg_Vec = kSysConst_Flags_Vec,
kSysConst_PixelPosReg_Comp = 3,
kSysConst_NDCScale_Index = kSysConst_PixelPosReg_Index + 1,
kSysConst_NDCScale_Vec = kSysConst_Flags_Vec + 1,
kSysConst_NDCScale_Comp = 0,
kSysConst_PixelHalfPixelOffset_Index = kSysConst_NDCScale_Index + 1,
kSysConst_PixelHalfPixelOffset_Vec = kSysConst_NDCScale_Vec,
kSysConst_PixelHalfPixelOffset_Comp = 3,
kSysConst_NDCOffset_Index = kSysConst_PixelHalfPixelOffset_Index + 1,
kSysConst_NDCOffset_Vec = kSysConst_NDCScale_Vec + 1,
kSysConst_NDCOffset_Comp = 0,
kSysConst_AlphaTest_Index = kSysConst_NDCOffset_Index + 1,
kSysConst_AlphaTest_Vec = kSysConst_NDCOffset_Vec,
kSysConst_AlphaTest_Comp = 3,
kSysConst_PointSize_Index = kSysConst_AlphaTest_Index + 1,
kSysConst_PointSize_Vec = kSysConst_NDCOffset_Vec + 1,
kSysConst_PointSize_Comp = 0,
kSysConst_PointSizeMinMax_Index = kSysConst_PointSize_Index + 1,
kSysConst_PointSizeMinMax_Vec = kSysConst_PointSize_Vec,
kSysConst_PointSizeMinMax_Comp = 2,
kSysConst_PointScreenToNDC_Index = kSysConst_PointSizeMinMax_Index + 1,
kSysConst_PointScreenToNDC_Vec = kSysConst_PointSizeMinMax_Vec + 1,
kSysConst_PointScreenToNDC_Comp = 0,
kSysConst_SSAAInvScale_Index = kSysConst_PointScreenToNDC_Index + 1,
kSysConst_SSAAInvScale_Vec = kSysConst_PointScreenToNDC_Vec,
kSysConst_SSAAInvScale_Comp = 2,
kSysConst_AlphaTestRange_Index = kSysConst_SSAAInvScale_Index + 1,
kSysConst_AlphaTestRange_Vec = kSysConst_SSAAInvScale_Vec + 1,
kSysConst_AlphaTestRange_Comp = 0,
kSysConst_EDRAMPitchTiles_Index = kSysConst_AlphaTestRange_Index + 1,
kSysConst_EDRAMPitchTiles_Vec = kSysConst_AlphaTestRange_Vec,
kSysConst_EDRAMPitchTiles_Comp = 2,
kSysConst_EDRAMDepthBaseDwords_Index = kSysConst_EDRAMPitchTiles_Index + 1,
kSysConst_EDRAMDepthBaseDwords_Vec = kSysConst_AlphaTestRange_Vec,
kSysConst_EDRAMDepthBaseDwords_Comp = 3,
kSysConst_ColorExpBias_Index = kSysConst_EDRAMDepthBaseDwords_Index + 1,
kSysConst_ColorExpBias_Vec = kSysConst_EDRAMDepthBaseDwords_Vec + 1,
kSysConst_ColorOutputMap_Index = kSysConst_ColorExpBias_Index + 1,
kSysConst_ColorOutputMap_Vec = kSysConst_ColorExpBias_Vec + 1,
kSysConst_EDRAMStencilReference_Index = kSysConst_ColorOutputMap_Index + 1,
kSysConst_EDRAMStencilReference_Vec = kSysConst_ColorOutputMap_Vec + 1,
kSysConst_EDRAMStencilReference_Comp = 0,
kSysConst_EDRAMStencilReadMask_Index =
kSysConst_EDRAMStencilReference_Index + 1,
kSysConst_EDRAMStencilReadMask_Vec = kSysConst_EDRAMStencilReference_Vec,
kSysConst_EDRAMStencilReadMask_Comp = 1,
kSysConst_EDRAMStencilWriteMask_Index =
kSysConst_EDRAMStencilReadMask_Index + 1,
kSysConst_EDRAMStencilWriteMask_Vec = kSysConst_EDRAMStencilReference_Vec,
kSysConst_EDRAMStencilWriteMask_Comp = 2,
kSysConst_EDRAMStencilFront_Index =
kSysConst_EDRAMStencilWriteMask_Index + 1,
kSysConst_EDRAMStencilFront_Vec = kSysConst_EDRAMStencilWriteMask_Vec + 1,
kSysConst_EDRAMStencilBack_Index = kSysConst_EDRAMStencilFront_Index + 1,
kSysConst_EDRAMStencilBack_Vec = kSysConst_EDRAMStencilFront_Vec + 1,
// Components of stencil front and back.
kSysConst_EDRAMStencilSide_Fail_Comp = 0,
kSysConst_EDRAMStencilSide_DepthFail_Comp = 1,
kSysConst_EDRAMStencilSide_Pass_Comp = 2,
kSysConst_EDRAMStencilSide_Comparison_Comp = 3,
kSysConst_EDRAMBaseDwords_Index = kSysConst_EDRAMStencilBack_Index + 1,
kSysConst_EDRAMBaseDwords_Vec = kSysConst_EDRAMStencilBack_Vec + 1,
kSysConst_EDRAMRTFlags_Index = kSysConst_EDRAMBaseDwords_Index + 1,
kSysConst_EDRAMRTFlags_Vec = kSysConst_EDRAMBaseDwords_Vec + 1,
kSysConst_EDRAMRTPackWidthLow_Index = kSysConst_EDRAMRTFlags_Index + 1,
kSysConst_EDRAMRTPackWidthLow_Vec = kSysConst_EDRAMRTFlags_Vec + 1,
kSysConst_EDRAMRTPackOffsetLow_Index =
kSysConst_EDRAMRTPackWidthLow_Index + 1,
kSysConst_EDRAMRTPackOffsetLow_Vec = kSysConst_EDRAMRTPackWidthLow_Vec + 1,
kSysConst_EDRAMRTPackWidthHigh_Index =
kSysConst_EDRAMRTPackOffsetLow_Index + 1,
kSysConst_EDRAMRTPackWidthHigh_Vec = kSysConst_EDRAMRTPackOffsetLow_Vec + 1,
kSysConst_EDRAMRTPackOffsetHigh_Index =
kSysConst_EDRAMRTPackWidthHigh_Index + 1,
kSysConst_EDRAMRTPackOffsetHigh_Vec =
kSysConst_EDRAMRTPackWidthHigh_Vec + 1,
// The members below are two vec4s each in SystemConstants (render targets 0
// and 1, then 2 and 3), exposed here as a separate entry per vec4.
kSysConst_EDRAMLoadMaskRT01_Index =
kSysConst_EDRAMRTPackOffsetHigh_Index + 1,
kSysConst_EDRAMLoadMaskRT01_Vec = kSysConst_EDRAMRTPackOffsetHigh_Vec + 1,
kSysConst_EDRAMLoadMaskRT23_Index = kSysConst_EDRAMLoadMaskRT01_Index + 1,
kSysConst_EDRAMLoadMaskRT23_Vec = kSysConst_EDRAMLoadMaskRT01_Vec + 1,
kSysConst_EDRAMLoadScaleRT01_Index = kSysConst_EDRAMLoadMaskRT23_Index + 1,
kSysConst_EDRAMLoadScaleRT01_Vec = kSysConst_EDRAMLoadMaskRT23_Vec + 1,
kSysConst_EDRAMLoadScaleRT23_Index = kSysConst_EDRAMLoadScaleRT01_Index + 1,
kSysConst_EDRAMLoadScaleRT23_Vec = kSysConst_EDRAMLoadScaleRT01_Vec + 1,
kSysConst_EDRAMBlendRT01_Index = kSysConst_EDRAMLoadScaleRT23_Index + 1,
kSysConst_EDRAMBlendRT01_Vec = kSysConst_EDRAMLoadScaleRT23_Vec + 1,
kSysConst_EDRAMBlendRT23_Index = kSysConst_EDRAMBlendRT01_Index + 1,
kSysConst_EDRAMBlendRT23_Vec = kSysConst_EDRAMBlendRT01_Vec + 1,
kSysConst_EDRAMBlendConstant_Index = kSysConst_EDRAMBlendRT23_Index + 1,
kSysConst_EDRAMBlendConstant_Vec = kSysConst_EDRAMBlendRT23_Vec + 1,
kSysConst_EDRAMStoreMinRT01_Index = kSysConst_EDRAMBlendConstant_Index + 1,
kSysConst_EDRAMStoreMinRT01_Vec = kSysConst_EDRAMBlendConstant_Vec + 1,
kSysConst_EDRAMStoreMinRT23_Index = kSysConst_EDRAMStoreMinRT01_Index + 1,
kSysConst_EDRAMStoreMinRT23_Vec = kSysConst_EDRAMStoreMinRT01_Vec + 1,
kSysConst_EDRAMStoreMaxRT01_Index = kSysConst_EDRAMStoreMinRT23_Index + 1,
kSysConst_EDRAMStoreMaxRT01_Vec = kSysConst_EDRAMStoreMinRT23_Vec + 1,
kSysConst_EDRAMStoreMaxRT23_Index = kSysConst_EDRAMStoreMaxRT01_Index + 1,
kSysConst_EDRAMStoreMaxRT23_Vec = kSysConst_EDRAMStoreMaxRT01_Vec + 1,
kSysConst_EDRAMStoreScaleRT01_Index = kSysConst_EDRAMStoreMaxRT23_Index + 1,
kSysConst_EDRAMStoreScaleRT01_Vec = kSysConst_EDRAMStoreMaxRT23_Vec + 1,
kSysConst_EDRAMStoreScaleRT23_Index =
kSysConst_EDRAMStoreScaleRT01_Index + 1,
kSysConst_EDRAMStoreScaleRT23_Vec = kSysConst_EDRAMStoreScaleRT01_Vec + 1,
// Total number of indexed entries.
kSysConst_Count = kSysConst_EDRAMStoreScaleRT23_Index + 1
};
// Number of interpolators passed from the vertex shader to the pixel shader.
static constexpr uint32_t kInterpolatorCount = 16;
// The point parameters come right after the interpolators.
static constexpr uint32_t kPointParametersTexCoord = kInterpolatorCount;
// Input and output register numbers of the vertex and pixel shaders. The
// numbering restarts from 0 for each group (VS inputs, VS outputs, PS inputs).
enum class InOutRegister : uint32_t {
  // IF ANY OF THESE ARE CHANGED, WriteInputSignature and WriteOutputSignature
  // MUST BE UPDATED!

  // Vertex shader inputs.
  kVSInVertexIndex = 0,

  // Vertex shader outputs.
  kVSOutInterpolators = 0,
  kVSOutPointParameters = kVSOutInterpolators + kInterpolatorCount,
  kVSOutPosition = kVSOutPointParameters + 1,

  // Pixel shader inputs.
  kPSInInterpolators = 0,
  kPSInPointParameters = kPSInInterpolators + kInterpolatorCount,
  kPSInPosition = kPSInPointParameters + 1,
  kPSInFrontFace = kPSInPosition + 1,
};
// Common 4-component swizzles, two bits per component, with the source of X in
// the lowest bits (0bWWZZYYXX).
static constexpr uint32_t kSwizzleXYZW = 0xE4;  // 0b11'10'01'00 - identity.
static constexpr uint32_t kSwizzleXXXX = 0x00;  // 0b00'00'00'00
static constexpr uint32_t kSwizzleYYYY = 0x55;  // 0b01'01'01'01
static constexpr uint32_t kSwizzleZZZZ = 0xAA;  // 0b10'10'10'10
static constexpr uint32_t kSwizzleWWWW = 0xFF;  // 0b11'11'11'11
// Operand encoding, with 32-bit immediate indices by default. None of the
// arguments must be shifted when calling.
//
// Builds the operand token for a 1-component (scalar) operand:
// - type goes to bit 12.
// - index_dimension goes to bit 20.
// - index_representation_0/1/2 (the representations of the indices of the
//   first, the second and the third dimension) go to bits 22, 25 and 28
//   respectively.
static constexpr uint32_t EncodeScalarOperand(
    uint32_t type, uint32_t index_dimension,
    uint32_t index_representation_0 = 0, uint32_t index_representation_1 = 0,
    uint32_t index_representation_2 = 0) {
  // D3D10_SB_OPERAND_1_COMPONENT.
  // The field at bit 28 belongs to the third index - it must come from
  // index_representation_2, just like in the vector operand encoders, not
  // from index_representation_0.
  return 1 | (type << 12) | (index_dimension << 20) |
         (index_representation_0 << 22) | (index_representation_1 << 25) |
         (index_representation_2 << 28);
}
// Builds the operand token for a 4-component operand in mask mode - for writing
// to vectors. The mask literal can be written as 0bWZYX.
static constexpr uint32_t EncodeVectorMaskedOperand(
    uint32_t type, uint32_t mask, uint32_t index_dimension,
    uint32_t index_representation_0 = 0, uint32_t index_representation_1 = 0,
    uint32_t index_representation_2 = 0) {
  // D3D10_SB_OPERAND_4_COMPONENT with D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE
  // (0) as the selection mode at bit 2.
  const uint32_t component_selection = 2 | (0 << 2) | (mask << 4);
  const uint32_t index_info =
      (index_dimension << 20) | (index_representation_0 << 22) |
      (index_representation_1 << 25) | (index_representation_2 << 28);
  return component_selection | (type << 12) | index_info;
}
// Builds the operand token for a 4-component operand in swizzle mode - for
// reading from vectors. The swizzle can be written as 0bWWZZYYXX.
static constexpr uint32_t EncodeVectorSwizzledOperand(
    uint32_t type, uint32_t swizzle, uint32_t index_dimension,
    uint32_t index_representation_0 = 0, uint32_t index_representation_1 = 0,
    uint32_t index_representation_2 = 0) {
  // D3D10_SB_OPERAND_4_COMPONENT with
  // D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE (1) as the selection mode at
  // bit 2.
  const uint32_t component_selection = 2 | (1 << 2) | (swizzle << 4);
  const uint32_t index_info =
      (index_dimension << 20) | (index_representation_0 << 22) |
      (index_representation_1 << 25) | (index_representation_2 << 28);
  return component_selection | (type << 12) | index_info;
}
// Builds the operand token for a 4-component operand in swizzle mode with the
// same component selected for X, Y, Z and W - for reading a single component
// of a vector as a 4-component vector.
static constexpr uint32_t EncodeVectorReplicatedOperand(
    uint32_t type, uint32_t component, uint32_t index_dimension,
    uint32_t index_representation_0 = 0, uint32_t index_representation_1 = 0,
    uint32_t index_representation_2 = 0) {
  // The component index repeated in each of the four 2-bit swizzle fields.
  const uint32_t replicated_swizzle =
      component | (component << 2) | (component << 4) | (component << 6);
  const uint32_t index_info =
      (index_dimension << 20) | (index_representation_0 << 22) |
      (index_representation_1 << 25) | (index_representation_2 << 28);
  // D3D10_SB_OPERAND_4_COMPONENT with
  // D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE (1) as the selection mode at
  // bit 2.
  return 2 | (1 << 2) | (replicated_swizzle << 4) | (type << 12) | index_info;
}
// Builds the operand token for a 4-component operand in single component
// selection mode - for reading scalars from vectors.
static constexpr uint32_t EncodeVectorSelectOperand(
    uint32_t type, uint32_t component, uint32_t index_dimension,
    uint32_t index_representation_0 = 0, uint32_t index_representation_1 = 0,
    uint32_t index_representation_2 = 0) {
  // D3D10_SB_OPERAND_4_COMPONENT with
  // D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE (2) as the selection mode at
  // bit 2.
  const uint32_t component_selection = 2 | (2 << 2) | (component << 4);
  const uint32_t index_info =
      (index_dimension << 20) | (index_representation_0 << 22) |
      (index_representation_1 << 25) | (index_representation_2 << 28);
  return component_selection | (type << 12) | index_info;
}
// Use IsDXBCVertexShader/IsDXBCPixelShader instead of
// is_vertex_shader/is_pixel_shader because the latter don't take
// is_depth_only_pixel_shader_ into account.
// A depth-only pixel shader is never treated as a vertex shader.
inline bool IsDXBCVertexShader() const {
  if (is_depth_only_pixel_shader_) {
    return false;
  }
  return is_vertex_shader();
}
// Whether a pixel shader is being generated - either a translated one or the
// depth-only one, which is always treated as a pixel shader.
inline bool IsDXBCPixelShader() const {
  if (is_depth_only_pixel_shader_) {
    return true;
  }
  return is_pixel_shader();
}
// Allocates a new r# register for internal use and returns its index.
// NOTE(review): zero presumably requests the register to be initialized with
// zeros - confirm in the implementation.
uint32_t PushSystemTemp(bool zero = false);
// Frees the last allocated internal r# registers for later reuse.
// count is the number of registers to free, 1 by default.
void PopSystemTemp(uint32_t count = 1);
// Whether general-purpose register values should be stored in x0 rather than
// r# in this shader.
bool IndexableGPRsUsed() const;
// Writing the prologue.
void StartVertexShader_LoadVertexIndex();
void StartVertexShader();
void StartPixelShader();
// Writing the epilogue.
void CompleteVertexShader();
// Converts the depth in system_temp_depth_.x to 24-bit unorm or float,
// depending on the flag value. Uses system_temp_depth_.yz as scratch - w not
// touched.
void CompletePixelShader_DepthTo24Bit();
// This just converts the color output value from/to gamma space, not checking
// any conditions.
// color_temp is the temporary register containing the color; to_gamma selects
// the direction of the conversion.
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
void CompletePixelShader_WriteToRTVs();
// Extracts widths and offsets of the components in the lower or the upper
// dword of a pixel from the format constants, for use as ibfe and bfi
// operands later.
void CompletePixelShader_WriteToROV_ExtractPackLayout(uint32_t rt_index,
bool high,
uint32_t width_temp,
uint32_t offset_temp);
// Components of rt_format_flags_temp - indices of the vector components holding
// each of the flags.
enum : uint32_t {
  kROVRTFormatFlagTemp_ColorFixed = 0,
  kROVRTFormatFlagTemp_AlphaFixed = 1,
  kROVRTFormatFlagTemp_Float10 = 2,
  kROVRTFormatFlagTemp_Float16 = 3,
  // Swizzle (0bWWZZYYXX) taking the ColorFixed component for XYZ and the
  // AlphaFixed component for W.
  kROVRTFormatFlagTemp_Fixed_Swizzle =
      (kROVRTFormatFlagTemp_ColorFixed * 0b00010101) +
      (kROVRTFormatFlagTemp_AlphaFixed * 0b01000000),
};
// NOTE(review): presumably loads the current color of the render target
// rt_index from the EDRAM (at the dword offsets in the two offset temporary
// registers) and unpacks it into target_temp - confirm in the implementation.
// rt_format_flags_temp contains the flags in the components listed in
// kROVRTFormatFlagTemp.
void CompletePixelShader_WriteToROV_LoadColor(
uint32_t edram_dword_offset_low_temp,
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
uint32_t rt_format_flags_temp, uint32_t target_temp);
// Clamps the color to the range representable by the render target's format.
// Will also remove NaN since min and max return the non-NaN value.
// color_in_temp and color_out_temp may be the same.
void CompletePixelShader_WriteToROV_ClampColor(uint32_t rt_index,
uint32_t color_in_temp,
uint32_t color_out_temp);
// Extracts 0.0 or plus/minus 1.0 from a blend constant. For example, it can
// be used to extract one scale for color and alpha into XY, and another scale
// for color and alpha into ZW. constant_swizzle is a bit mask indicating
// which part of the blend constant for the render target to extract the scale
// from, 0b00000000 for X/Z only, 0b01010101 for Y/W only, 0b00000001 for X/Z
// in the first component, Y/W in the rest (XY changed to ZW automatically
// according to the render target index - don't set the higher bit).
void CompletePixelShader_WriteToROV_ExtractBlendScales(
uint32_t rt_index, uint32_t constant_swizzle, bool is_signed,
uint32_t shift_x, uint32_t shift_y, uint32_t shift_z, uint32_t shift_w,
uint32_t target_temp, uint32_t write_mask = 0b1111);
// NOTE(review): presumably applies a scale obtained from
// CompletePixelShader_WriteToROV_ExtractBlendScales to a blend factor, handling
// the zero scale specially - confirm in the implementation.
void CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
uint32_t scale_temp, uint32_t scale_swizzle, uint32_t factor_in_temp,
uint32_t factor_swizzle, uint32_t factor_out_temp,
uint32_t write_mask = 0b1111);
// NOTE(review): judging by the parameter names, src_color_and_output_temp holds
// the source color on entry and receives the blended result - confirm.
void CompletePixelShader_WriteToROV_Blend(uint32_t rt_index,
uint32_t rt_format_flags_temp,
uint32_t src_color_and_output_temp,
uint32_t dest_color_temp);
// Assumes the incoming color is already clamped to the range representable by
// the RT format.
void CompletePixelShader_WriteToROV_StoreColor(
uint32_t edram_dword_offset_low_temp,
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
uint32_t rt_format_flags_temp, uint32_t source_and_scratch_temp);
void CompletePixelShader_WriteToROV();
void CompletePixelShader();
void CompleteShaderCode();
// Writes the original instruction disassembly in the output DXBC if enabled,
// as shader messages, from instruction_disassembly_buffer_.
void EmitInstructionDisassembly();
// Abstract 4-component vector source operand.
// Filled by LoadDxbcSourceOperand and released with UnloadDxbcSourceOperand.
struct DxbcSourceOperand {
enum class Type {
// GPR number in the index - used only when GPRs are not dynamically
// indexed in the shader and there are no constant zeros and ones in the
// swizzle.
kRegister,
// Immediate: float constant vector number in the index.
// Dynamic: intermediate X contains page number, intermediate Y contains
// vector number in the page.
kConstantFloat,
// The whole value preloaded to the intermediate register - used for GPRs
// when they are indexable, for bool/loop constants pre-converted to
// float, and for other operands if their swizzle contains 0 or 1.
kIntermediateRegister,
// Literal vector of zeros and positive or negative ones - when the
// swizzle contains only them, or when the parsed operand is invalid (for
// example, if it's a fetch constant in a non-tfetch texture instruction).
// 0 or 1 specified in the index as bits, can be negated.
kZerosOnes,
};
Type type;
// The meaning depends on the type - see the Type enumerators.
uint32_t index;
// If the operand is dynamically indexed directly when it's used as an
// operand in DXBC instructions.
InstructionStorageAddressingMode addressing_mode;
uint32_t swizzle;
bool is_negated;
bool is_absolute_value;
// Temporary register containing data required to access the value if it has
// to be accessed in multiple operations (allocated with PushSystemTemp).
uint32_t intermediate_register;
// NOTE(review): presumably the intermediate_register value meaning that no
// temporary register has been allocated - confirm.
static constexpr uint32_t kIntermediateRegisterNone = UINT32_MAX;
};
// Each Load must be followed by Unload, otherwise there may be a temporary
// register leak.
// dxbc_operand is the output: it's taken by non-const reference, while the
// parsed operand it's built from is const.
void LoadDxbcSourceOperand(const InstructionOperand& operand,
DxbcSourceOperand& dxbc_operand);
// Number of tokens this operand adds to the instruction length when used.
// negate and absolute both default to false.
// NOTE(review): they mirror the parameters of UseDxbcSourceOperand with the
// same names, so presumably the same values must be passed to both functions
// for the length to match what is actually written - confirm.
uint32_t DxbcSourceOperandLength(const DxbcSourceOperand& operand,
bool negate = false,
bool absolute = false) const;
// Writes the operand access tokens to the instruction (either for a scalar if
// select_component is <= 3, or for a vector).
// With the default arguments, the operand is written as a vector
// (select_component is 4), with kSwizzleXYZW as the additional swizzle and
// without negate and absolute.
// NOTE(review): negate and absolute are presumably applied in addition to the
// is_negated and is_absolute_value modifiers of the operand itself - confirm.
void UseDxbcSourceOperand(const DxbcSourceOperand& operand,
uint32_t additional_swizzle = kSwizzleXYZW,
uint32_t select_component = 4, bool negate = false,
bool absolute = false);
// Must be called after every LoadDxbcSourceOperand to avoid leaking a
// temporary register.
// NOTE(review): presumably releases the register stored in
// operand.intermediate_register if one was allocated - confirm.
void UnloadDxbcSourceOperand(const DxbcSourceOperand& operand);
// Writes xyzw or xxxx of the specified r# to the destination.
// NOTE(review): replicate_x presumably selects the xxxx form, and reg is the
// index of the r# register to read from - confirm.
void StoreResult(const InstructionResult& result, uint32_t reg,
bool replicate_x);
// The nesting of `if` instructions is the following:
// - pc checks (labels).
// - exec predicate/bool constant check.
// - Instruction-level predicate checks.
// As an optimization, where possible, the DXBC translator tries to merge
// multiple execs into one, not creating endif/if doing nothing, if the
// execution condition is the same. This can't be done across labels
// (obviously) and in case `setp` is done in a predicated exec - in this case,
// the predicate value in the current exec may not match the predicate value
// in the next exec.
// Instruction-level predicate checks are also merged, and until a `setp` is
// done, if the instruction has the same predicate condition as the exec it is
// in, no instruction-level predicate `if` is created either. One exception
// to the usual way of instruction-level predicate handling is made for
// instructions involving derivative computation, such as texture fetches with
// computed LOD. The part involving derivatives is executed disregarding the
// predication, but the result storing is predicated (this is handled in
// texture fetch instruction implementation):
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
// Updates the current flow control condition (to be called in the beginning
// of exec and in jumps), closing the previous conditionals if needed.
// However, if the condition is not different, the instruction-level predicate
// `if` also won't be closed - this must be checked separately if needed (for
// example, in jumps). If emit_disassembly is true, this function emits the
// last disassembly written to instruction_disassembly_buffer_ after closing
// the previous conditional and before opening a new one.
// NOTE(review): bool_constant_index and condition presumably describe the new
// condition that is then tracked in the cf_exec_* members of this class, with
// type selecting between the unconditional, bool constant and predicate
// cases - confirm.
void UpdateExecConditionals(ParsedExecInstruction::Type type,
uint32_t bool_constant_index, bool condition,
bool emit_disassembly);
// Closes `if`s opened by exec and instructions within them (but not by
// labels) and updates the state accordingly.
// NOTE(review): "the state" presumably means the cf_exec_* and
// cf_instruction_predicate_* members of this class - confirm.
void CloseExecConditionals();
// Opens or reopens the predicate check conditional for the instruction. If
// emit_disassembly is true, this function emits the last disassembly written
// to instruction_disassembly_buffer_ after closing the previous predicate
// conditional and before opening a new one.
// NOTE(review): predicated is presumably whether the instruction has a
// predicate check at all, and condition is the predicate value required for
// it to be executed - confirm.
void UpdateInstructionPredication(bool predicated, bool condition,
bool emit_disassembly);
// Closes the instruction-level predicate `if` if it's open, useful if a flow
// control instruction needs to do some code which needs to respect the exec's
// conditional, but can't itself be predicated.
void CloseInstructionPredication();
// NOTE(review): not documented in this header. address is presumably the
// guest control flow address of the target label - confirm.
void JumpToLabel(uint32_t address);
// Emits code for endian swapping of the data located in pv.
// NOTE(review): pv is presumably the register referenced by system_temp_pv_,
// and write_mask selects the components to swap - confirm.
void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask);
// Returns T#/t# index (they are the same in this translator).
// is_sign_required defaults to false.
// NOTE(review): judging by the name, an existing entry with matching
// parameters is presumably reused before a new one is added to
// texture_srvs_ - confirm.
uint32_t FindOrAddTextureSRV(uint32_t fetch_constant,
TextureDimension dimension, bool is_signed,
bool is_sign_required = false);
// Returns S#/s# index (they are the same in this translator).
// NOTE(review): judging by the name, an existing entry with matching
// parameters is presumably reused before a new one is added to
// sampler_bindings_ - confirm.
uint32_t FindOrAddSamplerBinding(uint32_t fetch_constant,
TextureFilter mag_filter,
TextureFilter min_filter,
TextureFilter mip_filter,
AnisoFilter aniso_filter);
// Converts (S, T, face index) in the specified temporary register to a 3D
// cubemap coordinate.
// NOTE(review): the result is presumably written back to the same register
// since there is no separate output parameter - confirm.
void ArrayCoordToCubeDirection(uint32_t reg);
// Translation of the vector and the scalar parts of a parsed ALU instruction.
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr);
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr);
// Appends a string to a DWORD stream, returns the DWORD-aligned length.
// NOTE(review): the returned length is presumably in bytes, the same as what
// GetStringLength returns for the same string - confirm.
static uint32_t AppendString(std::vector<uint32_t>& dest, const char* source);
// Computes the size, in bytes, that a string would occupy if it was appended
// to a DWORD stream: the characters plus the null terminator, aligned to
// sizeof(uint32_t) with xe::align.
static inline uint32_t GetStringLength(const char* source) {
  const size_t length_with_terminator = std::strlen(source) + 1;
  return uint32_t(xe::align(length_with_terminator, sizeof(uint32_t)));
}
// NOTE(review): not documented in this header. Judging by the names, each of
// these presumably writes one part of shader_object_ (resource definitions,
// input and output signatures, shader code) - confirm.
void WriteResourceDefinitions();
void WriteInputSignature();
void WriteOutputSignature();
void WriteShaderCode();
// Executable instructions - generated during translation.
// Stored as 32-bit tokens.
std::vector<uint32_t> shader_code_;
// Complete shader object, with all the needed chunks and dcl_ instructions -
// generated in the end of translation.
std::vector<uint32_t> shader_object_;
// Buffer for instruction disassembly comments.
// Read by EmitInstructionDisassembly.
StringBuffer instruction_disassembly_buffer_;
// Whether the output merger should be emulated in pixel shaders.
// NOTE(review): presumably set from the edram_rov_used constructor argument -
// confirm.
bool edram_rov_used_;
// Is currently writing the empty depth-only pixel shader, for
// CompleteTranslation.
bool is_depth_only_pixel_shader_;
// Data types used in constant buffers. Listed in dependency order.
// The enumerators have no explicit values, so their order defines the indices
// into rdef_types_.
enum class RdefTypeIndex {
kFloat,
kFloat2,
kFloat3,
kFloat4,
kInt,
kUint,
kUint4,
// Float constants - size written dynamically.
kFloat4ConstantArray,
// Bool constants.
kUint4Array8,
// Loop constants.
kUint4Array32,
// Fetch constants.
kUint4Array48,
// Number of the types above - the size of rdef_types_.
kCount,
// Same value as kCount.
// NOTE(review): presumably means "no type", for instance, for
// RdefType::array_element_type of non-array types - confirm.
kUnknown = kCount
};
// Member of a structure type, referenced from RdefType::struct_members.
struct RdefStructMember {
const char* name;
RdefTypeIndex type;
// NOTE(review): presumably the offset of the member within the structure, in
// bytes - confirm.
uint32_t offset;
};
// Description of one RdefTypeIndex type (see rdef_types_ below).
struct RdefType {
// Name ignored for arrays.
const char* name;
// D3D10_SHADER_VARIABLE_CLASS.
uint32_t type_class;
// D3D10_SHADER_VARIABLE_TYPE.
uint32_t type;
uint32_t row_count;
uint32_t column_count;
// 0 for primitive types, 1 for structures, array size for arrays.
uint32_t element_count;
// NOTE(review): presumably the number of entries in struct_members, 0 for
// types that are not structures - confirm.
uint32_t struct_member_count;
// NOTE(review): presumably RdefTypeIndex::kUnknown for types that are not
// arrays - confirm.
RdefTypeIndex array_element_type;
const RdefStructMember* struct_members;
};
// One entry per RdefTypeIndex value below kCount.
static const RdefType rdef_types_[size_t(RdefTypeIndex::kCount)];
// Number of constant buffer bindings used in this shader - also used for
// generation of indices of constant buffers that are optional.
uint32_t cbuffer_count_;
// NOTE(review): presumably the value of the cbuffer_index_* members below
// while the corresponding constant buffer is not used by the shader - confirm.
static constexpr uint32_t kCbufferIndexUnallocated = UINT32_MAX;
// One index per CbufferRegister value.
uint32_t cbuffer_index_system_constants_;
uint32_t cbuffer_index_float_constants_;
uint32_t cbuffer_index_bool_loop_constants_;
uint32_t cbuffer_index_fetch_constants_;
// Description of one system constant (see system_constant_rdef_ below).
struct SystemConstantRdef {
const char* name;
RdefTypeIndex type;
// NOTE(review): offset and size are presumably in bytes, within the system
// constant buffer - confirm.
uint32_t offset;
uint32_t size;
};
// One entry per system constant (kSysConst_Count entries).
static const SystemConstantRdef system_constant_rdef_[kSysConst_Count];
// Mask of system constants (1 << kSysConst_#_Index) used in the shader, so
// the remaining ones can be marked as unused in RDEF.
// The mask is 64-bit, so only indices below 64 can be represented in it.
uint64_t system_constants_used_;
// Whether constants are dynamically indexed and need to be marked as such in
// dcl_constantBuffer.
bool float_constants_dynamic_indexed_;
bool bool_loop_constants_dynamic_indexed_;
// Offsets of float constant indices in shader_code_, for remapping in
// CompleteTranslation (initially, at these offsets, guest float constant
// indices are written).
std::vector<uint32_t> float_constant_index_offsets_;
// Number of currently allocated Xenia internal r# registers.
uint32_t system_temp_count_current_;
// Total maximum number of temporary registers ever used during this
// translation (for the declaration).
uint32_t system_temp_count_max_;
// NOTE(review): the system_temp_* members below are presumably r# register
// indices rather than the values themselves - confirm.
// Vector ALU result/scratch (since Xenos write masks can contain swizzles).
uint32_t system_temp_pv_;
// Temporary register ID for previous scalar result, program counter,
// predicate and absolute address register.
uint32_t system_temp_ps_pc_p0_a0_;
// Loop index stack - .x is the active loop, shifted right to .yzw on push.
uint32_t system_temp_aL_;
// Loop counter stack, .x is the active loop. Represents number of times
// remaining to loop.
uint32_t system_temp_loop_count_;
// Explicitly set texture gradients and LOD.
uint32_t system_temp_grad_h_lod_;
uint32_t system_temp_grad_v_;
// Position in vertex shaders (because viewport and W transformations can be
// applied in the end of the shader).
uint32_t system_temp_position_;
// Color outputs in pixel shaders (because of exponent bias, alpha test and
// remapping).
// One entry per color output.
uint32_t system_temp_color_[4];
// Whether the color output has been written in the execution path (ROV only).
// NOTE(review): this is a uint32_t like the other system_temp_* members, so
// presumably it's the register holding the flags, not a flag - confirm.
uint32_t system_temp_color_written_;
// Depth output in pixel shader, and 3 dwords usable as scratch for operations
// related to depth. Currently only used for ROV depth.
// TODO(Triang3l): Reduce depth to 24-bit in pixel shaders when using a DSV
// for accuracy.
uint32_t system_temp_depth_;
// The bool constant number containing the condition for the currently
// processed exec (or the last - unless a label has reset this), or
// kCfExecBoolConstantNone if it's not checked.
uint32_t cf_exec_bool_constant_;
static constexpr uint32_t kCfExecBoolConstantNone = UINT32_MAX;
// The expected bool constant value in the current exec if
// cf_exec_bool_constant_ is not kCfExecBoolConstantNone.
bool cf_exec_bool_constant_condition_;
// Whether the currently processed exec is executed if a predicate is
// set/unset.
bool cf_exec_predicated_;
// The expected predicated condition if cf_exec_predicated_ is true.
bool cf_exec_predicate_condition_;
// Whether an `if` for instruction-level predicate check is currently open.
bool cf_instruction_predicate_if_open_;
// The expected predicate condition for the current or the last instruction if
// the instruction-level predicate `if` is open.
// NOTE(review): this comment used to refer to cf_exec_instruction_predicated_,
// which is not declared among the members here;
// cf_instruction_predicate_if_open_ is presumably what was meant - confirm.
bool cf_instruction_predicate_condition_;
// Whether there was a `setp` in the current exec before the current
// instruction, thus instruction-level predicate value can be different than
// the exec-level predicate value, and can't merge two execs with the same
// predicate condition anymore.
bool cf_exec_predicate_written_;
// NOTE(review): presumably whether the pixel shader writes depth - confirm.
bool writes_depth_;
// Bindings returned by index from FindOrAddTextureSRV and
// FindOrAddSamplerBinding.
// NOTE(review): presumably the index is the position in these vectors -
// confirm.
std::vector<TextureSRV> texture_srvs_;
std::vector<SamplerBinding> sampler_bindings_;
// The STAT chunk (based on Wine d3dcompiler_parse_stat).
// All fields are uint32_t.
// NOTE(review): the order of the fields presumably mirrors the binary layout
// of the chunk, so fields must not be reordered, added or removed without
// checking the code that writes the chunk - confirm.
struct Statistics {
uint32_t instruction_count;
uint32_t temp_register_count;
// Unknown in Wine.
uint32_t def_count;
// Only inputs and outputs.
uint32_t dcl_count;
uint32_t float_instruction_count;
uint32_t int_instruction_count;
uint32_t uint_instruction_count;
// endif, ret.
uint32_t static_flow_control_count;
// if (but not else).
uint32_t dynamic_flow_control_count;
// Unknown in Wine.
uint32_t macro_instruction_count;
uint32_t temp_array_count;
uint32_t array_instruction_count;
uint32_t cut_instruction_count;
uint32_t emit_instruction_count;
uint32_t texture_normal_instructions;
uint32_t texture_load_instructions;
uint32_t texture_comp_instructions;
uint32_t texture_bias_instructions;
uint32_t texture_gradient_instructions;
// Not including indexable temp load/store.
uint32_t mov_instruction_count;
// Unknown in Wine.
uint32_t movc_instruction_count;
uint32_t conversion_instruction_count;
// Unknown in Wine.
uint32_t unknown_22;
uint32_t input_primitive;
uint32_t gs_output_topology;
uint32_t gs_max_output_vertex_count;
uint32_t unknown_26;
// Unknown in Wine, but confirmed by testing.
uint32_t lod_instructions;
uint32_t unknown_28;
uint32_t unknown_29;
uint32_t c_control_points;
uint32_t hs_output_primitive;
uint32_t hs_partitioning;
uint32_t tessellator_domain;
// Unknown in Wine.
uint32_t c_barrier_instructions;
// Unknown in Wine.
uint32_t c_interlocked_instructions;
// Unknown in Wine, but confirmed by testing.
uint32_t c_texture_store_instructions;
};
// Statistics for the shader being translated.
Statistics stat_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_DXBC_SHADER_TRANSLATOR_H_