[DXBC/GPU] vfetch to new codegen, signed vfetch offset, fix AND with system flags

This commit is contained in:
Triang3l 2020-05-23 17:14:14 +03:00
parent e80115020e
commit 3879ff29b3
7 changed files with 411 additions and 685 deletions

View File

@ -722,7 +722,7 @@ void DxbcShaderTranslator::StartPixelShader() {
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp),
DxbcSrc::LU(kSysFlag_PrimitiveTwoFaced_Shift));
DxbcSrc::LU(kSysFlag_PrimitiveTwoFaced));
DxbcOpIf(true, DxbcSrc::R(param_gen_temp, DxbcSrc::kZZZZ));
{
// Negate modifier flips the sign bit even for 0 - set it to minus for
@ -1041,7 +1041,7 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
{
// Extract the killing condition.
DxbcOpAnd(temp_x_dest, flags_src,
DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled_Shift));
DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled));
DxbcOpIf(true, temp_x_src);
{
// Kill the primitive if any vertex is killed - write NaN to position.

View File

@ -1100,10 +1100,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
kIAdd = 30,
kIf = 31,
kIEq = 32,
kIGE = 33,
kILT = 34,
kIMAd = 35,
kIMax = 36,
kIMin = 37,
kIMul = 38,
kINE = 39,
kIShL = 41,
kIToF = 43,
@ -1151,6 +1153,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
kBFI = 140,
kLdUAVTyped = 163,
kStoreUAVTyped = 164,
kLdRaw = 165,
kStoreRaw = 166,
kEvalSampleIndex = 204,
};
@ -1410,6 +1413,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
DxbcEmitAluOp(DxbcOpcode::kIEq, 0b11, dest, src0, src1);
++stat_.int_instruction_count;
}
// Emits a two-source kIGE ALU instruction writing to `dest`, then counts it
// in the integer instruction statistics.
// NOTE(review): presumably maps to the Direct3D `ige` (signed integer
// greater-or-equal) instruction - confirm against the opcode table.
// NOTE(review): 0b11 is the same second argument the neighbouring integer
// comparison emitters pass to DxbcEmitAluOp; its meaning is defined there.
void DxbcOpIGE(const DxbcDest& dest, const DxbcSrc& src0,
               const DxbcSrc& src1) {
  DxbcEmitAluOp(DxbcOpcode::kIGE, 0b11, dest, src0, src1);
  stat_.int_instruction_count += 1;
}
void DxbcOpILT(const DxbcDest& dest, const DxbcSrc& src0,
const DxbcSrc& src1) {
DxbcEmitAluOp(DxbcOpcode::kILT, 0b11, dest, src0, src1);
@ -1430,6 +1438,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
DxbcEmitAluOp(DxbcOpcode::kIMin, 0b11, dest, src0, src1);
++stat_.int_instruction_count;
}
// Emits a kIMul ALU instruction with two destinations (`dest_hi` first, then
// `dest_lo`) and two sources, then counts it in the integer instruction
// statistics.
// NOTE(review): presumably the Direct3D `imul` instruction, where the first
// destination receives the high half of the product and the second the low
// half - confirm against the opcode documentation.
void DxbcOpIMul(const DxbcDest& dest_hi, const DxbcDest& dest_lo,
                const DxbcSrc& src0, const DxbcSrc& src1) {
  DxbcEmitAluOp(DxbcOpcode::kIMul, 0b11, dest_hi, dest_lo, src0, src1);
  stat_.int_instruction_count += 1;
}
void DxbcOpINE(const DxbcDest& dest, const DxbcSrc& src0,
const DxbcSrc& src1) {
DxbcEmitAluOp(DxbcOpcode::kINE, 0b11, dest, src0, src1);
@ -1694,6 +1707,33 @@ class DxbcShaderTranslator : public ShaderTranslator {
++stat_.instruction_count;
++stat_.c_texture_store_instructions;
}
// Appends a kLdRaw instruction to shader_code_: opcode token, destination
// operand, byte offset operand, then the source operand.
//
// Accepted destination/source shapes (checked with assertions only):
// - Load: FXC writes to any single destination component and reads the source
//   SRV/UAV with the xxxx swizzle.
// - Load2/Load3/Load4: xy/xyz/xyzw write mask with xyxx/xyzx/xyzw swizzle.
//
// Also bumps the total instruction and texture load instruction statistics.
void DxbcOpLdRaw(const DxbcDest& dest, const DxbcSrc& byte_offset,
                 const DxbcSrc& src) {
  uint32_t write_mask = dest.GetMask();
  // Only a single component, or a run of components starting at x, is valid.
  assert_true(write_mask == 0b1111 || write_mask == 0b0111 ||
              write_mask == 0b0011 || write_mask == 0b1000 ||
              write_mask == 0b0100 || write_mask == 0b0010 ||
              write_mask == 0b0001);
  uint32_t num_components = xe::bit_count(write_mask);
  // The swizzle takes 2 bits per component - the components actually read
  // must be in the identity (x, y, z, w) order.
  assert_true((src.swizzle_ & ((1 << (num_components * 2)) - 1)) ==
              (DxbcSrc::kXYZW & ((1 << (num_components * 2)) - 1)));
  uint32_t source_mask = (1 << num_components) - 1;

  // The operand lengths are needed up front for the opcode token.
  uint32_t dest_length = dest.GetLength();
  uint32_t offset_length = byte_offset.GetLength(0b0000);
  uint32_t source_length = src.GetLength(source_mask, true);
  uint32_t operands_length = dest_length + offset_length + source_length;

  shader_code_.reserve(shader_code_.size() + 1 + operands_length);
  shader_code_.push_back(
      DxbcOpcodeToken(DxbcOpcode::kLdRaw, operands_length));
  dest.Write(shader_code_);
  byte_offset.Write(shader_code_, true, 0b0000);
  src.Write(shader_code_, true, source_mask, true);

  stat_.instruction_count += 1;
  stat_.texture_load_instructions += 1;
}
void DxbcOpStoreRaw(const DxbcDest& dest, const DxbcSrc& byte_offset,
const DxbcSrc& value) {
uint32_t dest_write_mask = dest.GetMask();
@ -2228,9 +2268,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
void CloseInstructionPredication();
void JumpToLabel(uint32_t address);
// Emits code for endian swapping of the data located in pv.
void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask);
// Returns index in texture_srvs_, and, for bound textures, it's also relative
// to the base T#/t# index of textures.
uint32_t FindOrAddTextureSRV(uint32_t fetch_constant,

File diff suppressed because it is too large Load Diff

View File

@ -423,13 +423,14 @@ struct ParsedVertexFetchInstruction {
struct Attributes {
VertexFormat data_format = VertexFormat::kUndefined;
int offset = 0;
int stride = 0; // In dwords.
int exp_adjust = 0;
int32_t offset = 0;
uint32_t stride = 0; // In dwords.
int32_t exp_adjust = 0;
// Prefetch count minus 1.
uint32_t prefetch_count = 0;
bool is_index_rounded = false;
bool is_signed = false;
bool is_integer = false;
int prefetch_count = 0;
};
// Attributes describing the fetch operation.
Attributes attributes;

View File

@ -989,10 +989,10 @@ void ShaderTranslator::ParseVertexFetchInstruction(
i.attributes.offset = op.offset();
i.attributes.stride = full_op.stride();
i.attributes.exp_adjust = op.exp_adjust();
i.attributes.prefetch_count = op.prefetch_count();
i.attributes.is_index_rounded = op.is_index_rounded();
i.attributes.is_signed = op.is_signed();
i.attributes.is_integer = !op.is_normalized();
i.attributes.prefetch_count = op.prefetch_count();
// Store for later use by mini fetches.
if (!op.is_mini_fetch()) {

View File

@ -506,10 +506,10 @@ struct VertexFetchInstruction {
bool is_signed() const { return data_.fomat_comp_all == 1; }
bool is_normalized() const { return data_.num_format_all == 0; }
bool is_index_rounded() const { return data_.is_index_rounded == 1; }
// Dword stride, [0-255].
// Dword stride, [0, 255].
uint32_t stride() const { return data_.stride; }
// Dword offset, [
uint32_t offset() const { return data_.offset; }
// Dword offset, [-4194304, 4194303].
int32_t offset() const { return data_.offset; }
void AssignFromFull(const VertexFetchInstruction& full) {
data_.stride = full.data_.stride;
@ -528,6 +528,7 @@ struct VertexFetchInstruction {
uint32_t must_be_one : 1;
uint32_t const_index : 5;
uint32_t const_index_sel : 2;
// Prefetch count minus 1.
uint32_t prefetch_count : 3;
uint32_t src_swiz : 2;
});
@ -545,7 +546,7 @@ struct VertexFetchInstruction {
});
XEPACKEDSTRUCTANONYMOUS({
uint32_t stride : 8;
uint32_t offset : 23;
int32_t offset : 23;
uint32_t pred_condition : 1;
});
});
@ -1375,6 +1376,7 @@ constexpr uint32_t GetAluVectorOpNeededSourceComponents(
AluVectorOpcode vector_opcode, uint32_t src_index,
uint32_t used_result_components) {
assert_not_zero(src_index);
assert_zero(used_result_components & ~uint32_t(0b1111));
uint32_t components = used_result_components;
switch (vector_opcode) {
case AluVectorOpcode::kDp4:

View File

@ -401,6 +401,43 @@ inline int GetVertexFormatSizeInWords(VertexFormat format) {
}
}
// Returns a bit mask of the words of a vertex element in `format` that are
// needed to obtain the components selected by `used_components` (a 4-bit
// xyzw mask, bit 0 = x). Bit N of the result stands for word N of the element.
//
// - Nothing used -> nothing needed, regardless of the format.
// - Formats packed into a single word need word 0 if any of the components
//   they actually contain is used.
// - 16_16_16_16 formats keep xy in word 0 and zw in word 1.
// - 32-bit-per-component formats map each component to its own word.
// Unknown formats trip assert_unhandled_case and yield 0.
inline uint32_t GetVertexFormatNeededWords(VertexFormat format,
                                           uint32_t used_components) {
  assert_zero(used_components & ~uint32_t(0b1111));
  if (used_components == 0) {
    return 0;
  }
  // Word needed for the xy pair and for the zw pair in 16-bit formats.
  const uint32_t xy_word = (used_components & 0b0011) ? 0b0001 : 0b0000;
  const uint32_t zw_word = (used_components & 0b1100) ? 0b0010 : 0b0000;
  uint32_t needed_words;
  switch (format) {
    case VertexFormat::k_8_8_8_8:
    case VertexFormat::k_2_10_10_10:
      // All four components live in the only word.
      needed_words = 0b0001;
      break;
    case VertexFormat::k_10_11_11:
    case VertexFormat::k_11_11_10:
      // Three components in one word.
      needed_words = (used_components & 0b0111) ? 0b0001 : 0b0000;
      break;
    case VertexFormat::k_16_16:
    case VertexFormat::k_16_16_FLOAT:
      needed_words = xy_word;
      break;
    case VertexFormat::k_16_16_16_16:
    case VertexFormat::k_16_16_16_16_FLOAT:
      needed_words = xy_word | zw_word;
      break;
    case VertexFormat::k_32:
    case VertexFormat::k_32_FLOAT:
      needed_words = used_components & 0b0001;
      break;
    case VertexFormat::k_32_32:
    case VertexFormat::k_32_32_FLOAT:
      needed_words = used_components & 0b0011;
      break;
    case VertexFormat::k_32_32_32_FLOAT:
      needed_words = used_components & 0b0111;
      break;
    case VertexFormat::k_32_32_32_32:
    case VertexFormat::k_32_32_32_32_FLOAT:
      needed_words = used_components;
      break;
    default:
      assert_unhandled_case(format);
      needed_words = 0b0000;
      break;
  }
  return needed_words;
}
enum class CompareFunction : uint32_t {
kNever = 0b000,
kLess = 0b001,