[DXBC/GPU] vfetch to new codegen, signed vfetch offset, fix AND with system flags
This commit is contained in:
parent
e80115020e
commit
3879ff29b3
|
@ -722,7 +722,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
||||||
uint32_t(CbufferRegister::kSystemConstants),
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
kSysConst_Flags_Vec)
|
kSysConst_Flags_Vec)
|
||||||
.Select(kSysConst_Flags_Comp),
|
.Select(kSysConst_Flags_Comp),
|
||||||
DxbcSrc::LU(kSysFlag_PrimitiveTwoFaced_Shift));
|
DxbcSrc::LU(kSysFlag_PrimitiveTwoFaced));
|
||||||
DxbcOpIf(true, DxbcSrc::R(param_gen_temp, DxbcSrc::kZZZZ));
|
DxbcOpIf(true, DxbcSrc::R(param_gen_temp, DxbcSrc::kZZZZ));
|
||||||
{
|
{
|
||||||
// Negate modifier flips the sign bit even for 0 - set it to minus for
|
// Negate modifier flips the sign bit even for 0 - set it to minus for
|
||||||
|
@ -1041,7 +1041,7 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
||||||
{
|
{
|
||||||
// Extract the killing condition.
|
// Extract the killing condition.
|
||||||
DxbcOpAnd(temp_x_dest, flags_src,
|
DxbcOpAnd(temp_x_dest, flags_src,
|
||||||
DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled_Shift));
|
DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled));
|
||||||
DxbcOpIf(true, temp_x_src);
|
DxbcOpIf(true, temp_x_src);
|
||||||
{
|
{
|
||||||
// Kill the primitive if any vertex is killed - write NaN to position.
|
// Kill the primitive if any vertex is killed - write NaN to position.
|
||||||
|
|
|
@ -1100,10 +1100,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kIAdd = 30,
|
kIAdd = 30,
|
||||||
kIf = 31,
|
kIf = 31,
|
||||||
kIEq = 32,
|
kIEq = 32,
|
||||||
|
kIGE = 33,
|
||||||
kILT = 34,
|
kILT = 34,
|
||||||
kIMAd = 35,
|
kIMAd = 35,
|
||||||
kIMax = 36,
|
kIMax = 36,
|
||||||
kIMin = 37,
|
kIMin = 37,
|
||||||
|
kIMul = 38,
|
||||||
kINE = 39,
|
kINE = 39,
|
||||||
kIShL = 41,
|
kIShL = 41,
|
||||||
kIToF = 43,
|
kIToF = 43,
|
||||||
|
@ -1151,6 +1153,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kBFI = 140,
|
kBFI = 140,
|
||||||
kLdUAVTyped = 163,
|
kLdUAVTyped = 163,
|
||||||
kStoreUAVTyped = 164,
|
kStoreUAVTyped = 164,
|
||||||
|
kLdRaw = 165,
|
||||||
kStoreRaw = 166,
|
kStoreRaw = 166,
|
||||||
kEvalSampleIndex = 204,
|
kEvalSampleIndex = 204,
|
||||||
};
|
};
|
||||||
|
@ -1410,6 +1413,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kIEq, 0b11, dest, src0, src1);
|
DxbcEmitAluOp(DxbcOpcode::kIEq, 0b11, dest, src0, src1);
|
||||||
++stat_.int_instruction_count;
|
++stat_.int_instruction_count;
|
||||||
}
|
}
|
||||||
|
void DxbcOpIGE(const DxbcDest& dest, const DxbcSrc& src0,
|
||||||
|
const DxbcSrc& src1) {
|
||||||
|
DxbcEmitAluOp(DxbcOpcode::kIGE, 0b11, dest, src0, src1);
|
||||||
|
++stat_.int_instruction_count;
|
||||||
|
}
|
||||||
void DxbcOpILT(const DxbcDest& dest, const DxbcSrc& src0,
|
void DxbcOpILT(const DxbcDest& dest, const DxbcSrc& src0,
|
||||||
const DxbcSrc& src1) {
|
const DxbcSrc& src1) {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kILT, 0b11, dest, src0, src1);
|
DxbcEmitAluOp(DxbcOpcode::kILT, 0b11, dest, src0, src1);
|
||||||
|
@ -1430,6 +1438,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kIMin, 0b11, dest, src0, src1);
|
DxbcEmitAluOp(DxbcOpcode::kIMin, 0b11, dest, src0, src1);
|
||||||
++stat_.int_instruction_count;
|
++stat_.int_instruction_count;
|
||||||
}
|
}
|
||||||
|
void DxbcOpIMul(const DxbcDest& dest_hi, const DxbcDest& dest_lo,
|
||||||
|
const DxbcSrc& src0, const DxbcSrc& src1) {
|
||||||
|
DxbcEmitAluOp(DxbcOpcode::kIMul, 0b11, dest_hi, dest_lo, src0, src1);
|
||||||
|
++stat_.int_instruction_count;
|
||||||
|
}
|
||||||
void DxbcOpINE(const DxbcDest& dest, const DxbcSrc& src0,
|
void DxbcOpINE(const DxbcDest& dest, const DxbcSrc& src0,
|
||||||
const DxbcSrc& src1) {
|
const DxbcSrc& src1) {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kINE, 0b11, dest, src0, src1);
|
DxbcEmitAluOp(DxbcOpcode::kINE, 0b11, dest, src0, src1);
|
||||||
|
@ -1694,6 +1707,33 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.c_texture_store_instructions;
|
++stat_.c_texture_store_instructions;
|
||||||
}
|
}
|
||||||
|
void DxbcOpLdRaw(const DxbcDest& dest, const DxbcSrc& byte_offset,
|
||||||
|
const DxbcSrc& src) {
|
||||||
|
// For Load, FXC emits code for writing to any component of the destination,
|
||||||
|
// with xxxx swizzle of the source SRV/UAV.
|
||||||
|
// For Load2/Load3/Load4, it's xy/xyz/xyzw write mask and xyxx/xyzx/xyzw
|
||||||
|
// swizzle.
|
||||||
|
uint32_t dest_write_mask = dest.GetMask();
|
||||||
|
assert_true(dest_write_mask == 0b0001 || dest_write_mask == 0b0010 ||
|
||||||
|
dest_write_mask == 0b0100 || dest_write_mask == 0b1000 ||
|
||||||
|
dest_write_mask == 0b0011 || dest_write_mask == 0b0111 ||
|
||||||
|
dest_write_mask == 0b1111);
|
||||||
|
uint32_t component_count = xe::bit_count(dest_write_mask);
|
||||||
|
assert_true((src.swizzle_ & ((1 << (component_count * 2)) - 1)) ==
|
||||||
|
(DxbcSrc::kXYZW & ((1 << (component_count * 2)) - 1)));
|
||||||
|
uint32_t src_mask = (1 << component_count) - 1;
|
||||||
|
uint32_t operands_length = dest.GetLength() +
|
||||||
|
byte_offset.GetLength(0b0000) +
|
||||||
|
src.GetLength(src_mask, true);
|
||||||
|
shader_code_.reserve(shader_code_.size() + 1 + operands_length);
|
||||||
|
shader_code_.push_back(
|
||||||
|
DxbcOpcodeToken(DxbcOpcode::kLdRaw, operands_length));
|
||||||
|
dest.Write(shader_code_);
|
||||||
|
byte_offset.Write(shader_code_, true, 0b0000);
|
||||||
|
src.Write(shader_code_, true, src_mask, true);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.texture_load_instructions;
|
||||||
|
}
|
||||||
void DxbcOpStoreRaw(const DxbcDest& dest, const DxbcSrc& byte_offset,
|
void DxbcOpStoreRaw(const DxbcDest& dest, const DxbcSrc& byte_offset,
|
||||||
const DxbcSrc& value) {
|
const DxbcSrc& value) {
|
||||||
uint32_t dest_write_mask = dest.GetMask();
|
uint32_t dest_write_mask = dest.GetMask();
|
||||||
|
@ -2228,9 +2268,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
void CloseInstructionPredication();
|
void CloseInstructionPredication();
|
||||||
void JumpToLabel(uint32_t address);
|
void JumpToLabel(uint32_t address);
|
||||||
|
|
||||||
// Emits copde for endian swapping of the data located in pv.
|
|
||||||
void SwapVertexData(uint32_t vfetch_index, uint32_t write_mask);
|
|
||||||
|
|
||||||
// Returns index in texture_srvs_, and, for bound textures, it's also relative
|
// Returns index in texture_srvs_, and, for bound textures, it's also relative
|
||||||
// to the base T#/t# index of textures.
|
// to the base T#/t# index of textures.
|
||||||
uint32_t FindOrAddTextureSRV(uint32_t fetch_constant,
|
uint32_t FindOrAddTextureSRV(uint32_t fetch_constant,
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -423,13 +423,14 @@ struct ParsedVertexFetchInstruction {
|
||||||
|
|
||||||
struct Attributes {
|
struct Attributes {
|
||||||
VertexFormat data_format = VertexFormat::kUndefined;
|
VertexFormat data_format = VertexFormat::kUndefined;
|
||||||
int offset = 0;
|
int32_t offset = 0;
|
||||||
int stride = 0; // In dwords.
|
uint32_t stride = 0; // In dwords.
|
||||||
int exp_adjust = 0;
|
int32_t exp_adjust = 0;
|
||||||
|
// Prefetch count minus 1.
|
||||||
|
uint32_t prefetch_count = 0;
|
||||||
bool is_index_rounded = false;
|
bool is_index_rounded = false;
|
||||||
bool is_signed = false;
|
bool is_signed = false;
|
||||||
bool is_integer = false;
|
bool is_integer = false;
|
||||||
int prefetch_count = 0;
|
|
||||||
};
|
};
|
||||||
// Attributes describing the fetch operation.
|
// Attributes describing the fetch operation.
|
||||||
Attributes attributes;
|
Attributes attributes;
|
||||||
|
|
|
@ -989,10 +989,10 @@ void ShaderTranslator::ParseVertexFetchInstruction(
|
||||||
i.attributes.offset = op.offset();
|
i.attributes.offset = op.offset();
|
||||||
i.attributes.stride = full_op.stride();
|
i.attributes.stride = full_op.stride();
|
||||||
i.attributes.exp_adjust = op.exp_adjust();
|
i.attributes.exp_adjust = op.exp_adjust();
|
||||||
|
i.attributes.prefetch_count = op.prefetch_count();
|
||||||
i.attributes.is_index_rounded = op.is_index_rounded();
|
i.attributes.is_index_rounded = op.is_index_rounded();
|
||||||
i.attributes.is_signed = op.is_signed();
|
i.attributes.is_signed = op.is_signed();
|
||||||
i.attributes.is_integer = !op.is_normalized();
|
i.attributes.is_integer = !op.is_normalized();
|
||||||
i.attributes.prefetch_count = op.prefetch_count();
|
|
||||||
|
|
||||||
// Store for later use by mini fetches.
|
// Store for later use by mini fetches.
|
||||||
if (!op.is_mini_fetch()) {
|
if (!op.is_mini_fetch()) {
|
||||||
|
|
|
@ -506,10 +506,10 @@ struct VertexFetchInstruction {
|
||||||
bool is_signed() const { return data_.fomat_comp_all == 1; }
|
bool is_signed() const { return data_.fomat_comp_all == 1; }
|
||||||
bool is_normalized() const { return data_.num_format_all == 0; }
|
bool is_normalized() const { return data_.num_format_all == 0; }
|
||||||
bool is_index_rounded() const { return data_.is_index_rounded == 1; }
|
bool is_index_rounded() const { return data_.is_index_rounded == 1; }
|
||||||
// Dword stride, [0-255].
|
// Dword stride, [0, 255].
|
||||||
uint32_t stride() const { return data_.stride; }
|
uint32_t stride() const { return data_.stride; }
|
||||||
// Dword offset, [
|
// Dword offset, [-4194304, 4194303].
|
||||||
uint32_t offset() const { return data_.offset; }
|
int32_t offset() const { return data_.offset; }
|
||||||
|
|
||||||
void AssignFromFull(const VertexFetchInstruction& full) {
|
void AssignFromFull(const VertexFetchInstruction& full) {
|
||||||
data_.stride = full.data_.stride;
|
data_.stride = full.data_.stride;
|
||||||
|
@ -528,6 +528,7 @@ struct VertexFetchInstruction {
|
||||||
uint32_t must_be_one : 1;
|
uint32_t must_be_one : 1;
|
||||||
uint32_t const_index : 5;
|
uint32_t const_index : 5;
|
||||||
uint32_t const_index_sel : 2;
|
uint32_t const_index_sel : 2;
|
||||||
|
// Prefetch count minus 1.
|
||||||
uint32_t prefetch_count : 3;
|
uint32_t prefetch_count : 3;
|
||||||
uint32_t src_swiz : 2;
|
uint32_t src_swiz : 2;
|
||||||
});
|
});
|
||||||
|
@ -545,7 +546,7 @@ struct VertexFetchInstruction {
|
||||||
});
|
});
|
||||||
XEPACKEDSTRUCTANONYMOUS({
|
XEPACKEDSTRUCTANONYMOUS({
|
||||||
uint32_t stride : 8;
|
uint32_t stride : 8;
|
||||||
uint32_t offset : 23;
|
int32_t offset : 23;
|
||||||
uint32_t pred_condition : 1;
|
uint32_t pred_condition : 1;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -1375,6 +1376,7 @@ constexpr uint32_t GetAluVectorOpNeededSourceComponents(
|
||||||
AluVectorOpcode vector_opcode, uint32_t src_index,
|
AluVectorOpcode vector_opcode, uint32_t src_index,
|
||||||
uint32_t used_result_components) {
|
uint32_t used_result_components) {
|
||||||
assert_not_zero(src_index);
|
assert_not_zero(src_index);
|
||||||
|
assert_zero(used_result_components & ~uint32_t(0b1111));
|
||||||
uint32_t components = used_result_components;
|
uint32_t components = used_result_components;
|
||||||
switch (vector_opcode) {
|
switch (vector_opcode) {
|
||||||
case AluVectorOpcode::kDp4:
|
case AluVectorOpcode::kDp4:
|
||||||
|
|
|
@ -401,6 +401,43 @@ inline int GetVertexFormatSizeInWords(VertexFormat format) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline uint32_t GetVertexFormatNeededWords(VertexFormat format,
|
||||||
|
uint32_t used_components) {
|
||||||
|
assert_zero(used_components & ~uint32_t(0b1111));
|
||||||
|
if (!used_components) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
switch (format) {
|
||||||
|
case VertexFormat::k_8_8_8_8:
|
||||||
|
case VertexFormat::k_2_10_10_10:
|
||||||
|
return 0b0001;
|
||||||
|
case VertexFormat::k_10_11_11:
|
||||||
|
case VertexFormat::k_11_11_10:
|
||||||
|
return (used_components & 0b0111) ? 0b0001 : 0b0000;
|
||||||
|
case VertexFormat::k_16_16:
|
||||||
|
case VertexFormat::k_16_16_FLOAT:
|
||||||
|
return (used_components & 0b0011) ? 0b0001 : 0b0000;
|
||||||
|
case VertexFormat::k_16_16_16_16:
|
||||||
|
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||||
|
return ((used_components & 0b0011) ? 0b0001 : 0b0000) |
|
||||||
|
((used_components & 0b1100) ? 0b0010 : 0b0000);
|
||||||
|
case VertexFormat::k_32:
|
||||||
|
case VertexFormat::k_32_FLOAT:
|
||||||
|
return used_components & 0b0001;
|
||||||
|
case VertexFormat::k_32_32:
|
||||||
|
case VertexFormat::k_32_32_FLOAT:
|
||||||
|
return used_components & 0b0011;
|
||||||
|
case VertexFormat::k_32_32_32_32:
|
||||||
|
case VertexFormat::k_32_32_32_32_FLOAT:
|
||||||
|
return used_components;
|
||||||
|
case VertexFormat::k_32_32_32_FLOAT:
|
||||||
|
return used_components & 0b0111;
|
||||||
|
default:
|
||||||
|
assert_unhandled_case(format);
|
||||||
|
return 0b0000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
enum class CompareFunction : uint32_t {
|
enum class CompareFunction : uint32_t {
|
||||||
kNever = 0b000,
|
kNever = 0b000,
|
||||||
kLess = 0b001,
|
kLess = 0b001,
|
||||||
|
|
Loading…
Reference in New Issue