forked from ShuriZma/suyu
shader: Add integer attribute get optimization pass
Works around an NVIDIA driver bug where casting an integer attribute to float and back to an integer always returned 0.
This commit is contained in:
parent
640fc1418b
commit
14ac0c2923
|
@ -126,6 +126,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
|
|||
}
|
||||
}
|
||||
|
||||
// Emits a single MOV.S that copies a built-in integer attribute into the .x
// component of the operand formatted from `inst`. The trailing ScalarU32
// parameter is unnamed and unused.
//
// Handled attributes: PrimitiveId, InstanceId and VertexId. Any other value
// throws NotImplementedException.
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32) {
    if (attr == IR::Attribute::PrimitiveId) {
        ctx.Add("MOV.S {}.x,primitive.id;", inst);
        return;
    }
    if (attr == IR::Attribute::InstanceId) {
        // Source operand is prefixed with ctx.attrib_name.
        ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
        return;
    }
    if (attr == IR::Attribute::VertexId) {
        ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
        return;
    }
    throw NotImplementedException("Get U32 attribute {}", attr);
}
|
||||
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
|
||||
[[maybe_unused]] ScalarU32 vertex) {
|
||||
const u32 element{static_cast<u32>(attr) % 4};
|
||||
|
|
|
@ -50,6 +50,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
|||
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
|
||||
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
|
||||
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
|
||||
// U32 counterpart of EmitGetAttribute; declared with the same parameter list.
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
|
||||
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
|
||||
void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
|
||||
|
|
|
@ -221,6 +221,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
|||
}
|
||||
}
|
||||
|
||||
// Emits a GLSL assignment that stores a built-in integer variable, wrapped in
// a uint(...) conversion, into the U32 result of `inst`. The trailing
// std::string_view parameter is unnamed and unused.
//
// Handled attributes: PrimitiveId, InstanceId and VertexId. Any other value
// throws NotImplementedException.
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) {
    switch (attr) {
    case IR::Attribute::PrimitiveId:
        ctx.AddU32("{}=uint(gl_PrimitiveID);", inst);
        return;
    case IR::Attribute::InstanceId:
        ctx.AddU32("{}=uint(gl_InstanceID);", inst);
        return;
    case IR::Attribute::VertexId:
        ctx.AddU32("{}=uint(gl_VertexID);", inst);
        return;
    default:
        break;
    }
    // Only reached for attributes without an integer read path.
    throw NotImplementedException("Get U32 attribute {}", attr);
}
|
||||
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
|
||||
[[maybe_unused]] std::string_view vertex) {
|
||||
if (IR::IsGeneric(attr)) {
|
||||
|
|
|
@ -60,6 +60,8 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
|
|||
const IR::Value& offset);
|
||||
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
||||
std::string_view vertex);
|
||||
// U32 counterpart of EmitGetAttribute; declared with the same parameter list.
void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
||||
std::string_view vertex);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
|
||||
std::string_view vertex);
|
||||
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
|
||||
|
|
|
@ -355,6 +355,31 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
|||
}
|
||||
}
|
||||
|
||||
// Loads a built-in integer attribute as a U32 value and returns the resulting
// Id. The trailing Id parameter is unnamed and unused.
//
// PrimitiveId is loaded directly. InstanceId and VertexId are loaded directly
// when ctx.profile.support_vertex_instance_id is set; otherwise they are
// computed as (index - base) from the corresponding index/base variables.
// Any other attribute throws NotImplementedException.
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
    // Fallback path: load the index first, then the base, and subtract.
    const auto index_minus_base{[&ctx](const auto& index_var, const auto& base_var) -> Id {
        const Id loaded_index{ctx.OpLoad(ctx.U32[1], index_var)};
        const Id loaded_base{ctx.OpLoad(ctx.U32[1], base_var)};
        return ctx.OpISub(ctx.U32[1], loaded_index, loaded_base);
    }};

    switch (attr) {
    case IR::Attribute::PrimitiveId:
        return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
    case IR::Attribute::InstanceId:
        if (!ctx.profile.support_vertex_instance_id) {
            return index_minus_base(ctx.instance_index, ctx.base_instance);
        }
        return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
    case IR::Attribute::VertexId:
        if (!ctx.profile.support_vertex_instance_id) {
            return index_minus_base(ctx.vertex_index, ctx.base_vertex);
        }
        return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
    default:
        throw NotImplementedException("Read U32 attribute {}", attr);
    }
}
|
||||
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
|
||||
const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
|
||||
if (!output) {
|
||||
|
|
|
@ -53,6 +53,7 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
|
|||
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
||||
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
|
||||
// U32 counterpart of EmitGetAttribute; declared with the same parameter list.
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
|
||||
Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
|
||||
void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
|
||||
|
|
|
@ -40,6 +40,7 @@ OPCODE(GetCbufU32, U32, U32,
|
|||
OPCODE(GetCbufF32, F32, U32, U32, )
|
||||
OPCODE(GetCbufU32x2, U32x2, U32, U32, )
|
||||
OPCODE(GetAttribute, F32, Attribute, U32, )
|
||||
// Same (Attribute, U32) operand columns as GetAttribute, but typed U32 where GetAttribute has F32.
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
OPCODE(GetAttributeIndexed, F32, U32, U32, )
|
||||
OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
|
||||
|
|
|
@ -389,6 +389,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
info.uses_demote_to_helper_invocation = true;
|
||||
break;
|
||||
case IR::Opcode::GetAttribute:
|
||||
case IR::Opcode::GetAttributeU32:
|
||||
info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
|
||||
break;
|
||||
case IR::Opcode::SetAttribute:
|
||||
|
|
|
@ -505,6 +505,29 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
|||
return;
|
||||
}
|
||||
}
|
||||
if constexpr (op == IR::Opcode::BitCastU32F32) {
|
||||
// Workaround for new NVIDIA driver bug, where:
|
||||
// uint attr = ftou(itof(gl_InstanceID));
|
||||
// always returned 0.
|
||||
// We can instead manually optimize this and work around the driver bug:
|
||||
// uint attr = uint(gl_InstanceID);
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::GetAttribute) {
|
||||
const IR::Attribute attr{arg_inst->Arg(0).Attribute()};
|
||||
switch (attr) {
|
||||
case IR::Attribute::PrimitiveId:
|
||||
case IR::Attribute::InstanceId:
|
||||
case IR::Attribute::VertexId:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
// Replace the bitcasts with an integer attribute get
|
||||
inst.ReplaceOpcode(IR::Opcode::GetAttributeU32);
|
||||
inst.SetArg(0, arg_inst->Arg(0));
|
||||
inst.SetArg(1, arg_inst->Arg(1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
|
||||
|
|
Loading…
Reference in New Issue