forked from ShuriZma/suyu
shader_recompiler: emulate 8-bit and 16-bit storage writes with cas loop
This commit is contained in:
parent
6533dfd7ce
commit
2a0d707ce1
|
@ -65,6 +65,14 @@ void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value&
|
||||||
WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
|
WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
|
||||||
&StorageDefinitions::U32, index_offset);
|
&StorageDefinitions::U32, index_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a call to ctx.write_storage_cas_loop_func to store a bit field into a storage buffer.
///
/// @param ctx        Emit context that receives the generated call.
/// @param binding    Storage buffer binding being written.
/// @param offset     Offset of the write inside the storage buffer.
/// @param value      Value whose bits are forwarded to the helper function.
/// @param bit_offset Bit position forwarded to the helper function.
/// @param bit_count  Bit width forwarded to the helper function.
void WriteStorageByCasLoop(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value, Id bit_offset, Id bit_count) {
    // The storage buffer is addressed through its U32 view: the element type is
    // ctx.storage_types.U32 and the element size is sizeof(u32).
    const Id word_pointer = StoragePointer(ctx, binding, offset, ctx.storage_types.U32,
                                           sizeof(u32), &StorageDefinitions::U32);
    ctx.OpFunctionCall(ctx.TypeVoid(), ctx.write_storage_cas_loop_func, word_pointer, value,
                       bit_offset, bit_count);
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
void EmitLoadGlobalU8(EmitContext&) {
|
void EmitLoadGlobalU8(EmitContext&) {
|
||||||
|
@ -219,26 +227,42 @@ Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Valu
|
||||||
|
|
||||||
void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
Id value) {
|
Id value) {
|
||||||
|
if (ctx.profile.support_int8) {
|
||||||
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
|
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
|
||||||
sizeof(u8), &StorageDefinitions::U8);
|
sizeof(u8), &StorageDefinitions::U8);
|
||||||
|
} else {
|
||||||
|
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
Id value) {
|
Id value) {
|
||||||
|
if (ctx.profile.support_int8) {
|
||||||
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
|
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
|
||||||
sizeof(s8), &StorageDefinitions::S8);
|
sizeof(s8), &StorageDefinitions::S8);
|
||||||
|
} else {
|
||||||
|
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
Id value) {
|
Id value) {
|
||||||
|
if (ctx.profile.support_int16) {
|
||||||
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
|
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
|
||||||
sizeof(u16), &StorageDefinitions::U16);
|
sizeof(u16), &StorageDefinitions::U16);
|
||||||
|
} else {
|
||||||
|
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
Id value) {
|
Id value) {
|
||||||
|
if (ctx.profile.support_int16) {
|
||||||
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
|
WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
|
||||||
sizeof(s16), &StorageDefinitions::S16);
|
sizeof(s16), &StorageDefinitions::S16);
|
||||||
|
} else {
|
||||||
|
WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||||
|
|
|
@ -480,6 +480,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
|
||||||
DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
|
DefineTextures(program.info, texture_binding, bindings.texture_scaling_index);
|
||||||
DefineImages(program.info, image_binding, bindings.image_scaling_index);
|
DefineImages(program.info, image_binding, bindings.image_scaling_index);
|
||||||
DefineAttributeMemAccess(program.info);
|
DefineAttributeMemAccess(program.info);
|
||||||
|
DefineWriteStorageCasLoopFunction(program.info);
|
||||||
DefineGlobalMemoryFunctions(program.info);
|
DefineGlobalMemoryFunctions(program.info);
|
||||||
DefineRescalingInput(program.info);
|
DefineRescalingInput(program.info);
|
||||||
DefineRenderArea(program.info);
|
DefineRenderArea(program.info);
|
||||||
|
@ -877,6 +878,56 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) {
|
||||||
|
if (profile.support_int8 && profile.support_int16) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!info.uses_int8 && !info.uses_int16) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||||
|
|
||||||
|
const Id ptr_type{TypePointer(spv::StorageClass::StorageBuffer, U32[1])};
|
||||||
|
const Id func_type{TypeFunction(void_id, ptr_type, U32[1], U32[1], U32[1])};
|
||||||
|
const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
|
||||||
|
const Id pointer{OpFunctionParameter(ptr_type)};
|
||||||
|
const Id value{OpFunctionParameter(U32[1])};
|
||||||
|
const Id bit_offset{OpFunctionParameter(U32[1])};
|
||||||
|
const Id bit_count{OpFunctionParameter(U32[1])};
|
||||||
|
|
||||||
|
AddLabel();
|
||||||
|
const Id scope_device{Const(1u)};
|
||||||
|
const Id ordering_relaxed{u32_zero_value};
|
||||||
|
const Id body_label{OpLabel()};
|
||||||
|
const Id continue_label{OpLabel()};
|
||||||
|
const Id endloop_label{OpLabel()};
|
||||||
|
const Id beginloop_label{OpLabel()};
|
||||||
|
OpBranch(beginloop_label);
|
||||||
|
|
||||||
|
AddLabel(beginloop_label);
|
||||||
|
OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
|
||||||
|
OpBranch(body_label);
|
||||||
|
|
||||||
|
AddLabel(body_label);
|
||||||
|
const Id expected_value{OpLoad(U32[1], pointer)};
|
||||||
|
const Id desired_value{OpBitFieldInsert(U32[1], expected_value, value, bit_offset, bit_count)};
|
||||||
|
const Id actual_value{OpAtomicCompareExchange(U32[1], pointer, scope_device, ordering_relaxed,
|
||||||
|
ordering_relaxed, desired_value, expected_value)};
|
||||||
|
const Id store_successful{OpIEqual(U1, expected_value, actual_value)};
|
||||||
|
OpBranchConditional(store_successful, endloop_label, continue_label);
|
||||||
|
|
||||||
|
AddLabel(endloop_label);
|
||||||
|
OpReturn();
|
||||||
|
|
||||||
|
AddLabel(continue_label);
|
||||||
|
OpBranch(beginloop_label);
|
||||||
|
|
||||||
|
OpFunctionEnd();
|
||||||
|
|
||||||
|
write_storage_cas_loop_func = func;
|
||||||
|
}
|
||||||
|
|
||||||
void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
|
void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
|
||||||
if (!info.uses_global_memory || !profile.support_int64) {
|
if (!info.uses_global_memory || !profile.support_int64) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -325,6 +325,8 @@ public:
|
||||||
Id f32x2_min_cas{};
|
Id f32x2_min_cas{};
|
||||||
Id f32x2_max_cas{};
|
Id f32x2_max_cas{};
|
||||||
|
|
||||||
|
Id write_storage_cas_loop_func{};
|
||||||
|
|
||||||
Id load_global_func_u32{};
|
Id load_global_func_u32{};
|
||||||
Id load_global_func_u32x2{};
|
Id load_global_func_u32x2{};
|
||||||
Id load_global_func_u32x4{};
|
Id load_global_func_u32x4{};
|
||||||
|
@ -372,6 +374,7 @@ private:
|
||||||
void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
|
void DefineTextures(const Info& info, u32& binding, u32& scaling_index);
|
||||||
void DefineImages(const Info& info, u32& binding, u32& scaling_index);
|
void DefineImages(const Info& info, u32& binding, u32& scaling_index);
|
||||||
void DefineAttributeMemAccess(const Info& info);
|
void DefineAttributeMemAccess(const Info& info);
|
||||||
|
void DefineWriteStorageCasLoopFunction(const Info& info);
|
||||||
void DefineGlobalMemoryFunctions(const Info& info);
|
void DefineGlobalMemoryFunctions(const Info& info);
|
||||||
void DefineRescalingInput(const Info& info);
|
void DefineRescalingInput(const Info& info);
|
||||||
void DefineRescalingInputPushConstant();
|
void DefineRescalingInputPushConstant();
|
||||||
|
|
Loading…
Reference in New Issue