// Patch summary: adds a "PfpSyncMe" stream node so the PFP-side command parser
// can block until the ME-side consumer has reached a given point in the stream.
//
// chip/pm4_stream.pas
//   * Appends ntPfpSyncMe to the node-type enumeration (after ntDispatchDirect).
//   * Declares t_pm4_node_PfpSyncMe (derived from t_pm4_node) carrying a single
//     field, event:PRTLEvent, plus its pointer type p_pm4_node_PfpSyncMe.
//   * Adds t_pm4_stream.PfpSyncMe(event): allocates a node through
//     allocator.Alloc, sets ntype to ntPfpSyncMe, sets scope to
//     Default(t_pm4_resource_curr_scope), stores the event and calls add_node.
//
// chip/pm4_me.pas
//   * Adds pm4_PfpSyncMe(ctx,node): exits immediately when
//     ctx.WaitConfirmOrSwitch returns False, otherwise calls
//     RTLEventSetEvent(node^.event).
//   * Routes ntPfpSyncMe to pm4_PfpSyncMe in the node-type case statement
//     (the one whose else branch writes 'me:+' and the node type to stderr).
//
// chip/pm4_pfp.pas
//   * Adds an event:PRTLEvent field next to curr_ibuf / LastSetReg
//     (presumably the PFP context type behind p_pfp_ctx - confirm).
//   * Adds FlushAndWaitMe(pctx). Only when pctx^.stream_type=stGfxDcb it:
//     creates pctx^.event with RTLEventCreate if it is still nil, queues a
//     PfpSyncMe node on stream[stGfxDcb], calls Flush_stream(stGfxDcb), then
//     blocks in RTLEventWaitFor(event) (no timeout is passed).
//   * Call-site changes:
//       - after the EventWriteEop emission: Flush_stream(stGfxDcb) is left
//         commented out and replaced by FlushAndWaitMe(pctx);
//       - after the EventWriteEos emission: FlushAndWaitMe(pctx) is added;
//       - after the DrawIndexAuto emission: FlushAndWaitMe(pctx) is added;
//       - in the "stallCommandBufferParser" handler (the one preceding
//         onPushMarker): Flush_stream(stGfxDcb) is replaced by
//         FlushAndWaitMe(pctx);
//       - WAIT_REG_MEM_ENGINE_PFP branch: Flush_stream(pctx^.stream_type) is
//         added before the me_test_mem / msleep_td polling loop.
//   * Adds commented-out FlushAndWaitMe(pctx) calls (no runtime effect) in the
//     "Execute on the parser side" branch, in the WRITE_DATA_ENGINE_PFP
//     branch, after the DrawIndex2 emission and after the emission that passes
//     Body^.indexOffset.
//
// NOTE(review): FlushAndWaitMe performs no flush at all when
//   stream_type<>stGfxDcb, while two of the call sites above used to call
//   Flush_stream(stGfxDcb) unconditionally - confirm those handlers can only
//   run on the stGfxDcb stream.
// NOTE(review): nothing in this patch destroys pctx^.event - confirm it is
//   released elsewhere.
// NOTE(review): pm4_PfpSyncMe returns without signalling when
//   WaitConfirmOrSwitch is False; presumably the node is processed again
//   later - confirm, because the PFP side waits without a timeout.
// NOTE(review): the lazy creation relies on the new event field starting out
//   as nil; init is not touched by this patch - confirm.
//
diff --git a/chip/pm4_me.pas b/chip/pm4_me.pas index 58f277a5..b2ea5914 100644 --- a/chip/pm4_me.pas +++ b/chip/pm4_me.pas @@ -3292,6 +3292,13 @@ begin end; end; +procedure pm4_PfpSyncMe(var ctx:t_me_render_context;node:p_pm4_node_PfpSyncMe); +begin + if not ctx.WaitConfirmOrSwitch then Exit; + + RTLEventSetEvent(node^.event); +end; + // procedure pm4_me_thread(me:p_pm4_me); SysV_ABI_CDecl; @@ -3402,6 +3409,8 @@ begin ntWaitOnCECounter :pm4_WaitOnCECounter (ctx,Pointer(ctx.node)); ntWaitOnDECounterDiff:pm4_WaitOnDECounterDiff(ctx,Pointer(ctx.node)); + ntPfpSyncMe :pm4_PfpSyncMe (ctx,Pointer(ctx.node)); + else begin Writeln(stderr,'me:+',ctx.node^.ntype); diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index 2c8971ea..4e006886 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -56,6 +56,7 @@ type curr_ibuf :p_pm4_ibuffer; // LastSetReg:Word; + event:PRTLEvent; // function stream_type:t_pm4_stream_type; procedure init; @@ -1045,6 +1046,28 @@ begin end; +procedure FlushAndWaitMe(pctx:p_pfp_ctx); +var + event:PRTLEvent; +begin + if (pctx^.stream_type=stGfxDcb) then + begin + + if (pctx^.event=nil) then + begin + pctx^.event:=RTLEventCreate; + end; + + event:=pctx^.event; + + pctx^.stream[stGfxDcb].PfpSyncMe(event); + + pctx^.Flush_stream(stGfxDcb); + + RTLEventWaitFor(event); + end; +end; + procedure onEventWrite(pctx:p_pfp_ctx;Body:PTPM4CMDEVENTWRITE); begin Assert(pctx^.stream_type=stGfxDcb); @@ -1109,7 +1132,9 @@ begin pctx^.stream[stGfxDcb].EventWriteEop(Pointer(Body^.address),Body^.DATA,Body^.eventType,Body^.dataSel,Body^.intSel); - pctx^.Flush_stream(stGfxDcb); + //pctx^.Flush_stream(stGfxDcb); + + FlushAndWaitMe(pctx); end; procedure onEventWriteEos(pctx:p_pfp_ctx;Body:PPM4CMDEVENTWRITEEOS); @@ -1143,6 +1168,8 @@ begin DWORD(pctx^.CX_REG.VGT_EVENT_INITIATOR):=Body^.eventType; pctx^.stream[stGfxDcb].EventWriteEos(Pointer(Body^.address),Body^.data,Body^.eventType,Body^.command); + + FlushAndWaitMe(pctx); end; const @@ -1196,6 +1223,8 @@ begin begin //Execute 
on the parser side + //FlushAndWaitMe(pctx); + adrDst_dmem:=get_dmem_ptr(Pointer(adrDst)); case (srcSel or (dstSel shl 4)) of @@ -1283,6 +1312,8 @@ begin WRITE_DATA_ENGINE_PFP: begin + //FlushAndWaitMe(pctx); + case dstSel of WRITE_DATA_DST_SEL_MEMORY_SYNC, //writeDataInline WRITE_DATA_DST_SEL_TCL2, //writeDataInlineThroughL2 @@ -1353,6 +1384,8 @@ begin WAIT_REG_MEM_ENGINE_PFP: begin + pctx^.Flush_stream(pctx^.stream_type); + while not me_test_mem(Pointer(Body^.pollAddress),Body^.reference,Body^.mask,Body^.compareFunc) do begin msleep_td(hz div 10000); @@ -1695,6 +1728,8 @@ begin pctx^.stream[stGfxDcb].DrawIndex2(pctx^.SG_REG, pctx^.CX_REG, pctx^.UC_REG); + + //FlushAndWaitMe(pctx); end; procedure onDrawIndexOffset2(pctx:p_pfp_ctx;Body:PPM4CMDDRAWINDEXOFFSET2); @@ -1716,6 +1751,8 @@ begin pctx^.CX_REG, pctx^.UC_REG, Body^.indexOffset); + + //FlushAndWaitMe(pctx); end; procedure onDrawIndexAuto(pctx:p_pfp_ctx;Body:PPM4CMDDRAWINDEXAUTO); @@ -1740,6 +1777,8 @@ begin pctx^.stream[stGfxDcb].DrawIndexAuto(pctx^.SG_REG, pctx^.CX_REG, pctx^.UC_REG); + + FlushAndWaitMe(pctx); end; procedure onDrawIndexIndirectCountMulti(pctx:p_pfp_ctx;Body:PPM4CMDDRAWINDEXINDIRECTMULTI); @@ -1781,7 +1820,7 @@ begin //stallCommandBufferParser //PFP waits until the ME completes all preceding commands before allowing the next batch to proceed. 
- pctx^.Flush_stream(stGfxDcb); + FlushAndWaitMe(pctx); end; procedure onPushMarker(pctx:p_pfp_ctx;Body:PChar;size:Integer); diff --git a/chip/pm4_stream.pas b/chip/pm4_stream.pas index fb647f3b..f730b5ad 100644 --- a/chip/pm4_stream.pas +++ b/chip/pm4_stream.pas @@ -129,7 +129,8 @@ type ntDrawIndex2, ntDrawIndexOffset2, ntDrawIndexAuto, - ntDispatchDirect + ntDispatchDirect, + ntPfpSyncMe ); const @@ -375,6 +376,11 @@ type end; + p_pm4_node_PfpSyncMe=^t_pm4_node_PfpSyncMe; + t_pm4_node_PfpSyncMe=object(t_pm4_node) + event:PRTLEvent; + end; + p_pm4_stream=^t_pm4_stream; t_pm4_stream=object(t_pm4_resource_stream_scope) // @@ -441,6 +447,7 @@ type var UC_REG:TUSERCONFIG_REG_SHORT); procedure Build_cs_info (node:p_pm4_node_DispatchDirect;var GPU_REGS:TGPU_REGS); procedure DispatchDirect(var SC_REG:TSH_REG_COMPUTE_GROUP); + procedure PfpSyncMe(event:PRTLEvent); end; implementation @@ -1811,6 +1818,20 @@ begin add_node(node); end; +procedure t_pm4_stream.PfpSyncMe(event:PRTLEvent); +var + node:p_pm4_node_PfpSyncMe; +begin + node:=allocator.Alloc(SizeOf(t_pm4_node_PfpSyncMe)); + + node^.ntype:=ntPfpSyncMe; + node^.scope:=Default(t_pm4_resource_curr_scope); + + node^.event:=event; + + add_node(node); +end; + // procedure t_cache_block_allocator.init;