diff --git a/chip/pm4_me.pas b/chip/pm4_me.pas index 320c4245..769442e7 100644 --- a/chip/pm4_me.pas +++ b/chip/pm4_me.pas @@ -878,6 +878,8 @@ var begin //ClearDepthTarget + CmdBuffer.EndRenderPass; + ri:=FetchImage(CmdBuffer, rt_info.DB_INFO.FImageInfo, [iu_depthstenc] @@ -1392,6 +1394,63 @@ begin end; +procedure pm4_FastClear(var ctx:t_me_render_context;node:p_pm4_node_FastClear); +{ +var + ri:TvImage2; + range:TVkImageSubresourceRange; + + resource_instance:p_pm4_resource_instance; +} +begin +{ + // + pm4_InitStream(ctx); + // + + StartFrameCapture; + + ctx.BeginCmdBuffer; + + ctx.Cmd.EndRenderPass; + + ri:=FetchImage(ctx.Cmd, + node^.RT.FImageInfo, + [iu_attachment] + ); + + ri.PushBarrier(ctx.Cmd, + ord(VK_ACCESS_TRANSFER_WRITE_BIT), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); + + range:=ri.GetSubresRange; + + ctx.Cmd.ClearColorImage(ri.FHandle, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + @node^.RT.CLEAR_COLOR, + 1,@range); + + //writeback + ri.mark_init; + + resource_instance:=ctx.node^.scope.find_image_resource_instance(node^.RT.FImageInfo); + Assert(resource_instance<>nil); + + if (resource_instance^.next_overlap.mem_usage<>0) then + begin + pm4_write_back(ctx.Cmd,ri); + // + resource_instance^.resource^.rwriteback:=False; + end else + begin + // + resource_instance^.resource^.rwriteback:=True; + end; + //writeback +} +end; + procedure Prepare_htile(var ctx:t_me_render_context; var UniformBuilder:TvUniformBuilder); var @@ -1591,7 +1650,7 @@ begin ctx.me^.knote_eventid($40,0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???) end; - ctx.on_idle; + //ctx.on_idle; end; procedure pm4_SubmitFlipEop(var ctx:t_me_render_context;node:p_pm4_node_SubmitFlipEop); @@ -1614,7 +1673,7 @@ begin ctx.me^.knote_eventid($40,0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???) end; - ctx.on_idle; + //ctx.on_idle; end; procedure pm4_EventWrite(var ctx:t_me_render_context;node:p_pm4_node_EventWrite); @@ -1624,7 +1683,8 @@ begin //CACHE_FLUSH_AND_INV_EVENT :Writeln(' eventType=FLUSH_AND_INV_EVENT'); //FLUSH_AND_INV_CB_PIXEL_DATA:Writeln(' eventType=FLUSH_AND_INV_CB_PIXEL_DATA'); //FLUSH_AND_INV_DB_DATA_TS :Writeln(' eventType=FLUSH_AND_INV_DB_DATA_TS'); - FLUSH_AND_INV_DB_META: + FLUSH_AND_INV_DB_META, //HTILE + FLUSH_AND_INV_CB_META: //CMASK begin if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then begin @@ -1633,7 +1693,6 @@ begin end; end; //FLUSH_AND_INV_CB_DATA_TS :Writeln(' eventType=FLUSH_AND_INV_CB_DATA_TS'); - //FLUSH_AND_INV_CB_META :Writeln(' eventType=FLUSH_AND_INV_CB_META'); THREAD_TRACE_MARKER: begin // @@ -1961,6 +2020,7 @@ begin ntDrawIndex2 :pm4_Draw (ctx,Pointer(ctx.node)); ntDrawIndexAuto :pm4_Draw (ctx,Pointer(ctx.node)); ntClearDepth :pm4_Draw (ctx,Pointer(ctx.node)); + ntFastClear :pm4_FastClear (ctx,Pointer(ctx.node)); ntDispatchDirect:pm4_DispatchDirect(ctx,Pointer(ctx.node)); ntEventWrite :pm4_EventWrite (ctx,Pointer(ctx.node)); ntEventWriteEop :pm4_EventWriteEop (ctx,Pointer(ctx.node)); @@ -2016,7 +2076,7 @@ begin ctx.rel_time:=0; //reset time // - ctx.on_idle; + //ctx.on_idle; // RTLEventWaitFor(me^.event); diff --git a/chip/pm4_stream.pas b/chip/pm4_stream.pas index d21303be..e25f941c 100644 --- a/chip/pm4_stream.pas +++ b/chip/pm4_stream.pas @@ -294,7 +294,7 @@ type p_pm4_node_FastClear=^t_pm4_node_FastClear; t_pm4_node_FastClear=object(t_pm4_node) - CX_REG:TCONTEXT_REG_GROUP; // 0xA000 + RT:TRT_INFO; end; p_pm4_node_Resolve=^t_pm4_node_Resolve; @@ -959,13 +959,40 @@ end; procedure t_pm4_stream.FastClear(var CX_REG:TCONTEXT_REG_GROUP); var + GPU_REGS:TGPU_REGS; + RT:TRT_INFO; + node:p_pm4_node_FastClear; begin + GPU_REGS:=Default(TGPU_REGS); + GPU_REGS.CX_REG:=@CX_REG; + node:=allocator.Alloc(SizeOf(t_pm4_node_FastClear)); node^.ntype :=ntFastClear; node^.scope :=Default(t_pm4_resource_curr_scope); - node^.CX_REG:=CX_REG; + + // + RT:=GPU_REGS.GET_RT_INFO(0); + + { + //clear TM_READ + RT.IMAGE_USAGE:=RT.IMAGE_USAGE and (not TM_READ); + //set TM_CLEAR + RT.IMAGE_USAGE:=RT.IMAGE_USAGE or TM_CLEAR; + + // + + insert_image_resource(@node^.scope, + RT.FImageInfo, + RT.IMAGE_USAGE, + [iu_attachment]); + + } + + // + node^.RT:=RT; + // add_node(node); end; diff --git a/sys/dev/dev_gc.pas b/sys/dev/dev_gc.pas index ce1e6f75..275062da 100644 --- a/sys/dev/dev_gc.pas +++ b/sys/dev/dev_gc.pas @@ -251,6 +251,8 @@ begin Continue; end; + pm4_me_gfx.on_idle(); + RTLEventWaitFor(ring_gfx_event); until false; diff --git a/vulkan/vCmdBuffer.pas b/vulkan/vCmdBuffer.pas index be4d9b37..091506d7 100644 --- a/vulkan/vCmdBuffer.pas +++ b/vulkan/vCmdBuffer.pas @@ -1369,6 +1369,7 @@ begin Exit; end; + EndRenderPass; if (not BeginCmdBuffer) then Exit; Case eventType of @@ -1383,6 +1384,17 @@ begin ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); //dstStageMask end; + FLUSH_AND_INV_CB_META: + begin + Inc(cmd_count); + + vkMemoryBarrier(FCmdbuf, + VK_ACCESS_PS, //srcAccessMask + ord(VK_ACCESS_TRANSFER_WRITE_BIT), //dstAccessMask + ord(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT), //srcStageMask + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); //dstStageMask + + end; else Assert(false,'WriteEvent.eventType'); diff --git a/vulkan/vShader.pas b/vulkan/vShader.pas index c2029ecc..148da1b4 100644 --- a/vulkan/vShader.pas +++ b/vulkan/vShader.pas @@ -367,13 +367,15 @@ var begin if (size<=SizeOf(TSPIRVHeader)) then Exit; if (PSPIRVHeader(data)^.MAGIC<>MagicNumber) then Exit; + data:=data+SizeOf(TSPIRVHeader); size:=size-SizeOf(TSPIRVHeader); //orig_data:=data; //orig_size:=size; - repeat + while (size<>0) do + begin I:=PSPIRVInstruction(data)^; Case I.OP of OpSourceExtension: @@ -449,7 +451,7 @@ begin if (size=1) then + begin + FInstance.STEP_RATE_0:=STEP_RATE_0; + end; + + if (VGPR_COMP_CNT>=2) then + begin + FInstance.STEP_RATE_1:=STEP_RATE_1; + end; + +end; + /// function GetSharpByPatch(pData:Pointer;const addr:ADataLayout):Pointer; diff --git a/vulkan/vShaderManager.pas b/vulkan/vShaderManager.pas index b26da685..d45fc547 100644 --- a/vulkan/vShaderManager.pas +++ b/vulkan/vShaderManager.pas @@ -391,6 +391,29 @@ begin end; end; +function test_instance(FShader:TvShaderExt;FStage:TvShaderStage;var GPU_REGS:TGPU_REGS):Boolean; +var + VGPR_COMP_CNT:Byte; +begin + if (FStage<>vShaderStageVs) then Exit(True); + + VGPR_COMP_CNT:=GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_VS.VGPR_COMP_CNT; + + if (FShader.FInstance.VGPR_COMP_CNT<>VGPR_COMP_CNT) then Exit(False); + + if (VGPR_COMP_CNT>=1) then + begin + if (FShader.FInstance.STEP_RATE_0<>GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_0) then Exit(False); + end; + + if (VGPR_COMP_CNT>=2) then + begin + if (FShader.FInstance.STEP_RATE_1<>GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_1) then Exit(False); + end; + + Result:=True; +end; + function test_unif(FShader:TvShaderExt;FDescSetId:Integer;pUserData:Pointer):Boolean; var ch:TvBufOffsetChecker; @@ -460,6 +483,7 @@ begin FShader:=t.FShaderAliases[i]; if test_func(FShader,pUserData) then + if test_instance(FShader,FStage,GPU_REGS) then if test_unif(FShader,FDescSetId,pUserData) then //Checking offsets within a shader if test_push_const(FShader,pc_offset,pc_size) then begin @@ -481,6 +505,13 @@ begin FShader:=t.AddShader(FDescSetId,M,pUserData); + if (FStage=vShaderStageVs) then + begin + FShader.SetInstance(GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_VS.VGPR_COMP_CNT, + GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_0, + GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_1); + end; + DumpSpv(FStage,M); M.Free;