diff --git a/chip/pm4_me.pas b/chip/pm4_me.pas index 7f4f9d2e..6937a4ce 100644 --- a/chip/pm4_me.pas +++ b/chip/pm4_me.pas @@ -208,18 +208,12 @@ end; var FCmdPool:TvCmdPool; -procedure pm4_DrawPrepare(SH_REG:PSH_REG_GROUP; - CX_REG:PCONTEXT_REG_GROUP; - UC_REG:PUSERCONFIG_REG_SHORT; +procedure pm4_DrawPrepare(var rt_info:t_pm4_rt_info; {rt_info: draw state snapshot stored in the draw node; replaces the three raw register group pointers} CmdBuffer:TvCmdBuffer; RenderCmd:TvRenderTargets); var i:Integer; - GPU_REGS:TGPU_REGS; - - FShaderGroup:TvShaderGroup; - FAttrBuilder:TvAttrBuilder; FUniformBuilder:TvUniformBuilder; @@ -227,8 +221,6 @@ var RP_KEY:TvRenderPassKey; RP:TvRenderPass2; - BI:TBLEND_INFO; - GP_KEY:TvGraphicsPipelineKey; GP:TvGraphicsPipeline2; @@ -247,52 +239,25 @@ var FDescriptorGroup:TvDescriptorGroup; begin - GPU_REGS:=Default(TGPU_REGS); - GPU_REGS.SH_REG:=SH_REG; - GPU_REGS.CX_REG:=CX_REG; - GPU_REGS.UC_REG:=UC_REG; - - for i:=0 to 31 do - begin - if (CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>0) and (CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>i) then - begin - Assert(false, 'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].OFFSET=' +IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET )); - end; - Assert(CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].DEFAULT_VAL=' +IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL )); - Assert(CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FLAT_SHADE=' +IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE )); - Assert(CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE=0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FP16_INTERP_MODE='+IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE)); - end; - - {fdump_ps:=}DumpPS(GPU_REGS); - {fdump_vs:=}DumpVS(GPU_REGS); - - FShaderGroup:=FetchShaderGroup(GPU_REGS,nil{@pa}); - Assert(FShaderGroup<>nil); RP_KEY.Clear; - RenderCmd.RT_COUNT:=0; + RenderCmd.RT_COUNT:=rt_info.RT_COUNT; {color target count is taken from the snapshot instead of being counted here} - if GPU_REGS.COMP_ENABLE then - For i:=0 to GPU_REGS.GET_HI_RT do + if (rt_info.RT_COUNT<>0) then {skip the copy loop when there are no color targets} + For i:=0 to rt_info.RT_COUNT-1 do begin - 
RenderCmd.RT_INFO[RenderCmd.RT_COUNT]:=GPU_REGS.GET_RT_INFO(i); + RenderCmd.RT_INFO[i]:=rt_info.RT_INFO[i]; {copy the i-th color target description into the render command} - //hack - //RT_INFO[RT_COUNT].IMAGE_USAGE:=TM_CLEAR or TM_WRITE; - // - - RP_KEY.AddColorAt(RenderCmd.RT_INFO[RenderCmd.RT_COUNT].attachment, - RenderCmd.RT_INFO[RenderCmd.RT_COUNT].FImageInfo.cformat, - RenderCmd.RT_INFO[RenderCmd.RT_COUNT].IMAGE_USAGE, - RenderCmd.RT_INFO[RenderCmd.RT_COUNT].FImageInfo.params.samples); - - Inc(RenderCmd.RT_COUNT); + RP_KEY.AddColorAt(RenderCmd.RT_INFO[i].attachment, + RenderCmd.RT_INFO[i].FImageInfo.cformat, + RenderCmd.RT_INFO[i].IMAGE_USAGE, + RenderCmd.RT_INFO[i].FImageInfo.params.samples); end; - if GPU_REGS.DB_ENABLE then + if rt_info.DB_ENABLE then begin - RenderCmd.DB_INFO:=GPU_REGS.GET_DB_INFO; + RenderCmd.DB_INFO:=rt_info.DB_INFO; {depth target description also comes from the snapshot} RP_KEY.AddDepthAt(RenderCmd.RT_COUNT, //add to last attachment id RenderCmd.DB_INFO.FImageInfo.cformat, @@ -305,22 +270,20 @@ begin RP:=FetchRenderPass(CmdBuffer,@RP_KEY); - BI:=GPU_REGS.GET_BLEND_INFO; - GP_KEY.Clear; GP_KEY.FRenderPass :=RP; - GP_KEY.FShaderGroup:=FShaderGroup; + GP_KEY.FShaderGroup:=rt_info.ShaderGroup; - GP_KEY.SetBlendInfo(BI.logicOp,@BI.blendConstants); + GP_KEY.SetBlendInfo(rt_info.BLEND_INFO.logicOp,@rt_info.BLEND_INFO.blendConstants); - GP_KEY.SetPrimType (GPU_REGS.GET_PRIM_TYPE); - GP_KEY.SetPrimReset(GPU_REGS.GET_PRIM_RESET); + GP_KEY.SetPrimType (TVkPrimitiveTopology(rt_info.PRIM_TYPE)); {PRIM_TYPE is kept as a Byte ordinal; cast it back to the enum} + GP_KEY.SetPrimReset(rt_info.PRIM_RESET); - For i:=0 to 15 do - if GPU_REGS.VP_ENABLE(i) then + if (rt_info.VP_COUNT<>0) then + For i:=0 to rt_info.VP_COUNT-1 do {VPORT and SCISSOR are read pairwise for the first VP_COUNT entries} begin - GP_KEY.AddVPort(GPU_REGS.GET_VPORT(i),GPU_REGS.GET_SCISSOR(i)); + GP_KEY.AddVPort(rt_info.VPORT[i],rt_info.SCISSOR[i]); end; if (RenderCmd.RT_COUNT<>0) then @@ -330,19 +293,19 @@ begin end; FAttrBuilder:=Default(TvAttrBuilder); - FShaderGroup.ExportAttrBuilder(FAttrBuilder,GPU_REGS); + rt_info.ShaderGroup.ExportAttrBuilder(FAttrBuilder,@rt_info.USERDATA); {pass the captured user data instead of the live register view} if not limits.VK_EXT_vertex_input_dynamic_state then 
begin GP_KEY.SetVertexInput(FAttrBuilder); end; - GP_KEY.rasterizer :=GPU_REGS.GET_RASTERIZATION; - GP_KEY.multisampling:=GPU_REGS.GET_MULTISAMPLE; + GP_KEY.rasterizer :=rt_info.RASTERIZATION; + GP_KEY.multisampling:=rt_info.MULTISAMPLE; - GP_KEY.SetProvoking(GPU_REGS.GET_PROVOKING); + GP_KEY.SetProvoking(TVkProvokingVertexModeEXT(rt_info.PROVOKING)); {PROVOKING is kept as a Byte ordinal; cast it back to the enum} - if GPU_REGS.DB_ENABLE then + if rt_info.DB_ENABLE then begin GP_KEY.DepthStencil:=RenderCmd.DB_INFO.ds_state; end; @@ -354,7 +317,7 @@ begin FB_KEY:=Default(TvFramebufferImagelessKey); FB_KEY.SetRenderPass(RP); - FB_KEY.SetSize(GPU_REGS.GET_SCREEN_SIZE); + FB_KEY.SetSize(rt_info.SCREEN_SIZE); if (RenderCmd.RT_COUNT<>0) then For i:=0 to RenderCmd.RT_COUNT-1 do @@ -362,7 +325,7 @@ begin FB_KEY.AddImageAt(RenderCmd.RT_INFO[i].FImageInfo); end; - if GPU_REGS.DB_ENABLE then + if rt_info.DB_ENABLE then begin FB_KEY.AddImageAt(RenderCmd.DB_INFO.FImageInfo); end; @@ -371,13 +334,13 @@ begin FB_KEY2:=Default(TvFramebufferBindedKey); FB_KEY2.SetRenderPass(RP); - FB_KEY2.SetSize(GPU_REGS.GET_SCREEN_SIZE); + FB_KEY2.SetSize(rt_info.SCREEN_SIZE); end; RenderCmd.FRenderPass:=RP; RenderCmd.FPipeline :=GP; - RenderCmd.FRenderArea:=GPU_REGS.GET_SCREEN; + RenderCmd.FRenderArea:=rt_info.SCREEN_RECT; {render area uses the captured screen rectangle} if limits.VK_KHR_imageless_framebuffer then begin @@ -422,7 +385,7 @@ begin end; - if GPU_REGS.DB_ENABLE then + if rt_info.DB_ENABLE then begin RenderCmd.AddClearColor(RenderCmd.DB_INFO.CLEAR_VALUE); @@ -459,7 +422,7 @@ begin //////// FUniformBuilder:=Default(TvUniformBuilder); - FShaderGroup.ExportUnifBuilder(FUniformBuilder,GPU_REGS); + rt_info.ShaderGroup.ExportUnifBuilder(FUniformBuilder,@rt_info.USERDATA); {uniform enumeration also reads the captured user data} if (Length(FUniformBuilder.FImages)<>0) then begin @@ -515,7 +478,7 @@ begin if (FDescriptorGroup=nil) then begin - FDescriptorGroup:=FetchDescriptorGroup(CmdBuffer,FShaderGroup.FLayout); + FDescriptorGroup:=FetchDescriptorGroup(CmdBuffer,rt_info.ShaderGroup.FLayout); end; FDescriptorGroup.FSets[fset].BindImg(bind,0, @@ -537,7 
+500,7 @@ begin if (FDescriptorGroup=nil) then begin - FDescriptorGroup:=FetchDescriptorGroup(CmdBuffer,FShaderGroup.FLayout); + FDescriptorGroup:=FetchDescriptorGroup(CmdBuffer,rt_info.ShaderGroup.FLayout); end; FDescriptorGroup.FSets[fset].BindSmp(bind,0,sm.FHandle); @@ -573,7 +536,7 @@ begin if (FDescriptorGroup=nil) then begin - FDescriptorGroup:=FetchDescriptorGroup(CmdBuffer,FShaderGroup.FLayout); + FDescriptorGroup:=FetchDescriptorGroup(CmdBuffer,rt_info.ShaderGroup.FLayout); end; FDescriptorGroup.FSets[fset].BindBuf(bind,0, @@ -592,8 +555,6 @@ begin CmdBuffer.BindSets(VK_PIPELINE_BIND_POINT_GRAPHICS,FDescriptorGroup); end; - CmdBuffer.FinstanceCount:=GPU_REGS.UC_REG^.VGT_NUM_INSTANCES; - CmdBuffer.FINDEX_TYPE :=GPU_REGS.GET_INDEX_TYPE; end; procedure pm4_Writeback(CmdBuffer:TvCmdBuffer; @@ -679,7 +640,7 @@ begin //write back end; -procedure pm4_DrawIndex2(node:p_pm4_node_DrawIndex2); +procedure pm4_Draw(node:p_pm4_node_draw); {single handler for both ntDrawIndex2 and ntDrawIndexAuto nodes} var RenderCmd:TvRenderTargets; @@ -703,72 +664,26 @@ begin RenderCmd:=TvRenderTargets.Create; - pm4_DrawPrepare(@node^.SH_REG, - @node^.CX_REG, - @node^.UC_REG, + pm4_DrawPrepare(node^.rt_info, CmdBuffer, RenderCmd); - CmdBuffer.DrawIndex2(node^.addr, - node^.UC_REG.VGT_NUM_INDICES); - ///////// + CmdBuffer.FinstanceCount:=node^.numInstances; {instance count and index type are set here from the node, after pm4_DrawPrepare} + CmdBuffer.FINDEX_TYPE :=TVkIndexType(node^.INDEX_TYPE); {INDEX_TYPE is kept as a Byte ordinal; cast it back to the enum} - CmdBuffer.EndRenderPass; - - pm4_Writeback(CmdBuffer,RenderCmd); - - r:=CmdBuffer.QueueSubmit; - - if (r<>VK_SUCCESS) then - begin - Assert(false,'QueueSubmit'); + case node^.ntype of {select the draw call by node type} + ntDrawIndex2: + begin + CmdBuffer.DrawIndex2(Pointer(node^.indexBase),node^.indexCount); {indexBase is passed untranslated} + end; + ntDrawIndexAuto: + begin + CmdBuffer.DrawIndexAuto(node^.indexCount); + end; + else; + Assert(false); {no other node type is expected in this handler} end; - Writeln('QueueSubmit:',r); - - r:=CmdBuffer.Wait(QWORD(-1)); - - Writeln('CmdBuffer:',r); - - r:=RenderQueue.WaitIdle; - Writeln('WaitIdle:',r); - - CmdBuffer.ReleaseResource; - - CmdBuffer.Free; -end; - -procedure 
pm4_DrawIndexAuto(node:p_pm4_node_DrawIndexAuto); -var - RenderCmd:TvRenderTargets; - - CmdBuffer:TvCmdBuffer; - - r:TVkResult; -begin - - StartFrameCapture; - - // - if (FCmdPool=nil) then - begin - FCmdPool:=TvCmdPool.Create(VulkanApp.FGFamily); - end; - - CmdBuffer:=TvCmdBuffer.Create(FCmdPool,RenderQueue); - //CmdBuffer.submit_id:=submit_id; - - // - - RenderCmd:=TvRenderTargets.Create; - - pm4_DrawPrepare(@node^.SH_REG, - @node^.CX_REG, - @node^.UC_REG, - CmdBuffer, - RenderCmd); - - CmdBuffer.DrawIndexAuto(node^.UC_REG.VGT_NUM_INDICES); ///////// CmdBuffer.EndRenderPass; @@ -887,8 +802,8 @@ begin Writeln('+',node^.ntype); case node^.ntype of - ntDrawIndex2 :pm4_DrawIndex2 (Pointer(node)); - ntDrawIndexAuto:pm4_DrawIndexAuto(Pointer(node)); + ntDrawIndex2 :pm4_Draw (Pointer(node)); + ntDrawIndexAuto:pm4_Draw (Pointer(node)); {both draw node types are routed to pm4_Draw} ntEventWriteEop:pm4_EventWriteEop(Pointer(node),me); ntSubmitFlipEop:pm4_SubmitFlipEop(Pointer(node),me); else diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index ef9aa6d1..f86f8f22 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -1300,7 +1300,6 @@ end; procedure onIndexBufferSize(pctx:p_pfp_ctx;Body:PPM4CMDDRAWINDEXBUFFERSIZE); begin - pctx^.CX_REG.VGT_DMA_SIZE :=Body^.numIndices; pctx^.UC_REG.VGT_NUM_INDICES:=Body^.numIndices; end; @@ -1325,9 +1324,6 @@ begin end; procedure onDrawIndex2(pctx:p_pfp_ctx;Body:PPM4CMDDRAWINDEX2); -var - addr:Pointer; - size:QWORD; begin if (DWORD(Body^.drawInitiator)<>0) then begin @@ -1341,19 +1337,7 @@ begin pctx^.UC_REG.VGT_NUM_INDICES :=Body^.indexCount; pctx^.CX_REG.VGT_DRAW_INITIATOR :=Body^.drawInitiator; - addr:=nil; - size:=0; - - if get_dmem_ptr(PPointer(@Body^.indexBaseLo)^,@addr,@size) then - begin - // - end else - begin - Assert(false,'addr:0x'+HexStr(PPointer(@Body^.indexBaseLo)^)+' not in dmem!'); - end; - - pctx^.stream_dcb.DrawIndex2(addr, - pctx^.SH_REG, + pctx^.stream_dcb.DrawIndex2(pctx^.SH_REG, {no address argument any more; only the three register groups are passed} pctx^.CX_REG, pctx^.UC_REG); end; @@ -1439,6 +1423,9 @@ begin 
mmCB_COLOR6_DCC_BASE, mmCB_COLOR7_DCC_BASE, + mmDB_STENCIL_CLEAR, + mmDB_RENDER_CONTROL, + mmDB_HTILE_SURFACE: begin if pctx^.print_hint then diff --git a/chip/pm4_stream.pas b/chip/pm4_stream.pas index e2f78945..d44b4527 100644 --- a/chip/pm4_stream.pas +++ b/chip/pm4_stream.pas @@ -6,6 +6,7 @@ unit pm4_stream; interface uses + sysutils, mqueue, LFQueue, md_map, @@ -14,7 +15,30 @@ uses si_ci_vi_merged_offset, si_ci_vi_merged_enum, si_ci_vi_merged_registers, - si_ci_vi_merged_groups; + si_ci_vi_merged_groups, + + Vulkan, + vDevice, + vBuffer, + vHostBufferManager, + vImage, + vImageManager, + vRender, + vRenderPassManager, + vPipelineManager, + vFramebufferManager, + vShader, + vShaderExt, + vShaderManager, + vRegs2Vulkan, + vCmdBuffer, + vPipeline, + vSetsPoolManager, + vSampler, + vSamplerManager, + + shader_dump + ; type t_cache_block_allocator=object @@ -50,6 +74,34 @@ type Procedure Free; end; + p_pm4_rt_info=^t_pm4_rt_info; + t_pm4_rt_info=object {per-draw snapshot of the state that pm4_DrawPrepare reads; filled by t_pm4_stream.Build_rt_info} + USERDATA:TGPU_USERDATA; + + ShaderGroup:TvShaderGroup; + + RT_INFO:array[0..7] of TRT_INFO; + DB_INFO:TDB_INFO; {only assigned when DB_ENABLE is true} + + BLEND_INFO:TBLEND_INFO; + + VPORT :array[0..15] of TVkViewport; + SCISSOR:array[0..15] of TVkRect2D; + + RASTERIZATION:TVkPipelineRasterizationStateCreateInfo; + MULTISAMPLE :TVkPipelineMultisampleStateCreateInfo; + + SCREEN_RECT:TVkRect2D; + SCREEN_SIZE:TVkExtent2D; + + RT_COUNT :Byte; {number of RT_INFO entries that were filled} + DB_ENABLE :Boolean; + PRIM_TYPE :Byte; {ord() of the value returned by GET_PRIM_TYPE} + PRIM_RESET:Byte; + VP_COUNT :Byte; {number of VPORT/SCISSOR entries that were filled} + PROVOKING :Byte; {ord() of the value returned by GET_PROVOKING} + end; + t_pm4_node_type=( ntLoadConstRam, ntEventWrite, @@ -143,20 +195,17 @@ type CX_REG:TCONTEXT_REG_GROUP; // 0xA000 end; - p_pm4_node_DrawIndex2=^t_pm4_node_DrawIndex2; - t_pm4_node_DrawIndex2=object(t_pm4_node) - addr :Pointer; - // - SH_REG:TSH_REG_GROUP; // 0x2C00 - CX_REG:TCONTEXT_REG_GROUP; // 0xA000 - UC_REG:TUSERCONFIG_REG_SHORT; // 0xC000 - end; + p_pm4_node_draw=^t_pm4_node_draw; + t_pm4_node_draw=object(t_pm4_node) {one node layout shared by ntDrawIndex2 and ntDrawIndexAuto} + rt_info:t_pm4_rt_info; - p_pm4_node_DrawIndexAuto=^t_pm4_node_DrawIndexAuto; - 
t_pm4_node_DrawIndexAuto=object(t_pm4_node) - SH_REG:TSH_REG_GROUP; // 0x2C00 - CX_REG:TCONTEXT_REG_GROUP; // 0xA000 - UC_REG:TUSERCONFIG_REG_SHORT; // 0xC000 + indexBase :QWORD; {VGT_DMA_BASE combined with VGT_DMA_BASE_HI.BASE_ADDR shifted left by 32} + indexOffset :DWORD; {copy of VGT_INDX_OFFSET} + indexCount :DWORD; {copy of VGT_NUM_INDICES} + numInstances:DWORD; {copy of VGT_NUM_INSTANCES} + + INDEX_TYPE:Byte; {ord() of the value returned by GET_INDEX_TYPE} + SWAP_MODE :Byte; end; p_pm4_node_DispatchDirect=^t_pm4_node_DispatchDirect; @@ -188,10 +237,14 @@ type procedure FastClear (var CX_REG:TCONTEXT_REG_GROUP); procedure Resolve (var CX_REG:TCONTEXT_REG_GROUP); function ColorControl (var CX_REG:TCONTEXT_REG_GROUP):Boolean; - procedure DrawIndex2 (addr:Pointer; + procedure Build_rt_info(var rt_info:t_pm4_rt_info;var GPU_REGS:TGPU_REGS); + procedure BuildDraw (ntype:t_pm4_node_type; var SH_REG:TSH_REG_GROUP; var CX_REG:TCONTEXT_REG_GROUP; var UC_REG:TUSERCONFIG_REG_SHORT); + procedure DrawIndex2 (var SH_REG:TSH_REG_GROUP; + var CX_REG:TCONTEXT_REG_GROUP; + var UC_REG:TUSERCONFIG_REG_SHORT); procedure DrawIndexAuto(var SH_REG:TSH_REG_GROUP; var CX_REG:TCONTEXT_REG_GROUP; var UC_REG:TUSERCONFIG_REG_SHORT); @@ -399,42 +452,120 @@ begin end; -procedure t_pm4_stream.DrawIndex2(addr:Pointer; - var SH_REG:TSH_REG_GROUP; +procedure t_pm4_stream.Build_rt_info(var rt_info:t_pm4_rt_info;var GPU_REGS:TGPU_REGS); {Fills rt_info from GPU_REGS: user data, shader group, color/depth targets, blend, primitive, viewports, raster/multisample state and screen extents} +var + i:Integer; +begin + for i:=0 to 31 do {validate all 32 SPI_PS_INPUT_CNTL entries before capturing state} + begin + if (GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>0) and (GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>i) then {OFFSET must be 0 or equal to its own index} + begin + Assert(false, 'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].OFFSET=' +IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET )); + end; + Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].DEFAULT_VAL=' +IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL )); + Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FLAT_SHADE=' +IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE )); + 
Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE=0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FP16_INTERP_MODE='+IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE)); + end; + + GPU_REGS.export_user_data(@rt_info.USERDATA); {copy the per-stage user data into the snapshot} + + {fdump_ps:=}DumpPS(GPU_REGS); + {fdump_vs:=}DumpVS(GPU_REGS); + + rt_info.ShaderGroup:=FetchShaderGroupRT(GPU_REGS,nil{@pa}); + Assert(rt_info.ShaderGroup<>nil); + + rt_info.RT_COUNT:=0; + + if GPU_REGS.COMP_ENABLE then + For i:=0 to GPU_REGS.GET_HI_RT do {each target is stored at index RT_COUNT, which is then incremented} + begin + rt_info.RT_INFO[rt_info.RT_COUNT]:=GPU_REGS.GET_RT_INFO(i); + + Inc(rt_info.RT_COUNT); + end; + + rt_info.DB_ENABLE:=GPU_REGS.DB_ENABLE; + + if rt_info.DB_ENABLE then {DB_INFO is left untouched when depth is disabled} + begin + rt_info.DB_INFO:=GPU_REGS.GET_DB_INFO; + end; + + rt_info.BLEND_INFO:=GPU_REGS.GET_BLEND_INFO; + + rt_info.PRIM_TYPE :=ord(GPU_REGS.GET_PRIM_TYPE); + rt_info.PRIM_RESET:=GPU_REGS.GET_PRIM_RESET; + + rt_info.VP_COUNT:=0; + + For i:=0 to 15 do + if GPU_REGS.VP_ENABLE(i) then {only enabled viewports are stored, packed at index VP_COUNT} + begin + rt_info.VPORT [rt_info.VP_COUNT]:=GPU_REGS.GET_VPORT(i); + rt_info.SCISSOR[rt_info.VP_COUNT]:=GPU_REGS.GET_SCISSOR(i) ; + + Inc(rt_info.VP_COUNT); + end; + + rt_info.RASTERIZATION:=GPU_REGS.GET_RASTERIZATION; + rt_info.MULTISAMPLE :=GPU_REGS.GET_MULTISAMPLE; + + rt_info.PROVOKING:=ord(GPU_REGS.GET_PROVOKING); + + rt_info.SCREEN_RECT:=GPU_REGS.GET_SCREEN; + rt_info.SCREEN_SIZE:=GPU_REGS.GET_SCREEN_SIZE; + +end; + +procedure t_pm4_stream.BuildDraw(ntype:t_pm4_node_type; + var SH_REG:TSH_REG_GROUP; + var CX_REG:TCONTEXT_REG_GROUP; + var UC_REG:TUSERCONFIG_REG_SHORT); {Allocates a t_pm4_node_draw, fills it from the three register groups and adds it with add_node} +var + GPU_REGS:TGPU_REGS; + + node:p_pm4_node_draw; + +begin + GPU_REGS:=Default(TGPU_REGS); {GPU_REGS only stores pointers to the caller's register groups} + GPU_REGS.SH_REG:=@SH_REG; + GPU_REGS.CX_REG:=@CX_REG; + GPU_REGS.UC_REG:=@UC_REG; + + node:=allocator.Alloc(SizeOf(t_pm4_node_draw)); + + node^.ntype :=ntype; + + Build_rt_info(node^.rt_info,GPU_REGS); + + node^.indexBase :=CX_REG.VGT_DMA_BASE or (QWORD(CX_REG.VGT_DMA_BASE_HI.BASE_ADDR) shl 32); {widen to QWORD before shifting so the high part is not lost} + node^.indexOffset :=CX_REG.VGT_INDX_OFFSET; + 
node^.indexCount :=UC_REG.VGT_NUM_INDICES; + node^.numInstances:=UC_REG.VGT_NUM_INSTANCES; + + node^.INDEX_TYPE:=ord(GPU_REGS.GET_INDEX_TYPE); {stored as a Byte ordinal} + node^.SWAP_MODE :=CX_REG.VGT_DMA_INDEX_TYPE.SWAP_MODE; + + add_node(node); +end; + +procedure t_pm4_stream.DrawIndex2(var SH_REG:TSH_REG_GROUP; var CX_REG:TCONTEXT_REG_GROUP; var UC_REG:TUSERCONFIG_REG_SHORT); -var - node:p_pm4_node_DrawIndex2; begin if ColorControl(CX_REG) then Exit; - node:=allocator.Alloc(SizeOf(t_pm4_node_DrawIndex2)); - - node^.ntype :=ntDrawIndex2; - node^.addr :=addr; - node^.SH_REG:=SH_REG; - node^.CX_REG:=CX_REG; - node^.UC_REG:=UC_REG; - - add_node(node); + BuildDraw(ntDrawIndex2,SH_REG,CX_REG,UC_REG); {node construction is delegated to BuildDraw} end; procedure t_pm4_stream.DrawIndexAuto(var SH_REG:TSH_REG_GROUP; var CX_REG:TCONTEXT_REG_GROUP; var UC_REG:TUSERCONFIG_REG_SHORT); -var - node:p_pm4_node_DrawIndexAuto; begin if ColorControl(CX_REG) then Exit; - node:=allocator.Alloc(SizeOf(t_pm4_node_DrawIndexAuto)); - - node^.ntype :=ntDrawIndexAuto; - node^.SH_REG:=SH_REG; - node^.CX_REG:=CX_REG; - node^.UC_REG:=UC_REG; - - add_node(node); + BuildDraw(ntDrawIndexAuto,SH_REG,CX_REG,UC_REG); {same path as DrawIndex2, only the node type differs} end; procedure t_pm4_stream.DispatchDirect(var SH_REG:TSH_REG_GROUP); diff --git a/vulkan/vCmdBuffer.pas b/vulkan/vCmdBuffer.pas index 07086929..8472084d 100644 --- a/vulkan/vCmdBuffer.pas +++ b/vulkan/vCmdBuffer.pas @@ -129,9 +129,9 @@ type //Procedure dmaData(src:DWORD;dst:Pointer;byteCount:DWORD;isBlocking:Boolean); //Procedure writeAtEndOfShader(eventType:Byte;dst:Pointer;value:DWORD); - Procedure DrawIndexOffset2(Addr:Pointer;OFFSET,INDICES:DWORD); - Procedure DrawIndex2(Addr:Pointer;INDICES:DWORD); - Procedure DrawIndexAuto(INDICES:DWORD); + Procedure DrawIndexOffset2(IndexBase:Pointer;indexOffset,indexCount:DWORD); {IndexBase is translated with get_dmem_ptr inside the implementation} + Procedure DrawIndex2(IndexBase:Pointer;indexCount:DWORD); + Procedure DrawIndexAuto(indexCount:DWORD); end; implementation @@ -879,8 +879,9 @@ begin end; end; -Procedure TvCmdBuffer.DrawIndexOffset2(Addr:Pointer;OFFSET,INDICES:DWORD); 
+Procedure TvCmdBuffer.DrawIndexOffset2(IndexBase:Pointer;indexOffset,indexCount:DWORD); var + Addr:Pointer; {result of translating IndexBase with get_dmem_ptr} rb:TvHostBuffer; Size:TVkDeviceSize; BufOffset:TVkDeviceSize; @@ -893,7 +894,13 @@ begin if (FinstanceCount=0) then FinstanceCount:=1; - Size:=(OFFSET+INDICES)*GET_INDEX_TYPE_SIZE(FINDEX_TYPE); + Size:=(indexOffset+indexCount)*GET_INDEX_TYPE_SIZE(FINDEX_TYPE); {byte size of indexOffset+indexCount indices of the current index type} + + Addr:=nil; + if not get_dmem_ptr(IndexBase,@Addr,nil) then {NOTE(review): nil is passed as the third argument; confirm get_dmem_ptr accepts nil there} + begin + Assert(false,'addr:0x'+HexStr(IndexBase)+' not in dmem!'); + end; rb:=FetchHostBuffer(Self,QWORD(Addr),Size,ord(VK_BUFFER_USAGE_INDEX_BUFFER_BIT)); Assert(rb<>nil); @@ -915,17 +922,17 @@ begin begin vkCmdDrawIndexed( Fcmdbuf, - INDICES, //indexCount + indexCount, //indexCount FinstanceCount, //instanceCount - OFFSET, //firstIndex + indexOffset, //firstIndex 0, //vertexOffset 0); //firstInstance end; DI_PT_QUADLIST: begin Assert(FinstanceCount<=1,'instance DI_PT_QUADLIST'); - Assert(OFFSET=0,'OFFSET DI_PT_QUADLIST'); - h:=INDICES div 4; + Assert(indexOffset=0,'OFFSET DI_PT_QUADLIST'); + h:=indexCount div 4; {NOTE(review): h is only decremented when h>0, so the loop below also runs once when indexCount<4; confirm that is intended} if (h>0) then h:=h-1; For i:=0 to h do begin @@ -944,12 +951,12 @@ begin end; -Procedure TvCmdBuffer.DrawIndex2(Addr:Pointer;INDICES:DWORD); +Procedure TvCmdBuffer.DrawIndex2(IndexBase:Pointer;indexCount:DWORD); {forwards to DrawIndexOffset2 with an index offset of 0} begin - DrawIndexOffset2(Addr,0,INDICES); + DrawIndexOffset2(IndexBase,0,indexCount); end; -Procedure TvCmdBuffer.DrawIndexAuto(INDICES:DWORD); +Procedure TvCmdBuffer.DrawIndexAuto(indexCount:DWORD); var i,h:DWORD; begin @@ -965,7 +972,7 @@ begin begin vkCmdDraw( FCmdbuf, - INDICES, //vertexCount + indexCount, //vertexCount FinstanceCount, //instanceCount 0, //firstVertex 0); //firstInstance @@ -983,7 +990,7 @@ begin //0 1 2 //0 2 3 - h:=INDICES div 3; + h:=indexCount div 3; if (h>0) then h:=h-1; For i:=0 to h do begin @@ -1001,7 +1008,7 @@ begin DI_PT_QUADLIST: begin Assert(FinstanceCount<=1,'instance DI_PT_QUADLIST'); - h:=INDICES div 4; + h:=indexCount div 4; if (h>0) then h:=h-1; For i:=0 to h do begin diff --git 
a/vulkan/vRegs2Vulkan.pas b/vulkan/vRegs2Vulkan.pas index 59bec2e0..666c9e78 100644 --- a/vulkan/vRegs2Vulkan.pas +++ b/vulkan/vRegs2Vulkan.pas @@ -76,6 +76,8 @@ type PCONTEXT_REG_GROUP =^TCONTEXT_REG_GROUP; PUSERCONFIG_REG_SHORT=^TUSERCONFIG_REG_SHORT; + PGPU_USERDATA=^TGPU_USERDATA; + PGPU_REGS=^TGPU_REGS; TGPU_REGS=packed object SH_REG:PSH_REG_GROUP; // 0x2C00 @@ -111,6 +113,12 @@ type Function get_code_addr(FStage:TvShaderStage):Pointer; Function get_user_data(FStage:TvShaderStage):Pointer; + procedure export_user_data(dst:PGPU_USERDATA); + end; + + TGPU_USERDATA=packed object {by-value copy of the user data of every shader stage} + A:array[TvShaderStage] of TSPI_USER_DATA; + Function get_user_data(FStage:TvShaderStage):Pointer; end; function GET_INDEX_TYPE_SIZE(i:TVkIndexType):Byte; @@ -1476,6 +1484,22 @@ begin end; end; +procedure TGPU_REGS.export_user_data(dst:PGPU_USERDATA); {Copies the user data of the seven listed shader stages from SH_REG into dst^.A} +begin + dst^.A[vShaderStageLs]:=SH_REG^.SPI_SHADER_USER_DATA_LS; + dst^.A[vShaderStageHs]:=SH_REG^.SPI_SHADER_USER_DATA_HS; + dst^.A[vShaderStageEs]:=SH_REG^.SPI_SHADER_USER_DATA_ES; + dst^.A[vShaderStageGs]:=SH_REG^.SPI_SHADER_USER_DATA_GS; + dst^.A[vShaderStageVs]:=SH_REG^.SPI_SHADER_USER_DATA_VS; + dst^.A[vShaderStagePs]:=SH_REG^.SPI_SHADER_USER_DATA_PS; + dst^.A[vShaderStageCs]:=SH_REG^.COMPUTE_USER_DATA; {the compute slot is filled from COMPUTE_USER_DATA} +end; + +Function TGPU_USERDATA.get_user_data(FStage:TvShaderStage):Pointer; {Returns the address of the stored user data for FStage} +begin + Result:=@A[FStage]; +end; + /// function _get_vsharp_cformat(PV:PVSharpResource4):TVkFormat; diff --git a/vulkan/vShaderExt.pas b/vulkan/vShaderExt.pas index d4e26081..f4fb5125 100644 --- a/vulkan/vShaderExt.pas +++ b/vulkan/vShaderExt.pas @@ -215,8 +215,8 @@ type FLayout:TvPipelineLayout; Procedure Clear; Function Compile:Boolean; - Procedure ExportAttrBuilder(var AttrBuilder:TvAttrBuilder;var GPU_REGS:TGPU_REGS); - Procedure ExportUnifBuilder(var UniformBuilder:TvUniformBuilder;var GPU_REGS:TGPU_REGS); + Procedure ExportAttrBuilder(var AttrBuilder :TvAttrBuilder ;GPU_USERDATA:PGPU_USERDATA); + Procedure ExportUnifBuilder(var 
UniformBuilder:TvUniformBuilder;GPU_USERDATA:PGPU_USERDATA); end; function GetSharpByPatch(pData:Pointer;const addr:ADataLayout):Pointer; @@ -1203,18 +1203,18 @@ begin Result:=(FLayout<>nil); end; -Procedure TvShaderGroup.ExportAttrBuilder(var AttrBuilder:TvAttrBuilder;var GPU_REGS:TGPU_REGS); +Procedure TvShaderGroup.ExportAttrBuilder(var AttrBuilder:TvAttrBuilder;GPU_USERDATA:PGPU_USERDATA); {Feeds the vertex shader's vertex layout into AttrBuilder; GPU_USERDATA is dereferenced, so it must not be nil when a vertex shader is present} var Shader:TvShaderExt; begin Shader:=FKey.FShaders[vShaderStageVs]; if (Shader<>nil) then begin - Shader.EnumVertLayout(@AttrBuilder.AddAttr,Shader.FDescSetId,GPU_REGS.get_user_data(vShaderStageVs)) + Shader.EnumVertLayout(@AttrBuilder.AddAttr,Shader.FDescSetId,GPU_USERDATA^.get_user_data(vShaderStageVs)) {user data now comes from the TGPU_USERDATA copy} end; end; -Procedure TvShaderGroup.ExportUnifBuilder(var UniformBuilder:TvUniformBuilder;var GPU_REGS:TGPU_REGS); +Procedure TvShaderGroup.ExportUnifBuilder(var UniformBuilder:TvUniformBuilder;GPU_USERDATA:PGPU_USERDATA); {GPU_USERDATA supplies the user data pointer of stage i to EnumUnifLayout} var Shader:TvShaderExt; i:TvShaderStage; @@ -1224,7 +1224,7 @@ begin Shader:=FKey.FShaders[i]; if (Shader<>nil) then begin - Shader.EnumUnifLayout(@UniformBuilder.AddAttr,Shader.FDescSetId,GPU_REGS.get_user_data(i)); + Shader.EnumUnifLayout(@UniformBuilder.AddAttr,Shader.FDescSetId,GPU_USERDATA^.get_user_data(i)); end; end; end; diff --git a/vulkan/vShaderManager.pas b/vulkan/vShaderManager.pas index e9ba54ad..acddd296 100644 --- a/vulkan/vShaderManager.pas +++ b/vulkan/vShaderManager.pas @@ -72,7 +72,8 @@ type function FetchShader(FStage:TvShaderStage;FDescSetId:Integer;var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderExt; function FetchShaderGroup(F:PvShadersKey):TvShaderGroup; -function FetchShaderGroup(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; +function FetchShaderGroupRT(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; {renamed from the register-based FetchShaderGroup overload} +function FetchShaderGroupCS(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; {new: fetches a group whose key has only the compute stage set} implementation @@ -577,7 +578,7 @@ begin FShaderGroupSet.Unlock_wr; end; -function FetchShaderGroup(var 
GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; +function FetchShaderGroupRT(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; var FShadersKey:TvShadersKey; i:TvShaderStage; @@ -600,5 +601,16 @@ begin Result:=FetchShaderGroup(@FShadersKey); end; +function FetchShaderGroupCS(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; {Builds a shader key with only the compute stage set and returns the group fetched for that key} +var + FShadersKey:TvShadersKey; +begin + FShadersKey:=Default(TvShadersKey); {start from a default-initialized key} + + FShadersKey.FShaders[vShaderStageCs]:=FetchShader(vShaderStageCs,0,GPU_REGS,pc); {0 is passed as FDescSetId} + + Result:=FetchShaderGroup(@FShadersKey); +end; + end.