From 944538ef425ef5dc70358fcbfb2baf776f74f364 Mon Sep 17 00:00:00 2001 From: Pavel <68122101+red-prig@users.noreply.github.com> Date: Thu, 8 Aug 2024 09:54:48 +0300 Subject: [PATCH] + --- chip/pm4_me.pas | 49 +++++++++++++++-- chip/pm4_pfp.pas | 56 ++++++++++++++----- chip/pm4_stream.pas | 116 +++++++++++++++++++++++++++++++++++++++ chip/pm4defs.pas | 2 + sys/dev/dev_dce.pas | 4 ++ sys/dev/dev_gc.pas | 18 ++++-- sys/kern/kern_synch.pas | 2 +- vulkan/vImageManager.pas | 54 +++++++++++++++++- 8 files changed, 272 insertions(+), 29 deletions(-) diff --git a/chip/pm4_me.pas b/chip/pm4_me.pas index 136681ac..70c89db4 100644 --- a/chip/pm4_me.pas +++ b/chip/pm4_me.pas @@ -174,7 +174,8 @@ uses kern_dmem, kern_proc, vm_map, - vm_tracking_map; + vm_tracking_map, + dev_dce; procedure StartFrameCapture; begin @@ -1818,10 +1819,16 @@ var addr_dmem:Pointer; data_size:Byte; begin - ctx.InsertLabel(PChar('WriteEop:0x'+HexStr(QWORD(node^.addr),10))); + if not ctx.stream^.hint_repeat then + begin + ctx.InsertLabel(PChar('WriteEop:0x'+HexStr(QWORD(node^.addr),10))); + ctx.stream^.hint_repeat:=True; + end; if not ctx.WaitConfirmOrSwitch then Exit; + ctx.stream^.hint_repeat:=False; + curr:=md_rdtsc_unit; diff:=curr-ctx.rel_time; @@ -1892,10 +1899,16 @@ procedure pm4_SubmitFlipEop(var ctx:t_me_render_context;node:p_pm4_node_SubmitFl var curr:QWORD; begin - ctx.InsertLabel('SubmitFlipEop'); + if not ctx.stream^.hint_repeat then + begin + ctx.InsertLabel(PChar('SubmitFlipEop:0x'+HexStr(node^.eop_value,16))); + ctx.stream^.hint_repeat:=True; + end; if not ctx.WaitConfirmOrSwitch then Exit; + ctx.stream^.hint_repeat:=False; + if (ctx.me^.on_submit_flip_eop<>nil) then begin ctx.me^.on_submit_flip_eop(node^.eop_value); @@ -1923,10 +1936,16 @@ var addr_dmem:Pointer; data_size:Byte; begin - ctx.InsertLabel(PChar('ReleaseMem:0x'+HexStr(QWORD(node^.addr),10))); + if not ctx.stream^.hint_repeat then + begin + ctx.InsertLabel(PChar('ReleaseMem:0x'+HexStr(QWORD(node^.addr),10))); + ctx.stream^.hint_repeat:=True; + end; if not ctx.WaitConfirmOrSwitch then Exit; + ctx.stream^.hint_repeat:=False; + curr:=md_rdtsc_unit; diff:=curr-ctx.rel_time; @@ -2223,6 +2242,17 @@ begin end; +function get_dce_label_id(addr_dmem:Pointer):Integer; +begin + Result:=-1; + + if (QWORD(addr_dmem)>=QWORD(@dev_dce.dce_page^.labels) ) and + (QWORD(addr_dmem)< QWORD(@dev_dce.dce_page^.label_)+8) then + begin + Result:=(QWORD(addr_dmem)-QWORD(@dev_dce.dce_page^.labels)) div 8; + end; +end; + Function me_test_mem(node:p_pm4_node_WaitRegMem):Boolean; var addr_dmem:Pointer; @@ -2233,6 +2263,8 @@ begin Assert(false,'addr:0x'+HexStr(node^.pollAddr)+' not in dmem!'); end; + //Writeln('me_test_mem:',get_dce_label_id(addr_dmem),' ',node^.refValue); + val:=PDWORD(addr_dmem)^ and node^.mask; ref:=node^.refValue; Case node^.compareFunc of @@ -2250,11 +2282,16 @@ end; procedure pm4_WaitRegMem(var ctx:t_me_render_context;node:p_pm4_node_WaitRegMem); begin - - ctx.InsertLabel(PChar('WaitRegMem:0x'+HexStr(QWORD(node^.pollAddr),10))); + if not ctx.stream^.hint_repeat then + begin + ctx.InsertLabel(PChar('WaitRegMem:0x'+HexStr(QWORD(node^.pollAddr),10))); + ctx.stream^.hint_repeat:=True; + end; if not ctx.WaitConfirmOrSwitch then Exit; + ctx.stream^.hint_repeat:=False; + if not me_test_mem(node) then begin ctx.switch_task; diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index f8bfd38a..0c02ccdc 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -1085,7 +1085,7 @@ begin Writeln(' interrupt =0x',HexStr(Body^.intSel,2)); Writeln(' srcSelector=0x',HexStr(Body^.dataSel,2)); - Writeln(' dstGpuAddr =0x',HexStr(Body^.address,16)); + Writeln(' dstGpuAddr =0x',HexStr(Body^.address,10)); Writeln(' immValue =0x',HexStr(Body^.DATA,16)); end; @@ -1126,6 +1126,9 @@ begin pctx^.stream[stGfxDcb].EventWriteEos(Pointer(Body^.address),Body^.data,Body^.eventType,Body^.command); end; +const + engine_str:array[0..3] of RawByteString=('ME','PFP','CE','3'); + procedure onDmaData(pctx:p_pfp_ctx;Body:PPM4DMADATA); var adrSrc:QWORD; @@ -1197,7 +1200,7 @@ begin end; else - Assert(false,'DmaData: engine=0x'+HexStr(Body^.Flags1.engine,1)); + Assert(false,'DmaData: engine='+engine_str[Body^.Flags1.engine]); end; end; @@ -1214,6 +1217,20 @@ var begin Assert(Body^.CONTROL.wrOneAddr=0,'WriteData: wrOneAddr<>0'); + if p_print_gpu_ops then + begin + Writeln(' engine =',engine_str[Body^.CONTROL.engineSel]); + Writeln(' dstSel =',Body^.CONTROL.dstSel,' ',Body^.CONTROL.wrConfirm); + Writeln(' dstAddr =0x',HexStr(Body^.dstAddr,10)); + Writeln(' length =',(Body^.header.count-2)*4); + + case Body^.header.count of + 3:Writeln(' data =0x',HexStr(PDWORD(@Body^.DATA)^,8)); + 4:Writeln(' data =0x',HexStr(PQWORD(@Body^.DATA)^,16)); + else; + end; + end; + count:=Body^.header.count; if (count<3) then Exit; @@ -1260,7 +1277,7 @@ begin end; else - Assert(false,'WriteData: engineSel=0x'+HexStr(engineSel,1)); + Assert(false,'WriteData: engineSel='+engine_str[engineSel]); end; end; @@ -1268,6 +1285,19 @@ end; procedure onWaitRegMem(pctx:p_pfp_ctx;Body:PPM4CMDWAITREGMEM); begin + if p_print_gpu_ops then + begin + Writeln(' engine =',engine_str[Body^.engine]); + Writeln(' memSpace =',Body^.memSpace); + Writeln(' operation =',Body^.operation); + Writeln(' pollAddress=0x',HexStr(Body^.pollAddress,10)); + Writeln(' reference =0x',HexStr(Body^.reference,8)); + Writeln(' mask =0x',HexStr(Body^.mask,8)); + Writeln(' compareFunc=0x',HexStr(Body^.compareFunc,1)); + end; + + Assert(Body^.operation=0,'WaitRegMem: operation=0x'+HexStr(Body^.operation,1)); + Case Body^.memSpace of WAIT_REG_MEM_SPACE_MEMORY:; else @@ -1281,10 +1311,10 @@ begin end; WAIT_REG_MEM_ENGINE_PFP: begin - Assert(false,'WaitRegMem: engine=0x'+HexStr(Body^.engine,1)); + Assert(false,'WaitRegMem: engine='+engine_str[Body^.engine]); end; else - Assert(false,'WaitRegMem: engine=0x'+HexStr(Body^.engine,1)); + Assert(false,'WaitRegMem: engine='+engine_str[Body^.engine]); end; end; @@ -1411,7 +1441,7 @@ begin pctx^.set_reg(r,v); end; // - pctx^.LastSetReg:=CONFIG_SPACE_START+c-1; + pctx^.LastSetReg:=CONFIG_SPACE_START+Body^.REG_OFFSET+c-1; end; end; @@ -1435,13 +1465,13 @@ begin // if p_print_gpu_ops then begin - Writeln(' SET:',getRegName(r+$A000),':=0x',HexStr(v,8)); + Writeln(' SET:',getRegName(r+CONTEXT_REG_BASE),':=0x',HexStr(v,8)); end; // pctx^.set_ctx_reg(r,v); end; // - pctx^.LastSetReg:=CONTEXT_REG_BASE+c-1; + pctx^.LastSetReg:=CONTEXT_REG_BASE+Body^.REG_OFFSET+c-1; end; end; @@ -1465,13 +1495,13 @@ begin // if p_print_gpu_ops then begin - Writeln(' SET:',getRegName(r+$2C00),':=0x',HexStr(v,8)); + Writeln(' SET:',getRegName(r+SH_REG_BASE),':=0x',HexStr(v,8)); end; // pctx^.set_sh_reg(r,v); end; // - pctx^.LastSetReg:=SH_REG_BASE+c-1; + pctx^.LastSetReg:=SH_REG_BASE+Body^.REG_OFFSET+c-1; end; end; @@ -1501,7 +1531,7 @@ begin pctx^.set_reg(r,v); end; // - pctx^.LastSetReg:=USERCONFIG_REG_BASE+c-1; + pctx^.LastSetReg:=USERCONFIG_REG_BASE+Body^.REG_OFFSET+c-1; end; end; @@ -1720,7 +1750,7 @@ begin mmCB_COLOR7_DCC_BASE, mmDB_STENCIL_CLEAR, - mmDB_RENDER_CONTROL, + //mmDB_RENDER_CONTROL, mmDB_HTILE_SURFACE: begin @@ -1938,7 +1968,7 @@ begin Writeln(' interrupt =0x',HexStr(Body^.intSel,2)); Writeln(' srcSelector=0x',HexStr(Body^.dataSel,2)); Writeln(' dstSelector=0x',HexStr(Body^.dstSel,2)); - Writeln(' dstGpuAddr =0x',HexStr(Body^.address,16)); + Writeln(' dstGpuAddr =0x',HexStr(Body^.address,10)); Writeln(' immValue =0x',HexStr(Body^.data,16)); end; diff --git a/chip/pm4_stream.pas b/chip/pm4_stream.pas index 9f94bbad..d971aa63 100644 --- a/chip/pm4_stream.pas +++ b/chip/pm4_stream.pas @@ -370,6 +370,7 @@ type buft:t_pm4_stream_type; // init:Boolean; + hint_repeat:Boolean; // curr:p_pm4_node; // @@ -912,6 +913,11 @@ begin node^.num_dw:=num_dw; node^.offset:=offset; + insert_buffer_resource(@node^.scope, + addr, + num_dw*SizeOf(DWORD), + TM_READ); + add_node(node); end; @@ -956,6 +962,20 @@ end; procedure t_pm4_stream.EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte); var node:p_pm4_node_EventWriteEop; + + function get_data_size:DWORD; inline; + begin + Result:=0; + // + Case dataSel of + EVENTWRITEEOP_DATA_SEL_SEND_DATA32 :Result:=4; + EVENTWRITEEOP_DATA_SEL_SEND_DATA64 :Result:=8; + EVENTWRITEEOP_DATA_SEL_SEND_GPU_CLOCK :Result:=8; + EVENTWRITEEOP_DATA_SEL_SEND_CP_PERFCOUNTER:Result:=8; + else; + end; + end; + begin node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEop)); @@ -967,12 +987,31 @@ begin node^.dataSel :=dataSel; node^.intSel :=intSel; + if (addr<>nil) then + begin + insert_buffer_resource(@node^.scope, + addr, + get_data_size, + TM_WRITE); + end; + add_node(node); end; procedure t_pm4_stream.EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte); var node:p_pm4_node_EventWriteEos; + + function get_data_size:DWORD; inline; + begin + Result:=0; + // + Case command of + EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY:Result:=4; + else; + end; + end; + begin node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEos)); @@ -983,6 +1022,14 @@ begin node^.eventType:=eventType; node^.command :=command; + if (addr<>nil) then + begin + insert_buffer_resource(@node^.scope, + addr, + get_data_size, + TM_WRITE); + end; + add_node(node); end; @@ -1003,6 +1050,20 @@ end; procedure t_pm4_stream.ReleaseMem(addr:Pointer;data:QWORD;eventType,srcSel,dstSel,intSel:Byte); var node:p_pm4_node_ReleaseMem; + + function get_data_size:DWORD; inline; + begin + Result:=0; + // + Case srcSel of + RELEASEMEM_DATA_SEL_SEND_DATA32 :Result:=4; + RELEASEMEM_DATA_SEL_SEND_DATA64 :Result:=8; + RELEASEMEM_DATA_SEL_SEND_GPU_CLOCK :Result:=8; + RELEASEMEM_DATA_SEL_SEND_CP_PERFCOUNTER:Result:=8; + else; + end; + end; + begin node:=allocator.Alloc(SizeOf(t_pm4_node_ReleaseMem)); @@ -1015,6 +1076,14 @@ begin node^.dstSel :=dstSel; node^.intSel :=intSel; + if (addr<>nil) then + begin + insert_buffer_resource(@node^.scope, + addr, + get_data_size, + TM_WRITE); + end; + add_node(node); end; @@ -1033,6 +1102,31 @@ begin node^.dstSel :=dstSel; node^.cpSync :=isBlocking; + case srcSel of + kDmaDataSrcMemory, + kDmaDataSrcMemoryUsingL2: + if (srcOrData<>0) then + begin + insert_buffer_resource(@node^.scope, + Pointer(srcOrData), + numBytes, + TM_READ); + end; + else; + end; + + case dstSel of + kDmaDataDstMemory, + kDmaDataDstMemoryUsingL2: + if (dst<>0) then + begin + insert_buffer_resource(@node^.scope, + Pointer(dst), + numBytes, + TM_WRITE); + end; + end; + add_node(node); end; @@ -1056,6 +1150,28 @@ begin //Move(src^,node^.src^,num_dw*SizeOf(DWORD)); + if (src<>nil) then + begin + insert_buffer_resource(@node^.scope, + src, + num_dw*SizeOf(DWORD), + TM_READ); + end; + + case dstSel of + WRITE_DATA_DST_SEL_MEMORY_SYNC, + WRITE_DATA_DST_SEL_TCL2, + WRITE_DATA_DST_SEL_MEMORY_ASYNC: + if (dst<>nil) then + begin + insert_buffer_resource(@node^.scope, + Pointer(dst), + num_dw*SizeOf(DWORD), + TM_WRITE); + end; + else; + end; + add_node(node); end; diff --git a/chip/pm4defs.pas b/chip/pm4defs.pas index c715d943..c4a7857b 100644 --- a/chip/pm4defs.pas +++ b/chip/pm4defs.pas @@ -146,6 +146,7 @@ const OP_HINT_SET_SSHARP_IN_USER_DATA =$68750006; OP_HINT_SET_USER_DATA_REGION =$6875000D; OP_HINT_BASE_MARK_DISPATCH_DRAW_ACB_ADDRESS =$68750012; + OP_HINT_PREPARE_FLIP =$68750776; OP_HINT_PREPARE_FLIP_VOID =$68750777; OP_HINT_PREPARE_FLIP_LABEL =$68750778; OP_HINT_PREPARE_FLIP_WITH_EOP_INTERRUPT_VOID =$68750780; @@ -1025,6 +1026,7 @@ begin OP_HINT_SET_SSHARP_IN_USER_DATA :Result:='SET_SSHARP_IN_USER_DATA'; OP_HINT_SET_USER_DATA_REGION :Result:='SET_USER_DATA_REGION'; OP_HINT_BASE_MARK_DISPATCH_DRAW_ACB_ADDRESS :Result:='BASE_MARK_DISPATCH_DRAW_ACB_ADDRESS'; + OP_HINT_PREPARE_FLIP :Result:='PREPARE_FLIP'; OP_HINT_PREPARE_FLIP_VOID :Result:='PREPARE_FLIP_VOID'; OP_HINT_PREPARE_FLIP_LABEL :Result:='PREPARE_FLIP_LABEL'; OP_HINT_PREPARE_FLIP_WITH_EOP_INTERRUPT_VOID :Result:='PREPARE_FLIP_WITH_EOP_INTERRUPT_VOID'; diff --git a/sys/dev/dev_dce.pas b/sys/dev/dev_dce.pas index 381aba73..055501db 100644 --- a/sys/dev/dev_dce.pas +++ b/sys/dev/dev_dce.pas @@ -1008,6 +1008,8 @@ begin submit_eop:=(QWORD(f_eop_count) shl 32) or QWORD($ff00a5a5); + Writeln('submit_eop=0x',HexStr(submit_eop,16)); + f_eop_count:=f_eop_count+1; Result:=dce_handle.SubmitFlipEop(@submit,submit_eop); @@ -1020,6 +1022,8 @@ begin mtx_unlock(dce_mtx); + //print_backtrace_td(stderr); + Writeln('submit_flip: ','bufferIndex=',data^.bufferIndex,' ', 'flipMode=',data^.flipMode,' ', 'flipArg=','0x',HexStr(data^.flipArg,16),' ', diff --git a/sys/dev/dev_gc.pas b/sys/dev/dev_gc.pas index 6b82f142..ea0bc9fc 100644 --- a/sys/dev/dev_gc.pas +++ b/sys/dev/dev_gc.pas @@ -171,15 +171,16 @@ var //asc_queues +//gfx ring only procedure onEventWriteEop(pctx:p_pfp_ctx;Body:PPM4CMDEVENTWRITEEOP); var - submit_id:DWORD; + submit_id:QWORD; begin submit_id:=Body^.DATA; if p_print_gpu_ops then begin - Writeln('submit_eop_flip=0x',HexStr(submit_id,8)); + Writeln('[R]IT_EVENT_WRITE_EOP=0x',HexStr(submit_id,16),' ',Body^.intSel); end; pctx^.stream[stGfxDcb].SubmitFlipEop(Body^.DATA,Body^.intSel); @@ -197,7 +198,7 @@ begin begin if p_print_gpu_ops then begin - Writeln('INDIRECT_BUFFER (ccb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); + Writeln('[R]INDIRECT_BUFFER (ccb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); end; if pm4_ibuf_init(@ibuf,buff,@pm4_parse_ccb,stGfxCcb) then begin @@ -214,7 +215,7 @@ begin begin if p_print_gpu_ops then begin - Writeln('INDIRECT_BUFFER (dcb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); + Writeln('[R]INDIRECT_BUFFER (dcb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); end; if pm4_ibuf_init(@ibuf,buff,@pm4_parse_dcb,stGfxDcb) then begin @@ -231,14 +232,17 @@ begin begin if p_print_gpu_ops then begin - Writeln('SWITCH_BUFFER'); + Writeln('[R]SWITCH_BUFFER'); end; end; $C0044700: //IT_EVENT_WRITE_EOP begin onEventWriteEop(pctx,buff); end; - else; + else + begin + Assert(False); + end; end; end; @@ -827,6 +831,8 @@ begin now this is directly sended "eop_v" } + Writeln('submit_eop=0x',HexStr(p_submit_args(data)^.eop_v,16),' ',p_submit_args(data)^.wait); + Result:=gc_pm4_event_write_eop(@ring_gfx, nil, p_submit_args(data)^.eop_v, diff --git a/sys/kern/kern_synch.pas b/sys/kern/kern_synch.pas index c8f98948..4caf81f1 100644 --- a/sys/kern/kern_synch.pas +++ b/sys/kern/kern_synch.pas @@ -103,7 +103,7 @@ begin sleepq_set_timeout(ident,timo); end; - if (timo<>catch) then + if (timo<>0) and (catch<>0) then Result:=sleepq_timedwait_sig(ident,pri) else if (timo<>0) then Result:=sleepq_timedwait(ident,pri) diff --git a/vulkan/vImageManager.pas b/vulkan/vImageManager.pas index 5ba21575..75440f0b 100644 --- a/vulkan/vImageManager.pas +++ b/vulkan/vImageManager.pas @@ -887,7 +887,6 @@ begin // Result.StencilOnly:=TvChildImage2.Create; Result.StencilOnly.key :=GetStencilOnly(F); - Result.StencilOnly.Parent:=Result; end; //depth VK_FORMAT_D16_UNORM, @@ -898,7 +897,6 @@ begin Result.key :=F; Result.FUsage:=usage; // - Result.Parent :=Result; Result.DepthOnly:=Result; end; //depth stencil @@ -1004,6 +1002,56 @@ begin Result:=True; end; +procedure _SetName(t:TvCustomImage2); +var + ch:Char; +begin + + Case t.key.cformat of + //stencil + VK_FORMAT_S8_UINT: + begin + Ch:='S'; + end; + //depth + VK_FORMAT_D16_UNORM, + VK_FORMAT_X8_D24_UNORM_PACK32, + VK_FORMAT_D32_SFLOAT: + begin + Ch:='D'; + end; + //depth stencil + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT: + begin + Ch:='X'; + end; + else + begin + if (t.key.params.cube<>0) then + begin + Ch:='C'; + end else + if (t.key.params.arrayLayers>1) then + begin + Ch:='A'; + end else + begin + Ch:='I'; + end; + end; + end; + + t.SetObjectName(Ch+'_0x'+HexStr(QWORD(t.key.Addr),10)+ + '_'+IntToStr(t.key.params.width)+'x'+IntToStr(t.key.params.height)+ + '_m'+IntToStr(t.key.params.mipLevels)+ + '_a'+IntToStr(t.key.params.arrayLayers)+ + '_t'+IntToStr(t.key.params.tiling.idx)+'|'+IntToStr(t.key.params.tiling.alt) + ); + +end; + function _FetchImage(const F:TvImageKey;usage:s_image_usage):TvImage2; label _repeat; @@ -1039,7 +1087,7 @@ begin end else begin - t.SetObjectName('I_0x'+HexStr(QWORD(F.Addr),10)+'_'+IntToStr(F.params.width)+'x'+IntToStr(F.params.height)); + _SetName(t); Fdevc:=MemManager.FetchMemory( t.GetRequirements,