diff --git a/chip/pm4_me.pas b/chip/pm4_me.pas index 94d5d116..f63dcbd3 100644 --- a/chip/pm4_me.pas +++ b/chip/pm4_me.pas @@ -500,12 +500,7 @@ begin if (Cmd<>nil) then Exit; //Already allocated buft:=stream^.buft; - - if (buft<>stGfxDcb) and - (buft<>stGfxCcb) then - begin - Assert(false,'TODO'); - end; + //Select Vulkan compute only queue? imdone_count:=me^.imdone_count; @@ -1913,6 +1908,94 @@ begin //ctx.on_idle; end; +function get_compute_pipe_id(buft:t_pm4_stream_type):Byte; inline; +begin + Result:=ord(buft) - ord(stCompute0); +end; + +procedure pm4_ReleaseMem(var ctx:t_me_render_context;node:p_pm4_node_ReleaseMem); +var + curr,diff:QWORD; + addr_dmem:Pointer; + data_size:Byte; +begin + ctx.InsertLabel(PChar('ReleaseMem:0x'+HexStr(QWORD(node^.addr),10))); + + if not ctx.WaitConfirmOrSwitch then Exit; + + curr:=md_rdtsc_unit; + diff:=curr-ctx.rel_time; + + if (node^.addr<>nil) then + begin + if (node^.srcSel<>RELEASEMEM_DATA_SEL_DISCARD) then + begin + if not get_dmem_ptr(node^.addr,@addr_dmem,nil) then + begin + Assert(false,'addr:0x'+HexStr(node^.addr)+' not in dmem!'); + end; + end; + + Case node^.dstSel of + RELEASEMEM_DST_SEL_MEMORY:; + RELEASEMEM_DST_SEL_L2 :Assert(false,'RELEASEMEM_DST_SEL_L2'); + else + Assert(false,'pm4_ReleaseMem:dstSel'); + end; + + Case node^.srcSel of + // + RELEASEMEM_DATA_SEL_DISCARD: + data_size:=0; + + //32bit data + RELEASEMEM_DATA_SEL_SEND_DATA32: + begin + PDWORD(addr_dmem)^:=node^.data; + + data_size:=4; + end; + + //64bit data + RELEASEMEM_DATA_SEL_SEND_DATA64: + begin + PQWORD(addr_dmem)^:=node^.data; + + data_size:=8; + end; + + //system 100Mhz global clock. (relative time) + RELEASEMEM_DATA_SEL_SEND_GPU_CLOCK: + begin + PQWORD(addr_dmem)^:=mul_div_u64(GLOBAL_CLOCK_FREQUENCY,UNIT_PER_SEC,diff); + + data_size:=8; + end; + + //GPU 800Mhz clock. (relative time) + RELEASEMEM_DATA_SEL_SEND_CP_PERFCOUNTER: + begin + PQWORD(addr_dmem)^:=mul_div_u64(GPU_CORE_CLOCK_FREQUENCY,UNIT_PER_SEC,diff); + + data_size:=8; + end; + + else + Assert(false,'pm4_ReleaseMem:srcSel'); + end; + + vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.addr),QWORD(node^.addr)+data_size,nil,M_DMEM_WRITE); + end; + + if (node^.intSel=RELEASEMEM_INT_SEL_SEND_INT) or + (node^.intSel=RELEASEMEM_INT_SEL_SEND_INT_ON_CONFIRM) then + begin + ctx.me^.knote_eventid(get_compute_pipe_id(ctx.stream^.buft),0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???) + end; + + //ctx.on_idle; +end; + procedure pm4_EventWrite(var ctx:t_me_render_context;node:p_pm4_node_EventWrite); begin @@ -2278,6 +2361,7 @@ begin ntEventWrite :pm4_EventWrite (ctx,Pointer(ctx.node)); ntEventWriteEop :pm4_EventWriteEop (ctx,Pointer(ctx.node)); ntSubmitFlipEop :pm4_SubmitFlipEop (ctx,Pointer(ctx.node)); + ntReleaseMem :pm4_ReleaseMem (ctx,Pointer(ctx.node)); ntEventWriteEos :pm4_EventWriteEos (ctx,Pointer(ctx.node)); ntWriteData :pm4_WriteData (ctx,Pointer(ctx.node)); ntDmaData :pm4_DmaData (ctx,Pointer(ctx.node)); diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index a740707c..061c6c44 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -77,13 +77,12 @@ function pm4_ibuf_init(ibuf:p_pm4_ibuffer; size:Ptruint; icb:t_pm4_parse_cb; buft:t_pm4_stream_type; - c_id:Byte):Boolean; + c_id:Byte=0):Boolean; function pm4_ibuf_init(ibuf:p_pm4_ibuffer; buf:PPM4CMDINDIRECTBUFFER; icb:t_pm4_parse_cb; - buft:t_pm4_stream_type; - c_id:Byte):Boolean; + buft:t_pm4_stream_type):Boolean; function pm4_ibuf_parse(pctx:p_pfp_ctx;ibuf:p_pm4_ibuffer):Integer; @@ -115,7 +114,7 @@ function pm4_ibuf_init(ibuf:p_pm4_ibuffer; size:Ptruint; icb:t_pm4_parse_cb; buft:t_pm4_stream_type; - c_id:Byte):Boolean; + c_id:Byte=0):Boolean; begin Result:=True; ibuf^.next:=Default(TAILQ_ENTRY); @@ -131,8 +130,7 @@ end; function pm4_ibuf_init(ibuf:p_pm4_ibuffer; buf:PPM4CMDINDIRECTBUFFER; icb:t_pm4_parse_cb; - buft:t_pm4_stream_type; - c_id:Byte):Boolean; + buft:t_pm4_stream_type):Boolean; var op:DWORD; ib_base:QWORD; @@ -165,13 +163,13 @@ begin //Writeln(' addr:0x'+HexStr(ib_base,16)+' '+HexStr(ib_size,16)); ibuf^.next:=Default(TAILQ_ENTRY); - ibuf^.base:=Pointer(ib_base); + ibuf^.base:=Pointer(ib_base); //adjust guest addr ibuf^.buff:=addr; ibuf^.size:=ib_size; ibuf^.bpos:=0; ibuf^.picb:=icb; ibuf^.buft:=buft; - ibuf^.c_id:=c_id; + ibuf^.c_id:=0; Result:=True; end; @@ -965,7 +963,11 @@ begin IT_LOAD_CONST_RAM:onLoadConstRam(pctx,buff); - else; + else + begin + Writeln(stderr,'PM4_TYPE_3.opcode:',get_op_name(PM4_TYPE_3_HEADER(token).opcode)); + Assert(False); + end; end; end; @@ -1158,8 +1160,6 @@ var engineSel:Byte; dstSel:Byte; begin - Assert(pctx^.stream_type=stGfxDcb); - Assert(Body^.CONTROL.wrOneAddr=0,'WriteData: wrOneAddr<>0'); count:=Body^.header.count; @@ -1180,10 +1180,10 @@ begin with pctx^.curr_ibuf^ do begin - src:=base+(QWORD(src_dmem)-QWORD(buff)); + src:=base+(Int64(src_dmem)-Int64(buff)); end; - pctx^.stream[stGfxDcb].WriteData(dstSel,dst,src,count,Body^.CONTROL.wrConfirm); + pctx^.stream[pctx^.stream_type].WriteData(dstSel,dst,src,count,Body^.CONTROL.wrConfirm); end; WRITE_DATA_ENGINE_PFP: begin @@ -1215,7 +1215,6 @@ end; procedure onWaitRegMem(pctx:p_pfp_ctx;Body:PPM4CMDWAITREGMEM); begin - Assert(pctx^.stream_type=stGfxDcb); Case Body^.memSpace of WAIT_REG_MEM_SPACE_MEMORY:; @@ -1226,7 +1225,7 @@ begin Case Body^.engine of WAIT_REG_MEM_ENGINE_ME: begin - pctx^.stream[stGfxDcb].WaitRegMem(Pointer(Body^.pollAddress),Body^.reference,Body^.mask,Body^.compareFunc); + pctx^.stream[pctx^.stream_type].WaitRegMem(Pointer(Body^.pollAddress),Body^.reference,Body^.mask,Body^.compareFunc); end; WAIT_REG_MEM_ENGINE_PFP: begin @@ -1778,7 +1777,7 @@ begin else begin - Writeln(stderr,'PM4_TYPE_3.opcode:0x',HexStr(PM4_TYPE_3_HEADER(token).opcode,2)); + Writeln(stderr,'PM4_TYPE_3.opcode:',get_op_name(PM4_TYPE_3_HEADER(token).opcode)); Assert(False); end; end; @@ -1925,12 +1924,14 @@ begin case PM4_TYPE_3_HEADER(token).opcode of IT_NOP :onNop (pctx,buff); + IT_WRITE_DATA :onWriteData (pctx,buff); IT_SET_SH_REG :onSetShRegCompute (pctx,buff); IT_DISPATCH_DIRECT :onDispatchDirectCompute(pctx,buff); IT_RELEASE_MEM :onReleaseMemCompute (pctx,buff); + IT_WAIT_REG_MEM :onWaitRegMem (pctx,buff); else begin - Writeln(stderr,'[ASC]PM4_TYPE_3.opcode:0x',HexStr(PM4_TYPE_3_HEADER(token).opcode,2)); + Writeln(stderr,'[ASC]PM4_TYPE_3.opcode:',get_op_name(PM4_TYPE_3_HEADER(token).opcode)); Assert(False); end; end; diff --git a/chip/pm4_ring.pas b/chip/pm4_ring.pas index 4e191be6..17e2a2a7 100644 --- a/chip/pm4_ring.pas +++ b/chip/pm4_ring.pas @@ -61,10 +61,11 @@ Function gc_map_hqd(ringBaseAddress:Pointer; g_queueId :DWORD; pipePriority :DWORD; hqd:p_gc_hqd):Integer; +Function gc_unmap_hqd(hqd:p_gc_hqd):Integer; + Function gc_map_hdq_ding_dong(hqd:p_gc_hqd;NextOffsetDw:DWORD):Integer; -function gc_map_hdq_peek (hqd:p_gc_hqd;size:PDWORD;buff:PPointer):Byte; +function gc_map_hdq_peek (hqd:p_gc_hqd;size:PDWORD;buff:PPointer):Boolean; function gc_map_hdq_drain (hqd:p_gc_hqd;size:DWORD):Boolean; -function gc_map_hdq_copy (hqd:p_gc_hqd;size:DWORD;dst:Pointer):Boolean; implementation @@ -385,6 +386,13 @@ begin end; +Function gc_unmap_hqd(hqd:p_gc_hqd):Integer; +begin + hqd^:=Default(t_gc_hqd); + + Result:=0; +end; + //single producer Function gc_map_hdq_ding_dong(hqd:p_gc_hqd;NextOffsetDw:DWORD):Integer; begin @@ -399,12 +407,12 @@ begin end; //single consumer -function gc_map_hdq_peek(hqd:p_gc_hqd;size:PDWORD;buff:PPointer):Byte; +function gc_map_hdq_peek(hqd:p_gc_hqd;size:PDWORD;buff:PPointer):Boolean; var ReadOffsetDw:DWORD; NextOffsetDw:DWORD; begin - Result:=0; + Result:=False; ReadOffsetDw:=hqd^.ReadOffsetDw and (hqd^.ringSizeDw-1); NextOffsetDw:=hqd^.NextOffsetDw and (hqd^.ringSizeDw-1); @@ -414,14 +422,14 @@ begin if (NextOffsetDw>ReadOffsetDw) then begin size^:=(NextOffsetDw-ReadOffsetDw) shl 2; - buff^:=hqd^.base_dmem_addr + (ReadOffsetDw*4); - Result:=1; end else begin - size^:=(hqd^.ringSizeDw-ReadOffsetDw+NextOffsetDw) shl 2; - Result:=2; + size^:=(hqd^.ringSizeDw-ReadOffsetDw) shl 2; end; + buff^:=hqd^.base_dmem_addr + (ReadOffsetDw*4); + + Result:=True; end; //single consumer @@ -448,7 +456,7 @@ begin if (size>s) then Exit; - ReadOffsetDw:=(NextOffsetDw + (size shr 2)) and (hqd^.ringSizeDw-1); + ReadOffsetDw:=(ReadOffsetDw + (size shr 2)) and (hqd^.ringSizeDw-1); hqd^.ReadOffsetDw :=ReadOffsetDw; hqd^.read_dmem_addr^:=(ReadOffsetDw shl 2); @@ -456,66 +464,6 @@ begin Result:=True; end; -//single consumer -function gc_map_hdq_copy(hqd:p_gc_hqd;size:DWORD;dst:Pointer):Boolean; -var - ReadOffsetDw:DWORD; - NextOffsetDw:DWORD; - s :DWORD; -begin - Result:=False; - - ReadOffsetDw:=hqd^.ReadOffsetDw and (hqd^.ringSizeDw-1); - NextOffsetDw:=hqd^.NextOffsetDw and (hqd^.ringSizeDw-1); - - if (ReadOffsetDw=NextOffsetDw) then Exit; - - if (NextOffsetDw>ReadOffsetDw) then - begin - s:=(NextOffsetDw-ReadOffsetDw) shl 2; - if (size>s) then Exit; - - Move((hqd^.base_dmem_addr + (ReadOffsetDw*4))^,dst^,size); - end else - begin - s:=(hqd^.ringSizeDw-ReadOffsetDw+NextOffsetDw) shl 2; - if (size>s) then Exit; - - //ReadOffsetDw..ringSizeDw - - s:=(hqd^.ringSizeDw-ReadOffsetDw) shl 2; - - if (s>size) then - begin - s:=size; - size:=0; - end else - begin - size:=size - s; - end; - - Move((hqd^.base_dmem_addr + (ReadOffsetDw*4))^,dst^,s); - dst:=dst + s; - - if (size<>0) then - begin - //0..NextOffsetDw - - s:=NextOffsetDw shl 2; - - if (s>size) then - begin - s:=size; - end; - - Move(hqd^.base_dmem_addr^,dst^,s); - end; - - end; - - Result:=True; -end; - end. diff --git a/sys/dev/dev_gc.pas b/sys/dev/dev_gc.pas index 15257f9a..235f8465 100644 --- a/sys/dev/dev_gc.pas +++ b/sys/dev/dev_gc.pas @@ -135,6 +135,13 @@ type pipePriority :DWORD; end; + p_unmap_compute_queue_args=^t_unmap_compute_queue_args; + t_unmap_compute_queue_args=packed record + pipeHi :DWORD; + pipeLo :DWORD; + queueId:DWORD; + end; + p_ding_dong_args=^t_ding_dong_args; t_ding_dong_args=packed record pipeHi :DWORD; @@ -194,7 +201,7 @@ begin begin Writeln('INDIRECT_BUFFER (ccb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); end; - if pm4_ibuf_init(@ibuf,buff,@pm4_parse_ccb,stGfxCcb,0) then + if pm4_ibuf_init(@ibuf,buff,@pm4_parse_ccb,stGfxCcb) then begin i:=pm4_ibuf_parse(pctx,@ibuf); if (i<>0) then @@ -211,7 +218,7 @@ begin begin Writeln('INDIRECT_BUFFER (dcb) 0x',HexStr(PPM4CMDINDIRECTBUFFER(buff)^.ibBase,10)); end; - if pm4_ibuf_init(@ibuf,buff,@pm4_parse_dcb,stGfxDcb,0) then + if pm4_ibuf_init(@ibuf,buff,@pm4_parse_dcb,stGfxDcb) then begin i:=pm4_ibuf_parse(pctx,@ibuf); if (i<>0) then @@ -253,7 +260,6 @@ var bits:QWORD; c_id:DWORD; - tmp :Pointer; begin if LoadVulkan then @@ -267,7 +273,7 @@ begin begin //Writeln('packet:0x',HexStr(buff),':',size); - if pm4_ibuf_init(@ibuf,buff,size,@pm4_parse_gfx_ring,stGfxRing,0) then + if pm4_ibuf_init(@ibuf,buff,size,@pm4_parse_gfx_ring,stGfxRing) then begin i:=pm4_ibuf_parse(@pfp_ctx,@ibuf); @@ -301,45 +307,31 @@ begin begin c_id:=BsfQWord(bits); - tmp:=nil; - rw_wlock(ring_gfx_lock); - i:=gc_map_hdq_peek(@map_queue_hqd[c_id],@size,@buff); - - if (i<>0) then + while gc_map_hdq_peek(@map_queue_hqd[c_id],@size,@buff) do begin - if (i=2) then - begin - tmp:=AllocMem(size); - - gc_map_hdq_copy(@map_queue_hqd[c_id],size,tmp); - - buff:=tmp; - end; - if pm4_ibuf_init(@ibuf,buff,size,@pm4_parse_compute_ring,get_compute_stream_type(c_id),c_id) then begin + //adjust guest addr + ibuf.base:=map_queue_hqd[c_id].base_guest_addr + (buff - map_queue_hqd[c_id].base_dmem_addr); + i:=pm4_ibuf_parse(@pfp_ctx,@ibuf); - pfp_ctx.add_stall(@ibuf); - end; - - if (tmp<>nil) then - begin - FreeMem(tmp); - tmp:=nil; + if (i<>0) then + begin + pfp_ctx.add_stall(@ibuf); + end; end; gc_map_hdq_drain(@map_queue_hqd[c_id],size); - - end; + end; //while rw_wunlock(ring_gfx_lock); //clear bits:=bits and (not (1 shl c_id)); - end; + end; //while // for buft:=stCompute0 to stCompute6 do @@ -520,6 +512,74 @@ begin end; +Function gc_unmap_compute_queue(data:p_unmap_compute_queue_args):Integer; +var + pipeHi :DWORD; + pipeLo :DWORD; + queueId :DWORD; + id :DWORD; +begin + Result:=0; + + pipeHi :=data^.pipeHi; + pipeLo :=data^.pipeLo; + queueId:=data^.queueId; + + //if (not IsDevKit) or (not IsDiag) + + if (pipeHi <> $0769c766) or + (pipeLo <> $72e8e3c1) or + (queueId <> $db72af28) then + begin + + //if (not IsDevKit) or (not IsDiag) + + if (pipeHi <> $e13ec1f1) or + (pipeLo <> $76c0801c) or + (queueId <> $75c36152) then + begin + + if (pipeHi = 2) and + (pipeLo = 3) then + begin + Exit(Integer($804c000a)); + end; + + if (1 < (pipeHi - 1)) then + begin + Exit(Integer($804c000b)); + end; + + if (3 < pipeLo) then + begin + Exit(Integer($804c000b)); + end; + + if (7 < queueId) then + begin + Exit(Integer($804c000b)); + end; + + end else + begin + pipeLo :=3; + pipeHi :=2; + queueId:=3; + end; + end else + begin + queueId:=4; + pipeLo :=3; + pipeHi :=2; + end; + + id:=(pipeHi - 1) * 32 + pipeLo * 8 + queueId; + + gc_unmap_hqd(@map_queue_hqd[id]); + + map_queue_valid[id]:=False; +end; + Function gc_ding_dong(data:p_ding_dong_args):Integer; var pipeHi :DWORD; @@ -756,6 +816,15 @@ begin rw_wunlock(ring_gfx_lock); end; + $C00C810E: //sceGnmUnmapComputeQueue + begin + rw_wlock(ring_gfx_lock); + + Result:=gc_unmap_compute_queue(data); + + rw_wunlock(ring_gfx_lock); + end; + $C010811C: //sceGnmDingDong begin start_gfx_ring;