diff --git a/chip/pm4_me.pas b/chip/pm4_me.pas index 5a0df8bc..89341ec2 100644 --- a/chip/pm4_me.pas +++ b/chip/pm4_me.pas @@ -36,6 +36,7 @@ uses sys_event, time, + md_time, kern_thr, md_sleep, bittype, @@ -47,18 +48,23 @@ uses si_ci_vi_merged_groups; type + t_on_submit_flip_eop=function(submit_id:QWORD):Integer; + p_pm4_me=^t_pm4_me; t_pm4_me=object // queue:TIntrusiveMPSCQueue; //p_pm4_stream event:PRTLEvent; on_idle:TProcedure; + on_submit_flip_eop:t_on_submit_flip_eop; // started:Pointer; td:p_kthread; // gc_knlist:p_knlist; // + rel_time:QWORD; + // procedure Init(knlist:p_knlist); procedure start; procedure trigger; @@ -252,7 +258,10 @@ begin for i:=0 to 31 do begin - Assert(CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].OFFSET=' +IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET )); + if (CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>0) and (CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>i) then + begin + Assert(false, 'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].OFFSET=' +IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET )); + end; Assert(CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].DEFAULT_VAL=' +IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL )); Assert(CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FLAT_SHADE=' +IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE )); Assert(CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE=0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FP16_INTERP_MODE='+IntToStr(CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE)); @@ -802,24 +811,63 @@ begin CmdBuffer.Free; end; +function mul_div_u64(m,d,v:QWORD):QWORD; sysv_abi_default; assembler; nostackframe; +asm + movq v,%rax + mulq m + divq d +end; + procedure pm4_EventWriteEop(node:p_pm4_node_EventWriteEop;me:p_pm4_me); +var + curr,diff:QWORD; begin EndFrameCapture; + curr:=md_rdtsc_unit; + diff:=curr-me^.rel_time; + + if (node^.addr<>nil) then Case node^.dataSel of // - EVENTWRITEEOP_DATA_SEL_DISCARD :; - EVENTWRITEEOP_DATA_SEL_SEND_DATA32 
:PDWORD(node^.addr)^:=node^.data; - EVENTWRITEEOP_DATA_SEL_SEND_DATA64 :PQWORD(node^.addr)^:=node^.data; - EVENTWRITEEOP_DATA_SEL_SEND_GPU_CLOCK :; //system 100Mhz global clock. - EVENTWRITEEOP_DATA_SEL_SEND_CP_PERFCOUNTER:; //GPU 800Mhz clock. - else; + EVENTWRITEEOP_DATA_SEL_DISCARD:; + + //32bit data + EVENTWRITEEOP_DATA_SEL_SEND_DATA32:PDWORD(node^.addr)^:=node^.data; + + //64bit data + EVENTWRITEEOP_DATA_SEL_SEND_DATA64:PQWORD(node^.addr)^:=node^.data; + + //system 100Mhz global clock. (relative time) + EVENTWRITEEOP_DATA_SEL_SEND_GPU_CLOCK:PQWORD(node^.addr)^:=mul_div_u64(100*1000000,UNIT_PER_SEC,diff); + + //GPU 800Mhz clock. (relative time) + EVENTWRITEEOP_DATA_SEL_SEND_CP_PERFCOUNTER:PQWORD(node^.addr)^:=mul_div_u64(800*1000000,UNIT_PER_SEC,diff); + + else + Assert(false,'pm4_EventWriteEop'); end; if (node^.intSel<>0) then begin - //on submit eop - me^.knote_eventid($40,0,rdtsc(),0); + me^.knote_eventid($40,0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???) + end; +end; + +procedure pm4_SubmitFlipEop(node:p_pm4_node_SubmitFlipEop;me:p_pm4_me); +var + curr:QWORD; +begin + if (me^.on_submit_flip_eop<>nil) then + begin + me^.on_submit_flip_eop(node^.eop_value); + end; + + curr:=md_rdtsc_unit; + + if (node^.intSel<>0) then + begin + me^.knote_eventid($40,0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???) 
end; end; @@ -837,6 +885,13 @@ begin repeat + //start relative timer + if (me^.rel_time=0) then + begin + me^.rel_time:=md_rdtsc_unit; + end; + // + stream:=nil; if me^.queue.Pop(stream) then begin @@ -850,6 +905,7 @@ begin ntDrawIndex2 :pm4_DrawIndex2 (Pointer(node)); ntDrawIndexAuto:pm4_DrawIndexAuto(Pointer(node)); ntEventWriteEop:pm4_EventWriteEop(Pointer(node),me); + ntSubmitFlipEop:pm4_SubmitFlipEop(Pointer(node),me); else end; @@ -862,6 +918,7 @@ begin Continue; end; + me^.rel_time:=0; //reset time // if (me^.on_idle<>nil) then begin diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index e4121950..ef9aa6d1 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -6,6 +6,7 @@ unit pm4_pfp; interface uses + sysutils, mqueue, bittype, pm4_ring, @@ -1305,8 +1306,8 @@ end; procedure onIndexType(pctx:p_pfp_ctx;Body:PPM4CMDDRAWINDEXTYPE); begin - Assert(Body^.swapMode=0); pctx^.CX_REG.VGT_DMA_INDEX_TYPE.INDEX_TYPE:=Body^.indexType; + pctx^.CX_REG.VGT_DMA_INDEX_TYPE.SWAP_MODE :=Body^.swapMode; pctx^.UC_REG.VGT_INDEX_TYPE.INDEX_TYPE :=Body^.indexType; end; @@ -1373,6 +1374,17 @@ begin pctx^.UC_REG); end; +procedure onDispatchDirect(pctx:p_pfp_ctx;Body:PPM4CMDDISPATCHDIRECT); +begin + + pctx^.SH_REG.COMPUTE_DIM_X:=Body^.dimX; + pctx^.SH_REG.COMPUTE_DIM_Y:=Body^.dimY; + pctx^.SH_REG.COMPUTE_DIM_Z:=Body^.dimZ; + pctx^.SH_REG.COMPUTE_DISPATCH_INITIATOR:=Body^.dispatchInitiator; + + pctx^.stream_dcb.DispatchDirect(pctx^.SH_REG); +end; + procedure onPushMarker(Body:PChar); begin Writeln('\HINT_PUSH_MARKER:',Body); @@ -1524,7 +1536,7 @@ begin IT_DRAW_INDEX_2 :onDrawIndex2 (pctx,buff); IT_DRAW_INDEX_OFFSET_2:Assert(false,'IT_DRAW_INDEX_OFFSET_2'); IT_DRAW_INDEX_AUTO :onDrawIndexAuto (pctx,buff); - IT_DISPATCH_DIRECT :Assert(false,'IT_DISPATCH_DIRECT'); + IT_DISPATCH_DIRECT :onDispatchDirect (pctx,buff); IT_PFP_SYNC_ME :Assert(false,'IT_PFP_SYNC_ME'); IT_SET_BASE :onSetBase(buff); diff --git a/chip/pm4_ring.pas b/chip/pm4_ring.pas index 8ed57c28..035e74dc 100644 --- 
a/chip/pm4_ring.pas +++ b/chip/pm4_ring.pas @@ -9,30 +9,9 @@ uses errno, md_map, systm, + pm4defs, bittype; -type - //IT_INDIRECT_BUFFER_CNST = $00000033; ccb 0xc0023300 - //IT_INDIRECT_BUFFER = $0000003f; dcb 0xc0023f00 - - PPM4CMDINDIRECTBUFFER=^PM4CMDINDIRECTBUFFER; - PM4CMDINDIRECTBUFFER=bitpacked record - header :DWORD; // PM4_TYPE_3_HEADER - ibBase :bit40; // Indirect buffer base address, must be 4 byte aligned - reserved0:bit24; - // - ibSize :bit20; // Indirect buffer size - reserved1:bit4; - vmid :bit4; // Virtual memory domain ID for command buffer - reserved2:bit4; - end; - - PPM4CMDSWITCHBUFFER=^PM4CMDSWITCHBUFFER; - PM4CMDSWITCHBUFFER=bitpacked record - header:DWORD; - data :DWORD; - end; - const GC_RING_SIZE=$80000; GC_RING_PADD=64*1024; @@ -59,6 +38,7 @@ function gc_ring_pm4_drain(ring:p_pm4_ring;size:DWORD):Boolean; function gc_submit_internal (ring:p_pm4_ring;count:DWORD;cmds:Pointer):Integer; function gc_switch_buffer_internal(ring:p_pm4_ring):Integer; +function gc_pm4_event_write_eop (ring:p_pm4_ring;addr:Pointer;data:QWORD;intSel,wait:Integer):Integer; implementation @@ -293,5 +273,43 @@ begin gc_ring_pm4_submit(ring); end; +function gc_pm4_event_write_eop(ring:p_pm4_ring;addr:Pointer;data:QWORD;intSel,wait:Integer):Integer; +var + buf:PPM4CMDEVENTWRITEEOP; +begin + Result:=0; + + buf:=nil; + if not gc_ring_pm4_alloc(ring,sizeof(PM4CMDEVENTWRITEEOP),@buf) then + begin + Writeln(stderr,'### gc_pm4_event_write_eop : Cannot allocate a space in ring buffer.'); + Exit(EBUSY); + end; + + buf^:=Default(PM4CMDEVENTWRITEEOP); + + // IT_EVENT_WRITE_EOP + DWORD(buf^.header):=$C0044700; + + if (wait=0) then + begin + buf^.eventType:=kEopCbDbReadsDone; + end else + begin + buf^.eventType:=kEopFlushCbDbCaches; + end; + + buf^.eventIndex :=5; + buf^.invalidateL2:=1; + buf^.address :=QWORD(addr); + buf^.intSel :=ord(intSel<>0)*2; + buf^.dataSel :=EVENTWRITEEOP_DATA_SEL_SEND_DATA64; + buf^.DATA :=data; + + gc_ring_pm4_submit(ring); +end; + + end. 
+ diff --git a/chip/pm4_stream.pas b/chip/pm4_stream.pas index a3a8c84c..e2f78945 100644 --- a/chip/pm4_stream.pas +++ b/chip/pm4_stream.pas @@ -55,13 +55,15 @@ type ntEventWrite, ntEventWriteEop, ntEventWriteEos, + ntSubmitFlipEop, ntDmaData, ntWriteData, ntWaitRegMem, ntFastClear, ntResolve, ntDrawIndex2, - ntDrawIndexAuto + ntDrawIndexAuto, + ntDispatchDirect ); p_pm4_node=^t_pm4_node; @@ -99,6 +101,12 @@ type command :Byte; end; + p_pm4_node_SubmitFlipEop=^t_pm4_node_SubmitFlipEop; + t_pm4_node_SubmitFlipEop=packed object(t_pm4_node) + eop_value:QWORD; + intSel :Byte + end; + p_pm4_node_DmaData=^t_pm4_node_DmaData; t_pm4_node_DmaData=packed object(t_pm4_node) dst :QWORD; @@ -151,6 +159,11 @@ type UC_REG:TUSERCONFIG_REG_SHORT; // 0xC000 end; + p_pm4_node_DispatchDirect=^t_pm4_node_DispatchDirect; + t_pm4_node_DispatchDirect=object(t_pm4_node) + SH_REG:TSH_REG_GROUP; // 0x2C00 + end; + p_pm4_stream=^t_pm4_stream; t_pm4_stream=object next_:Pointer; @@ -168,6 +181,7 @@ type procedure EventWrite (eventType:Byte); procedure EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte); procedure EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte); + procedure SubmitFlipEop(eop_value:QWORD;intSel:Byte); procedure DmaData (dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte); procedure WriteData (dstSel:Byte;dst,src:QWORD;num_dw:Word); procedure WaitRegMem (pollAddr:QWORD;refValue,mask:DWORD;compareFunc:Byte); @@ -181,6 +195,7 @@ type procedure DrawIndexAuto(var SH_REG:TSH_REG_GROUP; var CX_REG:TCONTEXT_REG_GROUP; var UC_REG:TUSERCONFIG_REG_SHORT); + procedure DispatchDirect(var SH_REG:TSH_REG_GROUP); end; implementation @@ -272,6 +287,19 @@ begin add_node(node); end; +procedure t_pm4_stream.SubmitFlipEop(eop_value:QWORD;intSel:Byte); +var + node:p_pm4_node_SubmitFlipEop; +begin + node:=allocator.Alloc(SizeOf(t_pm4_node_SubmitFlipEop)); + + node^.ntype :=ntSubmitFlipEop; + node^.eop_value:=eop_value; + node^.intSel 
:=intSel; + + add_node(node); +end; + procedure t_pm4_stream.DmaData(dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte); var node:p_pm4_node_DmaData; @@ -409,6 +437,18 @@ begin add_node(node); end; +procedure t_pm4_stream.DispatchDirect(var SH_REG:TSH_REG_GROUP); +var + node:p_pm4_node_DispatchDirect; +begin + node:=allocator.Alloc(SizeOf(t_pm4_node_DispatchDirect)); + + node^.ntype :=ntDispatchDirect; + node^.SH_REG:=SH_REG; + + add_node(node); +end; + // procedure t_cache_block_allocator.init; diff --git a/chip/pm4defs.pas b/chip/pm4defs.pas index e0c028f3..f7738242 100644 --- a/chip/pm4defs.pas +++ b/chip/pm4defs.pas @@ -403,8 +403,8 @@ const kEopCsDone = $00000028; //wait cs shader, label .....EOP type - PPM4CMDEVENTWRITEEOP=^TPM4CMDEVENTWRITEEOP; - TPM4CMDEVENTWRITEEOP=bitpacked record + PPM4CMDEVENTWRITEEOP=^PM4CMDEVENTWRITEEOP; + PM4CMDEVENTWRITEEOP=bitpacked record header :PM4_TYPE_3_HEADER; eventType :bit6; //00 // < event type written to VGT_EVENT_INITIATOR @@ -739,6 +739,27 @@ type dispatchInitiator:TCOMPUTE_DISPATCH_INITIATOR; ///< Dispatch Initiator Register end; + //IT_INDIRECT_BUFFER_CNST = $00000033; ccb 0xc0023300 + //IT_INDIRECT_BUFFER = $0000003f; dcb 0xc0023f00 + + PPM4CMDINDIRECTBUFFER=^PM4CMDINDIRECTBUFFER; + PM4CMDINDIRECTBUFFER=bitpacked record + header :DWORD; // PM4_TYPE_3_HEADER + ibBase :bit40; // Indirect buffer base address, must be 4 byte aligned + reserved0:bit24; + // + ibSize :bit20; // Indirect buffer size + reserved1:bit4; + vmid :bit4; // Virtual memory domain ID for command buffer + reserved2:bit4; + end; + + PPM4CMDSWITCHBUFFER=^PM4CMDSWITCHBUFFER; + PM4CMDSWITCHBUFFER=bitpacked record + header:DWORD; + data :DWORD; + end; + TUSERCONFIG_REG_SHORT=packed record CP_COHER_BASE_HI :TCP_COHER_BASE_HI; // 0xC079 CP_COHER_CNTL :TCP_COHER_CNTL; // 0xC07C diff --git a/chip/shader_dump.pas b/chip/shader_dump.pas index 3c553c36..2e7e1fa4 100644 --- a/chip/shader_dump.pas +++ b/chip/shader_dump.pas @@ -30,6 
+30,9 @@ function DumpVS(var GPU_REGS:TGPU_REGS):RawByteString; implementation +uses + kern_dmem; + Procedure DUMP_BLOCK(F:THandle;REG:WORD;P:Pointer;Size:DWORD); const MAX_SIZE=($FFFF+1)*4; @@ -173,6 +176,12 @@ begin base:=GPU_REGS.get_code_addr(vShaderStageCs); if (base<>nil) then begin + + if not get_dmem_ptr(base,@base,nil) then + begin + Assert(false,'DumpCS:get_dmem_ptr'); + end; + size:=_calc_shader_size(base); hash:=MurmurHash64A(base,size,0); @@ -216,6 +225,12 @@ begin base:=GPU_REGS.get_code_addr(vShaderStagePs); if (base<>nil) then begin + + if not get_dmem_ptr(base,@base,nil) then + begin + Assert(false,'DumpPS:get_dmem_ptr'); + end; + size:=_calc_shader_size(base); hash:=MurmurHash64A(base,size,0); @@ -268,6 +283,12 @@ begin base:=GPU_REGS.get_code_addr(vShaderStageVs); if (base<>nil) then begin + + if not get_dmem_ptr(base,@base,nil) then + begin + Assert(false,'DumpVS:get_dmem_ptr'); + end; + size:=_calc_shader_size(base); hash:=MurmurHash64A(base,size,0); diff --git a/sys/dev/dev_dce.pas b/sys/dev/dev_dce.pas index 451e5f0f..5f982225 100644 --- a/sys/dev/dev_dce.pas +++ b/sys/dev/dev_dce.pas @@ -17,6 +17,8 @@ uses procedure dce_initialize(); +function TriggerFlipEop(submit_id:QWORD):Integer; + type p_dce_page=^t_dce_page; t_dce_page=packed record //0x170 @@ -936,6 +938,17 @@ type rout :PQWORD; //extraout of result end; +function TriggerFlipEop(submit_id:QWORD):Integer; +begin + if (dce_handle=nil) then + begin + Result:=EINVAL; + end else + begin + Result:=dce_handle.TriggerFlipEop(submit_id); + end; +end; + Function dce_submit_flip(dev:p_cdev;data:p_submit_flip_args):Integer; var submit:t_submit_flip; diff --git a/sys/dev/dev_gc.pas b/sys/dev/dev_gc.pas index dbbd1fd4..c96abbab 100644 --- a/sys/dev/dev_gc.pas +++ b/sys/dev/dev_gc.pas @@ -28,10 +28,13 @@ uses kern_proc, kern_thr, md_sleep, + pm4defs, pm4_ring, pm4_pfp, pm4_me, + dev_dce, + vDevice, vMemory, @@ -128,6 +131,17 @@ var pm4_me_gfx:t_pm4_me; +procedure 
onEventWriteEop(pctx:p_pfp_ctx;Body:PPM4CMDEVENTWRITEEOP); +var + submit_id:DWORD; +begin + submit_id:=Body^.DATA; + + Writeln('submit eop flip:',submit_id); + + pctx^.stream_dcb.SubmitFlipEop(Body^.DATA,(Body^.intSel shr 1)); +end; + function pm4_parse_ring(pctx:p_pfp_ctx;token:DWORD;buff:Pointer):Integer; var ibuf:t_pm4_ibuffer; @@ -173,6 +187,10 @@ begin Writeln('SWITCH_BUFFER'); end; end; + $C0044700: //IT_EVENT_WRITE_EOP + begin + onEventWriteEop(pctx,buff); + end else; end; @@ -276,8 +294,6 @@ begin if (GC_SRI_event=nil) then Exit; if (pm4_me_gfx.started=nil) then Exit; - pm4_me_gfx.on_idle:=@gc_idle; - gc_wait_GC_SRI; GC_SRI_label:=1; @@ -383,9 +399,9 @@ begin $C0108102: //submit begin - rw_wlock(ring_gfx_lock); + start_gfx_ring; - start_gfx_ring; + rw_wlock(ring_gfx_lock); Result:=gc_submit_internal(@ring_gfx, p_submit_args(data)^.count, @@ -396,11 +412,41 @@ begin trigger_gfx_ring; end; - $C0088101: //switch_buffer + $C020810C: //submit eop begin + start_gfx_ring; + rw_wlock(ring_gfx_lock); - start_gfx_ring; + Result:=gc_submit_internal(@ring_gfx, + p_submit_args(data)^.count, + p_submit_args(data)^.cmds); + + if (Result=0) then + begin + { + The original data is an incremental "submit_id | (vmid << 32)", + now this is directly sended "eop_v" + } + + Result:=gc_pm4_event_write_eop(@ring_gfx, + nil, + p_submit_args(data)^.eop_v, + 1, + p_submit_args(data)^.wait + ); + end; + + rw_wunlock(ring_gfx_lock); + + trigger_gfx_ring; + end; + + $C0088101: //switch_buffer + begin + start_gfx_ring; + + rw_wlock(ring_gfx_lock); Result:=gc_switch_buffer_internal(@ring_gfx); @@ -630,6 +676,8 @@ begin gc_ring_create(@ring_gfx,GC_RING_SIZE); pm4_me_gfx.Init(@gc_knlist); + pm4_me_gfx.on_idle:=@gc_idle; + pm4_me_gfx.on_submit_flip_eop:=@dev_dce.TriggerFlipEop; end; diff --git a/sys/dev/display_interface.pas b/sys/dev/display_interface.pas index 28ce3f0e..41648a80 100644 --- a/sys/dev/display_interface.pas +++ b/sys/dev/display_interface.pas @@ -116,6 +116,7 @@ type function 
UnregisterBuffer (index:Integer):Integer; virtual; function SubmitFlip (submit:p_submit_flip):Integer; virtual; function SubmitFlipEop (submit:p_submit_flip;submit_id:QWORD):Integer; virtual; + function TriggerFlipEop (submit_id:QWORD):Integer; virtual; function Vblank ():Integer; virtual; end; @@ -219,6 +220,11 @@ begin Result:=0; end; +function TDisplayHandle.TriggerFlipEop(submit_id:QWORD):Integer; +begin + Result:=0; +end; + function TDisplayHandle.Vblank():Integer; begin Result:=0; diff --git a/sys/dev/display_soft.pas b/sys/dev/display_soft.pas index f753f88e..61df916c 100644 --- a/sys/dev/display_soft.pas +++ b/sys/dev/display_soft.pas @@ -29,20 +29,36 @@ type end; PQNode=^TQNode; - TOnParent=Procedure(node:PQNode) of object; TQNode=object - next_ :PQNode; + next_:PQNode; + end; + + PQNodeSubmit=^TQNodeSubmit; + TQNodeSubmit=object(TQNode) submit:t_submit_flip; tsc :QWORD; end; - TSubmitQueue=object - FNodes:array[0..31] of TQNode; + TSubmitAlloc=object + FNodes:array[0..31] of TQNodeSubmit; FAlloc:TIntrusiveMPSCQueue; - FQueue:TIntrusiveMPSCQueue; Procedure Init; - function Alloc:PQNode; - procedure Free(P:PQNode); + function Alloc:PQNodeSubmit; + procedure Free(P:PQNodeSubmit); + end; + + PQNodeFlip=^TQNodeFlip; + TQNodeFlip=object(TQNode) + submit :PQNodeSubmit; + submit_id:QWORD; + end; + + TFlipAlloc=object + FNodes:array[0..17] of TQNodeFlip; + FAlloc:TIntrusiveMPSCQueue; + Procedure Init; + function Alloc:PQNodeFlip; + procedure Free(P:PQNodeFlip); end; TDisplayHandleSoft=class(TDisplayHandle) @@ -51,7 +67,12 @@ type FEvent:PRTLEvent; Ftd:p_kthread; - FQueue:TSubmitQueue; + + FSubmitAlloc:TSubmitAlloc; + FSubmitQueue:TIntrusiveMPSCQueue; + + FFlipAlloc:TFlipAlloc; + FTerminate:Boolean; flip_rate:Integer; @@ -80,10 +101,11 @@ type function RegisterBuffer (buf:p_register_buffer):Integer; override; function UnregisterBuffer (index:Integer):Integer; override; function SubmitFlip (submit:p_submit_flip):Integer; override; - //function SubmitFlipEop 
(submit:p_submit_flip;submit_id:QWORD):Integer; virtual; + function SubmitFlipEop (submit:p_submit_flip;submit_id:QWORD):Integer; override; + function TriggerFlipEop (submit_id:QWORD):Integer; override; function Vblank ():Integer; override; // - procedure OnSubmit(Node:PQNode); + procedure OnSubmit(Node:PQNodeSubmit); end; implementation @@ -100,30 +122,58 @@ uses kern_dmem, dev_dce; -Procedure TSubmitQueue.Init; +// + +Procedure TSubmitAlloc.Init; var i:Integer; begin FAlloc.Create; - FQueue.Create; For i:=0 to High(FNodes) do begin FAlloc.Push(@FNodes[i]); end; end; -function TSubmitQueue.Alloc:PQNode; +function TSubmitAlloc.Alloc:PQNodeSubmit; begin Result:=nil; FAlloc.Pop(Result); end; -procedure TSubmitQueue.Free(P:PQNode); +procedure TSubmitAlloc.Free(P:PQNodeSubmit); begin if (P=nil) then Exit; FAlloc.Push(P); end; +// + +Procedure TFlipAlloc.Init; +var + i:Integer; +begin + FAlloc.Create; + For i:=0 to High(FNodes) do + begin + FAlloc.Push(@FNodes[i]); + end; +end; + +function TFlipAlloc.Alloc:PQNodeFlip; +begin + Result:=nil; + FAlloc.Pop(Result); +end; + +procedure TFlipAlloc.Free(P:PQNodeFlip); +begin + if (P=nil) then Exit; + FAlloc.Push(P); +end; + +// + procedure dce_thread(parameter:pointer); SysV_ABI_CDecl; forward; function TDisplayHandleSoft.Open():Integer; @@ -138,7 +188,10 @@ begin FEvent:=RTLEventCreate; - FQueue.Init; + FSubmitAlloc.Init; + FSubmitQueue.Create; + + FFlipAlloc.Init; if (Ftd=nil) then begin @@ -496,7 +549,7 @@ begin bi.bmiHeader.biBitCount :=32; bi.bmiHeader.biCompression:=BI_RGB; - if (attr^.attr.tilingMode<>0) then + if {(attr^.attr.tilingMode<>0)} false then begin //alloc aligned 128x128 bi.bmiHeader.biWidth :=(attr^.attr.pitchPixel+127) and (not 127); @@ -558,7 +611,7 @@ function TDisplayHandleSoft.SubmitFlip(submit:p_submit_flip):Integer; var buf :p_buffer; attr:p_attr; - Node:PQNode; + Node:PQNodeSubmit; begin if (submit^.bufferIndex<>-1) then begin @@ -568,29 +621,112 @@ begin if (attr^.init=0) then Exit(EINVAL); end; - 
Node:=FQueue.Alloc; + Node:=FSubmitAlloc.Alloc; if (Node=nil) then Exit(EBUSY); Node^.submit:=submit^; Node^.tsc :=rdtsc(); - FQueue.FQueue.Push(Node); - if (submit^.bufferIndex<>-1) then begin dce_page^.labels[submit^.bufferIndex]:=1; end; - last_status.flipPendingNum0:=last_status.flipPendingNum0+1; + // + System.InterlockedIncrement(last_status.flipPendingNum0); - if (submit^.flipMode=SCE_VIDEO_OUT_FLIP_MODE_HSYNC) then + FSubmitQueue.Push(Node); + + if (Node^.submit.flipMode=SCE_VIDEO_OUT_FLIP_MODE_HSYNC) then begin RTLEventSetEvent(FEvent); end; + // Result:=0; end; +function TDisplayHandleSoft.SubmitFlipEop(submit:p_submit_flip;submit_id:QWORD):Integer; +var + buf :p_buffer; + attr:p_attr; + Node:PQNodeSubmit; + Flip:PQNodeFlip; +begin + if (submit^.bufferIndex<>-1) then + begin + buf:=@m_bufs[submit^.bufferIndex]; + if (buf^.init=0) then Exit(EINVAL); + attr:=@m_attr[buf^.attr]; + if (attr^.init=0) then Exit(EINVAL); + end; + + Node:=FSubmitAlloc.Alloc; + if (Node=nil) then Exit(EBUSY); + + Flip:=FFlipAlloc.Alloc; + if (Flip=nil) then begin FSubmitAlloc.Free(Node); Exit(EBUSY); end; //return Node to the submit pool, otherwise it leaks + + Node^.submit:=submit^; + Node^.tsc :=rdtsc(); + + Flip^.next_ :=nil; + Flip^.submit :=Node; + Flip^.submit_id:=submit_id; + + if (submit^.bufferIndex<>-1) then + begin + dce_page^.labels[submit^.bufferIndex]:=1; + end; + + System.InterlockedIncrement(last_status.gcQueueNum); + + Result:=0; +end; + +function TDisplayHandleSoft.TriggerFlipEop(submit_id:QWORD):Integer; +var + Node:PQNodeSubmit; + Flip:PQNodeFlip; + i:Integer; +begin + Result:=0; + // + For i:=0 to High(FFlipAlloc.FNodes) do + begin + Flip:=@FFlipAlloc.FNodes[i]; + + if (Flip^.next_=nil) and + (Flip^.submit<>nil) and + (Flip^.submit_id=submit_id) then + begin + Node:=Flip^.submit; + + Flip^.submit :=nil; + Flip^.submit_id:=0; + FFlipAlloc.Free(Flip); + + System.InterlockedDecrement(last_status.gcQueueNum); + + // + System.InterlockedIncrement(last_status.flipPendingNum0); + + FSubmitQueue.Push(Node); + + if
(Node^.submit.flipMode=SCE_VIDEO_OUT_FLIP_MODE_HSYNC) then + begin + RTLEventSetEvent(FEvent); + end; + // + + Exit; + end; + + end; + // + Result:=1; +end; + function TDisplayHandleSoft.Vblank():Integer; begin vblank_count:=vblank_count+1; @@ -604,7 +740,7 @@ begin Result:=0; end; -procedure TDisplayHandleSoft.OnSubmit(Node:PQNode); +procedure TDisplayHandleSoft.OnSubmit(Node:PQNodeSubmit); var i:Integer; submit:p_submit_flip; @@ -645,7 +781,8 @@ begin end; dce_page^.label_:=0; - last_status.flipPendingNum0:=last_status.flipPendingNum0-1; + System.InterlockedDecrement(last_status.flipPendingNum0); + last_status.flipArg :=submit^.flipArg; last_status.flipArg2 :=submit^.flipArg2; last_status.count :=last_status.count+1; @@ -678,7 +815,7 @@ end; procedure dce_thread(parameter:pointer); SysV_ABI_CDecl; var dce:TDisplayHandleSoft; - Node:PQNode; + Node:PQNodeSubmit; begin dce:=TDisplayHandleSoft(parameter); @@ -686,10 +823,10 @@ begin RTLEventWaitFor(dce.FEvent); Node:=nil; - if dce.FQueue.FQueue.Pop(Node) then + if dce.FSubmitQueue.Pop(Node) then begin dce.OnSubmit(Node); - dce.FQueue.Free(Node); + dce.FSubmitAlloc.Free(Node); end; until dce.FTerminate; diff --git a/sys/md/md_exception.pas b/sys/md/md_exception.pas index 58f3f698..0fef7913 100644 --- a/sys/md/md_exception.pas +++ b/sys/md/md_exception.pas @@ -241,6 +241,7 @@ end; function ProcessException(p:PExceptionPointers):longint; stdcall; begin Result:=EXCEPTION_CONTINUE_SEARCH; + if (curkthread=nil) then Exit; case p^.ExceptionRecord^.ExceptionCode of FPC_EXCEPTION_CODE :Exit; @@ -272,7 +273,6 @@ end; function UnhandledException(p:PExceptionPointers):longint; stdcall; var - td:p_kthread; rec:PExceptionRecord; code: Longint; ExObj:Exception; @@ -280,14 +280,15 @@ begin Result:=EXCEPTION_CONTINUE_SEARCH; case p^.ExceptionRecord^.ExceptionCode of - FPC_EXCEPTION_CODE :Exit; - FPC_SET_EH_HANDLER :Exit(EXCEPTION_CONTINUE_EXECUTION); - EXCEPTION_BREAKPOINT :Exit; - EXCEPTION_SET_THREADNAME:Exit; + FPC_EXCEPTION_CODE 
:Exit; + FPC_SET_EH_HANDLER :Exit(EXCEPTION_CONTINUE_EXECUTION); + EXCEPTION_BREAKPOINT :Exit; + EXCEPTION_SET_THREADNAME :Exit; + DBG_PRINTEXCEPTION_C :Exit(EXCEPTION_CONTINUE_EXECUTION); + DBG_PRINTEXCEPTION_WIDE_C:Exit(EXCEPTION_CONTINUE_EXECUTION); //RenderDoc issuse end; - td:=curkthread; - if (td=nil) then Exit; + if (curkthread=nil) then Exit; rec:=p^.ExceptionRecord; @@ -304,8 +305,10 @@ begin ExObj:=Exception(TExceptObjProc(ExceptObjProc)(abs(code),rec^)); end; - if curkthread<>nil then + if (curkthread<>nil) then + begin Writeln('curkthread^.td_name:',curkthread^.td_name); + end; if (ExObj=nil) then begin diff --git a/sys/time.pas b/sys/time.pas index 5d1e8056..a6c158f5 100644 --- a/sys/time.pas +++ b/sys/time.pas @@ -308,9 +308,13 @@ end; function itimerfix(tv:p_timeval):Integer; begin if (tv^.tv_sec < 0) or (tv^.tv_usec < 0) or (tv^.tv_usec >= 1000000) then + begin Exit(EINVAL); + end; if (tv^.tv_sec=0) and (tv^.tv_usec<>0) and (tv^.tv_usec < tick) then + begin tv^.tv_usec:=tick; + end; Exit(0); end; diff --git a/vulkan/vRegs2Vulkan.pas b/vulkan/vRegs2Vulkan.pas index 79c7c29f..59bec2e0 100644 --- a/vulkan/vRegs2Vulkan.pas +++ b/vulkan/vRegs2Vulkan.pas @@ -1403,6 +1403,11 @@ end; function TGPU_REGS.GET_INDEX_TYPE:TVkIndexType; begin + if (CX_REG^.VGT_DMA_INDEX_TYPE.SWAP_MODE<>0) then + begin + Assert(false,'swapMode:'+IntToStr(CX_REG^.VGT_DMA_INDEX_TYPE.SWAP_MODE)); + end; + Case UC_REG^.VGT_INDEX_TYPE.INDEX_TYPE of VGT_INDEX_16:Result:=VK_INDEX_TYPE_UINT16; VGT_INDEX_32:Result:=VK_INDEX_TYPE_UINT32;