unit pm4_stream;

{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}

interface

uses
 sysutils,
 mqueue,
 LFQueue,
 md_map,
 bittype,
 pm4defs,
 si_ci_vi_merged_enum,
 si_ci_vi_merged_registers,
 si_ci_vi_merged_groups,
 Vulkan,
 vImage,
 vShader,
 vShaderExt,
 vShaderManager,
 vRegs2Vulkan,
 vImageTiling,
 g_node_splay
 ;

type
 //Block allocator state; the method bodies live further down in the unit.
 t_cache_block_allocator=object
  const
   mem_size =64*1024;
   max_count=256;
  //
  var
   queue:TIntrusiveMPSCQueue;
   xlock:Pointer;
   count:QWORD;
  //
  procedure Init;
  Function  Alloc:Pointer;
  Procedure Free(node:Pointer);
 end;

 //Per-stream allocator; every node/resource of a stream is taken from it
 //and released at once by t_pm4_stream.Free.
 t_pm4_allocator=object
  type
   PAllocNode=^TAllocNode;
   TAllocNode=packed record
    link:PAllocNode;
    size:QWORD;
    data:record end; //zero-sized marker for the payload that follows the header
   end;
  var
   pHead:SLIST_HEAD;
   curr_apos:ptruint; //alloc pos in current node
   curr_size:ptruint; //useable size of current node
   used_size:ptruint; //full usable size
   full_size:ptruint; //full alloc size
  Function  Alloc(Size:ptruint):Pointer;
  Procedure Free;
 end;

 //Snapshot of the render state captured for one draw (filled by Build_rt_info).
 p_pm4_rt_info=^t_pm4_rt_info;
 t_pm4_rt_info=object
  USERDATA  :TGPU_USERDATA;
  SHADERDATA:TGPU_SHADERDATA_RT;

  ShaderGroup:TvShaderGroup;

  RT_INFO:array[0..7] of TRT_INFO;
  DB_INFO:TDB_INFO;

  BLEND_INFO:TBLEND_INFO;

  VPORT  :array[0..15] of TVkViewport;
  SCISSOR:array[0..15] of TVkRect2D;

  RASTERIZATION:TRASTERIZATION_INFO;
  MULTISAMPLE  :TVkPipelineMultisampleStateCreateInfo;

  SCREEN_RECT:TVkRect2D;
  SCREEN_SIZE:TVkExtent2D;

  RT_COUNT  :Byte;    //number of used RT_INFO entries
  DB_ENABLE :Boolean; //DB_INFO is valid
  PRIM_TYPE :Byte;
  PRIM_RESET:Byte;
  VP_COUNT  :Byte;    //number of used VPORT/SCISSOR entries
  PROVOKING :Byte;
 end;

 t_pm4_stream_type=(
  stGfxRing,
  stGfxDcb,
  stGfxCcb,
  stCompute0,
  stCompute1,
  stCompute2,
  stCompute3,
  stCompute4,
  stCompute5,
  stCompute6
 );

 //Discriminator stored in t_pm4_node.ntype
 t_pm4_node_type=(
  ntHint,
  ntLoadConstRam,
  ntDumpConstRam,
  ntIncrementCE,
  ntIncrementDE,
  ntWaitOnCECounter,
  ntWaitOnDECounterDiff,
  ntEventWrite,
  ntEventWriteEop,
  ntEventWriteEos,
  ntSubmitFlipEop,
  ntReleaseMem,
  ntDmaData,
  ntWriteData,
  ntWaitRegMem,
  ntFastClear,
  ntResolve,
  ntClearDepth,
  ntDrawIndex2,
  ntDrawIndexOffset2,
  ntDrawIndexAuto,
  ntDispatchDirect,
  ntPfpSyncMe
 );

const
 //values of t_pm4_resource.rtype
 R_IMG  =0;
 R_BUF  =1;
 R_HTILE=2;
 R_CMASK=3;

type
 //Usage flags of a resource. DATA overlays all other fields so that two
 //usages can be merged with a single OR (see operator +).
 t_pm4_usage=packed record
  case Byte of
   0:(DATA:QWORD);
   1:(mem_usage:Byte;
      shd_usage:Byte;
      clr_usage:Byte;
      dsa_usage:Byte;
      img_usage:s_image_usage
     );
 end;

//the overlay above only works while s_image_usage occupies exactly 4 bytes
{$IF sizeof(s_image_usage)<>4}{$STOP sizeof(s_image_usage)<>4}{$ENDIF}

operator + (a,b:t_pm4_usage):t_pm4_usage;

type
 p_pm4_resource_instance    =^t_pm4_resource_instance;
 p_pm4_resource_curr_scope  =^t_pm4_resource_curr_scope;
 p_pm4_resource_stream_scope=^t_pm4_resource_stream_scope;

 //List of resource instances queued by insert_image_resource /
 //insert_buffer_resource when a read has no previous usage recorded.
 p_pm4_resource_init_scope=^t_pm4_resource_init_scope;
 t_pm4_resource_init_scope=object
  list:TAILQ_HEAD; //p_pm4_resource_instance
  function  first:p_pm4_resource_instance;
  procedure insert(i:p_pm4_resource_instance);
 end;

 //One memory resource (image or buffer range) known to a stream.
 p_pm4_resource=^t_pm4_resource;
 t_pm4_resource=object
  pLeft :p_pm4_resource; //t_pm4_resource_set
  pRight:p_pm4_resource; //t_pm4_resource_set
  //
  rwrite:p_pm4_resource_instance; //last instance that wrote/cleared (set in connect_resource_instance)
  //
  rtype :Integer; //R_IMG/R_BUF/R_HTILE/R_CMASK
  rsize :DWORD;
  rkey  :TvImageKey;
  //
  uall:t_pm4_usage; //union of every usage seen for this resource
  //
  rimage:TObject;
  //
  rcombined :Boolean;
  rclear    :Boolean;
  rcmask    :Boolean;
  rwriteback:Boolean;
  //
  function c(n1,n2:p_pm4_resource):Integer; static;
 end;

 //NOTE(review): the generic argument list was lost from this line in the
 //damaged source and has been restored by hand - confirm it against the
 //declaration of TNodeSplay in g_node_splay.
 t_pm4_resource_set=specialize TNodeSplay<t_pm4_resource>;

 //Usage of one resource inside one node scope.
 t_pm4_resource_instance=object
  init_entry:TAILQ_ENTRY; //p_pm4_resource_init_scope
  //
  pLeft :p_pm4_resource_instance; //t_pm4_resource_instance_set
  pRight:p_pm4_resource_instance; //t_pm4_resource_instance_set
  //
  init_scope:p_pm4_resource_init_scope;
  curr_scope:p_pm4_resource_curr_scope;
  //
  resource:p_pm4_resource;
  //
  prepared:Boolean;
  //
  curr:t_pm4_usage;
  prev:t_pm4_usage;
  next:t_pm4_usage;
  //
  prev_overlap:t_pm4_usage;
  next_overlap:t_pm4_usage;
  //
  function c(n1,n2:p_pm4_resource_instance):Integer; static;
 end;

 //NOTE(review): generic argument list restored by hand, see note above.
 t_pm4_resource_instance_set=specialize TNodeSplay<t_pm4_resource_instance>;

 //Set of resource instances touched by a single node.
 t_pm4_resource_curr_scope=object
  resource_instance_set:t_pm4_resource_instance_set;
  function  Min:p_pm4_resource_instance;
  function  Next(node:p_pm4_resource_instance):p_pm4_resource_instance;
  procedure insert(i:p_pm4_resource_instance);
  function  find_resource_instance(r:p_pm4_resource):p_pm4_resource_instance;
  function  find_image_resource_instance (const rkey:TvImageKey):p_pm4_resource_instance;
  function  find_buffer_resource_instance(rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource_instance;
 end;

 //Stream-wide resource registry; base object of t_pm4_stream.
 t_pm4_resource_stream_scope=object
  next_:TAILQ_HEAD; //Must be the first element in memory
  //
  allocator:t_pm4_allocator;
  //
  resource_set:t_pm4_resource_set;
  init_scope:t_pm4_resource_init_scope;
  //
  function  find_image_resource       (const rkey:TvImageKey):p_pm4_resource;
  function  fetch_image_resource      (const rkey:TvImageKey;hint:PChar):p_pm4_resource;
  function  find_buffer_resource      (rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource;
  function  fetch_buffer_resource     (rtype:Integer;addr:Pointer;size:DWORD;hint:PChar):p_pm4_resource;
  function  fetch_resource_instance   (scope:p_pm4_resource_curr_scope;r:p_pm4_resource;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
  function  insert_image_resource     (scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage;hint:PChar):p_pm4_resource_instance;
  function  insert_buffer_resource    (scope:p_pm4_resource_curr_scope;rtype:Integer;addr:Pointer;size:DWORD;mem_usage:Integer;hint:PChar):p_pm4_resource_instance;
  procedure connect_resource_instance (i:p_pm4_resource_instance);
  procedure connect_resource_scope    (scope:p_pm4_resource_curr_scope);
 end;

 //Common header of every recorded command; concrete payloads derive from it.
 p_pm4_node=^t_pm4_node;
 t_pm4_node=object
  entry:TAILQ_ENTRY;
  //
  scope:t_pm4_resource_curr_scope;
  //
  ntype:t_pm4_node_type;
  //
  id:QWORD; //assigned from a global counter in add_node
 end;

 p_pm4_node_Hint=^t_pm4_node_Hint;
 t_pm4_node_Hint=packed object(t_pm4_node)
  data:record end; //@pchar
 end;

 p_pm4_node_LoadConstRam=^t_pm4_node_LoadConstRam;
 t_pm4_node_LoadConstRam=packed object(t_pm4_node)
  addr  :Pointer;
  num_dw:Word;
  offset:Word;
 end;

 p_pm4_node_WaitOnDECounterDiff=^t_pm4_node_WaitOnDECounterDiff;
 t_pm4_node_WaitOnDECounterDiff=packed object(t_pm4_node)
  diff:DWORD;
 end;

 p_pm4_node_EventWrite=^t_pm4_node_EventWrite;
 t_pm4_node_EventWrite=packed object(t_pm4_node)
  eventType:Byte;
 end;

 p_pm4_node_EventWriteEop=^t_pm4_node_EventWriteEop;
//pm4 node payload types (this continues the interface "type" section)

 t_pm4_node_EventWriteEop=packed object(t_pm4_node)
  addr     :Pointer;
  data     :QWORD;
  eventType:Byte;
  dataSel  :Byte;
  intSel   :Byte;
 end;

 p_pm4_node_EventWriteEos=^t_pm4_node_EventWriteEos;
 t_pm4_node_EventWriteEos=packed object(t_pm4_node)
  addr     :Pointer;
  data     :DWORD;
  eventType:Byte;
  command  :Byte;
 end;

 p_pm4_node_SubmitFlipEop=^t_pm4_node_SubmitFlipEop;
 t_pm4_node_SubmitFlipEop=packed object(t_pm4_node)
  eop_value:QWORD;
  intSel   :Byte
 end;

 p_pm4_node_ReleaseMem=^t_pm4_node_ReleaseMem;
 t_pm4_node_ReleaseMem=packed object(t_pm4_node)
  addr     :Pointer;
  data     :QWORD;
  eventType:Byte;
  srcSel   :Byte;
  dstSel   :Byte;
  intSel   :Byte;
 end;

 p_pm4_node_DmaData=^t_pm4_node_DmaData;
 t_pm4_node_DmaData=packed object(t_pm4_node)
  dst     :QWORD;
  src     :QWORD;
  numBytes:DWORD;
  srcSel  :Byte;
  dstSel  :Byte;
  cpSync  :Byte;
 end;

 p_pm4_node_WriteData=^t_pm4_node_WriteData;
 t_pm4_node_WriteData=packed object(t_pm4_node)
  dst      :Pointer;
  src      :Pointer;
  num_dw   :Word;
  dstSel   :Byte;
  wrConfirm:Boolean;
 end;

 p_pm4_node_WaitRegMem=^t_pm4_node_WaitRegMem;
 t_pm4_node_WaitRegMem=packed object(t_pm4_node)
  pollAddr   :Pointer;
  refValue   :DWORD;
  mask       :DWORD;
  compareFunc:Byte;
 end;

 p_pm4_node_FastClear=^t_pm4_node_FastClear;
 t_pm4_node_FastClear=object(t_pm4_node)
  RT:TRT_INFO;
 end;

 p_pm4_node_Resolve=^t_pm4_node_Resolve;
 t_pm4_node_Resolve=object(t_pm4_node)
  RT:array[0..1] of TRT_INFO; //[0] is read, [1] is written (see t_pm4_stream.Resolve)
  SCREEN:TVkRect2D;
 end;

 p_pm4_node_draw=^t_pm4_node_draw;
 t_pm4_node_draw=object(t_pm4_node)
  rt_info:t_pm4_rt_info;

  indexBase   :QWORD;
  indexOffset :DWORD;
  vertexOffset:DWORD;
  indexCount  :DWORD;
  numInstances:DWORD;

  INDEX_TYPE:Byte;
  SWAP_MODE :Byte;
 end;

 p_pm4_node_DispatchDirect=^t_pm4_node_DispatchDirect;
 t_pm4_node_DispatchDirect=object(t_pm4_node)
  COMPUTE_GROUP:TSH_REG_COMPUTE_GROUP;
  ShaderGroup:TvShaderGroup;
  DIM_X:DWORD;
  DIM_Y:DWORD;
  DIM_Z:DWORD;
 end;

 p_pm4_node_PfpSyncMe=^t_pm4_node_PfpSyncMe;
 t_pm4_node_PfpSyncMe=object(t_pm4_node)
  event:PRTLEvent;
 end;

 //Recorded command stream: an ordered list of t_pm4_node plus the resource
 //registry inherited from t_pm4_resource_stream_scope.
 p_pm4_stream=^t_pm4_stream;
 t_pm4_stream=object(t_pm4_resource_stream_scope)
  //
  list:TAILQ_HEAD; //t_pm4_node
  //
  buft:t_pm4_stream_type;
  //
  init:Boolean;
  hint_repeat:Boolean;
  hint_loop:Ptruint;
  hint_cmds:Boolean;
  //
  curr:p_pm4_node;
  //
  refs:Ptruint; //reference counter, see Acquire/Release
  //
  procedure Free;
  Procedure add_node(node:p_pm4_node);
  function  First:p_pm4_node;
  function  Next(node:p_pm4_node):p_pm4_node; static;
  //
  procedure Acquire;
  function  Release:Boolean;
  //
  procedure Hint         (P1,P2:PChar;maxsize:Integer);
  procedure LoadConstRam (addr:Pointer;num_dw,offset:Word);
  procedure DumpConstRam (addr:Pointer;num_dw,offset:Word);
  procedure IncrementCE  ();
  procedure IncrementDE  ();
  procedure WaitOnCECounter();
  procedure WaitOnDECounterDiff(diff:DWORD);
  procedure EventWrite   (eventType:Byte);
  procedure EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte);
  procedure EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte);
  procedure SubmitFlipEop(eop_value:QWORD;intSel:Byte);
  procedure ReleaseMem   (addr:Pointer;data:QWORD;eventType,srcSel,dstSel,intSel:Byte);
  procedure DmaData      (dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte);
  procedure WriteData    (dstSel:Byte;dst,src:Pointer;num_dw:Word;wrConfirm:Byte);
  procedure WaitRegMem   (pollAddr:Pointer;refValue,mask:DWORD;compareFunc:Byte);
  procedure FastClear    (var CX_REG:TCONTEXT_REG_GROUP);
  procedure Resolve      (var CX_REG:TCONTEXT_REG_GROUP);
  function  ColorControl (var CX_REG:TCONTEXT_REG_GROUP):Boolean;
  procedure Init_Uniforms(node:p_pm4_node;var UniformBuilder:TvUniformBuilder);
  procedure Init_Pushs   (node:p_pm4_node;
                          ShaderGroup:TvShaderGroup;
                          var GPU_REGS:TGPU_REGS);
  procedure Build_rt_info(node:p_pm4_node;
                          var rt_info:t_pm4_rt_info;
                          var GPU_REGS:TGPU_REGS);
  procedure BuildDraw    (ntype:t_pm4_node_type;
                          var SG_REG:TSH_REG_GFX_GROUP;
                          var CX_REG:TCONTEXT_REG_GROUP;
                          var UC_REG:TUSERCONFIG_REG_SHORT;
                          indexOffset:DWORD);
  procedure DrawIndex2   (var SG_REG:TSH_REG_GFX_GROUP;
                          var CX_REG:TCONTEXT_REG_GROUP;
                          var UC_REG:TUSERCONFIG_REG_SHORT);
  procedure DrawIndexOffset2(var SG_REG:TSH_REG_GFX_GROUP;
                             var CX_REG:TCONTEXT_REG_GROUP;
                             var UC_REG:TUSERCONFIG_REG_SHORT;
                             indexOffset:DWORD);
  procedure DrawIndexAuto(var SG_REG:TSH_REG_GFX_GROUP;
                          var CX_REG:TCONTEXT_REG_GROUP;
                          var UC_REG:TUSERCONFIG_REG_SHORT);
  procedure Build_cs_info (node:p_pm4_node_DispatchDirect;var GPU_REGS:TGPU_REGS);
  procedure DispatchDirect(var SC_REG:TSH_REG_COMPUTE_GROUP);
  procedure PfpSyncMe(event:PRTLEvent);
 end;

implementation

uses
 sys_bootparam;

var
 cache_block_allocator:t_cache_block_allocator;

//

//Merge two usages: every field is a bit set, so OR-ing the overlay is enough.
operator + (a,b:t_pm4_usage):t_pm4_usage; inline;
begin
 //hack
 Result.DATA:=a.DATA or b.DATA;
end;

//

//Ordering of resources inside t_pm4_resource_set.
//Keys, in order: address, resource type, then the normalized image key for
//images or the size for buffer-like resources. Returns <0, 0 or >0.
//NOTE(review): the "<" halves of the comparisons and the tail of this
//function were lost in the damaged source and are restored here from the
//surviving fragments - confirm against the upstream file.
function t_pm4_resource.c(n1,n2:p_pm4_resource):Integer;
begin
 //0 Addr
 Result:=Integer(n1^.rkey.Addr>n2^.rkey.Addr)-Integer(n1^.rkey.Addr<n2^.rkey.Addr);
 if (Result<>0) then Exit;
 //1 rtype
 Result:=Integer(n1^.rtype>n2^.rtype)-Integer(n1^.rtype<n2^.rtype);
 if (Result<>0) then Exit;

 case n1^.rtype of
  R_IMG:
   begin
    Result:=CompareNormalized(n1^.rkey,n2^.rkey);
   end;
  R_BUF,
  R_HTILE,
  R_CMASK:
   begin
    //2 rsize
    Result:=Integer(n1^.rsize>n2^.rsize)-Integer(n1^.rsize<n2^.rsize);
   end;
  else;
 end;
end;

//Instances are ordered by the resource they refer to. The comparison is by
//resource key (not by pointer) because find_image_resource_instance and
//find_buffer_resource_instance search with a temporary resource on the stack.
//NOTE(review): body restored, it was lost in the damaged source.
function t_pm4_resource_instance.c(n1,n2:p_pm4_resource_instance):Integer;
begin
 Result:=t_pm4_resource.c(n1^.resource,n2^.resource);
end;

//

//First queued instance, or nil when the list is empty.
//NOTE(review): body restored, it was lost in the damaged source.
function t_pm4_resource_init_scope.first:p_pm4_resource_instance;
begin
 Result:=TAILQ_FIRST(@list);
end;

//Append an instance to the init list unless it is already linked.
procedure t_pm4_resource_init_scope.insert(i:p_pm4_resource_instance);
begin
 //already linked into some list
 if (i^.init_entry.tqe_next<>nil) or
    (i^.init_entry.tqe_prev<>nil) then Exit;

 //lazy init of a zero-filled head
 if (list.tqh_last=nil) then
 begin
  TAILQ_INIT(@list);
 end;

 TAILQ_INSERT_TAIL(@list,i,@i^.init_entry);

 i^.init_scope:=@self;
end;

//

function t_pm4_resource_curr_scope.Min:p_pm4_resource_instance;
begin
 Result:=resource_instance_set.Min;
end;

function t_pm4_resource_curr_scope.Next(node:p_pm4_resource_instance):p_pm4_resource_instance;
begin
 Result:=resource_instance_set.Next(node);
end;

//Add an instance to the scope; when the resource is already present its
//usages are merged into the existing entry instead.
procedure t_pm4_resource_curr_scope.insert(i:p_pm4_resource_instance);
var
 f:p_pm4_resource_instance;
begin
 f:=resource_instance_set.Find(i);

 if (f<>nil) then
 begin
  //union
  f^.curr:=f^.curr + i^.curr;
  f^.prev:=f^.prev + i^.prev;
  f^.next:=f^.next + i^.next;
 end else
 begin
  resource_instance_set.Insert(i);
 end;
 //
end;

//Look up the instance that refers to resource r; nil when r is nil or absent.
function t_pm4_resource_curr_scope.find_resource_instance(r:p_pm4_resource):p_pm4_resource_instance;
var
 tmp:t_pm4_resource_instance;
begin
 if (r=nil) then Exit(nil);

 tmp:=Default(t_pm4_resource_instance);
 tmp.resource:=r;

 Result:=resource_instance_set.Find(@tmp);
end;

//Look up an image instance by key; undefined or invalid keys yield nil.
function t_pm4_resource_curr_scope.find_image_resource_instance(const rkey:TvImageKey):p_pm4_resource_instance;
var
 tmp:t_pm4_resource;
begin
 if (rkey.cformat=VK_FORMAT_UNDEFINED) then Exit(nil);
 if (rkey.params.invalid<>0) then Exit(nil);

 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_IMG;
 tmp.rkey :=rkey;

 Result:=find_resource_instance(@tmp);
end;

//Look up a buffer-like instance by (rtype, addr, size).
function t_pm4_resource_curr_scope.find_buffer_resource_instance(rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource_instance;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=rtype;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=find_resource_instance(@tmp);
end;

//Find a registered image resource; nil when it is not registered.
function t_pm4_resource_stream_scope.find_image_resource(const rkey:TvImageKey):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_IMG;
 tmp.rkey :=rkey;

 Result:=resource_set.Find(@tmp);
end;

//Find an image resource, registering it on first use.
function t_pm4_resource_stream_scope.fetch_image_resource(const rkey:TvImageKey;hint:PChar):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_IMG;
 tmp.rkey :=rkey;

 Result:=resource_set.Find(@tmp);

 if (Result=nil) then
 begin
  //the size is only needed (and computed) for a new entry
  tmp.rsize:=get_image_size(rkey);

  if p_print_gpu_ops then
  begin
   Writeln('fetch_image_resource:0x',HexStr(rkey.Addr),' 0x',HexStr(tmp.rsize,4));
  end;

  Result:=allocator.Alloc(SizeOf(t_pm4_resource));
  Result^:=tmp;

  resource_set.Insert(Result);
 end;
end;

//Find a registered buffer-like resource; nil when it is not registered.
function t_pm4_resource_stream_scope.find_buffer_resource(rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=rtype;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=resource_set.Find(@tmp);
end;

//Find a buffer-like resource, registering it on first use.
function t_pm4_resource_stream_scope.fetch_buffer_resource(rtype:Integer;addr:Pointer;size:DWORD;hint:PChar):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=rtype;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=resource_set.Find(@tmp);

 if (Result=nil) then
 begin
  Result:=allocator.Alloc(SizeOf(t_pm4_resource));
  Result^:=tmp;

  if p_print_gpu_ops then
  begin
   Writeln('fetch_buffer_resource(',hint,'):0x',HexStr(addr),' 0x',HexStr(size,4));
  end;

  resource_set.Insert(Result);
 end;
end;

//Get (or allocate) the instance of r in scope and merge the given usage
//into both the instance and the resource-wide summary.
//A newly allocated instance is NOT inserted into the scope here; the
//insert_*_resource callers do that.
function t_pm4_resource_stream_scope.fetch_resource_instance(scope:p_pm4_resource_curr_scope;r:p_pm4_resource;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
var
 curr:t_pm4_usage;
begin
 Result:=scope^.find_resource_instance(r);

 if (Result=nil) then
 begin
  Result:=allocator.Alloc(SizeOf(t_pm4_resource_instance));
  Result^:=Default(t_pm4_resource_instance);
  //
  Result^.resource:=r;
 end;

 {
  TODO: This is a hack for loading storage textures that may not be fully filled,
        or may only attached only one texture layer.
 }
 if (iu_storage in img_usage) then
 begin
  mem_usage:=mem_usage or TM_READ;
 end;

 curr.mem_usage:=mem_usage;
 curr.shd_usage:=0;
 curr.clr_usage:=0;
 curr.dsa_usage:=0;
 curr.img_usage:=img_usage;

 //route the memory usage to the pipeline part that touches the resource
 if ([iu_sampled,iu_storage]*img_usage<>[]) then
 begin
  curr.shd_usage:=mem_usage;
 end;

 if (iu_attachment in img_usage) then
 begin
  curr.clr_usage:=mem_usage;
 end;

 if (iu_depthstenc in img_usage) then
 begin
  curr.dsa_usage:=mem_usage;
 end;

 Result^.curr:=Result^.curr + curr;

 r^.uall:=r^.uall + curr;
end;

//Register an image usage for the node scope. Returns nil for undefined or
//invalid keys. A read without a recorded previous usage is queued in
//init_scope.
function t_pm4_resource_stream_scope.insert_image_resource(scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage;hint:PChar):p_pm4_resource_instance;
var
 r:p_pm4_resource;
 i:p_pm4_resource_instance;
begin
 if (rkey.cformat=VK_FORMAT_UNDEFINED) then Exit(nil);
 if (rkey.params.invalid<>0) then Exit(nil);

 r:=fetch_image_resource   (rkey,hint);
 i:=fetch_resource_instance(scope,r,mem_usage,img_usage);

 if ((mem_usage and TM_READ)<>0) then
 if (i^.prev.mem_usage=0) then //no prev usage
 begin
  //init
  init_scope.insert(i);
 end;

 scope^.insert(i);

 Result:=i;
end;

//Register a buffer-like usage for the node scope. A read without a recorded
//previous usage is queued in init_scope.
function t_pm4_resource_stream_scope.insert_buffer_resource(scope:p_pm4_resource_curr_scope;rtype:Integer;addr:Pointer;size:DWORD;mem_usage:Integer;hint:PChar):p_pm4_resource_instance;
var
 r:p_pm4_resource;
 i:p_pm4_resource_instance;
begin
 r:=fetch_buffer_resource  (rtype,addr,size,hint);
 i:=fetch_resource_instance(scope,r,mem_usage,[iu_buffer]);

 if ((mem_usage and TM_READ)<>0) then
 if (i^.prev.mem_usage=0) then //no prev usage
 begin
  //init
  init_scope.insert(i);
 end;

 scope^.insert(i);

 Result:=i;
end;

//

//Link instance i with the last writer of every registered resource whose
//address range intersects the range of i, then record i as the new last
//writer when it writes or clears.
procedure t_pm4_resource_stream_scope.connect_resource_instance(i:p_pm4_resource_instance);
var
 start:Pointer;
 __end:Pointer;
 node:p_pm4_resource;
 prev:p_pm4_resource_instance;
 tmp:t_pm4_resource;
begin
 //find cross

 tmp:=i^.resource^;

 start:=tmp.rkey.Addr;
 __end:=start+tmp.rsize;

 //Search key = end of the new range with the highest possible rtype, so the
 //backward walk below also visits resources that start inside the range.
 //(The key used to be "start", which was a no-op assignment and skipped
 // every resource beginning in (start,__end).)
 //NOTE(review): relies on Find_ls returning the nearest node that is not
 //greater than the key - confirm against g_node_splay.
 tmp.rtype:=High(Integer);
 tmp.rkey.Addr:=__end;

 //[s|new|e] ->
 //      [s|old|e]

 node:=resource_set.Find_ls(@tmp);

 while (node<>nil) do
 begin
  //range intersection test
  if (__end>(node^.rkey.Addr)) and (start<(node^.rkey.Addr+node^.rsize)) then
  begin

   prev:=node^.rwrite;

   if (prev<>nil) and (prev<>i) then
   begin
    //sum prev of curr
    i^.prev:=   i^.prev    + prev^.curr;
    //sum next of prev
    prev^.next:=prev^.next + i^.curr;

    //a different resource that merely overlaps is tracked separately
    if (prev^.resource<>i^.resource) then
    begin
     //sum prev of curr
     i^.prev_overlap:=   i^.prev_overlap    + prev^.curr;
     //sum next of prev
     prev^.next_overlap:=prev^.next_overlap + i^.curr;
    end;

   end;

   //
   if ((i^.curr.mem_usage and (TM_WRITE or TM_CLEAR))<>0) then
   begin
    node^.rwrite:=i;
   end;

  end;

  node:=resource_set.Prev(node);
 end;

end;

//Run connect_resource_instance for every instance of the scope.
procedure t_pm4_resource_stream_scope.connect_resource_scope(scope:p_pm4_resource_curr_scope);
var
 node:p_pm4_resource_instance;
begin
 node:=scope^.resource_instance_set.Min;

 while (node<>nil) do
 begin
  connect_resource_instance(node);

  node:=scope^.resource_instance_set.Next(node);
 end;
end;

//

//Drop the node list and release all memory owned by the stream allocator.
procedure t_pm4_stream.Free;
begin
 list:=Default(TAILQ_HEAD);
 allocator.Free;
end;

var
 global_id:QWORD=0;

//Append a fully initialised node: assigns its id and connects its resources.
Procedure t_pm4_stream.add_node(node:p_pm4_node);
begin
 //lazy init of a zero-filled head
 if (list.tqh_last=nil) then
 begin
  TAILQ_INIT(@list);
 end;

 node^.id:=System.InterlockedIncrement64(global_id);

 //Writeln('add_node:',node^.id);

 TAILQ_INSERT_TAIL(@list,node,@node^.entry);

 connect_resource_scope(@node^.scope);
end;

function t_pm4_stream.First:p_pm4_node;
begin
 Result:=TAILQ_FIRST(@list);
end;

function t_pm4_stream.Next(node:p_pm4_node):p_pm4_node;
begin
 Result:=TAILQ_NEXT(node,@node^.entry);
end;

//

procedure t_pm4_stream.Acquire;
begin
 System.InterlockedIncrement(Pointer(refs));
end;

//Returns True when the reference counter dropped to zero.
function t_pm4_stream.Release:Boolean;
begin
 Result:=System.InterlockedDecrement(Pointer(refs))=nil;
end;

//

//Record a text hint made of P1 followed by at most maxsize chars of P2.
//P1 must be zero-terminated; P2 is cut at its terminator or at maxsize.
procedure t_pm4_stream.Hint(P1,P2:PChar;maxsize:Integer);
var
 len1,len2:Integer;
 node:p_pm4_node_Hint;
begin
 len1:=StrLen(P1);

 //bounded length of P2; the terminator is tested BEFORE advancing so an
 //empty P2 is never read past its first byte
 len2:=0;
 while (maxsize<>0) do
 begin
  if (P2[len2]=#0) then
  begin
   Break;
  end;
  Inc(len2);
  Dec(maxsize);
 end;

 //text is stored right behind the node header
 node:=allocator.Alloc(SizeOf(t_pm4_node_Hint)+len1+len2+1);

 node^.ntype :=ntHint;
 node^.scope :=Default(t_pm4_resource_curr_scope);

 Move(P1^,PChar(@node^.data)[0]   ,len1);
 Move(P2^,PChar(@node^.data)[len1],len2);

 PChar(@node^.data)[len1+len2]:=#0;

 add_node(node);
end;

procedure t_pm4_stream.LoadConstRam(addr:Pointer;num_dw,offset:Word);
var
 node:p_pm4_node_LoadConstRam;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_LoadConstRam));

 node^.ntype :=ntLoadConstRam;
 node^.scope :=Default(t_pm4_resource_curr_scope);
 node^.addr  :=addr;
 node^.num_dw:=num_dw;
 node^.offset:=offset;

 //memory is the source
 insert_buffer_resource(@node^.scope,
                        R_BUF,
                        addr,
                        num_dw*SizeOf(DWORD),
                        TM_READ,
                        'LoadConstRam');

 add_node(node);
end;

//Uses the same payload layout as LoadConstRam, only ntype and usage differ.
procedure t_pm4_stream.DumpConstRam(addr:Pointer;num_dw,offset:Word);
var
 node:p_pm4_node_LoadConstRam;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_LoadConstRam));

 node^.ntype :=ntDumpConstRam;
 node^.scope :=Default(t_pm4_resource_curr_scope);
 node^.addr  :=addr;
 node^.num_dw:=num_dw;
 node^.offset:=offset;

 //memory is the destination
 insert_buffer_resource(@node^.scope,
                        R_BUF,
                        addr,
                        num_dw*SizeOf(DWORD),
                        TM_WRITE,
                        'DumpConstRam');

 add_node(node);
end;

procedure t_pm4_stream.IncrementCE();
var
 node:p_pm4_node;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node));

 node^.ntype:=ntIncrementCE;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 add_node(node);
end;

procedure t_pm4_stream.IncrementDE();
var
 node:p_pm4_node;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node));

 node^.ntype:=ntIncrementDE;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 add_node(node);
end;

procedure t_pm4_stream.WaitOnCECounter();
var
 node:p_pm4_node;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node));

 node^.ntype:=ntWaitOnCECounter;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 add_node(node);
end;

procedure t_pm4_stream.WaitOnDECounterDiff(diff:DWORD);
var
 node:p_pm4_node_WaitOnDECounterDiff;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_WaitOnDECounterDiff));

 node^.ntype:=ntWaitOnDECounterDiff;
 node^.scope:=Default(t_pm4_resource_curr_scope);
 node^.diff :=diff;

 add_node(node);
end;

procedure t_pm4_stream.EventWrite(eventType:Byte);
var
 node:p_pm4_node_EventWrite;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_EventWrite));

 node^.ntype    :=ntEventWrite;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.eventType:=eventType;

 add_node(node);
end;

//Record an end-of-pipe event; when addr is set the written range is
//registered as a buffer write sized by dataSel.
procedure t_pm4_stream.EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte);
var
 node:p_pm4_node_EventWriteEop;

 //bytes written to addr for the selected data source, 0 when unknown
 function get_data_size:DWORD; inline;
 begin
  Result:=0;
  //
  Case dataSel of
   EVENTWRITEEOP_DATA_SEL_SEND_DATA32        :Result:=4;
   EVENTWRITEEOP_DATA_SEL_SEND_DATA64        :Result:=8;
   EVENTWRITEEOP_DATA_SEL_SEND_GPU_CLOCK     :Result:=8;
   EVENTWRITEEOP_DATA_SEL_SEND_CP_PERFCOUNTER:Result:=8;
   else;
  end;
 end;

begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEop));

 node^.ntype    :=ntEventWriteEop;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.addr     :=addr;
 node^.data     :=data;
 node^.eventType:=eventType;
 node^.dataSel  :=dataSel;
 node^.intSel   :=intSel;

 if (addr<>nil) then
 begin
  insert_buffer_resource(@node^.scope,
                         R_BUF,
                         addr,
                         get_data_size,
                         TM_WRITE,
                         'EventWriteEop');
 end;

 add_node(node);
end;

//Record an end-of-shader event; when addr is set the written range is
//registered as a buffer write sized by the command.
procedure t_pm4_stream.EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte);
var
 node:p_pm4_node_EventWriteEos;

 //bytes written to addr, 0 for commands that store nothing to memory
 function get_data_size:DWORD; inline;
 begin
  Case command of
   EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY:Result:=4;
   else
    Result:=0;
  end;
 end;

begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEos));

 node^.ntype    :=ntEventWriteEos;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.addr     :=addr;
 node^.data     :=data;
 node^.eventType:=eventType;
 node^.command  :=command;

 if (addr<>nil) then
 begin
  insert_buffer_resource(@node^.scope,
                         R_BUF,
                         addr,
                         get_data_size,
                         TM_WRITE,
                         'EventWriteEos');
 end;

 add_node(node);
end;

procedure t_pm4_stream.SubmitFlipEop(eop_value:QWORD;intSel:Byte);
var
 node:p_pm4_node_SubmitFlipEop;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_SubmitFlipEop));

 node^.ntype    :=ntSubmitFlipEop;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.eop_value:=eop_value;
 node^.intSel   :=intSel;

 add_node(node);
end;

//Record a release-mem packet; when addr is set the written range is
//registered as a buffer write sized by srcSel.
procedure t_pm4_stream.ReleaseMem(addr:Pointer;data:QWORD;eventType,srcSel,dstSel,intSel:Byte);
var
 node:p_pm4_node_ReleaseMem;

 //bytes written to addr for the selected data source, 0 when unknown
 function get_data_size:DWORD; inline;
 begin
  Case srcSel of
   RELEASEMEM_DATA_SEL_SEND_DATA32:
    Result:=4;
   RELEASEMEM_DATA_SEL_SEND_DATA64,
   RELEASEMEM_DATA_SEL_SEND_GPU_CLOCK,
   RELEASEMEM_DATA_SEL_SEND_CP_PERFCOUNTER:
    Result:=8;
   else
    Result:=0;
  end;
 end;

begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_ReleaseMem));

 node^.ntype    :=ntReleaseMem;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.addr     :=addr;
 node^.data     :=data;
 node^.eventType:=eventType;
 node^.srcSel   :=srcSel;
 node^.dstSel   :=dstSel;
 node^.intSel   :=intSel;

 if (addr<>nil) then
 begin
  insert_buffer_resource(@node^.scope,
                         R_BUF,
                         addr,
                         get_data_size,
                         TM_WRITE,
                         'ReleaseMem');
 end;

 add_node(node);
end;

//Record a DMA copy; memory sources/destinations are registered as buffer
//read/write, other selectors register nothing.
procedure t_pm4_stream.DmaData(dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte);
var
 node:p_pm4_node_DmaData;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_DmaData));

 node^.ntype   :=ntDmaData;
 node^.scope   :=Default(t_pm4_resource_curr_scope);
 node^.dst     :=dst;
 node^.src     :=srcOrData;
 node^.numBytes:=numBytes;
 node^.srcSel  :=srcSel;
 node^.dstSel  :=dstSel;
 node^.cpSync  :=isBlocking;

 //source side
 case srcSel of
  kDmaDataSrcMemory,
  kDmaDataSrcMemoryUsingL2:
   if (srcOrData<>0) then
   begin
    insert_buffer_resource(@node^.scope,
                           R_BUF,
                           Pointer(srcOrData),
                           numBytes,
                           TM_READ,
                           'DmaData');
   end;
  else;
 end;

 //destination side
 case dstSel of
  kDmaDataDstMemory,
  kDmaDataDstMemoryUsingL2:
   if (dst<>0) then
   begin
    insert_buffer_resource(@node^.scope,
                           R_BUF,
                           Pointer(dst),
                           numBytes,
                           TM_WRITE,
                           'DmaData');
   end;
 end;

 add_node(node);
end;

//Record a write-data packet. The node keeps the src pointer itself, the
//payload is not copied.
procedure t_pm4_stream.WriteData(dstSel:Byte;dst,src:Pointer;num_dw:Word;wrConfirm:Byte);
var
 node:p_pm4_node_WriteData;
 byte_size:DWORD;
begin
 //Can I copy the link?
 //Or do I have to copy the data?

 node:=allocator.Alloc(SizeOf(t_pm4_node_WriteData){+num_dw*SizeOf(DWORD)});

 node^.ntype    :=ntWriteData;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.dst      :=dst;
 //node^.src      :=Pointer(node+1);
 node^.src      :=src;
 node^.num_dw   :=num_dw;
 node^.dstSel   :=dstSel;
 node^.wrConfirm:=(wrConfirm<>0);

 //Move(src^,node^.src^,num_dw*SizeOf(DWORD));

 byte_size:=num_dw*SizeOf(DWORD);

 if (src<>nil) then
 begin
  insert_buffer_resource(@node^.scope,
                         R_BUF,
                         src,
                         byte_size,
                         TM_READ,
                         'WriteData');
 end;

 //only memory destinations are tracked
 case dstSel of
  WRITE_DATA_DST_SEL_MEMORY_SYNC,
  WRITE_DATA_DST_SEL_TCL2,
  WRITE_DATA_DST_SEL_MEMORY_ASYNC:
   if (dst<>nil) then
   begin
    insert_buffer_resource(@node^.scope,
                           R_BUF,
                           Pointer(dst),
                           byte_size,
                           TM_WRITE,
                           'WriteData');
   end;
  else;
 end;

 add_node(node);
end;

procedure t_pm4_stream.WaitRegMem(pollAddr:Pointer;refValue,mask:DWORD;compareFunc:Byte);
var
 node:p_pm4_node_WaitRegMem;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_WaitRegMem));

 node^.ntype      :=ntWaitRegMem;
 node^.scope      :=Default(t_pm4_resource_curr_scope);
 node^.pollAddr   :=pollAddr;
 node^.refValue   :=refValue;
 node^.mask       :=mask;
 node^.compareFunc:=compareFunc;

 add_node(node);
end;

//Record a fast clear of render target 0: the image and its CMASK are
//registered with the read flag replaced by TM_CLEAR.
procedure t_pm4_stream.FastClear(var CX_REG:TCONTEXT_REG_GROUP);
var
 GPU_REGS:TGPU_REGS;
 RT:TRT_INFO;
 node:p_pm4_node_FastClear;
begin
 GPU_REGS:=Default(TGPU_REGS);
 GPU_REGS.CX_REG:=@CX_REG;

 node:=allocator.Alloc(SizeOf(t_pm4_node_FastClear));

 node^.ntype:=ntFastClear;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 //
 RT:=GPU_REGS.GET_RT_INFO(0,True);

 //-TM_READ +TM_CLEAR
 RT.IMAGE_USAGE:=(RT.IMAGE_USAGE and (not TM_READ)) or TM_CLEAR;

 Assert(RT.CMASK_INFO.KEY.Addr<>nil);

 //
 insert_image_resource(@node^.scope,
                       RT.FImageInfo,
                       RT.IMAGE_USAGE,
                       [iu_transfer],
                       'FastClear');

 insert_buffer_resource(@node^.scope,
                        R_CMASK,
                        RT.CMASK_INFO.KEY.Addr,
                        RT.CMASK_INFO.SIZE,
                        RT.IMAGE_USAGE,
                        'FastClear'
                       );
 //

 node^.RT:=RT;

 //
 add_node(node);
end;

//Record a fixed-function resolve from render target 0 (read) to render
//target 1 (write).
procedure t_pm4_stream.Resolve(var CX_REG:TCONTEXT_REG_GROUP);
var
 GPU_REGS:TGPU_REGS;
 RT:array[0..1] of TRT_INFO;
 node:p_pm4_node_Resolve;
begin
 GPU_REGS:=Default(TGPU_REGS);
 GPU_REGS.CX_REG:=@CX_REG;

 Assert(DWORD(CX_REG.CB_TARGET_MASK)=$F);

 node:=allocator.Alloc(SizeOf(t_pm4_node_Resolve));

 node^.ntype:=ntResolve;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 //
 RT[0]:=GPU_REGS.GET_RT_INFO(0,True);
 RT[1]:=GPU_REGS.GET_RT_INFO(1,True);

 RT[0].IMAGE_USAGE:=TM_READ;
 RT[1].IMAGE_USAGE:=TM_WRITE;

 insert_image_resource(@node^.scope,
                       RT[0].FImageInfo,
                       RT[0].IMAGE_USAGE,
                       [iu_transfer],
                       'Resolve');

 insert_image_resource(@node^.scope,
                       RT[1].FImageInfo,
                       RT[1].IMAGE_USAGE,
                       [iu_transfer],
                       'Resolve');

 node^.RT    :=RT;
 node^.SCREEN:=GPU_REGS.GET_SCREEN;

 add_node(node);
end;

//Handle CB_COLOR_CONTROL.MODE before a draw.
//Returns True when the mode was turned into its own node (fast clear or
//resolve) and the draw must be skipped; False otherwise.
function t_pm4_stream.ColorControl(var CX_REG:TCONTEXT_REG_GROUP):Boolean;
begin
 Result:=False;

 case CX_REG.CB_COLOR_CONTROL.MODE of
  CB_DISABLE:
   if p_print_gpu_ops then
   begin
    Writeln('DISABLE');
   end;

  CB_NORMAL:; //next

  CB_ELIMINATE_FAST_CLEAR:
   // Expand latest specified clear color into pixel data for the fast cleared color/depth resource.
   begin
    FastClear(CX_REG);
    Result:=True;
   end;

  CB_RESOLVE:
   // Fixed function resolve. (MSAA)
   begin
    Resolve(CX_REG);
    Result:=True;
   end;

  CB_DECOMPRESS:
   if p_print_gpu_ops then
   begin
    Writeln('DECOMPRESS');
   end;

  CB_FMASK_DECOMPRESS:
   // Fmask decompression for shader readability.
   if p_print_gpu_ops then
   begin
    Writeln('FMASK_DECOMPRESS');
   end;

  CB_DCC_DECOMPRESS:
   // Indicates this color target view is for a DCC decompress
   if p_print_gpu_ops then
   begin
    Writeln('DCC_DECOMPRESS');
   end;

  else
   Assert(False,'unknow color control:0x'+HexStr(CX_REG.CB_COLOR_CONTROL.MODE,1));
 end;
end;

//Register every image and buffer exported by the uniform builder in the
//scope of node. Buffers flagged TM_INVAL are skipped.
procedure t_pm4_stream.Init_Uniforms(node:p_pm4_node;var UniformBuilder:TvUniformBuilder);
var
 i:Integer;
begin

 //images
 if (Length(UniformBuilder.FImages)<>0) then
 begin
  For i:=0 to High(UniformBuilder.FImages) do
  With UniformBuilder.FImages[i] do
  begin
   case btype of
    vbSampled:
     begin
      insert_image_resource(@node^.scope,
                            FImage,
                            memuse,
                            [iu_sampled],
                            'Init_Uniforms');
     end;
    vbStorage,
    vbMipStorage:
     begin
      insert_image_resource(@node^.scope,
                            FImage,
                            memuse,
                            [iu_storage],
                            'Init_Uniforms');
     end;
    else
     Assert(false);
   end;
  end;
 end;
 //images

 //buffers
 if (Length(UniformBuilder.FBuffers)<>0) then
 begin
  For i:=0 to High(UniformBuilder.FBuffers) do
  With UniformBuilder.FBuffers[i] do
  if (memuse and TM_INVAL)=0 then
  begin
   insert_buffer_resource(@node^.scope,
                          R_BUF,
                          addr,
                          size,
                          memuse,
                          'Init_Uniforms');
  end;
 end;
 //buffers

end;

//Register the push-constant data of every present shader stage as a
//buffer read in the scope of node.
procedure t_pm4_stream.Init_Pushs(node:p_pm4_node;
                                  ShaderGroup:TvShaderGroup;
                                  var GPU_REGS:TGPU_REGS);
var
 Shader:TvShaderExt;
 i:TvShaderStage;
 FData:PDWORD;
 addr:Pointer;
begin
 For i:=Low(TvShaderStage) to High(TvShaderStage) do
 begin
  Shader:=ShaderGroup.FKey.FShaders[i];

  //stage absent
  if (Shader=nil) then Continue;
  //stage has no push constants
  if (Shader.FPushConst.size=0) then Continue;

  FData:=GPU_REGS.get_user_data(i);
  addr :=Shader.GetPushConstData(FData);

  insert_buffer_resource(@node^.scope,
                         R_BUF,
                         addr,
                         Shader.FPushConst.size,
                         TM_READ,
                         'Init_Pushs');
 end;
end;

//Debug helper: prints the gcn hash and spv dump name of every present stage
//to stderr.
procedure DumpShaderGroup(ShaderGroup:TvShaderGroup);
var
 i:TvShaderStage;
 str:RawByteString;
begin
 str:='[DumpShaderGroup]'#13#10;

 For i:=Low(TvShaderStage) to High(TvShaderStage) do
 if (ShaderGroup.FKey.FShaders[i]<>nil) then
 begin
  str:=str+' ('+HexStr(ShaderGroup.FKey.FShaders[i].FHash_gcn,16)+') '+GetDumpSpvName(i,ShaderGroup.FKey.FShaders[i].FHash_spv)+#13#10;
 end;

 Writeln(stderr,str);
end;

//Capture the whole render state for a draw into rt_info and register every
//touched resource (color targets, CMASK, depth/stencil, HTILE, uniforms)
//in the scope of node.
procedure t_pm4_stream.Build_rt_info(node:p_pm4_node;
                                     var rt_info:t_pm4_rt_info;
                                     var GPU_REGS:TGPU_REGS);
var
 i:Integer;
 RT:TRT_INFO;
 FUniformBuilder:TvUniformBuilder;

 pa:TPushConstAllocator;
 pp:PPushConstAllocator;

 r:p_pm4_resource;
 resource_instance:p_pm4_resource_instance;
begin
 for i:=0 to 31 do
 begin
  Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE=0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FP16_INTERP_MODE='+IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE));
 end;

 GPU_REGS.export_user_data_rt(@rt_info.USERDATA);

 //copy
 with rt_info.SHADERDATA do
 begin
  SG_REG                  :=GPU_REGS.SG_REG^;
  SPI_PS_INPUT_ENA        :=GPU_REGS.CX_REG^.SPI_PS_INPUT_ENA;
  SPI_PS_INPUT_ADDR       :=GPU_REGS.CX_REG^.SPI_PS_INPUT_ADDR;
  SPI_INTERP_CONTROL_0    :=GPU_REGS.CX_REG^.SPI_INTERP_CONTROL_0;
  SPI_PS_IN_CONTROL       :=GPU_REGS.CX_REG^.SPI_PS_IN_CONTROL;
  SPI_PS_INPUT_CNTL       :=GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL;
  DB_SHADER_CONTROL       :=GPU_REGS.CX_REG^.DB_SHADER_CONTROL;
  VGT_INSTANCE_STEP_RATE_0:=GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_0;
  VGT_INSTANCE_STEP_RATE_1:=GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_1;
  RENDER_TARGET           :=GPU_REGS.CX_REG^.RENDER_TARGET;
  UC_REG                  :=GPU_REGS.UC_REG^;
 end;

 //color targets
 rt_info.RT_COUNT:=0;

 if GPU_REGS.COMP_ENABLE then
 For i:=0 to GPU_REGS.GET_HI_RT do
  begin
   RT:=GPU_REGS.GET_RT_INFO(i);

   //
   if (RT.CMASK_INFO.KEY.Addr<>nil) then
   begin

    //perfetch check
    r:=find_buffer_resource(R_CMASK,RT.CMASK_INFO.KEY.Addr,RT.CMASK_INFO.SIZE);

    if (r<>nil) and (r^.rcmask) then
    begin
     //-TM_READ +TM_CLEAR
     RT.IMAGE_USAGE:=(RT.IMAGE_USAGE and (not TM_READ)) or TM_CLEAR;
     //
     r^.rcmask:=False;
    end;

    insert_buffer_resource(@node^.scope,
                           R_CMASK,
                           RT.CMASK_INFO.KEY.Addr,
                           RT.CMASK_INFO.SIZE,
                           RT.IMAGE_USAGE,
                           'Build_rt_info');
   end;

   insert_image_resource(@node^.scope,
                         RT.FImageInfo,
                         RT.IMAGE_USAGE,
                         [iu_attachment],
                         'Build_rt_info');

   //
   rt_info.RT_INFO[rt_info.RT_COUNT]:=RT;

   Inc(rt_info.RT_COUNT);
  end;

 //depth/stencil target
 rt_info.DB_ENABLE:=GPU_REGS.DB_ENABLE;

 if rt_info.DB_ENABLE then
 begin
  rt_info.DB_INFO:=GPU_REGS.GET_DB_INFO;

  //depth plane
  resource_instance:=insert_image_resource(@node^.scope,
                                           GetDepthOnly(rt_info.DB_INFO.FImageInfo),
                                           rt_info.DB_INFO.DEPTH_USAGE,
                                           [iu_depthstenc],
                                           'Build_rt_info');

  if (resource_instance<>nil) then
  with resource_instance^.resource^ do
  begin
   rcombined:=rcombined or IsDepthAndStencil(rt_info.DB_INFO.FImageInfo.cformat);
  end;

  //stencil plane
  resource_instance:=insert_image_resource(@node^.scope,
                                           GetStencilOnly(rt_info.DB_INFO.FImageInfo),
                                           rt_info.DB_INFO.STENCIL_USAGE,
                                           [iu_depthstenc],
                                           'Build_rt_info');

  if (resource_instance<>nil) then
  with resource_instance^.resource^ do
  begin
   rcombined:=rcombined or IsDepthAndStencil(rt_info.DB_INFO.FImageInfo.cformat);
  end;

  //htile follows the depth usage
  if (rt_info.DB_INFO.HTILE_INFO.TILE_SURFACE_ENABLE<>0) then
  begin
   resource_instance:=insert_buffer_resource(@node^.scope,
                                             R_HTILE,
                                             rt_info.DB_INFO.HTILE_INFO.KEY.Addr,
                                             rt_info.DB_INFO.HTILE_INFO.SIZE,
                                             rt_info.DB_INFO.DEPTH_USAGE,
                                             'Build_rt_info');
  end;

 end;

 if (rt_info.RT_COUNT=0) and (not rt_info.DB_ENABLE) then
 begin
  Writeln('zero attachment???');
 end;

 rt_info.BLEND_INFO:=GPU_REGS.GET_BLEND_INFO;

 rt_info.PRIM_TYPE :=ord(GPU_REGS.GET_PRIM_TYPE);
 rt_info.PRIM_RESET:=GPU_REGS.GET_PRIM_RESET;

 //enabled viewports are packed to the front of VPORT/SCISSOR
 rt_info.VP_COUNT:=0;

 For i:=0 to 15 do
 if GPU_REGS.VP_ENABLE(i) then
 begin
  rt_info.VPORT  [rt_info.VP_COUNT]:=GPU_REGS.GET_VPORT  (i);
  rt_info.SCISSOR[rt_info.VP_COUNT]:=GPU_REGS.GET_SCISSOR(i);

  Inc(rt_info.VP_COUNT);
 end;

 rt_info.RASTERIZATION:=GPU_REGS.GET_RASTERIZATION;
 rt_info.MULTISAMPLE  :=GPU_REGS.GET_MULTISAMPLE;

 rt_info.PROVOKING:=ord(GPU_REGS.GET_PROVOKING);

 rt_info.SCREEN_RECT:=GPU_REGS.GET_SCREEN;
 rt_info.SCREEN_SIZE:=GPU_REGS.GET_SCREEN_SIZE;

 //
 pa.Init;
 pp:=@pa;

 rt_info.ShaderGroup:=FetchShaderGroupRT(GPU_REGS,pp);
 Assert(rt_info.ShaderGroup<>nil);

 //DumpShaderGroup(rt_info.ShaderGroup);

 //
 FUniformBuilder:=Default(TvUniformBuilder);
 rt_info.ShaderGroup.ExportUnifBuilder(FUniformBuilder,@rt_info.USERDATA);

 Init_Uniforms(node,FUniformBuilder);
end;

//Common body of the draw commands. Draws with any VGT_SHADER_STAGES_EN bit
//set are skipped with a log line. A DrawIndexAuto that matches the
//depth-clear heuristic is recorded as ntClearDepth instead.
procedure t_pm4_stream.BuildDraw(ntype:t_pm4_node_type;
                                 var SG_REG:TSH_REG_GFX_GROUP;
                                 var CX_REG:TCONTEXT_REG_GROUP;
                                 var UC_REG:TUSERCONFIG_REG_SHORT;
                                 indexOffset:DWORD);
var
 GPU_REGS:TGPU_REGS;
 node:p_pm4_node_draw;
begin
 GPU_REGS:=Default(TGPU_REGS);
 GPU_REGS.SG_REG:=@SG_REG;
 GPU_REGS.CX_REG:=@CX_REG;
 GPU_REGS.UC_REG:=@UC_REG;

 if DWORD(CX_REG.VGT_SHADER_STAGES_EN)<>0 then
 begin
  Writeln('Skip tessellation:0x',HexStr(DWORD(CX_REG.VGT_SHADER_STAGES_EN),8));
  Exit;
 end;

 node:=allocator.Alloc(SizeOf(t_pm4_node_draw));

 node^.ntype:=ntype;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 Build_rt_info(node,node^.rt_info,GPU_REGS);

 node^.indexBase   :=CX_REG.VGT_DMA_BASE or (QWORD(CX_REG.VGT_DMA_BASE_HI.BASE_ADDR) shl 32);
 node^.indexOffset :=indexOffset;
 node^.vertexOffset:=CX_REG.VGT_INDX_OFFSET;
 node^.indexCount  :=UC_REG.VGT_NUM_INDICES;
 node^.numInstances:=UC_REG.VGT_NUM_INSTANCES;

 node^.INDEX_TYPE:=ord(GPU_REGS.GET_INDEX_TYPE);
 node^.SWAP_MODE :=CX_REG.VGT_DMA_INDEX_TYPE.SWAP_MODE;

 //heuristic
 if (ntype=ntDrawIndexAuto) and
    (node^.numInstances<=1) and
    (node^.rt_info.RT_COUNT=0) and
    (node^.rt_info.DB_ENABLE) and
    (
     ((node^.rt_info.DB_INFO.DEPTH_USAGE   and TM_CLEAR)<>0) or
     ((node^.rt_info.DB_INFO.STENCIL_USAGE and TM_CLEAR)<>0)
    ) then
 if IsClearDepthShaders(node^.rt_info.ShaderGroup.FKey.FShaders) then
 begin
  //ClearDepthTarget
  node^.ntype:=ntClearDepth;
 end;

 //clearRenderTarget
 //VS 0xFE54CC4687E2FF59
 //PS 0x91E6C1F562F6F2DE

 add_node(node);
end;

procedure t_pm4_stream.DrawIndex2(var SG_REG:TSH_REG_GFX_GROUP;
                                  var CX_REG:TCONTEXT_REG_GROUP;
                                  var UC_REG:TUSERCONFIG_REG_SHORT);
begin
 //a handled color-control mode replaces the draw
 if not ColorControl(CX_REG) then
 begin
  BuildDraw(ntDrawIndex2,SG_REG,CX_REG,UC_REG,0);
 end;
end;

procedure t_pm4_stream.DrawIndexAuto(var SG_REG:TSH_REG_GFX_GROUP;
                                     var CX_REG:TCONTEXT_REG_GROUP;
                                     var UC_REG:TUSERCONFIG_REG_SHORT);
begin
 //a handled color-control mode replaces the draw
 if not ColorControl(CX_REG) then
 begin
  BuildDraw(ntDrawIndexAuto,SG_REG,CX_REG,UC_REG,0);
 end;
end;

procedure t_pm4_stream.DrawIndexOffset2(var SG_REG:TSH_REG_GFX_GROUP;
                                        var CX_REG:TCONTEXT_REG_GROUP;
                                        var UC_REG:TUSERCONFIG_REG_SHORT;
                                        indexOffset:DWORD);
begin
 //a handled color-control mode replaces the draw
 if not ColorControl(CX_REG) then
 begin
  BuildDraw(ntDrawIndexOffset2,SG_REG,CX_REG,UC_REG,indexOffset);
 end;
end;

//Capture the compute state for a dispatch and register the resources of
//its uniforms in the scope of node.
procedure t_pm4_stream.Build_cs_info(node:p_pm4_node_DispatchDirect;var GPU_REGS:TGPU_REGS);
var
 dst:PGPU_USERDATA;
 FUniformBuilder:TvUniformBuilder;

 pa:TPushConstAllocator;
 pp:PPushConstAllocator;
begin
 //copy
 node^.COMPUTE_GROUP:=GPU_REGS.SC_REG^;

 //hack
 //fake TGPU_USERDATA base pointer whose A[vShaderStageCs] slot lands on the
 //copied COMPUTE_USER_DATA
 dst:=Pointer(@node^.COMPUTE_GROUP.COMPUTE_USER_DATA)-Ptruint(@TGPU_USERDATA(nil^).A[vShaderStageCs]);

 pa.Init;
 pp:=@pa;

 node^.ShaderGroup:=FetchShaderGroupCS(GPU_REGS,pp);
 Assert(node^.ShaderGroup<>nil);

 node^.DIM_X:=GPU_REGS.SC_REG^.COMPUTE_DIM_X;
 node^.DIM_Y:=GPU_REGS.SC_REG^.COMPUTE_DIM_Y;
 node^.DIM_Z:=GPU_REGS.SC_REG^.COMPUTE_DIM_Z;

 //
 FUniformBuilder:=Default(TvUniformBuilder);
 node^.ShaderGroup.ExportUnifBuilder(FUniformBuilder,dst);

 Init_Uniforms(node,FUniformBuilder);
end;

procedure t_pm4_stream.DispatchDirect(var SC_REG:TSH_REG_COMPUTE_GROUP);
var
 GPU_REGS:TGPU_REGS;
 node:p_pm4_node_DispatchDirect;
begin
 GPU_REGS:=Default(TGPU_REGS);
 GPU_REGS.SC_REG:=@SC_REG;

 node:=allocator.Alloc(SizeOf(t_pm4_node_DispatchDirect));

 node^.ntype:=ntDispatchDirect;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 Build_cs_info(node,GPU_REGS);

 add_node(node);
end;

procedure t_pm4_stream.PfpSyncMe(event:PRTLEvent);
var
 node:p_pm4_node_PfpSyncMe;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_PfpSyncMe));

 node^.ntype:=ntPfpSyncMe;
 node^.scope:=Default(t_pm4_resource_curr_scope);
 node^.event:=event;

 add_node(node);
end;

//

procedure t_cache_block_allocator.init;
begin
 queue.Create;
xlock:=nil; count:=0; end; Function t_cache_block_allocator.Alloc:Pointer; begin Result:=nil; if (XCHG(xlock,Pointer(1))=nil) then begin if queue.Pop(Result) then begin XCHG(xlock,nil); // System.InterlockedDecrement64(count); Exit; end; XCHG(xlock,nil); end; // md_mmap(Result,mem_size,VM_RW); end; Procedure t_cache_block_allocator.Free(node:Pointer); begin if (node=nil) then Exit; // if (count+1<=max_count) then begin if (System.InterlockedIncrement64(count)<=max_count) then begin queue.Push(node); Exit; end else begin System.InterlockedDecrement64(count); end; end; // md_unmap(node,mem_size); end; // Function t_pm4_allocator.Alloc(Size:ptruint):Pointer; var mem_size:ptruint; node:PAllocNode; begin if (pHead.slh_first=nil) or (Size>curr_size) then begin if (Size>(cache_block_allocator.mem_size-SizeOf(TAllocNode))) then begin mem_size:=Align(Size+SizeOf(TAllocNode),64*1024); // node:=nil; md_mmap(node,mem_size,VM_RW); end else begin mem_size:=cache_block_allocator.mem_size; // node:=cache_block_allocator.Alloc; end; node^.size:=mem_size; SLIST_INSERT_HEAD(@pHead,node,@node^.link); curr_apos:=0; curr_size:=mem_size-SizeOf(TAllocNode); Inc(full_size,mem_size); end; node:=SLIST_FIRST(@pHead); Result:=@PByte(@node^.data)[curr_apos]; Inc(used_size,Size); Size:=Align(Size,SizeOf(ptruint)); Inc(curr_apos,Size); Dec(curr_size,Size); end; Procedure t_pm4_allocator.Free; var node:PAllocNode; begin node:=SLIST_FIRST(@pHead); While (node<>nil) do begin SLIST_REMOVE(@pHead,node,@node^.link); if (node^.size=cache_block_allocator.mem_size) then begin cache_block_allocator.Free(node); end else begin md_unmap(node,node^.size); end; node:=SLIST_FIRST(@pHead); end; Self:=Default(t_pm4_allocator); end; initialization cache_block_allocator.Init; end.