unit pm4_stream;

{
 PM4 command stream model.

 A t_pm4_stream is an ordered list of typed nodes (draws, dispatches, event
 writes, ...) whose storage comes from a per-stream bump allocator. Every node
 owns a "current scope" (set of resource instances it touches); the stream
 owns the set of all resources and links instances of overlapping resources
 to each other when a node is appended.

 NOTE(review): this text was restored from a copy in which all line breaks were
 collapsed and several "<...>" spans were missing. Reconstructed places are
 marked with NOTE(review) and should be confirmed against version control.
}

{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}

interface

uses
 sysutils,
 mqueue,
 LFQueue,
 md_map,
 bittype,
 pm4defs,
 si_ci_vi_merged_enum,
 si_ci_vi_merged_registers,
 si_ci_vi_merged_groups,
 Vulkan,
 vImage,
 vShader,
 vShaderExt,
 vShaderManager,
 vRegs2Vulkan,
 vImageTiling,
 g_node_splay
 ;

type
 //Process-wide cache of fixed-size (mem_size) memory blocks.
 //At most max_count released blocks are kept for reuse.
 t_cache_block_allocator=object
  const
   mem_size =64*1024;
   max_count=256;
  //
  var
   queue:TIntrusiveMPSCQueue;
   xlock:Pointer; //exchange-based guard taken around queue.Pop
   count:QWORD;   //number of blocks currently accounted to the cache
  //
  procedure Init;
  Function  Alloc:Pointer;
  Procedure Free(node:Pointer);
 end;

 //Bump allocator: memory is carved sequentially from a list of blocks and
 //is only released all at once by Free.
 t_pm4_allocator=object
  type
   PAllocNode=^TAllocNode;
   TAllocNode=packed record
    link:PAllocNode;
    size:QWORD;      //full size of the block, header included
    data:record end; //zero-sized marker: start of the usable area
   end;
  var
   pHead:SLIST_HEAD;
   curr_apos:ptruint; //alloc pos in current node
   curr_size:ptruint; //useable size of current node
   used_size:ptruint; //full usable size
   full_size:ptruint; //full alloc size
  Function  Alloc(Size:ptruint):Pointer;
  Procedure Free;
 end;

 //Snapshot of the render state captured for one draw node.
 p_pm4_rt_info=^t_pm4_rt_info;
 t_pm4_rt_info=object
  USERDATA:TGPU_USERDATA;

  ShaderGroup:TvShaderGroup;

  RT_INFO:array[0..7] of TRT_INFO;
  DB_INFO:TDB_INFO;

  BLEND_INFO:TBLEND_INFO;

  VPORT  :array[0..15] of TVkViewport;
  SCISSOR:array[0..15] of TVkRect2D;

  RASTERIZATION:TVkPipelineRasterizationStateCreateInfo;
  MULTISAMPLE  :TVkPipelineMultisampleStateCreateInfo;

  SCREEN_RECT:TVkRect2D;
  SCREEN_SIZE:TVkExtent2D;

  RT_COUNT  :Byte; //number of valid entries in RT_INFO
  DB_ENABLE :Boolean;
  PRIM_TYPE :Byte;
  PRIM_RESET:Byte;
  VP_COUNT  :Byte; //number of valid entries in VPORT/SCISSOR
  PROVOKING :Byte;
 end;

 t_pm4_stream_type=(
  stGfxRing,
  stGfxDcb,
  stGfxCcb,
  stCompute0,
  stCompute1,
  stCompute2,
  stCompute3,
  stCompute4,
  stCompute5,
  stCompute6
 );

 t_pm4_node_type=(
  ntLoadConstRam,
  ntEventWrite,
  ntEventWriteEop,
  ntEventWriteEos,
  ntSubmitFlipEop,
  ntDmaData,
  ntWriteData,
  ntWaitRegMem,
  ntFastClear,
  ntResolve,
  ntClearDepth,
  ntDrawIndex2,
  ntDrawIndexAuto,
  ntDispatchDirect
 );

const
 //values of t_pm4_resource.rtype
 R_IMG  =0;
 R_BUF  =1;
 R_HTILE=2;

type
 t_pm4_usage=record
  mem_usage:Integer;       //bit mask, combined with "or"
  img_usage:s_image_usage; //combined with "+"
 end;

//Union of two usages (both fields are merged from both operands).
operator + (a,b:t_pm4_usage):t_pm4_usage;

type
 p_pm4_resource_instance    =^t_pm4_resource_instance;
 p_pm4_resource_curr_scope  =^t_pm4_resource_curr_scope;
 p_pm4_resource_stream_scope=^t_pm4_resource_stream_scope;

 //List of instances that are read without a previous usage inside the stream.
 p_pm4_resource_init_scope=^t_pm4_resource_init_scope;
 t_pm4_resource_init_scope=object
  list:TAILQ_HEAD; //p_pm4_resource_instance
  function  first:p_pm4_resource_instance;
  procedure insert(i:p_pm4_resource_instance);
 end;

 //One memory resource (image, buffer or htile) known to a stream.
 p_pm4_resource=^t_pm4_resource;
 t_pm4_resource=object
  pLeft :p_pm4_resource; //t_pm4_resource_set
  pRight:p_pm4_resource; //t_pm4_resource_set
  //
  rwrite:p_pm4_resource_instance; //last instance that wrote/cleared it
  //
  rtype :Integer; //R_IMG, R_BUF, R_HTILE
  rsize :DWORD;
  rkey  :TvImageKey;
  //
  rclear    :Boolean;
  rwriteback:Boolean;
  //
  function c(n1,n2:p_pm4_resource):Integer; static;
 end;

 //NOTE(review): the generic arguments were missing in the reviewed text.
 //If g_node_splay declares TNodeSplay<PNode,TNode>, use
 //<p_pm4_resource,t_pm4_resource> here instead - confirm.
 t_pm4_resource_set=specialize TNodeSplay<t_pm4_resource>;

 //Usage of one resource by one node.
 t_pm4_resource_instance=object
  init_entry:TAILQ_ENTRY; //p_pm4_resource_init_scope
  //
  pLeft :p_pm4_resource_instance; //t_pm4_resource_instance_set
  pRight:p_pm4_resource_instance; //t_pm4_resource_instance_set
  //
  init_scope:p_pm4_resource_init_scope;
  curr_scope:p_pm4_resource_curr_scope;
  //
  resource:p_pm4_resource;
  //
  curr:t_pm4_usage; //usage by the owning node
  prev:t_pm4_usage; //accumulated usage of earlier overlapping writers
  next:t_pm4_usage; //accumulated usage of later overlapping instances
  //
  prev_overlap:t_pm4_usage; //same as prev/next but only for instances
  next_overlap:t_pm4_usage; //that belong to a different resource
  //
  function c(n1,n2:p_pm4_resource_instance):Integer; static;
 end;

 //NOTE(review): generic arguments reconstructed, see t_pm4_resource_set.
 t_pm4_resource_instance_set=specialize TNodeSplay<t_pm4_resource_instance>;

 //Set of resource instances used by a single node.
 t_pm4_resource_curr_scope=object
  resource_instance_set:t_pm4_resource_instance_set;
  procedure insert(i:p_pm4_resource_instance);
  function  find_resource_instance(r:p_pm4_resource):p_pm4_resource_instance;
  function  find_image_resource_instance (const rkey:TvImageKey):p_pm4_resource_instance;
  function  find_buffer_resource_instance(addr:Pointer;size:DWORD):p_pm4_resource_instance;
  function  find_htile_resource_instance (addr:Pointer;size:DWORD):p_pm4_resource_instance;
 end;

 //Stream-wide resource bookkeeping (base of t_pm4_stream).
 t_pm4_resource_stream_scope=object
  next_:TAILQ_HEAD; //Must be the first element in memory
  //
  allocator:t_pm4_allocator;
  //
  resource_set:t_pm4_resource_set;
  init_scope:t_pm4_resource_init_scope;
  //
  function  find_image_resource    (const rkey:TvImageKey):p_pm4_resource;
  function  fetch_image_resource   (const rkey:TvImageKey):p_pm4_resource;
  function  find_buffer_resource   (addr:Pointer;size:DWORD):p_pm4_resource;
  function  fetch_buffer_resource  (addr:Pointer;size:DWORD):p_pm4_resource;
  function  find_htile_resource    (addr:Pointer;size:DWORD):p_pm4_resource;
  function  fetch_htile_resource   (addr:Pointer;size:DWORD):p_pm4_resource;
  function  fetch_resource_instance(scope:p_pm4_resource_curr_scope;r:p_pm4_resource;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
  function  insert_image_resource  (scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
  function  insert_buffer_resource (scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer):p_pm4_resource_instance;
  function  insert_htile_resource  (scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer):p_pm4_resource_instance;
  procedure connect_resource_instance(i:p_pm4_resource_instance);
  procedure connect_resource_scope   (scope:p_pm4_resource_curr_scope);
 end;

 //Common header of every stream node.
 p_pm4_node=^t_pm4_node;
 t_pm4_node=object
  entry:TAILQ_ENTRY;
  //
  scope:t_pm4_resource_curr_scope;
  //
  ntype:t_pm4_node_type;
 end;

 p_pm4_node_LoadConstRam=^t_pm4_node_LoadConstRam;
 t_pm4_node_LoadConstRam=packed object(t_pm4_node)
  addr  :Pointer;
  num_dw:Word;
  offset:Word;
 end;

 p_pm4_node_EventWrite=^t_pm4_node_EventWrite;
 t_pm4_node_EventWrite=packed object(t_pm4_node)
  eventType:Byte;
 end;

 p_pm4_node_EventWriteEop=^t_pm4_node_EventWriteEop;
 t_pm4_node_EventWriteEop=packed object(t_pm4_node)
  addr     :Pointer;
  data     :QWORD;
  eventType:Byte;
  dataSel  :Byte;
  intSel   :Byte;
 end;

 p_pm4_node_EventWriteEos=^t_pm4_node_EventWriteEos;
 t_pm4_node_EventWriteEos=packed object(t_pm4_node)
  addr     :Pointer;
  data     :DWORD;
  eventType:Byte;
  command  :Byte;
 end;

 p_pm4_node_SubmitFlipEop=^t_pm4_node_SubmitFlipEop;
 t_pm4_node_SubmitFlipEop=packed object(t_pm4_node)
  eop_value:QWORD;
  intSel   :Byte
 end;

 p_pm4_node_DmaData=^t_pm4_node_DmaData;
 t_pm4_node_DmaData=packed object(t_pm4_node)
  dst     :QWORD;
  src     :QWORD;
  numBytes:DWORD;
  srcSel  :Byte;
  dstSel  :Byte;
  cpSync  :Byte;
 end;

 p_pm4_node_WriteData=^t_pm4_node_WriteData;
 t_pm4_node_WriteData=packed object(t_pm4_node)
  dst      :Pointer;
  src      :Pointer;
  num_dw   :Word;
  dstSel   :Byte;
  wrConfirm:Boolean;
 end;

 p_pm4_node_WaitRegMem=^t_pm4_node_WaitRegMem;
 t_pm4_node_WaitRegMem=packed object(t_pm4_node)
  pollAddr    :Pointer;
  refValue    :DWORD;
  mask        :DWORD;
  compareFunc :Byte;
 end;

 p_pm4_node_FastClear=^t_pm4_node_FastClear;
 t_pm4_node_FastClear=object(t_pm4_node)
  RT:TRT_INFO;
 end;

 p_pm4_node_Resolve=^t_pm4_node_Resolve;
 t_pm4_node_Resolve=object(t_pm4_node)
  CX_REG:TCONTEXT_REG_GROUP; // 0xA000
 end;

 //Used for ntDrawIndex2, ntDrawIndexAuto and ntClearDepth.
 p_pm4_node_draw=^t_pm4_node_draw;
 t_pm4_node_draw=object(t_pm4_node)
  rt_info:t_pm4_rt_info;

  indexBase   :QWORD;
  indexOffset :DWORD;
  indexCount  :DWORD;
  numInstances:DWORD;

  INDEX_TYPE:Byte;
  SWAP_MODE :Byte;
 end;

 p_pm4_node_DispatchDirect=^t_pm4_node_DispatchDirect;
 t_pm4_node_DispatchDirect=object(t_pm4_node)
  USER_DATA_CS:TSPI_USER_DATA;

  ShaderGroup:TvShaderGroup;

  DIM_X:DWORD;
  DIM_Y:DWORD;
  DIM_Z:DWORD;

  //SH_REG:TSH_REG_GROUP; // 0x2C00
 end;

 p_pm4_stream=^t_pm4_stream;
 t_pm4_stream=object(t_pm4_resource_stream_scope)
  //
  list:TAILQ_HEAD; //t_pm4_node
  //
  buft:t_pm4_stream_type;
  //
  init:Boolean;
  //
  curr:p_pm4_node;
  //
  refs:Ptruint;
  //
  procedure Free;
  Procedure add_node(node:p_pm4_node);
  function  First:p_pm4_node;
  function  Next(node:p_pm4_node):p_pm4_node; static;
  //
  procedure Acquire;
  function  Release:Boolean;
  //
  procedure LoadConstRam (addr:Pointer;num_dw,offset:Word);
  procedure EventWrite   (eventType:Byte);
  procedure EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte);
  procedure EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte);
  procedure SubmitFlipEop(eop_value:QWORD;intSel:Byte);
  procedure DmaData      (dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte);
  procedure WriteData    (dstSel:Byte;dst,src:Pointer;num_dw:Word;wrConfirm:Byte);
  procedure WaitRegMem   (pollAddr:Pointer;refValue,mask:DWORD;compareFunc:Byte);
  procedure FastClear    (var CX_REG:TCONTEXT_REG_GROUP);
  procedure Resolve      (var CX_REG:TCONTEXT_REG_GROUP);
  function  ColorControl (var CX_REG:TCONTEXT_REG_GROUP):Boolean;
  procedure Init_Uniforms(node:p_pm4_node;var FUniformBuilder:TvUniformBuilder);
  procedure Build_rt_info(node:p_pm4_node;
                          var rt_info:t_pm4_rt_info;
                          var GPU_REGS:TGPU_REGS);
  procedure BuildDraw    (ntype:t_pm4_node_type;
                          var SH_REG:TSH_REG_GROUP;
                          var CX_REG:TCONTEXT_REG_GROUP;
                          var UC_REG:TUSERCONFIG_REG_SHORT);
  procedure DrawIndex2   (var SH_REG:TSH_REG_GROUP;
                          var CX_REG:TCONTEXT_REG_GROUP;
                          var UC_REG:TUSERCONFIG_REG_SHORT);
  procedure DrawIndexAuto(var SH_REG:TSH_REG_GROUP;
                          var CX_REG:TCONTEXT_REG_GROUP;
                          var UC_REG:TUSERCONFIG_REG_SHORT);
  procedure Build_cs_info (node:p_pm4_node_DispatchDirect;var GPU_REGS:TGPU_REGS);
  procedure DispatchDirect(var SH_REG:TSH_REG_GROUP);
 end;

implementation

uses
 sys_bootparam;

var
 cache_block_allocator:t_cache_block_allocator;

//

//Union of two usages. Both operands contribute to both fields.
operator + (a,b:t_pm4_usage):t_pm4_usage;
begin
 Result.mem_usage:=a.mem_usage or b.mem_usage;
 //fixed: the left operand used to be ignored (b.img_usage + b.img_usage)
 Result.img_usage:=a.img_usage + b.img_usage;
end;

//

//Ordering of resources: by address, then by type, then by image key
//(images) or by size (buffers/htile). Returns <0, 0 or >0.
//NOTE(review): the "<" halves of the comparisons and the "if (Result<>0)"
//checks were missing in the reviewed text and are restored from the pattern.
function t_pm4_resource.c(n1,n2:p_pm4_resource):Integer;
begin
 //0 Addr
 Result:=Integer(n1^.rkey.Addr>n2^.rkey.Addr)-Integer(n1^.rkey.Addr<n2^.rkey.Addr);
 if (Result<>0) then Exit;
 //1 rtype
 Result:=Integer(n1^.rtype>n2^.rtype)-Integer(n1^.rtype<n2^.rtype);
 if (Result<>0) then Exit;

 case n1^.rtype of
  R_IMG:
   begin
    Result:=CompareNormalized(n1^.rkey,n2^.rkey);
   end;
  R_BUF,
  R_HTILE:
   begin
    //2 rsize
    Result:=Integer(n1^.rsize>n2^.rsize)-Integer(n1^.rsize<n2^.rsize);
   end;
 end;
end;

//Instances are ordered by the resource they refer to.
//NOTE(review): body reconstructed. Lookups in t_pm4_resource_curr_scope pass
//the address of a temporary t_pm4_resource, so the comparison must be by
//resource content rather than by pointer value - confirm.
function t_pm4_resource_instance.c(n1,n2:p_pm4_resource_instance):Integer;
begin
 Result:=t_pm4_resource.c(n1^.resource,n2^.resource);
end;

//

//First instance of the init list (nil when empty).
//NOTE(review): body reconstructed by analogy with t_pm4_stream.First.
function t_pm4_resource_init_scope.first:p_pm4_resource_instance;
begin
 Result:=TAILQ_FIRST(@list);
end;

//Appends the instance to the init list unless its list entry is already
//linked; the list head is initialised lazily on first use.
procedure t_pm4_resource_init_scope.insert(i:p_pm4_resource_instance);
begin
 //already linked into some list
 if (i^.init_entry.tqe_next<>nil) or
    (i^.init_entry.tqe_prev<>nil) then Exit;

 //zero-filled head: not yet initialised
 if (list.tqh_first=nil) and
    (list.tqh_last =nil) then
 begin
  TAILQ_INIT(@list);
 end;

 TAILQ_INSERT_TAIL(@list,i,@i^.init_entry);

 i^.init_scope:=@self;
end;

//

//Adds the instance to the scope; when an instance of the same resource is
//already present its usages are merged instead.
procedure t_pm4_resource_curr_scope.insert(i:p_pm4_resource_instance);
var
 f:p_pm4_resource_instance;
begin
 f:=resource_instance_set.Find(i);

 if (f<>nil) then
 begin
  //union
  f^.curr:=f^.curr + i^.curr;
  f^.prev:=f^.prev + i^.prev;
  f^.next:=f^.next + i^.next;
 end else
 begin
  resource_instance_set.Insert(i);
 end;
end;

//Looks up the instance that refers to the given resource (nil if none).
function t_pm4_resource_curr_scope.find_resource_instance(r:p_pm4_resource):p_pm4_resource_instance;
var
 tmp:t_pm4_resource_instance;
begin
 if (r=nil) then Exit(nil);

 tmp:=Default(t_pm4_resource_instance);
 tmp.resource:=r;

 Result:=resource_instance_set.Find(@tmp);
end;

//Instance lookup by image key; undefined formats are never tracked.
function t_pm4_resource_curr_scope.find_image_resource_instance(const rkey:TvImageKey):p_pm4_resource_instance;
var
 tmp:t_pm4_resource;
begin
 if (rkey.cformat=VK_FORMAT_UNDEFINED) then Exit(nil);

 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_IMG;
 tmp.rkey :=rkey;

 Result:=find_resource_instance(@tmp);
end;

//Instance lookup by buffer address and size.
function t_pm4_resource_curr_scope.find_buffer_resource_instance(addr:Pointer;size:DWORD):p_pm4_resource_instance;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_BUF;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=find_resource_instance(@tmp);
end;

//Instance lookup by htile address and size.
function t_pm4_resource_curr_scope.find_htile_resource_instance(addr:Pointer;size:DWORD):p_pm4_resource_instance;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_HTILE;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=find_resource_instance(@tmp);
end;

//

//Returns the image resource with this key or nil.
function t_pm4_resource_stream_scope.find_image_resource(const rkey:TvImageKey):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_IMG;
 tmp.rkey :=rkey;

 Result:=resource_set.Find(@tmp);
end;

//Returns the image resource with this key, creating it when missing.
function t_pm4_resource_stream_scope.fetch_image_resource(const rkey:TvImageKey):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_IMG;
 tmp.rkey :=rkey;

 Result:=resource_set.Find(@tmp);

 if (Result=nil) then
 begin
  //size is not part of the image ordering, fill it only for new entries
  tmp.rsize:=get_image_size(rkey);

  Result:=allocator.Alloc(SizeOf(t_pm4_resource));
  Result^:=tmp;

  resource_set.Insert(Result);
 end;
end;

//Returns the buffer resource (addr,size) or nil.
function t_pm4_resource_stream_scope.find_buffer_resource(addr:Pointer;size:DWORD):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_BUF;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=resource_set.Find(@tmp);
end;

//Returns the buffer resource (addr,size), creating it when missing.
function t_pm4_resource_stream_scope.fetch_buffer_resource(addr:Pointer;size:DWORD):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_BUF;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=resource_set.Find(@tmp);

 if (Result=nil) then
 begin
  Result:=allocator.Alloc(SizeOf(t_pm4_resource));
  Result^:=tmp;

  resource_set.Insert(Result);
 end;
end;

//Returns the htile resource (addr,size) or nil.
function t_pm4_resource_stream_scope.find_htile_resource(addr:Pointer;size:DWORD):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_HTILE;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=resource_set.Find(@tmp);
end;

//Returns the htile resource (addr,size), creating it when missing.
function t_pm4_resource_stream_scope.fetch_htile_resource(addr:Pointer;size:DWORD):p_pm4_resource;
var
 tmp:t_pm4_resource;
begin
 tmp:=Default(t_pm4_resource);
 tmp.rtype:=R_HTILE;
 tmp.rkey.Addr:=addr;
 tmp.rsize:=size;

 Result:=resource_set.Find(@tmp);

 if (Result=nil) then
 begin
  Result:=allocator.Alloc(SizeOf(t_pm4_resource));
  Result^:=tmp;

  resource_set.Insert(Result);
 end;
end;

//Returns the instance of resource r inside the scope with the requested
//usage merged in. A newly allocated instance is NOT yet inserted into the
//scope; the insert_* helpers do that.
function t_pm4_resource_stream_scope.fetch_resource_instance(scope:p_pm4_resource_curr_scope;r:p_pm4_resource;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
begin
 Result:=scope^.find_resource_instance(r);

 if (Result=nil) then
 begin
  Result:=allocator.Alloc(SizeOf(t_pm4_resource_instance));
  Result^:=Default(t_pm4_resource_instance);
  //
  Result^.resource:=r;
  Result^.curr.mem_usage:=mem_usage;
  Result^.curr.img_usage:=img_usage;
 end else
 begin
  Result^.curr.mem_usage:=Result^.curr.mem_usage or mem_usage;
  Result^.curr.img_usage:=Result^.curr.img_usage + img_usage;
 end;
end;

//Registers an image usage in the scope. Returns the instance, or nil when
//the key has an undefined format (nothing is tracked in that case).
function t_pm4_resource_stream_scope.insert_image_resource(scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
var
 r:p_pm4_resource;
 i:p_pm4_resource_instance;
begin
 //fixed: a bare Exit left Result uninitialised
 if (rkey.cformat=VK_FORMAT_UNDEFINED) then Exit(nil);

 r:=fetch_image_resource(rkey);
 i:=fetch_resource_instance(scope,r,mem_usage,img_usage);

 if ((mem_usage and TM_READ)<>0) then
 if (i^.prev.mem_usage=0) then //no prev usage
 begin
  //init
  init_scope.insert(i);
 end;

 scope^.insert(i);

 Result:=i;
end;

//Registers a buffer usage in the scope and returns the instance.
function t_pm4_resource_stream_scope.insert_buffer_resource(scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer):p_pm4_resource_instance;
var
 r:p_pm4_resource;
 i:p_pm4_resource_instance;
begin
 r:=fetch_buffer_resource(addr,size);
 i:=fetch_resource_instance(scope,r,mem_usage,[iu_buffer]);

 if ((mem_usage and TM_READ)<>0) then
 if (i^.prev.mem_usage=0) then //no prev usage
 begin
  //init
  init_scope.insert(i);
 end;

 scope^.insert(i);

 Result:=i;
end;

//Registers an htile usage in the scope and returns the instance.
function t_pm4_resource_stream_scope.insert_htile_resource(scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer):p_pm4_resource_instance;
var
 r:p_pm4_resource;
 i:p_pm4_resource_instance;
begin
 r:=fetch_htile_resource(addr,size);
 i:=fetch_resource_instance(scope,r,mem_usage,[iu_htile]); //iu_htile

 if ((mem_usage and TM_READ)<>0) then
 if (i^.prev.mem_usage=0) then //no prev usage
 begin
  //init
  init_scope.insert(i);
 end;

 scope^.insert(i);

 Result:=i;
end;

//

//Links instance i with the last writers of every resource whose address
//range intersects the range of i's resource, and records i as the last
//writer of those resources when i writes or clears.
procedure t_pm4_resource_stream_scope.connect_resource_instance(i:p_pm4_resource_instance);
var
 start:Pointer;
 __end:Pointer;
 node:p_pm4_resource;
 prev:p_pm4_resource_instance;
 tmp:t_pm4_resource;
begin
 //find cross

 tmp:=i^.resource^;

 start:=tmp.rkey.Addr;
 __end:=start+tmp.rsize;

 //Search key: orders after every resource that starts at or before __end.
 //fixed: the key address used to be "start" (a no-op), so resources that
 //begin inside (start,__end) were never visited.
 //NOTE(review): assumes Find_ls returns the greatest node ordered before
 //the key - confirm against g_node_splay.
 tmp.rtype:=High(Integer);
 tmp.rkey.Addr:=__end;

 //[s|new|e] ->
 //      [s|old|e]

 node:=resource_set.Find_ls(@tmp);

 while (node<>nil) do
 begin
  //range intersection test
  if (__end>(node^.rkey.Addr)) and
     (start<(node^.rkey.Addr+node^.rsize)) then
  begin
   prev:=node^.rwrite;

   if (prev<>nil) and (prev<>i) then
   begin
    //sum prev of curr
    i^.prev:=i^.prev + prev^.curr;
    //sum next of prev
    prev^.next:=prev^.next + i^.curr;

    if (prev^.resource<>i^.resource) then
    begin
     //sum prev of curr
     i^.prev_overlap:=i^.prev_overlap + prev^.curr;
     //sum next of prev
     prev^.next_overlap:=prev^.next_overlap + i^.curr;
    end;
   end;

   //
   if ((i^.curr.mem_usage and (TM_WRITE or TM_CLEAR))<>0) then
   begin
    node^.rwrite:=i;
   end;
  end;

  node:=resource_set.Prev(node);
 end;
end;

//Connects every instance of the scope (see connect_resource_instance).
procedure t_pm4_resource_stream_scope.connect_resource_scope(scope:p_pm4_resource_curr_scope);
var
 node:p_pm4_resource_instance;
begin
 node:=scope^.resource_instance_set.Min;

 while (node<>nil) do
 begin
  connect_resource_instance(node);

  node:=scope^.resource_instance_set.Next(node);
 end;
end;

//

//Drops all nodes and releases the stream memory.
procedure t_pm4_stream.Free;
begin
 list:=Default(TAILQ_HEAD);
 //resources and instances live in allocator memory: forget them before the
 //memory is released so no dangling pointers stay behind
 resource_set:=Default(t_pm4_resource_set);
 init_scope  :=Default(t_pm4_resource_init_scope);
 allocator.Free;
end;

//Appends the node to the stream and connects its resource scope.
Procedure t_pm4_stream.add_node(node:p_pm4_node);
begin
 //zero-filled head: not yet initialised
 if (list.tqh_first=nil) and
    (list.tqh_last =nil) then
 begin
  TAILQ_INIT(@list);
 end;

 TAILQ_INSERT_TAIL(@list,node,@node^.entry);

 connect_resource_scope(@node^.scope);
end;

function t_pm4_stream.First:p_pm4_node;
begin
 Result:=TAILQ_FIRST(@list);
end;

function t_pm4_stream.Next(node:p_pm4_node):p_pm4_node;
begin
 Result:=TAILQ_NEXT(node,@node^.entry);
end;

//

//Atomically increments the reference counter.
procedure t_pm4_stream.Acquire;
begin
 System.InterlockedIncrement(Pointer(refs));
end;

//Atomically decrements the reference counter; True when it reached zero.
function t_pm4_stream.Release:Boolean;
begin
 Result:=System.InterlockedDecrement(Pointer(refs))=nil;
end;

//

procedure t_pm4_stream.LoadConstRam(addr:Pointer;num_dw,offset:Word);
var
 node:p_pm4_node_LoadConstRam;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_LoadConstRam));

 node^.ntype :=ntLoadConstRam;
 node^.scope :=Default(t_pm4_resource_curr_scope);
 node^.addr  :=addr;
 node^.num_dw:=num_dw;
 node^.offset:=offset;

 add_node(node);
end;

procedure t_pm4_stream.EventWrite(eventType:Byte);
var
 node:p_pm4_node_EventWrite;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_EventWrite));

 node^.ntype    :=ntEventWrite;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.eventType:=eventType;

 add_node(node);
end;

procedure t_pm4_stream.EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte);
var
 node:p_pm4_node_EventWriteEop;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEop));

 node^.ntype    :=ntEventWriteEop;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.addr     :=addr;
 node^.data     :=data;
 node^.eventType:=eventType;
 node^.dataSel  :=dataSel;
 node^.intSel   :=intSel;

 add_node(node);
end;

procedure t_pm4_stream.EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte);
var
 node:p_pm4_node_EventWriteEos;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEos));

 node^.ntype    :=ntEventWriteEos;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.addr     :=addr;
 node^.data     :=data;
 node^.eventType:=eventType;
 node^.command  :=command;

 add_node(node);
end;

procedure t_pm4_stream.SubmitFlipEop(eop_value:QWORD;intSel:Byte);
var
 node:p_pm4_node_SubmitFlipEop;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_SubmitFlipEop));

 node^.ntype    :=ntSubmitFlipEop;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.eop_value:=eop_value;
 node^.intSel   :=intSel;

 add_node(node);
end;

procedure t_pm4_stream.DmaData(dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte);
var
 node:p_pm4_node_DmaData;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_DmaData));

 node^.ntype   :=ntDmaData;
 node^.scope   :=Default(t_pm4_resource_curr_scope);
 node^.dst     :=dst;
 node^.src     :=srcOrData;
 node^.numBytes:=numBytes;
 node^.srcSel  :=srcSel;
 node^.dstSel  :=dstSel;
 node^.cpSync  :=isBlocking;

 add_node(node);
end;

//Records a WRITE_DATA packet. Only the source pointer is stored, the
//payload itself is not copied.
procedure t_pm4_stream.WriteData(dstSel:Byte;dst,src:Pointer;num_dw:Word;wrConfirm:Byte);
var
 node:p_pm4_node_WriteData;
begin
 //Can I copy the link?
 //Or do I have to copy the data?

 node:=allocator.Alloc(SizeOf(t_pm4_node_WriteData){+num_dw*SizeOf(DWORD)});

 node^.ntype    :=ntWriteData;
 node^.scope    :=Default(t_pm4_resource_curr_scope);
 node^.dst      :=dst;
 //node^.src      :=Pointer(node+1);
 node^.src      :=src;
 node^.num_dw   :=num_dw;
 node^.dstSel   :=dstSel;
 node^.wrConfirm:=(wrConfirm<>0);

 //Move(src^,node^.src^,num_dw*SizeOf(DWORD));

 add_node(node);
end;

procedure t_pm4_stream.WaitRegMem(pollAddr:Pointer;refValue,mask:DWORD;compareFunc:Byte);
var
 node:p_pm4_node_WaitRegMem;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_WaitRegMem));

 node^.ntype      :=ntWaitRegMem;
 node^.scope      :=Default(t_pm4_resource_curr_scope);
 node^.pollAddr   :=pollAddr;
 node^.refValue   :=refValue;
 node^.mask       :=mask;
 node^.compareFunc:=compareFunc;

 add_node(node);
end;

//Records a fast-clear node carrying the info of render target 0.
procedure t_pm4_stream.FastClear(var CX_REG:TCONTEXT_REG_GROUP);
var
 GPU_REGS:TGPU_REGS;
 RT:TRT_INFO;
 node:p_pm4_node_FastClear;
begin
 GPU_REGS:=Default(TGPU_REGS);
 GPU_REGS.CX_REG:=@CX_REG;

 node:=allocator.Alloc(SizeOf(t_pm4_node_FastClear));

 node^.ntype :=ntFastClear;
 node^.scope :=Default(t_pm4_resource_curr_scope);

 //
 RT:=GPU_REGS.GET_RT_INFO(0);

 {
 //clear TM_READ
 RT.IMAGE_USAGE:=RT.IMAGE_USAGE and (not TM_READ);
 //set TM_CLEAR
 RT.IMAGE_USAGE:=RT.IMAGE_USAGE or TM_CLEAR;
 //
 insert_image_resource(@node^.scope,
                       RT.FImageInfo,
                       RT.IMAGE_USAGE,
                       [iu_attachment]);
 }

 //
 node^.RT:=RT;
 //
 add_node(node);
end;

//Records a resolve node with a copy of the context registers.
procedure t_pm4_stream.Resolve(var CX_REG:TCONTEXT_REG_GROUP);
var
 node:p_pm4_node_Resolve;
begin
 node:=allocator.Alloc(SizeOf(t_pm4_node_Resolve));

 node^.ntype :=ntResolve;
 node^.scope :=Default(t_pm4_resource_curr_scope);
 node^.CX_REG:=CX_REG;

 add_node(node);
end;

//Handles CB_COLOR_CONTROL.MODE before a draw. Returns True when the mode
//was turned into its own node (fast clear / resolve) and the draw must be
//skipped by the caller.
function t_pm4_stream.ColorControl(var CX_REG:TCONTEXT_REG_GROUP):Boolean;
begin
 Result:=False;

 case CX_REG.CB_COLOR_CONTROL.MODE of
  CB_DISABLE:
   if p_print_gpu_ops then
   begin
    Writeln('DISABLE');
   end;
  CB_NORMAL:; //next
  CB_ELIMINATE_FAST_CLEAR:
   // Expand latest specified clear color into pixel data for the fast cleared color/depth resource.
   begin
    FastClear(CX_REG);
    Exit(True);
   end;
  CB_RESOLVE:
   // Fixed function resolve. (MSAA)
   begin
    Resolve(CX_REG);
    Exit(True);
   end;
  CB_DECOMPRESS:
   if p_print_gpu_ops then
   begin
    Writeln('DECOMPRESS');
   end;
  CB_FMASK_DECOMPRESS:
   // Fmask decompression for shader readability.
   if p_print_gpu_ops then
   begin
    Writeln('FMASK_DECOMPRESS');
   end;
  CB_DCC_DECOMPRESS:
   // Indicates this color target view is for a DCC decompress
   if p_print_gpu_ops then
   begin
    Writeln('DCC_DECOMPRESS');
   end;
  else
   Assert(False,'unknow color control:0x'+HexStr(CX_REG.CB_COLOR_CONTROL.MODE,1));
 end;
end;

//Registers every image and buffer of the uniform builder in the node scope.
procedure t_pm4_stream.Init_Uniforms(node:p_pm4_node;var FUniformBuilder:TvUniformBuilder);
var
 i:Integer;
begin
 //images
 if (Length(FUniformBuilder.FImages)<>0) then
 begin
  For i:=0 to High(FUniformBuilder.FImages) do
  With FUniformBuilder.FImages[i] do
  begin
   insert_image_resource(@node^.scope,
                         FImage,
                         TM_READ,
                         [iu_sampled]);
  end;
 end;
 //images

 //buffers
 if (Length(FUniformBuilder.FBuffers)<>0) then
 begin
  For i:=0 to High(FUniformBuilder.FBuffers) do
  With FUniformBuilder.FBuffers[i] do
  begin
   //TODO: check write flag
   insert_buffer_resource(@node^.scope,
                          addr,
                          size,
                          TM_READ or TM_WRITE);
  end;
 end;
 //buffers
end;

//Captures the render state from GPU_REGS into rt_info and registers all
//render targets, depth/stencil/htile surfaces and shader uniforms as
//resources of the node.
procedure t_pm4_stream.Build_rt_info(node:p_pm4_node;
                                     var rt_info:t_pm4_rt_info;
                                     var GPU_REGS:TGPU_REGS);
var
 i:Integer;
 RT:TRT_INFO;
 FUniformBuilder:TvUniformBuilder;
begin
 //only identity (or zero) PS input mappings without extras are supported
 for i:=0 to 31 do
 begin
  if (GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>0) and
     (GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET<>i) then
  begin
   Assert(false,    'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].OFFSET='          +IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].OFFSET          ));
  end;
  Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL     =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].DEFAULT_VAL='     +IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].DEFAULT_VAL     ));
  Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE      =0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FLAT_SHADE='      +IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FLAT_SHADE      ));
  Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE=0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FP16_INTERP_MODE='+IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE));
 end;

 GPU_REGS.export_user_data_rt(@rt_info.USERDATA);

 rt_info.ShaderGroup:=FetchShaderGroupRT(GPU_REGS,nil{@pa});
 Assert(rt_info.ShaderGroup<>nil);

 //color targets
 rt_info.RT_COUNT:=0;

 if GPU_REGS.COMP_ENABLE then
 For i:=0 to GPU_REGS.GET_HI_RT do
 begin
  RT:=GPU_REGS.GET_RT_INFO(i);

  //
  insert_image_resource(@node^.scope,
                        RT.FImageInfo,
                        RT.IMAGE_USAGE,
                        [iu_attachment]);
  //

  rt_info.RT_INFO[rt_info.RT_COUNT]:=RT;

  Inc(rt_info.RT_COUNT);
 end;

 //depth/stencil target
 rt_info.DB_ENABLE:=GPU_REGS.DB_ENABLE;

 if rt_info.DB_ENABLE then
 begin
  rt_info.DB_INFO:=GPU_REGS.GET_DB_INFO;

  //
  insert_image_resource(@node^.scope,
                        GetDepthOnly(rt_info.DB_INFO.FImageInfo),
                        rt_info.DB_INFO.DEPTH_USAGE,
                        [iu_depthstenc]);

  insert_image_resource(@node^.scope,
                        GetStencilOnly(rt_info.DB_INFO.FImageInfo),
                        rt_info.DB_INFO.STENCIL_USAGE,
                        [iu_depthstenc]);

  if (rt_info.DB_INFO.HTILE_INFO.TILE_SURFACE_ENABLE<>0) then
  begin
   insert_htile_resource(@node^.scope,
                         rt_info.DB_INFO.HTILE_INFO.KEY.Addr,
                         rt_info.DB_INFO.HTILE_INFO.SIZE,
                         rt_info.DB_INFO.DEPTH_USAGE);
  end;
 end;

 rt_info.BLEND_INFO:=GPU_REGS.GET_BLEND_INFO;

 rt_info.PRIM_TYPE :=ord(GPU_REGS.GET_PRIM_TYPE);
 rt_info.PRIM_RESET:=GPU_REGS.GET_PRIM_RESET;

 //enabled viewports are packed to the front of VPORT/SCISSOR
 rt_info.VP_COUNT:=0;

 For i:=0 to 15 do
 if GPU_REGS.VP_ENABLE(i) then
 begin
  rt_info.VPORT  [rt_info.VP_COUNT]:=GPU_REGS.GET_VPORT  (i);
  rt_info.SCISSOR[rt_info.VP_COUNT]:=GPU_REGS.GET_SCISSOR(i);

  Inc(rt_info.VP_COUNT);
 end;

 rt_info.RASTERIZATION:=GPU_REGS.GET_RASTERIZATION;
 rt_info.MULTISAMPLE  :=GPU_REGS.GET_MULTISAMPLE;

 rt_info.PROVOKING:=ord(GPU_REGS.GET_PROVOKING);

 rt_info.SCREEN_RECT:=GPU_REGS.GET_SCREEN;
 rt_info.SCREEN_SIZE:=GPU_REGS.GET_SCREEN_SIZE;

 //
 FUniformBuilder:=Default(TvUniformBuilder);
 rt_info.ShaderGroup.ExportUnifBuilder(FUniformBuilder,@rt_info.USERDATA);

 Init_Uniforms(node,FUniformBuilder);
end;

//True when only VS and PS stages are present and their hashes equal the
//two hard-coded values checked below.
function IsClearDepthShaders(const FShaders:AvShaderStage):Boolean; inline;
begin
 Result:=False;

 if (FShaders[vShaderStageLs]=nil) and
    (FShaders[vShaderStageHs]=nil) and
    (FShaders[vShaderStageEs]=nil) and
    (FShaders[vShaderStageGs]=nil) and
    (FShaders[vShaderStageVs]<>nil) and
    (FShaders[vShaderStagePs]<>nil) and
    (FShaders[vShaderStageCs]=nil) then
 if (FShaders[vShaderStageVs].FHash=QWORD($00DF6E6331449451)) and
    (FShaders[vShaderStagePs].FHash=QWORD($E9FF5D4699E5B9AD)) then
 begin
  Result:=True;
 end;
end;

//Builds a draw node of the given type from the current register state and
//appends it to the stream. A draw that matches the clear-depth heuristic is
//retagged as ntClearDepth.
procedure t_pm4_stream.BuildDraw(ntype:t_pm4_node_type;
                                 var SH_REG:TSH_REG_GROUP;
                                 var CX_REG:TCONTEXT_REG_GROUP;
                                 var UC_REG:TUSERCONFIG_REG_SHORT);
var
 GPU_REGS:TGPU_REGS;
 node:p_pm4_node_draw;
begin
 GPU_REGS:=Default(TGPU_REGS);
 GPU_REGS.SH_REG:=@SH_REG;
 GPU_REGS.CX_REG:=@CX_REG;
 GPU_REGS.UC_REG:=@UC_REG;

 node:=allocator.Alloc(SizeOf(t_pm4_node_draw));

 node^.ntype :=ntype;
 node^.scope :=Default(t_pm4_resource_curr_scope);

 Build_rt_info(node,node^.rt_info,GPU_REGS);

 node^.indexBase   :=CX_REG.VGT_DMA_BASE or (QWORD(CX_REG.VGT_DMA_BASE_HI.BASE_ADDR) shl 32);
 node^.indexOffset :=CX_REG.VGT_INDX_OFFSET;
 node^.indexCount  :=UC_REG.VGT_NUM_INDICES;
 node^.numInstances:=UC_REG.VGT_NUM_INSTANCES;

 node^.INDEX_TYPE:=ord(GPU_REGS.GET_INDEX_TYPE);
 node^.SWAP_MODE :=CX_REG.VGT_DMA_INDEX_TYPE.SWAP_MODE;

 //heuristic
 if (ntype=ntDrawIndexAuto) and
    (node^.numInstances<=1) and
    (node^.rt_info.RT_COUNT=0) and
    (node^.rt_info.DB_ENABLE) and
    (
     ((node^.rt_info.DB_INFO.DEPTH_USAGE   and TM_CLEAR)<>0) or
     ((node^.rt_info.DB_INFO.STENCIL_USAGE and TM_CLEAR)<>0)
    ) then
 if IsClearDepthShaders(node^.rt_info.ShaderGroup.FKey.FShaders) then
 begin
  //ClearDepthTarget
  node^.ntype:=ntClearDepth;
 end;

 add_node(node);
end;

procedure t_pm4_stream.DrawIndex2(var SH_REG:TSH_REG_GROUP;
                                  var CX_REG:TCONTEXT_REG_GROUP;
                                  var UC_REG:TUSERCONFIG_REG_SHORT);
begin
 if ColorControl(CX_REG) then Exit;

 BuildDraw(ntDrawIndex2,SH_REG,CX_REG,UC_REG);
end;

procedure t_pm4_stream.DrawIndexAuto(var SH_REG:TSH_REG_GROUP;
                                     var CX_REG:TCONTEXT_REG_GROUP;
                                     var UC_REG:TUSERCONFIG_REG_SHORT);
begin
 if ColorControl(CX_REG) then Exit;

 BuildDraw(ntDrawIndexAuto,SH_REG,CX_REG,UC_REG);
end;

//Captures compute state (user data, shader group, dimensions) into the
//dispatch node and registers the shader uniforms as resources.
procedure t_pm4_stream.Build_cs_info(node:p_pm4_node_DispatchDirect;var GPU_REGS:TGPU_REGS);
var
 dst:PGPU_USERDATA;
 FUniformBuilder:TvUniformBuilder;
begin
 //hack
 //dst is a fake TGPU_USERDATA pointer positioned so that its
 //A[vShaderStageCs] element coincides with node^.USER_DATA_CS
 dst:=Pointer(@node^.USER_DATA_CS)-Ptruint(@TGPU_USERDATA(nil^).A[vShaderStageCs]);

 GPU_REGS.export_user_data_cs(dst);

 node^.ShaderGroup:=FetchShaderGroupCS(GPU_REGS,nil{@pa});
 Assert(node^.ShaderGroup<>nil);

 node^.DIM_X:=GPU_REGS.SH_REG^.COMPUTE_DIM_X;
 node^.DIM_Y:=GPU_REGS.SH_REG^.COMPUTE_DIM_Y;
 node^.DIM_Z:=GPU_REGS.SH_REG^.COMPUTE_DIM_Z;

 //
 FUniformBuilder:=Default(TvUniformBuilder);
 node^.ShaderGroup.ExportUnifBuilder(FUniformBuilder,dst);

 Init_Uniforms(node,FUniformBuilder);
end;

procedure t_pm4_stream.DispatchDirect(var SH_REG:TSH_REG_GROUP);
var
 GPU_REGS:TGPU_REGS;
 node:p_pm4_node_DispatchDirect;
begin
 GPU_REGS:=Default(TGPU_REGS);
 GPU_REGS.SH_REG:=@SH_REG;

 node:=allocator.Alloc(SizeOf(t_pm4_node_DispatchDirect));

 node^.ntype:=ntDispatchDirect;
 node^.scope:=Default(t_pm4_resource_curr_scope);

 Build_cs_info(node,GPU_REGS);

 add_node(node);
end;

//

procedure t_cache_block_allocator.init;
begin
 queue.Create;
 xlock:=nil;
 count:=0;
end;

//Returns a block of mem_size bytes: a cached one when the guard could be
//taken and the queue is not empty, otherwise a freshly mapped one.
Function t_cache_block_allocator.Alloc:Pointer;
begin
 Result:=nil;

 //try to become the single consumer of the queue
 if (XCHG(xlock,Pointer(1))=nil) then
 begin
  if queue.Pop(Result) then
  begin
   XCHG(xlock,nil);
   //
   System.InterlockedDecrement64(count);
   Exit;
  end;
  XCHG(xlock,nil);
 end;

 //guard busy or cache empty
 md_mmap(Result,mem_size,VM_RW);
end;

//Returns a block to the cache, or unmaps it when the cache already holds
//max_count blocks.
Procedure t_cache_block_allocator.Free(node:Pointer);
begin
 if (node=nil) then Exit;

 //cheap pre-check, then the authoritative atomic reservation
 if (count+1<=max_count) then
 begin
  if (System.InterlockedIncrement64(count)<=max_count) then
  begin
   queue.Push(node);
   Exit;
  end else
  begin
   //lost the race: undo the reservation
   System.InterlockedDecrement64(count);
  end;
 end;

 //
 md_unmap(node,mem_size);
end;

//

//Returns Size bytes from the current block, starting a new block when the
//request does not fit. Requests larger than a cache block get a dedicated
//mapping rounded up to 64 KiB.
Function t_pm4_allocator.Alloc(Size:ptruint):Pointer;
var
 mem_size:ptruint;
 node:PAllocNode;
begin
 if (pHead.slh_first=nil) or (Size>curr_size) then
 begin
  if (Size>(cache_block_allocator.mem_size-SizeOf(TAllocNode))) then
  begin
   mem_size:=Align(Size+SizeOf(TAllocNode),64*1024);
   //
   node:=nil;
   md_mmap(node,mem_size,VM_RW);
  end else
  begin
   mem_size:=cache_block_allocator.mem_size;
   //
   node:=cache_block_allocator.Alloc;
  end;

  node^.size:=mem_size;

  SLIST_INSERT_HEAD(@pHead,node,@node^.link);

  curr_apos:=0;
  curr_size:=mem_size-SizeOf(TAllocNode);

  Inc(full_size,mem_size);
 end;

 node:=SLIST_FIRST(@pHead);

 Result:=@PByte(@node^.data)[curr_apos];

 Inc(used_size,Size);

 //keep the next allocation pointer-aligned
 Size:=Align(Size,SizeOf(ptruint));

 Inc(curr_apos,Size);
 Dec(curr_size,Size);
end;

//Releases every block (cache-sized ones go back to the block cache, the
//others are unmapped) and resets the allocator to its initial state.
Procedure t_pm4_allocator.Free;
var
 node:PAllocNode;
begin
 node:=SLIST_FIRST(@pHead);

 While (node<>nil) do
 begin
  SLIST_REMOVE(@pHead,node,@node^.link);

  if (node^.size=cache_block_allocator.mem_size) then
  begin
   cache_block_allocator.Free(node);
  end else
  begin
   md_unmap(node,node^.size);
  end;

  node:=SLIST_FIRST(@pHead);
 end;

 Self:=Default(t_pm4_allocator);
end;

initialization
 cache_block_allocator.Init;

end.