FPPS4/chip/pm4_stream.pas

2033 lines
47 KiB
Plaintext

unit pm4_stream;
{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}
interface
uses
sysutils,
mqueue,
LFQueue,
md_map,
bittype,
pm4defs,
si_ci_vi_merged_enum,
si_ci_vi_merged_registers,
si_ci_vi_merged_groups,
Vulkan,
vImage,
vShader,
vShaderExt,
vShaderManager,
vRegs2Vulkan,
vImageTiling,
g_node_splay
;
type
t_cache_block_allocator=object
const
mem_size =64*1024;
max_count=256;
//
var
queue:TIntrusiveMPSCQueue;
xlock:Pointer;
count:QWORD;
//
procedure Init;
Function Alloc:Pointer;
Procedure Free(node:Pointer);
end;
t_pm4_allocator=object
type
PAllocNode=^TAllocNode;
TAllocNode=packed record
link:PAllocNode;
size:QWORD;
data:record end;
end;
var
pHead:SLIST_HEAD;
curr_apos:ptruint; //alloc pos in current node
curr_size:ptruint; //useable size of current node
used_size:ptruint; //full usable size
full_size:ptruint; //full alloc size
Function Alloc(Size:ptruint):Pointer;
Procedure Free;
end;
p_pm4_rt_info=^t_pm4_rt_info;
t_pm4_rt_info=object
USERDATA :TGPU_USERDATA;
SHADERDATA:TGPU_SHADERDATA_RT;
ShaderGroup:TvShaderGroup;
RT_INFO:array[0..7] of TRT_INFO;
DB_INFO:TDB_INFO;
BLEND_INFO:TBLEND_INFO;
VPORT :array[0..15] of TVkViewport;
SCISSOR:array[0..15] of TVkRect2D;
RASTERIZATION:TRASTERIZATION_INFO;
MULTISAMPLE :TVkPipelineMultisampleStateCreateInfo;
SCREEN_RECT:TVkRect2D;
SCREEN_SIZE:TVkExtent2D;
RT_COUNT :Byte;
DB_ENABLE :Boolean;
PRIM_TYPE :Byte;
PRIM_RESET:Byte;
VP_COUNT :Byte;
PROVOKING :Byte;
end;
t_pm4_stream_type=(
stGfxRing,
stGfxDcb,
stGfxCcb,
stCompute0,
stCompute1,
stCompute2,
stCompute3,
stCompute4,
stCompute5,
stCompute6
);
t_pm4_node_type=(
ntHint,
ntLoadConstRam,
ntDumpConstRam,
ntIncrementCE,
ntIncrementDE,
ntWaitOnCECounter,
ntWaitOnDECounterDiff,
ntEventWrite,
ntPipeStatDump,
ntEventWriteEop,
ntEventWriteEos,
ntSubmitFlipEop,
ntReleaseMem,
ntDmaData,
ntWriteData,
ntWaitRegMem,
ntFastClear,
ntResolve,
ntClearDepth,
ntDrawIndex2,
ntDrawIndexOffset2,
ntDrawIndexAuto,
ntDispatchDirect,
ntDispatchIndirect,
ntPfpSyncMe
);
const
R_IMG =0;
R_BUF =1;
R_HTILE=2;
R_CMASK=3;
type
t_pm4_usage=packed record
case Byte of
0:(DATA:QWORD);
1:(mem_usage:Byte;
shd_usage:Byte;
clr_usage:Byte;
dsa_usage:Byte;
img_usage:s_image_usage
);
end;
{$IF sizeof(s_image_usage)<>4}{$STOP sizeof(s_image_usage)<>4}{$ENDIF}
operator + (a,b:t_pm4_usage):t_pm4_usage;
type
p_pm4_resource_instance =^t_pm4_resource_instance;
p_pm4_resource_curr_scope =^t_pm4_resource_curr_scope;
p_pm4_resource_stream_scope=^t_pm4_resource_stream_scope;
p_pm4_resource_init_scope=^t_pm4_resource_init_scope;
t_pm4_resource_init_scope=object
list:TAILQ_HEAD; //p_pm4_resource_instance
function first:p_pm4_resource_instance;
procedure insert(i:p_pm4_resource_instance);
end;
p_pm4_resource=^t_pm4_resource;
t_pm4_resource=object
pLeft :p_pm4_resource; //t_pm4_resource_set
pRight:p_pm4_resource; //t_pm4_resource_set
//
rwrite:p_pm4_resource_instance;
//
rtype :Integer;
rsize :DWORD;
rkey :TvImageKey;
//
uall:t_pm4_usage;
//
rimage:TObject;
//
rcombined :Boolean;
rclear :Boolean;
rcmask :Boolean;
rwriteback:Boolean;
//
function c(n1,n2:p_pm4_resource):Integer; static;
end;
t_pm4_resource_set=specialize TNodeSplay<t_pm4_resource>;
t_pm4_resource_instance=object
init_entry:TAILQ_ENTRY; //p_pm4_resource_init_scope
//
pLeft :p_pm4_resource_instance; //t_pm4_resource_instance_set
pRight:p_pm4_resource_instance; //t_pm4_resource_instance_set
//
init_scope:p_pm4_resource_init_scope;
curr_scope:p_pm4_resource_curr_scope;
//
resource:p_pm4_resource;
//
prepared:Boolean;
//
curr:t_pm4_usage;
prev:t_pm4_usage;
next:t_pm4_usage;
//
prev_overlap:t_pm4_usage;
next_overlap:t_pm4_usage;
//
function c(n1,n2:p_pm4_resource_instance):Integer; static;
end;
t_pm4_resource_instance_set=specialize TNodeSplay<t_pm4_resource_instance>;
t_pm4_resource_curr_scope=object
resource_instance_set:t_pm4_resource_instance_set;
function Min:p_pm4_resource_instance;
function Next(node:p_pm4_resource_instance):p_pm4_resource_instance;
procedure insert(i:p_pm4_resource_instance);
function find_resource_instance(r:p_pm4_resource):p_pm4_resource_instance;
function find_image_resource_instance (const rkey:TvImageKey):p_pm4_resource_instance;
function find_buffer_resource_instance(rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource_instance;
end;
t_pm4_resource_stream_scope=object
next_:TAILQ_HEAD; //Must be the first element in memory
//
allocator:t_pm4_allocator;
//
resource_set:t_pm4_resource_set;
init_scope:t_pm4_resource_init_scope;
//
function find_image_resource (const rkey:TvImageKey):p_pm4_resource;
function fetch_image_resource (const rkey:TvImageKey;hint:PChar):p_pm4_resource;
function find_buffer_resource (rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource;
function fetch_buffer_resource (rtype:Integer;addr:Pointer;size:DWORD;hint:PChar):p_pm4_resource;
function fetch_resource_instance (scope:p_pm4_resource_curr_scope;r:p_pm4_resource;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
function insert_image_resource (scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage;hint:PChar):p_pm4_resource_instance;
function insert_buffer_resource (scope:p_pm4_resource_curr_scope;rtype:Integer;addr:Pointer;size:DWORD;mem_usage:Integer;hint:PChar):p_pm4_resource_instance;
procedure connect_resource_instance (i:p_pm4_resource_instance);
procedure connect_resource_scope (scope:p_pm4_resource_curr_scope);
end;
p_pm4_node=^t_pm4_node;
t_pm4_node=object
entry:TAILQ_ENTRY;
//
scope:t_pm4_resource_curr_scope;
//
ntype:t_pm4_node_type;
//
id:QWORD;
end;
p_pm4_node_Hint=^t_pm4_node_Hint;
t_pm4_node_Hint=packed object(t_pm4_node)
data:record end; //@pchar
end;
p_pm4_node_LoadConstRam=^t_pm4_node_LoadConstRam;
t_pm4_node_LoadConstRam=packed object(t_pm4_node)
addr :Pointer;
num_dw:Word;
offset:Word;
end;
p_pm4_node_WaitOnDECounterDiff=^t_pm4_node_WaitOnDECounterDiff;
t_pm4_node_WaitOnDECounterDiff=packed object(t_pm4_node)
diff:DWORD;
end;
p_pm4_node_EventWrite=^t_pm4_node_EventWrite;
t_pm4_node_EventWrite=packed object(t_pm4_node)
eventType:Byte;
end;
p_pm4_node_PipeStatDump=^t_pm4_node_PipeStatDump;
t_pm4_node_PipeStatDump=packed object(t_pm4_node)
Address:QWORD;
Control:TPixelPipeStatControl;
end;
p_pm4_node_EventWriteEop=^t_pm4_node_EventWriteEop;
t_pm4_node_EventWriteEop=packed object(t_pm4_node)
addr :Pointer;
data :QWORD;
eventType:Byte;
dataSel :Byte;
intSel :Byte;
end;
p_pm4_node_EventWriteEos=^t_pm4_node_EventWriteEos;
t_pm4_node_EventWriteEos=packed object(t_pm4_node)
addr :Pointer;
data :DWORD;
eventType:Byte;
command :Byte;
end;
p_pm4_node_SubmitFlipEop=^t_pm4_node_SubmitFlipEop;
t_pm4_node_SubmitFlipEop=packed object(t_pm4_node)
eop_value:QWORD;
intSel :Byte
end;
p_pm4_node_ReleaseMem=^t_pm4_node_ReleaseMem;
t_pm4_node_ReleaseMem=packed object(t_pm4_node)
addr :Pointer;
data :QWORD;
eventType:Byte;
srcSel :Byte;
dstSel :Byte;
intSel :Byte;
end;
p_pm4_node_DmaData=^t_pm4_node_DmaData;
t_pm4_node_DmaData=packed object(t_pm4_node)
dst :QWORD;
src :QWORD;
numBytes:DWORD;
srcSel :Byte;
dstSel :Byte;
cpSync :Byte;
end;
p_pm4_node_WriteData=^t_pm4_node_WriteData;
t_pm4_node_WriteData=packed object(t_pm4_node)
dst :Pointer;
src :Pointer;
num_dw :Word;
dstSel :Byte;
wrConfirm:Boolean;
end;
p_pm4_node_WaitRegMem=^t_pm4_node_WaitRegMem;
t_pm4_node_WaitRegMem=packed object(t_pm4_node)
pollAddr :Pointer;
refValue :DWORD;
mask :DWORD;
compareFunc :Byte;
end;
p_pm4_node_FastClear=^t_pm4_node_FastClear;
t_pm4_node_FastClear=object(t_pm4_node)
RT:TRT_INFO;
end;
p_pm4_node_Resolve=^t_pm4_node_Resolve;
t_pm4_node_Resolve=object(t_pm4_node)
RT:array[0..1] of TRT_INFO;
SCREEN:TVkRect2D;
end;
p_pm4_node_draw=^t_pm4_node_draw;
t_pm4_node_draw=object(t_pm4_node)
rt_info:t_pm4_rt_info;
indexBase :QWORD;
indexOffset :DWORD;
vertexOffset:DWORD;
indexCount :DWORD;
numInstances:DWORD;
INDEX_TYPE:Byte;
SWAP_MODE :Byte;
end;
p_pm4_node_Dispatch=^t_pm4_node_Dispatch;
t_pm4_node_Dispatch=object(t_pm4_node)
COMPUTE_GROUP:TSH_REG_COMPUTE_GROUP;
ShaderGroup:TvShaderGroup;
end;
p_pm4_node_DispatchDirect=^t_pm4_node_DispatchDirect;
t_pm4_node_DispatchDirect=object(t_pm4_node_Dispatch)
DIM_X:DWORD;
DIM_Y:DWORD;
DIM_Z:DWORD;
end;
p_pm4_node_DispatchIndirect=^t_pm4_node_DispatchIndirect;
t_pm4_node_DispatchIndirect=object(t_pm4_node_Dispatch)
BASE :QWORD;
Offset:DWORD;
end;
p_pm4_node_PfpSyncMe=^t_pm4_node_PfpSyncMe;
t_pm4_node_PfpSyncMe=object(t_pm4_node)
event:PRTLEvent;
end;
p_pm4_stream=^t_pm4_stream;
t_pm4_stream=object(t_pm4_resource_stream_scope)
//
list:TAILQ_HEAD; //t_pm4_node
//
buft:t_pm4_stream_type;
//
init:Boolean;
hint_repeat:Boolean;
hint_loop:Ptruint;
hint_cmds:Boolean;
//
curr:p_pm4_node;
//
refs:Ptruint;
//
procedure Free;
Procedure add_node(node:p_pm4_node);
function First:p_pm4_node;
function Next(node:p_pm4_node):p_pm4_node; static;
//
procedure Acquire;
function Release:Boolean;
//
procedure Hint (P1,P2:PChar;maxsize:Integer);
procedure LoadConstRam (addr:Pointer;num_dw,offset:Word);
procedure DumpConstRam (addr:Pointer;num_dw,offset:Word);
procedure IncrementCE ();
procedure IncrementDE ();
procedure WaitOnCECounter();
procedure WaitOnDECounterDiff(diff:DWORD);
procedure EventWrite (eventType:Byte);
procedure PipeStatDump (Address:QWORD;Control:TPixelPipeStatControl);
procedure EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte);
procedure EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte);
procedure SubmitFlipEop(eop_value:QWORD;intSel:Byte);
procedure ReleaseMem (addr:Pointer;data:QWORD;eventType,srcSel,dstSel,intSel:Byte);
procedure DmaData (dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte);
procedure WriteData (dstSel:Byte;dst,src:Pointer;num_dw:Word;wrConfirm:Byte);
procedure WaitRegMem (pollAddr:Pointer;refValue,mask:DWORD;compareFunc:Byte);
procedure FastClear (var CX_REG:TCONTEXT_REG_GROUP);
procedure Resolve (var CX_REG:TCONTEXT_REG_GROUP);
function ColorControl (var CX_REG:TCONTEXT_REG_GROUP):Boolean;
procedure Init_Uniforms(node:p_pm4_node;var UniformBuilder:TvUniformBuilder);
procedure Init_Pushs (node:p_pm4_node;
ShaderGroup:TvShaderGroup;
var GPU_REGS:TGPU_REGS);
procedure Build_rt_info(node:p_pm4_node;
var rt_info:t_pm4_rt_info;
var GPU_REGS:TGPU_REGS);
procedure BuildDraw (ntype:t_pm4_node_type;
var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT;
indexOffset:DWORD);
procedure DrawIndex2 (var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT);
procedure DrawIndexOffset2(var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT;
indexOffset:DWORD);
procedure DrawIndexAuto(var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT);
procedure Build_cs_info (node:p_pm4_node_Dispatch;var GPU_REGS:TGPU_REGS);
procedure DispatchDirect(var SC_REG:TSH_REG_COMPUTE_GROUP);
procedure DispatchIndirect(var SC_REG:TSH_REG_COMPUTE_GROUP;
BASE :QWORD;
Offset:DWORD);
procedure PfpSyncMe(event:PRTLEvent);
end;
implementation
uses
sys_bootparam;
var
cache_block_allocator:t_cache_block_allocator;
//
operator + (a,b:t_pm4_usage):t_pm4_usage; inline;
begin
//hack
Result.DATA:=a.DATA or b.DATA;
end;
//
function t_pm4_resource.c(n1,n2:p_pm4_resource):Integer;
begin
//0 Addr
Result:=Integer(n1^.rkey.Addr>n2^.rkey.Addr)-Integer(n1^.rkey.Addr<n2^.rkey.Addr);
if (Result<>0) then Exit;
//1 rtype
Result:=Integer(n1^.rtype>n2^.rtype)-Integer(n1^.rtype<n2^.rtype);
if (Result<>0) then Exit;
case n1^.rtype of
R_IMG:
begin
Result:=CompareNormalized(n1^.rkey,n2^.rkey);
end;
R_BUF,
R_HTILE,
R_CMASK:
begin
//2 rsize
Result:=Integer(n1^.rsize>n2^.rsize)-Integer(n1^.rsize<n2^.rsize);
end;
else;
end;
end;
function t_pm4_resource_instance.c(n1,n2:p_pm4_resource_instance):Integer;
begin
Result:=t_pm4_resource.c(n1^.resource,n2^.resource);
end;
//
function t_pm4_resource_init_scope.first:p_pm4_resource_instance;
begin
Result:=TAILQ_FIRST(@list);
end;
procedure t_pm4_resource_init_scope.insert(i:p_pm4_resource_instance);
begin
if (i^.init_entry.tqe_next<>nil) or
(i^.init_entry.tqe_prev<>nil) then Exit;
if (list.tqh_last=nil) then
begin
TAILQ_INIT(@list);
end;
TAILQ_INSERT_TAIL(@list,i,@i^.init_entry);
i^.init_scope:=@self;
end;
//
function t_pm4_resource_curr_scope.Min:p_pm4_resource_instance;
begin
Result:=resource_instance_set.Min;
end;
function t_pm4_resource_curr_scope.Next(node:p_pm4_resource_instance):p_pm4_resource_instance;
begin
Result:=resource_instance_set.Next(node);
end;
procedure t_pm4_resource_curr_scope.insert(i:p_pm4_resource_instance);
var
f:p_pm4_resource_instance;
begin
f:=resource_instance_set.Find(i);
if (f<>nil) then
begin
//union
f^.curr:=f^.curr + i^.curr;
f^.prev:=f^.prev + i^.prev;
f^.next:=f^.next + i^.next;
end else
begin
resource_instance_set.Insert(i);
end;
//
end;
function t_pm4_resource_curr_scope.find_resource_instance(r:p_pm4_resource):p_pm4_resource_instance;
var
tmp:t_pm4_resource_instance;
begin
if (r=nil) then Exit(nil);
tmp:=Default(t_pm4_resource_instance);
tmp.resource:=r;
Result:=resource_instance_set.Find(@tmp);
end;
function t_pm4_resource_curr_scope.find_image_resource_instance(const rkey:TvImageKey):p_pm4_resource_instance;
var
tmp:t_pm4_resource;
begin
if (rkey.cformat=VK_FORMAT_UNDEFINED) then Exit(nil);
if (rkey.params.invalid<>0) then Exit(nil);
tmp:=Default(t_pm4_resource);
tmp.rtype:=R_IMG;
tmp.rkey :=rkey;
Result:=find_resource_instance(@tmp);
end;
function t_pm4_resource_curr_scope.find_buffer_resource_instance(rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource_instance;
var
tmp:t_pm4_resource;
begin
tmp:=Default(t_pm4_resource);
tmp.rtype:=rtype;
tmp.rkey.Addr:=addr;
tmp.rsize:=size;
Result:=find_resource_instance(@tmp);
end;
function t_pm4_resource_stream_scope.find_image_resource(const rkey:TvImageKey):p_pm4_resource;
var
tmp:t_pm4_resource;
begin
tmp:=Default(t_pm4_resource);
tmp.rtype:=R_IMG;
tmp.rkey :=rkey;
Result:=resource_set.Find(@tmp);
end;
function t_pm4_resource_stream_scope.fetch_image_resource(const rkey:TvImageKey;hint:PChar):p_pm4_resource;
var
tmp:t_pm4_resource;
begin
tmp:=Default(t_pm4_resource);
tmp.rtype:=R_IMG;
tmp.rkey :=rkey;
Result:=resource_set.Find(@tmp);
if (Result=nil) then
begin
tmp.rsize:=get_image_size(rkey);
if p_print_gpu_ops then
begin
Writeln('fetch_image_resource:0x',HexStr(rkey.Addr),' 0x',HexStr(tmp.rsize,4));
end;
Result:=allocator.Alloc(SizeOf(t_pm4_resource));
Result^:=tmp;
resource_set.Insert(Result);
end;
end;
function t_pm4_resource_stream_scope.find_buffer_resource(rtype:Integer;addr:Pointer;size:DWORD):p_pm4_resource;
var
tmp:t_pm4_resource;
begin
tmp:=Default(t_pm4_resource);
tmp.rtype:=rtype;
tmp.rkey.Addr:=addr;
tmp.rsize:=size;
Result:=resource_set.Find(@tmp);
end;
function t_pm4_resource_stream_scope.fetch_buffer_resource(rtype:Integer;addr:Pointer;size:DWORD;hint:PChar):p_pm4_resource;
var
tmp:t_pm4_resource;
begin
tmp:=Default(t_pm4_resource);
tmp.rtype:=rtype;
tmp.rkey.Addr:=addr;
tmp.rsize:=size;
Result:=resource_set.Find(@tmp);
if (Result=nil) then
begin
Result:=allocator.Alloc(SizeOf(t_pm4_resource));
Result^:=tmp;
if p_print_gpu_ops then
begin
Writeln('fetch_buffer_resource(',hint,'):0x',HexStr(addr),' 0x',HexStr(size,4));
end;
resource_set.Insert(Result);
end;
end;
function t_pm4_resource_stream_scope.fetch_resource_instance(scope:p_pm4_resource_curr_scope;r:p_pm4_resource;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance;
var
curr:t_pm4_usage;
begin
Result:=scope^.find_resource_instance(r);
if (Result=nil) then
begin
Result:=allocator.Alloc(SizeOf(t_pm4_resource_instance));
Result^:=Default(t_pm4_resource_instance);
//
Result^.resource:=r;
end;
{
TODO: This is a hack for loading storage textures that
may not be fully filled, or may only attached only one texture layer.
}
if (iu_storage in img_usage) then
begin
mem_usage:=mem_usage or TM_READ;
end;
curr.mem_usage:=mem_usage;
curr.shd_usage:=0;
curr.clr_usage:=0;
curr.dsa_usage:=0;
curr.img_usage:=img_usage;
if ([iu_sampled,iu_storage]*img_usage<>[]) then
begin
curr.shd_usage:=mem_usage;
end;
if (iu_attachment in img_usage) then
begin
curr.clr_usage:=mem_usage;
end;
if (iu_depthstenc in img_usage) then
begin
curr.dsa_usage:=mem_usage;
end;
Result^.curr:=Result^.curr + curr;
r^.uall:=r^.uall + curr;
end;
function t_pm4_resource_stream_scope.insert_image_resource(scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage;hint:PChar):p_pm4_resource_instance;
var
r:p_pm4_resource;
i:p_pm4_resource_instance;
begin
if (rkey.cformat=VK_FORMAT_UNDEFINED) then Exit(nil);
if (rkey.params.invalid<>0) then Exit(nil);
r:=fetch_image_resource (rkey,hint);
i:=fetch_resource_instance(scope,r,mem_usage,img_usage);
if ((mem_usage and TM_READ)<>0) then
if (i^.prev.mem_usage=0) then //no prev usage
begin
//init
init_scope.insert(i);
end;
scope^.insert(i);
Result:=i;
end;
function t_pm4_resource_stream_scope.insert_buffer_resource(scope:p_pm4_resource_curr_scope;rtype:Integer;addr:Pointer;size:DWORD;mem_usage:Integer;hint:PChar):p_pm4_resource_instance;
var
r:p_pm4_resource;
i:p_pm4_resource_instance;
begin
r:=fetch_buffer_resource (rtype,addr,size,hint);
i:=fetch_resource_instance(scope,r,mem_usage,[iu_buffer]);
if ((mem_usage and TM_READ)<>0) then
if (i^.prev.mem_usage=0) then //no prev usage
begin
//init
init_scope.insert(i);
end;
scope^.insert(i);
Result:=i;
end;
//
procedure t_pm4_resource_stream_scope.connect_resource_instance(i:p_pm4_resource_instance);
var
start:Pointer;
__end:Pointer;
node:p_pm4_resource;
prev:p_pm4_resource_instance;
tmp:t_pm4_resource;
begin
//find cross
tmp:=Default(t_pm4_resource);
tmp:=i^.resource^;
start:=tmp.rkey.Addr;
__end:=start+tmp.rsize;
tmp.rtype:=High(Integer);
tmp.rkey.Addr:=start;
//[s|new|e] ->
// [s|old|e]
node:=resource_set.Find_ls(@tmp);
while (node<>nil) do
begin
//
if (__end>(node^.rkey.Addr)) and (start<(node^.rkey.Addr+node^.rsize)) then
begin
prev:=node^.rwrite;
if (prev<>nil) and (prev<>i) then
begin
//sum prev of curr
i^.prev:= i^.prev + prev^.curr;
//sum next of prev
prev^.next:=prev^.next + i^.curr;
if (prev^.resource<>i^.resource) then
begin
//sum prev of curr
i^.prev_overlap:= i^.prev_overlap + prev^.curr;
//sum next of prev
prev^.next_overlap:=prev^.next_overlap + i^.curr;
end;
end;
//
if ((i^.curr.mem_usage and (TM_WRITE or TM_CLEAR))<>0) then
begin
node^.rwrite:=i;
end;
end;
node:=resource_set.Prev(node);
end;
end;
procedure t_pm4_resource_stream_scope.connect_resource_scope(scope:p_pm4_resource_curr_scope);
var
node:p_pm4_resource_instance;
begin
node:=scope^.resource_instance_set.Min;
while (node<>nil) do
begin
connect_resource_instance(node);
node:=scope^.resource_instance_set.Next(node);
end;
end;
//
procedure t_pm4_stream.Free;
begin
list:=Default(TAILQ_HEAD);
allocator.Free;
end;
var
global_id:QWORD=0;
Procedure t_pm4_stream.add_node(node:p_pm4_node);
begin
if (list.tqh_last=nil) then
begin
TAILQ_INIT(@list);
end;
node^.id:=System.InterlockedIncrement64(global_id);
//Writeln('add_node:',node^.id);
TAILQ_INSERT_TAIL(@list,node,@node^.entry);
connect_resource_scope(@node^.scope);
end;
function t_pm4_stream.First:p_pm4_node;
begin
Result:=TAILQ_FIRST(@list);
end;
function t_pm4_stream.Next(node:p_pm4_node):p_pm4_node;
begin
Result:=TAILQ_NEXT(node,@node^.entry);
end;
//
procedure t_pm4_stream.Acquire;
begin
System.InterlockedIncrement(Pointer(refs));
end;
function t_pm4_stream.Release:Boolean;
begin
Result:=System.InterlockedDecrement(Pointer(refs))=nil;
end;
//
procedure t_pm4_stream.Hint(P1,P2:PChar;maxsize:Integer);
var
len1,len2:Integer;
node:p_pm4_node_Hint;
begin
len1:=StrLen(P1);
len2:=0;
while (maxsize<>0) do
begin
Inc(len2);
if (P2[len2]=#0) then
begin
Break;
end;
Dec(maxsize);
end;
node:=allocator.Alloc(SizeOf(t_pm4_node_Hint)+len1+len2+1);
node^.ntype :=ntHint;
node^.scope :=Default(t_pm4_resource_curr_scope);
Move(P1^,PChar(@node^.data)[0] ,len1);
Move(P2^,PChar(@node^.data)[len1],len2);
PChar(@node^.data)[len1+len2]:=#0;
add_node(node);
end;
procedure t_pm4_stream.LoadConstRam(addr:Pointer;num_dw,offset:Word);
var
node:p_pm4_node_LoadConstRam;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_LoadConstRam));
node^.ntype :=ntLoadConstRam;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.addr :=addr;
node^.num_dw:=num_dw;
node^.offset:=offset;
insert_buffer_resource(@node^.scope,
R_BUF,
addr,
num_dw*SizeOf(DWORD),
TM_READ,
'LoadConstRam');
add_node(node);
end;
procedure t_pm4_stream.DumpConstRam(addr:Pointer;num_dw,offset:Word);
var
node:p_pm4_node_LoadConstRam;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_LoadConstRam));
node^.ntype :=ntDumpConstRam;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.addr :=addr;
node^.num_dw:=num_dw;
node^.offset:=offset;
insert_buffer_resource(@node^.scope,
R_BUF,
addr,
num_dw*SizeOf(DWORD),
TM_WRITE,
'DumpConstRam');
add_node(node);
end;
procedure t_pm4_stream.IncrementCE();
var
node:p_pm4_node;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node));
node^.ntype:=ntIncrementCE;
node^.scope:=Default(t_pm4_resource_curr_scope);
add_node(node);
end;
procedure t_pm4_stream.IncrementDE();
var
node:p_pm4_node;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node));
node^.ntype:=ntIncrementDE;
node^.scope:=Default(t_pm4_resource_curr_scope);
add_node(node);
end;
procedure t_pm4_stream.WaitOnCECounter();
var
node:p_pm4_node;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node));
node^.ntype:=ntWaitOnCECounter;
node^.scope:=Default(t_pm4_resource_curr_scope);
add_node(node);
end;
procedure t_pm4_stream.WaitOnDECounterDiff(diff:DWORD);
var
node:p_pm4_node_WaitOnDECounterDiff;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_WaitOnDECounterDiff));
node^.ntype:=ntWaitOnDECounterDiff;
node^.scope:=Default(t_pm4_resource_curr_scope);
node^.diff :=diff;
add_node(node);
end;
procedure t_pm4_stream.EventWrite(eventType:Byte);
var
node:p_pm4_node_EventWrite;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_EventWrite));
node^.ntype :=ntEventWrite;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.eventType:=eventType;
add_node(node);
end;
procedure t_pm4_stream.PipeStatDump(Address:QWORD;Control:TPixelPipeStatControl);
var
node:p_pm4_node_PipeStatDump;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_PipeStatDump));
node^.ntype :=ntPipeStatDump;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.Address:=Address;
node^.Control:=Control;
add_node(node);
end;
procedure t_pm4_stream.EventWriteEop(addr:Pointer;data:QWORD;eventType,dataSel,intSel:Byte);
var
node:p_pm4_node_EventWriteEop;
function get_data_size:DWORD; inline;
begin
Result:=0;
//
Case dataSel of
EVENTWRITEEOP_DATA_SEL_SEND_DATA32 :Result:=4;
EVENTWRITEEOP_DATA_SEL_SEND_DATA64 :Result:=8;
EVENTWRITEEOP_DATA_SEL_SEND_GPU_CLOCK :Result:=8;
EVENTWRITEEOP_DATA_SEL_SEND_CP_PERFCOUNTER:Result:=8;
else;
end;
end;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEop));
node^.ntype :=ntEventWriteEop;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.addr :=addr;
node^.data :=data;
node^.eventType:=eventType;
node^.dataSel :=dataSel;
node^.intSel :=intSel;
if (addr<>nil) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
addr,
get_data_size,
TM_WRITE,
'EventWriteEop');
end;
add_node(node);
end;
procedure t_pm4_stream.EventWriteEos(addr:Pointer;data:DWORD;eventType,command:Byte);
var
node:p_pm4_node_EventWriteEos;
function get_data_size:DWORD; inline;
begin
Result:=0;
//
Case command of
EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY:Result:=4;
else;
end;
end;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_EventWriteEos));
node^.ntype :=ntEventWriteEos;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.addr :=addr;
node^.data :=data;
node^.eventType:=eventType;
node^.command :=command;
if (addr<>nil) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
addr,
get_data_size,
TM_WRITE,
'EventWriteEos');
end;
add_node(node);
end;
procedure t_pm4_stream.SubmitFlipEop(eop_value:QWORD;intSel:Byte);
var
node:p_pm4_node_SubmitFlipEop;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_SubmitFlipEop));
node^.ntype :=ntSubmitFlipEop;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.eop_value:=eop_value;
node^.intSel :=intSel;
add_node(node);
end;
procedure t_pm4_stream.ReleaseMem(addr:Pointer;data:QWORD;eventType,srcSel,dstSel,intSel:Byte);
var
node:p_pm4_node_ReleaseMem;
function get_data_size:DWORD; inline;
begin
Result:=0;
//
Case srcSel of
RELEASEMEM_DATA_SEL_SEND_DATA32 :Result:=4;
RELEASEMEM_DATA_SEL_SEND_DATA64 :Result:=8;
RELEASEMEM_DATA_SEL_SEND_GPU_CLOCK :Result:=8;
RELEASEMEM_DATA_SEL_SEND_CP_PERFCOUNTER:Result:=8;
else;
end;
end;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_ReleaseMem));
node^.ntype :=ntReleaseMem;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.addr :=addr;
node^.data :=data;
node^.eventType:=eventType;
node^.srcSel :=srcSel;
node^.dstSel :=dstSel;
node^.intSel :=intSel;
if (addr<>nil) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
addr,
get_data_size,
TM_WRITE,
'ReleaseMem');
end;
add_node(node);
end;
procedure t_pm4_stream.DmaData(dstSel:Byte;dst:QWORD;srcSel:Byte;srcOrData:QWORD;numBytes:DWORD;isBlocking:Byte);
var
node:p_pm4_node_DmaData;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_DmaData));
node^.ntype :=ntDmaData;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.dst :=dst;
node^.src :=srcOrData;
node^.numBytes:=numBytes;
node^.srcSel :=srcSel;
node^.dstSel :=dstSel;
node^.cpSync :=isBlocking;
case srcSel of
kDmaDataSrcMemory,
kDmaDataSrcMemoryUsingL2:
if (srcOrData<>0) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
Pointer(srcOrData),
numBytes,
TM_READ,
'DmaData');
end;
else;
end;
case dstSel of
kDmaDataDstMemory,
kDmaDataDstMemoryUsingL2:
if (dst<>0) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
Pointer(dst),
numBytes,
TM_WRITE,
'DmaData');
end;
end;
add_node(node);
end;
procedure t_pm4_stream.WriteData(dstSel:Byte;dst,src:Pointer;num_dw:Word;wrConfirm:Byte);
var
node:p_pm4_node_WriteData;
begin
//Can I copy the link?
//Or do I have to copy the data?
node:=allocator.Alloc(SizeOf(t_pm4_node_WriteData){+num_dw*SizeOf(DWORD)});
node^.ntype :=ntWriteData;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.dst :=dst;
//node^.src :=Pointer(node+1);
node^.src :=src;
node^.num_dw :=num_dw;
node^.dstSel :=dstSel;
node^.wrConfirm:=(wrConfirm<>0);
//Move(src^,node^.src^,num_dw*SizeOf(DWORD));
if (src<>nil) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
src,
num_dw*SizeOf(DWORD),
TM_READ,
'WriteData');
end;
case dstSel of
WRITE_DATA_DST_SEL_MEMORY_SYNC,
WRITE_DATA_DST_SEL_TCL2,
WRITE_DATA_DST_SEL_MEMORY_ASYNC:
if (dst<>nil) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
Pointer(dst),
num_dw*SizeOf(DWORD),
TM_WRITE,
'WriteData');
end;
else;
end;
add_node(node);
end;
procedure t_pm4_stream.WaitRegMem(pollAddr:Pointer;refValue,mask:DWORD;compareFunc:Byte);
var
node:p_pm4_node_WaitRegMem;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_WaitRegMem));
node^.ntype :=ntWaitRegMem;
node^.scope :=Default(t_pm4_resource_curr_scope);
node^.pollAddr :=pollAddr;
node^.refValue :=refValue;
node^.mask :=mask;
node^.compareFunc:=compareFunc;
add_node(node);
end;
procedure t_pm4_stream.FastClear(var CX_REG:TCONTEXT_REG_GROUP);
var
GPU_REGS:TGPU_REGS;
RT:TRT_INFO;
node:p_pm4_node_FastClear;
begin
GPU_REGS:=Default(TGPU_REGS);
GPU_REGS.CX_REG:=@CX_REG;
node:=allocator.Alloc(SizeOf(t_pm4_node_FastClear));
node^.ntype :=ntFastClear;
node^.scope :=Default(t_pm4_resource_curr_scope);
//
RT:=GPU_REGS.GET_RT_INFO(0,True);
//-TM_READ +TM_CLEAR
RT.IMAGE_USAGE:=RT.IMAGE_USAGE and (not TM_READ) or TM_CLEAR;
Assert(RT.CMASK_INFO.KEY.Addr<>nil);
//
insert_image_resource(@node^.scope,
RT.FImageInfo,
RT.IMAGE_USAGE,
[iu_transfer],
'FastClear');
insert_buffer_resource(@node^.scope,
R_CMASK,
RT.CMASK_INFO.KEY.Addr,
RT.CMASK_INFO.SIZE,
RT.IMAGE_USAGE,
'FastClear'
);
//
node^.RT:=RT;
//
add_node(node);
end;
procedure t_pm4_stream.Resolve(var CX_REG:TCONTEXT_REG_GROUP);
var
GPU_REGS:TGPU_REGS;
RT:array[0..1] of TRT_INFO;
SCREEN:TVkRect2D;
node:p_pm4_node_Resolve;
begin
GPU_REGS:=Default(TGPU_REGS);
GPU_REGS.CX_REG:=@CX_REG;
Assert(DWORD(CX_REG.CB_TARGET_MASK)=$F);
node:=allocator.Alloc(SizeOf(t_pm4_node_Resolve));
node^.ntype :=ntResolve;
node^.scope :=Default(t_pm4_resource_curr_scope);
//
RT[0]:=GPU_REGS.GET_RT_INFO(0,True);
RT[1]:=GPU_REGS.GET_RT_INFO(1,True);
RT[0].IMAGE_USAGE:=TM_READ;
RT[1].IMAGE_USAGE:=TM_WRITE;
insert_image_resource(@node^.scope,
RT[0].FImageInfo,
RT[0].IMAGE_USAGE,
[iu_transfer],
'Resolve');
insert_image_resource(@node^.scope,
RT[1].FImageInfo,
RT[1].IMAGE_USAGE,
[iu_transfer],
'Resolve');
SCREEN:=GPU_REGS.GET_SCREEN;
node^.RT:=RT;
node^.SCREEN:=SCREEN;
add_node(node);
end;
function t_pm4_stream.ColorControl(var CX_REG:TCONTEXT_REG_GROUP):Boolean;
begin
Result:=False;
case CX_REG.CB_COLOR_CONTROL.MODE of
CB_DISABLE:
if p_print_gpu_ops then
begin
Writeln('DISABLE');
end;
CB_NORMAL:; //next
CB_ELIMINATE_FAST_CLEAR:
// Expand latest specified clear color into pixel data for the fast cleared color/depth resource.
begin
FastClear(CX_REG);
Exit(True);
end;
CB_RESOLVE:
// Fixed function resolve. (MSAA)
begin
Resolve(CX_REG);
Exit(True);
end;
CB_DECOMPRESS:
if p_print_gpu_ops then
begin
Writeln('DECOMPRESS');
end;
CB_FMASK_DECOMPRESS: // Fmask decompression for shader readability.
if p_print_gpu_ops then
begin
Writeln('FMASK_DECOMPRESS');
end;
CB_DCC_DECOMPRESS: // Indicates this color target view is for a DCC decompress
if p_print_gpu_ops then
begin
Writeln('DCC_DECOMPRESS');
end;
else
Assert(False,'unknow color control:0x'+HexStr(CX_REG.CB_COLOR_CONTROL.MODE,1));
end;
end;
procedure t_pm4_stream.Init_Uniforms(node:p_pm4_node;var UniformBuilder:TvUniformBuilder);
var
i:Integer;
begin
//images
if (Length(UniformBuilder.FImages)<>0) then
begin
For i:=0 to High(UniformBuilder.FImages) do
With UniformBuilder.FImages[i] do
begin
case btype of
vbSampled:
begin
insert_image_resource(@node^.scope,
FImage,
memuse,
[iu_sampled],
'Init_Uniforms');
end;
vbStorage,
vbMipStorage:
begin
insert_image_resource(@node^.scope,
FImage,
memuse,
[iu_storage],
'Init_Uniforms');
end;
else
Assert(false);
end;
end;
end;
//images
//buffers
if (Length(UniformBuilder.FBuffers)<>0) then
begin
For i:=0 to High(UniformBuilder.FBuffers) do
With UniformBuilder.FBuffers[i] do
if (memuse and TM_INVAL)=0 then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
addr,
size,
memuse,
'Init_Uniforms');
end;
end;
//buffers
end;
procedure t_pm4_stream.Init_Pushs(node:p_pm4_node;
ShaderGroup:TvShaderGroup;
var GPU_REGS:TGPU_REGS);
var
Shader:TvShaderExt;
i:TvShaderStage;
FData:PDWORD;
addr:Pointer;
begin
For i:=Low(TvShaderStage) to High(TvShaderStage) do
begin
Shader:=ShaderGroup.FKey.FShaders[i];
if (Shader<>nil) then
if (Shader.FPushConst.size<>0) then
begin
FData:=GPU_REGS.get_user_data(i);
addr :=Shader.GetPushConstData(FData);
insert_buffer_resource(@node^.scope,
R_BUF,
addr,
Shader.FPushConst.size,
TM_READ,
'Init_Pushs');
end;
end;
end;
procedure DumpShaderGroup(ShaderGroup:TvShaderGroup);
var
i:TvShaderStage;
str:RawByteString;
begin
str:='[DumpShaderGroup]'#13#10;
For i:=Low(TvShaderStage) to High(TvShaderStage) do
if (ShaderGroup.FKey.FShaders[i]<>nil) then
begin
str:=str+' ('+HexStr(ShaderGroup.FKey.FShaders[i].FHash_gcn,16)+') '+GetDumpSpvName(i,ShaderGroup.FKey.FShaders[i].FHash_spv)+#13#10;
end;
Writeln(stderr,str);
end;
procedure t_pm4_stream.Build_rt_info(node:p_pm4_node;
var rt_info:t_pm4_rt_info;
var GPU_REGS:TGPU_REGS);
var
i:Integer;
RT:TRT_INFO;
FUniformBuilder:TvUniformBuilder;
pa:TPushConstAllocator;
pp:PPushConstAllocator;
r:p_pm4_resource;
resource_instance:p_pm4_resource_instance;
begin
if (p_neomode<>0) then
begin
//FP16_INTERP_MODE support only Neo mode
for i:=0 to 31 do
begin
Assert(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE=0,'SPI_PS_INPUT_CNTL['+IntToStr(i)+'].FP16_INTERP_MODE='+IntToStr(GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL[i].FP16_INTERP_MODE));
end;
end;
GPU_REGS.export_user_data_rt(@rt_info.USERDATA);
//copy
rt_info.SHADERDATA.SG_REG :=GPU_REGS.SG_REG^ ;
rt_info.SHADERDATA.SPI_PS_INPUT_ENA :=GPU_REGS.CX_REG^.SPI_PS_INPUT_ENA ;
rt_info.SHADERDATA.SPI_PS_INPUT_ADDR :=GPU_REGS.CX_REG^.SPI_PS_INPUT_ADDR ;
rt_info.SHADERDATA.SPI_INTERP_CONTROL_0 :=GPU_REGS.CX_REG^.SPI_INTERP_CONTROL_0 ;
rt_info.SHADERDATA.SPI_PS_IN_CONTROL :=GPU_REGS.CX_REG^.SPI_PS_IN_CONTROL ;
rt_info.SHADERDATA.SPI_PS_INPUT_CNTL :=GPU_REGS.CX_REG^.SPI_PS_INPUT_CNTL ;
rt_info.SHADERDATA.DB_SHADER_CONTROL :=GPU_REGS.CX_REG^.DB_SHADER_CONTROL ;
rt_info.SHADERDATA.VGT_INSTANCE_STEP_RATE_0:=GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_0;
rt_info.SHADERDATA.VGT_INSTANCE_STEP_RATE_1:=GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_1;
rt_info.SHADERDATA.RENDER_TARGET :=GPU_REGS.CX_REG^.RENDER_TARGET ;
rt_info.SHADERDATA.UC_REG :=GPU_REGS.UC_REG^;
rt_info.RT_COUNT:=0;
if GPU_REGS.COMP_ENABLE then
For i:=0 to GPU_REGS.GET_HI_RT do
begin
RT:=GPU_REGS.GET_RT_INFO(i);
//
if (RT.CMASK_INFO.KEY.Addr<>nil) then
begin
//perfetch check
r:=find_buffer_resource(R_CMASK,RT.CMASK_INFO.KEY.Addr,RT.CMASK_INFO.SIZE);
if (r<>nil) then
if (r^.rcmask) then
begin
//-TM_READ +TM_CLEAR
RT.IMAGE_USAGE:=RT.IMAGE_USAGE and (not TM_READ) or TM_CLEAR;
//
r^.rcmask:=False;
end;
insert_buffer_resource(@node^.scope,
R_CMASK,
RT.CMASK_INFO.KEY.Addr,
RT.CMASK_INFO.SIZE,
RT.IMAGE_USAGE,
'Build_rt_info');
end;
insert_image_resource(@node^.scope,
RT.FImageInfo,
RT.IMAGE_USAGE,
[iu_attachment],
'Build_rt_info');
//
rt_info.RT_INFO[rt_info.RT_COUNT]:=RT;
Inc(rt_info.RT_COUNT);
end;
rt_info.DB_ENABLE:=GPU_REGS.DB_ENABLE;
if rt_info.DB_ENABLE then
begin
rt_info.DB_INFO:=GPU_REGS.GET_DB_INFO;
//
resource_instance:=insert_image_resource(@node^.scope,
GetDepthOnly(rt_info.DB_INFO.FImageInfo),
rt_info.DB_INFO.DEPTH_USAGE,
[iu_depthstenc],
'Build_rt_info');
if (resource_instance<>nil) then
with resource_instance^.resource^ do
begin
rcombined:=rcombined or IsDepthAndStencil(rt_info.DB_INFO.FImageInfo.cformat);
end;
resource_instance:=insert_image_resource(@node^.scope,
GetStencilOnly(rt_info.DB_INFO.FImageInfo),
rt_info.DB_INFO.STENCIL_USAGE,
[iu_depthstenc],
'Build_rt_info');
if (resource_instance<>nil) then
with resource_instance^.resource^ do
begin
rcombined:=rcombined or IsDepthAndStencil(rt_info.DB_INFO.FImageInfo.cformat);
end;
if (rt_info.DB_INFO.HTILE_INFO.TILE_SURFACE_ENABLE<>0) then
begin
resource_instance:=insert_buffer_resource(@node^.scope,
R_HTILE,
rt_info.DB_INFO.HTILE_INFO.KEY.Addr,
rt_info.DB_INFO.HTILE_INFO.SIZE,
rt_info.DB_INFO.DEPTH_USAGE,
'Build_rt_info');
end;
end;
if (rt_info.RT_COUNT=0) and (not rt_info.DB_ENABLE) then
begin
Writeln('zero attachment???');
end;
rt_info.BLEND_INFO:=GPU_REGS.GET_BLEND_INFO;
rt_info.PRIM_TYPE :=ord(GPU_REGS.GET_PRIM_TYPE);
rt_info.PRIM_RESET:=GPU_REGS.GET_PRIM_RESET;
rt_info.VP_COUNT:=0;
For i:=0 to 15 do
if GPU_REGS.VP_ENABLE(i) then
begin
rt_info.VPORT [rt_info.VP_COUNT]:=GPU_REGS.GET_VPORT (i);
rt_info.SCISSOR[rt_info.VP_COUNT]:=GPU_REGS.GET_SCISSOR(i);
Inc(rt_info.VP_COUNT);
end;
rt_info.RASTERIZATION:=GPU_REGS.GET_RASTERIZATION;
rt_info.MULTISAMPLE :=GPU_REGS.GET_MULTISAMPLE;
rt_info.PROVOKING:=ord(GPU_REGS.GET_PROVOKING);
rt_info.SCREEN_RECT:=GPU_REGS.GET_SCREEN;
rt_info.SCREEN_SIZE:=GPU_REGS.GET_SCREEN_SIZE;
//
pa.Init;
pp:=@pa;
rt_info.ShaderGroup:=FetchShaderGroupRT(GPU_REGS,pp);
Assert(rt_info.ShaderGroup<>nil);
//DumpShaderGroup(rt_info.ShaderGroup);
//
FUniformBuilder:=Default(TvUniformBuilder);
rt_info.ShaderGroup.ExportUnifBuilder(FUniformBuilder,@rt_info.USERDATA);
Init_Uniforms(node,FUniformBuilder);
end;
procedure t_pm4_stream.BuildDraw(ntype:t_pm4_node_type;
var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT;
indexOffset:DWORD);
var
GPU_REGS:TGPU_REGS;
node:p_pm4_node_draw;
begin
GPU_REGS:=Default(TGPU_REGS);
GPU_REGS.SG_REG:=@SG_REG;
GPU_REGS.CX_REG:=@CX_REG;
GPU_REGS.UC_REG:=@UC_REG;
if DWORD(CX_REG.VGT_SHADER_STAGES_EN)<>0 then
begin
Writeln('Skip tessellation:0x',HexStr(DWORD(CX_REG.VGT_SHADER_STAGES_EN),8));
Exit;
end;
node:=allocator.Alloc(SizeOf(t_pm4_node_draw));
node^.ntype :=ntype;
node^.scope :=Default(t_pm4_resource_curr_scope);
Build_rt_info(node,node^.rt_info,GPU_REGS);
node^.indexBase :=CX_REG.VGT_DMA_BASE or (QWORD(CX_REG.VGT_DMA_BASE_HI.BASE_ADDR) shl 32);
node^.indexOffset :=indexOffset;
node^.vertexOffset:=CX_REG.VGT_INDX_OFFSET;
node^.indexCount :=UC_REG.VGT_NUM_INDICES;
node^.numInstances:=UC_REG.VGT_NUM_INSTANCES;
node^.INDEX_TYPE:=ord(GPU_REGS.GET_INDEX_TYPE);
node^.SWAP_MODE :=CX_REG.VGT_DMA_INDEX_TYPE.SWAP_MODE;
//heuristic
if (ntype=ntDrawIndexAuto) and
(node^.numInstances<=1) and
(node^.rt_info.RT_COUNT=0) and
(node^.rt_info.DB_ENABLE) and
(
((node^.rt_info.DB_INFO.DEPTH_USAGE and TM_CLEAR)<>0) or
((node^.rt_info.DB_INFO.STENCIL_USAGE and TM_CLEAR)<>0)
) then
if IsClearDepthShaders(node^.rt_info.ShaderGroup.FKey.FShaders) then
begin
//ClearDepthTarget
node^.ntype:=ntClearDepth;
end;
//clearRenderTarget
//VS 0xFE54CC4687E2FF59
//PS 0x91E6C1F562F6F2DE
add_node(node);
end;
procedure t_pm4_stream.DrawIndex2(var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT);
begin
if ColorControl(CX_REG) then Exit;
BuildDraw(ntDrawIndex2,SG_REG,CX_REG,UC_REG,0);
end;
procedure t_pm4_stream.DrawIndexAuto(var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT);
begin
if ColorControl(CX_REG) then Exit;
BuildDraw(ntDrawIndexAuto,SG_REG,CX_REG,UC_REG,0);
end;
procedure t_pm4_stream.DrawIndexOffset2(var SG_REG:TSH_REG_GFX_GROUP;
var CX_REG:TCONTEXT_REG_GROUP;
var UC_REG:TUSERCONFIG_REG_SHORT;
indexOffset:DWORD);
begin
if ColorControl(CX_REG) then Exit;
BuildDraw(ntDrawIndexOffset2,SG_REG,CX_REG,UC_REG,indexOffset);
end;
procedure t_pm4_stream.Build_cs_info(node:p_pm4_node_Dispatch;var GPU_REGS:TGPU_REGS);
var
dst:PGPU_USERDATA;
FUniformBuilder:TvUniformBuilder;
pa:TPushConstAllocator;
pp:PPushConstAllocator;
begin
//copy
node^.COMPUTE_GROUP:=GPU_REGS.SC_REG^;
//hack
dst:=Pointer(@node^.COMPUTE_GROUP.COMPUTE_USER_DATA)-Ptruint(@TGPU_USERDATA(nil^).A[vShaderStageCs]);
pa.Init;
pp:=@pa;
node^.ShaderGroup:=FetchShaderGroupCS(GPU_REGS,pp);
Assert(node^.ShaderGroup<>nil);
//
FUniformBuilder:=Default(TvUniformBuilder);
node^.ShaderGroup.ExportUnifBuilder(FUniformBuilder,dst);
Init_Uniforms(node,FUniformBuilder);
end;
procedure t_pm4_stream.DispatchDirect(var SC_REG:TSH_REG_COMPUTE_GROUP);
var
GPU_REGS:TGPU_REGS;
node:p_pm4_node_DispatchDirect;
begin
GPU_REGS:=Default(TGPU_REGS);
GPU_REGS.SC_REG:=@SC_REG;
node:=allocator.Alloc(SizeOf(t_pm4_node_DispatchDirect));
node^.ntype:=ntDispatchDirect;
node^.scope:=Default(t_pm4_resource_curr_scope);
Build_cs_info(node,GPU_REGS);
node^.DIM_X:=GPU_REGS.SC_REG^.COMPUTE_DIM_X;
node^.DIM_Y:=GPU_REGS.SC_REG^.COMPUTE_DIM_Y;
node^.DIM_Z:=GPU_REGS.SC_REG^.COMPUTE_DIM_Z;
add_node(node);
end;
procedure t_pm4_stream.DispatchIndirect(var SC_REG:TSH_REG_COMPUTE_GROUP;
BASE :QWORD;
Offset:DWORD);
var
GPU_REGS:TGPU_REGS;
node:p_pm4_node_DispatchIndirect;
begin
GPU_REGS:=Default(TGPU_REGS);
GPU_REGS.SC_REG:=@SC_REG;
node:=allocator.Alloc(SizeOf(t_pm4_node_DispatchIndirect));
node^.ntype:=ntDispatchIndirect;
node^.scope:=Default(t_pm4_resource_curr_scope);
Build_cs_info(node,GPU_REGS);
node^.BASE :=BASE;
node^.Offset:=Offset;
if (BASE<>0) then
begin
insert_buffer_resource(@node^.scope,
R_BUF,
Pointer(BASE+Offset),
3*4,
TM_READ,
'DispatchIndirect');
end;
add_node(node);
end;
procedure t_pm4_stream.PfpSyncMe(event:PRTLEvent);
var
node:p_pm4_node_PfpSyncMe;
begin
node:=allocator.Alloc(SizeOf(t_pm4_node_PfpSyncMe));
node^.ntype:=ntPfpSyncMe;
node^.scope:=Default(t_pm4_resource_curr_scope);
node^.event:=event;
add_node(node);
end;
//
procedure t_cache_block_allocator.init;
begin
queue.Create;
xlock:=nil;
count:=0;
end;
Function t_cache_block_allocator.Alloc:Pointer;
begin
Result:=nil;
if (XCHG(xlock,Pointer(1))=nil) then
begin
if queue.Pop(Result) then
begin
XCHG(xlock,nil);
//
System.InterlockedDecrement64(count);
Exit;
end;
XCHG(xlock,nil);
end;
//
Result:=kmem_alloc(mem_size,VM_RW);
Assert(Result<>nil);
end;
Procedure t_cache_block_allocator.Free(node:Pointer);
begin
if (node=nil) then Exit;
//
if (count+1<=max_count) then
begin
if (System.InterlockedIncrement64(count)<=max_count) then
begin
queue.Push(node);
Exit;
end else
begin
System.InterlockedDecrement64(count);
end;
end;
//
kmem_free(node,mem_size);
end;
//
Function t_pm4_allocator.Alloc(Size:ptruint):Pointer;
var
mem_size:ptruint;
node:PAllocNode;
begin
if (pHead.slh_first=nil) or (Size>curr_size) then
begin
if (Size>(cache_block_allocator.mem_size-SizeOf(TAllocNode))) then
begin
mem_size:=Align(Size+SizeOf(TAllocNode),64*1024);
//
node:=kmem_alloc(mem_size,VM_RW);
Assert(node<>nil);
end else
begin
mem_size:=cache_block_allocator.mem_size;
//
node:=cache_block_allocator.Alloc;
end;
node^.size:=mem_size;
SLIST_INSERT_HEAD(@pHead,node,@node^.link);
curr_apos:=0;
curr_size:=mem_size-SizeOf(TAllocNode);
Inc(full_size,mem_size);
end;
node:=SLIST_FIRST(@pHead);
Result:=@PByte(@node^.data)[curr_apos];
Inc(used_size,Size);
Size:=Align(Size,SizeOf(ptruint));
Inc(curr_apos,Size);
Dec(curr_size,Size);
end;
Procedure t_pm4_allocator.Free;
var
node:PAllocNode;
begin
node:=SLIST_FIRST(@pHead);
While (node<>nil) do
begin
SLIST_REMOVE(@pHead,node,@node^.link);
if (node^.size=cache_block_allocator.mem_size) then
begin
cache_block_allocator.Free(node);
end else
begin
kmem_free(node,node^.size);
end;
node:=SLIST_FIRST(@pHead);
end;
Self:=Default(t_pm4_allocator);
end;
initialization
cache_block_allocator.Init;
end.