// mirror of https://github.com/red-prig/fpPS4.git
// 3780 lines, 80 KiB, plaintext
unit pm4_me;
|
|
|
|
{$mode ObjFPC}{$H+}
|
|
{$CALLING SysV_ABI_CDecl}
|
|
|
|
interface
|
|
|
|
uses
|
|
sysutils,
|
|
TypInfo,
|
|
mqueue,
|
|
LFQueue,
|
|
|
|
sys_eventvar,
|
|
|
|
si_ci_vi_merged_enum,
|
|
si_ci_vi_merged_groups,
|
|
|
|
sys_bootparam,
|
|
host_ipc_interface,
|
|
md_sleep,
|
|
|
|
Vulkan,
|
|
vDevice,
|
|
vMemory,
|
|
vBuffer,
|
|
vHostBufferManager,
|
|
vImage,
|
|
vImageManager,
|
|
vRender,
|
|
vRenderPassManager,
|
|
vPipelineManager,
|
|
vFramebufferManager,
|
|
vShader,
|
|
vShaderExt,
|
|
vShaderManager,
|
|
vRegs2Vulkan,
|
|
vCmdBuffer,
|
|
vDescriptorSet,
|
|
vSampler,
|
|
vSamplerManager,
|
|
vMetaManager,
|
|
|
|
vImageTiling,
|
|
vDependence,
|
|
|
|
renderdoc,
|
|
|
|
sys_event,
|
|
time,
|
|
md_time,
|
|
kern_thr,
|
|
sched_ule,
|
|
pm4defs,
|
|
pm4_stream;
|
|
|
|
function gc_add_internal_ptr (kq,ptr,udata:Pointer):Integer; register; external;
|
|
function gc_del_internal_ptr (kq,ptr:Pointer):Integer; register; external;
|
|
procedure gc_wakeup_internal_ptr(ptr:Pointer); register; external;
|
|
|
|
Const
|
|
CONST_RAM_SIZE=48*1024;
|
|
|
|
type
|
|
t_on_submit_flip_eop=function(submit_id:QWORD):Integer;
|
|
|
|
//One stall record exists per stream type (see t_pm4_me.stall).
p_pm4_stall=^t_pm4_stall;
t_pm4_stall=record
 next :p_pm4_stall; //following record; t_pm4_me.Init links the records into a ring
 //
 list :TAILQ_HEAD;  //p_pm4_stream
 //
 count:Ptruint;     //streams currently linked in "list" (add_stream/remove_stream)
 flow :Ptruint;     //budget consumed by next_task; next_step reloads it from "count" when it is zero
end;
|
|
|
|
//Wait descriptor stored per stream type in t_pm4_me.wait_ptr.
//The method bodies are not part of this excerpt, so only the shape is described here.
p_me_wait_addr=^t_me_wait_addr;
t_me_wait_addr=object
 Fcode_addr:Pointer;
 Fdmem_addr:Pointer;
 Fregs_addr:Pointer;
 //
 procedure add_reg(kq:Pointer);
 procedure del_reg(kq:Pointer);
 procedure set_adr(kq,addr:Pointer);
end;
|
|
|
|
//State of the PM4 micro engine: submit queue, per-type stall lists and the
//round-robin scheduler that walks them.
p_pm4_me=^t_pm4_me;
t_pm4_me=object
 //
 queue:TIntrusiveMPSCQueue; //p_pm4_stream, filled by Push
 //
 stall:array[t_pm4_stream_type] of t_pm4_stall; //one record per stream type, linked as a ring in Init
 //
 sheduler:record
  start :p_pm4_stall; //stall record currently being served
  switch:Boolean;     //set by switch_task, cleared by set_step/reset_sheduler
  count :Byte;        //switch_task calls in a row; wait() runs once it reaches Length(stall)
 end;
 //
 //event:PRTLEvent;
 on_idle:TProcedure; //optional callback, invoked through t_me_render_context.on_idle
 on_submit_flip_eop:t_on_submit_flip_eop;
 //
 started:Pointer;  //exchanged with Pointer(1) in start so the thread is created once
 td:p_kthread;     //receives the thread created by kthread_add
 //
 gc_knlist:p_knlist; //target list of knote_eventid
 gc_kqueue:p_kqueue; //created in Init, polled in wait
 //
 wait_ptr:array[t_pm4_stream_type] of t_me_wait_addr; //wait() maps an event udata inside this array back to a stream type
 //
 imdone_count:QWORD; //incremented by imdone
 //
 CONST_RAM:array[0..CONST_RAM_SIZE-1] of Byte; //48KB
 //
 CE_COUNT:DWORD;
 DE_COUNT:DWORD;
 //
 procedure Init(knlist:p_knlist);
 procedure start;
 procedure trigger;
 procedure wait;
 procedure imdone;
 procedure knote_eventid(event_id,me_id:Byte;timestamp:QWORD;lockflags:Integer);
 procedure Push(var stream:t_pm4_stream);
 procedure reset_sheduler;
 procedure set_step(s:t_pm4_stream_type);
 procedure next_step;
 function  next_task:Boolean;
 procedure switch_task;
 procedure add_stream   (stream:p_pm4_stream);
 function  get_next     :p_pm4_stream;
 procedure remove_stream(stream:p_pm4_stream);
end;
|
|
|
|
//List node carrying one command buffer handle that waits to be freed
//(used by TvCmdCachedPool.FMemCache / FDeffered).
PvCmdFreeNode=^TvCmdFreeNode;
TvCmdFreeNode=record
 entry:STAILQ_ENTRY;
 FCmd :TVkCommandBuffer;
end;
|
|
|
|
//Command pool whose Free() only queues the handle; the real release happens in Trim().
TvCmdCachedPool=class(TvCmdPool)
 FMemCache:STAILQ_HEAD;  //PvCmdFreeNode, recycled (empty) nodes
 FDeffered:STAILQ_HEAD;  //PvCmdFreeNode, handles waiting for the next Trim
 FTrimCount:Integer;     //Trim calls since the last inherited Trim
 Constructor Create(FFamily:TVkUInt32);
 procedure   Free(cmd:TVkCommandBuffer); register; override;
 procedure   Trim;                       register; override;
end;
|
|
|
|
//Small ring of command pools; fetch() picks a slot by index and creates it on first use.
t_pool_line=array[0..3] of TvCustomCmdPool;
t_pool_cache=object
 queue:TvQueue;         //queue whose family is used for new pools
 line :t_pool_line;     //lazily created pools, nil until first fetched
 last :TvCustomCmdPool; //pool returned by the most recent fetch
 Procedure Init(Q:TvQueue);
 function  fetch(i:QWORD):TvCustomCmdPool;
 procedure trim;
 procedure trim_all;
end;
|
|
|
|
//Command buffer tied to one pm4 stream; its allocations come from the stream allocator.
TvStreamCmdBuffer=class(TvCmdBuffer)
 entry :TAILQ_ENTRY;  //stall
 stream:p_pm4_stream; //owner stream, assigned in t_me_render_context.BeginCmdBuffer
 //
 function  OnAlloc(size:Ptruint):Pointer; register; override;
 Procedure OnFree (P:Pointer           ); register; override;
 function  IsLinearAlloc:Boolean;         register; override;
end;
|
|
|
|
//Working context of the ME while it replays a stream into Vulkan command buffers.
t_me_render_context=object
 me      :p_pm4_me;
 stream  :p_pm4_stream;
 node    :p_pm4_node;
 //
 rel_time:QWORD;
 //
 rt_info :p_pm4_rt_info;
 Render  :TvRenderPassBeginInfo;
 //
 gfx_pool:t_pool_cache; //source of command pools for BeginCmdBuffer
 //
 Cmd     :TvStreamCmdBuffer; //buffer being recorded, nil when none is open
 stall   :array[t_pm4_stream_type] of TAILQ_HEAD; //TvStreamCmdBuffer, submitted but not yet finished
 //
 dep:TvDependenciesObject; //created on demand by RefToParent
 images_size:QWORD;        //summed in RefToParent, cleared in FlushParent
 buffer_size:QWORD;        //summed in RefToParent, cleared in FlushParent
 //
 procedure Init;
 procedure BeginCmdBuffer;
 procedure FinishCmdBuffer;
 function  CmdStatus(i:t_pm4_stream_type):TVkResult;
 function  PingCmd:Boolean;
 function  WaitConfirm:Boolean;
 function  WaitConfirmOrSwitch:Boolean;
 Procedure InsertLabel(pLabelName:PVkChar);
 Procedure BeginLabel(pLabelName:PVkChar);
 Procedure EndLabel();
 //
 procedure switch_task;
 procedure complete_and_next_task;
 procedure on_idle;

 Procedure RefToParent(obj:TvRefsObject); register;
 procedure FlushParent; register;
end;
|
|
|
|
var
|
|
use_renderdoc_capture:Boolean=False;
|
|
act_renderdoc_capture:Boolean=False;
|
|
wait_loop_detect :Boolean=True;
|
|
wait_loop_autoskip :Boolean=False;
|
|
|
|
implementation
|
|
|
|
uses
|
|
windows,
|
|
kern_dmem,
|
|
kern_proc,
|
|
vm_map,
|
|
vm_tracking_map,
|
|
dev_dce;
|
|
|
|
//Returns True when the high-order bit of the key state of "vKey" is set.
//NOTE(review): despite its name this wraps Windows.GetKeyState, not
//Windows.GetAsyncKeyState - confirm that this is intended.
function GetAsyncKeyState(vKey:longint):Boolean; inline;
const
 KEY_HIGH_BIT=$8000;
begin
 Result:=((Windows.GetKeyState(vKey) and KEY_HIGH_BIT)<>0);
end;
|
|
|
|
//Keeps the RenderDoc capture state in sync with act_renderdoc_capture.
//Does nothing unless use_renderdoc_capture is set.
procedure StartFrameCapture;
begin
 if not use_renderdoc_capture then Exit;

 //F1 requests capturing, F2 cancels it (F2 is checked last, so it wins)
 if GetAsyncKeyState(VK_F1) then act_renderdoc_capture:=True;
 if GetAsyncKeyState(VK_F2) then act_renderdoc_capture:=False;

 if act_renderdoc_capture then
 begin
  //start only when no capture is running yet
  if (renderdoc.IsFrameCapturing()=0) then
  begin
   SetCaptureOptionU32(eRENDERDOC_Option_RefAllResources,1);
   renderdoc.StartFrameCapture(0,0);
  end;
 end else
 if (renderdoc.IsFrameCapturing()<>0) then
 begin
  //capture was switched off while one is still running
  renderdoc.EndFrameCapture(0,0);
 end;
end;
|
|
|
|
//Stops a running RenderDoc capture; no-op when capturing is disabled or idle.
procedure EndFrameCapture;
begin
 if not use_renderdoc_capture then Exit;
 if (renderdoc.IsFrameCapturing()=0) then Exit;
 renderdoc.EndFrameCapture(0,0);
end;
|
|
|
|
//Sets up the submit queue, links the per-type stall records into a ring,
//remembers the knote list and creates the internal kqueue.
procedure t_pm4_me.Init(knlist:p_knlist);
var
 kind:t_pm4_stream_type;
begin
 queue.Create;

 for kind:=Low(t_pm4_stream_type) to High(t_pm4_stream_type) do
 begin
  TAILQ_INIT(@stall[kind].list);
  //each record points to its successor, the last one wraps to the first
  if (kind<>High(t_pm4_stream_type)) then
  begin
   stall[kind].next:=@stall[Succ(kind)];
  end else
  begin
   stall[kind].next:=@stall[Low(t_pm4_stream_type)];
  end;
 end;

 gc_knlist:=knlist;

 gc_kqueue:=kern_kqueue2('[gc_kqueue]',nil,nil);
 //the queue address doubles as the udata that wait() recognises as a plain wakeup
 gc_add_internal_ptr(gc_kqueue,@queue,@queue);
end;
|
|
|
|
procedure pm4_me_thread(me:p_pm4_me); SysV_ABI_CDecl; forward;
|
|
|
|
//Creates the '[GFX_ME]' thread on the first call; later calls do nothing.
procedure t_pm4_me.start;
begin
 //the exchange returns the previous value, so only the first caller sees nil
 if (XCHG(started,Pointer(1))<>nil) then Exit;

 //event:=RTLEventCreate;
 //
 kthread_add(@pm4_me_thread,@self,@td,(8*1024*1024) div (16*1024),'[GFX_ME]');
end;
|
|
|
|
//Wakes the kqueue waiter through the pointer registered in Init.
procedure t_pm4_me.trigger;
begin
 if (gc_kqueue=nil) then Exit; //Init has not created the kqueue

 gc_wakeup_internal_ptr(@queue);

 {
 if (event<>nil) then
 begin
  RTLEventSetEvent(event);
 end;
 }
end;
|
|
|
|
//Polls the internal kqueue with a short timeout. Events whose udata is the
//queue address are plain wakeups; any other udata is taken as a pointer into
//wait_ptr and the lowest such pointer selects the stream type to serve next.
procedure t_pm4_me.wait;
var
 kev:array[0..15] of t_kevent;
 timeout:timespec;
 idx,count:Integer;
 cand  :p_me_wait_addr;
 lowest:p_me_wait_addr;
begin
 timeout:=Default(timespec);
 timeout.tv_sec :=0;
 timeout.tv_nsec:=1000000000 div 1000;

 count:=0;
 if (gc_kqueue<>nil) then
 begin
  kern_kevent2(gc_kqueue,nil,0,@kev,Length(kev),@timeout,@count);
 end;

 lowest:=nil;

 idx:=0;
 while (idx<count) do
 begin
  if (kev[idx].udata<>@queue) then
  begin
   cand:=kev[idx].udata;
   if (lowest=nil) or (lowest>cand) then
   begin
    lowest:=cand;
   end;
  end;
  Inc(idx);
 end;

 if (lowest=nil) then Exit;

 //element index inside wait_ptr == ordinal of the stream type
 idx:=(PtrUint(lowest)-PtrUint(@wait_ptr)) div SizeOf(t_me_wait_addr);
 set_step(t_pm4_stream_type(idx));
end;
|
|
|
|
//Bumps the completion counter atomically and wakes the waiter.
procedure t_pm4_me.imdone;
begin
 System.InterlockedIncrement64(imdone_count);
 //
 trigger;
end;
|
|
|
|
//Posts a knote on gc_knlist. The hint packs the arguments as:
//bits 0..7 = event_id, bits 8..15 = me_id, bits 16.. = timestamp.
procedure t_pm4_me.knote_eventid(event_id,me_id:Byte;timestamp:QWORD;lockflags:Integer);
begin
 knote(gc_knlist,
       event_id or (me_id shl 8) or (timestamp shl 16),
       lockflags);
end;
|
|
|
|
//Hands a filled stream over to the ME. The stream record is copied into memory
//taken from its own allocator, the caller's variable is reset (keeping only
//buft), the copy is queued and the ME thread is started/woken.
//Empty streams (First=nil) are ignored.
procedure t_pm4_me.Push(var stream:t_pm4_stream);
var
 moved:p_pm4_stream;
 kind :t_pm4_stream_type;
begin
 if (stream.First=nil) then Exit;

 kind:=stream.buft;

 //self alloc
 moved:=stream.allocator.Alloc(SizeOf(t_pm4_stream));
 moved^:=stream;

 //leave the caller with a fresh stream of the same type
 stream:=Default(t_pm4_stream);
 stream.buft:=kind;

 queue.Push(moved);

 start;
 trigger;
end;
|
|
|
|
//Rewinds the scheduler to the first stream type and clears its counters.
procedure t_pm4_me.reset_sheduler;
begin
 //reset stall iterator
 set_step(Low(t_pm4_stream_type));
end;
|
|
|
|
//Points the scheduler at the stall record of type "s" and clears switch/count.
procedure t_pm4_me.set_step(s:t_pm4_stream_type);
begin
 with sheduler do
 begin
  start :=@stall[s];
  switch:=False;
  count :=0;
 end;
end;
|
|
|
|
//Advances the scheduler to the following stall record of the ring and
//reloads that record's flow budget from its stream count when it is used up.
procedure t_pm4_me.next_step;
var
 cur:p_pm4_stall;
begin
 //next
 cur:=sheduler.start^.next;
 sheduler.start:=cur;
 //
 if (cur^.flow=0) then
 begin
  cur^.flow:=cur^.count;
 end;
end;
|
|
|
|
//Accounts for one finished task of the current stall record.
//Returns True when the scheduler moved on to the next record (current list is
//empty or its flow budget is zero); otherwise spends one unit of flow and
//returns False.
function t_pm4_me.next_task:Boolean;
var
 cur:p_pm4_stall;
begin
 cur:=sheduler.start;

 Result:=TAILQ_EMPTY(@cur^.list) or (cur^.flow=0);

 if Result then
 begin
  //next
  next_step;
 end else
 begin
  Dec(cur^.flow);
 end;
end;
|
|
|
|
//Gives up the current stall record and moves to the next one. After
//Length(stall) switches in a row the ME blocks in wait() and the counter
//starts over.
procedure t_pm4_me.switch_task;
begin
 sheduler.switch:=True;
 //
 Inc(sheduler.count);
 //next (done on both paths; next_step does not touch the counter)
 next_step;
 //
 if (sheduler.count<>Length(stall)) then Exit;
 //wait
 wait;
 //msleep_td(hz div 10000);
 //
 sheduler.count:=0;
end;
|
|
|
|
//Appends "stream" to the stall list of its type and takes a reference on it.
procedure t_pm4_me.add_stream(stream:p_pm4_stream);
var
 slot:p_pm4_stall;
begin
 slot:=@stall[stream^.buft];
 //
 TAILQ_INSERT_TAIL(@slot^.list,stream,@stream^.next_);
 Inc(slot^.count);
 //
 stream^.Acquire; //stall
end;
|
|
|
|
//Returns the first stream of the current stall list. While the current list is
//empty the scheduler steps to the next record, at most once per stream type;
//nil is returned when every list was empty.
function t_pm4_me.get_next:p_pm4_stream;
var
 left:Integer;
begin
 left:=Length(stall); //one attempt per stream type
 repeat
  Result:=TAILQ_FIRST(@sheduler.start^.list);
  if (Result<>nil) then Exit;
  //next
  next_step;
  Dec(left);
 until (left=0);
end;
|
|
|
|
//Frees a stream through a local copy of its record. Push() places the record
//in memory obtained from the stream's own allocator, so the copy is taken
//before Free is called.
procedure free_stream(stream:p_pm4_stream);
var
 detached:t_pm4_stream;
begin
 detached:=stream^;
 detached.Free;
end;
|
|
|
|
//Unlinks "stream" from the stall list of its type, drops the stall reference
//and frees the stream when Release reports it was the last one.
procedure t_pm4_me.remove_stream(stream:p_pm4_stream);
var
 slot:p_pm4_stall;
begin
 //pop
 slot:=@stall[stream^.buft];
 TAILQ_REMOVE(@slot^.list,stream,@stream^.next_);
 //
 Dec(slot^.count);
 //
 if not stream^.Release then Exit; //stall
 //
 free_stream(stream);
end;
|
|
|
|
//
|
|
|
|
//Runs the inherited constructor with the same family and empties both node lists.
Constructor TvCmdCachedPool.Create(FFamily:TVkUInt32);
begin
 inherited Create(FFamily);

 STAILQ_INIT(@FDeffered);
 STAILQ_INIT(@FMemCache);
end;
|
|
|
|
//Queues "cmd" on the deferred list instead of freeing it right away.
//A node is recycled from FMemCache when one is available, otherwise a new
//zero-filled node is allocated.
procedure TvCmdCachedPool.Free(cmd:TVkCommandBuffer); register;
var
 node:PvCmdFreeNode;
begin
 if not STAILQ_EMPTY(@FMemCache) then
 begin
  node:=STAILQ_FIRST(@FMemCache);
  STAILQ_REMOVE(@FMemCache,node,@node^.entry);
 end else
 begin
  node:=AllocMem(SizeOf(TvCmdFreeNode));
 end;

 node^.FCmd:=cmd;

 STAILQ_INSERT_TAIL(@FDeffered,node,@node^.entry);
end;
|
|
|
|
//Releases every deferred command buffer through the inherited Free and moves
//the emptied nodes to FMemCache. The inherited Trim itself is only invoked on
//every 5000th call.
procedure TvCmdCachedPool.Trim; register;
var
 node:PvCmdFreeNode;
begin
 repeat
  node:=STAILQ_FIRST(@FDeffered);
  if (node=nil) then Break;

  STAILQ_REMOVE(@FDeffered,node,@node^.entry);

  inherited Free(node^.FCmd);

  //keep the node for reuse by Free()
  STAILQ_INSERT_TAIL(@FMemCache,node,@node^.entry);
 until False;

 Inc(FTrimCount);

 if (FTrimCount<5000) then Exit;

 FTrimCount:=0;
 inherited Trim;
end;
|
|
|
|
//
|
|
|
|
//Stores the queue whose family is used when fetch() creates a pool.
Procedure t_pool_cache.Init(Q:TvQueue);
begin
 Self.queue:=Q;
end;
|
|
|
|
//Returns the pool in slot (i mod Length(t_pool_line)), creating it on first
//use. When the slot differs from the previously returned pool it becomes
//"last" and is trimmed once before being handed out.
function t_pool_cache.fetch(i:QWORD):TvCustomCmdPool;
var
 p:Byte;
begin
 p:=i mod Length(t_pool_line);

 if (line[p]=nil) then
 begin
  line[p]:=TvCmdCachedPool.Create(queue.FFamily);
 end;

 Result:=line[p];

 if (Result<>last) then
 begin
  last:=Result;
  Result.Trim;
 end;
end;
|
|
|
|
//Trims the most recently fetched pool, if any.
procedure t_pool_cache.trim;
begin
 if (last=nil) then Exit;
 last.Trim;
end;
|
|
|
|
//Trims every pool that has been created so far.
//Slots are filled lazily by fetch(), so a slot may still be nil; calling the
//virtual Trim on a nil instance would dereference a nil VMT, hence the check.
procedure t_pool_cache.trim_all;
var
 i:Byte;
begin
 For i:=Low(t_pool_line) to High(t_pool_line) do
 if (line[i]<>nil) then
 begin
  line[i].Trim;
 end;
end;
|
|
|
|
//
|
|
|
|
//Takes "size" bytes from the owning stream's allocator and zero-fills them.
function TvStreamCmdBuffer.OnAlloc(size:Ptruint):Pointer; register;
begin
 Result:=stream^.allocator.Alloc(size);
 //
 FillChar(Result^,size,0);
end;
|
|
|
|
//Intentionally empty: nothing is released per allocation here.
Procedure TvStreamCmdBuffer.OnFree(P:Pointer); register;
begin
 //no-op
end;
|
|
|
|
//Always reports True for this buffer type.
function TvStreamCmdBuffer.IsLinearAlloc:Boolean; register;
begin
 Exit(True);
end;
|
|
|
|
//References "obj" from the context-wide dependency object (created on first
//use) and adds its FSize to the image or buffer byte counters.
procedure t_me_render_context.RefToParent(obj:TvRefsObject); register;
begin
 if (dep=nil) then
 begin
  dep:=TvDependenciesObject.Create;
 end;

 dep.RefTo(obj);

 //size statistics
 if obj.InheritsFrom(TvCustomImage) then
 begin
  Inc(images_size,TvCustomImage(obj).FSize);
 end;

 if obj.InheritsFrom(TvBuffer) then
 begin
  Inc(buffer_size,TvBuffer(obj).FSize);
 end;
end;
|
|
|
|
//Releases everything referenced through RefToParent and clears the size counters.
procedure t_me_render_context.FlushParent; register;
begin
 if (dep<>nil) then
 begin
  dep.ReleaseAllDependencies(dep);
 end;
 //
 buffer_size:=0;
 images_size:=0;
end;
|
|
|
|
//
|
|
|
|
//Binds the pool cache to RenderQueue and empties the per-type stall lists.
procedure t_me_render_context.Init;
var
 kind:t_pm4_stream_type;
begin
 gfx_pool.Init(RenderQueue);

 for kind:=Low(t_pm4_stream_type) to High(t_pm4_stream_type) do
 begin
  TAILQ_INIT(@stall[kind]);
 end;
end;
|
|
|
|
//Opens a command buffer for the current stream unless one is already open.
//The command pool is selected by the ME's imdone counter, and the new buffer
//takes a reference on the stream (dropped again in free_cmd_buffer).
//The former local "buft" was assigned but never used and has been removed.
procedure t_me_render_context.BeginCmdBuffer;
var
 Pool:TvCustomCmdPool;
begin
 if (Cmd<>nil) then Exit; //Already allocated

 //Select Vulkan compute only queue?

 Pool:=gfx_pool.fetch(me^.imdone_count);

 Cmd:=TvStreamCmdBuffer.Create(Pool,gfx_pool.queue);
 Cmd.stream:=stream;

 stream^.Acquire; //TvStreamCmdBuffer
end;
|
|
|
|
//Destroys a command buffer and drops the stream reference it held; the
//stream is freed when that was the last reference.
procedure free_cmd_buffer(cmd:TvStreamCmdBuffer);
var
 owner:p_pm4_stream;
begin
 //remember the stream first, "cmd" is gone after Free
 owner:=cmd.stream;

 cmd.ReleaseResource;
 cmd.Free;

 if not owner^.Release then Exit; //TvStreamCmdBuffer

 free_stream(owner);
end;
|
|
|
|
procedure pm4_Writeback_Finish(var ctx:t_me_render_context); forward;
|
|
|
|
//
|
|
//Submits the open command buffer (if any). A buffer that is still executing
//is parked on the stall list of its stream type; a finished one (or one that
//reports an error status) is freed immediately. Cmd is nil afterwards.
procedure t_me_render_context.FinishCmdBuffer;
var
 r:TVkResult;
begin
 if (Cmd=nil) then Exit;

 pm4_Writeback_Finish(Self);

 r:=Cmd.QueueSubmit;

 if p_print_gpu_ops then
 begin
  Writeln('QueueSubmit:',r);
 end;

 if (r<>VK_SUCCESS) then
 begin
  EndFrameCapture;
  PrintMemoryBudget;
  Assert(false,'QueueSubmit');
 end;

 r:=Cmd.Status;

 if (r=VK_NOT_READY) then
 begin
  //insert
  TAILQ_INSERT_TAIL(@stall[Cmd.stream^.buft],Cmd,@Cmd.entry);
  Cmd:=nil;
  Exit;
 end;

 if (r<>VK_SUCCESS) then
 begin
  Writeln(stderr,'last.Status=',r); //error
 end;

 free_cmd_buffer(Cmd);

 Cmd:=nil;
end;
|
|
|
|
//Reaps finished command buffers from the head of stall list "i".
//Returns VK_NOT_READY as soon as the head buffer is still executing and
//VK_SUCCESS once the list is empty. Buffers reporting any other status are
//logged to stderr and freed like finished ones.
function t_me_render_context.CmdStatus(i:t_pm4_stream_type):TVkResult;
var
 last:TvStreamCmdBuffer;
begin
 repeat
  last:=TvStreamCmdBuffer(TAILQ_FIRST(@stall[i]));
  if (last=nil) then Break;

  Result:=last.Status;

  if (Result=VK_NOT_READY) then Exit;

  if (Result<>VK_SUCCESS) then
  begin
   Writeln(stderr,'last.Status=',Result); //error
  end;

  TAILQ_REMOVE(@stall[i],last,@last.entry);

  free_cmd_buffer(last);
 until False;

 Result:=VK_SUCCESS;
end;
|
|
|
|
//Polls the stall list of every stream type and returns True when at least one
//of them still has a command buffer in flight.
//The status is read into a local first: with short-circuit boolean evaluation
//(the compiler default, {$B-}) the expression "Result or (CmdStatus(i)=...)"
//stops calling CmdStatus once Result is True, which left the remaining lists
//unpolled and their finished buffers unreleased.
function t_me_render_context.PingCmd:Boolean;
var
 i:t_pm4_stream_type;
 r:TVkResult;
begin
 Result:=False;
 for i:=Low(t_pm4_stream_type) to High(t_pm4_stream_type) do
 begin
  r:=CmdStatus(i); //always executed, it also reaps finished buffers
  if (r=VK_NOT_READY) then
  begin
   Result:=True;
  end;
 end;
end;
|
|
|
|
//Trims the pool, submits the open command buffer and reports whether the
//stall list of the current stream type has no buffer in flight any more.
function t_me_render_context.WaitConfirm:Boolean;
var
 status:TVkResult;
begin
 gfx_pool.trim;

 FinishCmdBuffer;

 status:=CmdStatus(stream^.buft);
 Result:=(status<>VK_NOT_READY);
end;
|
|
|
|
//Like WaitConfirm, but returns True right away when there is no current
//stream and switches to another task when work is still in flight.
function t_me_render_context.WaitConfirmOrSwitch:Boolean;
begin
 gfx_pool.trim;

 FinishCmdBuffer;

 if (stream=nil) then Exit(True);

 Result:=(CmdStatus(stream^.buft)<>VK_NOT_READY);

 if Result then Exit;

 switch_task;
end;
|
|
|
|
//Inserts a debug label into the open command buffer. Skipped when the debug
//utils entry point is missing or no command buffer is open.
Procedure t_me_render_context.InsertLabel(pLabelName:PVkChar);
begin
 if (DebugReport.FCmdInsertDebugUtilsLabel=nil) or
    (Cmd=nil) then Exit;

 //Cmd is non-nil here, so this call returns at its first check
 BeginCmdBuffer;

 Cmd.InsertLabel(pLabelName);
end;
|
|
|
|
//Opens a debug label region in the open command buffer. Skipped when the
//debug utils entry point is missing or no command buffer is open.
Procedure t_me_render_context.BeginLabel(pLabelName:PVkChar);
begin
 if (DebugReport.FCmdBeginDebugUtilsLabel=nil) or
    (Cmd=nil) then Exit;

 //Cmd is non-nil here, so this call returns at its first check
 BeginCmdBuffer;

 Cmd.BeginLabel(pLabelName);
end;
|
|
|
|
//Closes the current debug label region when a command buffer is open.
Procedure t_me_render_context.EndLabel();
begin
 if (Cmd<>nil) then
 begin
  Cmd.EndLabel();
 end;
end;
|
|
|
|
//Submits the open command buffer, then lets the ME scheduler switch tasks.
procedure t_me_render_context.switch_task;
begin
 FinishCmdBuffer;
 //hand over to the scheduler
 me^.switch_task;
end;
|
|
|
|
//Submits the open command buffer, then accounts the task as done in the ME
//scheduler (the Boolean result of next_task is not used).
procedure t_me_render_context.complete_and_next_task;
begin
 FinishCmdBuffer;
 //advance the scheduler
 me^.next_task;
end;
|
|
|
|
//Invokes the ME's idle callback when one is installed.
procedure t_me_render_context.on_idle;
begin
 if (me^.on_idle=nil) then Exit;
 me^.on_idle();
end;
|
|
|
|
//
|
|
|
|
//Returns TM_MIXED when more than one image usage bit is set, otherwise 0.
function GetMixedFlag(const curr:t_pm4_usage):Byte;
begin
 Result:=0;
 if (PopCnt(DWORD(curr.img_usage))>1) then
 begin
  Result:=TM_MIXED;
 end;
end;
|
|
|
|
//Chooses the Vulkan image layout for a usage record.
// - several usage bits at once        -> GENERAL
// - attachment                        -> COLOR_ATTACHMENT_OPTIMAL
// - depth/stencil                     -> writable or read-only depth layout,
//                                        depending on TM_WRITE/TM_CLEAR in shd_usage
// - sampled/storage                   -> GENERAL when written, else SHADER_READ_ONLY
// - transfer                          -> GENERAL / TRANSFER_DST / TRANSFER_SRC by mem_usage
// - anything else (incl. no usage)    -> UNDEFINED
function GetImageLayout(const curr:t_pm4_usage):TVkImageLayout;
var
 shd_writes:Boolean;
begin
 if (PopCnt(DWORD(curr.img_usage))>1) then
 begin
  Exit(VK_IMAGE_LAYOUT_GENERAL);
 end;

 shd_writes:=((curr.shd_usage and (TM_WRITE or TM_CLEAR))<>0);

 case t_image_usage(BsfDWord(DWORD(curr.img_usage))) of
  iu_attachment:
   begin
    Result:=VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
   end;
  iu_depthstenc:
   begin
    if shd_writes then
     Result:=VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
    else
     Result:=VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
   end;
  iu_sampled,
  iu_storage:
   begin
    if shd_writes then
     Result:=VK_IMAGE_LAYOUT_GENERAL
    else
     Result:=VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
   end;
  iu_transfer:
   begin
    //mem_usage ???
    if ((curr.mem_usage and (TM_WRITE or TM_READ))=(TM_WRITE or TM_READ)) then
     Result:=VK_IMAGE_LAYOUT_GENERAL
    else
    if ((curr.mem_usage and TM_WRITE)<>0) then
     Result:=VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
    else
     Result:=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
   end;
  else
   Result:=VK_IMAGE_LAYOUT_UNDEFINED;
 end;
end;
|
|
|
|
//Translates TM_* usage bits into Vulkan access flags: R is included when
//TM_READ is set, W when TM_WRITE or TM_CLEAR is set.
function ConvertRW(IMAGE_USAGE:Byte;R,W:TVkAccessFlagBits):TVkAccessFlags; inline;
begin
 Result:=0;

 if ((IMAGE_USAGE and TM_READ)<>0) then
 begin
  Result:=Result or ord(R);
 end;

 if ((IMAGE_USAGE and (TM_WRITE or TM_CLEAR))<>0) then
 begin
  Result:=Result or ord(W);
 end;
end;
|
|
|
|
//Combines the access flags of the shader, colour attachment and depth/stencil
//usage fields of an image usage record.
function GetAccessMaskImg(const curr:t_pm4_usage):TVkAccessFlags;
var
 shd,clr,dsa:TVkAccessFlags;
begin
 shd:=ConvertRW(curr.shd_usage,VK_ACCESS_SHADER_READ_BIT                  ,VK_ACCESS_SHADER_WRITE_BIT                  );
 clr:=ConvertRW(curr.clr_usage,VK_ACCESS_COLOR_ATTACHMENT_READ_BIT        ,VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT        );
 dsa:=ConvertRW(curr.dsa_usage,VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);

 Result:=shd or clr or dsa;
end;
|
|
|
|
//Access flags of a buffer: shader read/write derived from mem_usage.
function GetAccessMaskBuf(const curr:t_pm4_usage):TVkAccessFlags;
begin
 Result:=ConvertRW(curr.mem_usage,
                   VK_ACCESS_SHADER_READ_BIT,
                   VK_ACCESS_SHADER_WRITE_BIT);
end;
|
|
|
|
//Pipeline stage mask for a bind point: all-graphics for BP_GRAPHICS, compute
//shader for BP_COMPUTE, all-commands for anything else.
function GetStageMask(BindPoint:TVkPipelineBindPoint):TVkPipelineStageFlags;
begin
 if (BindPoint=BP_GRAPHICS) then
 begin
  Result:=ord(VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT);
 end else
 if (BindPoint=BP_COMPUTE) then
 begin
  Result:=ord(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
 end else
 begin
  Result:=ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
 end;
end;
|
|
|
|
//Rounds "addr" down to the nearest multiple of "alignment".
//"alignment" must be non-zero (a zero value divides by zero).
function AlignDw(addr:PtrUInt;alignment:PtrUInt):PtrUInt; inline;
begin
 Result:=(addr div alignment)*alignment;
end;
|
|
|
|
const
|
|
VK_ACCESS_BUF_ANY=ord(VK_ACCESS_MEMORY_READ_BIT) or ord(VK_ACCESS_MEMORY_WRITE_BIT);
|
|
VK_STAGE_BUF_ANY =ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
|
|
|
|
//Fetches the image for key F and keeps retrying until FetchImage succeeds.
//After a failed attempt it sleeps in short steps until WaitConfirm reports
//that the pending GPU work is done, reopens a command buffer and tries again.
function _FetchImageForce(var ctx:t_me_render_context;const F:TvImageKey;usage:s_image_usage):TvImage2;
begin
 Result:=FetchImage(ctx.Cmd,F,usage);

 while (Result=nil) do
 begin
  repeat
   msleep_td(hz div 10000);
  until ctx.WaitConfirm;

  ctx.BeginCmdBuffer;

  Result:=FetchImage(ctx.Cmd,F,usage);
 end;
end;
|
|
|
|
//Size of one image dimension at mip "level": halves per level, never below 1.
function _MipExtent(base:TVkUInt32;level:Integer):TVkUInt32; inline;
begin
 Result:=base shr level;
 if (Result=0) then Result:=1;
end;

//Creates (or fetches) an image with the same key as "src" but with colour
//format "ToFormat" and records a copy of every mip level of "src" into it.
//  ctx      - render context providing the command buffer
//  usage    - usage set passed on to the image fetch
//  src      - source image, must not be nil
//  ToFormat - format of the destination image
//Returns the destination image.
//Every copy region now uses the extent of its own mip level; the level-0
//extent used before is larger than the subresource for every level above 0.
function ConvertImage(var ctx:t_me_render_context;usage:s_image_usage;src:TvImage2;ToFormat:TVkFormat):TvImage2;
var
 F:TvImageKey;
 dst:TvImage2;
 range:TVkImageCopy;
 range_all:array[0..15] of TVkImageCopy;
 i,m:Integer;
begin
 Assert(src<>nil);

 F:=src.key;
 F.cformat:=ToFormat;

 if not ExtractImage(src) then
 begin
  Assert(false,'ExtractImage');
 end;

 dst:=_FetchImageForce(ctx,F,usage);

 src.PushBarrier(ctx.cmd,
                 ord(VK_ACCESS_TRANSFER_READ_BIT),
                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                 ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 dst.PushBarrier(ctx.cmd,
                 ord(VK_ACCESS_TRANSFER_WRITE_BIT),
                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                 ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 //template region, mip level and extent are patched per level below
 range.srcSubresource.aspectMask    :=GetAspectMaskByFormat(src.key.cformat);
 range.srcSubresource.mipLevel      :=0;
 range.srcSubresource.baseArrayLayer:=0;
 range.srcSubresource.layerCount    :=src.key.params.layerCount;

 range.srcOffset.x:=0;
 range.srcOffset.y:=0;
 range.srcOffset.z:=0;

 range.dstSubresource.aspectMask    :=GetAspectMaskByFormat(dst.key.cformat);
 range.dstSubresource.mipLevel      :=0;
 range.dstSubresource.baseArrayLayer:=0;
 range.dstSubresource.layerCount    :=dst.key.params.layerCount;

 range.dstOffset.x:=0;
 range.dstOffset.y:=0;
 range.dstOffset.z:=0;

 range.extent.width :=src.key.params.width;
 range.extent.height:=src.key.params.height;
 range.extent.depth :=src.key.params.depth;
 //

 m:=src.key.params.mipLevels;

 //range_all is a fixed array, refuse to write past its end
 Assert(m<=Length(range_all),'ConvertImage: mipLevels');
 if (m>Length(range_all)) then m:=Length(range_all);

 For i:=0 to m-1 do
 begin
  range_all[i]:=range;
  range_all[i].srcSubresource.mipLevel:=i;
  range_all[i].dstSubresource.mipLevel:=i;
  //
  range_all[i].extent.width :=_MipExtent(range.extent.width ,i);
  range_all[i].extent.height:=_MipExtent(range.extent.height,i);
  range_all[i].extent.depth :=_MipExtent(range.extent.depth ,i);
 end;

 ctx.Cmd.CopyImage(
  src.FHandle,
  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
  dst.FHandle,
  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  m,
  @range_all[0]
 );

 Result:=dst;
end;
|
|
|
|
//Fetches the image for key F (retrying until it exists) and, when its format
//does not fit the requested usage, replaces it by a converted copy:
//  R32_* -> D32_SFLOAT and R16_* -> D16_UNORM when used as depth/stencil,
//  D32_SFLOAT -> R32_SFLOAT and D16_UNORM -> R16_UNORM when used as storage.
//The resulting image is referenced from the context before it is returned.
function FetchImageForce(var ctx:t_me_render_context;const F:TvImageKey;usage:s_image_usage):TvImage2;
var
 need_convert:Boolean;
 target:TVkFormat;
begin
 Result:=_FetchImageForce(ctx,F,usage);

 //function ExtractImage(img:TvImage2):Boolean;

 Assert(Result<>nil);

 need_convert:=False;
 target:=VK_FORMAT_UNDEFINED;

 //TODO: more general type compatibility checking
 case Result.FFormat of
  //
  VK_FORMAT_R32_UINT,
  VK_FORMAT_R32_SINT,
  VK_FORMAT_R32_SFLOAT:
   begin
    //R32 -> D32
    need_convert:=(iu_depthstenc in usage);
    target:=VK_FORMAT_D32_SFLOAT;
   end;
  //
  VK_FORMAT_R16_UNORM,
  VK_FORMAT_R16_SNORM,
  VK_FORMAT_R16_UINT,
  VK_FORMAT_R16_SINT,
  VK_FORMAT_R16_SFLOAT:
   begin
    //R16 -> D16
    need_convert:=(iu_depthstenc in usage);
    target:=VK_FORMAT_D16_UNORM;
   end;
  //
  VK_FORMAT_D32_SFLOAT:
   begin
    //D32 -> R32
    need_convert:=(iu_storage in usage);
    target:=VK_FORMAT_R32_SFLOAT;
   end;
  VK_FORMAT_D16_UNORM:
   begin
    //D16 -> R16
    need_convert:=(iu_storage in usage);
    target:=VK_FORMAT_R16_UNORM;
   end;
  //
  else;
 end;

 if need_convert then
 begin
  Result:=ConvertImage(ctx,usage,Result,target);
 end;

 ctx.RefToParent(Result);
end;
|
|
|
|
//Prepares every image and buffer listed in UniformBuilder for use at
//BindPoint. For each entry the matching resource instance of the current node
//scope is looked up (and inserted when missing); on its first preparation the
//Vulkan object is fetched, image contents are loaded and a barrier into the
//access/layout/stage required by the instance is recorded.
//Whenever a fetch or load cannot be satisfied the function spins on
//ctx.WaitConfirm, reopens the command buffer and retries.
//NOTE: local names are kept as they are because the loop bodies run inside
//"With" scopes of the builder entries.
procedure Prepare_Uniforms(var ctx:t_me_render_context;
                           BindPoint:TVkPipelineBindPoint;
                           var UniformBuilder:TvUniformBuilder);
var
 i:Integer;

 ri:TvImage2;

 buf:TvHostBuffer;
 diff_u:TVkDeviceSize;
 diff_a:TVkDeviceSize;

 resource_instance:p_pm4_resource_instance;
 b:Boolean;
begin
 //Writeln('[Prepare_Uniforms]->');

 //images (the loop does not run for an empty array)
 For i:=0 to High(UniformBuilder.FImages) do
 With UniformBuilder.FImages[i] do
 if (FImage.params.invalid=0) then //entries flagged invalid are skipped
 begin

  resource_instance:=ctx.node^.scope.find_image_resource_instance(FImage);

  if (resource_instance=nil) then
  case btype of
   vbSampled:
    begin
     resource_instance:=ctx.stream^.insert_image_resource(
                         @ctx.node^.scope,
                         FImage,
                         memuse,
                         [iu_sampled],
                         'Prepare_Uniforms');
    end;
   vbStorage,
   vbMipStorage:
    begin
     resource_instance:=ctx.stream^.insert_image_resource(
                         @ctx.node^.scope,
                         FImage,
                         memuse,
                         [iu_storage],
                         'Prepare_Uniforms');
    end;
   else
    Assert(false);
  end;

  Assert(resource_instance<>nil);

  if not resource_instance^.prepared then
  begin
   resource_instance^.prepared:=true;

   //the cached image is currently not reused, "ri" always starts as nil
   //ri:=TvImage2(resource_instance^.resource^.rimage);
   ri:=nil;

   if (ri<>nil) then
   begin
    ctx.Cmd.RefTo(ri);

    if (ri.is_invalid) then
    begin
     resource_instance^.resource^.rimage:=nil;
     ri:=nil;
    end;
   end;

   if (ri=nil) then
   begin
    ri:=FetchImageForce(ctx,
                        FImage,
                        resource_instance^.curr.img_usage);

    resource_instance^.resource^.rimage:=ri;
   end;

   //Writeln(GetVkFormatStr(ri.key.cformat));

   //retry the load until it succeeds
   repeat
    b:=pm4_load_from(ctx.Cmd,ri,resource_instance^.curr.mem_usage);

    if (not b) then
    begin
     repeat until ctx.WaitConfirm;
     ctx.BeginCmdBuffer;
    end;
   until (b);

   ri.PushBarrier(ctx.Cmd,
                  GetAccessMaskImg(resource_instance^.curr),
                  GetImageLayout  (resource_instance^.curr),
                  GetStageMask(BindPoint));
  end;

 end;
 //images

 //buffers
 For i:=0 to High(UniformBuilder.FBuffers) do
 With UniformBuilder.FBuffers[i] do
 if (memuse and TM_INVAL)=0 then
 begin

  resource_instance:=ctx.node^.scope.find_buffer_resource_instance(R_BUF,addr,size);

  if (resource_instance=nil) then
  begin
   resource_instance:=ctx.stream^.insert_buffer_resource(
                       @ctx.node^.scope,
                       R_BUF,
                       addr,
                       size,
                       memuse,
                       'Prepare_Uniforms');
  end;

  Assert(resource_instance<>nil);

  if not resource_instance^.prepared then
  begin
   resource_instance^.prepared:=true;

   //the cached buffer is currently not reused, "buf" always starts as nil
   //buf:=TvHostBuffer(resource_instance^.resource^.rimage);
   buf:=nil;

   if (buf<>nil) then
   begin
    ctx.Cmd.RefTo(buf);

    if (buf.is_invalid) then
    begin
     resource_instance^.resource^.rimage:=nil;
     buf:=nil;
    end;
   end;

   if (buf=nil) then
   begin

    //retry the fetch until a host buffer is available
    repeat
     buf:=FetchHostBuffer(ctx.Cmd,QWORD(addr),size);

     if (buf=nil) then
     begin
      repeat until ctx.WaitConfirm;
      ctx.BeginCmdBuffer;
     end;
    until (buf<>nil);

    Assert(buf<>nil);

    ctx.RefToParent(buf);

    resource_instance^.resource^.rimage:=buf;

    //offset of the data inside the host buffer, rounded down to the
    //storage buffer offset alignment of the device
    diff_u:=QWORD(addr)-buf.FAddr;
    diff_a:=AlignDw(diff_u,limits.minStorageBufferOffsetAlignment);

    //TODO: Barrier state cache
    ctx.Cmd.BufferMemoryBarrier(buf.FHandle,
                                VK_ACCESS_BUF_ANY,
                                GetAccessMaskBuf(resource_instance^.curr),
                                diff_a,size,
                                VK_STAGE_BUF_ANY,
                                GetStageMask(BindPoint)
                                );

   end;

  end;

 end;
 //buffers

 //Writeln('<-[Prepare_Uniforms]');
end;
|
|
|
|
//Binds a 16-element storage image array where element k is the view of mip
//level (FView.base_level+k) of "ri". Unused tail elements repeat the last
//view. With ri=nil all 16 elements are VK_NULL_HANDLE.
//  fset,bind       - descriptor set and binding index
//  DescriptorGroup - receiver of the BindStorages call
//  ri              - image to create the views from, may be nil
//  FView           - view key; base_level..last_level select the mip levels
//  Layout          - image layout passed on to BindStorages
//Fixes: the element count "p" was left uninitialised on the ri=nil path, "iv"
//was read uninitialised when the level range was empty, and nothing stopped
//the level loop from writing past the end of the array.
procedure BindMipStorage(var ctx:t_me_render_context;
                         fset,bind:TVkUInt32;
                         DescriptorGroup:TvDescriptorInterface;
                         ri:TvImage2;
                         const FView:TvImageViewKey;
                         Layout:TVkImageLayout);
var
 i,p:Integer;
 iv:TvImageView2;
 aiv:array[0..15] of TVkImageView;
 MView:TvImageViewKey;
 pad:TVkImageView;
begin
 p:=0;
 pad:=VK_NULL_HANDLE;

 if (ri<>nil) then
 begin
  For i:=FView.base_level to FView.last_level do
  begin
   if (p>=Length(aiv)) then Break; //never write past the fixed array

   MView:=FView;
   MView.base_level:=i;
   MView.last_level:=i;
   //
   iv:=ri.FetchView(ctx.Cmd,MView,iu_storage);
   Assert(iv<>nil);
   //
   pad:=iv.FHandle;
   aiv[p]:=pad;
   //
   Inc(p);
  end;
 end;

 //fill by 16: repeat the last view (or the null handle when there is none)
 while (p<Length(aiv)) do
 begin
  aiv[p]:=pad;
  //
  Inc(p);
 end;

 DescriptorGroup.BindStorages(fset,bind,
                              0,p,
                              @aiv[0],
                              Layout);
end;
|
|
|
|
//Text form of a memory binding for log output: '(nil)' when no memory object
//is attached, otherwise '0x' followed by the 16-digit hex memory handle.
Function get_bind_str(FBind:TvPointer):RawByteString;
begin
 if (FBind.FMemory<>nil) then
 begin
  Result:='0x'+HexStr(FBind.FMemory.FHandle,16);
 end else
 begin
  Result:='(nil)';
 end;
end;
|
|
|
|
procedure Bind_Uniforms(var ctx:t_me_render_context;
|
|
BindPoint:TVkPipelineBindPoint;
|
|
var UniformBuilder:TvUniformBuilder);
|
|
|
|
var
|
|
i:Integer;
|
|
|
|
DescriptorGroup:TvDescriptorInterface;
|
|
|
|
ri:TvImage2;
|
|
iv:TvImageView2;
|
|
sm:TvSampler;
|
|
|
|
buf:TvHostBuffer;
|
|
|
|
diff_u:TVkDeviceSize;
|
|
diff_a:TVkDeviceSize;
|
|
align :TVkDeviceSize;
|
|
range :TVkDeviceSize;
|
|
|
|
resource_instance:p_pm4_resource_instance;
|
|
|
|
Layout:TVkImageLayout;
|
|
begin
|
|
DescriptorGroup:=ctx.Cmd.FetchDescriptorInterface(BindPoint);
|
|
|
|
//images
|
|
if (Length(UniformBuilder.FImages)<>0) then
|
|
begin
|
|
For i:=0 to High(UniformBuilder.FImages) do
|
|
With UniformBuilder.FImages[i] do
|
|
begin
|
|
|
|
if (FImage.params.invalid<>0) then
|
|
begin
|
|
|
|
if (limits.nullDescriptor<>VK_TRUE) then
|
|
begin
|
|
Assert(false,'unsupported nullDescriptor');
|
|
end;
|
|
|
|
case btype of
|
|
vbSampled:
|
|
begin
|
|
DescriptorGroup.BindImage(fset,bind,
|
|
VK_NULL_HANDLE,
|
|
VK_IMAGE_LAYOUT_GENERAL);
|
|
end;
|
|
vbStorage:
|
|
begin
|
|
DescriptorGroup.BindStorage(fset,bind,
|
|
VK_NULL_HANDLE,
|
|
VK_IMAGE_LAYOUT_GENERAL);
|
|
end;
|
|
vbMipStorage:
|
|
begin
|
|
BindMipStorage(ctx,
|
|
fset,bind,
|
|
DescriptorGroup,
|
|
nil,
|
|
FView,
|
|
VK_IMAGE_LAYOUT_GENERAL);
|
|
|
|
end;
|
|
else
|
|
Assert(false);
|
|
end;
|
|
|
|
end else
|
|
begin
|
|
resource_instance:=ctx.node^.scope.find_image_resource_instance(FImage);
|
|
|
|
Assert(resource_instance<>nil);
|
|
|
|
//ri:=TvImage2(resource_instance^.resource^.rimage);
|
|
|
|
ri:=FetchImage(ctx.Cmd,
|
|
FImage,
|
|
resource_instance^.curr.img_usage
|
|
);
|
|
|
|
Assert(ri<>nil);
|
|
|
|
Layout:=GetImageLayout(resource_instance^.curr);
|
|
|
|
case btype of
|
|
vbSampled:
|
|
begin
|
|
iv:=ri.FetchView(ctx.Cmd,FView,iu_sampled);
|
|
Assert(iv<>nil);
|
|
|
|
Writeln('BindImage:->[',i,']'#13#10,
|
|
' 0x',HexStr(ri.FHandle,16),':',GetVkFormatStr(ri.key.cformat),':',ri.FName,'->'#13#10,
|
|
' 0x',HexStr(iv.FHandle,16),':',GetVkFormatStr(iv.key.cformat),':',iv.FName);
|
|
|
|
DescriptorGroup.BindImage(fset,bind,
|
|
iv.FHandle,
|
|
Layout);
|
|
end;
|
|
vbStorage:
|
|
begin
|
|
//reset dst_sel
|
|
FView.dstSel:=Default(TvDstSel);
|
|
//
|
|
|
|
iv:=ri.FetchView(ctx.Cmd,FView,iu_storage);
|
|
Assert(iv<>nil);
|
|
|
|
Writeln('BindStorage:->[',i,']'#13#10,
|
|
' 0x',HexStr(ri.FHandle,16),':',ri.key.cformat,':',ri.FName,'->'#13#10,
|
|
' 0x',HexStr(iv.FHandle,16),':',iv.key.cformat,':',iv.FName);
|
|
|
|
DescriptorGroup.BindStorage(fset,bind,
|
|
iv.FHandle,
|
|
Layout);
|
|
end;
|
|
vbMipStorage:
|
|
begin
|
|
//reset dst_sel
|
|
FView.dstSel:=Default(TvDstSel);
|
|
//
|
|
|
|
BindMipStorage(ctx,
|
|
fset,bind,
|
|
DescriptorGroup,
|
|
ri,
|
|
FView,
|
|
Layout);
|
|
|
|
end;
|
|
else
|
|
Assert(false);
|
|
end;
|
|
|
|
end;
|
|
|
|
end;
|
|
end;
|
|
//images
|
|
|
|
//samplers
|
|
if (Length(UniformBuilder.FSamplers)<>0) then
|
|
begin
|
|
For i:=0 to High(UniformBuilder.FSamplers) do
|
|
With UniformBuilder.FSamplers[i] do
|
|
begin
|
|
sm:=FetchSampler(ctx.Cmd,PS);
|
|
|
|
DescriptorGroup.BindSampler(fset,bind,sm.FHandle);
|
|
|
|
end;
|
|
end;
|
|
//samplers
|
|
|
|
//buffers
|
|
if (Length(UniformBuilder.FBuffers)<>0) then
|
|
begin
|
|
For i:=0 to High(UniformBuilder.FBuffers) do
|
|
With UniformBuilder.FBuffers[i] do
|
|
if (memuse and TM_INVAL)=0 then
|
|
begin
|
|
|
|
resource_instance:=ctx.node^.scope.find_buffer_resource_instance(R_BUF,addr,size);
|
|
|
|
{
|
|
if (resource_instance<>nil) then
|
|
begin
|
|
|
|
Writeln('rb:curr:',HexStr(resource_instance^.curr.mem_usage,1),
|
|
' prev:',HexStr(resource_instance^.prev.mem_usage,1),
|
|
' next:',HexStr(resource_instance^.next.mem_usage,1)
|
|
);
|
|
|
|
end;
|
|
}
|
|
|
|
buf:=FetchHostBuffer(ctx.Cmd,QWORD(addr),size);
|
|
|
|
Assert(buf<>nil);
|
|
|
|
diff_u:=QWORD(addr)-buf.FAddr;
|
|
diff_a:=AlignDw(diff_u,limits.minStorageBufferOffsetAlignment);
|
|
|
|
align:=diff_u-diff_a;
|
|
|
|
if (align<>offset) then
|
|
begin
|
|
Assert(false,'wrong buffer align '+IntToStr(align)+'<>'+IntToStr(offset));
|
|
end;
|
|
|
|
range:=size;
|
|
|
|
Writeln('BindBuffer:->[',i,':',bind,']',' 0x',HexStr(QWORD(addr),10),' ',get_bind_str(buf.FBind),#13#10,
|
|
' 0x',HexStr(buf.FHandle,16),':',buf.FName,'->[',diff_a,'..',diff_a+range,']');
|
|
|
|
DescriptorGroup.BindBuffer(fset,bind,
|
|
buf.FHandle,
|
|
diff_a,
|
|
range {VK_WHOLE_SIZE});
|
|
|
|
if ((memuse and TM_WRITE)<>0) then
|
|
begin
|
|
ctx.Cmd.AddPlannedTrigger(QWORD(addr),QWORD(addr)+size,nil);
|
|
end;
|
|
|
|
end;
|
|
end;
|
|
//buffers
|
|
|
|
end;
|
|
|
|
//Uploads the push-constant block of every shader stage in ShaderGroup
//that declares one.
// ctx         - render context whose command buffer receives the constants
// ShaderGroup - group whose per-stage shaders are inspected
// dst         - user data block the push constant pointer is resolved from
procedure Bind_Pushs(var ctx:t_me_render_context;
                     ShaderGroup:TvShaderGroup;
                     dst:PGPU_USERDATA);
var
 stage:TvShaderStage;
 sh:TvShaderExt;
 bind_point:TVkPipelineBindPoint;
 user_data:PDWORD;
 push_data:Pointer;
begin
 For stage:=Low(TvShaderStage) to High(TvShaderStage) do
 begin
  sh:=ShaderGroup.FKey.FShaders[stage];

  //stage is not present in this group
  if (sh=nil) then Continue;

  //stage has no push constants
  if (sh.FPushConst.size=0) then Continue;

  user_data:=dst^.get_user_data(stage);
  push_data:=sh.GetPushConstData(user_data);

  Assert(push_data<>nil,'push const is NULL');

  //compute shaders use the compute bind point, everything else graphics
  if (sh.FStage=VK_SHADER_STAGE_COMPUTE_BIT) then
  begin
   bind_point:=VK_PIPELINE_BIND_POINT_COMPUTE;
  end else
  begin
   bind_point:=VK_PIPELINE_BIND_POINT_GRAPHICS;
  end;

  ctx.Cmd.PushConstant(bind_point,
                       ord(sh.FStage),
                       sh.FPushConst.offset,
                       sh.FPushConst.size,
                       push_data);
 end;
end;
|
|
|
|
//One-time walk over the stream's init scope:
// - images used only as sampled are fetched and cached in the resource,
// - htile/cmask resources copy the rclear flag of their meta objects.
//Does nothing once ctx.stream^.init is set; an empty init scope returns
//without setting the flag.
procedure pm4_InitStream(var ctx:t_me_render_context);
var
 inst:p_pm4_resource_instance;
 res :p_pm4_resource;

 image:TvImage2;
 htile:TvMetaHtile;
 cmask:TvMetaCmask;
begin
 if ctx.stream^.init then Exit;

 inst:=ctx.stream^.init_scope.first;

 if (inst=nil) then Exit;

 repeat
  res:=inst^.resource;

  if (res^.rtype=R_IMG) and
     (not res^.rcombined) then
  begin
   //start on demand
   StartFrameCapture;

   ctx.BeginCmdBuffer;

   //Writeln('init_img:',HexStr(resource^.rkey.Addr),' ',(resource^.rkey.params.width),'x',(resource^.rkey.params.height));

   //for now only purely sampled images are preloaded
   if (res^.uall.img_usage=[iu_sampled]) then
   begin
    image:=FetchImage(ctx.Cmd,
                      res^.rkey,
                      inst^.curr.img_usage + inst^.next.img_usage
                     );

    //NO MEM: stop walking the scope
    if (image=nil) then Break;

    res^.rimage:=image;

    //pm4_load_from(ctx.Cmd,ri,i^.curr.mem_usage);
   end;
  end else
  if (res^.rtype=R_HTILE) then
  begin
   //start on demand
   ctx.BeginCmdBuffer;

   htile:=FetchHtile(ctx.Cmd,res^.rkey,res^.rsize);

   res^.rclear:=htile.rclear;
  end else
  if (res^.rtype=R_CMASK) then
  begin
   //start on demand
   ctx.BeginCmdBuffer;

   cmask:=FetchCmask(ctx.Cmd,res^.rkey,res^.rsize);

   res^.rclear:=cmask.rclear;
  end;

  inst:=TAILQ_NEXT(inst,@inst^.init_entry);
 until (inst=nil);

 ctx.stream^.init:=True;
end;
|
|
|
|
|
|
//Clears the depth and/or stencil aspect of the depth target described by
//rt_info.DB_INFO with a transfer clear, outside of any render pass.
// rt_info - draw state; DB_INFO supplies image, view, usage flags and clear value
// ctx     - render context that owns the command buffer
//An aspect is cleared only when its usage has TM_CLEAR set and the image
//format has a matching depth-only/stencil-only format.
procedure pm4_ClearDepth(var rt_info:t_pm4_rt_info;
                         var ctx:t_me_render_context);
var
 ri:TvImage2;
 iv:TvImageView2;
 cclear:array[0..1] of Boolean;
 range :TVkImageSubresourceRange;
begin
 //ClearDepthTarget

 ctx.Cmd.EndRenderPass;

 ctx.Cmd.BeginLabel('ClearDepth');

 ri:=FetchImageForce(ctx,
                     rt_info.DB_INFO.FImageInfo,
                     [iu_depthstenc]);

 Assert(ri<>nil);

 iv:=ri.FetchView(ctx.Cmd,rt_info.DB_INFO.FImageView,iu_depthstenc);

 //the view supplies the subresource range below
 Assert(iv<>nil);

 ctx.RefToParent(ri);

 ri.PushBarrier(ctx.Cmd,
                ord(VK_ACCESS_TRANSFER_WRITE_BIT),
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 //[0] = depth aspect, [1] = stencil aspect
 cclear[0]:=((rt_info.DB_INFO.DEPTH_USAGE and TM_CLEAR)<>0) and
            (GetDepthOnlyFormat (ri.key.cformat)<>VK_FORMAT_UNDEFINED);

 cclear[1]:=((rt_info.DB_INFO.STENCIL_USAGE and TM_CLEAR)<>0) and
            (GetStencilOnlyFormat(ri.key.cformat)<>VK_FORMAT_UNDEFINED);

 range:=iv.GetSubresRange;

 range.aspectMask:=(ord(VK_IMAGE_ASPECT_DEPTH_BIT  )*ord(cclear[0])) or
                   (ord(VK_IMAGE_ASPECT_STENCIL_BIT)*ord(cclear[1]));

 //a subresource range with an empty aspect mask is not valid for
 //vkCmdClearDepthStencilImage, so skip the clear when nothing is selected
 if (range.aspectMask<>0) then
 begin
  ctx.Cmd.ClearDepthStencilImage(ri.FHandle,
                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                 @rt_info.DB_INFO.CLEAR_VALUE.depthStencil,
                                 range);
 end;

 ctx.Cmd.EndLabel();

 ctx.FlushParent;
end;
|
|
|
|
//Writes one line per present shader stage of ShaderGroup to stderr:
//the GCN hash in hex followed by the SPIR-V dump name of that stage.
procedure DumpShaderGroup(ShaderGroup:TvShaderGroup);
var
 stage:TvShaderStage;
 sh:TvShaderExt;
 dump:RawByteString;
begin
 dump:='[DumpShaderGroup]'#13#10;

 For stage:=Low(TvShaderStage) to High(TvShaderStage) do
 begin
  sh:=ShaderGroup.FKey.FShaders[stage];

  //stage not present in the group
  if (sh=nil) then Continue;

  dump:=dump+' ('+HexStr(sh.FHash_gcn,16)+') '+GetDumpSpvName(stage,sh.FHash_spv)+#13#10;
 end;

 Writeln(stderr,dump);
end;
|
|
|
|
//Builds all state needed before a draw call is recorded:
// 1. re-fetches the shader group from the saved shader registers,
// 2. builds the render pass key (color + optional depth attachments),
// 3. builds the graphics pipeline key and fetches the pipeline,
// 4. fetches the framebuffer (imageless or bound, depending on limits),
// 5. fetches attachment images/views and pushes their barriers,
// 6. prepares uniforms, begins the render pass, binds vertex input,
//    uniforms and push constants.
//ctx.rt_info must already point to the draw's render target info.
procedure pm4_DrawPrepare(var ctx:t_me_render_context);
var
 i:Integer;

 FAttrBuilder:TvAttrBuilder;

 FUniformBuilder:TvUniformBuilder;

 RP_KEY:TvRenderPassKey;
 RP:TvRenderPass2;

 GP_KEY:TvGraphicsPipelineKey;
 GP:TvGraphicsPipeline2;

 FB_KEY:TvFramebufferImagelessKey;
 FB_KEY2:TvFramebufferBindedKey;
 FB:TvFramebuffer;

 ri:TvImage2;
 rd:TvCustomImage2;
 rs:TvCustomImage2;
 iv:TvImageView2;

 color_instance:array[0..7] of p_pm4_resource_instance;

 flag:Integer;
 img_usage:s_image_usage;

 meta_instance:p_pm4_resource_instance;
 d_instance:p_pm4_resource_instance;
 s_instance:p_pm4_resource_instance;

 //
 GPU_REGS:TGPU_REGS;
 CX_REG :TCONTEXT_REG_GROUP; // 0xA000

 pa:TPushConstAllocator;
 pp:PPushConstAllocator;
begin
 //recheck shaders

 //start from a zeroed register set so that groups which are not
 //assigned below are nil instead of stack garbage
 GPU_REGS:=Default(TGPU_REGS);

 GPU_REGS.SG_REG:=@ctx.rt_info^.SHADERDATA.SG_REG;
 GPU_REGS.CX_REG:=@CX_REG;
 GPU_REGS.UC_REG:=@ctx.rt_info^.SHADERDATA.UC_REG;

 CX_REG:=Default(TCONTEXT_REG_GROUP);

 //only the context registers saved in SHADERDATA are restored
 CX_REG.SPI_PS_INPUT_ENA        :=ctx.rt_info^.SHADERDATA.SPI_PS_INPUT_ENA        ;
 CX_REG.SPI_PS_INPUT_ADDR       :=ctx.rt_info^.SHADERDATA.SPI_PS_INPUT_ADDR       ;
 CX_REG.SPI_INTERP_CONTROL_0    :=ctx.rt_info^.SHADERDATA.SPI_INTERP_CONTROL_0    ;
 CX_REG.SPI_PS_IN_CONTROL       :=ctx.rt_info^.SHADERDATA.SPI_PS_IN_CONTROL       ;
 CX_REG.SPI_PS_INPUT_CNTL       :=ctx.rt_info^.SHADERDATA.SPI_PS_INPUT_CNTL       ;
 CX_REG.DB_SHADER_CONTROL       :=ctx.rt_info^.SHADERDATA.DB_SHADER_CONTROL       ;
 CX_REG.VGT_INSTANCE_STEP_RATE_0:=ctx.rt_info^.SHADERDATA.VGT_INSTANCE_STEP_RATE_0;
 CX_REG.VGT_INSTANCE_STEP_RATE_1:=ctx.rt_info^.SHADERDATA.VGT_INSTANCE_STEP_RATE_1;
 CX_REG.RENDER_TARGET           :=ctx.rt_info^.SHADERDATA.RENDER_TARGET           ;

 pa.Init;
 pp:=@pa;

 ctx.rt_info^.ShaderGroup:=FetchShaderGroupRT(GPU_REGS,pp);
 Assert(ctx.rt_info^.ShaderGroup<>nil);
 //recheck shaders

 RP_KEY.Clear;

 //the explicit count check guards the "RT_COUNT-1" upper bound
 if (ctx.rt_info^.RT_COUNT<>0) then
 For i:=0 to ctx.rt_info^.RT_COUNT-1 do
 begin

  //set clear flag on cleared cmask
  if (ctx.rt_info^.RT_INFO[i].CMASK_INFO.KEY.Addr<>nil) then
  begin

   meta_instance:=ctx.node^.scope.find_buffer_resource_instance(R_CMASK,
                                                               ctx.rt_info^.RT_INFO[i].CMASK_INFO.KEY.Addr,
                                                               ctx.rt_info^.RT_INFO[i].CMASK_INFO.SIZE);
   Assert(meta_instance<>nil);

   if meta_instance^.resource^.rclear then
   begin
    //-TM_READ +TM_CLEAR
    ctx.rt_info^.RT_INFO[i].IMAGE_USAGE:=ctx.rt_info^.RT_INFO[i].IMAGE_USAGE and (not TM_READ) or TM_CLEAR;

    //the pending clear is consumed by this draw
    meta_instance^.resource^.rclear:=False;
   end;

  end;

  if (ctx.rt_info^.RT_INFO[i].FImageInfo.params.invalid<>0) then
  begin
   //skip
   color_instance[i]:=nil;
  end else
  begin
   color_instance[i]:=ctx.node^.scope.find_image_resource_instance(ctx.rt_info^.RT_INFO[i].FImageInfo);

   Assert(color_instance[i]<>nil);
  end;

  //TODO: fixup cformat

  flag:=0;
  if (color_instance[i]<>nil) then
  begin
   flag:=GetMixedFlag(color_instance[i]^.curr);
  end;

  //TODO: fixup cformat

  RP_KEY.AddColorAt(ctx.rt_info^.RT_INFO[i].attachment,
                    ctx.rt_info^.RT_INFO[i].FImageInfo.cformat,
                    ctx.rt_info^.RT_INFO[i].IMAGE_USAGE or
                    flag,
                    ctx.rt_info^.RT_INFO[i].FImageInfo.params.samples);

 end;

 if ctx.rt_info^.DB_ENABLE then
 begin

  //set clear flag on cleared htile
  if (ctx.rt_info^.DB_INFO.HTILE_INFO.TILE_SURFACE_ENABLE<>0) then
  begin
   meta_instance:=ctx.node^.scope.find_buffer_resource_instance(R_HTILE,
                                                               ctx.rt_info^.DB_INFO.HTILE_INFO.KEY.Addr,
                                                               ctx.rt_info^.DB_INFO.HTILE_INFO.SIZE);
   Assert(meta_instance<>nil);

   if meta_instance^.resource^.rclear then
   begin
    //-TM_READ +TM_CLEAR
    ctx.rt_info^.DB_INFO.DEPTH_USAGE:=ctx.rt_info^.DB_INFO.DEPTH_USAGE and (not TM_READ) or TM_CLEAR;

    //the pending clear is consumed by this draw
    meta_instance^.resource^.rclear:=False;
   end;

  end;

  //TODO: fixup cformat

  RP_KEY.AddDepthAt(ctx.rt_info^.RT_COUNT, //add to last attachment id
                    ctx.rt_info^.DB_INFO.FImageInfo.cformat,
                    ctx.rt_info^.DB_INFO.DEPTH_USAGE,
                    ctx.rt_info^.DB_INFO.STENCIL_USAGE,
                    ctx.rt_info^.DB_INFO.FImageInfo.params.samples);

  RP_KEY.SetZorderStage(ctx.rt_info^.DB_INFO.zorder_stage);

 end;

 //DumpShaderGroup(ctx.rt_info^.ShaderGroup);

 RP:=FetchRenderPass(ctx.Cmd,@RP_KEY);

 if (RP=nil) then
 begin
  DumpShaderGroup(ctx.rt_info^.ShaderGroup);

  Assert(false,'FetchRenderPass');
 end;

 GP_KEY.Clear;

 GP_KEY.FRenderPass :=RP;
 GP_KEY.FShaderGroup:=ctx.rt_info^.ShaderGroup;

 GP_KEY.SetBlendInfo(ctx.rt_info^.BLEND_INFO.logicOp,@ctx.rt_info^.BLEND_INFO.blendConstants);

 GP_KEY.SetPrimType (ctx.rt_info^.PRIM_TYPE,GP_KEY.FShaderGroup.FKey.FPrimtype);
 GP_KEY.SetPrimReset(ctx.rt_info^.PRIM_RESET);

 if (ctx.rt_info^.VP_COUNT<>0) then
 For i:=0 to ctx.rt_info^.VP_COUNT-1 do
 begin
  GP_KEY.AddVPort(ctx.rt_info^.VPORT[i],ctx.rt_info^.SCISSOR[i]);
 end;

 if (ctx.rt_info^.RT_COUNT<>0) then
 For i:=0 to ctx.rt_info^.RT_COUNT-1 do
 begin
  GP_KEY.AddBlend(ctx.rt_info^.RT_INFO[i].blend);
 end;

 FAttrBuilder:=Default(TvAttrBuilder);
 ctx.rt_info^.ShaderGroup.ExportAttrBuilder(FAttrBuilder,@ctx.rt_info^.USERDATA);

 //without dynamic vertex input the layout is part of the pipeline key
 if not limits.VK_EXT_vertex_input_dynamic_state then
 begin
  GP_KEY.SetVertexInput(FAttrBuilder);
 end;

 GP_KEY.rasterizer   :=ctx.rt_info^.RASTERIZATION.State;
 GP_KEY.ClipSpace    :=ctx.rt_info^.RASTERIZATION.ClipSpace;
 GP_KEY.DepthClip    :=ctx.rt_info^.RASTERIZATION.DepthClip;
 GP_KEY.multisampling:=ctx.rt_info^.MULTISAMPLE;

 GP_KEY.SetProvoking(TVkProvokingVertexModeEXT(ctx.rt_info^.PROVOKING));

 if ctx.rt_info^.DB_ENABLE then
 begin
  GP_KEY.DepthStencil:=ctx.rt_info^.DB_INFO.ds_state;
 end;

 GP:=FetchGraphicsPipeline(ctx.Cmd,@GP_KEY);

 if limits.VK_KHR_imageless_framebuffer then
 begin
  //imageless: the key only describes the attachments
  FB_KEY:=Default(TvFramebufferImagelessKey);

  FB_KEY.SetRenderPass(RP);
  FB_KEY.SetSize(ctx.rt_info^.SCREEN_SIZE);

  if (ctx.rt_info^.RT_COUNT<>0) then
  For i:=0 to ctx.rt_info^.RT_COUNT-1 do
  begin
   //TODO: fixup cformat

   FB_KEY.AddImageAt(ctx.rt_info^.RT_INFO[i].FImageInfo);
  end;

  if ctx.rt_info^.DB_ENABLE then
  begin
   //TODO: fixup cformat

   FB_KEY.AddImageAt(ctx.rt_info^.DB_INFO.FImageInfo);
  end;
 end else
 begin
  //bound: image views are added to the key while they are fetched below
  FB_KEY2:=Default(TvFramebufferBindedKey);

  FB_KEY2.SetRenderPass(RP);
  FB_KEY2.SetSize(ctx.rt_info^.SCREEN_SIZE);
 end;

 ctx.Render:=Default(TvRenderPassBeginInfo);

 ctx.Render.SetRenderPass(RP);
 ctx.Render.SetRenderArea(ctx.rt_info^.SCREEN_RECT);

 if limits.VK_KHR_imageless_framebuffer then
 begin
  FB:=FetchFramebufferImageless(ctx.Cmd,@FB_KEY);
  ctx.Render.SetFramebuffer(FB);
 end;

 if (ctx.rt_info^.RT_COUNT<>0) then
 For i:=0 to ctx.rt_info^.RT_COUNT-1 do
 begin

  //reuse of the image cached in the resource is currently disabled,
  //so ri is always nil here and the image is always fetched below
  //ri:=TvImage2(color_instance[i]^.resource^.rimage);
  ri:=nil;

  if (ri<>nil) then
  begin
   ctx.Cmd.RefTo(ri);
  end;

  if (ri<>nil) then
  if (ri.is_invalid) then
  begin
   color_instance[i]^.resource^.rimage:=nil;
   ri:=nil;
  end;

  if (ri=nil) then
  begin

   img_usage:=[];
   if (color_instance[i]<>nil) then
   begin
    {[iu_attachment]}
    img_usage:=color_instance[i]^.curr.img_usage;
   end;

   ri:=FetchImageForce(ctx,
                       ctx.rt_info^.RT_INFO[i].FImageInfo,
                       img_usage);

   if (color_instance[i]<>nil) then
   begin
    color_instance[i]^.resource^.rimage:=ri;
   end;
  end;

  //same check as on the depth path: ri is dereferenced right below
  Assert(ri<>nil);

  pm4_load_from(ctx.Cmd,ri,ctx.rt_info^.RT_INFO[i].IMAGE_USAGE);

  iv:=ri.FetchView(ctx.Cmd,ctx.rt_info^.RT_INFO[i].FImageView,iu_attachment);

  Assert(iv<>nil);

  if (color_instance[i]<>nil) then
  begin
   ri.PushBarrier(ctx.Cmd,
                  GetAccessMaskImg(color_instance[i]^.curr),
                  GetImageLayout(color_instance[i]^.curr),
                  ord(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) or
                  ord(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT) );
  end;

  //

  ctx.Render.AddClearColor(ctx.rt_info^.RT_INFO[i].CLEAR_COLOR);

  Writeln('BindFrame:->[',i,']'#13#10,
          ' 0x',HexStr(ri.FHandle,16),':',GetVkFormatStr(ri.key.cformat),':',ri.FName,'->'#13#10,
          ' 0x',HexStr(iv.FHandle,16),':',GetVkFormatStr(iv.key.cformat),':',iv.FName);

  //
  if limits.VK_KHR_imageless_framebuffer then
  begin
   ctx.Render.AddImageView(iv);
  end else
  begin
   FB_KEY2.AddImageView(iv);
  end;
  //

 end;

 if ctx.rt_info^.DB_ENABLE then
 begin

  d_instance:=ctx.node^.scope.find_image_resource_instance(GetDepthOnly  (ctx.rt_info^.DB_INFO.FImageInfo));
  s_instance:=ctx.node^.scope.find_image_resource_instance(GetStencilOnly(ctx.rt_info^.DB_INFO.FImageInfo));

  //reuse of cached depth/stencil images is currently disabled (see the
  //commented blocks), so rd/rs/ri stay nil until the fetch below
  ri:=nil;
  rd:=nil;
  rs:=nil;

  {
  if (d_instance<>nil) then
  begin
   rd:=TvCustomImage2(d_instance^.resource^.rimage);
  end;
  }

  if (rd<>nil) then
  begin
   ctx.Cmd.RefTo(rd);
  end;

  if (rd<>nil) then
  if (rd.is_invalid) then
  begin
   d_instance^.resource^.rimage:=nil;
   rd:=nil;
  end;

  {
  if (s_instance<>nil) then
  begin
   rs:=TvCustomImage2(s_instance^.resource^.rimage);
  end;
  }

  if (rs<>nil) then
  begin
   ctx.Cmd.RefTo(rs);
  end;

  if (rs<>nil) then
  if (rs.is_invalid) then
  begin
   s_instance^.resource^.rimage:=nil;
   rs:=nil;
  end;

  if (rd<>nil) then
  begin
   ri:=TvImage2(rd.Parent);
  end else
  if (rs<>nil) then
  begin
   ri:=TvImage2(rs.Parent);
  end;

  //drop everything when the cached parts do not belong to the same parent
  if (ri<>nil) then
  if (ri.DepthOnly  <>rd) or
     (ri.StencilOnly<>rs) then
  begin
   ri:=nil;
   rd:=nil;
   rs:=nil;
  end;

  //

  if (ri=nil) then
  begin
   ri:=FetchImageForce(ctx,
                       ctx.rt_info^.DB_INFO.FImageInfo,
                       [iu_depthstenc]);

   Assert(ri<>nil);

   rd:=ri.DepthOnly;
   rs:=ri.StencilOnly;

   if (d_instance<>nil) then
   begin
    d_instance^.resource^.rimage:=rd;

    ctx.RefToParent(ri);
   end;

   if (s_instance<>nil) then
   begin
    s_instance^.resource^.rimage:=rs;

    ctx.RefToParent(ri);
   end;
  end;

  //

  pm4_load_from(ctx.Cmd,rd,ctx.rt_info^.DB_INFO.DEPTH_USAGE);
  pm4_load_from(ctx.Cmd,rs,ctx.rt_info^.DB_INFO.STENCIL_USAGE);

  iv:=ri.FetchView(ctx.Cmd,ctx.rt_info^.DB_INFO.FImageView,iu_depthstenc);

  Assert(iv<>nil);

  ri.PushBarrier(ctx.Cmd,
                 GetDepthStencilAccessAttachMask(ctx.rt_info^.DB_INFO.DEPTH_USAGE,ctx.rt_info^.DB_INFO.STENCIL_USAGE),
                 GetDepthStencilSendLayout(ctx.rt_info^.DB_INFO.DEPTH_USAGE,ctx.rt_info^.DB_INFO.STENCIL_USAGE),
                 ord(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) or
                 ctx.rt_info^.DB_INFO.zorder_stage
                );

  //

  ctx.Render.AddClearColor(ctx.rt_info^.DB_INFO.CLEAR_VALUE);

  Writeln('BindDepth:->'#13#10,
          ' 0x',HexStr(ri.FHandle,16),':',GetVkFormatStr(ri.key.cformat),':',ri.FName,'->'#13#10,
          ' 0x',HexStr(iv.FHandle,16),':',GetVkFormatStr(iv.key.cformat),':',iv.FName);

  //
  if limits.VK_KHR_imageless_framebuffer then
  begin
   ctx.Render.AddImageView(iv);
  end else
  begin
   FB_KEY2.AddImageView(iv);
  end;
  //

 end;

 //bound framebuffer: the key is complete only after all views were added
 if not limits.VK_KHR_imageless_framebuffer then
 begin
  FB:=FetchFramebufferBinded(ctx.Cmd,@FB_KEY2);
  ctx.Render.SetFramebuffer(FB);
 end;

 ////////
 FUniformBuilder:=Default(TvUniformBuilder);
 ctx.rt_info^.ShaderGroup.ExportUnifBuilder(FUniformBuilder,@ctx.rt_info^.USERDATA);

 Prepare_Uniforms(ctx,BP_GRAPHICS,FUniformBuilder);
 ////////

 DumpShaderGroup(ctx.rt_info^.ShaderGroup);

 if not ctx.Cmd.BeginRenderPass(@ctx.Render,GP) then
 begin
  Writeln(stderr,'BeginRenderPass(ctx.Render)');

  DumpShaderGroup(ctx.rt_info^.ShaderGroup);

  Assert (false ,'BeginRenderPass(ctx.Render)');
 end;

 ctx.Cmd.SetVertexInput   (FAttrBuilder);
 ctx.Cmd.BindVertexBuffers(FAttrBuilder);

 Bind_Uniforms(ctx,
               BP_GRAPHICS,
               FUniformBuilder);

 Bind_Pushs(ctx,ctx.rt_info^.ShaderGroup,@ctx.rt_info^.USERDATA);

end;
|
|
|
|
//Post-draw/dispatch pass over every resource instance of the current node.
//For each non depth+stencil image written by this node:
// - the image is marked initialized,
// - if next_overlap.mem_usage is non-zero it is written back right away,
//   otherwise the resource is flagged (rwriteback) for a later write back.
procedure pm4_Writeback_After(var ctx:t_me_render_context);
var
 inst:p_pm4_resource_instance;
 res :p_pm4_resource;
 img :TvImage2;
 need_now:Boolean;
begin
 //write back

 inst:=ctx.node^.scope.Min;

 while (inst<>nil) do
 begin
  res:=inst^.resource;

  if (res^.rtype=R_IMG) then
  begin
   img:=TvImage2(res^.rimage);

   if (img<>nil) then
   if (not img.IsDepthAndStencil) and
      ((inst^.curr.mem_usage and TM_WRITE)<>0) then //written on current stage
   begin
    img.mark_init;

    //is used in fuzzy match resources
    need_now:=(inst^.next_overlap.mem_usage<>0);

    if need_now then
    begin
     pm4_write_back(ctx.Cmd,img);
    end;

    //deferred write back is only pending when it was not done just now
    res^.rwriteback:=not need_now;
   end;
  end;

  inst:=ctx.node^.scope.Next(inst);
 end;

 //disabled: separate depth/stencil write back
 {
 if ctx.rt_info^.DB_ENABLE then
 begin

  d_instance:=ctx.node^.scope.find_image_resource_instance(GetDepthOnly  (ctx.rt_info^.DB_INFO.FImageInfo));
  s_instance:=ctx.node^.scope.find_image_resource_instance(GetStencilOnly(ctx.rt_info^.DB_INFO.FImageInfo));

  ri:=nil;
  rd:=nil;
  rs:=nil;

  if (d_instance<>nil) then
  begin
   rd:=TvCustomImage2(d_instance^.resource^.rimage);
  end;

  if (s_instance<>nil) then
  begin
   rs:=TvCustomImage2(s_instance^.resource^.rimage);
  end;

  if (rd<>nil) then
  begin
   rd.mark_init;

   Assert(d_instance<>nil);

   if (d_instance^.next_overlap.mem_usage<>0) then
   begin
    pm4_write_back(ctx.Cmd,rd);
    //
    d_instance^.resource^.rwriteback:=False;
   end else
   begin
    //
    d_instance^.resource^.rwriteback:=True;
   end;

  end;

  //

  if (rs<>nil) then
  begin
   rs.mark_init;

   Assert(s_instance<>nil);

   if (s_instance^.next_overlap.mem_usage<>0) then
   begin
    pm4_write_back(ctx.Cmd,rs);
    //
    s_instance^.resource^.rwriteback:=False;
   end else
   begin
    //
    s_instance^.resource^.rwriteback:=True;
   end;

  end;

  //
 end;
 }

 //write back
end;
|
|
|
|
//Final pass over every resource of the stream:
// - images still flagged rwriteback are written back and the flag is reset,
// - the rclear flag of htile/cmask resources is stored back into their
//   meta objects.
//Does nothing when the context has no stream.
procedure pm4_Writeback_Finish(var ctx:t_me_render_context);
var
 res:p_pm4_resource;

 img  :TvImage2;
 htile:TvMetaHtile;
 cmask:TvMetaCmask;
begin
 if (ctx.stream=nil) then Exit;

 //write back

 res:=ctx.stream^.resource_set.Min;

 while (res<>nil) do
 begin

  //deferred image write back
  if res^.rwriteback and (res^.rtype=R_IMG) then
  begin
   img:=TvImage2(res^.rimage);

   Assert(img<>nil);

   pm4_write_back(ctx.Cmd,img);

   res^.rwriteback:=False;
  end;

  //persist clear state of meta surfaces
  if (res^.rtype=R_CMASK) then
  begin
   cmask:=FetchCmask(ctx.Cmd,res^.rkey,res^.rsize);

   cmask.rclear:=res^.rclear;
  end else
  if (res^.rtype=R_HTILE) then
  begin
   htile:=FetchHtile(ctx.Cmd,res^.rkey,res^.rsize);

   htile.rclear:=res^.rclear;
  end;

  res:=ctx.stream^.resource_set.Next(res);
 end;

 //write back
end;
|
|
|
|
//Inserts the text stored in the hint node as a label through the context.
procedure pm4_Hint(var ctx:t_me_render_context;node:p_pm4_node_hint);
var
 hint_text:PChar;
begin
 hint_text:=PChar(@node^.data);

 ctx.InsertLabel(hint_text);
end;
|
|
|
|
//Executes one draw node: prepares the pipeline state (except for depth
//clears), records the draw/clear command selected by node^.ntype and then
//runs the post-draw write back.
//Draws with neither color targets nor depth only insert a label.
procedure pm4_Draw(var ctx:t_me_render_context;node:p_pm4_node_draw);
var
 no_targets:Boolean;
begin
 ctx.rt_info:=@node^.rt_info;

 no_targets:=(ctx.rt_info^.RT_COUNT=0) and (not ctx.rt_info^.DB_ENABLE);

 if no_targets then
 begin
  //zero attachment (decompress Dcc/Depth/Fmask) skip
  ctx.InsertLabel('decompress Dcc/Depth/Fmask');
  Exit;
 end;

 pm4_InitStream(ctx);

 //if not ctx.WaitConfirmOrSwitch then Exit;

 StartFrameCapture;

 ctx.BeginCmdBuffer;

 //the depth clear path does not need pipeline state
 if (node^.ntype<>ntClearDepth) then
 begin
  pm4_DrawPrepare(ctx);
 end;

 ctx.Cmd.FinstanceCount:=node^.numInstances;
 ctx.Cmd.FINDEX_TYPE   :=TVkIndexType(node^.INDEX_TYPE);

 case node^.ntype of

  //both indexed variants are recorded with the same command
  ntDrawIndex2,
  ntDrawIndexOffset2:
   begin
    Writeln(node^.id,':DrawIndexOffset2(',node^.indexOffset,',',node^.vertexOffset,',',node^.indexCount,')');
    ctx.Cmd.DrawIndexOffset2(Pointer(node^.indexBase),node^.indexOffset,node^.vertexOffset,node^.indexCount);
   end;

  ntDrawIndexAuto:
   begin
    Writeln(node^.id,':DrawIndexAuto(',node^.vertexOffset,',',node^.indexCount,')');
    ctx.Cmd.DrawIndexAuto(node^.vertexOffset,node^.indexCount);
   end;

  ntClearDepth:
   begin
    pm4_ClearDepth(node^.rt_info,ctx);
   end;

  else
   Assert(false,'pm4_Draw');
 end;

 /////////

 pm4_Writeback_After(ctx);

 ctx.FlushParent;
end;
|
|
|
|
//Resolves node^.RT[0] (source) into node^.RT[1] (destination) with a
//transfer resolve over the node^.SCREEN rectangle, using the same offset
//for source and destination.
procedure pm4_Resolve(var ctx:t_me_render_context;node:p_pm4_node_Resolve);
var
 src_img:TvImage2;
 dst_img:TvImage2;
 region :TVkImageResolve;
begin
 pm4_InitStream(ctx);

 //if not ctx.WaitConfirmOrSwitch then Exit;

 StartFrameCapture;

 ctx.BeginCmdBuffer;

 //the resolve is recorded outside of a render pass
 ctx.Cmd.EndRenderPass;

 src_img:=FetchImageForce(ctx,node^.RT[0].FImageInfo,[iu_transfer]);

 Assert(src_img<>nil);

 dst_img:=FetchImageForce(ctx,node^.RT[1].FImageInfo,[iu_transfer]);

 Assert(dst_img<>nil);

 //source is read, destination is written by the transfer stage
 src_img.PushBarrier(ctx.Cmd,
                     ord(VK_ACCESS_TRANSFER_READ_BIT),
                     VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                     ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 dst_img.PushBarrier(ctx.Cmd,
                     ord(VK_ACCESS_TRANSFER_WRITE_BIT),
                     VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                     ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 region:=Default(TVkImageResolve);

 region.srcSubresource:=src_img.GetSubresLayer;
 region.dstSubresource:=dst_img.GetSubresLayer;

 region.srcOffset.Create(node^.SCREEN.offset.x,node^.SCREEN.offset.y,0);
 region.dstOffset:=region.srcOffset;

 region.extent.Create(node^.SCREEN.extent.width,node^.SCREEN.extent.height,1);

 ctx.Cmd.ResolveImage(src_img.FHandle,
                      VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                      dst_img.FHandle,
                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                      1,@region);

 ctx.FlushParent;
end;
|
|
|
|
//Fast clear of a render target.
//The implementation is currently disabled: the whole body is commented
//out, so this procedure does nothing. The locals are kept because the
//disabled code uses them.
procedure pm4_FastClear(var ctx:t_me_render_context;node:p_pm4_node_FastClear);
var
 ri:TvImage2;
 range:TVkImageSubresourceRange;

 resource_instance:p_pm4_resource_instance;
begin
 //disabled implementation (transfer clear followed by write back)
 {
 //
 pm4_InitStream(ctx);
 //

 StartFrameCapture;

 ctx.BeginCmdBuffer;

 ctx.Cmd.EndRenderPass;

 resource_instance:=ctx.node^.scope.find_image_resource_instance(node^.RT.FImageInfo);
 Assert(resource_instance<>nil);

 ri:=FetchImage(ctx.Cmd,
                node^.RT.FImageInfo,
                resource_instance^.curr.img_usage
                );

 ri.PushBarrier(ctx.Cmd,
                ord(VK_ACCESS_TRANSFER_WRITE_BIT),
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 range:=ri.GetSubresRange;

 ctx.Cmd.ClearColorImage(ri.FHandle,
                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                         @node^.RT.CLEAR_COLOR,
                         1,@range);

 //writeback
 ri.mark_init;

 if (resource_instance^.next_overlap.mem_usage<>0) then
 begin
  pm4_write_back(ctx.Cmd,ri);
  //
  resource_instance^.resource^.rwriteback:=False;
 end else
 begin
  //
  resource_instance^.resource^.rwriteback:=True;
 end;
 //writeback
 }
end;
|
|
|
|
//Marks the buffer written by a clear compute shader as "cleared".
//The first uniform buffer with TM_WRITE usage that has a resource instance
//in the current node is taken as the clear target; its rclear flag is set
//on the stream resource, on the fetched meta buffer and on any htile/cmask
//stream resource found for the same address and size.
procedure Prepare_buf_clear(var ctx:t_me_render_context;
                            var UniformBuilder:TvUniformBuilder);
var
 idx:Integer;

 inst:p_pm4_resource_instance;
 target:p_pm4_resource;

 meta_buf:TvMetaBuffer;

 //sets the clear flag on an optional meta resource
 procedure mark_cleared(meta:p_pm4_resource);
 begin
  if (meta<>nil) then
  begin
   meta^.rclear:=True;
  end;
 end;

begin
 target:=nil;

 //buffers: pick the first one with write usage
 For idx:=0 to High(UniformBuilder.FBuffers) do
 begin
  if ((UniformBuilder.FBuffers[idx].memuse and TM_WRITE)=0) then Continue;

  inst:=ctx.node^.scope.find_buffer_resource_instance(R_BUF,
                                                     UniformBuilder.FBuffers[idx].addr,
                                                     UniformBuilder.FBuffers[idx].size);
  if (inst<>nil) then
  begin
   target:=inst^.resource;
   Break;
  end;
 end;
 //buffers

 //TODO: get clear value!

 Assert(target<>nil);

 //set flag by buffer in current stream
 target^.rclear:=True;

 //set flag by buffer to next stream
 meta_buf:=FetchBuffer(ctx.Cmd,target^.rkey.Addr,target^.rsize);

 Assert(meta_buf<>nil);

 meta_buf.rclear:=True;

 //set flag by htile in current stream
 mark_cleared(ctx.stream^.find_buffer_resource(R_HTILE,target^.rkey.Addr,target^.rsize));

 //set flag by cmask in current stream
 mark_cleared(ctx.stream^.find_buffer_resource(R_CMASK,target^.rkey.Addr,target^.rsize));
end;
|
|
|
|
//Builds all state needed before a compute dispatch is recorded:
//re-fetches the compute shader group, fetches and binds the compute
//pipeline, prepares/binds uniforms and uploads push constants.
// ctx  - render context that owns the command buffer
// node - dispatch node providing the compute register group
//Returns True when the function runs to its end; the only non-returning
//paths visible here are failing asserts.
function pm4_DispatchPrepare(var ctx:t_me_render_context;node:p_pm4_node_Dispatch):Boolean;
var
 dst:PGPU_USERDATA;

 CP_KEY:TvComputePipelineKey;
 CP:TvComputePipeline2;

 FUniformBuilder:TvUniformBuilder;

 //
 GPU_REGS:TGPU_REGS;
 pa:TPushConstAllocator;
 pp:PPushConstAllocator;
begin
 Result:=False;
 ////////

 //hack: COMPUTE_USER_DATA is treated as the compute-stage slot of a
 //TGPU_USERDATA, so step back by that slot's offset to get its base
 dst:=Pointer(@node^.COMPUTE_GROUP.COMPUTE_USER_DATA)-Ptruint(@TGPU_USERDATA(nil^).A[vShaderStageCs]);

 //recheck shaders

 //start from a zeroed register set so that groups which are not
 //assigned below are nil instead of stack garbage
 GPU_REGS:=Default(TGPU_REGS);

 GPU_REGS.SC_REG:=@node^.COMPUTE_GROUP;

 pa.Init;
 pp:=@pa;

 node^.ShaderGroup:=FetchShaderGroupCS(GPU_REGS,pp);
 Assert(node^.ShaderGroup<>nil);
 //recheck shaders

 //the key is passed by address, so clear it first like the other keys
 CP_KEY:=Default(TvComputePipelineKey);

 CP_KEY.FShaderGroup:=node^.ShaderGroup;
 CP:=FetchComputePipeline(ctx.Cmd,@CP_KEY);

 FUniformBuilder:=Default(TvUniformBuilder);
 CP_KEY.FShaderGroup.ExportUnifBuilder(FUniformBuilder,dst);

 //htile/cmask/rt heuristic
 if (CP_KEY.FShaderGroup.FKey.FShaders[vShaderStageCs].IsCSClearShader) then
 begin
  Prepare_buf_clear(ctx,FUniformBuilder);
  //
  ctx.InsertLabel('clear htile/cmask/rt');
 end;

 Prepare_Uniforms(ctx,BP_COMPUTE,FUniformBuilder);
 ////////

 DumpShaderGroup(CP_KEY.FShaderGroup);

 if not ctx.Cmd.BindCompute(CP) then
 begin
  Writeln(stderr,'BindCompute(CP)');

  DumpShaderGroup(CP_KEY.FShaderGroup);

  Assert(false ,'BindCompute(CP)');
 end;

 Bind_Uniforms(ctx,
               BP_COMPUTE,
               FUniformBuilder);

 Bind_Pushs(ctx,CP_KEY.FShaderGroup,dst);

 Result:=True;
end;
|
|
|
|
//Records a direct compute dispatch with the group counts stored in the
//node, followed by the post-dispatch write back.
procedure pm4_DispatchDirect(var ctx:t_me_render_context;node:p_pm4_node_DispatchDirect);
begin
 pm4_InitStream(ctx);

 //if not ctx.WaitConfirmOrSwitch then Exit;

 StartFrameCapture;

 ctx.BeginCmdBuffer;

 //the dispatch is recorded outside of a render pass
 ctx.Cmd.EndRenderPass;

 if pm4_DispatchPrepare(ctx,node) then
 begin
  Writeln('DispatchDirect(',node^.DIM_X,',',node^.DIM_Y,',',node^.DIM_Z,')');

  ctx.Cmd.DispatchDirect(node^.DIM_X,node^.DIM_Y,node^.DIM_Z);

  /////////

  pm4_Writeback_After(ctx);

  ctx.FlushParent;
 end;
end;
|
|
|
|
//Records an indirect compute dispatch whose arguments are located at
//node^.BASE + node^.Offset, followed by the post-dispatch write back.
procedure pm4_DispatchIndirect(var ctx:t_me_render_context;node:p_pm4_node_DispatchIndirect);
begin
 pm4_InitStream(ctx);

 //if not ctx.WaitConfirmOrSwitch then Exit;

 StartFrameCapture;

 ctx.BeginCmdBuffer;

 //the dispatch is recorded outside of a render pass
 ctx.Cmd.EndRenderPass;

 if pm4_DispatchPrepare(ctx,node) then
 begin
  Writeln('DispatchIndirect(0x',HexStr(node^.BASE,11),',0x',HexStr(node^.Offset,8),')');

  ctx.Cmd.DispatchIndirect(Pointer(node^.BASE),node^.Offset);

  /////////

  pm4_Writeback_After(ctx);

  ctx.FlushParent;
 end;
end;
|
|
|
|
//Returns (v*m) div d using the full 128-bit intermediate product.
// m - multiplier
// d - divisor (must be non-zero)
// v - value to scale
//The quotient must fit into 64 bits, otherwise the CPU raises a divide
//error. Clobbers rax, rdx and the flags.
function mul_div_u64(m,d,v:QWORD):QWORD; sysv_abi_default; assembler; nostackframe;
asm
 movq v,%rax   // rax = v (copied first: mulq overwrites rdx)
 mulq m        // rdx:rax = v*m (unsigned 128-bit product)
 divq d        // rax = rdx:rax div d, rdx = remainder
end;
|
|
|
|
//Clock rates (ticks per second) used to convert elapsed time into
//GPU timestamp values.
const
 GLOBAL_CLOCK_FREQUENCY  =100*1000*1000; //100MHz global clock
 GPU_CORE_CLOCK_FREQUENCY=800*1000*1000; //800MHz GPU core clock

 //neo mode & ext_gpu_timer -> 911*1000*1000 (911MHz)
|
|
|
|
|
|
//Handles an end-of-pipe event: once WaitConfirmOrSwitch succeeds it
//writes the requested value (32/64-bit data or a relative clock value)
//to node^.addr and optionally raises the EOP event.
// ctx  - render context
// node - EOP node (target address, data selector, data, interrupt selector)
//The procedure may be entered repeatedly for the same node while waiting;
//hint_repeat makes sure the label is inserted only once.
procedure pm4_EventWriteEop(var ctx:t_me_render_context;node:p_pm4_node_EventWriteEop);
var
 curr,diff:QWORD;
 addr_dmem:Pointer;
 data_size:Byte;
begin
 if not ctx.stream^.hint_repeat then
 begin
  ctx.InsertLabel(PChar('WriteEop:0x'+HexStr(QWORD(node^.addr),10)));
  if p_print_gpu_ops then
  begin
   Writeln('WriteEop:0x'+HexStr(QWORD(node^.addr),10));
  end;
  ctx.stream^.hint_repeat:=True;
 end;

 //not confirmed yet: leave and get called again later
 if not ctx.WaitConfirmOrSwitch then Exit;

 ctx.stream^.hint_repeat:=False;

 curr:=md_rdtsc_unit;
 diff:=curr-ctx.rel_time; //time relative to ctx.rel_time

 if (node^.addr<>nil) then
 begin

  //nothing written yet; stays 0 when the data is discarded or the
  //address can not be resolved, so the trigger range below is defined
  data_size:=0;

  addr_dmem:=nil;
  if (node^.dataSel<>EVENTWRITEEOP_DATA_SEL_DISCARD) then
  begin
   addr_dmem:=get_dmem_ptr(node^.addr);
  end;

  if (addr_dmem<>nil) then
  Case node^.dataSel of
   //
   EVENTWRITEEOP_DATA_SEL_DISCARD:
    data_size:=0;

   //32bit data
   EVENTWRITEEOP_DATA_SEL_SEND_DATA32:
    begin
     PDWORD(addr_dmem)^:=node^.data;

     data_size:=4;
    end;

   //64bit data
   EVENTWRITEEOP_DATA_SEL_SEND_DATA64:
    begin
     PQWORD(addr_dmem)^:=node^.data;

     data_size:=8;
    end;

   //system 100Mhz global clock. (relative time)
   EVENTWRITEEOP_DATA_SEL_SEND_GPU_CLOCK:
    begin
     PQWORD(addr_dmem)^:=mul_div_u64(GLOBAL_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);

     data_size:=8;
    end;

   //GPU 800Mhz clock. (relative time)
   EVENTWRITEEOP_DATA_SEL_SEND_CP_PERFCOUNTER:
    begin
     PQWORD(addr_dmem)^:=mul_div_u64(GPU_CORE_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);

     data_size:=8;
    end;

   else
    Assert(false,'pm4_EventWriteEop');
  end;

  //report the written byte range [addr, addr+data_size)
  vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.addr),QWORD(node^.addr)+data_size,nil,M_DMEM_WRITE);
 end;

 if (node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT) or
    (node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT_ON_CONFIRM) then
 begin
  ctx.me^.knote_eventid($40,0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???)
 end;

end;
|
|
|
|
{
 Handles a SubmitFlipEop node.

 The debug label is inserted once per node (guarded by
 ctx.stream^.hint_repeat). After ctx.WaitConfirmOrSwitch succeeds the optional
 ctx.me^.on_submit_flip_eop callback receives node^.eop_value and, if
 node^.intSel requests an interrupt, event $40 is raised with the current
 time.
}
procedure pm4_SubmitFlipEop(var ctx:t_me_render_context;node:p_pm4_node_SubmitFlipEop);
var
 now_unit:QWORD;
 send_int:Boolean;
begin
 if not ctx.stream^.hint_repeat then
 begin
  ctx.InsertLabel(PChar('SubmitFlipEop:0x'+HexStr(node^.eop_value,16)));
  ctx.stream^.hint_repeat:=True;
 end;

 if not ctx.WaitConfirmOrSwitch then Exit;

 ctx.stream^.hint_repeat:=False;

 if Assigned(ctx.me^.on_submit_flip_eop) then
 begin
  ctx.me^.on_submit_flip_eop(node^.eop_value);
 end;

 //the timestamp is taken after the callback has returned
 now_unit:=md_rdtsc_unit;

 send_int:=(node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT) or
           (node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT_ON_CONFIRM);

 if send_int then
 begin
  ctx.me^.knote_eventid($40,0,now_unit*NSEC_PER_UNIT,0); //(absolute time) (freq???)
 end;
end;
|
|
|
|
//Returns the position of a stream type relative to stCompute0
//(stCompute0 itself yields 0).
function get_compute_pipe_id(buft:t_pm4_stream_type):Byte; inline;
begin
 Exit(ord(buft) - ord(stCompute0));
end;
|
|
|
|
{
 Handles a ReleaseMem node.

 A debug label is inserted once per node (ctx.stream^.hint_repeat stays set
 while the handler exits early on ctx.WaitConfirmOrSwitch and is entered
 again). After the confirmation the value selected by node^.srcSel is stored
 at node^.addr, the written range is reported with vm_map_track_trigger and,
 if node^.intSel requests an interrupt, an event whose id is derived from the
 stream type (get_compute_pipe_id) is raised through ctx.me^.knote_eventid.

 Only RELEASEMEM_DST_SEL_MEMORY is supported as destination, other values
 trigger an assertion.

 ctx  - render context of the worker thread
 node - the ReleaseMem node to process
}
procedure pm4_ReleaseMem(var ctx:t_me_render_context;node:p_pm4_node_ReleaseMem);
var
 curr,diff:QWORD;
 addr_dmem:Pointer;
 data_size:Byte;
begin
 if not ctx.stream^.hint_repeat then
 begin
  ctx.InsertLabel(PChar('ReleaseMem:0x'+HexStr(QWORD(node^.addr),10)));
  ctx.stream^.hint_repeat:=True;
 end;

 if not ctx.WaitConfirmOrSwitch then Exit;

 ctx.stream^.hint_repeat:=False;

 curr:=md_rdtsc_unit;
 diff:=curr-ctx.rel_time; //time elapsed since the relative timer was started

 if (node^.addr<>nil) then
 begin

  //Nothing has been written yet. Without this the size passed to
  //vm_map_track_trigger was uninitialised whenever the srcSel Case below is
  //skipped (srcSel=DISCARD or get_dmem_ptr returned nil).
  data_size:=0;

  addr_dmem:=nil;
  if (node^.srcSel<>RELEASEMEM_DATA_SEL_DISCARD) then
  begin
   addr_dmem:=get_dmem_ptr(node^.addr);
  end;

  Case node^.dstSel of
   RELEASEMEM_DST_SEL_MEMORY:;
   RELEASEMEM_DST_SEL_L2    :Assert(false,'RELEASEMEM_DST_SEL_L2');
   else
    Assert(false,'pm4_ReleaseMem:dstSel');
  end;

  if (addr_dmem<>nil) then
  Case node^.srcSel of
   //kept for completeness, addr_dmem is never resolved for DISCARD
   RELEASEMEM_DATA_SEL_DISCARD:
    data_size:=0;

   //32bit data
   RELEASEMEM_DATA_SEL_SEND_DATA32:
    begin
     PDWORD(addr_dmem)^:=node^.data;

     data_size:=4;
    end;

   //64bit data
   RELEASEMEM_DATA_SEL_SEND_DATA64:
    begin
     PQWORD(addr_dmem)^:=node^.data;

     data_size:=8;
    end;

   //system 100Mhz global clock. (relative time)
   RELEASEMEM_DATA_SEL_SEND_GPU_CLOCK:
    begin
     PQWORD(addr_dmem)^:=mul_div_u64(GLOBAL_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);

     data_size:=8;
    end;

   //GPU 800Mhz clock. (relative time)
   RELEASEMEM_DATA_SEL_SEND_CP_PERFCOUNTER:
    begin
     PQWORD(addr_dmem)^:=mul_div_u64(GPU_CORE_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);

     data_size:=8;
    end;

   else
    Assert(false,'pm4_ReleaseMem:srcSel');
  end;

  //report the bytes actually written (an empty range when nothing was stored)
  vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.addr),QWORD(node^.addr)+data_size,nil,M_DMEM_WRITE);
 end;

 if (node^.intSel=RELEASEMEM_INT_SEL_SEND_INT) or
    (node^.intSel=RELEASEMEM_INT_SEL_SEND_INT_ON_CONFIRM) then
 begin
  ctx.me^.knote_eventid(get_compute_pipe_id(ctx.stream^.buft),0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???)
 end;

end;
|
|
|
|
{
 Handles an EventWrite node according to node^.eventType:
  - the flush/invalidate events are recorded with ctx.Cmd.WriteEvent when a
    command buffer is allocated, otherwise they are dropped;
  - trace/perf-counter events only insert a debug label;
  - PIXEL_PIPE_STAT_RESET is not implemented and only logs a TODO;
  - any other event type is logged to stderr and asserts.
}
procedure pm4_EventWrite(var ctx:t_me_render_context;node:p_pm4_node_EventWrite);
begin
 Case node^.eventType of

  CS_PARTIAL_FLUSH,            //CS
  CACHE_FLUSH_AND_INV_EVENT,   //CB,DB
  DB_CACHE_FLUSH_AND_INV,      //DB
  FLUSH_AND_INV_DB_META,       //HTILE
  FLUSH_AND_INV_CB_META,       //CMASK
  FLUSH_AND_INV_CB_PIXEL_DATA: //CB
   begin
    //GPU path only
    if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
     ctx.Cmd.WriteEvent(node^.eventType);
   end;

  //FLUSH_AND_INV_CB_DATA_TS is not handled (was: Writeln(' eventType=FLUSH_AND_INV_CB_DATA_TS'))

  THREAD_TRACE_MARKER:ctx.InsertLabel('THREAD_TRACE_MARKER');
  PIPELINESTAT_STOP  :ctx.InsertLabel('PIPELINESTAT_STOP');
  PERFCOUNTER_START  :ctx.InsertLabel('PERFCOUNTER_START');
  PERFCOUNTER_STOP   :ctx.InsertLabel('PERFCOUNTER_STOP');
  PERFCOUNTER_SAMPLE :ctx.InsertLabel('PERFCOUNTER_SAMPLE');

  //[OcclusionQuery] Reset this query
  PIXEL_PIPE_STAT_RESET:
   begin
    Writeln(stderr,'TODO:PIXEL_PIPE_STAT_RESET');
   end;

  else
   begin
    Writeln(stderr,'EventWrite eventType=0x',HexStr(node^.eventType,2));
    Assert (false ,'EventWrite eventType=0x'+HexStr(node^.eventType,2));
   end;

 end;
end;
|
|
|
|
// Stand-in for a real pixel-pipe counter: pm4_PipeStatDump increments it once
// per call and stores it (combined with a "ready" bit) into every enabled slot.
var
|
|
fake_zpass_counter:QWORD=0;
|
|
|
|
{
 Handles a PipeStatDump node by writing a fake counter value.

 After ctx.WaitConfirmOrSwitch succeeds, fake_zpass_counter is incremented and
 stored into the slots starting at node^.address. The number of slots is 8 or
 16 depending on bit 0 of p_neomode, the distance between slots comes from
 node^.Control.stride and only the slots whose bit is set in
 node^.Control.instance_enable are written. With a 4 byte stride a DWORD with
 bit 31 set is stored, otherwise a QWORD with bit 63 set.
}
procedure pm4_PipeStatDump(var ctx:t_me_render_context;node:p_pm4_node_PipeStatDump);
const
 c_db_counts:array[0..1] of Byte=(8,16);
 c_db_stride:array[0..3] of Byte=(4,8,16,32);
 c_ready_mask_64=QWORD(1) shl 63;
 c_ready_mask_32=QWORD(1) shl 31;
var
 idx,db_count,slot_size:Byte;
 enabled:Word;
 slot:Pointer;
begin
 if not ctx.WaitConfirmOrSwitch then Exit;

 db_count :=c_db_counts[p_neomode and 1];
 slot_size:=c_db_stride[node^.Control.stride];
 enabled  :=node^.Control.instance_enable;

 slot:=get_dmem_ptr(Pointer(node^.address));

 fake_zpass_counter:=fake_zpass_counter+1;

 //walk all slots with a byte cursor, skipped slots still advance it
 For idx:=0 to db_count-1 do
 begin
  if (enabled and (1 shl idx))<>0 then
  begin
   if (slot_size=4) then
   begin
    PDWORD(slot)^:=c_ready_mask_32 or fake_zpass_counter;
   end else
   begin
    PQWORD(slot)^:=c_ready_mask_64 or fake_zpass_counter;
   end;
  end;
  slot:=slot+slot_size;
 end;

end;
|
|
|
|
{
 Handles an EventWriteEos node.

 Nothing happens when node^.addr is nil. The only supported command is
 EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY (anything else asserts):
 with an allocated command buffer the store is recorded via ctx.Cmd.WriteEos,
 otherwise node^.data is written directly and the 4 byte range is reported
 with vm_map_track_trigger.
}
procedure pm4_EventWriteEos(var ctx:t_me_render_context;node:p_pm4_node_EventWriteEos);
var
 host_ptr:Pointer;
begin
 if (node^.addr=nil) then Exit;

 if (node^.command<>EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY) then
 begin
  Assert(false,'pm4_EventWriteEos');
  Exit;
 end;

 //32bit data
 if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
 begin
  //GPU
  ctx.Cmd.WriteEos(node^.eventType,node^.addr,node^.data,false);
  Exit;
 end;

 //soft
 host_ptr:=get_dmem_ptr(node^.addr);

 PDWORD(host_ptr)^:=node^.data;

 vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.addr),QWORD(node^.addr)+4,nil,M_DMEM_WRITE);
end;
|
|
|
|
{
 Handles a WriteData node: copies node^.num_dw DWORDs from node^.src to
 node^.dst.

 Supported destinations are WRITE_DATA_DST_SEL_MEMORY_SYNC, _TCL2 and
 _MEMORY_ASYNC, any other node^.dstSel asserts. A nil destination is ignored.
 With an allocated command buffer the copy is recorded via ctx.Cmd.dmaData1,
 otherwise it is done directly with Move and the written range is reported
 with vm_map_track_trigger.

 ctx  - render context of the worker thread
 node - the WriteData node to process
}
procedure pm4_WriteData(var ctx:t_me_render_context;node:p_pm4_node_WriteData);
var
 src_dmem:PDWORD;
 dst_dmem:PDWORD;
 byteSize:QWORD;
begin
 StartFrameCapture;

 case node^.dstSel of
  WRITE_DATA_DST_SEL_MEMORY_SYNC,  //writeDataInline
  WRITE_DATA_DST_SEL_TCL2,         //writeDataInlineThroughL2
  WRITE_DATA_DST_SEL_MEMORY_ASYNC:
   if (node^.dst<>nil) then
   begin

    //Computed once for both paths. The soft path used to print byteSize
    //before assigning it, so the log showed an uninitialised value.
    byteSize:=node^.num_dw*SizeOf(DWORD);

    if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
    begin
     //GPU

     if p_print_gpu_ops then
     begin
      Writeln('[1]WriteData:0x',HexStr(QWORD(node^.src),10),'->',HexStr(QWORD(node^.dst),10),':size=0x',HexStr(byteSize,5));
     end;

     ctx.Cmd.dmaData1(node^.src,node^.dst,byteSize,node^.wrConfirm);
    end else
    begin
     //soft

     if p_print_gpu_ops then
     begin
      Writeln('[2]WriteData:0x',HexStr(QWORD(node^.src),10),'->',HexStr(QWORD(node^.dst),10),':size=0x',HexStr(byteSize,5));
     end;

     src_dmem:=get_dmem_ptr(node^.src);

     dst_dmem:=get_dmem_ptr(node^.dst);

     Move(src_dmem^,dst_dmem^,byteSize);

     vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.dst),QWORD(node^.dst)+byteSize,nil,M_DMEM_WRITE);
    end;
   end;
  else
   Assert(false,'WriteData: dstSel=0x'+HexStr(node^.dstSel,1));
 end;

end;
|
|
|
|
const
 //Printable names of the DmaData source/destination selectors, indexed by
 //the low 4 bits of the selector. Values without a name print as hex.
 DmaDataStr:array[0..15] of Pchar=(
  'Memory',              //0x0
  'Gds',                 //0x1
  'Data',                //0x2
  'MemoryUsingL2',       //0x3
  'Register',            //0x4
  '0x5',
  '0x6',
  '0x7',
  '0x8',
  '0x9',
  '0xA',
  '0xB',
  'RegisterNoIncrement', //0xC
  '0xD',
  '0xE',
  '0xF'
 );
|
|
|
|
{
 Handles a DmaData node.

 Two groups of source/destination selector combinations are supported:
  - memory -> memory (with or without L2): a copy of node^.numBytes bytes;
  - inline data -> memory (with or without L2): the destination is filled
    with the 32-bit value held in node^.src.
 With an allocated command buffer the operation is recorded on it
 (dmaData1/dmaData2); otherwise it is done directly on the resolved
 addresses and reported with vm_map_track_trigger.
 Any other combination is printed and asserts.
}
procedure pm4_DmaData(var ctx:t_me_render_context;node:p_pm4_node_DmaData);
var
 src_addr:QWORD;
 dst_addr:QWORD;
 src_host:Pointer;
 dst_host:Pointer;
 num_bytes:DWORD;
 src_sel,dst_sel:Byte;
begin

 StartFrameCapture;

 dst_addr :=node^.dst;
 src_addr :=node^.src;
 num_bytes:=node^.numBytes;
 src_sel  :=node^.srcSel;
 dst_sel  :=node^.dstSel;

 //source selector in the low nibble, destination selector in the high nibble
 case (src_sel or (dst_sel shl 4)) of

  //memory -> memory
  (kDmaDataSrcMemory        or (kDmaDataDstMemory        shl 4)),
  (kDmaDataSrcMemoryUsingL2 or (kDmaDataDstMemory        shl 4)),
  (kDmaDataSrcMemory        or (kDmaDataDstMemoryUsingL2 shl 4)),
  (kDmaDataSrcMemoryUsingL2 or (kDmaDataDstMemoryUsingL2 shl 4)):
   begin
    if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
    begin
     //GPU
     ctx.Cmd.dmaData1(Pointer(src_addr),Pointer(dst_addr),num_bytes,node^.cpSync<>0);
    end else
    begin
     //soft
     dst_host:=get_dmem_ptr(Pointer(dst_addr));

     src_host:=get_dmem_ptr(Pointer(src_addr));

     Move(src_host^,dst_host^,num_bytes);

     vm_map_track_trigger(p_proc.p_vmspace,QWORD(dst_addr),QWORD(dst_addr)+num_bytes,nil,M_DMEM_WRITE);
    end;
   end;

  //inline 32-bit value -> memory
  (kDmaDataSrcData or (kDmaDataDstMemory        shl 4)),
  (kDmaDataSrcData or (kDmaDataDstMemoryUsingL2 shl 4)):
   begin
    if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
    begin
     //GPU
     ctx.Cmd.dmaData2(DWORD(src_addr),Pointer(dst_addr),num_bytes,node^.cpSync<>0);
    end else
    begin
     //soft
     dst_host:=get_dmem_ptr(Pointer(dst_addr));

     FillDWORD(dst_host^,(num_bytes div 4),DWORD(src_addr));

     vm_map_track_trigger(p_proc.p_vmspace,QWORD(dst_addr),QWORD(dst_addr)+num_bytes,nil,M_DMEM_WRITE);
    end;
   end;

  else
   Writeln('DmaData: srcSel='+DmaDataStr[src_sel and 15]+' dstSel='+DmaDataStr[dst_sel and 15]);
   Assert(false,'DmaData: srcSel='+DmaDataStr[src_sel and 15]+' dstSel='+DmaDataStr[dst_sel and 15]);
 end;

end;
|
|
|
|
//Maps an address inside the label area of dev_dce.dce_page to an index
//(8 bytes per label). The accepted range runs from the `labels` field up to
//8 bytes past the start of the `label_` field; any other address yields -1.
function get_dce_label_id(addr_dmem:Pointer):Integer;
var
 addr :QWORD;
 first:QWORD;
 limit:QWORD;
begin
 addr :=QWORD(addr_dmem);
 first:=QWORD(@dev_dce.dce_page^.labels);
 limit:=QWORD(@dev_dce.dce_page^.label_)+8;

 if (addr<first) or (addr>=limit) then Exit(-1);

 Result:=(addr-first) div 8;
end;
|
|
|
|
{
 Evaluates the condition of a WaitRegMem node against a memory value.

 node - supplies mask, refValue and compareFunc
 dmem - already resolved address of the 32-bit value to test

 The value read from dmem is masked with node^.mask and compared with
 node^.refValue using node^.compareFunc. Returns True when the condition
 holds. An unknown compareFunc asserts; when assertions are compiled out the
 function now returns False instead of an undefined value.
}
Function me_test_mem(node:p_pm4_node_WaitRegMem;{var }dmem:PDWORD):Boolean;
var
 val,ref:DWORD;
begin
 //The address used to be resolved here via get_dmem_ptr(node^.pollAddr);
 //the caller now passes the resolved pointer.

 //Writeln('me_test_mem:labels[',get_dce_label_id(dmem),']=',dmem^,' refValue=',node^.refValue,' compareFunc=',node^.compareFunc);

 //defined result for the unknown compareFunc branch below
 Result:=False;

 val:=dmem^ and node^.mask;
 ref:=node^.refValue;
 Case node^.compareFunc of
  WAIT_REG_MEM_FUNC_ALWAYS       :Result:=True;
  WAIT_REG_MEM_FUNC_LESS         :Result:=(val<ref);
  WAIT_REG_MEM_FUNC_LESS_EQUAL   :Result:=(val<=ref);
  WAIT_REG_MEM_FUNC_EQUAL        :Result:=(val=ref);
  WAIT_REG_MEM_FUNC_NOT_EQUAL    :Result:=(val<>ref);
  WAIT_REG_MEM_FUNC_GREATER_EQUAL:Result:=(val>=ref);
  WAIT_REG_MEM_FUNC_GREATER      :Result:=(val>ref);
  else
   Assert(false,'me_test_mem');
 end;
end;
|
|
|
|
//Registers the resolved wait address with kq through gc_add_internal_ptr and
//remembers it in Fregs_addr. Does nothing while no address is resolved.
procedure t_me_wait_addr.add_reg(kq:Pointer);
begin
 if (Fdmem_addr=nil) then Exit;

 Fregs_addr:=Fdmem_addr;
 gc_add_internal_ptr(kq,Fregs_addr,@Self);
end;
|
|
|
|
//Removes the address remembered in Fregs_addr from kq through
//gc_del_internal_ptr and clears it. Does nothing when none is remembered.
procedure t_me_wait_addr.del_reg(kq:Pointer);
begin
 if (Fregs_addr=nil) then Exit;

 gc_del_internal_ptr(kq,Fregs_addr);
 Fregs_addr:=nil;
end;
|
|
|
|
//Switches the object to a new wait address. When addr differs from the
//current Fcode_addr the old registration is dropped (del_reg) and addr is
//resolved again with get_dmem_ptr; an unchanged address is a no-op.
procedure t_me_wait_addr.set_adr(kq,addr:Pointer);
begin
 if (Fcode_addr<>addr) then
 begin
  del_reg(kq);

  Fcode_addr:=addr;
  Fdmem_addr:=get_dmem_ptr(addr);
 end;
end;
|
|
|
|
//Sends s (length + 1 bytes, i.e. including the terminating #0) as a 'WARNING'
//message through p_host_ipc.SendSync and returns the result of that call.
function SendWarnMsg(const s:RawByteString):Integer;
begin
 Exit(p_host_ipc.SendSync(HashIpcStr('WARNING'),Length(s)+1,pchar(s)));
end;
|
|
|
|
// Handles a WaitRegMem node: tests the memory at node^.pollAddr with
// me_test_mem and, while the condition does not hold, yields to the scheduler
// (ctx.switch_task) so the node is retried later.
//  ctx  - render context of the worker thread
//  node - the WaitRegMem node to process
// The wait address of the stream type is kept in ctx.me^.wait_ptr[] and is
// reset to nil only when the wait is over (condition met or skipped).
procedure pm4_WaitRegMem(var ctx:t_me_render_context;node:p_pm4_node_WaitRegMem);
|
|
label
|
|
_repeat,
|
|
_reset;
|
|
var
|
|
wait_addr:p_me_wait_addr;
|
|
begin
|
|
// insert the debug label only once; hint_repeat stays set while the handler
// exits on WaitConfirmOrSwitch below and is entered again
if not ctx.stream^.hint_repeat then
|
|
begin
|
|
ctx.InsertLabel(PChar('WaitRegMem:0x'+HexStr(QWORD(node^.pollAddr),10)));
|
|
ctx.stream^.hint_repeat:=True;
|
|
end;
|
|
|
|
|
|
if not ctx.WaitConfirmOrSwitch then Exit;
|
|
|
|
|
|
ctx.stream^.hint_repeat:=False;
|
|
|
|
|
|
// one wait slot per stream type
wait_addr:=@ctx.me^.wait_ptr[ctx.stream^.buft];
|
|
|
|
|
|
// resolves pollAddr unless the slot already holds this address
wait_addr^.set_adr(ctx.me^.gc_kqueue,node^.pollAddr);
|
|
|
|
|
|
_repeat:
|
|
|
|
|
|
if me_test_mem(node,wait_addr^.Fdmem_addr) then
|
|
begin
|
|
// condition met: falls through to _reset below
ctx.stream^.hint_loop:=0;
|
|
end else
|
|
begin
|
|
// register the address with the gc kqueue
wait_addr^.add_reg(ctx.me^.gc_kqueue);
|
|
// count failed polls of this node
|
|
Inc(ctx.stream^.hint_loop);
|
|
//
|
|
if wait_loop_detect then
|
|
if (ctx.stream^.hint_loop>10000) then
|
|
begin
|
|
//loop detection: more than 10000 failed polls in a row
|
|
if wait_loop_autoskip then
|
|
begin
|
|
Writeln(stderr,'WaitRegMem hang detected 0x',HexStr(QWORD(node^.pollAddr),10),' -> skip');
|
|
goto _reset;
|
|
end else
|
|
begin
|
|
Writeln(stderr,'WaitRegMem hang detected 0x',HexStr(QWORD(node^.pollAddr),10));
|
|
// ask through the host IPC; a result of 0 means "skip"
|
|
if SendWarnMsg('Hang in WaitRegMem instruction detected, skip instruction?')=0 then
|
|
begin
|
|
Writeln(stderr,' -> skip');
|
|
goto _reset;
|
|
end else
|
|
begin
|
|
Writeln(stderr,' -> repeat');
|
|
ctx.stream^.hint_loop:=0;
|
|
end;
|
|
//
|
|
end;
|
|
end; //hint_loop
|
|
// give other streams a chance to run
|
|
ctx.switch_task;
|
|
//early check: when the scheduler start entry is this stream's own stall list, test again right away
|
|
if (ctx.me^.sheduler.start=@ctx.me^.stall[ctx.stream^.buft]) then
|
|
begin
|
|
goto _repeat;
|
|
end;
|
|
//
|
|
Exit; //do not reset the wait address, the node will be processed again
|
|
end;
|
|
|
|
|
|
_reset:
|
|
// wait finished (or skipped): clear the loop counter and the wait address
ctx.stream^.hint_loop:=0;
|
|
wait_addr^.set_adr(ctx.me^.gc_kqueue,nil);
|
|
end;
|
|
|
|
//
|
|
|
|
{
 Handles a LoadConstRam node: copies node^.num_dw DWORDs from memory at
 node^.addr into ctx.me^.CONST_RAM starting at byte offset node^.offset.

 The destination range is clamped to CONST_RAM_SIZE, so a request that starts
 or ends beyond the constant RAM is shortened (possibly to nothing).

 ctx  - render context of the worker thread
 node - the LoadConstRam node to process
}
procedure pm4_LoadConstRam(var ctx:t_me_render_context;node:p_pm4_node_LoadConstRam);
var
 addr_dmem:Pointer;

 //64-bit on purpose: in 32-bit arithmetic start+size could wrap around, slip
 //past the clamp below and produce __end<start (a huge copy size)
 start:QWORD;
 __end:QWORD;
 size :QWORD;
begin
 //if not ctx.WaitConfirmOrSwitch then Exit;

 addr_dmem:=get_dmem_ptr(node^.addr);

 start:=node^.offset;
 __end:=start+(QWORD(node^.num_dw)*SizeOf(DWORD));

 if (start>CONST_RAM_SIZE) then
 begin
  start:=CONST_RAM_SIZE;
 end;

 if (__end>CONST_RAM_SIZE) then
 begin
  __end:=CONST_RAM_SIZE;
 end;

 //start<=__end<=CONST_RAM_SIZE holds here
 size:=(__end-start);

 if p_print_gpu_ops then
 begin
  Writeln('LoadConstRam:0x',HexStr(QWORD(addr_dmem),10),'->[0x',HexStr(start,4),']:size=0x',HexStr(size,6));
 end;

 //skip empty copies: start may equal CONST_RAM_SIZE, which must not be used as an index
 if (size<>0) then
 begin
  Move(addr_dmem^,ctx.me^.CONST_RAM[start],size);
 end;
end;
|
|
|
|
{
 Handles a DumpConstRam node: copies node^.num_dw DWORDs from
 ctx.me^.CONST_RAM at byte offset node^.offset to memory at node^.addr, then
 opens the command buffer and registers the written range with
 ctx.Cmd.AddPlannedTrigger.

 The source range is clamped to CONST_RAM_SIZE, so a request that starts or
 ends beyond the constant RAM is shortened (possibly to nothing).

 ctx  - render context of the worker thread
 node - the node to process (shares its layout with LoadConstRam)
}
procedure pm4_DumpConstRam(var ctx:t_me_render_context;node:p_pm4_node_LoadConstRam);
var
 addr_dmem:Pointer;

 //64-bit on purpose: in 32-bit arithmetic start+size could wrap around, slip
 //past the clamp below and produce __end<start (a huge copy size)
 start:QWORD;
 __end:QWORD;
 size :QWORD;
begin
 //if not ctx.WaitConfirmOrSwitch then Exit;

 addr_dmem:=get_dmem_ptr(node^.addr);

 start:=node^.offset;
 __end:=start+(QWORD(node^.num_dw)*SizeOf(DWORD));

 if (start>CONST_RAM_SIZE) then
 begin
  start:=CONST_RAM_SIZE;
 end;

 if (__end>CONST_RAM_SIZE) then
 begin
  __end:=CONST_RAM_SIZE;
 end;

 //start<=__end<=CONST_RAM_SIZE holds here
 size:=(__end-start);

 if p_print_gpu_ops then
 begin
  Writeln('DumpConstRam:[0x',HexStr(start,4),']->0x',HexStr(QWORD(addr_dmem),10),':size=0x',HexStr(size,6));
 end;

 //skip empty copies: start may equal CONST_RAM_SIZE, which must not be used as an index
 if (size<>0) then
 begin
  Move(ctx.me^.CONST_RAM[start],addr_dmem^,size);
 end;

 //still executed for an empty range, exactly as before
 ctx.BeginCmdBuffer;
 ctx.Cmd.AddPlannedTrigger(QWORD(node^.addr),QWORD(node^.addr)+size,nil);
end;
|
|
|
|
//
|
|
|
|
//Advances ctx.me^.CE_COUNT by one. The node carries no data used here.
procedure pm4_IncrementCE(var ctx:t_me_render_context;node:p_pm4_node);
begin
 Inc(ctx.me^.CE_COUNT,1);
end;
|
|
|
|
//Advances ctx.me^.DE_COUNT by one. The node carries no data used here.
procedure pm4_IncrementDE(var ctx:t_me_render_context;node:p_pm4_node);
begin
 Inc(ctx.me^.DE_COUNT,1);
end;
|
|
|
|
//Yields to the scheduler (ctx.switch_task) while CE_COUNT has not moved past
//DE_COUNT; otherwise the node completes immediately. The comparison is
//printed when p_print_gpu_ops is enabled.
procedure pm4_WaitOnCECounter(var ctx:t_me_render_context;node:p_pm4_node);
var
 must_wait:Boolean;
begin
 must_wait:=(ctx.me^.CE_COUNT <= ctx.me^.DE_COUNT);

 if p_print_gpu_ops then
 begin
  if must_wait then
   Writeln('WaitOnCECounter:(',ctx.me^.CE_COUNT,' <= ',ctx.me^.DE_COUNT,')')
  else
   Writeln('WaitOnCECounter:(',ctx.me^.CE_COUNT,' > ',ctx.me^.DE_COUNT,')');
 end;

 if must_wait then
 begin
  ctx.switch_task;
 end;
end;
|
|
|
|
//Yields to the scheduler (ctx.switch_task) while DE_COUNT-CE_COUNT is at
//least node^.diff; otherwise the node completes immediately. The comparison
//is printed when p_print_gpu_ops is enabled.
procedure pm4_WaitOnDECounterDiff(var ctx:t_me_render_context;node:p_pm4_node_WaitOnDECounterDiff);
var
 limit:DWORD;
 must_wait:Boolean;
begin
 limit:=node^.diff;

 //the difference is cast to DWORD to force an unsigned compare
 must_wait:=(DWORD(ctx.me^.DE_COUNT - ctx.me^.CE_COUNT) >= limit);

 if p_print_gpu_ops then
 begin
  if must_wait then
   Writeln('WaitOnDECounterDiff:(',ctx.me^.DE_COUNT,' - ',ctx.me^.CE_COUNT,') >= ',limit)
  else
   Writeln('WaitOnDECounterDiff:(',ctx.me^.DE_COUNT,' - ',ctx.me^.CE_COUNT,') < ',limit);
 end;

 if must_wait then
 begin
  ctx.switch_task;
 end;
end;
|
|
|
|
//Signals node^.event with RTLEventSetEvent once ctx.WaitConfirmOrSwitch
//succeeds; does nothing otherwise.
procedure pm4_PfpSyncMe(var ctx:t_me_render_context;node:p_pm4_node_PfpSyncMe);
begin
 if ctx.WaitConfirmOrSwitch then
 begin
  RTLEventSetEvent(node^.event);
 end;
end;
|
|
|
|
//
|
|
|
|
// Body of the pm4 "me" worker thread; never returns (repeat..until false).
//  me - the t_pm4_me instance whose queue and stall lists are served
// Each pass of the loop:
//  1. ends the frame capture when me^.imdone_count has changed
//  2. pops at most one stream from me^.queue and hands it to me^.add_stream
//  3. takes the stream returned by me^.get_next and dispatches its nodes by
//     ntype until the stream ends or a handler requests a task switch
//  4. when get_next returns nil: resets the scheduler and the relative timer,
//     calls ctx.on_idle if ctx.PingCmd returns False, then calls me^.wait
procedure pm4_me_thread(me:p_pm4_me); SysV_ABI_CDecl;
|
|
var
|
|
ctx:t_me_render_context;
|
|
// last value of me^.imdone_count seen by this thread
imdone_count:QWORD;
|
|
begin
|
|
sched_prio(curkthread,64);
|
|
|
|
|
|
ctx:=Default(t_me_render_context);
|
|
ctx.Init;
|
|
ctx.me:=me;
|
|
|
|
|
|
imdone_count:=0;
|
|
|
|
|
|
if use_renderdoc_capture then
|
|
begin
|
|
if not IsRenderDocPreLoaded then
|
|
begin
|
|
//disable capture if we are not working with Renderdoc GUI
|
|
use_renderdoc_capture:=False;
|
|
end else
|
|
begin
|
|
renderdoc.LoadRenderDoc;
|
|
renderdoc.UnloadCrashHandler;
|
|
end;
|
|
end;
|
|
|
|
|
|
me^.reset_sheduler;
|
|
|
|
|
|
repeat
|
|
|
|
|
|
//test submit done: a changed counter ends the current frame capture
|
|
if (me^.imdone_count<>imdone_count) then
|
|
begin
|
|
imdone_count:=me^.imdone_count;
|
|
EndFrameCapture;
|
|
end;
|
|
|
|
|
|
//read from queue (at most one stream per pass)
|
|
ctx.stream:=nil;
|
|
if me^.queue.Pop(ctx.stream) then
|
|
begin
|
|
me^.add_stream(ctx.stream);
|
|
//
|
|
ctx.stream:=nil;
|
|
end;
|
|
|
|
|
|
//get next task
|
|
ctx.stream:=me^.get_next;
|
|
|
|
|
|
if (ctx.stream<>nil) then
|
|
begin
|
|
|
|
|
|
//start relative timer (rel_time=0 means "not started")
|
|
if (ctx.rel_time=0) then
|
|
begin
|
|
ctx.rel_time:=md_rdtsc_unit;
|
|
end;
|
|
//
|
|
|
|
|
|
//restore cursor: continue at the saved node or start from the first one
|
|
ctx.node:=ctx.stream^.curr;
|
|
if (ctx.node=nil) then
|
|
begin
|
|
ctx.node:=ctx.stream^.First;
|
|
ctx.stream^.curr:=ctx.node;
|
|
end;
|
|
|
|
|
|
while (ctx.node<>nil) do
|
|
begin
|
|
|
|
|
|
// hint_cmds makes the trace line appear once per node even if the node is entered again
if not ctx.stream^.hint_cmds then
|
|
begin
|
|
if p_print_gpu_ops then
|
|
begin
|
|
Writeln('+',ctx.node^.id,':',ctx.node^.ntype);
|
|
end;
|
|
ctx.stream^.hint_cmds:=True;
|
|
end;
|
|
|
|
|
|
//wait last stall cmd ???
|
|
//if ctx.WaitConfirm then
|
|
begin
|
|
// dispatch to the handler of the node type; unknown types are logged and assert
case ctx.node^.ntype of
|
|
ntHint :pm4_Hint (ctx,Pointer(ctx.node));
|
|
ntDrawIndex2 :pm4_Draw (ctx,Pointer(ctx.node));
|
|
ntDrawIndexOffset2 :pm4_Draw (ctx,Pointer(ctx.node));
|
|
ntDrawIndexAuto :pm4_Draw (ctx,Pointer(ctx.node));
|
|
ntClearDepth :pm4_Draw (ctx,Pointer(ctx.node));
|
|
ntResolve :pm4_Resolve (ctx,Pointer(ctx.node));
|
|
ntFastClear :pm4_FastClear (ctx,Pointer(ctx.node));
|
|
ntDispatchDirect :pm4_DispatchDirect (ctx,Pointer(ctx.node));
|
|
ntDispatchIndirect :pm4_DispatchIndirect (ctx,Pointer(ctx.node));
|
|
ntEventWrite :pm4_EventWrite (ctx,Pointer(ctx.node));
|
|
ntPipeStatDump :pm4_PipeStatDump (ctx,Pointer(ctx.node));
|
|
ntEventWriteEop :pm4_EventWriteEop (ctx,Pointer(ctx.node));
|
|
ntSubmitFlipEop :pm4_SubmitFlipEop (ctx,Pointer(ctx.node));
|
|
ntReleaseMem :pm4_ReleaseMem (ctx,Pointer(ctx.node));
|
|
ntEventWriteEos :pm4_EventWriteEos (ctx,Pointer(ctx.node));
|
|
ntWriteData :pm4_WriteData (ctx,Pointer(ctx.node));
|
|
ntDmaData :pm4_DmaData (ctx,Pointer(ctx.node));
|
|
ntWaitRegMem :pm4_WaitRegMem (ctx,Pointer(ctx.node));
|
|
|
|
|
|
ntLoadConstRam :pm4_LoadConstRam (ctx,Pointer(ctx.node));
|
|
ntDumpConstRam :pm4_DumpConstRam (ctx,Pointer(ctx.node));
|
|
|
|
|
|
ntIncrementCE :pm4_IncrementCE (ctx,Pointer(ctx.node));
|
|
ntIncrementDE :pm4_IncrementDE (ctx,Pointer(ctx.node));
|
|
ntWaitOnCECounter :pm4_WaitOnCECounter (ctx,Pointer(ctx.node));
|
|
ntWaitOnDECounterDiff:pm4_WaitOnDECounterDiff(ctx,Pointer(ctx.node));
|
|
|
|
|
|
ntPfpSyncMe :pm4_PfpSyncMe (ctx,Pointer(ctx.node));
|
|
|
|
|
|
else
|
|
begin
|
|
Writeln(stderr,'me:+',ctx.node^.ntype);
|
|
Assert(false,'me:+'+GetEnumName(TypeInfo(t_pm4_node_type),ord(ctx.node^.ntype)));
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
// a task switch was requested while handling the node
if me^.sheduler.switch then
|
|
begin
|
|
//save position: the same node is processed again when the stream resumes
|
|
ctx.stream^.curr:=ctx.node;
|
|
//Switching to another task
|
|
Break;
|
|
end;
|
|
|
|
|
|
//reset hint
|
|
ctx.stream^.hint_cmds:=False;
|
|
|
|
|
|
//next command
|
|
ctx.node:=ctx.stream^.Next(ctx.node);
|
|
end;
|
|
|
|
|
|
if me^.sheduler.switch then
|
|
begin
|
|
// acknowledge the switch request; the stream stays registered
|
|
me^.sheduler.switch:=False;
|
|
//Switching to another task
|
|
Continue;
|
|
end else
|
|
begin
|
|
//Complete the task and switch to the next one
|
|
ctx.complete_and_next_task;
|
|
end;
|
|
|
|
|
|
// all nodes of the stream were processed
|
|
me^.remove_stream(ctx.stream);
|
|
ctx.stream:=nil;
|
|
|
|
|
|
//
|
|
Continue;
|
|
end;
|
|
|
|
|
|
//stall is empty! (get_next returned nil)
|
|
|
|
|
|
me^.reset_sheduler;
|
|
|
|
|
|
ctx.rel_time:=0; //reset time
|
|
|
|
|
|
//TODO: Timeline semaphore
|
|
if not ctx.PingCmd then
|
|
begin
|
|
ctx.on_idle;
|
|
end;
|
|
|
|
|
|
//RTLEventWaitFor(me^.event,100);
|
|
me^.wait;
|
|
|
|
|
|
until false;
|
|
|
|
|
|
end;
|
|
|
|
end.
|
|
|