FPPS4/chip/pm4_me.pas

3780 lines
80 KiB
Plaintext

unit pm4_me;
{$mode ObjFPC}{$H+}
{$CALLING SysV_ABI_CDecl}
interface
uses
sysutils,
TypInfo,
mqueue,
LFQueue,
sys_eventvar,
si_ci_vi_merged_enum,
si_ci_vi_merged_groups,
sys_bootparam,
host_ipc_interface,
md_sleep,
Vulkan,
vDevice,
vMemory,
vBuffer,
vHostBufferManager,
vImage,
vImageManager,
vRender,
vRenderPassManager,
vPipelineManager,
vFramebufferManager,
vShader,
vShaderExt,
vShaderManager,
vRegs2Vulkan,
vCmdBuffer,
vDescriptorSet,
vSampler,
vSamplerManager,
vMetaManager,
vImageTiling,
vDependence,
renderdoc,
sys_event,
time,
md_time,
kern_thr,
sched_ule,
pm4defs,
pm4_stream;
function gc_add_internal_ptr (kq,ptr,udata:Pointer):Integer; register; external;
function gc_del_internal_ptr (kq,ptr:Pointer):Integer; register; external;
procedure gc_wakeup_internal_ptr(ptr:Pointer); register; external;
Const
CONST_RAM_SIZE=48*1024;
type
t_on_submit_flip_eop=function(submit_id:QWORD):Integer;
p_pm4_stall=^t_pm4_stall;
t_pm4_stall=record
next:p_pm4_stall;
//
list:TAILQ_HEAD; //p_pm4_stream
//
count:Ptruint;
flow :Ptruint;
end;
p_me_wait_addr=^t_me_wait_addr;
t_me_wait_addr=object
Fcode_addr:Pointer;
Fdmem_addr:Pointer;
Fregs_addr:Pointer;
//
procedure add_reg(kq:Pointer);
procedure del_reg(kq:Pointer);
procedure set_adr(kq,addr:Pointer);
end;
p_pm4_me=^t_pm4_me;
t_pm4_me=object
//
queue:TIntrusiveMPSCQueue; //p_pm4_stream
//
stall:array[t_pm4_stream_type] of t_pm4_stall;
//
sheduler:record
start :p_pm4_stall;
switch:Boolean;
count :Byte;
end;
//
//event:PRTLEvent;
on_idle:TProcedure;
on_submit_flip_eop:t_on_submit_flip_eop;
//
started:Pointer;
td:p_kthread;
//
gc_knlist:p_knlist;
gc_kqueue:p_kqueue;
//
wait_ptr:array[t_pm4_stream_type] of t_me_wait_addr;
//
imdone_count:QWORD;
//
CONST_RAM:array[0..CONST_RAM_SIZE-1] of Byte; //48KB
//
CE_COUNT:DWORD;
DE_COUNT:DWORD;
//
procedure Init(knlist:p_knlist);
procedure start;
procedure trigger;
procedure wait;
procedure imdone;
procedure knote_eventid(event_id,me_id:Byte;timestamp:QWORD;lockflags:Integer);
procedure Push(var stream:t_pm4_stream);
procedure reset_sheduler;
procedure set_step(s:t_pm4_stream_type);
procedure next_step;
function next_task:Boolean;
procedure switch_task;
procedure add_stream (stream:p_pm4_stream);
function get_next :p_pm4_stream;
procedure remove_stream(stream:p_pm4_stream);
end;
PvCmdFreeNode=^TvCmdFreeNode;
TvCmdFreeNode=record
entry:STAILQ_ENTRY;
FCmd :TVkCommandBuffer;
end;
TvCmdCachedPool=class(TvCmdPool)
FMemCache:STAILQ_HEAD; //PvCmdFreeNode
FDeffered:STAILQ_HEAD; //PvCmdFreeNode
FTrimCount:Integer;
Constructor Create(FFamily:TVkUInt32);
procedure Free(cmd:TVkCommandBuffer); register; override;
procedure Trim; register; override;
end;
t_pool_line=array[0..3] of TvCustomCmdPool;
t_pool_cache=object
queue:TvQueue;
line :t_pool_line;
last :TvCustomCmdPool;
Procedure Init(Q:TvQueue);
function fetch(i:QWORD):TvCustomCmdPool;
procedure trim;
procedure trim_all;
end;
TvStreamCmdBuffer=class(TvCmdBuffer)
entry :TAILQ_ENTRY; //stall
stream:p_pm4_stream;
//
function OnAlloc(size:Ptruint):Pointer; register; override;
Procedure OnFree (P:Pointer ); register; override;
function IsLinearAlloc:Boolean; register; override;
end;
t_me_render_context=object
me :p_pm4_me;
stream :p_pm4_stream;
node :p_pm4_node;
//
rel_time:QWORD;
//
rt_info :p_pm4_rt_info;
Render :TvRenderPassBeginInfo;
//
gfx_pool:t_pool_cache;
//
Cmd :TvStreamCmdBuffer;
stall :array[t_pm4_stream_type] of TAILQ_HEAD; //TvStreamCmdBuffer
//
dep:TvDependenciesObject;
images_size:QWORD;
buffer_size:QWORD;
//
procedure Init;
procedure BeginCmdBuffer;
procedure FinishCmdBuffer;
function CmdStatus(i:t_pm4_stream_type):TVkResult;
function PingCmd:Boolean;
function WaitConfirm:Boolean;
function WaitConfirmOrSwitch:Boolean;
Procedure InsertLabel(pLabelName:PVkChar);
Procedure BeginLabel(pLabelName:PVkChar);
Procedure EndLabel();
//
procedure switch_task;
procedure complete_and_next_task;
procedure on_idle;
Procedure RefToParent(obj:TvRefsObject); register;
procedure FlushParent; register;
end;
var
use_renderdoc_capture:Boolean=False;
act_renderdoc_capture:Boolean=False;
wait_loop_detect :Boolean=True;
wait_loop_autoskip :Boolean=False;
implementation
uses
windows,
kern_dmem,
kern_proc,
vm_map,
vm_tracking_map,
dev_dce;
function GetAsyncKeyState(vKey:longint):Boolean; inline;
begin
Result:=(Windows.GetKeyState(vKey) and $8000)<>0;
end;
procedure StartFrameCapture;
begin
if use_renderdoc_capture then
begin
if GetAsyncKeyState(VK_F1) then
begin
act_renderdoc_capture:=True;
end;
if GetAsyncKeyState(VK_F2) then
begin
act_renderdoc_capture:=False;
end;
if act_renderdoc_capture then
begin
if (renderdoc.IsFrameCapturing()=0) then
begin
SetCaptureOptionU32(eRENDERDOC_Option_RefAllResources,1);
renderdoc.StartFrameCapture(0,0);
end;
end else
begin
if (renderdoc.IsFrameCapturing()<>0) then
begin
renderdoc.EndFrameCapture(0,0);
end;
end;
end;
end;
procedure EndFrameCapture;
begin
if use_renderdoc_capture then
begin
if (renderdoc.IsFrameCapturing()<>0) then
begin
renderdoc.EndFrameCapture(0,0);
end;
end;
end;
procedure t_pm4_me.Init(knlist:p_knlist);
var
i:t_pm4_stream_type;
begin
queue.Create;
for i:=Low(t_pm4_stream_type) to High(t_pm4_stream_type) do
begin
if (i=High(t_pm4_stream_type)) then
begin
stall[i].next:=@stall[Low(t_pm4_stream_type)];
end else
begin
stall[i].next:=@stall[Succ(i)];
end;
//
TAILQ_INIT(@stall[i].list);
end;
gc_knlist:=knlist;
gc_kqueue:=kern_kqueue2('[gc_kqueue]',nil,nil);
gc_add_internal_ptr(gc_kqueue,@queue,@queue);
end;
procedure pm4_me_thread(me:p_pm4_me); SysV_ABI_CDecl; forward;
procedure t_pm4_me.start;
begin
if (XCHG(started,Pointer(1))=nil) then
begin
//event:=RTLEventCreate;
//
kthread_add(@pm4_me_thread,@self,@td,(8*1024*1024) div (16*1024),'[GFX_ME]');
end;
end;
procedure t_pm4_me.trigger;
begin
if (gc_kqueue<>nil) then
begin
gc_wakeup_internal_ptr(@queue);
end;
{
if (event<>nil) then
begin
RTLEventSetEvent(event);
end;
}
end;
procedure t_pm4_me.wait;
var
kev:array[0..15] of t_kevent;
t:timespec;
i,r:Integer;
wait_addr:p_me_wait_addr;
wmin_addr:p_me_wait_addr;
begin
t:=Default(timespec);
t.tv_sec :=0;
t.tv_nsec:=1000000000 div 1000;
r:=0;
if (gc_kqueue<>nil) then
begin
kern_kevent2(gc_kqueue,nil,0,@kev,Length(kev),@t,@r);
end;
wmin_addr:=nil;
if (r<>0) then
For i:=0 to r-1 do
begin
if (kev[i].udata=@queue) then
begin
//
end else
begin
wait_addr:=kev[i].udata;
if (wmin_addr=nil) or
(wmin_addr>wait_addr) then
begin
wmin_addr:=wait_addr
end;
end;
end;
if (wmin_addr<>nil) then
begin
i:=(PtrUint(wmin_addr)-PtrUint(@wait_ptr)) div SizeOf(t_me_wait_addr);
set_step(t_pm4_stream_type(i));
end;
end;
procedure t_pm4_me.imdone;
begin
System.InterlockedIncrement64(imdone_count);
trigger;
end;
procedure t_pm4_me.knote_eventid(event_id,me_id:Byte;timestamp:QWORD;lockflags:Integer);
begin
knote(gc_knlist, event_id or (me_id shl 8) or (timestamp shl 16), lockflags);
end;
procedure t_pm4_me.Push(var stream:t_pm4_stream);
var
node:p_pm4_stream;
buft:t_pm4_stream_type;
begin
if (stream.First=nil) then Exit;
//self alloc
node:=stream.allocator.Alloc(SizeOf(t_pm4_stream));
//
node^:=stream;
//
buft:=stream.buft;
stream:=Default(t_pm4_stream);
stream.buft:=buft;
//
queue.Push(node);
//
start;
//
trigger;
end;
procedure t_pm4_me.reset_sheduler;
begin
//reset stall iterator
sheduler.start :=@stall[Low(t_pm4_stream_type)];
sheduler.switch:=False;
sheduler.count :=0;
end;
procedure t_pm4_me.set_step(s:t_pm4_stream_type);
begin
sheduler.start :=@stall[s];
sheduler.switch:=False;
sheduler.count :=0;
end;
procedure t_pm4_me.next_step;
begin
//next
sheduler.start:=sheduler.start^.next;
//
if (sheduler.start^.flow=0) then
begin
sheduler.start^.flow:=sheduler.start^.count;
end;
end;
function t_pm4_me.next_task:Boolean;
begin
if TAILQ_EMPTY(@sheduler.start^.list) or
(sheduler.start^.flow=0) then
begin
//next
next_step;
//
Result:=True;
end else
begin
Dec(sheduler.start^.flow);
//
Result:=False;
end;
end;
procedure t_pm4_me.switch_task;
begin
sheduler.switch:=True;
//
Inc(sheduler.count);
//
if (sheduler.count=Length(stall)) then
begin
//next
next_step;
//wait
wait;
//msleep_td(hz div 10000);
//
sheduler.count:=0;
end else
begin
//next
next_step;
end;
end;
procedure t_pm4_me.add_stream(stream:p_pm4_stream);
var
i:t_pm4_stream_type;
begin
i:=stream^.buft;
TAILQ_INSERT_TAIL(@stall[i].list,stream,@stream^.next_);
//
Inc(stall[i].count);
//
stream^.Acquire; //stall
end;
function t_pm4_me.get_next:p_pm4_stream;
var
i:t_pm4_stream_type;
begin
for i:=Low(t_pm4_stream_type) to High(t_pm4_stream_type) do
begin
Result:=TAILQ_FIRST(@sheduler.start^.list);
if (Result<>nil) then Break;
//next
next_step;
end;
end;
procedure free_stream(stream:p_pm4_stream);
var
tmp:t_pm4_stream;
begin
tmp:=stream^;
tmp.Free;
end;
procedure t_pm4_me.remove_stream(stream:p_pm4_stream);
var
i:t_pm4_stream_type;
begin
//pop
i:=stream^.buft;
TAILQ_REMOVE(@stall[i].list,stream,@stream^.next_);
//
Dec(stall[i].count);
//
if stream^.Release then //stall
begin
//
free_stream(stream);
end;
end;
//
Constructor TvCmdCachedPool.Create(FFamily:TVkUInt32);
begin
inherited;
STAILQ_INIT(@FMemCache);
STAILQ_INIT(@FDeffered);
end;
procedure TvCmdCachedPool.Free(cmd:TVkCommandBuffer); register;
var
node:PvCmdFreeNode;
begin
if STAILQ_EMPTY(@FMemCache) then
begin
node:=AllocMem(SizeOf(TvCmdFreeNode));
end else
begin
node:=STAILQ_FIRST(@FMemCache);
STAILQ_REMOVE(@FMemCache,node,@node^.entry);
end;
node^.FCmd:=cmd;
STAILQ_INSERT_TAIL(@FDeffered,node,@node^.entry);
end;
procedure TvCmdCachedPool.Trim; register;
var
node:PvCmdFreeNode;
begin
node:=STAILQ_FIRST(@FDeffered);
while (node<>nil) do
begin
STAILQ_REMOVE(@FDeffered,node,@node^.entry);
inherited Free(node^.FCmd);
STAILQ_INSERT_TAIL(@FMemCache,node,@node^.entry);
//
node:=STAILQ_FIRST(@FDeffered);
end;
Inc(FTrimCount);
if (FTrimCount>=5000) then
begin
FTrimCount:=0;
inherited Trim;
end;
end;
//
Procedure t_pool_cache.Init(Q:TvQueue);
begin
queue:=Q;
end;
function t_pool_cache.fetch(i:QWORD):TvCustomCmdPool;
var
p:Byte;
begin
p:=i mod Length(t_pool_line);
if (line[p]=nil) then
begin
line[p]:=TvCmdCachedPool.Create(queue.FFamily);
end;
if (last<>line[p]) then
begin
last:=line[p];
last.Trim;
end;
Result:=last;
end;
procedure t_pool_cache.trim;
begin
if (last<>nil) then
begin
last.Trim;
end;
end;
procedure t_pool_cache.trim_all;
var
i:Byte;
begin
For i:=0 to High(t_pool_line) do
begin
line[i].Trim;
end;
end;
//
function TvStreamCmdBuffer.OnAlloc(size:Ptruint):Pointer; register;
begin
Result:=stream^.allocator.Alloc(size);
FillChar(Result^,size,0);
end;
Procedure TvStreamCmdBuffer.OnFree(P:Pointer); register;
begin
//
end;
function TvStreamCmdBuffer.IsLinearAlloc:Boolean; register;
begin
Result:=True;
end;
procedure t_me_render_context.RefToParent(obj:TvRefsObject); register;
begin
if (dep=nil) then
begin
dep:=TvDependenciesObject.Create;
end;
dep.RefTo(obj);
//
if obj.InheritsFrom(TvCustomImage) then
begin
images_size:=images_size+TvCustomImage(obj).FSize;
end;
if obj.InheritsFrom(TvBuffer) then
begin
buffer_size:=buffer_size+TvBuffer(obj).FSize;
end;
end;
procedure t_me_render_context.FlushParent; register;
begin
if (dep<>nil) then
begin
dep.ReleaseAllDependencies(dep);
end;
images_size:=0;
buffer_size:=0;
end;
//
procedure t_me_render_context.Init;
var
i:t_pm4_stream_type;
begin
gfx_pool.Init(RenderQueue);
for i:=Low(t_pm4_stream_type) to High(t_pm4_stream_type) do
begin
TAILQ_INIT(@stall[i]);
end;
end;
procedure t_me_render_context.BeginCmdBuffer;
var
buft:t_pm4_stream_type;
imdone_count:QWORD;
Pool:TvCustomCmdPool;
begin
if (Cmd<>nil) then Exit; //Already allocated
buft:=stream^.buft;
//Select Vulkan compute only queue?
imdone_count:=me^.imdone_count;
Pool:=gfx_pool.fetch(imdone_count);
Cmd:=TvStreamCmdBuffer.Create(Pool,gfx_pool.queue);
Cmd.stream:=stream;
stream^.Acquire; //TvStreamCmdBuffer
end;
procedure free_cmd_buffer(cmd:TvStreamCmdBuffer);
var
stream:p_pm4_stream;
begin
stream:=cmd.stream;
//
cmd.ReleaseResource;
cmd.Free;
//
if stream^.Release then //TvStreamCmdBuffer
begin
free_stream(stream);
end;
end;
procedure pm4_Writeback_Finish(var ctx:t_me_render_context); forward;
//
procedure t_me_render_context.FinishCmdBuffer;
var
buft:t_pm4_stream_type;
r:TVkResult;
begin
if (Cmd=nil) then Exit;
pm4_Writeback_Finish(Self);
r:=Cmd.QueueSubmit;
if p_print_gpu_ops then
begin
Writeln('QueueSubmit:',r);
end;
if (r<>VK_SUCCESS) then
begin
EndFrameCapture;
PrintMemoryBudget;
Assert(false,'QueueSubmit');
end;
r:=Cmd.Status;
case r of
VK_SUCCESS :;
VK_NOT_READY:
begin
//insert
buft:=Cmd.stream^.buft;
TAILQ_INSERT_TAIL(@stall[buft],Cmd,@Cmd.entry);
Cmd:=nil;
Exit;
end;
else
Writeln(stderr,'last.Status=',r); //error
end;
free_cmd_buffer(Cmd);
Cmd:=nil;
end;
function t_me_render_context.CmdStatus(i:t_pm4_stream_type):TVkResult;
var
last:TvStreamCmdBuffer;
begin
last:=TvStreamCmdBuffer(TAILQ_FIRST(@stall[i]));
while (last<>nil) do
begin
Result:=last.Status;
case Result of
VK_SUCCESS :;
VK_NOT_READY:Exit;
else
Writeln(stderr,'last.Status=',Result); //error
end;
TAILQ_REMOVE(@stall[i],last,@last.entry);
free_cmd_buffer(last);
last:=TvStreamCmdBuffer(TAILQ_FIRST(@stall[i]));
end;
Result:=VK_SUCCESS;
end;
function t_me_render_context.PingCmd:Boolean;
var
i:t_pm4_stream_type;
begin
Result:=False;
for i:=Low(t_pm4_stream_type) to High(t_pm4_stream_type) do
begin
Result:=Result or (CmdStatus(i)=VK_NOT_READY);
end;
end;
function t_me_render_context.WaitConfirm:Boolean;
begin
gfx_pool.trim;
FinishCmdBuffer;
Result:=(CmdStatus(stream^.buft)<>VK_NOT_READY);
end;
function t_me_render_context.WaitConfirmOrSwitch:Boolean;
begin
gfx_pool.trim;
FinishCmdBuffer;
if (stream=nil) then Exit(True);
Result:=(CmdStatus(stream^.buft)<>VK_NOT_READY);
if not Result then
begin
switch_task;
end;
end;
Procedure t_me_render_context.InsertLabel(pLabelName:PVkChar);
begin
if (DebugReport.FCmdInsertDebugUtilsLabel=nil) then Exit;
if (Cmd=nil) then Exit;
BeginCmdBuffer;
Cmd.InsertLabel(pLabelName);
end;
Procedure t_me_render_context.BeginLabel(pLabelName:PVkChar);
begin
if (DebugReport.FCmdBeginDebugUtilsLabel=nil) then Exit;
if (Cmd=nil) then Exit;
BeginCmdBuffer;
Cmd.BeginLabel(pLabelName);
end;
Procedure t_me_render_context.EndLabel();
begin
if (Cmd=nil) then Exit;
Cmd.EndLabel();
end;
procedure t_me_render_context.switch_task;
begin
FinishCmdBuffer;
//
me^.switch_task;
end;
procedure t_me_render_context.complete_and_next_task;
begin
FinishCmdBuffer;
//
me^.next_task;
end;
procedure t_me_render_context.on_idle;
begin
if (me^.on_idle<>nil) then
begin
me^.on_idle();
end;
end;
//
function GetMixedFlag(const curr:t_pm4_usage):Byte;
begin
if (PopCnt(DWORD(curr.img_usage))>1) then
begin
Result:=TM_MIXED;
end else
begin
Result:=0;
end;
end;
function GetImageLayout(const curr:t_pm4_usage):TVkImageLayout;
begin
if (PopCnt(DWORD(curr.img_usage))>1) then
begin
Result:=VK_IMAGE_LAYOUT_GENERAL;
end else
case t_image_usage(BsfDWord(DWORD(curr.img_usage))) of
iu_attachment:
begin
Result:=VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
end;
iu_depthstenc:
begin
if ((curr.shd_usage and (TM_WRITE or TM_CLEAR))<>0) then
begin
Result:=VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
end else
begin
Result:=VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
end;
end;
iu_sampled,
iu_storage:
begin
if ((curr.shd_usage and (TM_WRITE or TM_CLEAR))<>0) then
begin
Result:=VK_IMAGE_LAYOUT_GENERAL;
end else
begin
Result:=VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
end;
end;
iu_transfer:
begin
//mem_usage ???
if ((curr.mem_usage and (TM_WRITE or TM_READ))=(TM_WRITE or TM_READ)) then
begin
Result:=VK_IMAGE_LAYOUT_GENERAL;
end else
if ((curr.mem_usage and TM_WRITE)<>0) then
begin
Result:=VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
end else
begin
Result:=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
end;
end;
else
Result:=VK_IMAGE_LAYOUT_UNDEFINED;
end;
end;
function ConvertRW(IMAGE_USAGE:Byte;R,W:TVkAccessFlagBits):TVkAccessFlags; inline;
begin
Result:=(ord(R)*ord((IMAGE_USAGE and TM_READ )<>0) ) or
(ord(W)*ord((IMAGE_USAGE and (TM_WRITE or TM_CLEAR))<>0) );
end;
function GetAccessMaskImg(const curr:t_pm4_usage):TVkAccessFlags;
begin
Result:=
ConvertRW(curr.shd_usage,VK_ACCESS_SHADER_READ_BIT ,VK_ACCESS_SHADER_WRITE_BIT ) or
ConvertRW(curr.clr_usage,VK_ACCESS_COLOR_ATTACHMENT_READ_BIT ,VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT ) or
ConvertRW(curr.dsa_usage,VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
end;
function GetAccessMaskBuf(const curr:t_pm4_usage):TVkAccessFlags;
begin
Result:=ConvertRW(curr.mem_usage,VK_ACCESS_SHADER_READ_BIT,VK_ACCESS_SHADER_WRITE_BIT);
end;
function GetStageMask(BindPoint:TVkPipelineBindPoint):TVkPipelineStageFlags;
begin
case BindPoint of
BP_GRAPHICS:Result:=ord(VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT);
BP_COMPUTE :Result:=ord(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
else Result:=ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
end;
end;
function AlignDw(addr:PtrUInt;alignment:PtrUInt):PtrUInt; inline;
begin
Result:=addr-(addr mod alignment);
end;
const
VK_ACCESS_BUF_ANY=ord(VK_ACCESS_MEMORY_READ_BIT) or ord(VK_ACCESS_MEMORY_WRITE_BIT);
VK_STAGE_BUF_ANY =ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
function _FetchImageForce(var ctx:t_me_render_context;const F:TvImageKey;usage:s_image_usage):TvImage2;
begin
repeat
Result:=FetchImage(ctx.Cmd,F,usage);
if (Result=nil) then
begin
repeat
msleep_td(hz div 10000);
until ctx.WaitConfirm;
ctx.BeginCmdBuffer;
end;
until (Result<>nil);
end;
function ConvertImage(var ctx:t_me_render_context;usage:s_image_usage;src:TvImage2;ToFormat:TVkFormat):TvImage2;
var
F:TvImageKey;
dst:TvImage2;
range:TVkImageCopy;
range_all:array[0..15] of TVkImageCopy;
i,m:Integer;
begin
Assert(src<>nil);
F:=src.key;
F.cformat:=ToFormat;
if not ExtractImage(src) then
begin
Assert(false,'ExtractImage');
end;
dst:=_FetchImageForce(ctx,F,usage);
src.PushBarrier(ctx.cmd,
ord(VK_ACCESS_TRANSFER_READ_BIT),
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
ord(VK_PIPELINE_STAGE_TRANSFER_BIT));
dst.PushBarrier(ctx.cmd,
ord(VK_ACCESS_TRANSFER_WRITE_BIT),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
ord(VK_PIPELINE_STAGE_TRANSFER_BIT));
//
range.srcSubresource.aspectMask :=GetAspectMaskByFormat(src.key.cformat);
range.srcSubresource.mipLevel :=0;
range.srcSubresource.baseArrayLayer:=0;
range.srcSubresource.layerCount :=src.key.params.layerCount;
range.srcOffset.x:=0;
range.srcOffset.y:=0;
range.srcOffset.z:=0;
range.dstSubresource.aspectMask :=GetAspectMaskByFormat(dst.key.cformat);
range.dstSubresource.mipLevel :=0;
range.dstSubresource.baseArrayLayer:=0;
range.dstSubresource.layerCount :=dst.key.params.layerCount;
range.dstOffset.x:=0;
range.dstOffset.y:=0;
range.dstOffset.z:=0;
range.extent.width :=src.key.params.width;
range.extent.height:=src.key.params.height;
range.extent.depth :=src.key.params.depth;
//
m:=src.key.params.mipLevels;
For i:=0 to m-1 do
begin
range_all[i]:=range;
range_all[i].srcSubresource.mipLevel:=i;
range_all[i].dstSubresource.mipLevel:=i;
end;
ctx.Cmd.CopyImage(
src.FHandle,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
dst.FHandle,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
m,
@range_all[0]
);
Result:=dst;
end;
function FetchImageForce(var ctx:t_me_render_context;const F:TvImageKey;usage:s_image_usage):TvImage2;
begin
Result:=_FetchImageForce(ctx,F,usage);
//function ExtractImage(img:TvImage2):Boolean;
Assert(Result<>nil);
//TODO: more general type compatibility checking
case Result.FFormat of
//
VK_FORMAT_R32_UINT,
VK_FORMAT_R32_SINT,
VK_FORMAT_R32_SFLOAT:
if (iu_depthstenc in usage) then
begin
//R32 -> D32
Result:=ConvertImage(ctx,usage,Result,VK_FORMAT_D32_SFLOAT);
end;
//
VK_FORMAT_R16_UNORM,
VK_FORMAT_R16_SNORM,
VK_FORMAT_R16_UINT,
VK_FORMAT_R16_SINT,
VK_FORMAT_R16_SFLOAT:
if (iu_depthstenc in usage) then
begin
//R16 -> D16
Result:=ConvertImage(ctx,usage,Result,VK_FORMAT_D16_UNORM);
end;
//
VK_FORMAT_D32_SFLOAT:
if (iu_storage in usage) then
begin
//D32 -> R32
Result:=ConvertImage(ctx,usage,Result,VK_FORMAT_R32_SFLOAT);
end;
VK_FORMAT_D16_UNORM:
if (iu_storage in usage) then
begin
//D16 -> R16
Result:=ConvertImage(ctx,usage,Result,VK_FORMAT_R16_UNORM);
end
//
else;
end;
ctx.RefToParent(Result);
end;
procedure Prepare_Uniforms(var ctx:t_me_render_context;
BindPoint:TVkPipelineBindPoint;
var UniformBuilder:TvUniformBuilder);
var
i:Integer;
ri:TvImage2;
buf:TvHostBuffer;
diff_u:TVkDeviceSize;
diff_a:TVkDeviceSize;
resource_instance:p_pm4_resource_instance;
b:Boolean;
begin
//Writeln('[Prepare_Uniforms]->');
if (Length(UniformBuilder.FImages)<>0) then
begin
For i:=0 to High(UniformBuilder.FImages) do
With UniformBuilder.FImages[i] do
begin
if (FImage.params.invalid<>0) then
begin
//skip
Continue;
end;
resource_instance:=ctx.node^.scope.find_image_resource_instance(FImage);
if (resource_instance=nil) then
begin
case btype of
vbSampled:
begin
resource_instance:=ctx.stream^.insert_image_resource(
@ctx.node^.scope,
FImage,
memuse,
[iu_sampled],
'Prepare_Uniforms');
end;
vbStorage,
vbMipStorage:
begin
resource_instance:=ctx.stream^.insert_image_resource(
@ctx.node^.scope,
FImage,
memuse,
[iu_storage],
'Prepare_Uniforms');
end;
else
Assert(false);
end;
end;
Assert(resource_instance<>nil);
if not resource_instance^.prepared then
begin
resource_instance^.prepared:=true;
//ri:=TvImage2(resource_instance^.resource^.rimage);
ri:=nil;
if (ri<>nil) then
begin
ctx.Cmd.RefTo(ri);
end;
if (ri<>nil) then
if (ri.is_invalid) then
begin
resource_instance^.resource^.rimage:=nil;
ri:=nil;
end;
if (ri=nil) then
begin
ri:=FetchImageForce(ctx,
FImage,
resource_instance^.curr.img_usage);
resource_instance^.resource^.rimage:=ri;
end;
//Writeln(GetVkFormatStr(ri.key.cformat));
repeat
b:=pm4_load_from(ctx.Cmd,ri,resource_instance^.curr.mem_usage);
if (not b) then
begin
repeat until ctx.WaitConfirm;
ctx.BeginCmdBuffer;
end;
until (b);
ri.PushBarrier(ctx.Cmd,
GetAccessMaskImg(resource_instance^.curr),
GetImageLayout(resource_instance^.curr),
GetStageMask(BindPoint));
end;
end;
end;
//Buffers
//buffers
if (Length(UniformBuilder.FBuffers)<>0) then
begin
For i:=0 to High(UniformBuilder.FBuffers) do
With UniformBuilder.FBuffers[i] do
if (memuse and TM_INVAL)=0 then
begin
resource_instance:=ctx.node^.scope.find_buffer_resource_instance(R_BUF,addr,size);
if (resource_instance=nil) then
begin
resource_instance:=ctx.stream^.insert_buffer_resource(
@ctx.node^.scope,
R_BUF,
addr,
size,
memuse,
'Prepare_Uniforms');
end;
Assert(resource_instance<>nil);
if not resource_instance^.prepared then
begin
resource_instance^.prepared:=true;
//buf:=TvHostBuffer(resource_instance^.resource^.rimage);
buf:=nil;
if (buf<>nil) then
begin
ctx.Cmd.RefTo(buf);
end;
if (buf<>nil) then
if (buf.is_invalid) then
begin
resource_instance^.resource^.rimage:=nil;
buf:=nil;
end;
if (buf=nil) then
begin
repeat
buf:=FetchHostBuffer(ctx.Cmd,QWORD(addr),size);
if (buf=nil) then
begin
repeat until ctx.WaitConfirm;
ctx.BeginCmdBuffer;
end;
until (buf<>nil);
Assert(buf<>nil);
ctx.RefToParent(buf);
resource_instance^.resource^.rimage:=buf;
diff_u:=QWORD(addr)-buf.FAddr;
diff_a:=AlignDw(diff_u,limits.minStorageBufferOffsetAlignment);
//TODO: Barrier state cache
ctx.Cmd.BufferMemoryBarrier(buf.FHandle,
VK_ACCESS_BUF_ANY,
GetAccessMaskBuf(resource_instance^.curr),
diff_a,size,
VK_STAGE_BUF_ANY,
GetStageMask(BindPoint)
);
end;
end;
end;
end;
//buffers
//Writeln('<-[Prepare_Uniforms]');
end;
procedure BindMipStorage(var ctx:t_me_render_context;
fset,bind:TVkUInt32;
DescriptorGroup:TvDescriptorInterface;
ri:TvImage2;
const FView:TvImageViewKey;
Layout:TVkImageLayout);
var
i,p:Integer;
iv:TvImageView2;
aiv:array[0..15] of TVkImageView;
MView:TvImageViewKey;
begin
if (ri=nil) then
begin
For i:=0 to 15 do
begin
aiv[i]:=VK_NULL_HANDLE;
end;
end else
begin
p:=0;
For i:=FView.base_level to FView.last_level do
begin
MView:=FView;
MView.base_level:=i;
MView.last_level:=i;
//
iv:=ri.FetchView(ctx.Cmd,MView,iu_storage);
aiv[p]:=iv.FHandle;
//
Inc(p);
end;
//fill by 16?
while (p<16) do
begin
aiv[p]:=iv.FHandle;
//
Inc(p);
end;
end;
DescriptorGroup.BindStorages(fset,bind,
0,p,
@aiv[0],
Layout);
end;
Function get_bind_str(FBind:TvPointer):RawByteString;
begin
if (FBind.FMemory=nil) then
begin
Result:='(nil)';
end else
begin
Result:='0x'+HexStr(FBind.FMemory.FHandle,16);
end;
end;
procedure Bind_Uniforms(var ctx:t_me_render_context;
BindPoint:TVkPipelineBindPoint;
var UniformBuilder:TvUniformBuilder);
var
i:Integer;
DescriptorGroup:TvDescriptorInterface;
ri:TvImage2;
iv:TvImageView2;
sm:TvSampler;
buf:TvHostBuffer;
diff_u:TVkDeviceSize;
diff_a:TVkDeviceSize;
align :TVkDeviceSize;
range :TVkDeviceSize;
resource_instance:p_pm4_resource_instance;
Layout:TVkImageLayout;
begin
DescriptorGroup:=ctx.Cmd.FetchDescriptorInterface(BindPoint);
//images
if (Length(UniformBuilder.FImages)<>0) then
begin
For i:=0 to High(UniformBuilder.FImages) do
With UniformBuilder.FImages[i] do
begin
if (FImage.params.invalid<>0) then
begin
if (limits.nullDescriptor<>VK_TRUE) then
begin
Assert(false,'unsupported nullDescriptor');
end;
case btype of
vbSampled:
begin
DescriptorGroup.BindImage(fset,bind,
VK_NULL_HANDLE,
VK_IMAGE_LAYOUT_GENERAL);
end;
vbStorage:
begin
DescriptorGroup.BindStorage(fset,bind,
VK_NULL_HANDLE,
VK_IMAGE_LAYOUT_GENERAL);
end;
vbMipStorage:
begin
BindMipStorage(ctx,
fset,bind,
DescriptorGroup,
nil,
FView,
VK_IMAGE_LAYOUT_GENERAL);
end;
else
Assert(false);
end;
end else
begin
resource_instance:=ctx.node^.scope.find_image_resource_instance(FImage);
Assert(resource_instance<>nil);
//ri:=TvImage2(resource_instance^.resource^.rimage);
ri:=FetchImage(ctx.Cmd,
FImage,
resource_instance^.curr.img_usage
);
Assert(ri<>nil);
Layout:=GetImageLayout(resource_instance^.curr);
case btype of
vbSampled:
begin
iv:=ri.FetchView(ctx.Cmd,FView,iu_sampled);
Assert(iv<>nil);
Writeln('BindImage:->[',i,']'#13#10,
' 0x',HexStr(ri.FHandle,16),':',GetVkFormatStr(ri.key.cformat),':',ri.FName,'->'#13#10,
' 0x',HexStr(iv.FHandle,16),':',GetVkFormatStr(iv.key.cformat),':',iv.FName);
DescriptorGroup.BindImage(fset,bind,
iv.FHandle,
Layout);
end;
vbStorage:
begin
//reset dst_sel
FView.dstSel:=Default(TvDstSel);
//
iv:=ri.FetchView(ctx.Cmd,FView,iu_storage);
Assert(iv<>nil);
Writeln('BindStorage:->[',i,']'#13#10,
' 0x',HexStr(ri.FHandle,16),':',ri.key.cformat,':',ri.FName,'->'#13#10,
' 0x',HexStr(iv.FHandle,16),':',iv.key.cformat,':',iv.FName);
DescriptorGroup.BindStorage(fset,bind,
iv.FHandle,
Layout);
end;
vbMipStorage:
begin
//reset dst_sel
FView.dstSel:=Default(TvDstSel);
//
BindMipStorage(ctx,
fset,bind,
DescriptorGroup,
ri,
FView,
Layout);
end;
else
Assert(false);
end;
end;
end;
end;
//images
//samplers
if (Length(UniformBuilder.FSamplers)<>0) then
begin
For i:=0 to High(UniformBuilder.FSamplers) do
With UniformBuilder.FSamplers[i] do
begin
sm:=FetchSampler(ctx.Cmd,PS);
DescriptorGroup.BindSampler(fset,bind,sm.FHandle);
end;
end;
//samplers
//buffers
if (Length(UniformBuilder.FBuffers)<>0) then
begin
For i:=0 to High(UniformBuilder.FBuffers) do
With UniformBuilder.FBuffers[i] do
if (memuse and TM_INVAL)=0 then
begin
resource_instance:=ctx.node^.scope.find_buffer_resource_instance(R_BUF,addr,size);
{
if (resource_instance<>nil) then
begin
Writeln('rb:curr:',HexStr(resource_instance^.curr.mem_usage,1),
' prev:',HexStr(resource_instance^.prev.mem_usage,1),
' next:',HexStr(resource_instance^.next.mem_usage,1)
);
end;
}
buf:=FetchHostBuffer(ctx.Cmd,QWORD(addr),size);
Assert(buf<>nil);
diff_u:=QWORD(addr)-buf.FAddr;
diff_a:=AlignDw(diff_u,limits.minStorageBufferOffsetAlignment);
align:=diff_u-diff_a;
if (align<>offset) then
begin
Assert(false,'wrong buffer align '+IntToStr(align)+'<>'+IntToStr(offset));
end;
range:=size;
Writeln('BindBuffer:->[',i,':',bind,']',' 0x',HexStr(QWORD(addr),10),' ',get_bind_str(buf.FBind),#13#10,
' 0x',HexStr(buf.FHandle,16),':',buf.FName,'->[',diff_a,'..',diff_a+range,']');
DescriptorGroup.BindBuffer(fset,bind,
buf.FHandle,
diff_a,
range {VK_WHOLE_SIZE});
if ((memuse and TM_WRITE)<>0) then
begin
ctx.Cmd.AddPlannedTrigger(QWORD(addr),QWORD(addr)+size,nil);
end;
end;
end;
//buffers
end;
procedure Bind_Pushs(var ctx:t_me_render_context;
ShaderGroup:TvShaderGroup;
dst:PGPU_USERDATA);
const
bind_points:array[Boolean] of TVkPipelineBindPoint=(VK_PIPELINE_BIND_POINT_GRAPHICS,VK_PIPELINE_BIND_POINT_COMPUTE);
var
Shader:TvShaderExt;
i:TvShaderStage;
FData:PDWORD;
addr:Pointer;
begin
For i:=Low(TvShaderStage) to High(TvShaderStage) do
begin
Shader:=ShaderGroup.FKey.FShaders[i];
if (Shader<>nil) then
if (Shader.FPushConst.size<>0) then
begin
FData:=dst^.get_user_data(i);
addr :=Shader.GetPushConstData(FData);
Assert(addr<>nil,'push const is NULL');
ctx.Cmd.PushConstant(bind_points[Shader.FStage=VK_SHADER_STAGE_COMPUTE_BIT],
ord(Shader.FStage),
Shader.FPushConst.offset,
Shader.FPushConst.size,
addr);
end;
end;
end;
procedure pm4_InitStream(var ctx:t_me_render_context);
var
i:p_pm4_resource_instance;
resource:p_pm4_resource;
ri:TvImage2;
ht:TvMetaHtile;
hc:TvMetaCmask;
begin
if ctx.stream^.init then Exit;
i:=ctx.stream^.init_scope.first;
if (i=nil) then Exit;
while (i<>nil) do
begin
resource:=i^.resource;
if (resource^.rtype=R_IMG) and
(not resource^.rcombined) then
begin
//start on demaind
StartFrameCapture;
ctx.BeginCmdBuffer;
//
//Writeln('init_img:',HexStr(resource^.rkey.Addr),' ',(resource^.rkey.params.width),'x',(resource^.rkey.params.height));
//now preload only sampled image
if (resource^.uall.img_usage=[iu_sampled]) then
begin
ri:=FetchImage(ctx.Cmd,
resource^.rkey,
i^.curr.img_usage + i^.next.img_usage
);
if (ri=nil) then
begin
//NO MEM
Break;
end;
resource^.rimage:=ri;
//pm4_load_from(ctx.Cmd,ri,i^.curr.mem_usage);
end;
end else
if (resource^.rtype=R_HTILE) then
begin
//start on demaind
ctx.BeginCmdBuffer;
ht:=FetchHtile(ctx.Cmd,resource^.rkey,resource^.rsize);
resource^.rclear:=ht.rclear;
end else
if (resource^.rtype=R_CMASK) then
begin
//start on demaind
ctx.BeginCmdBuffer;
hc:=FetchCmask(ctx.Cmd,resource^.rkey,resource^.rsize);
resource^.rclear:=hc.rclear;
end;
i:=TAILQ_NEXT(i,@i^.init_entry);
end;
ctx.stream^.init:=True;
end;
procedure pm4_ClearDepth(var rt_info:t_pm4_rt_info;
var ctx:t_me_render_context);
var
ri:TvImage2;
iv:TvImageView2;
cclear:array[0..1] of Boolean;
range :TVkImageSubresourceRange;
begin
//ClearDepthTarget
ctx.Cmd.EndRenderPass;
ctx.Cmd.BeginLabel('ClearDepth');
ri:=FetchImageForce(ctx,
rt_info.DB_INFO.FImageInfo,
[iu_depthstenc]);
Assert(ri<>nil);
iv:=ri.FetchView(ctx.Cmd,rt_info.DB_INFO.FImageView,iu_depthstenc);
ctx.RefToParent(ri);
ri.PushBarrier(ctx.Cmd,
ord(VK_ACCESS_TRANSFER_WRITE_BIT),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
ord(VK_PIPELINE_STAGE_TRANSFER_BIT));
cclear[0]:=((rt_info.DB_INFO.DEPTH_USAGE and TM_CLEAR)<>0) and
(GetDepthOnlyFormat (ri.key.cformat)<>VK_FORMAT_UNDEFINED);
cclear[1]:=((rt_info.DB_INFO.STENCIL_USAGE and TM_CLEAR)<>0) and
(GetStencilOnlyFormat(ri.key.cformat)<>VK_FORMAT_UNDEFINED);
range:=iv.GetSubresRange;
range.aspectMask:=(ord(VK_IMAGE_ASPECT_DEPTH_BIT )*ord(cclear[0])) or
(ord(VK_IMAGE_ASPECT_STENCIL_BIT)*ord(cclear[1]));
ctx.Cmd.ClearDepthStencilImage(ri.FHandle,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@rt_info.DB_INFO.CLEAR_VALUE.depthStencil,
range);
ctx.Cmd.EndLabel();
ctx.FlushParent;
end;
procedure DumpShaderGroup(ShaderGroup:TvShaderGroup);
var
i:TvShaderStage;
str:RawByteString;
begin
str:='[DumpShaderGroup]'#13#10;
For i:=Low(TvShaderStage) to High(TvShaderStage) do
if (ShaderGroup.FKey.FShaders[i]<>nil) then
begin
str:=str+' ('+HexStr(ShaderGroup.FKey.FShaders[i].FHash_gcn,16)+') '+GetDumpSpvName(i,ShaderGroup.FKey.FShaders[i].FHash_spv)+#13#10;
end;
Writeln(stderr,str);
end;
procedure pm4_DrawPrepare(var ctx:t_me_render_context);
var
i:Integer;
FAttrBuilder:TvAttrBuilder;
FUniformBuilder:TvUniformBuilder;
RP_KEY:TvRenderPassKey;
RP:TvRenderPass2;
GP_KEY:TvGraphicsPipelineKey;
GP:TvGraphicsPipeline2;
FB_KEY:TvFramebufferImagelessKey;
FB_KEY2:TvFramebufferBindedKey;
FB:TvFramebuffer;
ri:TvImage2;
rd:TvCustomImage2;
rs:TvCustomImage2;
iv:TvImageView2;
color_instance:array[0..7] of p_pm4_resource_instance;
flag:Integer;
img_usage:s_image_usage;
meta_instance:p_pm4_resource_instance;
d_instance:p_pm4_resource_instance;
s_instance:p_pm4_resource_instance;
//
GPU_REGS:TGPU_REGS;
CX_REG :TCONTEXT_REG_GROUP; // 0xA000
pa:TPushConstAllocator;
pp:PPushConstAllocator;
begin
//recheck shaders
GPU_REGS.SG_REG:=@ctx.rt_info^.SHADERDATA.SG_REG;
GPU_REGS.CX_REG:=@CX_REG;
GPU_REGS.UC_REG:=@ctx.rt_info^.SHADERDATA.UC_REG;
CX_REG:=Default(TCONTEXT_REG_GROUP);
CX_REG.SPI_PS_INPUT_ENA :=ctx.rt_info^.SHADERDATA.SPI_PS_INPUT_ENA ;
CX_REG.SPI_PS_INPUT_ADDR :=ctx.rt_info^.SHADERDATA.SPI_PS_INPUT_ADDR ;
CX_REG.SPI_INTERP_CONTROL_0 :=ctx.rt_info^.SHADERDATA.SPI_INTERP_CONTROL_0 ;
CX_REG.SPI_PS_IN_CONTROL :=ctx.rt_info^.SHADERDATA.SPI_PS_IN_CONTROL ;
CX_REG.SPI_PS_INPUT_CNTL :=ctx.rt_info^.SHADERDATA.SPI_PS_INPUT_CNTL ;
CX_REG.DB_SHADER_CONTROL :=ctx.rt_info^.SHADERDATA.DB_SHADER_CONTROL ;
CX_REG.VGT_INSTANCE_STEP_RATE_0:=ctx.rt_info^.SHADERDATA.VGT_INSTANCE_STEP_RATE_0;
CX_REG.VGT_INSTANCE_STEP_RATE_1:=ctx.rt_info^.SHADERDATA.VGT_INSTANCE_STEP_RATE_1;
CX_REG.RENDER_TARGET :=ctx.rt_info^.SHADERDATA.RENDER_TARGET ;
pa.Init;
pp:=@pa;
ctx.rt_info^.ShaderGroup:=FetchShaderGroupRT(GPU_REGS,pp);
Assert(ctx.rt_info^.ShaderGroup<>nil);
//recheck shaders
RP_KEY.Clear;
if (ctx.rt_info^.RT_COUNT<>0) then
For i:=0 to ctx.rt_info^.RT_COUNT-1 do
begin
if (ctx.rt_info^.RT_INFO[i].CMASK_INFO.KEY.Addr<>nil) then
begin
meta_instance:=ctx.node^.scope.find_buffer_resource_instance(R_CMASK,
ctx.rt_info^.RT_INFO[i].CMASK_INFO.KEY.Addr,
ctx.rt_info^.RT_INFO[i].CMASK_INFO.SIZE);
Assert(meta_instance<>nil);
if meta_instance^.resource^.rclear then
begin
//-TM_READ +TM_CLEAR
ctx.rt_info^.RT_INFO[i].IMAGE_USAGE:=ctx.rt_info^.RT_INFO[i].IMAGE_USAGE and (not TM_READ) or TM_CLEAR;
meta_instance^.resource^.rclear:=False;
end;
end;
if (ctx.rt_info^.RT_INFO[i].FImageInfo.params.invalid<>0) then
begin
//skip
color_instance[i]:=nil;
end else
begin
color_instance[i]:=ctx.node^.scope.find_image_resource_instance(ctx.rt_info^.RT_INFO[i].FImageInfo);
Assert(color_instance[i]<>nil);
end;
//TODO: fixup cformat
flag:=0;
if (color_instance[i]<>nil) then
begin
flag:=GetMixedFlag(color_instance[i]^.curr);
end;
//TODO: fixup cformat
RP_KEY.AddColorAt(ctx.rt_info^.RT_INFO[i].attachment,
ctx.rt_info^.RT_INFO[i].FImageInfo.cformat,
ctx.rt_info^.RT_INFO[i].IMAGE_USAGE or
flag,
ctx.rt_info^.RT_INFO[i].FImageInfo.params.samples);
end;
if ctx.rt_info^.DB_ENABLE then
begin
//set clear flag on cleared htile
if (ctx.rt_info^.DB_INFO.HTILE_INFO.TILE_SURFACE_ENABLE<>0) then
begin
meta_instance:=ctx.node^.scope.find_buffer_resource_instance(R_HTILE,
ctx.rt_info^.DB_INFO.HTILE_INFO.KEY.Addr,
ctx.rt_info^.DB_INFO.HTILE_INFO.SIZE);
Assert(meta_instance<>nil);
if meta_instance^.resource^.rclear then
begin
//-TM_READ +TM_CLEAR
ctx.rt_info^.DB_INFO.DEPTH_USAGE:=ctx.rt_info^.DB_INFO.DEPTH_USAGE and (not TM_READ) or TM_CLEAR;
meta_instance^.resource^.rclear:=False;
end;
end;
//TODO: fixup cformat
RP_KEY.AddDepthAt(ctx.rt_info^.RT_COUNT, //add to last attachment id
ctx.rt_info^.DB_INFO.FImageInfo.cformat,
ctx.rt_info^.DB_INFO.DEPTH_USAGE,
ctx.rt_info^.DB_INFO.STENCIL_USAGE,
ctx.rt_info^.DB_INFO.FImageInfo.params.samples);
RP_KEY.SetZorderStage(ctx.rt_info^.DB_INFO.zorder_stage);
end;
//DumpShaderGroup(ctx.rt_info^.ShaderGroup);
RP:=FetchRenderPass(ctx.Cmd,@RP_KEY);
if (RP=nil) then
begin
DumpShaderGroup(ctx.rt_info^.ShaderGroup);
Assert(false,'FetchRenderPass');
end;
GP_KEY.Clear;
GP_KEY.FRenderPass :=RP;
GP_KEY.FShaderGroup:=ctx.rt_info^.ShaderGroup;
GP_KEY.SetBlendInfo(ctx.rt_info^.BLEND_INFO.logicOp,@ctx.rt_info^.BLEND_INFO.blendConstants);
GP_KEY.SetPrimType (ctx.rt_info^.PRIM_TYPE,GP_KEY.FShaderGroup.FKey.FPrimtype);
GP_KEY.SetPrimReset(ctx.rt_info^.PRIM_RESET);
if (ctx.rt_info^.VP_COUNT<>0) then
For i:=0 to ctx.rt_info^.VP_COUNT-1 do
begin
GP_KEY.AddVPort(ctx.rt_info^.VPORT[i],ctx.rt_info^.SCISSOR[i]);
end;
if (ctx.rt_info^.RT_COUNT<>0) then
For i:=0 to ctx.rt_info^.RT_COUNT-1 do
begin
GP_KEY.AddBlend(ctx.rt_info^.RT_INFO[i].blend);
end;
FAttrBuilder:=Default(TvAttrBuilder);
ctx.rt_info^.ShaderGroup.ExportAttrBuilder(FAttrBuilder,@ctx.rt_info^.USERDATA);
if not limits.VK_EXT_vertex_input_dynamic_state then
begin
GP_KEY.SetVertexInput(FAttrBuilder);
end;
GP_KEY.rasterizer :=ctx.rt_info^.RASTERIZATION.State;
GP_KEY.ClipSpace :=ctx.rt_info^.RASTERIZATION.ClipSpace;
GP_KEY.DepthClip :=ctx.rt_info^.RASTERIZATION.DepthClip;
GP_KEY.multisampling:=ctx.rt_info^.MULTISAMPLE;
GP_KEY.SetProvoking(TVkProvokingVertexModeEXT(ctx.rt_info^.PROVOKING));
if ctx.rt_info^.DB_ENABLE then
begin
GP_KEY.DepthStencil:=ctx.rt_info^.DB_INFO.ds_state;
end;
GP:=FetchGraphicsPipeline(ctx.Cmd,@GP_KEY);
if limits.VK_KHR_imageless_framebuffer then
begin
FB_KEY:=Default(TvFramebufferImagelessKey);
FB_KEY.SetRenderPass(RP);
FB_KEY.SetSize(ctx.rt_info^.SCREEN_SIZE);
if (ctx.rt_info^.RT_COUNT<>0) then
For i:=0 to ctx.rt_info^.RT_COUNT-1 do
begin
//TODO: fixup cformat
FB_KEY.AddImageAt(ctx.rt_info^.RT_INFO[i].FImageInfo);
end;
if ctx.rt_info^.DB_ENABLE then
begin
//TODO: fixup cformat
FB_KEY.AddImageAt(ctx.rt_info^.DB_INFO.FImageInfo);
end;
end else
begin
FB_KEY2:=Default(TvFramebufferBindedKey);
FB_KEY2.SetRenderPass(RP);
FB_KEY2.SetSize(ctx.rt_info^.SCREEN_SIZE);
end;
ctx.Render:=Default(TvRenderPassBeginInfo);
ctx.Render.SetRenderPass(RP);
ctx.Render.SetRenderArea(ctx.rt_info^.SCREEN_RECT);
if limits.VK_KHR_imageless_framebuffer then
begin
FB:=FetchFramebufferImageless(ctx.Cmd,@FB_KEY);
ctx.Render.SetFramebuffer(FB);
end;
if (ctx.rt_info^.RT_COUNT<>0) then
For i:=0 to ctx.rt_info^.RT_COUNT-1 do
begin
//ri:=TvImage2(color_instance[i]^.resource^.rimage);
ri:=nil;
if (ri<>nil) then
begin
ctx.Cmd.RefTo(ri);
end;
if (ri<>nil) then
if (ri.is_invalid) then
begin
color_instance[i]^.resource^.rimage:=nil;
ri:=nil;
end;
if (ri=nil) then
begin
img_usage:=[];
if (color_instance[i]<>nil) then
begin
{[iu_attachment]}
img_usage:=color_instance[i]^.curr.img_usage;
end;
ri:=FetchImageForce(ctx,
ctx.rt_info^.RT_INFO[i].FImageInfo,
img_usage);
if (color_instance[i]<>nil) then
begin
color_instance[i]^.resource^.rimage:=ri;
end;
end;
pm4_load_from(ctx.Cmd,ri,ctx.rt_info^.RT_INFO[i].IMAGE_USAGE);
iv:=ri.FetchView(ctx.Cmd,ctx.rt_info^.RT_INFO[i].FImageView,iu_attachment);
if (color_instance[i]<>nil) then
begin
ri.PushBarrier(ctx.Cmd,
GetAccessMaskImg(color_instance[i]^.curr),
GetImageLayout(color_instance[i]^.curr),
ord(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) or
ord(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT) );
end;
//
ctx.Render.AddClearColor(ctx.rt_info^.RT_INFO[i].CLEAR_COLOR);
Writeln('BindFrame:->[',i,']'#13#10,
' 0x',HexStr(ri.FHandle,16),':',GetVkFormatStr(ri.key.cformat),':',ri.FName,'->'#13#10,
' 0x',HexStr(iv.FHandle,16),':',GetVkFormatStr(iv.key.cformat),':',iv.FName);
//
if limits.VK_KHR_imageless_framebuffer then
begin
ctx.Render.AddImageView(iv);
end else
begin
FB_KEY2.AddImageView(iv);
end;
//
end;
if ctx.rt_info^.DB_ENABLE then
begin
d_instance:=ctx.node^.scope.find_image_resource_instance(GetDepthOnly (ctx.rt_info^.DB_INFO.FImageInfo));
s_instance:=ctx.node^.scope.find_image_resource_instance(GetStencilOnly(ctx.rt_info^.DB_INFO.FImageInfo));
ri:=nil;
rd:=nil;
rs:=nil;
{
if (d_instance<>nil) then
begin
rd:=TvCustomImage2(d_instance^.resource^.rimage);
end;
}
if (rd<>nil) then
begin
ctx.Cmd.RefTo(rd);
end;
if (rd<>nil) then
if (rd.is_invalid) then
begin
d_instance^.resource^.rimage:=nil;
rd:=nil;
end;
{
if (s_instance<>nil) then
begin
rs:=TvCustomImage2(s_instance^.resource^.rimage);
end;
}
if (rs<>nil) then
begin
ctx.Cmd.RefTo(rs);
end;
if (rs<>nil) then
if (rs.is_invalid) then
begin
s_instance^.resource^.rimage:=nil;
rs:=nil;
end;
if (rd<>nil) then
begin
ri:=TvImage2(rd.Parent);
end else
if (rs<>nil) then
begin
ri:=TvImage2(rs.Parent);
end;
if (ri<>nil) then
if (ri.DepthOnly <>rd) or
(ri.StencilOnly<>rs) then
begin
ri:=nil;
rd:=nil;
rs:=nil;
end;
//
if (ri=nil) then
begin
ri:=FetchImageForce(ctx,
ctx.rt_info^.DB_INFO.FImageInfo,
[iu_depthstenc]);
Assert(ri<>nil);
rd:=ri.DepthOnly;
rs:=ri.StencilOnly;
if (d_instance<>nil) then
begin
d_instance^.resource^.rimage:=rd;
ctx.RefToParent(ri);
end;
if (s_instance<>nil) then
begin
s_instance^.resource^.rimage:=rs;
ctx.RefToParent(ri);
end;
end;
//
pm4_load_from(ctx.Cmd,rd,ctx.rt_info^.DB_INFO.DEPTH_USAGE);
pm4_load_from(ctx.Cmd,rs,ctx.rt_info^.DB_INFO.STENCIL_USAGE);
iv:=ri.FetchView(ctx.Cmd,ctx.rt_info^.DB_INFO.FImageView,iu_depthstenc);
ri.PushBarrier(ctx.Cmd,
GetDepthStencilAccessAttachMask(ctx.rt_info^.DB_INFO.DEPTH_USAGE,ctx.rt_info^.DB_INFO.STENCIL_USAGE),
GetDepthStencilSendLayout(ctx.rt_info^.DB_INFO.DEPTH_USAGE,ctx.rt_info^.DB_INFO.STENCIL_USAGE),
ord(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) or
ctx.rt_info^.DB_INFO.zorder_stage
);
//
ctx.Render.AddClearColor(ctx.rt_info^.DB_INFO.CLEAR_VALUE);
Writeln('BindDepth:->'#13#10,
' 0x',HexStr(ri.FHandle,16),':',GetVkFormatStr(ri.key.cformat),':',ri.FName,'->'#13#10,
' 0x',HexStr(iv.FHandle,16),':',GetVkFormatStr(iv.key.cformat),':',iv.FName);
//
if limits.VK_KHR_imageless_framebuffer then
begin
ctx.Render.AddImageView(iv);
end else
begin
FB_KEY2.AddImageView(iv);
end;
//
end;
if not limits.VK_KHR_imageless_framebuffer then
begin
FB:=FetchFramebufferBinded(ctx.Cmd,@FB_KEY2);
ctx.Render.SetFramebuffer(FB);
end;
////////
FUniformBuilder:=Default(TvUniformBuilder);
ctx.rt_info^.ShaderGroup.ExportUnifBuilder(FUniformBuilder,@ctx.rt_info^.USERDATA);
Prepare_Uniforms(ctx,BP_GRAPHICS,FUniformBuilder);
////////
DumpShaderGroup(ctx.rt_info^.ShaderGroup);
if not ctx.Cmd.BeginRenderPass(@ctx.Render,GP) then
begin
Writeln(stderr,'BeginRenderPass(ctx.Render)');
DumpShaderGroup(ctx.rt_info^.ShaderGroup);
Assert (false ,'BeginRenderPass(ctx.Render)');
end;
ctx.Cmd.SetVertexInput (FAttrBuilder);
ctx.Cmd.BindVertexBuffers(FAttrBuilder);
Bind_Uniforms(ctx,
BP_GRAPHICS,
FUniformBuilder);
Bind_Pushs(ctx,ctx.rt_info^.ShaderGroup,@ctx.rt_info^.USERDATA);
end;
procedure pm4_Writeback_After(var ctx:t_me_render_context);
var
//i:Integer;
ri:TvImage2;
//rd:TvCustomImage2;
//rs:TvCustomImage2;
resource_instance:p_pm4_resource_instance;
//d_instance:p_pm4_resource_instance;
//s_instance:p_pm4_resource_instance;
begin
//write back
resource_instance:=ctx.node^.scope.Min;
while (resource_instance<>nil) do
begin
if (resource_instance^.resource^.rtype=R_IMG) then
begin
ri:=TvImage2(resource_instance^.resource^.rimage);
if (ri<>nil) then
if not ri.IsDepthAndStencil then
begin
//is write on current stage
if ((resource_instance^.curr.mem_usage and TM_WRITE)<>0) then
begin
ri.mark_init;
//is used in fuzzy match resources
if (resource_instance^.next_overlap.mem_usage<>0) then
begin
pm4_write_back(ctx.Cmd,ri);
//
resource_instance^.resource^.rwriteback:=False;
end else
begin
//
resource_instance^.resource^.rwriteback:=True;
end;
end;
end;
end;
resource_instance:=ctx.node^.scope.Next(resource_instance);
end;
{
if ctx.rt_info^.DB_ENABLE then
begin
d_instance:=ctx.node^.scope.find_image_resource_instance(GetDepthOnly (ctx.rt_info^.DB_INFO.FImageInfo));
s_instance:=ctx.node^.scope.find_image_resource_instance(GetStencilOnly(ctx.rt_info^.DB_INFO.FImageInfo));
ri:=nil;
rd:=nil;
rs:=nil;
if (d_instance<>nil) then
begin
rd:=TvCustomImage2(d_instance^.resource^.rimage);
end;
if (s_instance<>nil) then
begin
rs:=TvCustomImage2(s_instance^.resource^.rimage);
end;
if (rd<>nil) then
begin
rd.mark_init;
Assert(d_instance<>nil);
if (d_instance^.next_overlap.mem_usage<>0) then
begin
pm4_write_back(ctx.Cmd,rd);
//
d_instance^.resource^.rwriteback:=False;
end else
begin
//
d_instance^.resource^.rwriteback:=True;
end;
end;
//
if (rs<>nil) then
begin
rs.mark_init;
Assert(s_instance<>nil);
if (s_instance^.next_overlap.mem_usage<>0) then
begin
pm4_write_back(ctx.Cmd,rs);
//
s_instance^.resource^.rwriteback:=False;
end else
begin
//
s_instance^.resource^.rwriteback:=True;
end;
end;
//
end;
}
//write back
end;
procedure pm4_Writeback_Finish(var ctx:t_me_render_context);
var
ri:TvImage2;
ht:TvMetaHtile;
hc:TvMetaCmask;
resource:p_pm4_resource;
begin
if (ctx.stream=nil) then Exit;
//write back
resource:=ctx.stream^.resource_set.Min;
while (resource<>nil) do
begin
if resource^.rwriteback then
begin
if (resource^.rtype=R_IMG) then
begin
ri:=TvImage2(resource^.rimage);
Assert(ri<>nil);
//
pm4_write_back(ctx.Cmd,ri);
//
resource^.rwriteback:=False;
end;
end;
if (resource^.rtype=R_HTILE) then
begin
ht:=FetchHtile(ctx.Cmd,resource^.rkey,resource^.rsize);
ht.rclear:=resource^.rclear;
end else
if (resource^.rtype=R_CMASK) then
begin
hc:=FetchCmask(ctx.Cmd,resource^.rkey,resource^.rsize);
hc.rclear:=resource^.rclear;
end;
resource:=ctx.stream^.resource_set.Next(resource);
end;
//write back
end;
procedure pm4_Hint(var ctx:t_me_render_context;node:p_pm4_node_hint);
begin
ctx.InsertLabel(PChar(@node^.data));
end;
procedure pm4_Draw(var ctx:t_me_render_context;node:p_pm4_node_draw);
begin
ctx.rt_info:=@node^.rt_info;
if (ctx.rt_info^.RT_COUNT=0) and (not ctx.rt_info^.DB_ENABLE) then
begin
ctx.InsertLabel('decompress Dcc/Depth/Fmask');
//zero attachment (decompress Dcc/Depth/Fmask) skip
Exit;
end;
//
pm4_InitStream(ctx);
//
//if not ctx.WaitConfirmOrSwitch then Exit;
StartFrameCapture;
ctx.BeginCmdBuffer;
//
if (node^.ntype<>ntClearDepth) then
begin
pm4_DrawPrepare(ctx);
end;
ctx.Cmd.FinstanceCount:=node^.numInstances;
ctx.Cmd.FINDEX_TYPE :=TVkIndexType(node^.INDEX_TYPE);
case node^.ntype of
ntDrawIndex2:
begin
Writeln(node^.id,':DrawIndexOffset2(',node^.indexOffset,',',node^.vertexOffset,',',node^.indexCount,')');
ctx.Cmd.DrawIndexOffset2(Pointer(node^.indexBase),node^.indexOffset,node^.vertexOffset,node^.indexCount);
end;
ntDrawIndexOffset2:
begin
Writeln(node^.id,':DrawIndexOffset2(',node^.indexOffset,',',node^.vertexOffset,',',node^.indexCount,')');
ctx.Cmd.DrawIndexOffset2(Pointer(node^.indexBase),node^.indexOffset,node^.vertexOffset,node^.indexCount);
end;
ntDrawIndexAuto:
begin
Writeln(node^.id,':DrawIndexAuto(',node^.vertexOffset,',',node^.indexCount,')');
ctx.Cmd.DrawIndexAuto(node^.vertexOffset,node^.indexCount);
end;
ntClearDepth:
begin
pm4_ClearDepth(node^.rt_info,ctx);
end;
else;
Assert(false,'pm4_Draw');
end;
/////////
pm4_Writeback_After(ctx);
ctx.FlushParent;
end;
procedure pm4_Resolve(var ctx:t_me_render_context;node:p_pm4_node_Resolve);
var
ri_src,ri_dst:TvImage2;
range:TVkImageResolve;
begin
//
pm4_InitStream(ctx);
//
//if not ctx.WaitConfirmOrSwitch then Exit;
StartFrameCapture;
ctx.BeginCmdBuffer;
ctx.Cmd.EndRenderPass;
ri_src:=FetchImageForce(ctx,
node^.RT[0].FImageInfo,
[iu_transfer]
);
Assert(ri_src<>nil);
ri_dst:=FetchImageForce(ctx,
node^.RT[1].FImageInfo,
[iu_transfer]
);
Assert(ri_dst<>nil);
ri_src.PushBarrier(ctx.Cmd,
ord(VK_ACCESS_TRANSFER_READ_BIT),
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
ord(VK_PIPELINE_STAGE_TRANSFER_BIT));
ri_dst.PushBarrier(ctx.Cmd,
ord(VK_ACCESS_TRANSFER_WRITE_BIT),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
ord(VK_PIPELINE_STAGE_TRANSFER_BIT));
range:=Default(TVkImageResolve);
range.srcSubresource:=ri_src.GetSubresLayer;
range.dstSubresource:=ri_dst.GetSubresLayer;
range.srcOffset.Create(node^.SCREEN.offset.x,node^.SCREEN.offset.y,0);
range.dstOffset:=range.srcOffset;
range.extent.Create(node^.SCREEN.extent.width,node^.SCREEN.extent.height,1);
ctx.Cmd.ResolveImage(ri_src.FHandle,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
ri_dst.FHandle,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1,@range);
ctx.FlushParent;
end;
procedure pm4_FastClear(var ctx:t_me_render_context;node:p_pm4_node_FastClear);
var
ri:TvImage2;
range:TVkImageSubresourceRange;
resource_instance:p_pm4_resource_instance;
begin
{
//
pm4_InitStream(ctx);
//
StartFrameCapture;
ctx.BeginCmdBuffer;
ctx.Cmd.EndRenderPass;
resource_instance:=ctx.node^.scope.find_image_resource_instance(node^.RT.FImageInfo);
Assert(resource_instance<>nil);
ri:=FetchImage(ctx.Cmd,
node^.RT.FImageInfo,
resource_instance^.curr.img_usage
);
ri.PushBarrier(ctx.Cmd,
ord(VK_ACCESS_TRANSFER_WRITE_BIT),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
ord(VK_PIPELINE_STAGE_TRANSFER_BIT));
range:=ri.GetSubresRange;
ctx.Cmd.ClearColorImage(ri.FHandle,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@node^.RT.CLEAR_COLOR,
1,@range);
//writeback
ri.mark_init;
if (resource_instance^.next_overlap.mem_usage<>0) then
begin
pm4_write_back(ctx.Cmd,ri);
//
resource_instance^.resource^.rwriteback:=False;
end else
begin
//
resource_instance^.resource^.rwriteback:=True;
end;
//writeback
}
end;
procedure Prepare_buf_clear(var ctx:t_me_render_context;
var UniformBuilder:TvUniformBuilder);
var
i:Integer;
resource_instance:p_pm4_resource_instance;
buffer,meta:p_pm4_resource;
hb:TvMetaBuffer;
begin
buffer:=nil;
//buffers
if (Length(UniformBuilder.FBuffers)<>0) then
begin
For i:=0 to High(UniformBuilder.FBuffers) do
With UniformBuilder.FBuffers[i] do
begin
//get buffer with write usege
if ((memuse and TM_WRITE)<>0) then
begin
resource_instance:=ctx.node^.scope.find_buffer_resource_instance(R_BUF,addr,size);
if (resource_instance<>nil) then
begin
buffer:=resource_instance^.resource;
Break;
end;
end;
end;
end;
//buffers
//TODO: get clear value!
Assert(buffer<>nil);
//set flag by buffer in current stream
buffer^.rclear:=True;
//set flag by buffer to next stream
hb:=FetchBuffer(ctx.Cmd,buffer^.rkey.Addr,buffer^.rsize);
Assert(hb<>nil);
hb.rclear:=True;
//set flag by htile in current stream
meta:=ctx.stream^.find_buffer_resource(R_HTILE,buffer^.rkey.Addr,buffer^.rsize);
//
if (meta<>nil) then
begin
meta^.rclear:=True;
end;
//set flag by cmask in current stream
meta:=ctx.stream^.find_buffer_resource(R_CMASK,buffer^.rkey.Addr,buffer^.rsize);
//
if (meta<>nil) then
begin
meta^.rclear:=True;
end;
end;
function pm4_DispatchPrepare(var ctx:t_me_render_context;node:p_pm4_node_Dispatch):Boolean;
var
dst:PGPU_USERDATA;
CP_KEY:TvComputePipelineKey;
CP:TvComputePipeline2;
FUniformBuilder:TvUniformBuilder;
//
GPU_REGS:TGPU_REGS;
pa:TPushConstAllocator;
pp:PPushConstAllocator;
begin
Result:=False;
////////
//hack
dst:=Pointer(@node^.COMPUTE_GROUP.COMPUTE_USER_DATA)-Ptruint(@TGPU_USERDATA(nil^).A[vShaderStageCs]);
//recheck shaders
GPU_REGS.SC_REG:=@node^.COMPUTE_GROUP;
pa.Init;
pp:=@pa;
node^.ShaderGroup:=FetchShaderGroupCS(GPU_REGS,pp);
Assert(node^.ShaderGroup<>nil);
//recheck shaders
CP_KEY.FShaderGroup:=node^.ShaderGroup;
CP:=FetchComputePipeline(ctx.Cmd,@CP_KEY);
FUniformBuilder:=Default(TvUniformBuilder);
CP_KEY.FShaderGroup.ExportUnifBuilder(FUniformBuilder,dst);
//htile/cmask/rt heuristic
if (CP_KEY.FShaderGroup.FKey.FShaders[vShaderStageCs].IsCSClearShader) then
begin
Prepare_buf_clear(ctx,FUniformBuilder);
//
ctx.InsertLabel('clear htile/cmask/rt');
end;
Prepare_Uniforms(ctx,BP_COMPUTE,FUniformBuilder);
////////
DumpShaderGroup(CP_KEY.FShaderGroup);
if not ctx.Cmd.BindCompute(CP) then
begin
Writeln(stderr,'BindCompute(CP)');
DumpShaderGroup(CP_KEY.FShaderGroup);
Assert(false ,'BindCompute(CP)');
end;
Bind_Uniforms(ctx,
BP_COMPUTE,
FUniformBuilder);
Bind_Pushs(ctx,CP_KEY.FShaderGroup,dst);
Result:=True;
end;
procedure pm4_DispatchDirect(var ctx:t_me_render_context;node:p_pm4_node_DispatchDirect);
begin
//
pm4_InitStream(ctx);
//
//if not ctx.WaitConfirmOrSwitch then Exit;
StartFrameCapture;
ctx.BeginCmdBuffer;
//
ctx.Cmd.EndRenderPass;
if not pm4_DispatchPrepare(ctx,node) then Exit;
Writeln('DispatchDirect(',node^.DIM_X,',',node^.DIM_Y,',',node^.DIM_Z,')');
ctx.Cmd.DispatchDirect(node^.DIM_X,node^.DIM_Y,node^.DIM_Z);
/////////
pm4_Writeback_After(ctx);
ctx.FlushParent;
end;
procedure pm4_DispatchIndirect(var ctx:t_me_render_context;node:p_pm4_node_DispatchIndirect);
begin
//
pm4_InitStream(ctx);
//
//if not ctx.WaitConfirmOrSwitch then Exit;
StartFrameCapture;
ctx.BeginCmdBuffer;
//
ctx.Cmd.EndRenderPass;
if not pm4_DispatchPrepare(ctx,node) then Exit;
Writeln('DispatchIndirect(0x',HexStr(node^.BASE,11),',0x',HexStr(node^.Offset,8),')');
ctx.Cmd.DispatchIndirect(Pointer(node^.BASE),node^.Offset);
/////////
pm4_Writeback_After(ctx);
ctx.FlushParent;
end;
function mul_div_u64(m,d,v:QWORD):QWORD; sysv_abi_default; assembler; nostackframe;
asm
movq v,%rax
mulq m
divq d
end;
const
GLOBAL_CLOCK_FREQUENCY =100*1000*1000; //100MHz
GPU_CORE_CLOCK_FREQUENCY=800*1000*1000; //800MHz
//neo mode & ext_gpu_timer -> 911*000*000
procedure pm4_EventWriteEop(var ctx:t_me_render_context;node:p_pm4_node_EventWriteEop);
var
curr,diff:QWORD;
addr_dmem:Pointer;
data_size:Byte;
begin
if not ctx.stream^.hint_repeat then
begin
ctx.InsertLabel(PChar('WriteEop:0x'+HexStr(QWORD(node^.addr),10)));
if p_print_gpu_ops then
begin
Writeln('WriteEop:0x'+HexStr(QWORD(node^.addr),10));
end;
ctx.stream^.hint_repeat:=True;
end;
if not ctx.WaitConfirmOrSwitch then Exit;
ctx.stream^.hint_repeat:=False;
curr:=md_rdtsc_unit;
diff:=curr-ctx.rel_time;
if (node^.addr<>nil) then
begin
addr_dmem:=nil;
if (node^.dataSel<>EVENTWRITEEOP_DATA_SEL_DISCARD) then
begin
addr_dmem:=get_dmem_ptr(node^.addr);
end;
if (addr_dmem<>nil) then
Case node^.dataSel of
//
EVENTWRITEEOP_DATA_SEL_DISCARD:
data_size:=0;
//32bit data
EVENTWRITEEOP_DATA_SEL_SEND_DATA32:
begin
PDWORD(addr_dmem)^:=node^.data;
data_size:=4;
end;
//64bit data
EVENTWRITEEOP_DATA_SEL_SEND_DATA64:
begin
PQWORD(addr_dmem)^:=node^.data;
data_size:=8;
end;
//system 100Mhz global clock. (relative time)
EVENTWRITEEOP_DATA_SEL_SEND_GPU_CLOCK:
begin
PQWORD(addr_dmem)^:=mul_div_u64(GLOBAL_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);
data_size:=8;
end;
//GPU 800Mhz clock. (relative time)
EVENTWRITEEOP_DATA_SEL_SEND_CP_PERFCOUNTER:
begin
PQWORD(addr_dmem)^:=mul_div_u64(GPU_CORE_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);
data_size:=8;
end;
else
Assert(false,'pm4_EventWriteEop');
end;
vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.addr),QWORD(node^.addr)+data_size,nil,M_DMEM_WRITE);
end;
if (node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT) or
(node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT_ON_CONFIRM) then
begin
ctx.me^.knote_eventid($40,0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???)
end;
end;
procedure pm4_SubmitFlipEop(var ctx:t_me_render_context;node:p_pm4_node_SubmitFlipEop);
var
curr:QWORD;
begin
if not ctx.stream^.hint_repeat then
begin
ctx.InsertLabel(PChar('SubmitFlipEop:0x'+HexStr(node^.eop_value,16)));
ctx.stream^.hint_repeat:=True;
end;
if not ctx.WaitConfirmOrSwitch then Exit;
ctx.stream^.hint_repeat:=False;
if (ctx.me^.on_submit_flip_eop<>nil) then
begin
ctx.me^.on_submit_flip_eop(node^.eop_value);
end;
curr:=md_rdtsc_unit;
if (node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT) or
(node^.intSel=EVENTWRITEEOP_INT_SEL_SEND_INT_ON_CONFIRM) then
begin
ctx.me^.knote_eventid($40,0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???)
end;
end;
function get_compute_pipe_id(buft:t_pm4_stream_type):Byte; inline;
begin
Result:=ord(buft) - ord(stCompute0);
end;
procedure pm4_ReleaseMem(var ctx:t_me_render_context;node:p_pm4_node_ReleaseMem);
var
curr,diff:QWORD;
addr_dmem:Pointer;
data_size:Byte;
begin
if not ctx.stream^.hint_repeat then
begin
ctx.InsertLabel(PChar('ReleaseMem:0x'+HexStr(QWORD(node^.addr),10)));
ctx.stream^.hint_repeat:=True;
end;
if not ctx.WaitConfirmOrSwitch then Exit;
ctx.stream^.hint_repeat:=False;
curr:=md_rdtsc_unit;
diff:=curr-ctx.rel_time;
if (node^.addr<>nil) then
begin
addr_dmem:=nil;
if (node^.srcSel<>RELEASEMEM_DATA_SEL_DISCARD) then
begin
addr_dmem:=get_dmem_ptr(node^.addr);
end;
Case node^.dstSel of
RELEASEMEM_DST_SEL_MEMORY:;
RELEASEMEM_DST_SEL_L2 :Assert(false,'RELEASEMEM_DST_SEL_L2');
else
Assert(false,'pm4_ReleaseMem:dstSel');
end;
if (addr_dmem<>nil) then
Case node^.srcSel of
//
RELEASEMEM_DATA_SEL_DISCARD:
data_size:=0;
//32bit data
RELEASEMEM_DATA_SEL_SEND_DATA32:
begin
PDWORD(addr_dmem)^:=node^.data;
data_size:=4;
end;
//64bit data
RELEASEMEM_DATA_SEL_SEND_DATA64:
begin
PQWORD(addr_dmem)^:=node^.data;
data_size:=8;
end;
//system 100Mhz global clock. (relative time)
RELEASEMEM_DATA_SEL_SEND_GPU_CLOCK:
begin
PQWORD(addr_dmem)^:=mul_div_u64(GLOBAL_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);
data_size:=8;
end;
//GPU 800Mhz clock. (relative time)
RELEASEMEM_DATA_SEL_SEND_CP_PERFCOUNTER:
begin
PQWORD(addr_dmem)^:=mul_div_u64(GPU_CORE_CLOCK_FREQUENCY,UNIT_PER_SEC,diff);
data_size:=8;
end;
else
Assert(false,'pm4_ReleaseMem:srcSel');
end;
vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.addr),QWORD(node^.addr)+data_size,nil,M_DMEM_WRITE);
end;
if (node^.intSel=RELEASEMEM_INT_SEL_SEND_INT) or
(node^.intSel=RELEASEMEM_INT_SEL_SEND_INT_ON_CONFIRM) then
begin
ctx.me^.knote_eventid(get_compute_pipe_id(ctx.stream^.buft),0,curr*NSEC_PER_UNIT,0); //(absolute time) (freq???)
end;
end;
procedure pm4_EventWrite(var ctx:t_me_render_context;node:p_pm4_node_EventWrite);
begin
Case node^.eventType of
CS_PARTIAL_FLUSH, //CS
CACHE_FLUSH_AND_INV_EVENT, //CB,DB
DB_CACHE_FLUSH_AND_INV, //DB
FLUSH_AND_INV_DB_META, //HTILE
FLUSH_AND_INV_CB_META, //CMASK
FLUSH_AND_INV_CB_PIXEL_DATA: //CB
begin
if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
begin
//GPU
ctx.Cmd.WriteEvent(node^.eventType);
end;
end;
//FLUSH_AND_INV_CB_DATA_TS :Writeln(' eventType=FLUSH_AND_INV_CB_DATA_TS');
THREAD_TRACE_MARKER:
begin
ctx.InsertLabel('THREAD_TRACE_MARKER');
end;
PIPELINESTAT_STOP:
begin
ctx.InsertLabel('PIPELINESTAT_STOP');
end;
PERFCOUNTER_START:
begin
ctx.InsertLabel('PERFCOUNTER_START');
end;
PERFCOUNTER_STOP:
begin
ctx.InsertLabel('PERFCOUNTER_STOP');
end;
PERFCOUNTER_SAMPLE:
begin
ctx.InsertLabel('PERFCOUNTER_SAMPLE');
end;
PIXEL_PIPE_STAT_RESET: //[OcclusionQuery] Reset this query
begin
Writeln(stderr,'TODO:PIXEL_PIPE_STAT_RESET');
end;
else
begin
Writeln(stderr,'EventWrite eventType=0x',HexStr(node^.eventType,2));
Assert (false ,'EventWrite eventType=0x'+HexStr(node^.eventType,2));
end;
end;
end;
var
fake_zpass_counter:QWORD=0;
procedure pm4_PipeStatDump(var ctx:t_me_render_context;node:p_pm4_node_PipeStatDump);
const
c_db_counts:array[0..1] of Byte=(8,16);
c_db_stride:array[0..3] of Byte=(4,8,16,32);
c_ready_mask_64=QWORD(1) shl 63;
c_ready_mask_32=QWORD(1) shl 31;
var
i,count,stride:Byte;
instance_mask :Word;
addr_dmem:Pointer;
begin
if not ctx.WaitConfirmOrSwitch then Exit;
count :=c_db_counts[p_neomode and 1];
stride:=c_db_stride[node^.Control.stride];
instance_mask:=node^.Control.instance_enable;
addr_dmem:=get_dmem_ptr(Pointer(node^.address));
fake_zpass_counter:=fake_zpass_counter+1;
if (stride=4) then
begin
For i:=0 to count-1 do
if (instance_mask and (1 shl i))<>0 then
begin
PDWORD(addr_dmem)[i]:=c_ready_mask_32 or fake_zpass_counter;
end;
end else
begin
For i:=0 to count-1 do
begin
if (instance_mask and (1 shl i))<>0 then
begin
PQWORD(addr_dmem)^:=c_ready_mask_64 or fake_zpass_counter;
end;
addr_dmem:=addr_dmem+stride;
end;
end;
end;
procedure pm4_EventWriteEos(var ctx:t_me_render_context;node:p_pm4_node_EventWriteEos);
var
addr_dmem:Pointer;
begin
if (node^.addr<>nil) then
Case node^.command of
//32bit data
EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY:
begin
if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
begin
//GPU
ctx.Cmd.WriteEos(node^.eventType,node^.addr,node^.data,false);
end else
begin
//soft
addr_dmem:=get_dmem_ptr(node^.addr);
PDWORD(addr_dmem)^:=node^.data;
vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.addr),QWORD(node^.addr)+4,nil,M_DMEM_WRITE);
end;
end;
else
Assert(false,'pm4_EventWriteEos');
end;
end;
procedure pm4_WriteData(var ctx:t_me_render_context;node:p_pm4_node_WriteData);
var
src_dmem:PDWORD;
dst_dmem:PDWORD;
byteSize:QWORD;
begin
StartFrameCapture;
case node^.dstSel of
WRITE_DATA_DST_SEL_MEMORY_SYNC, //writeDataInline
WRITE_DATA_DST_SEL_TCL2, //writeDataInlineThroughL2
WRITE_DATA_DST_SEL_MEMORY_ASYNC:
if (node^.dst<>nil) then
begin
if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
begin
//GPU
byteSize:=node^.num_dw*SizeOf(DWORD);
if p_print_gpu_ops then
begin
Writeln('[1]WriteData:0x',HexStr(QWORD(node^.src),10),'->',HexStr(QWORD(node^.dst),10),':size=0x',HexStr(byteSize,5));
end;
ctx.Cmd.dmaData1(node^.src,node^.dst,byteSize,node^.wrConfirm);
end else
begin
//soft
if p_print_gpu_ops then
begin
Writeln('[2]WriteData:0x',HexStr(QWORD(node^.src),10),'->',HexStr(QWORD(node^.dst),10),':size=0x',HexStr(byteSize,5));
end;
src_dmem:=get_dmem_ptr(node^.src);
dst_dmem:=get_dmem_ptr(node^.dst);
byteSize:=node^.num_dw*SizeOf(DWORD);
Move(src_dmem^,dst_dmem^,byteSize);
vm_map_track_trigger(p_proc.p_vmspace,QWORD(node^.dst),QWORD(node^.dst)+byteSize,nil,M_DMEM_WRITE);
end;
end;
else
Assert(false,'WriteData: dstSel=0x'+HexStr(node^.dstSel,1));
end;
end;
const
DmaDataStr:array[0..15] of Pchar=(
{0} 'Memory',
{1} 'Gds',
{2} 'Data',
{3} 'MemoryUsingL2',
{4} 'Register',
{5} '0x5',
{6} '0x6',
{7} '0x7',
{8} '0x8',
{9} '0x9',
{A} '0xA',
{B} '0xB',
{C} 'RegisterNoIncrement',
{D} '0xD',
{E} '0xE',
{F} '0xF'
);
procedure pm4_DmaData(var ctx:t_me_render_context;node:p_pm4_node_DmaData);
var
adrSrc:QWORD;
adrDst:QWORD;
adrSrc_dmem:Pointer;
adrDst_dmem:Pointer;
byteCount:DWORD;
srcSel,dstSel:Byte;
begin
StartFrameCapture;
adrDst :=node^.dst;
adrSrc :=node^.src;
byteCount:=node^.numBytes;
srcSel :=node^.srcSel;
dstSel :=node^.dstSel;
case (srcSel or (dstSel shl 4)) of
(kDmaDataSrcMemory or (kDmaDataDstMemory shl 4)),
(kDmaDataSrcMemoryUsingL2 or (kDmaDataDstMemory shl 4)),
(kDmaDataSrcMemory or (kDmaDataDstMemoryUsingL2 shl 4)),
(kDmaDataSrcMemoryUsingL2 or (kDmaDataDstMemoryUsingL2 shl 4)):
begin
if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
begin
//GPU
ctx.Cmd.dmaData1(Pointer(adrSrc),Pointer(adrDst),byteCount,node^.cpSync<>0);
//GPU
end else
begin
//soft
adrDst_dmem:=get_dmem_ptr(Pointer(adrDst));
adrSrc_dmem:=get_dmem_ptr(Pointer(adrSrc));
Move(adrSrc_dmem^,adrDst_dmem^,byteCount);
vm_map_track_trigger(p_proc.p_vmspace,QWORD(adrDst),QWORD(adrDst)+byteCount,nil,M_DMEM_WRITE);
//soft
end;
end;
(kDmaDataSrcData or (kDmaDataDstMemory shl 4)),
(kDmaDataSrcData or (kDmaDataDstMemoryUsingL2 shl 4)):
begin
if (ctx.Cmd<>nil) and ctx.Cmd.IsAllocated then
begin
//GPU
ctx.Cmd.dmaData2(DWORD(adrSrc),Pointer(adrDst),byteCount,node^.cpSync<>0);
//GPU
end else
begin
//soft
adrDst_dmem:=get_dmem_ptr(Pointer(adrDst));
FillDWORD(adrDst_dmem^,(byteCount div 4),DWORD(adrSrc));
vm_map_track_trigger(p_proc.p_vmspace,QWORD(adrDst),QWORD(adrDst)+byteCount,nil,M_DMEM_WRITE);
//soft
end;
end;
else
Writeln('DmaData: srcSel='+DmaDataStr[srcSel and 15]+' dstSel='+DmaDataStr[dstSel and 15]);
Assert(false,'DmaData: srcSel='+DmaDataStr[srcSel and 15]+' dstSel='+DmaDataStr[dstSel and 15]);
end;
end;
function get_dce_label_id(addr_dmem:Pointer):Integer;
begin
Result:=-1;
if (QWORD(addr_dmem)>=QWORD(@dev_dce.dce_page^.labels) ) and
(QWORD(addr_dmem)< QWORD(@dev_dce.dce_page^.label_)+8) then
begin
Result:=(QWORD(addr_dmem)-QWORD(@dev_dce.dce_page^.labels)) div 8;
end;
end;
Function me_test_mem(node:p_pm4_node_WaitRegMem;{var }dmem:PDWORD):Boolean;
var
val,ref:DWORD;
begin
{
dmem:=nil;
if not get_dmem_ptr(node^.pollAddr,@dmem,nil) then
begin
Assert(false,'addr:0x'+HexStr(node^.pollAddr)+' not in dmem!');
end;
}
//Writeln('me_test_mem:labels[',get_dce_label_id(dmem),']=',dmem^,' refValue=',node^.refValue,' compareFunc=',node^.compareFunc);
val:=dmem^ and node^.mask;
ref:=node^.refValue;
Case node^.compareFunc of
WAIT_REG_MEM_FUNC_ALWAYS :Result:=True;
WAIT_REG_MEM_FUNC_LESS :Result:=(val<ref);
WAIT_REG_MEM_FUNC_LESS_EQUAL :Result:=(val<=ref);
WAIT_REG_MEM_FUNC_EQUAL :Result:=(val=ref);
WAIT_REG_MEM_FUNC_NOT_EQUAL :Result:=(val<>ref);
WAIT_REG_MEM_FUNC_GREATER_EQUAL:Result:=(val>=ref);
WAIT_REG_MEM_FUNC_GREATER :Result:=(val>ref);
else
Assert(false,'me_test_mem');
end;
end;
procedure t_me_wait_addr.add_reg(kq:Pointer);
begin
if (Fdmem_addr<>nil) then
begin
Fregs_addr:=Fdmem_addr;
gc_add_internal_ptr(kq,Fregs_addr,@Self);
end;
end;
procedure t_me_wait_addr.del_reg(kq:Pointer);
begin
if (Fregs_addr<>nil) then
begin
gc_del_internal_ptr(kq,Fregs_addr);
Fregs_addr:=nil;
end;
end;
procedure t_me_wait_addr.set_adr(kq,addr:Pointer);
begin
if (Fcode_addr=addr) then Exit;
del_reg(kq);
Fcode_addr:=addr;
Fdmem_addr:=get_dmem_ptr(addr);
end;
function SendWarnMsg(const s:RawByteString):Integer;
begin
Result:=p_host_ipc.SendSync(HashIpcStr('WARNING'),Length(s)+1,pchar(s));
end;
procedure pm4_WaitRegMem(var ctx:t_me_render_context;node:p_pm4_node_WaitRegMem);
label
_repeat,
_reset;
var
wait_addr:p_me_wait_addr;
begin
if not ctx.stream^.hint_repeat then
begin
ctx.InsertLabel(PChar('WaitRegMem:0x'+HexStr(QWORD(node^.pollAddr),10)));
ctx.stream^.hint_repeat:=True;
end;
if not ctx.WaitConfirmOrSwitch then Exit;
ctx.stream^.hint_repeat:=False;
wait_addr:=@ctx.me^.wait_ptr[ctx.stream^.buft];
wait_addr^.set_adr(ctx.me^.gc_kqueue,node^.pollAddr);
_repeat:
if me_test_mem(node,wait_addr^.Fdmem_addr) then
begin
ctx.stream^.hint_loop:=0;
end else
begin
wait_addr^.add_reg(ctx.me^.gc_kqueue);
//
Inc(ctx.stream^.hint_loop);
//
if wait_loop_detect then
if (ctx.stream^.hint_loop>10000) then
begin
//loop detection
if wait_loop_autoskip then
begin
Writeln(stderr,'WaitRegMem hang detected 0x',HexStr(QWORD(node^.pollAddr),10),' -> skip');
goto _reset;
end else
begin
Writeln(stderr,'WaitRegMem hang detected 0x',HexStr(QWORD(node^.pollAddr),10));
//
if SendWarnMsg('Hang in WaitRegMem instruction detected, skip instruction?')=0 then
begin
Writeln(stderr,' -> skip');
goto _reset;
end else
begin
Writeln(stderr,' -> repeat');
ctx.stream^.hint_loop:=0;
end;
//
end;
end; //hint_loop
//
ctx.switch_task;
//early check
if (ctx.me^.sheduler.start=@ctx.me^.stall[ctx.stream^.buft]) then
begin
goto _repeat;
end;
//
Exit; //dont reset wait addr
end;
_reset:
ctx.stream^.hint_loop:=0;
wait_addr^.set_adr(ctx.me^.gc_kqueue,nil);
end;
//
procedure pm4_LoadConstRam(var ctx:t_me_render_context;node:p_pm4_node_LoadConstRam);
var
addr_dmem:Pointer;
start:DWORD;
__end:DWORD;
size :DWORD;
begin
//if not ctx.WaitConfirmOrSwitch then Exit;
addr_dmem:=get_dmem_ptr(node^.addr);
start:=node^.offset;
__end:=start+(node^.num_dw*SizeOf(DWORD));
if (start>CONST_RAM_SIZE) then
begin
start:=CONST_RAM_SIZE;
end;
if (__end>CONST_RAM_SIZE) then
begin
__end:=CONST_RAM_SIZE;
end;
size:=(__end-start);
if p_print_gpu_ops then
begin
Writeln('LoadConstRam:0x',HexStr(QWORD(addr_dmem),10),'->[0x',HexStr(start,4),']:size=0x',HexStr(size,6));
end;
Move(addr_dmem^,ctx.me^.CONST_RAM[start],size);
end;
procedure pm4_DumpConstRam(var ctx:t_me_render_context;node:p_pm4_node_LoadConstRam);
var
addr_dmem:Pointer;
start:DWORD;
__end:DWORD;
size :DWORD;
begin
//if not ctx.WaitConfirmOrSwitch then Exit;
addr_dmem:=get_dmem_ptr(node^.addr);
start:=node^.offset;
__end:=start+(node^.num_dw*SizeOf(DWORD));
if (start>CONST_RAM_SIZE) then
begin
start:=CONST_RAM_SIZE;
end;
if (__end>CONST_RAM_SIZE) then
begin
__end:=CONST_RAM_SIZE;
end;
size:=(__end-start);
if p_print_gpu_ops then
begin
Writeln('DumpConstRam:[0x',HexStr(start,4),']->0x',HexStr(QWORD(addr_dmem),10),':size=0x',HexStr(size,6));
end;
Move(ctx.me^.CONST_RAM[start],addr_dmem^,size);
ctx.BeginCmdBuffer;
ctx.Cmd.AddPlannedTrigger(QWORD(node^.addr),QWORD(node^.addr)+size,nil);
end;
//
procedure pm4_IncrementCE(var ctx:t_me_render_context;node:p_pm4_node);
begin
Inc(ctx.me^.CE_COUNT);
end;
procedure pm4_IncrementDE(var ctx:t_me_render_context;node:p_pm4_node);
begin
Inc(ctx.me^.DE_COUNT);
end;
procedure pm4_WaitOnCECounter(var ctx:t_me_render_context;node:p_pm4_node);
begin
if (ctx.me^.CE_COUNT <= ctx.me^.DE_COUNT) then
begin
if p_print_gpu_ops then
begin
Writeln('WaitOnCECounter:(',ctx.me^.CE_COUNT,' <= ',ctx.me^.DE_COUNT,')');
end;
ctx.switch_task;
end else
begin
if p_print_gpu_ops then
begin
Writeln('WaitOnCECounter:(',ctx.me^.CE_COUNT,' > ',ctx.me^.DE_COUNT,')');
end;
end;
end;
procedure pm4_WaitOnDECounterDiff(var ctx:t_me_render_context;node:p_pm4_node_WaitOnDECounterDiff);
var
diff:DWORD;
begin
diff:=node^.diff;
//force unsigned compare
if (DWORD(ctx.me^.DE_COUNT - ctx.me^.CE_COUNT) >= diff) then
begin
if p_print_gpu_ops then
begin
Writeln('WaitOnDECounterDiff:(',ctx.me^.DE_COUNT,' - ',ctx.me^.CE_COUNT,') >= ',diff);
end;
ctx.switch_task;
end else
begin
if p_print_gpu_ops then
begin
Writeln('WaitOnDECounterDiff:(',ctx.me^.DE_COUNT,' - ',ctx.me^.CE_COUNT,') < ',diff);
end;
end;
end;
procedure pm4_PfpSyncMe(var ctx:t_me_render_context;node:p_pm4_node_PfpSyncMe);
begin
if not ctx.WaitConfirmOrSwitch then Exit;
RTLEventSetEvent(node^.event);
end;
//
procedure pm4_me_thread(me:p_pm4_me); SysV_ABI_CDecl;
var
ctx:t_me_render_context;
imdone_count:QWORD;
begin
sched_prio(curkthread,64);
ctx:=Default(t_me_render_context);
ctx.Init;
ctx.me:=me;
imdone_count:=0;
if use_renderdoc_capture then
begin
if not IsRenderDocPreLoaded then
begin
//disable capture if we are not working with Renderdoc GUI
use_renderdoc_capture:=False;
end else
begin
renderdoc.LoadRenderDoc;
renderdoc.UnloadCrashHandler;
end;
end;
me^.reset_sheduler;
repeat
//test submit done
if (me^.imdone_count<>imdone_count) then
begin
imdone_count:=me^.imdone_count;
EndFrameCapture;
end;
//read from queue
ctx.stream:=nil;
if me^.queue.Pop(ctx.stream) then
begin
me^.add_stream(ctx.stream);
//
ctx.stream:=nil;
end;
//get next task
ctx.stream:=me^.get_next;
if (ctx.stream<>nil) then
begin
//start relative timer
if (ctx.rel_time=0) then
begin
ctx.rel_time:=md_rdtsc_unit;
end;
//
//restore cursor
ctx.node:=ctx.stream^.curr;
if (ctx.node=nil) then
begin
ctx.node:=ctx.stream^.First;
ctx.stream^.curr:=ctx.node;
end;
while (ctx.node<>nil) do
begin
if not ctx.stream^.hint_cmds then
begin
if p_print_gpu_ops then
begin
Writeln('+',ctx.node^.id,':',ctx.node^.ntype);
end;
ctx.stream^.hint_cmds:=True;
end;
//wait last stall cmd ???
//if ctx.WaitConfirm then
begin
case ctx.node^.ntype of
ntHint :pm4_Hint (ctx,Pointer(ctx.node));
ntDrawIndex2 :pm4_Draw (ctx,Pointer(ctx.node));
ntDrawIndexOffset2 :pm4_Draw (ctx,Pointer(ctx.node));
ntDrawIndexAuto :pm4_Draw (ctx,Pointer(ctx.node));
ntClearDepth :pm4_Draw (ctx,Pointer(ctx.node));
ntResolve :pm4_Resolve (ctx,Pointer(ctx.node));
ntFastClear :pm4_FastClear (ctx,Pointer(ctx.node));
ntDispatchDirect :pm4_DispatchDirect (ctx,Pointer(ctx.node));
ntDispatchIndirect :pm4_DispatchIndirect (ctx,Pointer(ctx.node));
ntEventWrite :pm4_EventWrite (ctx,Pointer(ctx.node));
ntPipeStatDump :pm4_PipeStatDump (ctx,Pointer(ctx.node));
ntEventWriteEop :pm4_EventWriteEop (ctx,Pointer(ctx.node));
ntSubmitFlipEop :pm4_SubmitFlipEop (ctx,Pointer(ctx.node));
ntReleaseMem :pm4_ReleaseMem (ctx,Pointer(ctx.node));
ntEventWriteEos :pm4_EventWriteEos (ctx,Pointer(ctx.node));
ntWriteData :pm4_WriteData (ctx,Pointer(ctx.node));
ntDmaData :pm4_DmaData (ctx,Pointer(ctx.node));
ntWaitRegMem :pm4_WaitRegMem (ctx,Pointer(ctx.node));
ntLoadConstRam :pm4_LoadConstRam (ctx,Pointer(ctx.node));
ntDumpConstRam :pm4_DumpConstRam (ctx,Pointer(ctx.node));
ntIncrementCE :pm4_IncrementCE (ctx,Pointer(ctx.node));
ntIncrementDE :pm4_IncrementDE (ctx,Pointer(ctx.node));
ntWaitOnCECounter :pm4_WaitOnCECounter (ctx,Pointer(ctx.node));
ntWaitOnDECounterDiff:pm4_WaitOnDECounterDiff(ctx,Pointer(ctx.node));
ntPfpSyncMe :pm4_PfpSyncMe (ctx,Pointer(ctx.node));
else
begin
Writeln(stderr,'me:+',ctx.node^.ntype);
Assert(false,'me:+'+GetEnumName(TypeInfo(t_pm4_node_type),ord(ctx.node^.ntype)));
end;
end;
end;
if me^.sheduler.switch then
begin
//save position
ctx.stream^.curr:=ctx.node;
//Switching to another task
Break;
end;
//reset hint
ctx.stream^.hint_cmds:=False;
//next command
ctx.node:=ctx.stream^.Next(ctx.node);
end;
if me^.sheduler.switch then
begin
//
me^.sheduler.switch:=False;
//Switching to another task
Continue;
end else
begin
//Complete the task and switch to the next one
ctx.complete_and_next_task;
end;
//
me^.remove_stream(ctx.stream);
ctx.stream:=nil;
//
Continue;
end;
//stall is empty!
me^.reset_sheduler;
ctx.rel_time:=0; //reset time
//TODO: Timeline semaphore
if not ctx.PingCmd then
begin
ctx.on_idle;
end;
//RTLEventWaitFor(me^.event,100);
me^.wait;
until false;
end;
end.