This commit is contained in:
Pavel 2024-04-16 16:55:18 +03:00
parent f676c96383
commit 0ebb240619
17 changed files with 688 additions and 527 deletions

View File

@ -10,9 +10,15 @@ uses
LFQueue,
vBuffer,
vHostBufferManager,
vImage,
vImageManager,
vRenderPassManager,
vShaderExt,
vShaderManager,
vRegs2Vulkan,
shader_dump,
kern_thr,
md_sleep,
@ -52,7 +58,7 @@ procedure t_pm4_me.start;
begin
if (XCHG(started,Pointer(1))=nil) then
begin
kthread_add(@pm4_me_thread,@self,@td,0,'[GFX_ME]');
kthread_add(@pm4_me_thread,@self,@td,(8*1024*1024) div (16*1024),'[GFX_ME]');
end;
end;
@ -83,6 +89,29 @@ end;
//
//Handles one DrawIndex2 node: dumps the pixel and vertex shaders that the
//node's registers point at, then fetches both shaders.
//The fetched shaders are not used any further inside this procedure.
procedure pm4_DrawIndex2(node:p_pm4_node_DrawIndex2);
var
 regs        :TGPU_REGS;
 vs_shader   :TvShaderExt;
 ps_shader   :TvShaderExt;
 shaders_key :TvShadersKey;  //declared but not used
 shader_group:TvShaderGroup; //declared but not used
begin
 //register view over the SH and CONTEXT groups stored in the node
 regs:=Default(TGPU_REGS);
 regs.SH_REG:=@node^.SH_REG;
 regs.CX_REG:=@node^.CX_REG;

 //dump file names returned by DumpPS/DumpVS are discarded
 DumpPS(regs);
 DumpVS(regs);

 ps_shader:=FetchShader(vShaderStagePs,0,regs,nil);
 vs_shader:=FetchShader(vShaderStageVs,1,regs,nil);
end;
procedure pm4_me_thread(me:p_pm4_me); SysV_ABI_CDecl;
var
stream:p_pm4_stream;
@ -99,6 +128,12 @@ begin
while (node<>nil) do
begin
Writeln('+',node^.ntype);
case node^.ntype of
ntDrawIndex2:pm4_DrawIndex2(Pointer(node));
else
end;
//
node:=stream^.Next(node);
end;

View File

@ -7,77 +7,28 @@ interface
uses
Classes,
SysUtils,
ps4_program,
kern_authinfo,
kern_proc,
murmurhash,
si_ci_vi_merged_offset,
ps4_shader,
ps4_gpu_regs;
vRegs2Vulkan;
type
TDUMP_WORD=packed record
REG,COUNT:WORD;
end;
Function FastHash(data:PByte;len:DWORD):DWORD;
Procedure DUMP_BLOCK(F:THandle;REG:WORD;P:Pointer;Size:DWORD);
Function get_dev_progname:RawByteString;
function DumpCS(var GPU_REGS:TGPU_REGS):RawByteString;
function DumpPS(var GPU_REGS:TGPU_REGS):RawByteString;
function DumpVS(var GPU_REGS:TGPU_REGS):RawByteString;
implementation
{
 Computes a 32-bit hash over the len bytes starting at data.
 Returns 0 when data is nil or len is 0.
 The hash is seeded with len, consumes the input four bytes at a time as
 two 16-bit words, then mixes in the remaining 1..3 bytes.
 NOTE(review): the mixing steps look like Paul Hsieh's SuperFastHash - confirm
 before relying on compatibility with other implementations.
}
Function FastHash(data:PByte;len:DWORD):DWORD;
var
hash,tmp,rem:DWORD;
begin
if (len=0) or (data=nil) then Exit(0);
hash:=len;
//rem = number of trailing bytes (0..3), len = number of whole 4-byte groups
rem:=len and 3;
len:=len shr 2;
While (len>0) do
begin
//load 4 bytes once, then use the low and high 16-bit halves separately
tmp :=PDWORD(data)[0];
hash:=hash+PWORD(@tmp)[0];
tmp :=(PWORD(@tmp)[1] shl 11) xor hash;
hash:=(hash shl 16) xor tmp;
//advance by two WORDs (4 bytes)
data:=@PWORD(data)[2];
hash:=hash+(hash shr 11);
Dec(len);
end;
//tail bytes; single bytes are read as signed (PShortint)
Case rem of
3:
begin
hash:=hash+PWORD(data)[0];
hash:=hash xor (hash shl 16);
hash:=hash xor (PShortint(data)[2] shl 18);
hash:=hash+(hash shr 11);
end;
2:
begin
hash:=hash+PWORD(data)[0];
hash:=hash xor (hash shl 11);
hash:=hash+(hash shr 17);
end;
1:
begin
hash:=hash+PShortint(data)[0];
hash:=hash xor (hash shl 10);
hash:=hash+(hash shr 1);
end;
end;
//final mixing of the accumulated value
hash:=hash xor (hash shl 3);
hash:=hash+(hash shr 5);
hash:=hash xor (hash shl 4);
hash:=hash+(hash shr 17);
hash:=hash xor (hash shl 25);
hash:=hash+(hash shr 6);
Result:=hash;
end;
Procedure DUMP_BLOCK(F:THandle;REG:WORD;P:Pointer;Size:DWORD);
const
MAX_SIZE=($FFFF+1)*4;
@ -93,6 +44,14 @@ begin
FileWrite(F,P^,System.Align(Size,4));
end;
//Writes the current value of register REG (read through GPU_REGS.get_reg)
//to the dump file F as a single DWORD block.
Procedure DUMP_REG(F:THandle;REG:WORD;var GPU_REGS:TGPU_REGS);
var
 reg_value:DWORD;
begin
 //DUMP_BLOCK takes a pointer, so the value is staged in a local first
 reg_value:=GPU_REGS.get_reg(REG);
 DUMP_BLOCK(F,REG,@reg_value,SizeOf(reg_value));
end;
type
TUSER_DATA_USEAGE=array[0..15] of Byte;
@ -178,6 +137,28 @@ begin
end;
end;
//Returns S without leading and trailing "junk" characters.
//Junk is any character <= ' ' (space and control characters) and '?'.
//Characters inside the remaining text are left untouched.
function Trim(const S: RawByteString): RawByteString;

 function is_junk(ch:AnsiChar):Boolean; inline;
 begin
  Result:=(ch<=' ') or (ch='?');
 end;

var
 first,last:sizeint;
begin
 //shrink from the right
 last:=Length(S);
 while (last>0) and is_junk(S[last]) do
 begin
  Dec(last);
 end;

 //shrink from the left, never crossing the right edge
 first:=1;
 while (first<=last) and is_junk(S[first]) do
 begin
  Inc(first);
 end;

 Result:=Copy(S,first,last-first+1);
end;
//Returns the name used as the prefix of shader dump files:
//the trimmed g_appinfo.CUSANAME, or the trimmed process name (p_proc.p_comm)
//when the former is empty after trimming.
Function get_dev_progname:RawByteString;
begin
 Result:=Trim(g_appinfo.CUSANAME);
 if (Result<>'') then Exit;

 //fallback
 Result:=Trim(p_proc.p_comm);
end;
function DumpCS(var GPU_REGS:TGPU_REGS):RawByteString;
var
size,hash:DWORD;
@ -186,12 +167,13 @@ var
fname:RawByteString;
begin
Result:='';
base:=getCodeAddress(GPU_REGS.SPI.CS.LO,GPU_REGS.SPI.CS.HI);
base:=GPU_REGS.get_cs_addr;
if (base<>nil) then
begin
size:=_calc_shader_size(base);
hash:=FastHash(base,size);
hash:=MurmurHash64A(base,size,0);
fname:='shader_dump\'+get_dev_progname+'_cs_'+HexStr(hash,8)+'.dump';
Result:=fname;
@ -201,18 +183,18 @@ begin
F:=FileCreate(fname);
DUMP_BLOCK(F,mmCOMPUTE_PGM_LO,base,size);
DUMP_BLOCK(F,mmCOMPUTE_PGM_RSRC1 ,@GPU_REGS.SPI.CS.RSRC1 ,SizeOf(DWORD));
DUMP_BLOCK(F,mmCOMPUTE_PGM_RSRC2 ,@GPU_REGS.SPI.CS.RSRC2 ,SizeOf(DWORD));
DUMP_REG(F,mmCOMPUTE_PGM_RSRC1 ,GPU_REGS);
DUMP_REG(F,mmCOMPUTE_PGM_RSRC2 ,GPU_REGS);
DUMP_BLOCK(F,mmCOMPUTE_NUM_THREAD_X,@GPU_REGS.SPI.CS.NUM_THREAD_X,SizeOf(DWORD));
DUMP_BLOCK(F,mmCOMPUTE_NUM_THREAD_Y,@GPU_REGS.SPI.CS.NUM_THREAD_Y,SizeOf(DWORD));
DUMP_BLOCK(F,mmCOMPUTE_NUM_THREAD_Z,@GPU_REGS.SPI.CS.NUM_THREAD_Z,SizeOf(DWORD));
DUMP_REG(F,mmCOMPUTE_NUM_THREAD_X,GPU_REGS);
DUMP_REG(F,mmCOMPUTE_NUM_THREAD_Y,GPU_REGS);
DUMP_REG(F,mmCOMPUTE_NUM_THREAD_Z,GPU_REGS);
DUMP_USER_DATA(F,base,mmCOMPUTE_USER_DATA_0,@GPU_REGS.SPI.CS.USER_DATA);
DUMP_USER_DATA(F,base,mmCOMPUTE_USER_DATA_0,@GPU_REGS.SH_REG^.COMPUTE_USER_DATA);
DUMP_BLOCK(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE0,@GPU_REGS.SPI.CS.STATIC_THREAD_MGMT_SE0,SizeOf(DWORD));
DUMP_BLOCK(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE1,@GPU_REGS.SPI.CS.STATIC_THREAD_MGMT_SE1,SizeOf(DWORD));
DUMP_BLOCK(F,mmCOMPUTE_RESOURCE_LIMITS ,@GPU_REGS.SPI.CS.RESOURCE_LIMITS ,SizeOf(DWORD));
DUMP_REG(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE0,GPU_REGS);
DUMP_REG(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE1,GPU_REGS);
DUMP_REG(F,mmCOMPUTE_RESOURCE_LIMITS ,GPU_REGS);
FileClose(F);
@ -228,12 +210,12 @@ var
fname:RawByteString;
begin
Result:='';
base:=getCodeAddress(GPU_REGS.SPI.PS.LO,GPU_REGS.SPI.PS.HI);
base:=GPU_REGS.get_ps_addr;
if (base<>nil) then
begin
size:=_calc_shader_size(base);
hash:=FastHash(base,size);
hash:=MurmurHash64A(base,size,0);
fname:='shader_dump\'+get_dev_progname+'_ps_'+HexStr(hash,8)+'.dump';
Result:=fname;
@ -243,27 +225,27 @@ begin
F:=FileCreate(fname);
DUMP_BLOCK(F,mmSPI_SHADER_PGM_LO_PS,base,size);
DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC1_PS,@GPU_REGS.SPI.PS.RSRC1,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC2_PS,@GPU_REGS.SPI.PS.RSRC2,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC3_PS,@GPU_REGS.SPI.PS.RSRC3,SizeOf(DWORD));
DUMP_REG(F,mmSPI_SHADER_PGM_RSRC1_PS,GPU_REGS);
DUMP_REG(F,mmSPI_SHADER_PGM_RSRC2_PS,GPU_REGS);
DUMP_REG(F,mmSPI_SHADER_PGM_RSRC3_PS,GPU_REGS);
DUMP_BLOCK(F,mmSPI_SHADER_Z_FORMAT ,@GPU_REGS.SPI.PS.Z_FORMAT ,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_SHADER_COL_FORMAT ,@GPU_REGS.SPI.PS.COL_FORMAT,SizeOf(DWORD));
DUMP_REG(F,mmSPI_SHADER_Z_FORMAT ,GPU_REGS);
DUMP_REG(F,mmSPI_SHADER_COL_FORMAT ,GPU_REGS);
DUMP_BLOCK(F,mmSPI_PS_INPUT_ENA ,@GPU_REGS.SPI.PS.INPUT_ENA ,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_PS_INPUT_ADDR ,@GPU_REGS.SPI.PS.INPUT_ADDR,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_PS_IN_CONTROL ,@GPU_REGS.SPI.PS.IN_CONTROL,SizeOf(DWORD));
DUMP_REG(F,mmSPI_PS_INPUT_ENA ,GPU_REGS);
DUMP_REG(F,mmSPI_PS_INPUT_ADDR ,GPU_REGS);
DUMP_REG(F,mmSPI_PS_IN_CONTROL ,GPU_REGS);
DUMP_BLOCK(F,mmSPI_BARYC_CNTL ,@GPU_REGS.SPI.PS.BARYC_CNTL,SizeOf(DWORD));
DUMP_REG(F,mmSPI_BARYC_CNTL ,GPU_REGS);
DUMP_BLOCK(F,mmDB_SHADER_CONTROL ,@GPU_REGS.SPI.PS.SHADER_CONTROL,SizeOf(DWORD));
DUMP_BLOCK(F,mmCB_SHADER_MASK ,@GPU_REGS.SPI.PS.SHADER_MASK ,SizeOf(DWORD));
DUMP_REG(F,mmDB_SHADER_CONTROL ,GPU_REGS);
DUMP_REG(F,mmCB_SHADER_MASK ,GPU_REGS);
DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_PS_0,@GPU_REGS.SPI.PS.USER_DATA);
DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_PS_0,@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_PS);
For i:=0 to 31 do
begin
DUMP_BLOCK(F,mmSPI_PS_INPUT_CNTL_0+i,@GPU_REGS.SPI.PS.INPUT_CNTL[i],SizeOf(DWORD));
DUMP_REG(F,mmSPI_PS_INPUT_CNTL_0+i,GPU_REGS);
end;
FileClose(F);
@ -279,12 +261,12 @@ var
fname:RawByteString;
begin
Result:='';
base:=getCodeAddress(GPU_REGS.SPI.VS.LO,GPU_REGS.SPI.VS.HI);
base:=GPU_REGS.get_vs_addr;
if (base<>nil) then
begin
size:=_calc_shader_size(base);
hash:=FastHash(base,size);
hash:=MurmurHash64A(base,size,0);
fname:='shader_dump\'+get_dev_progname+'_vs_'+HexStr(hash,8)+'.dump';
Result:=fname;
@ -294,17 +276,20 @@ begin
F:=FileCreate(fname);
DUMP_BLOCK(F,mmSPI_SHADER_PGM_LO_VS,base,size);
DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC1_VS,@GPU_REGS.SPI.VS.RSRC1,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC2_VS,@GPU_REGS.SPI.VS.RSRC2,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC3_VS,@GPU_REGS.SPI.VS.RSRC3,SizeOf(DWORD));
DUMP_REG(F,mmSPI_SHADER_PGM_RSRC1_VS,GPU_REGS);
DUMP_REG(F,mmSPI_SHADER_PGM_RSRC2_VS,GPU_REGS);
DUMP_REG(F,mmSPI_SHADER_PGM_RSRC3_VS,GPU_REGS);
DUMP_BLOCK(F,mmSPI_VS_OUT_CONFIG ,@GPU_REGS.SPI.VS.OUT_CONFIG,SizeOf(DWORD));
DUMP_BLOCK(F,mmSPI_SHADER_POS_FORMAT,@GPU_REGS.SPI.VS.POS_FORMAT,SizeOf(DWORD));
DUMP_BLOCK(F,mmPA_CL_VS_OUT_CNTL ,@GPU_REGS.SPI.VS.OUT_CNTL ,SizeOf(DWORD));
DUMP_REG(F,mmSPI_VS_OUT_CONFIG ,GPU_REGS);
DUMP_REG(F,mmSPI_SHADER_POS_FORMAT,GPU_REGS);
DUMP_REG(F,mmPA_CL_VS_OUT_CNTL ,GPU_REGS);
DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_VS_0,@GPU_REGS.SPI.VS.USER_DATA);
DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_VS_0,@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_VS);
DUMP_REG(F,mmVGT_DMA_NUM_INSTANCES,GPU_REGS);
//DUMP_REG(F,mmVGT_NUM_INSTANCES,@GPU_REGS.VGT_NUM_INSTANCES,SizeOf(DWORD));
DUMP_BLOCK(F,mmVGT_NUM_INSTANCES ,@GPU_REGS.VGT_NUM_INSTANCES,SizeOf(DWORD));
FileClose(F);

View File

@ -33,6 +33,7 @@ uses
pm4_me,
vDevice,
vMemory,
subr_backtrace;
@ -45,6 +46,14 @@ var
gc_knl_lock:mtx;
gc_knlist:t_knlist;
//Public entry point that forwards an unmap request for [start,__end)
//to the Vulkan memory manager. Does nothing while MemManager is not created.
//NOTE(review): the parameters are DWORD while TvMemManager.unmap_host takes QWORD;
//confirm that callers never pass offsets above 4 GiB.
procedure unmap_dmem_gc(start,__end:DWORD); public;
begin
 if (MemManager=nil) then Exit;

 MemManager.unmap_host(start,__end);
end;
function mmap_addr(paddr,psize:QWORD;
prot:Integer;
pout_addr:PQWORD):Integer;

View File

@ -614,10 +614,65 @@ begin
curkthread^.td_rmap_def_user:=entry;
end;
function rmem_map_delete(map :p_rmem_map;
vaddr:DWORD;
start:DWORD;
__end:DWORD):Integer;
procedure unmap_dmem_gc(start,__end:DWORD); external;
{
 Walks the entries of map that intersect [start,__end) (page indexes) and
 calls unmap_dmem_gc for every sub-range that is NOT covered by an entry.

 map   - map to inspect
 start - first page index of the range
 __end - page index one past the end of the range

 Fix: after a gap in front of an entry was reported, the cursor is now also
 moved past that entry. Previously the cursor stayed at the entry's start, so
 the next iteration reported [entry.start,next.start) as a gap and unmapped a
 range that is still covered by an entry.
}
procedure rmem_map_unmap_check(map  :p_rmem_map;
                               start:DWORD;
                               __end:DWORD);
var
 entry      :p_rmem_map_entry;
 first_entry:p_rmem_map_entry;

 s,e:DWORD;
begin
 //start from the entry that contains "start", or from the one after the
 //position returned by the lookup when nothing contains it
 if (not rmem_map_lookup_entry_any(map,start,@first_entry)) then
 begin
  entry:=first_entry^.next;
 end else
 begin
  entry:=first_entry;
 end;

 repeat

  if (entry^.start>start) then
  begin
   //uncovered range in front of this entry, clipped to __end
   s:=start;

   if (entry^.start>__end) then
   begin
    e:=__end;
   end else
   begin
    e:=entry^.start;
   end;

   if (s<>e) then
   begin
    unmap_dmem_gc(IDX_TO_OFF(s),IDX_TO_OFF(e));
   end;

   start:=e;
  end;

  //skip the range covered by this entry (also right after a reported gap)
  if (entry^.__end>start) then
  begin
   start:=entry^.__end;
  end;

  if (start>=__end) or (entry=@map^.header) or (entry^.start>=__end) then
  begin
   Break;
  end;

  entry:=entry^.next;
 until false;
end;
function rmem_map_delete(map :p_rmem_map;
vaddr:DWORD;
start:DWORD;
__end:DWORD):Integer;
var
entry :p_rmem_map_entry;
first_entry:p_rmem_map_entry;
@ -669,6 +724,9 @@ begin
entry:=next;
end;
rmem_map_unmap_check(map,start,__end);
Result:=(0);
end;
@ -715,6 +773,9 @@ begin
entry:=next;
end;
unmap_dmem_gc(IDX_TO_OFF(start),IDX_TO_OFF(__end));
Result:=(0);
end;

View File

@ -894,40 +894,17 @@ function vm_object_rmap_release(map :vm_map_t;
obj :vm_object_t;
start :vm_offset_t;
__end :vm_offset_t;
offset:vm_ooffset_t;
p_free:Boolean):Integer;
offset:vm_ooffset_t):Integer;
var
rmap:p_rmem_map;
length:vm_offset_t;
entry:p_rmem_map_entry;
begin
rmap:=map^.rmap;
length:=__end-start;
rmem_map_lock(rmap);
if p_free then
begin
Result:=rmem_map_delete(rmap, OFF_TO_IDX(start), OFF_TO_IDX(offset), OFF_TO_IDX(offset+length));
end else
begin
Result:=0;
end;
{
if (Result=0) then
begin
p_rem^:=not rmem_map_lookup_entry_any(rmap, OFF_TO_IDX(offset), @entry)
end;
if p_rem then
begin
//unmap vulkan
end else
begin
//ext unmap vulkan
end;
}
Result:=rmem_map_delete(rmap, OFF_TO_IDX(start), OFF_TO_IDX(offset), OFF_TO_IDX(offset+length));
rmem_map_unlock(rmap);
end;
@ -2402,7 +2379,6 @@ var
first_entry:vm_map_entry_t;
next :vm_map_entry_t;
obj :vm_object_t;
p_rem :Boolean;
begin
VM_MAP_ASSERT_LOCKED(map);
@ -2480,8 +2456,7 @@ begin
next:=entry^.next;
p_rem:=True;
if (obj<>nil) then
if rmap_free and (obj<>nil) then
begin
if ((obj^.flags and (OBJ_DMEM_EXT or OBJ_DMEM_EXT2))<>0) or
(obj^.otype=OBJT_PHYSHM) then
@ -2490,8 +2465,7 @@ begin
obj,
entry^.start,
entry^.__end,
entry^.offset,
rmap_free);
entry^.offset);
end;
end;
@ -2514,6 +2488,7 @@ begin
* will be set in the wrong object!)
}
vm_map_entry_delete(map, entry);
entry:=next;
end;
Result:=(KERN_SUCCESS);

View File

@ -22,6 +22,9 @@ type
function GetDedicatedAllocation:Boolean;
function BindMem(P:TvPointer):TVkResult;
procedure OnReleaseMem(Sender:TObject);
//
function Acquire:Boolean;
procedure Release;
end;
function VkBindSparseBufferMemory(queue:TVkQueue;buffer:TVkBuffer;bindCount:TVkUInt32;pBinds:PVkSparseMemoryBind):TVkResult;
@ -169,18 +172,46 @@ end;
{
 Binds the buffer to the memory described by P.

 A reference on P is taken first (P.Acquire); the reference is kept while the
 bind is in effect and dropped again when vkBindBufferMemory fails.

 Returns the result of vkBindBufferMemory, or VK_ERROR_UNKNOWN when the
 reference could not be taken.

 Fix: the memory is bound exactly once and only after the reference is held.
 The previous text bound the buffer and registered OnReleaseMem before
 P.Acquire and then repeated both steps.
}
function TvBuffer.BindMem(P:TvPointer):TVkResult;
begin
 if P.Acquire then
 begin
  Result:=vkBindBufferMemory(Device.FHandle,FHandle,P.FMemory.FHandle,P.FOffset);
  //
  if (Result=VK_SUCCESS) then
  begin
   FBind:=P;
   //get notified when the memory object goes away
   P.FMemory.AddDependence(@Self.OnReleaseMem);
  end else
  begin
   //bind failed, give the reference back
   P.Release;
  end;
  //
 end else
 begin
  Result:=VK_ERROR_UNKNOWN;
 end;
end;
//Dependency callback registered by BindMem on the bound memory object.
//Clears the stored memory reference and destroys the Vulkan buffer handle, if any.
procedure TvBuffer.OnReleaseMem(Sender:TObject);
begin
 FBind.FMemory:=nil;
 //
 if (FHandle=VK_NULL_HANDLE) then Exit;

 vkDestroyBuffer(Device.FHandle,FHandle,nil);
 FHandle:=VK_NULL_HANDLE;
end;
//Takes a reference on the memory this buffer is bound to (FBind).
//Returns the result of FBind.Acquire.
function TvBuffer.Acquire:Boolean;
begin
Result:=FBind.Acquire;
end;
//Drops a reference on the memory this buffer is bound to (FBind).
procedure TvBuffer.Release;
begin
FBind.Release;
end;
end.

View File

@ -7,7 +7,6 @@ interface
uses
Classes,
SysUtils,
//RWLock,
//ps4_types,
g23tree,
//ps4_libSceVideoOut,

View File

@ -17,6 +17,7 @@ type
TvRelease=specialize T23treeSet<TvReleaseCb,TvReleaseCompare>;
TvDependenciesObject=class
FDep_lock :Pointer;
FDependencies:TvRelease;
//
function AddDependence(cb:TvReleaseCb):Boolean;
@ -57,32 +58,55 @@ end;
//
//Registers cb in the dependency set under the write lock.
//Returns False for a nil callback, otherwise the result of the set's Insert.
function TvDependenciesObject.AddDependence(cb:TvReleaseCb):Boolean;
begin
 if (cb=nil) then Exit(False);

 rw_wlock(FDep_lock);
 Result:=FDependencies.Insert(cb);
 rw_wunlock(FDep_lock);
end;
//Removes cb from the dependency set under the write lock.
//Returns False for a nil callback, otherwise the result of the set's delete.
function TvDependenciesObject.DelDependence(cb:TvReleaseCb):Boolean;
begin
 if (cb=nil) then Exit(False);

 rw_wlock(FDep_lock);
 Result:=FDependencies.delete(cb);
 rw_wunlock(FDep_lock);
end;
{
 Removes every registered callback from the dependency set and calls each
 of them once with Sender.

 The callback value is copied out of the set before the element is erased,
 and the lock is dropped around the call, so a callback may itself call
 AddDependence/DelDependence on this object.

 Fix: the callback is no longer invoked through It.Item^ after the element
 was erased (and while the lock was held), which also made every callback
 run twice.
}
Procedure TvDependenciesObject.ReleaseAllDependencies(Sender:TObject);
var
 It:TvRelease.Iterator;
 cb:TvReleaseCb;
begin
 rw_wlock(FDep_lock);

 while (FDependencies.size<>0) do
 begin
  It:=FDependencies.cbegin;
  if (It.Item=nil) then Break;

  //copy first: It.Item must not be used after erase
  cb:=It.Item^;
  FDependencies.erase(It);

  if (cb<>nil) then
  begin
   //call outside of the lock
   rw_wunlock(FDep_lock);
   cb(Sender);
   rw_wlock(FDep_lock);
  end;
 end;

 rw_wunlock(FDep_lock);
end;
Destructor TvDependenciesObject.Destroy;

View File

@ -6,300 +6,249 @@ interface
uses
SysUtils,
RWLock,
sys_types,
g23tree,
Vulkan,
vDevice,
vMemory,
vBuffer,
vCmdBuffer;
vDependence;
type
AVkSparseMemoryBind=array of TVkSparseMemoryBind;
TvHostBuffer=class(TvBuffer)
FAddr:Pointer;
Fhost:TvPointer;
Foffset:TVkDeviceSize; //offset inside buffer
FAddr:QWORD;
//
FSparse:AVkSparseMemoryBind;
//
FRefs:ptruint;
Procedure Acquire(Sender:TObject);
procedure Release(Sender:TObject);
procedure OnReleaseCmd(Sender:TObject);
end;
function FetchHostBuffer(cmd:TvCustomCmdBuffer;Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer;
function FetchHostBuffer(cmd:TvDependenciesObject;
Addr:QWORD;
Size:TVkDeviceSize;
usage:TVkFlags;
device_local:Boolean=False):TvHostBuffer;
implementation
const
buf_ext:TVkExternalMemoryBufferCreateInfo=(
sType:VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO;
pNext:nil;
handleTypes:ord(VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
);
uses
kern_rwlock;
type
TvAddrCompare=object
function c(a,b:PPointer):Integer; static;
TvHostBufferKey=packed record
FAddr :QWORD;
FUsage :TVkFlags;
FBuffer:TvHostBuffer;
end;
_TvHostBufferSet=specialize T23treeSet<PPointer,TvAddrCompare>;
TvHostBufferSet=object(_TvHostBufferSet)
lock:TRWLock;
Procedure Init;
Procedure Lock_wr;
Procedure Unlock;
TvAddrCompare=object
function c(const a,b:TvHostBufferKey):Integer; static;
end;
_TvHostBufferSet=specialize T23treeSet<TvHostBufferKey,TvAddrCompare>;
TvHostBufferSet=object(_TvHostBufferSet)
lock:Pointer;
Procedure Lock_wr;
Procedure Unlock_wr;
end;
//Dependency callback: drops one reference by calling Release.
//Sender is not used.
procedure TvHostBuffer.OnReleaseCmd(Sender:TObject);
begin
Release;
end;
var
FHostBufferSet:TvHostBufferSet;
//Initializes the lock that guards the set (rwlock_init on the lock field).
Procedure TvHostBufferSet.Init;
begin
rwlock_init(lock);
end;
Procedure TvHostBufferSet.Lock_wr;
begin
rwlock_wrlock(lock);
rw_wlock(lock);
end;
Procedure TvHostBufferSet.Unlock;
Procedure TvHostBufferSet.Unlock_wr;
begin
rwlock_unlock(lock);
rw_wunlock(lock);
end;
function TvAddrCompare.c(a,b:PPointer):Integer;
function TvAddrCompare.c(const a,b:TvHostBufferKey):Integer;
begin
Result:=Integer(a^>b^)-Integer(a^<b^);
//1 FAddr
Result:=Integer(a.FAddr>b.FAddr)-Integer(a.FAddr<b.FAddr);
if (Result<>0) then Exit;
//2 FUsage
Result:=Integer(a.FUsage>b.FUsage)-Integer(a.FUsage<b.FUsage);
end;
//Looks up the host buffer registered for Addr in FHostBufferSet.
//Returns nil when the set has no element for that address.
function _Find(Addr:Pointer):TvHostBuffer;
var
i:TvHostBufferSet.Iterator;
begin
Result:=nil;
//the set elements are pointers to a Pointer value, so a pointer to the local Addr is a valid search key
i:=FHostBufferSet.find(@Addr);
if (i.Item<>nil) then
begin
//the stored pointer addresses the FAddr field of a TvHostBuffer;
//subtracting the field offset yields the object itself
Result:=TvHostBuffer(ptruint(i.Item^)-ptruint(@TvHostBuffer(nil).FAddr));
end;
end;
//Returns the larger of two unsigned 64-bit values.
function Max(a,b:QWORD):QWORD; inline;
begin
 Result:=b;
 if (a>b) then Result:=a;
end;
//Returns the smaller of two unsigned 64-bit values.
function Min(a,b:QWORD):QWORD; inline;
begin
 Result:=b;
 if (a<b) then Result:=a;
end;
function _fix_buf_size(sparce:Boolean;var Addr:Pointer;var Size:TVkDeviceSize;usage:TVkFlags):TVkDeviceSize;
function _fix_buf_size(var Addr:QWORD;var Size:TVkDeviceSize;usage:TVkFlags):TVkDeviceSize;
var
mr:TVkMemoryRequirements;
begin
mr:=GetRequirements(sparce,Size,usage,@buf_ext);
mr:=GetRequirements(false,Size,usage,@buf_ext);
Result:=(ptruint(Addr) mod mr.alignment);
Result:=(Addr mod mr.alignment);
Addr:=Pointer(ptruint(Addr)-Result);
Addr:=Addr-Result;
Size:=Size+Result;
end;
//Classifies the range [Addr,Addr+Size) after alignment fix-up:
// -1 - no host mapping is known for the address
//  0 - the mapping found at the address is at least Size bytes long
//  1 - the mapping found is shorter than Size
function _is_sparce(Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):Integer;
var
 mapped     :TvPointer;
 mapped_size:qword;
begin
 //Addr and Size are local copies here, the fix-up does not leak to the caller
 _fix_buf_size(False,Addr,Size,usage);

 mapped:=Default(TvPointer);

 if not TryGetHostPointerByAddr(addr,mapped,@mapped_size) then
 begin
  Exit(-1);
 end;

 Result:=ord(mapped_size<Size);
end;
//Creates a host buffer bound to the single host mapping found at Addr.
//Returns nil when no host mapping is known for the (aligned) address.
//The result of BindMem is not checked.
function _New_simple(Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer;
var
 mapped   :TvPointer;
 align_off:TVkDeviceSize;
begin
 Result:=nil;

 //align_off = number of bytes Addr was moved down by the alignment fix-up
 align_off:=_fix_buf_size(False,Addr,Size,usage);

 mapped:=Default(TvPointer);
 if not TryGetHostPointerByAddr(addr,mapped) then Exit;

 Result:=TvHostBuffer.Create(Size,usage,@buf_ext);
 Result.Fhost  :=mapped;
 Result.Foffset:=align_off;
 Result.BindMem(mapped);
end;
//Creates a sparse host buffer for [Addr,Addr+Size) that is stitched together
//from consecutive host mappings, one TVkSparseMemoryBind per mapping.
//Returns nil when an address in the range has no host mapping or when the sparse bind call fails.
function _New_sparce(queue:TVkQueue;Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer;
var
host:TvPointer;
asize:qword;
hsize:qword;
msize:qword;
Offset,delta:TVkDeviceSize;
bind:TVkSparseMemoryBind;
Binds:AVkSparseMemoryBind;
i:Integer;
t:TvHostBuffer;
begin
Result:=nil;
//hack; alignment is the same in virtual memory
delta:=_fix_buf_size(True,Addr,Size,usage);
Binds:=Default(AVkSparseMemoryBind);
host :=Default(TvPointer);
hsize:=0;
Offset:=0;
//asize = bytes of the range that still have to be covered
asize:=Size;
While (asize<>0) do
begin
if not TryGetHostPointerByAddr(addr,host,@hsize) then Exit;
//NOTE(review): if hsize can come back as 0 this loop never advances - confirm
msize:=Min(hsize,asize);
bind:=Default(TVkSparseMemoryBind);
bind.resourceOffset:=Offset;
bind.size :=msize;
bind.memory :=host.FHandle;
bind.memoryOffset :=host.FOffset;
//append the bind to the dynamic array
i:=Length(Binds);
SetLength(Binds,i+1);
Binds[i]:=bind;
//next
Offset:=Offset+msize;
addr :=addr +msize;
asize :=asize -msize;
end;
t:=TvHostBuffer.CreateSparce(Size,usage,@buf_ext);
t.Foffset:=delta;
t.FSparse:=Binds;
//NOTE(review): Binds[0] is indexed unconditionally; assumes Size<>0 so that at least one bind exists
if (VkBindSparseBufferMemory(queue,t.FHandle,Length(Binds),@Binds[0])<>VK_SUCCESS) then
begin
t.Free;
Exit;
end;
Result:=t;
end;
function FetchHostBuffer(cmd:TvCustomCmdBuffer;Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer;
var
t:TvHostBuffer;
{
 Searches FHostBufferSet for a buffer that overlaps [Addr,Addr+Size) and
 whose usage flags include all bits of usage.

 Returns the buffer with one reference held (taken by buf.Acquire), or nil
 when no buffer matches. Entries whose memory can no longer be acquired are
 erased from the set and freed, after which the search restarts.

 FetchHostBuffer calls this between FHostBufferSet.Lock_wr and Unlock_wr;
 the function itself takes no lock.

 Fix: Result is initialised to nil. Previously it was left undefined when the
 loop ended without a match, while the caller tests the result against nil.

 NOTE(review): the lookup key uses FUsage=0; whether find() can return an
 entry with a different FUsage depends on T23treeSet.find - confirm.
}
function _FindHostBuffer(Addr:QWORD;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer;
label
 _repeat;
var
 It:TvHostBufferSet.Iterator;
 key:TvHostBufferKey;
 buf:TvHostBuffer;
 __end:QWORD;
begin
 Result:=nil;

 __end:=Addr+Size;

 key:=Default(TvHostBufferKey);
 key.FAddr :=Addr;
 key.FUsage:=0;

 _repeat:

 It:=FHostBufferSet.find(key);

 while (It.Item<>nil) do
 begin
  buf:=It.Item^.FBuffer;

  if buf.Acquire then
  begin
   //overlap test plus usage superset test
   if (__end>buf.FAddr) and
      (Addr<(buf.FAddr+buf.FSize)) and
      ((buf.FUsage and usage)=usage) then
   begin
    //the reference taken above is handed to the caller
    Exit(buf);
   end;

   buf.Release;
  end else
  begin
   //mem is deleted, free buf
   FHostBufferSet.erase(It);
   FreeAndNil(buf);
   //the iterator is stale after erase, search again
   goto _repeat;
  end;

  It.Next;
 end;
end;
function FetchHostBuffer(cmd:TvDependenciesObject;
Addr:QWORD;
Size:TVkDeviceSize;
usage:TVkFlags;
device_local:Boolean=False):TvHostBuffer;
label
_repeat;
var
key:TvHostBufferKey;
mem:TvPointer;
begin
Result:=nil;
Assert(Size<>0);
_fix_buf_size(Addr,Size,usage);
key:=Default(TvHostBufferKey);
key.FAddr :=Addr;
key.FUsage:=usage;
//
FHostBufferSet.Lock_wr;
//
t:=_Find(Addr); //find by key
_repeat:
if (t<>nil) then
key.FBuffer:=_FindHostBuffer(Addr,Size,usage);
//
FHostBufferSet.Unlock_wr;
//
if (key.FBuffer<>nil) then
begin
if (t.FSize<(t.Foffset+Size)) or
((t.FUsage and usage)<>usage) then
//
end else
begin
//create new
mem:=MemManager.FetchHostMap(Addr,Size,device_local);
if (mem.FMemory=nil) then
begin
usage:=usage or t.FUsage;
FHostBufferSet.delete(@t.FAddr);
t.Release(nil);
t:=nil;
end;
end;
if (t=nil) then
begin
//Writeln('NewBuf:',HexStr(Addr));
if device_local then
begin
mem:=MemManager.FetchHostMap(Addr,Size,False);
if (mem.FMemory=nil) then
begin
//ENOMEM
Exit(nil);
end;
end else
begin
//ENOMEM
Exit(nil);
end;
t:=nil;
Case _is_sparce(Addr,Size,usage) of
0:begin
t:=_New_simple(Addr,Size,usage);
Assert(t<>nil,'create simple buffer fail');
end;
1:begin //is Sparse buffers
Assert(vDevice.sparseBinding,'sparseBinding not support');
Assert(MemManager.SparceSupportHost,'sparse not support for host');
t:=_New_sparce(cmd.FQueue.FHandle,Addr,Size,usage);
Assert(t<>nil,'create sparse buffer fail');
end;
else
Assert(false,'Is not GPU Addr:'+HexStr(Addr));
end;
t.FAddr:=addr; //save key
key.FBuffer:=TvHostBuffer.Create(Size,usage,@buf_ext);
key.FBuffer.FAddr:=Addr;
FHostBufferSet.Insert(@t.FAddr);
t.Acquire(nil);
end;
if (cmd<>nil) and (t<>nil) then
begin
if cmd.AddDependence(@t.Release) then
if (key.FBuffer.BindMem(mem)<>VK_SUCCESS) then
begin
t.Acquire(cmd);
//unknow error
FreeAndNil(key.FBuffer);
mem.Release; //release [FetchHostMap]
//
Exit(nil);
end;
mem.Release; //release [FetchHostMap]
//
FHostBufferSet.Lock_wr;
//
if not FHostBufferSet.Insert(key) then
begin
//collision?
key.FBuffer.Release; //release [BindMem]
FreeAndNil(key.FBuffer);
//
goto _repeat;
end;
//
FHostBufferSet.Unlock_wr;
//
//create new
end;
//add dep
if (cmd<>nil) then
begin
if cmd.AddDependence(@key.FBuffer.OnReleaseCmd) then
begin
//
end else
begin
key.FBuffer.Release; //release [BindMem]/[_FindHostBuffer]
end;
end;
_exit:
FHostBufferSet.Unlock;
Result:=t;
Result:=key.FBuffer;
end;
//Atomically increments the reference counter FRefs. Sender is not used.
Procedure TvHostBuffer.Acquire(Sender:TObject);
begin
System.InterlockedIncrement(Pointer(FRefs));
end;
//Atomically decrements the reference counter FRefs and frees the object
//when the counter reaches zero. Sender is not used.
procedure TvHostBuffer.Release(Sender:TObject);
begin
 if System.InterlockedDecrement(Pointer(FRefs))<>nil then Exit;

 //last reference is gone
 Free;
end;
initialization
FHostBufferSet.Init;
end.

View File

@ -64,9 +64,9 @@ type
function GetImageInfo:TVkImageCreateInfo; virtual; abstract;
function GetRequirements:TVkMemoryRequirements;
function GetDedicatedAllocation:Boolean;
function Compile(ext:Pointer):Boolean;
function BindMem(P:TvPointer):TVkResult;
procedure OnReleaseMem(Sender:TObject);
function Compile(ext:Pointer):Boolean;
end;
const
@ -551,19 +551,63 @@ begin
(rded.prefersDedicatedAllocation <>VK_FALSE);
end;
//(Re)creates the Vulkan image handle from GetImageInfo.
//An existing handle is destroyed first; ext is stored as the pNext chain of
//the create info and the format is passed through vkFixFormatSupport.
//Returns True on VK_SUCCESS; otherwise the result code is written to StdErr
//and False is returned.
function TvCustomImage.Compile(ext:Pointer):Boolean;
var
 info  :TVkImageCreateInfo;
 status:TVkResult;
begin
 //drop the previous image, if any
 if (FHandle<>VK_NULL_HANDLE) then
 begin
  vkDestroyImage(Device.FHandle,FHandle,nil);
  FHandle:=VK_NULL_HANDLE;
 end;

 info:=GetImageInfo;
 info.pNext :=ext;
 info.format:=vkFixFormatSupport(info.format,info.tiling,info.usage);

 status:=vkCreateImage(Device.FHandle,@info,nil,@FHandle);

 Result:=(status=VK_SUCCESS);
 if not Result then
 begin
  Writeln(StdErr,'vkCreateImage:',status);
 end;
end;
{
 Binds the image to the memory described by P.

 A reference on P is taken first (P.Acquire); the reference is kept while the
 bind is in effect and dropped again when vkBindImageMemory fails.

 Returns the result of vkBindImageMemory, or VK_ERROR_UNKNOWN when the
 reference could not be taken.

 Fix: the memory is bound exactly once and only after the reference is held.
 The previous text bound the image and registered OnReleaseMem before
 P.Acquire and then repeated both steps.
}
function TvCustomImage.BindMem(P:TvPointer):TVkResult;
begin
 if P.Acquire then
 begin
  Result:=vkBindImageMemory(Device.FHandle,FHandle,P.FMemory.FHandle,P.FOffset);
  //
  if (Result=VK_SUCCESS) then
  begin
   FBind:=P;
   //get notified when the memory object goes away
   P.FMemory.AddDependence(@Self.OnReleaseMem);
  end else
  begin
   //bind failed, give the reference back
   P.Release;
  end;
  //
 end else
 begin
  Result:=VK_ERROR_UNKNOWN;
 end;
end;
//Dependency callback registered by BindMem on the bound memory object.
//Clears the stored memory reference and destroys the Vulkan image handle, if any.
procedure TvCustomImage.OnReleaseMem(Sender:TObject);
begin
 FBind.FMemory:=nil;
 //
 if (FHandle=VK_NULL_HANDLE) then Exit;

 vkDestroyImage(Device.FHandle,FHandle,nil);
 FHandle:=VK_NULL_HANDLE;
end;
procedure _test_and_set_to(var new:TVkFlags;
@ -651,33 +695,6 @@ begin
end;
//(Re)creates the Vulkan image handle from GetImageInfo.
//An existing handle is destroyed first; ext is stored as the pNext chain of
//the create info and the format is passed through vkFixFormatSupport.
//Returns True on VK_SUCCESS; otherwise the result code is written to StdErr
//and False is returned.
function TvCustomImage.Compile(ext:Pointer):Boolean;
var
 info  :TVkImageCreateInfo;
 status:TVkResult;
begin
 //drop the previous image, if any
 if (FHandle<>VK_NULL_HANDLE) then
 begin
  vkDestroyImage(Device.FHandle,FHandle,nil);
  FHandle:=VK_NULL_HANDLE;
 end;

 info:=GetImageInfo;
 info.pNext :=ext;
 info.format:=vkFixFormatSupport(info.format,info.tiling,info.usage);

 status:=vkCreateImage(Device.FHandle,@info,nil,@FHandle);

 Result:=(status=VK_SUCCESS);
 if not Result then
 begin
  Writeln(StdErr,'vkCreateImage:',status);
 end;
end;
Constructor TvImage.Create(format:TVkFormat;extent:TVkExtent3D;usage:TVkFlags;flags:TVkImageCreateFlags;ext:Pointer=nil);
begin
FFormat:=format;

View File

@ -6,7 +6,6 @@ interface
uses
SysUtils,
RWLock,
g23tree,
//sys_types,
Vulkan,
@ -62,7 +61,7 @@ type
key:TvImageKey;
FUsage:TVkFlags;
//
lock:TRWLock;
lock:Pointer;
FViews:TvImageView2Set;
//
Barrier:TvImageBarrier;
@ -110,6 +109,9 @@ var
implementation
uses
kern_rwlock;
type
TvImageKeyCompare=object
function c(a,b:PvImageKey):Integer; static;
@ -117,28 +119,22 @@ type
_TvImage2Set=specialize T23treeSet<PvImageKey,TvImageKeyCompare>;
TvImage2Set=object(_TvImage2Set)
lock:TRWLock;
Procedure Init;
lock:Pointer;
Procedure Lock_wr;
Procedure Unlock;
Procedure Unlock_wr;
end;
var
FImage2Set:TvImage2Set;
//Initializes the lock that guards the image set (rwlock_init on the lock field).
Procedure TvImage2Set.Init;
begin
rwlock_init(lock);
end;
Procedure TvImage2Set.Lock_wr;
begin
rwlock_wrlock(lock);
rw_wlock(lock);
end;
Procedure TvImage2Set.Unlock;
Procedure TvImage2Set.Unlock_wr;
begin
rwlock_unlock(lock);
rw_wunlock(lock);
end;
function TvImageKeyCompare.c(a,b:PvImageKey):Integer;
@ -216,7 +212,6 @@ end;
Constructor TvImage2.Create;
begin
inherited;
rwlock_init(lock);
Barrier.Init;
end;
@ -320,7 +315,7 @@ begin
if (Self=nil) then Exit;
if (FHandle=VK_NULL_HANDLE) then Exit;
rwlock_wrlock(lock);
rw_wlock(lock);
t:=nil;
i:=FViews.find(@F);
@ -351,7 +346,7 @@ begin
r:=vkCreateImageView(Device.FHandle,@cinfo,nil,@FView);
if (r<>VK_SUCCESS) then
begin
rwlock_unlock(lock);
rw_wunlock(lock);
Writeln(StdErr,'vkCreateImageView:',r);
Exit;
end;
@ -373,7 +368,7 @@ begin
end;
end;
rwlock_unlock(lock);
rw_wunlock(lock);
Result:=t;
end;
@ -476,7 +471,7 @@ begin
if (cmd=nil) then Exit;
if (not cmd.BeginCmdBuffer) then Exit;
rwlock_wrlock(lock);
rw_wlock(lock);
if Barrier.Push(cmd.cmdbuf,
FHandle,
@ -488,7 +483,7 @@ begin
Inc(cmd.cmd_count);
end;
rwlock_unlock(lock);
rw_wunlock(lock);
end;
{
@ -679,7 +674,7 @@ begin
end;
FImage2Set.Unlock;
FImage2Set.Unlock_wr;
end;
function FindImage(cmd:TvCustomCmdBuffer;Addr:Pointer;cformat:TVkFormat):TvImage2;
@ -696,11 +691,9 @@ begin
end;
end;
FImage2Set.Unlock;
FImage2Set.Unlock_wr;
end;
initialization
FImage2Set.Init;
end.

View File

@ -44,6 +44,7 @@ type
TvPointer=packed object
FMemory:TvDeviceMemory;
FOffset:TVkDeviceSize;
function Acquire:Boolean;
procedure Release;
end;
@ -94,8 +95,6 @@ type
FHeaps:array of TvHeap;
lock:Pointer;
FDevBlocks:array of TvDeviceMemory;
FFreeSet:TFreeDevNodeSet;
FAllcSet:TAllcDevNodeSet;
@ -125,12 +124,20 @@ type
Function _shrink_dev_block(max:TVkDeviceSize;heap_index:Byte):TVkDeviceSize;
Function _shrink_host_map(max:TVkDeviceSize):TVkDeviceSize;
procedure unmap_host(start,__end:QWORD);
Function AllocHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer;
Function FetchHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer;
Function FetchHostMap(Addr,Size:TVkDeviceSize;device_local:Boolean):TvPointer;
end;
var
MemManager:TvMemManager;
const
buf_ext:TVkExternalMemoryBufferCreateInfo=(
sType:VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO;
pNext:nil;
handleTypes:ord(VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
);
function vkAllocMemory(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32):TVkDeviceMemory;
function vkAllocHostMemory(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32;adr:Pointer):TVkDeviceMemory;
function vkAllocDedicatedImage(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32;FHandle:TVkImage):TVkDeviceMemory;
@ -149,6 +156,9 @@ implementation
uses
kern_rwlock;
var
global_mem_lock:Pointer=nil;
Procedure TvDeviceMemory.Acquire;
begin
System.InterlockedIncrement(Pointer(FRefs));
@ -197,6 +207,25 @@ end;
//
//Takes a reference on the memory object this pointer refers to.
//Returns True when the reference was taken, False when FMemory is nil.
function TvPointer.Acquire:Boolean;
begin
Result:=False;
//cheap early-out without taking the lock
if (FMemory=nil) then Exit;
//
rw_rlock(global_mem_lock);
//
//FMemory is tested again under the read lock
//NOTE(review): presumably because another thread may clear it in between - confirm
if (FMemory<>nil) then
begin
FMemory.Acquire;
Result:=True;
end;
//
rw_runlock(global_mem_lock);
end;
procedure TvPointer.Release;
begin
if (FMemory<>nil) then
@ -230,13 +259,6 @@ begin
Result:=Integer(a.FOffset>b.FOffset)-Integer(a.FOffset<b.FOffset);
end;
const
buf_ext:TVkExternalMemoryBufferCreateInfo=(
sType:VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO;
pNext:nil;
handleTypes:ord(VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
);
function GetHostMappedRequirements:TVkMemoryRequirements;
var
cinfo:TVkBufferCreateInfo;
@ -762,7 +784,8 @@ begin
key:=Default(TDevNode);
Size:=System.Align(Size,8);
if (Align>GRANULAR_DEV_BLOCK_SIZE) then Align:=GRANULAR_DEV_BLOCK_SIZE;
rw_wlock(lock);
//
rw_wlock(global_mem_lock);
//
if _FetchFree_a(Size,Align,mtindex,key) then
begin
@ -818,7 +841,7 @@ begin
Result.FMemory.Acquire;
end;
//
rw_wunlock(lock);
rw_wunlock(global_mem_lock);
end;
Function TvMemManager.Free(P:TvPointer):Boolean;
@ -827,7 +850,8 @@ var
begin
if (P.FMemory=nil) then Exit;
key:=Default(TDevNode);
rw_wlock(lock);
//
rw_wlock(global_mem_lock);
//
if _FindDevBlock(P.FMemory,key.FBlockId) then
if _FetchAllc(P.FOffset,key.FBlockId,key) then
@ -865,7 +889,7 @@ begin
Result:=True;
end;
//
rw_wunlock(lock);
rw_wunlock(global_mem_lock);
end;
Function TvMemManager._shrink_dev_block(max:TVkDeviceSize;heap_index:Byte):TVkDeviceSize;
@ -932,7 +956,7 @@ begin
if (start=__end) then Exit;
//
rw_wlock(lock);
rw_wlock(global_mem_lock);
//
node:=TvHostMemory(TAILQ_FIRST(@FHosts));
@ -962,8 +986,7 @@ begin
end;
//
rw_wunlock(lock);
//
rw_wunlock(global_mem_lock);
end;
function AlignUp(addr:PtrUInt;alignment:PtrUInt):PtrUInt; inline;
@ -979,7 +1002,7 @@ begin
Result:=addr-(addr mod alignment);
end;
Function TvMemManager.AllocHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer;
Function TvMemManager.FetchHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer;
label
_retry,
_fail;
@ -998,7 +1021,7 @@ begin
FStart:=QWORD(Addr);
F__End:=FStart+Size;
//
rw_wlock(lock);
rw_wlock(global_mem_lock);
//
node:=TvHostMemory(TAILQ_FIRST(@FHosts));
@ -1079,7 +1102,7 @@ begin
_fail:
//
rw_wunlock(lock);
rw_wunlock(global_mem_lock);
//
if (node<>nil) then
@ -1089,6 +1112,23 @@ begin
end;
end;
//Convenience overload: picks the first heap that is host visible and whose
//device_local flag equals the requested one, then forwards to the overload
//that takes a memory type index, using that heap's def_mem_type.
//Returns an empty TvPointer (Default) when no heap qualifies.
Function TvMemManager.FetchHostMap(Addr,Size:TVkDeviceSize;device_local:Boolean):TvPointer;
var
 heap_idx:Byte;
begin
 Result:=Default(TvPointer);

 Assert(Length(FHeaps)<>0);

 For heap_idx:=0 to High(FHeaps) do
 begin
  if not FHeaps[heap_idx].host_visible then Continue;
  if (FHeaps[heap_idx].device_local<>device_local) then Continue;

  Exit(FetchHostMap(Addr,Size,FHeaps[heap_idx].def_mem_type));
 end;
end;
//
function vkAllocMemory(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32):TVkDeviceMemory;

View File

@ -86,6 +86,7 @@ type
PGPU_REGS=^TGPU_REGS;
TGPU_REGS=packed object
SH_REG:PSH_REG_GROUP;
CX_REG:PCONTEXT_REG_GROUP; // 0xA000
Function _SHADER_MASK(i:Byte):Byte; inline; //0..7
@ -103,6 +104,15 @@ type
Function DB_ENABLE:Boolean;
Function GET_DB_INFO:TDB_INFO;
function get_reg(i:word):DWORD;
Function get_cs_addr:Pointer;
Function get_ps_addr:Pointer;
Function get_vs_addr:Pointer;
Function get_gs_addr:Pointer;
Function get_es_addr:Pointer;
Function get_hs_addr:Pointer;
Function get_ls_addr:Pointer;
end;
function GET_PRIM_TYPE (const VGT_PRIMITIVE_TYPE:TVGT_PRIMITIVE_TYPE):TVkPrimitiveTopology;
@ -1158,6 +1168,53 @@ begin
Result.FImageInfo.params.arrayLayers:=1;
end;
// Reads one 32-bit register by its register index.
//
// i - register index.
//      $2C00..$2E7F : read from the block pointed to by SH_REG,
//                     viewed as an array of DWORD with base index $2C00.
//      $A000..$A38F : read from the block pointed to by CX_REG,
//                     viewed as an array of DWORD with base index $A000.
//
// Returns the register value, or 0 for any index outside both ranges.
// SH_REG / CX_REG are dereferenced without a nil check.
function TGPU_REGS.get_reg(i:word):DWORD;
begin
 // Anything outside the two known windows reads as zero.
 Result:=0;

 if (i>=$2C00) and (i<=$2E7F) then
 begin
  Result:=PDWORD(SH_REG)[i-$2C00];
 end else
 if (i>=$A000) and (i<=$A38F) then
 begin
  Result:=PDWORD(CX_REG)[i-$A000];
 end;
end;
// Returns the pointer produced by getCodeAddress from the
// COMPUTE_PGM_LO and COMPUTE_PGM_HI.DATA fields of SH_REG.
// NOTE(review): presumably the start address of the compute shader program;
// confirm against getCodeAddress.
// SH_REG is dereferenced without a nil check.
Function TGPU_REGS.get_cs_addr:Pointer;
begin
Result:=getCodeAddress(SH_REG^.COMPUTE_PGM_LO,SH_REG^.COMPUTE_PGM_HI.DATA);
end;
// Returns the pointer produced by getCodeAddress from the
// SPI_SHADER_PGM_LO_PS and SPI_SHADER_PGM_HI_PS.MEM_BASE fields of SH_REG.
// NOTE(review): presumably the start address of the pixel shader (PS) program;
// confirm against getCodeAddress.
// SH_REG is dereferenced without a nil check.
Function TGPU_REGS.get_ps_addr:Pointer;
begin
Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_PS,SH_REG^.SPI_SHADER_PGM_HI_PS.MEM_BASE);
end;
// Returns the pointer produced by getCodeAddress from the
// SPI_SHADER_PGM_LO_VS and SPI_SHADER_PGM_HI_VS.MEM_BASE fields of SH_REG.
// NOTE(review): presumably the start address of the vertex shader (VS) program;
// confirm against getCodeAddress.
// SH_REG is dereferenced without a nil check.
Function TGPU_REGS.get_vs_addr:Pointer;
begin
Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_VS,SH_REG^.SPI_SHADER_PGM_HI_VS.MEM_BASE);
end;
// Returns the pointer produced by getCodeAddress from the
// SPI_SHADER_PGM_LO_GS and SPI_SHADER_PGM_HI_GS.MEM_BASE fields of SH_REG.
// NOTE(review): presumably the start address of the GS stage program;
// confirm against getCodeAddress.
// SH_REG is dereferenced without a nil check.
Function TGPU_REGS.get_gs_addr:Pointer;
begin
Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_GS,SH_REG^.SPI_SHADER_PGM_HI_GS.MEM_BASE);
end;
// Returns the pointer produced by getCodeAddress from the
// SPI_SHADER_PGM_LO_ES and SPI_SHADER_PGM_HI_ES.MEM_BASE fields of SH_REG.
// NOTE(review): presumably the start address of the ES stage program;
// confirm against getCodeAddress.
// SH_REG is dereferenced without a nil check.
Function TGPU_REGS.get_es_addr:Pointer;
begin
Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_ES,SH_REG^.SPI_SHADER_PGM_HI_ES.MEM_BASE);
end;
// Returns the pointer produced by getCodeAddress from the
// SPI_SHADER_PGM_LO_HS and SPI_SHADER_PGM_HI_HS.MEM_BASE fields of SH_REG.
// NOTE(review): presumably the start address of the HS stage program;
// confirm against getCodeAddress.
// SH_REG is dereferenced without a nil check.
Function TGPU_REGS.get_hs_addr:Pointer;
begin
Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_HS,SH_REG^.SPI_SHADER_PGM_HI_HS.MEM_BASE);
end;
// Returns the pointer produced by getCodeAddress from the
// SPI_SHADER_PGM_LO_LS and SPI_SHADER_PGM_HI_LS.MEM_BASE fields of SH_REG.
// NOTE(review): presumably the start address of the LS stage program;
// confirm against getCodeAddress.
// SH_REG is dereferenced without a nil check.
Function TGPU_REGS.get_ls_addr:Pointer;
begin
Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_LS,SH_REG^.SPI_SHADER_PGM_HI_LS.MEM_BASE);
end;
///
function GET_PRIM_TYPE(const VGT_PRIMITIVE_TYPE:TVGT_PRIMITIVE_TYPE):TVkPrimitiveTopology;
begin
case VGT_PRIMITIVE_TYPE.PRIM_TYPE of

View File

@ -7,8 +7,6 @@ interface
uses
Classes,
SysUtils,
RWLock,
//sys_types,
g23tree,
//ps4_libSceVideoOut,
si_ci_vi_merged_enum,

View File

@ -6,14 +6,12 @@ interface
uses
SysUtils,
RWLock,
g23tree,
Vulkan,
vDevice,
vDependence,
vPipeline{,
vImage,
vPipeline,
vImage{,
vCmdBuffer};
type
@ -46,16 +44,11 @@ type
function FetchRenderPass(cmd:TvDependenciesObject;P:PvRenderPassKey):TvRenderPass2;
////////////////
const
//useage image
TM_READ =1;
TM_WRITE=2;
TM_CLEAR=4;
////////////////
implementation
uses
kern_rwlock;
type
TvRenderPassKey2Compare=object
function c(a,b:PvRenderPassKey):Integer; static;
@ -63,10 +56,9 @@ type
_TvRenderPass2Set=specialize T23treeSet<PvRenderPassKey,TvRenderPassKey2Compare>;
TvRenderPass2Set=object(_TvRenderPass2Set)
lock:TRWLock;
Procedure Init;
lock:Pointer;
Procedure Lock_wr;
Procedure Unlock;
Procedure Unlock_wr;
end;
var
@ -306,19 +298,14 @@ begin
Result:=True;
end;
Procedure TvRenderPass2Set.Init;
begin
rwlock_init(lock);
end;
Procedure TvRenderPass2Set.Lock_wr;
begin
rwlock_wrlock(lock);
rw_wlock(lock);
end;
Procedure TvRenderPass2Set.Unlock;
Procedure TvRenderPass2Set.Unlock_wr;
begin
rwlock_unlock(lock);
rw_wunlock(lock);
end;
Procedure TvRenderPass2.Acquire;
@ -395,12 +382,10 @@ begin
end;
end;
FRenderPass2Set.Unlock;
FRenderPass2Set.Unlock_wr;
end;
initialization
FRenderPass2Set.Init;
end.

View File

@ -6,7 +6,6 @@ interface
uses
SysUtils,
RWLock,
g23tree,
Vulkan,
vPipeline;
@ -17,6 +16,9 @@ Function FetchSetLayout(FStage:TVkShaderStageFlags;
implementation
uses
kern_rwlock;
type
TvSetLayoutCompare=class
class function c(a,b:PvSetLayoutKey):Integer; static;
@ -24,28 +26,22 @@ type
_TvSetLayoutsPool=specialize T23treeSet<PvSetLayoutKey,TvSetLayoutCompare>;
TvSetLayoutsPool=object(_TvSetLayoutsPool)
lock:TRWLock;
Procedure Init;
lock:Pointer;
Procedure Lock_wr;
Procedure Unlock;
Procedure Unlock_wr;
end;
var
FSetLayoutsPool:TvSetLayoutsPool;
Procedure TvSetLayoutsPool.Init;
begin
rwlock_init(lock);
end;
Procedure TvSetLayoutsPool.Lock_wr;
begin
rwlock_wrlock(lock);
rw_wlock(lock);
end;
Procedure TvSetLayoutsPool.Unlock;
Procedure TvSetLayoutsPool.Unlock_wr;
begin
rwlock_unlock(lock);
rw_wunlock(lock);
end;
function CompareBind(var a,b:TVkDescriptorSetLayoutBinding):Integer; forward;
@ -111,7 +107,7 @@ begin
Result:=t;
end;
FSetLayoutsPool.Unlock;
FSetLayoutsPool.Unlock_wr;
t.Compile;
end;
@ -163,8 +159,6 @@ begin
Result:=CompareBinds(a^.FBinds,b^.FBinds,Length(a^.FBinds));
end;
initialization
FSetLayoutsPool.Init;
end.

View File

@ -7,14 +7,15 @@ interface
uses
SysUtils,
Classes,
RWLock,
murmurhash,
g23tree,
ps4_pssl,
ps4_shader,
ps4_gpu_regs,
vRegs2Vulkan,
shader_dump,
ps4_program,
//ps4_program,
vDevice,
@ -61,6 +62,10 @@ function FetchShaderGroup(F:PvShadersKey):TvShaderGroup;
implementation
uses
kern_rwlock,
kern_dmem;
type
TShaderCacheCompare=object
function c(a,b:PShaderDataKey):Integer; static;
@ -72,54 +77,42 @@ type
_TShaderCacheSet=specialize T23treeSet<PShaderDataKey,TShaderCacheCompare>;
TShaderCacheSet=object(_TShaderCacheSet)
lock:TRWLock;
Procedure Init;
lock:Pointer;
Procedure Lock_wr;
Procedure Unlock;
Procedure Unlock_wr;
end;
_TShaderGroupSet=specialize T23treeSet<PvShadersKey,TShadersKeyCompare>;
TShaderGroupSet=object(_TShaderGroupSet)
lock:TRWLock;
Procedure Init;
lock:Pointer;
Procedure Lock_wr;
Procedure Unlock;
Procedure Unlock_wr;
end;
var
FShaderCacheSet:TShaderCacheSet;
FShaderGroupSet:TShaderGroupSet;
Procedure TShaderCacheSet.Init;
begin
rwlock_init(lock);
end;
Procedure TShaderCacheSet.Lock_wr;
begin
rwlock_wrlock(lock);
rw_wlock(lock);
end;
Procedure TShaderCacheSet.Unlock;
Procedure TShaderCacheSet.Unlock_wr;
begin
rwlock_unlock(lock);
rw_wunlock(lock);
end;
//
Procedure TShaderGroupSet.Init;
begin
rwlock_init(lock);
end;
Procedure TShaderGroupSet.Lock_wr;
begin
rwlock_wrlock(lock);
rw_wlock(lock);
end;
Procedure TShaderGroupSet.Unlock;
Procedure TShaderGroupSet.Unlock_wr;
begin
rwlock_unlock(lock);
rw_wunlock(lock);
end;
function Max(a,b:PtrInt):PtrInt; inline;
@ -180,7 +173,7 @@ var
F:THandle;
fname:RawByteString;
begin
hash:=FastHash(M.Memory,M.Size);
hash:=MurmurHash64A(M.Memory,M.Size,0);
case FStage of
vShaderStagePs:fname:='_ps_';
@ -231,18 +224,29 @@ begin
case FStage of
vShaderStagePs :
begin
SprvEmit.InitPs(GPU_REGS.SPI.PS.RSRC1,GPU_REGS.SPI.PS.RSRC2,GPU_REGS.SPI.PS.INPUT_ENA);
SprvEmit.SetUserData(@GPU_REGS.SPI.PS.USER_DATA);
SprvEmit.InitPs(GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_PS,
GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC2_PS,
GPU_REGS.CX_REG^.SPI_PS_INPUT_ENA);
SprvEmit.SetUserData(@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_PS);
end;
vShaderStageVs:
begin
SprvEmit.InitVs(GPU_REGS.SPI.VS.RSRC1,GPU_REGS.SPI.VS.RSRC2,GPU_REGS.VGT_NUM_INSTANCES);
SprvEmit.SetUserData(@GPU_REGS.SPI.VS.USER_DATA);
SprvEmit.InitVs(GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_VS,
GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC2_VS,
GPU_REGS.CX_REG^.VGT_DMA_NUM_INSTANCES);
SprvEmit.SetUserData(@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_VS);
end;
vShaderStageCs:
begin
SprvEmit.InitCs(GPU_REGS.SPI.CS.RSRC1,GPU_REGS.SPI.CS.RSRC2,GPU_REGS.SPI.CS.NUM_THREAD_X,GPU_REGS.SPI.CS.NUM_THREAD_Y,GPU_REGS.SPI.CS.NUM_THREAD_Z);
SprvEmit.SetUserData(@GPU_REGS.SPI.CS.USER_DATA);
SprvEmit.InitCs(GPU_REGS.SH_REG^.COMPUTE_PGM_RSRC1,
GPU_REGS.SH_REG^.COMPUTE_PGM_RSRC2,
GPU_REGS.SH_REG^.COMPUTE_NUM_THREAD_X,
GPU_REGS.SH_REG^.COMPUTE_NUM_THREAD_Y,
GPU_REGS.SH_REG^.COMPUTE_NUM_THREAD_Z);
SprvEmit.SetUserData(@GPU_REGS.SH_REG^.COMPUTE_USER_DATA);
end;
else
@ -313,13 +317,14 @@ begin
begin
Case FStage of
vShaderStageVs:pUserData:=@GPU_REGS.SPI.VS.USER_DATA;
vShaderStagePs:pUserData:=@GPU_REGS.SPI.PS.USER_DATA;
vShaderStageCs:pUserData:=@GPU_REGS.SPI.CS.USER_DATA;
vShaderStageVs:pUserData:=@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_VS;
vShaderStagePs:pUserData:=@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_PS;
vShaderStageCs:pUserData:=@GPU_REGS.SH_REG^.COMPUTE_USER_DATA;
else
Assert(false);
end;
FShader:=nil;
if Length(t.FShaders)<>0 then
For i:=0 to High(t.FShaders) do
@ -445,25 +450,32 @@ end;
function FetchShader(FStage:TvShaderStage;FDescSetId:Integer;var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderExt;
var
pData:PDWORD;
pData0:PDWORD;
pData1:PDWORD;
begin
Case FStage of
vShaderStageVs:pData:=getCodeAddress(GPU_REGS.SPI.VS.LO,GPU_REGS.SPI.VS.HI);
vShaderStagePs:pData:=getCodeAddress(GPU_REGS.SPI.PS.LO,GPU_REGS.SPI.PS.HI);
vShaderStageCs:pData:=getCodeAddress(GPU_REGS.SPI.CS.LO,GPU_REGS.SPI.CS.HI);
vShaderStageVs:pData0:=GPU_REGS.get_vs_addr;
vShaderStagePs:pData0:=GPU_REGS.get_ps_addr;
vShaderStageCs:pData0:=GPU_REGS.get_cs_addr;
else
Assert(false);
end;
if (pData=nil) then Exit(nil);
if (pData0=nil) then Exit(nil);
//Assert(pData<>nil);
pData1:=nil;
if not get_dmem_ptr(pData0,@pData1,nil) then
begin
Assert(false,'get_dmem_ptr');
end;
FShaderCacheSet.Lock_wr;
Result:=_FetchShader(FStage,pData,FDescSetId,GPU_REGS,pc);
Result:=_FetchShader(FStage,pData1,FDescSetId,GPU_REGS,pc);
FShaderCacheSet.Unlock;
FShaderCacheSet.Unlock_wr;
end;
//
@ -515,12 +527,9 @@ begin
Result:=_FetchShaderGroup(F);
FShaderGroupSet.Unlock;
FShaderGroupSet.Unlock_wr;
end;
initialization
FShaderCacheSet.Init;
FShaderGroupSet.Init;
end.