diff --git a/chip/pm4_me.pas b/chip/pm4_me.pas index 294e735f..b8541598 100644 --- a/chip/pm4_me.pas +++ b/chip/pm4_me.pas @@ -10,9 +10,15 @@ uses LFQueue, vBuffer, + vHostBufferManager, vImage, vImageManager, vRenderPassManager, + vShaderExt, + vShaderManager, + vRegs2Vulkan, + + shader_dump, kern_thr, md_sleep, @@ -52,7 +58,7 @@ procedure t_pm4_me.start; begin if (XCHG(started,Pointer(1))=nil) then begin - kthread_add(@pm4_me_thread,@self,@td,0,'[GFX_ME]'); + kthread_add(@pm4_me_thread,@self,@td,(8*1024*1024) div (16*1024),'[GFX_ME]'); end; end; @@ -83,6 +89,29 @@ end; // +procedure pm4_DrawIndex2(node:p_pm4_node_DrawIndex2); +var + GPU_REGS:TGPU_REGS; + + FVSShader:TvShaderExt; + FPSShader:TvShaderExt; + + FShadersKey:TvShadersKey; + FShaderGroup:TvShaderGroup; +begin + GPU_REGS:=Default(TGPU_REGS); + GPU_REGS.SH_REG:=@node^.SH_REG; + GPU_REGS.CX_REG:=@node^.CX_REG; + + {fdump_ps:=}DumpPS(GPU_REGS); + {fdump_vs:=}DumpVS(GPU_REGS); + + FPSShader:=FetchShader(vShaderStagePs,0,GPU_REGS,nil{@pa}); + FVSShader:=FetchShader(vShaderStageVs,1,GPU_REGS,nil{@pa}); + + +end; + procedure pm4_me_thread(me:p_pm4_me); SysV_ABI_CDecl; var stream:p_pm4_stream; @@ -99,6 +128,12 @@ begin while (node<>nil) do begin Writeln('+',node^.ntype); + + case node^.ntype of + ntDrawIndex2:pm4_DrawIndex2(Pointer(node)); + else + end; + // node:=stream^.Next(node); end; diff --git a/chip/shader_dump.pas b/chip/shader_dump.pas index 421eb0be..34d16b89 100644 --- a/chip/shader_dump.pas +++ b/chip/shader_dump.pas @@ -7,77 +7,28 @@ interface uses Classes, SysUtils, - ps4_program, + kern_authinfo, + kern_proc, + murmurhash, si_ci_vi_merged_offset, ps4_shader, - ps4_gpu_regs; + vRegs2Vulkan; type TDUMP_WORD=packed record REG,COUNT:WORD; end; -Function FastHash(data:PByte;len:DWORD):DWORD; Procedure DUMP_BLOCK(F:THandle;REG:WORD;P:Pointer;Size:DWORD); + +Function get_dev_progname:RawByteString; + function DumpCS(var GPU_REGS:TGPU_REGS):RawByteString; function DumpPS(var GPU_REGS:TGPU_REGS):RawByteString; function DumpVS(var GPU_REGS:TGPU_REGS):RawByteString; implementation -Function FastHash(data:PByte;len:DWORD):DWORD; -var - hash,tmp,rem:DWORD; -begin - if (len=0) or (data=nil) then Exit(0); - hash:=len; - - rem:=len and 3; - len:=len shr 2; - - While (len>0) do - begin - tmp :=PDWORD(data)[0]; - hash:=hash+PWORD(@tmp)[0]; - tmp :=(PWORD(@tmp)[1] shl 11) xor hash; - hash:=(hash shl 16) xor tmp; - data:=@PWORD(data)[2]; - hash:=hash+(hash shr 11); - Dec(len); - end; - - Case rem of - 3: - begin - hash:=hash+PWORD(data)[0]; - hash:=hash xor (hash shl 16); - hash:=hash xor (PShortint(data)[2] shl 18); - hash:=hash+(hash shr 11); - end; - 2: - begin - hash:=hash+PWORD(data)[0]; - hash:=hash xor (hash shl 11); - hash:=hash+(hash shr 17); - end; - 1: - begin - hash:=hash+PShortint(data)[0]; - hash:=hash xor (hash shl 10); - hash:=hash+(hash shr 1); - end; - end; - - hash:=hash xor (hash shl 3); - hash:=hash+(hash shr 5); - hash:=hash xor (hash shl 4); - hash:=hash+(hash shr 17); - hash:=hash xor (hash shl 25); - hash:=hash+(hash shr 6); - - Result:=hash; -end; - Procedure DUMP_BLOCK(F:THandle;REG:WORD;P:Pointer;Size:DWORD); const MAX_SIZE=($FFFF+1)*4; @@ -93,6 +44,14 @@ begin FileWrite(F,P^,System.Align(Size,4)); end; +Procedure DUMP_REG(F:THandle;REG:WORD;var GPU_REGS:TGPU_REGS); +var + DATA:DWORD; +begin + DATA:=GPU_REGS.get_reg(REG); + DUMP_BLOCK(F,REG,@DATA,SizeOf(DWORD)); +end; + type TUSER_DATA_USEAGE=array[0..15] of Byte; @@ -178,6 +137,28 @@ begin end; end; +function Trim(const S: RawByteString): RawByteString; +var + Ofs, Len: sizeint; +begin + len := Length(S); + while (Len>0) and ((S[Len]<=' ') or (S[Len]='?')) do + dec(Len); + Ofs := 1; + while (Ofs<=Len) and ((S[Ofs]<=' ') or (S[Ofs]='?')) do + Inc(Ofs); + result := Copy(S, Ofs, 1 + Len - Ofs); +end; + +Function get_dev_progname:RawByteString; +begin + Result:=Trim(g_appinfo.CUSANAME); + if (Result='') then + begin + Result:=Trim(p_proc.p_comm); + end; +end; + function DumpCS(var GPU_REGS:TGPU_REGS):RawByteString; var size,hash:DWORD; @@ -186,12 +167,13 @@ var fname:RawByteString; begin Result:=''; - base:=getCodeAddress(GPU_REGS.SPI.CS.LO,GPU_REGS.SPI.CS.HI); + + base:=GPU_REGS.get_cs_addr; if (base<>nil) then begin size:=_calc_shader_size(base); - hash:=FastHash(base,size); + hash:=MurmurHash64A(base,size,0); fname:='shader_dump\'+get_dev_progname+'_cs_'+HexStr(hash,8)+'.dump'; Result:=fname; @@ -201,18 +183,18 @@ begin F:=FileCreate(fname); DUMP_BLOCK(F,mmCOMPUTE_PGM_LO,base,size); - DUMP_BLOCK(F,mmCOMPUTE_PGM_RSRC1 ,@GPU_REGS.SPI.CS.RSRC1 ,SizeOf(DWORD)); - DUMP_BLOCK(F,mmCOMPUTE_PGM_RSRC2 ,@GPU_REGS.SPI.CS.RSRC2 ,SizeOf(DWORD)); + DUMP_REG(F,mmCOMPUTE_PGM_RSRC1 ,GPU_REGS); + DUMP_REG(F,mmCOMPUTE_PGM_RSRC2 ,GPU_REGS); - DUMP_BLOCK(F,mmCOMPUTE_NUM_THREAD_X,@GPU_REGS.SPI.CS.NUM_THREAD_X,SizeOf(DWORD)); - DUMP_BLOCK(F,mmCOMPUTE_NUM_THREAD_Y,@GPU_REGS.SPI.CS.NUM_THREAD_Y,SizeOf(DWORD)); - DUMP_BLOCK(F,mmCOMPUTE_NUM_THREAD_Z,@GPU_REGS.SPI.CS.NUM_THREAD_Z,SizeOf(DWORD)); + DUMP_REG(F,mmCOMPUTE_NUM_THREAD_X,GPU_REGS); + DUMP_REG(F,mmCOMPUTE_NUM_THREAD_Y,GPU_REGS); + DUMP_REG(F,mmCOMPUTE_NUM_THREAD_Z,GPU_REGS); - DUMP_USER_DATA(F,base,mmCOMPUTE_USER_DATA_0,@GPU_REGS.SPI.CS.USER_DATA); + DUMP_USER_DATA(F,base,mmCOMPUTE_USER_DATA_0,@GPU_REGS.SH_REG^.COMPUTE_USER_DATA); - DUMP_BLOCK(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE0,@GPU_REGS.SPI.CS.STATIC_THREAD_MGMT_SE0,SizeOf(DWORD)); - DUMP_BLOCK(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE1,@GPU_REGS.SPI.CS.STATIC_THREAD_MGMT_SE1,SizeOf(DWORD)); - DUMP_BLOCK(F,mmCOMPUTE_RESOURCE_LIMITS ,@GPU_REGS.SPI.CS.RESOURCE_LIMITS ,SizeOf(DWORD)); + DUMP_REG(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE0,GPU_REGS); + DUMP_REG(F,mmCOMPUTE_STATIC_THREAD_MGMT_SE1,GPU_REGS); + DUMP_REG(F,mmCOMPUTE_RESOURCE_LIMITS ,GPU_REGS); FileClose(F); @@ -228,12 +210,12 @@ var fname:RawByteString; begin Result:=''; - base:=getCodeAddress(GPU_REGS.SPI.PS.LO,GPU_REGS.SPI.PS.HI); + base:=GPU_REGS.get_ps_addr; if (base<>nil) then begin size:=_calc_shader_size(base); - hash:=FastHash(base,size); + hash:=MurmurHash64A(base,size,0); fname:='shader_dump\'+get_dev_progname+'_ps_'+HexStr(hash,8)+'.dump'; Result:=fname; @@ -243,27 +225,27 @@ begin F:=FileCreate(fname); DUMP_BLOCK(F,mmSPI_SHADER_PGM_LO_PS,base,size); - DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC1_PS,@GPU_REGS.SPI.PS.RSRC1,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC2_PS,@GPU_REGS.SPI.PS.RSRC2,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC3_PS,@GPU_REGS.SPI.PS.RSRC3,SizeOf(DWORD)); + DUMP_REG(F,mmSPI_SHADER_PGM_RSRC1_PS,GPU_REGS); + DUMP_REG(F,mmSPI_SHADER_PGM_RSRC2_PS,GPU_REGS); + DUMP_REG(F,mmSPI_SHADER_PGM_RSRC3_PS,GPU_REGS); - DUMP_BLOCK(F,mmSPI_SHADER_Z_FORMAT ,@GPU_REGS.SPI.PS.Z_FORMAT ,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_SHADER_COL_FORMAT ,@GPU_REGS.SPI.PS.COL_FORMAT,SizeOf(DWORD)); + DUMP_REG(F,mmSPI_SHADER_Z_FORMAT ,GPU_REGS); + DUMP_REG(F,mmSPI_SHADER_COL_FORMAT ,GPU_REGS); - DUMP_BLOCK(F,mmSPI_PS_INPUT_ENA ,@GPU_REGS.SPI.PS.INPUT_ENA ,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_PS_INPUT_ADDR ,@GPU_REGS.SPI.PS.INPUT_ADDR,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_PS_IN_CONTROL ,@GPU_REGS.SPI.PS.IN_CONTROL,SizeOf(DWORD)); + DUMP_REG(F,mmSPI_PS_INPUT_ENA ,GPU_REGS); + DUMP_REG(F,mmSPI_PS_INPUT_ADDR ,GPU_REGS); + DUMP_REG(F,mmSPI_PS_IN_CONTROL ,GPU_REGS); - DUMP_BLOCK(F,mmSPI_BARYC_CNTL ,@GPU_REGS.SPI.PS.BARYC_CNTL,SizeOf(DWORD)); + DUMP_REG(F,mmSPI_BARYC_CNTL ,GPU_REGS); - DUMP_BLOCK(F,mmDB_SHADER_CONTROL ,@GPU_REGS.SPI.PS.SHADER_CONTROL,SizeOf(DWORD)); - DUMP_BLOCK(F,mmCB_SHADER_MASK ,@GPU_REGS.SPI.PS.SHADER_MASK ,SizeOf(DWORD)); + DUMP_REG(F,mmDB_SHADER_CONTROL ,GPU_REGS); + DUMP_REG(F,mmCB_SHADER_MASK ,GPU_REGS); - DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_PS_0,@GPU_REGS.SPI.PS.USER_DATA); + DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_PS_0,@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_PS); For i:=0 to 31 do begin - DUMP_BLOCK(F,mmSPI_PS_INPUT_CNTL_0+i,@GPU_REGS.SPI.PS.INPUT_CNTL[i],SizeOf(DWORD)); + DUMP_REG(F,mmSPI_PS_INPUT_CNTL_0+i,GPU_REGS); end; FileClose(F); @@ -279,12 +261,12 @@ var fname:RawByteString; begin Result:=''; - base:=getCodeAddress(GPU_REGS.SPI.VS.LO,GPU_REGS.SPI.VS.HI); + base:=GPU_REGS.get_vs_addr; if (base<>nil) then begin size:=_calc_shader_size(base); - hash:=FastHash(base,size); + hash:=MurmurHash64A(base,size,0); fname:='shader_dump\'+get_dev_progname+'_vs_'+HexStr(hash,8)+'.dump'; Result:=fname; @@ -294,17 +276,20 @@ begin F:=FileCreate(fname); DUMP_BLOCK(F,mmSPI_SHADER_PGM_LO_VS,base,size); - DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC1_VS,@GPU_REGS.SPI.VS.RSRC1,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC2_VS,@GPU_REGS.SPI.VS.RSRC2,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_SHADER_PGM_RSRC3_VS,@GPU_REGS.SPI.VS.RSRC3,SizeOf(DWORD)); + DUMP_REG(F,mmSPI_SHADER_PGM_RSRC1_VS,GPU_REGS); + DUMP_REG(F,mmSPI_SHADER_PGM_RSRC2_VS,GPU_REGS); + DUMP_REG(F,mmSPI_SHADER_PGM_RSRC3_VS,GPU_REGS); - DUMP_BLOCK(F,mmSPI_VS_OUT_CONFIG ,@GPU_REGS.SPI.VS.OUT_CONFIG,SizeOf(DWORD)); - DUMP_BLOCK(F,mmSPI_SHADER_POS_FORMAT,@GPU_REGS.SPI.VS.POS_FORMAT,SizeOf(DWORD)); - DUMP_BLOCK(F,mmPA_CL_VS_OUT_CNTL ,@GPU_REGS.SPI.VS.OUT_CNTL ,SizeOf(DWORD)); + DUMP_REG(F,mmSPI_VS_OUT_CONFIG ,GPU_REGS); + DUMP_REG(F,mmSPI_SHADER_POS_FORMAT,GPU_REGS); + DUMP_REG(F,mmPA_CL_VS_OUT_CNTL ,GPU_REGS); - DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_VS_0,@GPU_REGS.SPI.VS.USER_DATA); + DUMP_USER_DATA(F,base,mmSPI_SHADER_USER_DATA_VS_0,@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_VS); + + DUMP_REG(F,mmVGT_DMA_NUM_INSTANCES,GPU_REGS); + + //DUMP_REG(F,mmVGT_NUM_INSTANCES,@GPU_REGS.VGT_NUM_INSTANCES,SizeOf(DWORD)); - DUMP_BLOCK(F,mmVGT_NUM_INSTANCES ,@GPU_REGS.VGT_NUM_INSTANCES,SizeOf(DWORD)); FileClose(F); diff --git a/sys/dev/dev_gc.pas b/sys/dev/dev_gc.pas index 0d5042ad..9ee54973 100644 --- a/sys/dev/dev_gc.pas +++ b/sys/dev/dev_gc.pas @@ -33,6 +33,7 @@ uses pm4_me, vDevice, + vMemory, subr_backtrace; @@ -45,6 +46,14 @@ var gc_knl_lock:mtx; gc_knlist:t_knlist; +procedure unmap_dmem_gc(start,__end:DWORD); public; +begin + if (MemManager<>nil) then + begin + MemManager.unmap_host(start,__end); + end; +end; + function mmap_addr(paddr,psize:QWORD; prot:Integer; pout_addr:PQWORD):Integer; diff --git a/sys/vm/rmem_map.pas b/sys/vm/rmem_map.pas index e064e4c7..4a855105 100644 --- a/sys/vm/rmem_map.pas +++ b/sys/vm/rmem_map.pas @@ -614,10 +614,65 @@ begin curkthread^.td_rmap_def_user:=entry; end; -function rmem_map_delete(map :p_rmem_map; - vaddr:DWORD; - start:DWORD; - __end:DWORD):Integer; +procedure unmap_dmem_gc(start,__end:DWORD); external; + +procedure rmem_map_unmap_check(map :p_rmem_map; + start:DWORD; + __end:DWORD); +var + entry :p_rmem_map_entry; + first_entry:p_rmem_map_entry; + s,e:DWORD; +begin + + if (not rmem_map_lookup_entry_any(map,start,@first_entry)) then + begin + entry:=first_entry^.next; + end else + begin + entry:=first_entry; + end; + + repeat + + if (entry^.start>start) then + begin + s:=start; + + if (entry^.start>__end) then + begin + e:=__end; + end else + begin + e:=entry^.start; + end; + + if (s<>e) then + begin + unmap_dmem_gc(IDX_TO_OFF(s),IDX_TO_OFF(e)); + end; + + start:=e; + end else + if (entry^.__end>start) then + begin + start:=entry^.__end; + end; + + if (start>=__end) or (entry=@map^.header) or (entry^.start>=__end) then + begin + Break; + end; + + entry:=entry^.next; + + until false; +end; + +function rmem_map_delete(map :p_rmem_map; + vaddr:DWORD; + start:DWORD; + __end:DWORD):Integer; var entry :p_rmem_map_entry; first_entry:p_rmem_map_entry; @@ -669,6 +724,9 @@ begin entry:=next; end; + + rmem_map_unmap_check(map,start,__end); + Result:=(0); end; @@ -715,6 +773,9 @@ begin entry:=next; end; + + unmap_dmem_gc(IDX_TO_OFF(start),IDX_TO_OFF(__end)); + Result:=(0); end; diff --git a/sys/vm/vm_map.pas b/sys/vm/vm_map.pas index d2380dd1..19167d49 100644 --- a/sys/vm/vm_map.pas +++ b/sys/vm/vm_map.pas @@ -894,40 +894,17 @@ function vm_object_rmap_release(map :vm_map_t; obj :vm_object_t; start :vm_offset_t; __end :vm_offset_t; - offset:vm_ooffset_t; - p_free:Boolean):Integer; + offset:vm_ooffset_t):Integer; var rmap:p_rmem_map; length:vm_offset_t; - entry:p_rmem_map_entry; begin rmap:=map^.rmap; length:=__end-start; rmem_map_lock(rmap); - if p_free then - begin - Result:=rmem_map_delete(rmap, OFF_TO_IDX(start), OFF_TO_IDX(offset), OFF_TO_IDX(offset+length)); - end else - begin - Result:=0; - end; - - { - if (Result=0) then - begin - p_rem^:=not rmem_map_lookup_entry_any(rmap, OFF_TO_IDX(offset), @entry) - end; - - if p_rem then - begin - //unmap vulkan - end else - begin - //ext unmap vulkan - end; - } + Result:=rmem_map_delete(rmap, OFF_TO_IDX(start), OFF_TO_IDX(offset), OFF_TO_IDX(offset+length)); rmem_map_unlock(rmap); end; @@ -2402,7 +2379,6 @@ var first_entry:vm_map_entry_t; next :vm_map_entry_t; obj :vm_object_t; - p_rem :Boolean; begin VM_MAP_ASSERT_LOCKED(map); @@ -2480,8 +2456,7 @@ begin next:=entry^.next; - p_rem:=True; - if (obj<>nil) then + if rmap_free and (obj<>nil) then begin if ((obj^.flags and (OBJ_DMEM_EXT or OBJ_DMEM_EXT2))<>0) or (obj^.otype=OBJT_PHYSHM) then @@ -2490,8 +2465,7 @@ begin obj, entry^.start, entry^.__end, - entry^.offset, - rmap_free); + entry^.offset); end; end; @@ -2514,6 +2488,7 @@ begin * will be set in the wrong object!) } vm_map_entry_delete(map, entry); + entry:=next; end; Result:=(KERN_SUCCESS); diff --git a/vulkan/vBuffer.pas b/vulkan/vBuffer.pas index 184f4a6f..ddbf27f3 100644 --- a/vulkan/vBuffer.pas +++ b/vulkan/vBuffer.pas @@ -22,6 +22,9 @@ type function GetDedicatedAllocation:Boolean; function BindMem(P:TvPointer):TVkResult; procedure OnReleaseMem(Sender:TObject); + // + function Acquire:Boolean; + procedure Release; end; function VkBindSparseBufferMemory(queue:TVkQueue;buffer:TVkBuffer;bindCount:TVkUInt32;pBinds:PVkSparseMemoryBind):TVkResult; @@ -169,18 +172,46 @@ end; function TvBuffer.BindMem(P:TvPointer):TVkResult; begin - Result:=vkBindBufferMemory(Device.FHandle,FHandle,P.FMemory.FHandle,P.FOffset); - if (Result=VK_SUCCESS) then + if P.Acquire then begin - FBind:=P; - P.FMemory.AddDependence(@Self.OnReleaseMem); + Result:=vkBindBufferMemory(Device.FHandle,FHandle,P.FMemory.FHandle,P.FOffset); + // + if (Result=VK_SUCCESS) then + begin + FBind:=P; + P.FMemory.AddDependence(@Self.OnReleaseMem); + end else + begin + P.Release; + end; + // + end else + begin + Result:=VK_ERROR_UNKNOWN; end; end; procedure TvBuffer.OnReleaseMem(Sender:TObject); begin + FBind.FMemory:=nil; // + if (FHandle<>VK_NULL_HANDLE) then + begin + vkDestroyBuffer(Device.FHandle,FHandle,nil); + FHandle:=VK_NULL_HANDLE; + end; end; +function TvBuffer.Acquire:Boolean; +begin + Result:=FBind.Acquire; +end; + +procedure TvBuffer.Release; +begin + FBind.Release; +end; + + end. diff --git a/vulkan/vCmdBuffer.pas b/vulkan/vCmdBuffer.pas index 2026a8af..f8bd7a87 100644 --- a/vulkan/vCmdBuffer.pas +++ b/vulkan/vCmdBuffer.pas @@ -7,7 +7,6 @@ interface uses Classes, SysUtils, - //RWLock, //ps4_types, g23tree, //ps4_libSceVideoOut, diff --git a/vulkan/vDependence.pas b/vulkan/vDependence.pas index f28b3f89..95c4adb0 100644 --- a/vulkan/vDependence.pas +++ b/vulkan/vDependence.pas @@ -17,6 +17,7 @@ type TvRelease=specialize T23treeSet; TvDependenciesObject=class + FDep_lock :Pointer; FDependencies:TvRelease; // function AddDependence(cb:TvReleaseCb):Boolean; @@ -57,32 +58,55 @@ end; // - function TvDependenciesObject.AddDependence(cb:TvReleaseCb):Boolean; begin Result:=False; if (cb=nil) then Exit; + + rw_wlock(FDep_lock); + Result:=FDependencies.Insert(cb); + + rw_wunlock(FDep_lock); end; function TvDependenciesObject.DelDependence(cb:TvReleaseCb):Boolean; begin Result:=False; if (cb=nil) then Exit; + + rw_wlock(FDep_lock); + Result:=FDependencies.delete(cb); + + rw_wunlock(FDep_lock); end; Procedure TvDependenciesObject.ReleaseAllDependencies(Sender:TObject); var It:TvRelease.Iterator; + cb:TvReleaseCb; begin + rw_wlock(FDep_lock); + while (FDependencies.size<>0) do begin It:=FDependencies.cbegin; if (It.Item=nil) then Break; + cb:=It.Item^; FDependencies.erase(It); - TvReleaseCb(It.Item^)(Sender); + + if (cb<>nil) then + begin + rw_wunlock(FDep_lock); + + cb(Sender); + + rw_wlock(FDep_lock); + end; end; + + rw_wunlock(FDep_lock); end; Destructor TvDependenciesObject.Destroy; diff --git a/vulkan/vHostBufferManager.pas b/vulkan/vHostBufferManager.pas index ade515ff..c7140146 100644 --- a/vulkan/vHostBufferManager.pas +++ b/vulkan/vHostBufferManager.pas @@ -6,300 +6,249 @@ interface uses SysUtils, - RWLock, - sys_types, g23tree, Vulkan, vDevice, vMemory, vBuffer, - vCmdBuffer; + vDependence; type - AVkSparseMemoryBind=array of TVkSparseMemoryBind; - TvHostBuffer=class(TvBuffer) - FAddr:Pointer; - Fhost:TvPointer; - Foffset:TVkDeviceSize; //offset inside buffer + FAddr:QWORD; // - FSparse:AVkSparseMemoryBind; - // - FRefs:ptruint; - Procedure Acquire(Sender:TObject); - procedure Release(Sender:TObject); + procedure OnReleaseCmd(Sender:TObject); end; -function FetchHostBuffer(cmd:TvCustomCmdBuffer;Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer; +function FetchHostBuffer(cmd:TvDependenciesObject; + Addr:QWORD; + Size:TVkDeviceSize; + usage:TVkFlags; + device_local:Boolean=False):TvHostBuffer; implementation -const - buf_ext:TVkExternalMemoryBufferCreateInfo=( - sType:VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO; - pNext:nil; - handleTypes:ord(VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); - ); +uses + kern_rwlock; type - TvAddrCompare=object - function c(a,b:PPointer):Integer; static; + TvHostBufferKey=packed record + FAddr :QWORD; + FUsage :TVkFlags; + FBuffer:TvHostBuffer; end; - _TvHostBufferSet=specialize T23treeSet; - TvHostBufferSet=object(_TvHostBufferSet) - lock:TRWLock; - Procedure Init; - Procedure Lock_wr; - Procedure Unlock; + TvAddrCompare=object + function c(const a,b:TvHostBufferKey):Integer; static; end; + _TvHostBufferSet=specialize T23treeSet; + TvHostBufferSet=object(_TvHostBufferSet) + lock:Pointer; + Procedure Lock_wr; + Procedure Unlock_wr; + end; + +procedure TvHostBuffer.OnReleaseCmd(Sender:TObject); +begin + Release; +end; + var FHostBufferSet:TvHostBufferSet; -Procedure TvHostBufferSet.Init; -begin - rwlock_init(lock); -end; Procedure TvHostBufferSet.Lock_wr; begin - rwlock_wrlock(lock); + rw_wlock(lock); end; -Procedure TvHostBufferSet.Unlock; +Procedure TvHostBufferSet.Unlock_wr; begin - rwlock_unlock(lock); + rw_wunlock(lock); end; -function TvAddrCompare.c(a,b:PPointer):Integer; +function TvAddrCompare.c(const a,b:TvHostBufferKey):Integer; begin - Result:=Integer(a^>b^)-Integer(a^b.FAddr)-Integer(a.FAddr0) then Exit; + //2 FUsage + Result:=Integer(a.FUsage>b.FUsage)-Integer(a.FUsagenil) then - begin - Result:=TvHostBuffer(ptruint(i.Item^)-ptruint(@TvHostBuffer(nil).FAddr)); - end; -end; - -function Max(a,b:QWORD):QWORD; inline; -begin - if (a>b) then Result:=a else Result:=b; -end; - -function Min(a,b:QWORD):QWORD; inline; -begin - if (a=Size) then - begin - Result:=0; - end else - begin - Result:=1; - end; -end; - -function _New_simple(Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer; -var - host:TvPointer; - - t:TvHostBuffer; - delta:TVkDeviceSize; -begin - Result:=nil; - - delta:=_fix_buf_size(False,Addr,Size,usage); - - host:=Default(TvPointer); - if not TryGetHostPointerByAddr(addr,host) then Exit; - - t:=TvHostBuffer.Create(Size,usage,@buf_ext); - - t.Fhost :=host; - t.Foffset:=delta; - t.BindMem(host); - - Result:=t; -end; - -function _New_sparce(queue:TVkQueue;Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer; -var - host:TvPointer; - - asize:qword; - hsize:qword; - msize:qword; - - Offset,delta:TVkDeviceSize; - - bind:TVkSparseMemoryBind; - Binds:AVkSparseMemoryBind; - i:Integer; - - t:TvHostBuffer; -begin - Result:=nil; - - //hack; alignment is the same in virtual memory - delta:=_fix_buf_size(True,Addr,Size,usage); - - Binds:=Default(AVkSparseMemoryBind); - host :=Default(TvPointer); - hsize:=0; - - Offset:=0; - asize:=Size; - While (asize<>0) do - begin - if not TryGetHostPointerByAddr(addr,host,@hsize) then Exit; - - msize:=Min(hsize,asize); - - bind:=Default(TVkSparseMemoryBind); - bind.resourceOffset:=Offset; - bind.size :=msize; - bind.memory :=host.FHandle; - bind.memoryOffset :=host.FOffset; - - i:=Length(Binds); - SetLength(Binds,i+1); - Binds[i]:=bind; - - //next - Offset:=Offset+msize; - addr :=addr +msize; - asize :=asize -msize; - end; - - t:=TvHostBuffer.CreateSparce(Size,usage,@buf_ext); - - t.Foffset:=delta; - t.FSparse:=Binds; - - if (VkBindSparseBufferMemory(queue,t.FHandle,Length(Binds),@Binds[0])<>VK_SUCCESS) then - begin - t.Free; - Exit; - end; - - Result:=t; -end; - -function FetchHostBuffer(cmd:TvCustomCmdBuffer;Addr:Pointer;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer; -var - t:TvHostBuffer; - +function _FindHostBuffer(Addr:QWORD;Size:TVkDeviceSize;usage:TVkFlags):TvHostBuffer; label - _exit; + _repeat; +var + It:TvHostBufferSet.Iterator; + key:TvHostBufferKey; + buf:TvHostBuffer; + __end:QWORD; +begin + __end:=Addr+Size; + key:=Default(TvHostBufferKey); + key.FAddr :=Addr; + key.FUsage:=0; + + _repeat: + + It:=FHostBufferSet.find(key); + + while (It.Item<>nil) do + begin + buf:=It.Item^.FBuffer; + + if buf.Acquire then + begin + + if (__end>buf.FAddr) and + (Addr<(buf.FAddr+buf.FSize)) and + ((buf.FUsage and usage)=usage) then + begin + Exit(buf); + end; + + buf.Release; + end else + begin + //mem is deleted, free buf + FHostBufferSet.erase(It); + FreeAndNil(buf); + goto _repeat; + end; + + It.Next; + end; + +end; + +function FetchHostBuffer(cmd:TvDependenciesObject; + Addr:QWORD; + Size:TVkDeviceSize; + usage:TVkFlags; + device_local:Boolean=False):TvHostBuffer; +label + _repeat; +var + key:TvHostBufferKey; + mem:TvPointer; begin Result:=nil; Assert(Size<>0); + _fix_buf_size(Addr,Size,usage); + + key:=Default(TvHostBufferKey); + key.FAddr :=Addr; + key.FUsage:=usage; + + // FHostBufferSet.Lock_wr; + // - t:=_Find(Addr); //find by key + _repeat: - if (t<>nil) then + key.FBuffer:=_FindHostBuffer(Addr,Size,usage); + + // + FHostBufferSet.Unlock_wr; + // + + if (key.FBuffer<>nil) then begin - if (t.FSize<(t.Foffset+Size)) or - ((t.FUsage and usage)<>usage) then + // + end else + begin + //create new + + mem:=MemManager.FetchHostMap(Addr,Size,device_local); + + if (mem.FMemory=nil) then begin - usage:=usage or t.FUsage; - FHostBufferSet.delete(@t.FAddr); - t.Release(nil); - t:=nil; - end; - end; - if (t=nil) then - begin - //Writeln('NewBuf:',HexStr(Addr)); + if device_local then + begin + mem:=MemManager.FetchHostMap(Addr,Size,False); + + if (mem.FMemory=nil) then + begin + //ENOMEM + Exit(nil); + end; + + end else + begin + //ENOMEM + Exit(nil); + end; - t:=nil; - Case _is_sparce(Addr,Size,usage) of - 0:begin - t:=_New_simple(Addr,Size,usage); - Assert(t<>nil,'create simple buffer fail'); - end; - 1:begin //is Sparse buffers - Assert(vDevice.sparseBinding,'sparseBinding not support'); - Assert(MemManager.SparceSupportHost,'sparse not support for host'); - t:=_New_sparce(cmd.FQueue.FHandle,Addr,Size,usage); - Assert(t<>nil,'create sparse buffer fail'); - end; - else - Assert(false,'Is not GPU Addr:'+HexStr(Addr)); end; - t.FAddr:=addr; //save key + key.FBuffer:=TvHostBuffer.Create(Size,usage,@buf_ext); + key.FBuffer.FAddr:=Addr; - FHostBufferSet.Insert(@t.FAddr); - t.Acquire(nil); - end; - - if (cmd<>nil) and (t<>nil) then - begin - if cmd.AddDependence(@t.Release) then + if (key.FBuffer.BindMem(mem)<>VK_SUCCESS) then begin - t.Acquire(cmd); + //unknow error + FreeAndNil(key.FBuffer); + mem.Release; //release [FetchHostMap] + // + Exit(nil); + end; + + mem.Release; //release [FetchHostMap] + + // + FHostBufferSet.Lock_wr; + // + + if not FHostBufferSet.Insert(key) then + begin + //collision? + + key.FBuffer.Release; //release [BindMem] + FreeAndNil(key.FBuffer); + + // + goto _repeat; + end; + + // + FHostBufferSet.Unlock_wr; + // + + //create new + end; + + //add dep + if (cmd<>nil) then + begin + if cmd.AddDependence(@key.FBuffer.OnReleaseCmd) then + begin + // + end else + begin + key.FBuffer.Release; //release [BindMem]/[_FindHostBuffer] end; end; - _exit: - FHostBufferSet.Unlock; - Result:=t; + Result:=key.FBuffer; end; -Procedure TvHostBuffer.Acquire(Sender:TObject); -begin - System.InterlockedIncrement(Pointer(FRefs)); -end; - -procedure TvHostBuffer.Release(Sender:TObject); -begin - if System.InterlockedDecrement(Pointer(FRefs))=nil then - begin - Free; - end; -end; - -initialization - FHostBufferSet.Init; end. diff --git a/vulkan/vImage.pas b/vulkan/vImage.pas index 5750dc02..ff3d6df8 100644 --- a/vulkan/vImage.pas +++ b/vulkan/vImage.pas @@ -64,9 +64,9 @@ type function GetImageInfo:TVkImageCreateInfo; virtual; abstract; function GetRequirements:TVkMemoryRequirements; function GetDedicatedAllocation:Boolean; + function Compile(ext:Pointer):Boolean; function BindMem(P:TvPointer):TVkResult; procedure OnReleaseMem(Sender:TObject); - function Compile(ext:Pointer):Boolean; end; const @@ -551,19 +551,63 @@ begin (rded.prefersDedicatedAllocation <>VK_FALSE); end; +function TvCustomImage.Compile(ext:Pointer):Boolean; +var + cinfo:TVkImageCreateInfo; + r:TVkResult; +begin + Result:=False; + + if (FHandle<>VK_NULL_HANDLE) then + begin + vkDestroyImage(Device.FHandle,FHandle,nil); + FHandle:=VK_NULL_HANDLE; + end; + + cinfo:=GetImageInfo; + cinfo.pNext:=ext; + + cinfo.format:=vkFixFormatSupport(cinfo.format,cinfo.tiling,cinfo.usage); + + r:=vkCreateImage(Device.FHandle,@cinfo,nil,@FHandle); + if (r<>VK_SUCCESS) then + begin + Writeln(StdErr,'vkCreateImage:',r); + Exit; + end; + Result:=True; +end; + function TvCustomImage.BindMem(P:TvPointer):TVkResult; begin - Result:=vkBindImageMemory(Device.FHandle,FHandle,P.FMemory.FHandle,P.FOffset); - if (Result=VK_SUCCESS) then + if P.Acquire then begin - FBind:=P; - P.FMemory.AddDependence(@Self.OnReleaseMem); + Result:=vkBindImageMemory(Device.FHandle,FHandle,P.FMemory.FHandle,P.FOffset); + // + if (Result=VK_SUCCESS) then + begin + FBind:=P; + P.FMemory.AddDependence(@Self.OnReleaseMem); + end else + begin + P.Release; + end; + // + end else + begin + Result:=VK_ERROR_UNKNOWN; end; end; procedure TvCustomImage.OnReleaseMem(Sender:TObject); begin + FBind.FMemory:=nil; // + if (FHandle<>VK_NULL_HANDLE) then + begin + vkDestroyImage(Device.FHandle,FHandle,nil); + FHandle:=VK_NULL_HANDLE; + end; end; procedure _test_and_set_to(var new:TVkFlags; @@ -651,33 +695,6 @@ begin end; -function TvCustomImage.Compile(ext:Pointer):Boolean; -var - cinfo:TVkImageCreateInfo; - r:TVkResult; -begin - Result:=False; - - if (FHandle<>VK_NULL_HANDLE) then - begin - vkDestroyImage(Device.FHandle,FHandle,nil); - FHandle:=VK_NULL_HANDLE; - end; - - cinfo:=GetImageInfo; - cinfo.pNext:=ext; - - cinfo.format:=vkFixFormatSupport(cinfo.format,cinfo.tiling,cinfo.usage); - - r:=vkCreateImage(Device.FHandle,@cinfo,nil,@FHandle); - if (r<>VK_SUCCESS) then - begin - Writeln(StdErr,'vkCreateImage:',r); - Exit; - end; - Result:=True; -end; - Constructor TvImage.Create(format:TVkFormat;extent:TVkExtent3D;usage:TVkFlags;flags:TVkImageCreateFlags;ext:Pointer=nil); begin FFormat:=format; diff --git a/vulkan/vImageManager.pas b/vulkan/vImageManager.pas index c8c1f802..257935a8 100644 --- a/vulkan/vImageManager.pas +++ b/vulkan/vImageManager.pas @@ -6,7 +6,6 @@ interface uses SysUtils, - RWLock, g23tree, //sys_types, Vulkan, @@ -62,7 +61,7 @@ type key:TvImageKey; FUsage:TVkFlags; // - lock:TRWLock; + lock:Pointer; FViews:TvImageView2Set; // Barrier:TvImageBarrier; @@ -110,6 +109,9 @@ var implementation +uses + kern_rwlock; + type TvImageKeyCompare=object function c(a,b:PvImageKey):Integer; static; @@ -117,28 +119,22 @@ type _TvImage2Set=specialize T23treeSet; TvImage2Set=object(_TvImage2Set) - lock:TRWLock; - Procedure Init; + lock:Pointer; Procedure Lock_wr; - Procedure Unlock; + Procedure Unlock_wr; end; var FImage2Set:TvImage2Set; -Procedure TvImage2Set.Init; -begin - rwlock_init(lock); -end; - Procedure TvImage2Set.Lock_wr; begin - rwlock_wrlock(lock); + rw_wlock(lock); end; -Procedure TvImage2Set.Unlock; +Procedure TvImage2Set.Unlock_wr; begin - rwlock_unlock(lock); + rw_wunlock(lock); end; function TvImageKeyCompare.c(a,b:PvImageKey):Integer; @@ -216,7 +212,6 @@ end; Constructor TvImage2.Create; begin inherited; - rwlock_init(lock); Barrier.Init; end; @@ -320,7 +315,7 @@ begin if (Self=nil) then Exit; if (FHandle=VK_NULL_HANDLE) then Exit; - rwlock_wrlock(lock); + rw_wlock(lock); t:=nil; i:=FViews.find(@F); @@ -351,7 +346,7 @@ begin r:=vkCreateImageView(Device.FHandle,@cinfo,nil,@FView); if (r<>VK_SUCCESS) then begin - rwlock_unlock(lock); + rw_wunlock(lock); Writeln(StdErr,'vkCreateImageView:',r); Exit; end; @@ -373,7 +368,7 @@ begin end; end; - rwlock_unlock(lock); + rw_wunlock(lock); Result:=t; end; @@ -476,7 +471,7 @@ begin if (cmd=nil) then Exit; if (not cmd.BeginCmdBuffer) then Exit; - rwlock_wrlock(lock); + rw_wlock(lock); if Barrier.Push(cmd.cmdbuf, FHandle, @@ -488,7 +483,7 @@ begin Inc(cmd.cmd_count); end; - rwlock_unlock(lock); + rw_wunlock(lock); end; { @@ -679,7 +674,7 @@ begin end; - FImage2Set.Unlock; + FImage2Set.Unlock_wr; end; function FindImage(cmd:TvCustomCmdBuffer;Addr:Pointer;cformat:TVkFormat):TvImage2; @@ -696,11 +691,9 @@ begin end; end; - FImage2Set.Unlock; + FImage2Set.Unlock_wr; end; -initialization - FImage2Set.Init; end. diff --git a/vulkan/vMemory.pas b/vulkan/vMemory.pas index 01c8be01..6113952a 100644 --- a/vulkan/vMemory.pas +++ b/vulkan/vMemory.pas @@ -44,6 +44,7 @@ type TvPointer=packed object FMemory:TvDeviceMemory; FOffset:TVkDeviceSize; + function Acquire:Boolean; procedure Release; end; @@ -94,8 +95,6 @@ type FHeaps:array of TvHeap; - lock:Pointer; - FDevBlocks:array of TvDeviceMemory; FFreeSet:TFreeDevNodeSet; FAllcSet:TAllcDevNodeSet; @@ -125,12 +124,20 @@ type Function _shrink_dev_block(max:TVkDeviceSize;heap_index:Byte):TVkDeviceSize; Function _shrink_host_map(max:TVkDeviceSize):TVkDeviceSize; procedure unmap_host(start,__end:QWORD); - Function AllocHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer; + Function FetchHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer; + Function FetchHostMap(Addr,Size:TVkDeviceSize;device_local:Boolean):TvPointer; end; var MemManager:TvMemManager; +const + buf_ext:TVkExternalMemoryBufferCreateInfo=( + sType:VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO; + pNext:nil; + handleTypes:ord(VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); + ); + function vkAllocMemory(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32):TVkDeviceMemory; function vkAllocHostMemory(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32;adr:Pointer):TVkDeviceMemory; function vkAllocDedicatedImage(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32;FHandle:TVkImage):TVkDeviceMemory; @@ -149,6 +156,9 @@ implementation uses kern_rwlock; +var + global_mem_lock:Pointer=nil; + Procedure TvDeviceMemory.Acquire; begin System.InterlockedIncrement(Pointer(FRefs)); @@ -197,6 +207,25 @@ end; // +function TvPointer.Acquire:Boolean; +begin + Result:=False; + if (FMemory=nil) then Exit; + + // + rw_rlock(global_mem_lock); + // + + if (FMemory<>nil) then + begin + FMemory.Acquire; + Result:=True; + end; + + // + rw_runlock(global_mem_lock); +end; + procedure TvPointer.Release; begin if (FMemory<>nil) then @@ -230,13 +259,6 @@ begin Result:=Integer(a.FOffset>b.FOffset)-Integer(a.FOffsetGRANULAR_DEV_BLOCK_SIZE) then Align:=GRANULAR_DEV_BLOCK_SIZE; - rw_wlock(lock); + // + rw_wlock(global_mem_lock); // if _FetchFree_a(Size,Align,mtindex,key) then begin @@ -818,7 +841,7 @@ begin Result.FMemory.Acquire; end; // - rw_wunlock(lock); + rw_wunlock(global_mem_lock); end; Function TvMemManager.Free(P:TvPointer):Boolean; @@ -827,7 +850,8 @@ var begin if (P.FMemory=nil) then Exit; key:=Default(TDevNode); - rw_wlock(lock); + // + rw_wlock(global_mem_lock); // if _FindDevBlock(P.FMemory,key.FBlockId) then if _FetchAllc(P.FOffset,key.FBlockId,key) then @@ -865,7 +889,7 @@ begin Result:=True; end; // - rw_wunlock(lock); + rw_wunlock(global_mem_lock); end; Function TvMemManager._shrink_dev_block(max:TVkDeviceSize;heap_index:Byte):TVkDeviceSize; @@ -932,7 +956,7 @@ begin if (start=__end) then Exit; // - rw_wlock(lock); + rw_wlock(global_mem_lock); // node:=TvHostMemory(TAILQ_FIRST(@FHosts)); @@ -962,8 +986,7 @@ begin end; // - rw_wunlock(lock); - // + rw_wunlock(global_mem_lock); end; function AlignUp(addr:PtrUInt;alignment:PtrUInt):PtrUInt; inline; @@ -979,7 +1002,7 @@ begin Result:=addr-(addr mod alignment); end; -Function TvMemManager.AllocHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer; +Function TvMemManager.FetchHostMap(Addr,Size:TVkDeviceSize;mtindex:Byte):TvPointer; label _retry, _fail; @@ -998,7 +1021,7 @@ begin FStart:=QWORD(Addr); F__End:=FStart+Size; // - rw_wlock(lock); + rw_wlock(global_mem_lock); // node:=TvHostMemory(TAILQ_FIRST(@FHosts)); @@ -1079,7 +1102,7 @@ begin _fail: // - rw_wunlock(lock); + rw_wunlock(global_mem_lock); // if (node<>nil) then @@ -1089,6 +1112,23 @@ begin end; end; +Function TvMemManager.FetchHostMap(Addr,Size:TVkDeviceSize;device_local:Boolean):TvPointer; +var + i:Byte; +begin + Result:=Default(TvPointer); + + Assert(Length(FHeaps)<>0); + + For i:=0 to High(FHeaps) do + if (FHeaps[i].host_visible) then + if (FHeaps[i].device_local=device_local) then + begin + Exit(FetchHostMap(Addr,Size,FHeaps[i].def_mem_type)); + end; + +end; + // function vkAllocMemory(device:TVkDevice;Size:TVkDeviceSize;mtindex:TVkUInt32):TVkDeviceMemory; diff --git a/vulkan/vRegs2Vulkan.pas b/vulkan/vRegs2Vulkan.pas index 9c42d9f4..00682017 100644 --- a/vulkan/vRegs2Vulkan.pas +++ b/vulkan/vRegs2Vulkan.pas @@ -86,6 +86,7 @@ type PGPU_REGS=^TGPU_REGS; TGPU_REGS=packed object + SH_REG:PSH_REG_GROUP; CX_REG:PCONTEXT_REG_GROUP; // 0xA000 Function _SHADER_MASK(i:Byte):Byte; inline; //0..7 @@ -103,6 +104,15 @@ type Function DB_ENABLE:Boolean; Function GET_DB_INFO:TDB_INFO; + function get_reg(i:word):DWORD; + + Function get_cs_addr:Pointer; + Function get_ps_addr:Pointer; + Function get_vs_addr:Pointer; + Function get_gs_addr:Pointer; + Function get_es_addr:Pointer; + Function get_hs_addr:Pointer; + Function get_ls_addr:Pointer; end; function GET_PRIM_TYPE (const VGT_PRIMITIVE_TYPE:TVGT_PRIMITIVE_TYPE):TVkPrimitiveTopology; @@ -1158,6 +1168,53 @@ begin Result.FImageInfo.params.arrayLayers:=1; end; +function TGPU_REGS.get_reg(i:word):DWORD; +begin + case i of + $2C00..$2E7F:Result:=PDWORD(SH_REG)[i-$2C00]; + $A000..$A38F:Result:=PDWORD(CX_REG)[i-$A000]; + else + Result:=0; + end; +end; + +Function TGPU_REGS.get_cs_addr:Pointer; +begin + Result:=getCodeAddress(SH_REG^.COMPUTE_PGM_LO,SH_REG^.COMPUTE_PGM_HI.DATA); +end; + +Function TGPU_REGS.get_ps_addr:Pointer; +begin + Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_PS,SH_REG^.SPI_SHADER_PGM_HI_PS.MEM_BASE); +end; + +Function TGPU_REGS.get_vs_addr:Pointer; +begin + Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_VS,SH_REG^.SPI_SHADER_PGM_HI_VS.MEM_BASE); +end; + +Function TGPU_REGS.get_gs_addr:Pointer; +begin + Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_GS,SH_REG^.SPI_SHADER_PGM_HI_GS.MEM_BASE); +end; + +Function TGPU_REGS.get_es_addr:Pointer; +begin + Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_ES,SH_REG^.SPI_SHADER_PGM_HI_ES.MEM_BASE); +end; + +Function TGPU_REGS.get_hs_addr:Pointer; +begin + Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_HS,SH_REG^.SPI_SHADER_PGM_HI_HS.MEM_BASE); +end; + +Function TGPU_REGS.get_ls_addr:Pointer; +begin + Result:=getCodeAddress(SH_REG^.SPI_SHADER_PGM_LO_LS,SH_REG^.SPI_SHADER_PGM_HI_LS.MEM_BASE); +end; + +/// + function GET_PRIM_TYPE(const VGT_PRIMITIVE_TYPE:TVGT_PRIMITIVE_TYPE):TVkPrimitiveTopology; begin case VGT_PRIMITIVE_TYPE.PRIM_TYPE of diff --git a/vulkan/vRender.pas b/vulkan/vRender.pas index 01791db0..5d7af364 100644 --- a/vulkan/vRender.pas +++ b/vulkan/vRender.pas @@ -7,8 +7,6 @@ interface uses Classes, SysUtils, - RWLock, - //sys_types, g23tree, //ps4_libSceVideoOut, si_ci_vi_merged_enum, diff --git a/vulkan/vRenderPassManager.pas b/vulkan/vRenderPassManager.pas index cd057d9f..360efab3 100644 --- a/vulkan/vRenderPassManager.pas +++ b/vulkan/vRenderPassManager.pas @@ -6,14 +6,12 @@ interface uses SysUtils, - RWLock, g23tree, Vulkan, vDevice, vDependence, - vPipeline{, - vImage, vPipeline, + vImage{, vCmdBuffer}; type @@ -46,16 +44,11 @@ type function FetchRenderPass(cmd:TvDependenciesObject;P:PvRenderPassKey):TvRenderPass2; -//////////////// -const - //useage image - TM_READ =1; - TM_WRITE=2; - TM_CLEAR=4; -//////////////// - implementation +uses + kern_rwlock; + type TvRenderPassKey2Compare=object function c(a,b:PvRenderPassKey):Integer; static; @@ -63,10 +56,9 @@ type _TvRenderPass2Set=specialize T23treeSet; TvRenderPass2Set=object(_TvRenderPass2Set) - lock:TRWLock; - Procedure Init; + lock:Pointer; Procedure Lock_wr; - Procedure Unlock; + Procedure Unlock_wr; end; var @@ -306,19 +298,14 @@ begin Result:=True; end; -Procedure TvRenderPass2Set.Init; -begin - rwlock_init(lock); -end; - Procedure TvRenderPass2Set.Lock_wr; begin - rwlock_wrlock(lock); + rw_wlock(lock); end; -Procedure TvRenderPass2Set.Unlock; +Procedure TvRenderPass2Set.Unlock_wr; begin - rwlock_unlock(lock); + rw_wunlock(lock); end; Procedure TvRenderPass2.Acquire; @@ -395,12 +382,10 @@ begin end; end; - FRenderPass2Set.Unlock; + FRenderPass2Set.Unlock_wr; end; -initialization - FRenderPass2Set.Init; end. diff --git a/vulkan/vSetLayoutManager.pas b/vulkan/vSetLayoutManager.pas index e56d43f7..553ca11e 100644 --- a/vulkan/vSetLayoutManager.pas +++ b/vulkan/vSetLayoutManager.pas @@ -6,7 +6,6 @@ interface uses SysUtils, - RWLock, g23tree, Vulkan, vPipeline; @@ -17,6 +16,9 @@ Function FetchSetLayout(FStage:TVkShaderStageFlags; implementation +uses + kern_rwlock; + type TvSetLayoutCompare=class class function c(a,b:PvSetLayoutKey):Integer; static; @@ -24,28 +26,22 @@ type _TvSetLayoutsPool=specialize T23treeSet; TvSetLayoutsPool=object(_TvSetLayoutsPool) - lock:TRWLock; - Procedure Init; + lock:Pointer; Procedure Lock_wr; - Procedure Unlock; + Procedure Unlock_wr; end; var FSetLayoutsPool:TvSetLayoutsPool; -Procedure TvSetLayoutsPool.Init; -begin - rwlock_init(lock); -end; - Procedure TvSetLayoutsPool.Lock_wr; begin - rwlock_wrlock(lock); + rw_wlock(lock); end; -Procedure TvSetLayoutsPool.Unlock; +Procedure TvSetLayoutsPool.Unlock_wr; begin - rwlock_unlock(lock); + rw_wunlock(lock); end; function CompareBind(var a,b:TVkDescriptorSetLayoutBinding):Integer; forward; @@ -111,7 +107,7 @@ begin Result:=t; end; - FSetLayoutsPool.Unlock; + FSetLayoutsPool.Unlock_wr; t.Compile; end; @@ -163,8 +159,6 @@ begin Result:=CompareBinds(a^.FBinds,b^.FBinds,Length(a^.FBinds)); end; -initialization - FSetLayoutsPool.Init; end. diff --git a/vulkan/vShaderManager.pas b/vulkan/vShaderManager.pas index fe72eb60..e50ff360 100644 --- a/vulkan/vShaderManager.pas +++ b/vulkan/vShaderManager.pas @@ -7,14 +7,15 @@ interface uses SysUtils, Classes, - RWLock, + murmurhash, g23tree, ps4_pssl, ps4_shader, - ps4_gpu_regs, + + vRegs2Vulkan, shader_dump, - ps4_program, + //ps4_program, vDevice, @@ -61,6 +62,10 @@ function FetchShaderGroup(F:PvShadersKey):TvShaderGroup; implementation +uses + kern_rwlock, + kern_dmem; + type TShaderCacheCompare=object function c(a,b:PShaderDataKey):Integer; static; @@ -72,54 +77,42 @@ type _TShaderCacheSet=specialize T23treeSet; TShaderCacheSet=object(_TShaderCacheSet) - lock:TRWLock; - Procedure Init; + lock:Pointer; Procedure Lock_wr; - Procedure Unlock; + Procedure Unlock_wr; end; _TShaderGroupSet=specialize T23treeSet; TShaderGroupSet=object(_TShaderGroupSet) - lock:TRWLock; - Procedure Init; + lock:Pointer; Procedure Lock_wr; - Procedure Unlock; + Procedure Unlock_wr; end; var FShaderCacheSet:TShaderCacheSet; FShaderGroupSet:TShaderGroupSet; -Procedure TShaderCacheSet.Init; -begin - rwlock_init(lock); -end; - Procedure TShaderCacheSet.Lock_wr; begin - rwlock_wrlock(lock); + rw_wlock(lock); end; -Procedure TShaderCacheSet.Unlock; +Procedure TShaderCacheSet.Unlock_wr; begin - rwlock_unlock(lock); + rw_wunlock(lock); end; // -Procedure TShaderGroupSet.Init; -begin - rwlock_init(lock); -end; - Procedure TShaderGroupSet.Lock_wr; begin - rwlock_wrlock(lock); + rw_wlock(lock); end; -Procedure TShaderGroupSet.Unlock; +Procedure TShaderGroupSet.Unlock_wr; begin - rwlock_unlock(lock); + rw_wunlock(lock); end; function Max(a,b:PtrInt):PtrInt; inline; @@ -180,7 +173,7 @@ var F:THandle; fname:RawByteString; begin - hash:=FastHash(M.Memory,M.Size); + hash:=MurmurHash64A(M.Memory,M.Size,0); case FStage of vShaderStagePs:fname:='_ps_'; @@ -231,18 +224,29 @@ begin case FStage of vShaderStagePs : begin - SprvEmit.InitPs(GPU_REGS.SPI.PS.RSRC1,GPU_REGS.SPI.PS.RSRC2,GPU_REGS.SPI.PS.INPUT_ENA); - SprvEmit.SetUserData(@GPU_REGS.SPI.PS.USER_DATA); + SprvEmit.InitPs(GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_PS, + GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC2_PS, + GPU_REGS.CX_REG^.SPI_PS_INPUT_ENA); + + SprvEmit.SetUserData(@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_PS); end; vShaderStageVs: begin - SprvEmit.InitVs(GPU_REGS.SPI.VS.RSRC1,GPU_REGS.SPI.VS.RSRC2,GPU_REGS.VGT_NUM_INSTANCES); - SprvEmit.SetUserData(@GPU_REGS.SPI.VS.USER_DATA); + SprvEmit.InitVs(GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_VS, + GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC2_VS, + GPU_REGS.CX_REG^.VGT_DMA_NUM_INSTANCES); + + SprvEmit.SetUserData(@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_VS); end; vShaderStageCs: begin - SprvEmit.InitCs(GPU_REGS.SPI.CS.RSRC1,GPU_REGS.SPI.CS.RSRC2,GPU_REGS.SPI.CS.NUM_THREAD_X,GPU_REGS.SPI.CS.NUM_THREAD_Y,GPU_REGS.SPI.CS.NUM_THREAD_Z); - SprvEmit.SetUserData(@GPU_REGS.SPI.CS.USER_DATA); + SprvEmit.InitCs(GPU_REGS.SH_REG^.COMPUTE_PGM_RSRC1, + GPU_REGS.SH_REG^.COMPUTE_PGM_RSRC2, + GPU_REGS.SH_REG^.COMPUTE_NUM_THREAD_X, + GPU_REGS.SH_REG^.COMPUTE_NUM_THREAD_Y, + GPU_REGS.SH_REG^.COMPUTE_NUM_THREAD_Z); + + SprvEmit.SetUserData(@GPU_REGS.SH_REG^.COMPUTE_USER_DATA); end; else @@ -313,13 +317,14 @@ begin begin Case FStage of - vShaderStageVs:pUserData:=@GPU_REGS.SPI.VS.USER_DATA; - vShaderStagePs:pUserData:=@GPU_REGS.SPI.PS.USER_DATA; - vShaderStageCs:pUserData:=@GPU_REGS.SPI.CS.USER_DATA; + vShaderStageVs:pUserData:=@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_VS; + vShaderStagePs:pUserData:=@GPU_REGS.SH_REG^.SPI_SHADER_USER_DATA_PS; + vShaderStageCs:pUserData:=@GPU_REGS.SH_REG^.COMPUTE_USER_DATA; else Assert(false); end; + FShader:=nil; if Length(t.FShaders)<>0 then For i:=0 to High(t.FShaders) do @@ -445,25 +450,32 @@ end; function FetchShader(FStage:TvShaderStage;FDescSetId:Integer;var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderExt; var - pData:PDWORD; + pData0:PDWORD; + pData1:PDWORD; begin Case FStage of - vShaderStageVs:pData:=getCodeAddress(GPU_REGS.SPI.VS.LO,GPU_REGS.SPI.VS.HI); - vShaderStagePs:pData:=getCodeAddress(GPU_REGS.SPI.PS.LO,GPU_REGS.SPI.PS.HI); - vShaderStageCs:pData:=getCodeAddress(GPU_REGS.SPI.CS.LO,GPU_REGS.SPI.CS.HI); + vShaderStageVs:pData0:=GPU_REGS.get_vs_addr; + vShaderStagePs:pData0:=GPU_REGS.get_ps_addr; + vShaderStageCs:pData0:=GPU_REGS.get_cs_addr; else Assert(false); end; - if (pData=nil) then Exit(nil); + if (pData0=nil) then Exit(nil); //Assert(pData<>nil); + pData1:=nil; + if not get_dmem_ptr(pData0,@pData1,nil) then + begin + Assert(false,'get_dmem_ptr'); + end; + FShaderCacheSet.Lock_wr; - Result:=_FetchShader(FStage,pData,FDescSetId,GPU_REGS,pc); + Result:=_FetchShader(FStage,pData1,FDescSetId,GPU_REGS,pc); - FShaderCacheSet.Unlock; + FShaderCacheSet.Unlock_wr; end; // @@ -515,12 +527,9 @@ begin Result:=_FetchShaderGroup(F); - FShaderGroupSet.Unlock; + FShaderGroupSet.Unlock_wr; end; -initialization - FShaderCacheSet.Init; - FShaderGroupSet.Init; end.