unit vShaderManager; {$mode objfpc}{$H+} interface uses SysUtils, Classes, murmurhash, g23tree, ps4_pssl, ps4_shader, vRegs2Vulkan, shader_dump, //ps4_program, vDevice, vShader, vShaderExt, SprvEmit, emit_bin; type PShaderDataKey=^TShaderDataKey; TShaderDataKey=packed object FStage:TvShaderStage; FLen :Ptruint; pData :PDWORD; FHash :QWORD; function c(a,b:PShaderDataKey):Integer; static; Procedure SetData(Stage:TvShaderStage;Src:Pointer); Procedure Free; end; TShaderCodeCache=class key:TShaderDataKey; FShaderAliases:array of TvShaderExt; function AddShader(FDescSetId:Integer;Stream:TStream;pUserData:Pointer):TvShaderExt; Destructor Destroy; override; end; PPushConstAllocator=^TPushConstAllocator; TPushConstAllocator=object size :DWORD; offset:DWORD; Procedure Init; function GetAvailable:DWORD; procedure Apply(i:DWORD); end; { TShaderCacheSet --------------- [TShaderCodeCache] ( [TShaderDataKey] FShaderAliases [TvShaderExt] --------------- --------------- ............... --------------- ) --------------- ............... --------------- } function FetchShader(FStage:TvShaderStage;FDescSetId:Integer;var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderExt; function FetchShaderGroup(F:PvShadersKey):TvShaderGroup; function FetchShaderGroupRT(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; function FetchShaderGroupCS(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; implementation uses kern_rwlock, kern_dmem; type TShadersKeyCompare=object function c(a,b:PvShadersKey):Integer; static; end; _TShaderCacheSet=specialize T23treeSet; TShaderCacheSet=object(_TShaderCacheSet) lock:Pointer; Procedure Lock_wr; Procedure Unlock_wr; end; _TShaderGroupSet=specialize T23treeSet; TShaderGroupSet=object(_TShaderGroupSet) lock:Pointer; Procedure Lock_wr; Procedure Unlock_wr; end; var FShaderCacheSet:TShaderCacheSet; FShaderGroupSet:TShaderGroupSet; Procedure TShaderCacheSet.Lock_wr; begin rw_wlock(lock); end; Procedure TShaderCacheSet.Unlock_wr; begin rw_wunlock(lock); end; // Procedure TShaderGroupSet.Lock_wr; begin rw_wlock(lock); end; Procedure TShaderGroupSet.Unlock_wr; begin rw_wunlock(lock); end; function Max(a,b:PtrInt):PtrInt; inline; begin if (a>b) then Result:=a else Result:=b; end; function TShaderDataKey.c(a,b:PShaderDataKey):Integer; begin //1 FStage Result:=Integer(a^.FStage>b^.FStage)-Integer(a^.FStage0) then Exit; //2 FLen Result:=Integer((a^.FLen>b^.FLen) and (b^.FLen<>0))-Integer((a^.FLen0)); if (Result<>0) then Exit; //3 pData Result:=CompareDWord(a^.pData^,b^.pData^,Max(a^.FLen,b^.FLen) div 4); end; function TShadersKeyCompare.c(a,b:PvShadersKey):Integer; begin Result:=CompareByte(a^.FShaders,b^.FShaders,SizeOf(AvShaderStage)); end; function TShaderCodeCache.AddShader(FDescSetId:Integer;Stream:TStream;pUserData:Pointer):TvShaderExt; var i:Integer; begin Result:=TvShaderExt.Create; Result.FHash:=key.FHash; Result.FDescSetId:=FDescSetId; //set before loading Result.LoadFromStream(Stream); Result.PreloadShaderFuncs(pUserData); i:=Length(FShaderAliases); SetLength(FShaderAliases,i+1); FShaderAliases[i]:=Result; end; Destructor TShaderCodeCache.Destroy; begin Key.Free; inherited; end; Procedure TShaderDataKey.SetData(Stage:TvShaderStage;Src:Pointer); begin Free; FStage:=Stage; FLen :=_calc_shader_size(Src); pData :=AllocMem(FLen); Move(Src^,pData^,FLen); FHash:=MurmurHash64A(pData,FLen,0); end; Procedure TShaderDataKey.Free; begin if (FLen<>0) and (pData<>nil) then begin FreeMem(pData); end; end; function _FindShaderCodeCache(const key:TShaderDataKey):TShaderCodeCache; var i:TShaderCacheSet.Iterator; begin Result:=nil; i:=FShaderCacheSet.find(@key); if (i.Item<>nil) then begin Result:=TShaderCodeCache(ptruint(i.Item^)-ptruint(@TShaderCodeCache(nil).key)); end; end; function _FetchShaderCodeCache(const key:TShaderDataKey):TShaderCodeCache; var t:TShaderCodeCache; begin t:=_FindShaderCodeCache(key); if (t=nil) then begin t:=TShaderCodeCache.Create; t.key:=key; t.key.SetData(key.FStage,key.pData); FShaderCacheSet.Insert(@t.key); end; Result:=t; end; Procedure DumpSpv(FStage:TvShaderStage;M:TMemoryStream); var hash:QWORD; F:THandle; fname:RawByteString; begin hash:=MurmurHash64A(M.Memory,M.Size,0); case FStage of vShaderStagePs:fname:='_ps_'; vShaderStageVs:fname:='_vs_'; vShaderStageCs:fname:='_cs_'; else Exit; end; fname:='shader_dump\'+get_dev_progname+fname+HexStr(hash,16)+'.spv'; if FileExists(fname) then Exit; CreateDir('shader_dump'); F:=FileCreate(fname); FileWrite(F,M.Memory^,M.Size); FileClose(F); end; procedure TPushConstAllocator.Init; begin Size:=limits.maxPushConstantsSize; offset:=0; end; function TPushConstAllocator.GetAvailable:DWORD; begin Result:=0; if (offsetnil) then begin SprvEmit.Config.PushConstantsOffset :=pc^.offset; SprvEmit.Config.maxPushConstantsSize:=pc^.GetAvailable; end; //SprvEmit.Config.UseVertexInput:=False; if (SprvEmit.ParseStage(pData)>1) then begin Writeln(StdErr,'Shader Parse Err'); SprvEmit.Free; Exit; end; SprvEmit.PostStage; SprvEmit.AllocStage; Result:=TMemoryStream.Create; SprvEmit.SaveToStream(Result); SprvEmit.Free; end; function test_func(FShader:TvShaderExt;pUserData:Pointer):Boolean; var key:TShaderFuncKey; P:Pointer; i:Integer; begin Result:=True; if (Length(FShader.FShaderFuncs)=0) then Exit; key:=Default(TShaderFuncKey); For i:=0 to High(FShader.FShaderFuncs) do if (FShader.FShaderFuncs[i].pData<>nil) then begin P:=GetSharpByPatch(pUserData,FShader.FFuncLayouts[i].addr); if (P<>nil) then begin key.pData:=P; if (TShaderFuncKey.c(FShader.FShaderFuncs[i],key)<>0) then begin Exit(False); end; end; end; end; function test_instance(FShader:TvShaderExt;FStage:TvShaderStage;var GPU_REGS:TGPU_REGS):Boolean; var VGPR_COMP_CNT:Byte; begin if (FStage<>vShaderStageVs) then Exit(True); VGPR_COMP_CNT:=GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_VS.VGPR_COMP_CNT; if (FShader.FInstance.VGPR_COMP_CNT<>VGPR_COMP_CNT) then Exit(False); if (VGPR_COMP_CNT>=1) then begin if (FShader.FInstance.STEP_RATE_0<>GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_0) then Exit(False); end; if (VGPR_COMP_CNT>=2) then begin if (FShader.FInstance.STEP_RATE_1<>GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_1) then Exit(False); end; Result:=True; end; function test_unif(FShader:TvShaderExt;FDescSetId:Integer;pUserData:Pointer):Boolean; var ch:TvBufOffsetChecker; begin if (FShader.FDescSetId<>FDescSetId) then Exit(False); ch.FResult:=True; FShader.EnumUnifLayout(@ch.AddAttr,FDescSetId,pUserData); Result:=ch.FResult; end; function test_push_const(FShader:TvShaderExt;pc_offset,pc_size:DWORD):Boolean; begin with FShader.FPushConst do begin Result:=(offset >=pc_offset) and //Checking offsets push constant ((offset+size)<=pc_size); //Is the remaining size sufficient? end; end; function _FetchShader(FStage:TvShaderStage;pData:PDWORD;FDescSetId:Integer;var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderExt; var F:TShaderDataKey; i:Integer; FShader:TvShaderExt; t:TShaderCodeCache; M:TMemoryStream; pUserData:Pointer; pc_offset,pc_size,pc_diff:DWORD; begin F:=Default(TShaderDataKey); F.FStage:=FStage; F.pData :=pData; { ...start <-\ | ...offset | | ...size --/ } if (pc<>nil) then //push const allocator used? begin pc_offset:=pc^.offset; pc_size :=pc^.GetAvailable; end else begin pc_offset:=0; pc_size :=0; end; t:=_FetchShaderCodeCache(F); FShader:=nil; if (Length(t.FShaderAliases)<>0) then begin pUserData:=GPU_REGS.get_user_data(FStage); For i:=0 to High(t.FShaderAliases) do begin FShader:=t.FShaderAliases[i]; if test_func(FShader,pUserData) then if test_instance(FShader,FStage,GPU_REGS) then if test_unif(FShader,FDescSetId,pUserData) then //Checking offsets within a shader if test_push_const(FShader,pc_offset,pc_size) then begin Break; //found end; FShader:=nil; //reset with not found end; end; if (FShader=nil) then //Rebuild with different parameters begin M:=ParseShader(FStage,pData,GPU_REGS,pc); Assert(M<>nil); pUserData:=GPU_REGS.get_user_data(FStage); FShader:=t.AddShader(FDescSetId,M,pUserData); if (FStage=vShaderStageVs) then begin FShader.SetInstance(GPU_REGS.SH_REG^.SPI_SHADER_PGM_RSRC1_VS.VGPR_COMP_CNT, GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_0, GPU_REGS.CX_REG^.VGT_INSTANCE_STEP_RATE_1); end; DumpSpv(FStage,M); M.Free; case FStage of vShaderStagePs:DumpPS(GPU_REGS,FShader.FHash); vShaderStageVs:DumpVS(GPU_REGS,FShader.FHash); vShaderStageCs:DumpCS(GPU_REGS,FShader.FHash); else; end; // if (FShader.FPushConst.size<>0) and (pc<>nil) then //push const used? begin FShader.FPushConst.offset:=pc_offset; //Save offset Dec(FShader.FPushConst.size,pc_offset); //Move up size { ...start ...offset<-\ | ...size --/ } end; end; if (FShader.FPushConst.size<>0) and (pc<>nil) then //push const used? begin pc_diff:=FShader.FPushConst.offset-pc_offset; //get diff offset pc^.Apply(pc_diff+FShader.FPushConst.size); //apply with allocator end; Result:=FShader; end; function FetchShader(FStage:TvShaderStage;FDescSetId:Integer;var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderExt; var pData0:PDWORD; pData1:PDWORD; begin pData0:=GPU_REGS.get_code_addr(FStage); if (pData0=nil) then Exit(nil); //Assert(pData<>nil); pData1:=nil; if not get_dmem_ptr(pData0,@pData1,nil) then begin Assert(false,'get_dmem_ptr'); end; FShaderCacheSet.Lock_wr; Result:=_FetchShader(FStage,pData1,FDescSetId,GPU_REGS,pc); FShaderCacheSet.Unlock_wr; end; // function _FindShaderGroup(F:PvShadersKey):TvShaderGroup; var i:TShaderGroupSet.Iterator; begin Result:=nil; i:=FShaderGroupSet.find(F); if (i.Item<>nil) then begin Result:=TvShaderGroup(ptruint(i.Item^)-ptruint(@TvShaderGroup(nil).FKey)); end; end; function _FetchShaderGroup(F:PvShadersKey):TvShaderGroup; var t:TvShaderGroup; begin Result:=nil; t:=_FindShaderGroup(F); if (t=nil) then begin t:=TvShaderGroup.Create; t.FKey:=F^; if not t.Compile then begin FreeAndNil(t); end else begin FShaderGroupSet.Insert(@t.FKey); end; end; Result:=t; end; function FetchShaderGroup(F:PvShadersKey):TvShaderGroup; begin Result:=nil; if (F=nil) then Exit; FShaderGroupSet.Lock_wr; Result:=_FetchShaderGroup(F); FShaderGroupSet.Unlock_wr; end; function FetchShaderGroupRT(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; var FShadersKey:TvShadersKey; i:TvShaderStage; FDescSetId:Integer; begin FShadersKey:=Default(TvShadersKey); FDescSetId:=0; For i:=High(TvShaderStage) downto Low(TvShaderStage) do begin if (i<>vShaderStageCs) then if (GPU_REGS.get_code_addr(i)<>nil) then begin FShadersKey.FShaders[i]:=FetchShader(i,FDescSetId,GPU_REGS,pc); Inc(FDescSetId); end; end; Result:=FetchShaderGroup(@FShadersKey); end; function FetchShaderGroupCS(var GPU_REGS:TGPU_REGS;pc:PPushConstAllocator):TvShaderGroup; var FShadersKey:TvShadersKey; begin FShadersKey:=Default(TvShadersKey); FShadersKey.FShaders[vShaderStageCs]:=FetchShader(vShaderStageCs,0,GPU_REGS,pc); Result:=FetchShaderGroup(@FShadersKey); end; end.