diff --git a/spirv/SprvEmit.pas b/spirv/SprvEmit.pas index e59d6755..e1765087 100644 --- a/spirv/SprvEmit.pas +++ b/spirv/SprvEmit.pas @@ -10,6 +10,7 @@ uses spirv, si_ci_vi_merged_registers, ps4_pssl, + srInterface, srAllocator, srLiteral, srType, @@ -45,6 +46,9 @@ type RSRC2:TSPI_SHADER_PGM_RSRC2_PS; ENA:TSPI_PS_INPUT_ENA); + Procedure SET_PIX_CENTER(val:Byte); + Procedure SET_SHADER_CONTROL(const SHADER_CONTROL:TDB_SHADER_CONTROL); + Procedure InitCs(RSRC1:TCOMPUTE_PGM_RSRC1; RSRC2:TCOMPUTE_PGM_RSRC2; NTX:TCOMPUTE_NUM_THREAD_X; @@ -430,6 +434,22 @@ begin AddCapability(Capability.Shader); end; +Procedure TSprvEmit.SET_PIX_CENTER(val:Byte); +begin + FPixelCenter:=(val<>0); +end; + +Procedure TSprvEmit.SET_SHADER_CONTROL(const SHADER_CONTROL:TDB_SHADER_CONTROL); +begin + case SHADER_CONTROL.CONSERVATIVE_Z_EXPORT of + 1:FDepthMode:=foDepthLess; //EXPORT_LESS_THAN_Z + 2:FDepthMode:=foDepthGreater; //EXPORT_GREATER_THAN_Z + else; + end; + // + FEarlyFragmentTests:=(SHADER_CONTROL.DEPTH_BEFORE_SHADER<>0); +end; + Procedure TSprvEmit.InitCs(RSRC1:TCOMPUTE_PGM_RSRC1; RSRC2:TCOMPUTE_PGM_RSRC2; NTX:TCOMPUTE_NUM_THREAD_X; diff --git a/spirv/emit_alloc.pas b/spirv/emit_alloc.pas index 1cef1622..eba93330 100644 --- a/spirv/emit_alloc.pas +++ b/spirv/emit_alloc.pas @@ -7,7 +7,9 @@ interface uses sysutils, spirv, + srNode, + srInterface, srType, srTypes, srConst, @@ -33,6 +35,7 @@ type procedure AllocBinding; procedure AllocTypeBinding; procedure AllocEntryPoint; + function AddExecutionMode(mode:PtrUint):PSpirvOp; procedure AllocHeader; procedure AllocOpListId(node:PspirvOp); procedure AllocListId(node:PsrNode); @@ -165,6 +168,16 @@ begin OutputList .AllocEntryPoint(node); end; +function TSprvEmit_alloc.AddExecutionMode(mode:PtrUint):PSpirvOp; +var + node:PSpirvOp; +begin + node:=HeaderList.AddSpirvOp(Op.OpExecutionMode); + node^.AddParam(Main); + node^.AddLiteral(mode,ExecutionMode.GetStr(mode)); + Result:=node; +end; + procedure TSprvEmit_alloc.AllocHeader; var node:PSpirvOp; @@ -178,44 +191,30 @@ begin Case FExecutionModel of ExecutionModel.Fragment: begin - node:=HeaderList.AddSpirvOp(Op.OpExecutionMode); - node^.AddParam(Main); - node^.AddLiteral(ExecutionMode.OriginUpperLeft,ExecutionMode.GetStr(ExecutionMode.OriginUpperLeft)); + AddExecutionMode(ExecutionMode.OriginUpperLeft); - if (foDepthReplacing in DecorateList.FfemOpSet) then + if FPixelCenter then begin - node:=HeaderList.AddSpirvOp(Op.OpExecutionMode); - node^.AddParam(Main); - node^.AddLiteral(ExecutionMode.DepthReplacing,ExecutionMode.GetStr(ExecutionMode.DepthReplacing)); + AddExecutionMode(ExecutionMode.PixelCenterInteger); end; - if (foDepthGreater in DecorateList.FfemOpSet) then + if FEarlyFragmentTests then begin - node:=HeaderList.AddSpirvOp(Op.OpExecutionMode); - node^.AddParam(Main); - node^.AddLiteral(ExecutionMode.DepthGreater,ExecutionMode.GetStr(ExecutionMode.DepthGreater)); + AddExecutionMode(ExecutionMode.EarlyFragmentTests); end; - if (foDepthLess in DecorateList.FfemOpSet) then - begin - node:=HeaderList.AddSpirvOp(Op.OpExecutionMode); - node^.AddParam(Main); - node^.AddLiteral(ExecutionMode.DepthLess,ExecutionMode.GetStr(ExecutionMode.DepthLess)); - end; - - if (foDepthUnchanged in DecorateList.FfemOpSet) then - begin - node:=HeaderList.AddSpirvOp(Op.OpExecutionMode); - node^.AddParam(Main); - node^.AddLiteral(ExecutionMode.DepthUnchanged,ExecutionMode.GetStr(ExecutionMode.DepthUnchanged)); + case FDepthMode of + foDepthReplacing:AddExecutionMode(ExecutionMode.DepthReplacing); + foDepthGreater :AddExecutionMode(ExecutionMode.DepthGreater); + foDepthLess :AddExecutionMode(ExecutionMode.DepthLess); + foDepthUnchanged:AddExecutionMode(ExecutionMode.DepthUnchanged); + else; end; end; ExecutionModel.GLCompute: begin - node:=HeaderList.AddSpirvOp(Op.OpExecutionMode); - node^.AddParam(Main); - node^.AddLiteral(ExecutionMode.LocalSize,ExecutionMode.GetStr(ExecutionMode.LocalSize)); + node:=AddExecutionMode(ExecutionMode.LocalSize); node^.AddLiteral(FLocalSize.x); node^.AddLiteral(FLocalSize.y); node^.AddLiteral(FLocalSize.z); diff --git a/spirv/pssl-spirv.lpi b/spirv/pssl-spirv.lpi index cc55b4d4..0e9eb7bf 100644 --- a/spirv/pssl-spirv.lpi +++ b/spirv/pssl-spirv.lpi @@ -25,7 +25,7 @@ - + @@ -35,214 +35,161 @@ - + - + - + - + - + - + - + - - - - - - - - - - - - - - + + - - + + - - - - - - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + - + - - + - + - + - - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/spirv/pssl-spirv.lpr b/spirv/pssl-spirv.lpr index 1bc73626..c0e4d33b 100644 --- a/spirv/pssl-spirv.lpr +++ b/spirv/pssl-spirv.lpr @@ -273,7 +273,7 @@ begin until false; end; -procedure load_pssl(base:Pointer); +procedure load_pssl(base:Pointer;ShaderType:Byte); var info:PShaderBinaryInfo; Slots:PInputUsageSlot; @@ -360,11 +360,9 @@ begin end; end; - Assert(info<>nil); - SprvEmit:=TSprvEmit.Create; - case info^.m_type of + case ShaderType of kShaderTypePs : begin if cfg.FPrintInfo then @@ -372,6 +370,9 @@ begin SprvEmit.InitPs(GPU_REGS.PS.RSRC1,GPU_REGS.PS.RSRC2,GPU_REGS.PS.INPUT_ENA); SprvEmit.SetUserData(@GPU_REGS.PS.USER_DATA); + + SprvEmit.SET_PIX_CENTER(0); //PA_SU_VTX_CNTL.PIX_CENTER + SprvEmit.SET_SHADER_CONTROL(GPU_REGS.PS.SHADER_CONTROL); end; kShaderTypeVsVs: begin @@ -512,10 +513,9 @@ begin load_dump(cfg.FName); end; - load_pssl(GPU_REGS.CS.Addr); - - load_pssl(GPU_REGS.VS.Addr); - load_pssl(GPU_REGS.PS.Addr); + load_pssl(GPU_REGS.CS.Addr,kShaderTypeCs); + load_pssl(GPU_REGS.VS.Addr,kShaderTypeVsVs); + load_pssl(GPU_REGS.PS.Addr,kShaderTypePs); if cfg.FPrintInfo then begin diff --git a/spirv/srDecorate.pas b/spirv/srDecorate.pas index 9f061179..7e47fa77 100644 --- a/spirv/srDecorate.pas +++ b/spirv/srDecorate.pas @@ -31,22 +31,12 @@ type node:PSpirvOp; end; - TfemOp=( - foDepthReplacing, - foDepthGreater, - foDepthLess, - foDepthUnchanged - ); - - TfemOpSet=Set of TfemOp; - PsrDecorateList=^TsrDecorateList; TsrDecorateList=object(TsrOpBlockCustom) type TNodeFetch=specialize TNodeFetch; var FNTree:TNodeFetch; - FfemOpSet:TfemOpSet; function Fetch(data:PsrNode;param1,param2,param3:DWORD):PsrDecorate; procedure OpDecorate(Data:PsrNode;dec_id,param:DWORD); procedure OpMember (Data:PsrNode;index,offset:DWORD); diff --git a/spirv/srInterface.pas b/spirv/srInterface.pas index b1d1607d..29868e65 100644 --- a/spirv/srInterface.pas +++ b/spirv/srInterface.pas @@ -38,8 +38,19 @@ type x,y,z:DWORD; end; + TDepthMode=( + foDepthNone, + foDepthReplacing, + foDepthGreater, + foDepthLess, + foDepthUnchanged + ); + TEmitInterface=class(TCustomEmit) FExecutionModel:Word; + FDepthMode:TDepthMode; + FPixelCenter:Boolean; + FEarlyFragmentTests:Boolean; FLocalSize:TLocalSize; Config:TsrConfig; // diff --git a/spirv/srOutput.pas b/spirv/srOutput.pas index 8d97716f..1be82745 100644 --- a/spirv/srOutput.pas +++ b/spirv/srOutput.pas @@ -131,7 +131,6 @@ begin end; etMrtz: begin - pDecorateList^.FfemOpSet:=pDecorateList^.FfemOpSet+[foDepthReplacing,foDepthGreater]; pDecorateList^.OpDecorate(pVar,Decoration.BuiltIn,BuiltIn.FragDepth); end; etPos0: diff --git a/vulkan/vMemory.pas b/vulkan/vMemory.pas index 5e3ac690..e0f6dad0 100644 --- a/vulkan/vMemory.pas +++ b/vulkan/vMemory.pas @@ -108,7 +108,7 @@ type Constructor Create; - function findMemoryType(Filter:TVkUInt32;prop:TVkMemoryPropertyFlags):Integer; + function findMemoryType(Filter:TVkUInt32;prop:TVkMemoryPropertyFlags;start:Integer):Integer; procedure LoadMemoryHeaps; procedure PrintMemoryHeaps; procedure PrintMemoryType(typeFilter:TVkUInt32); @@ -126,7 +126,7 @@ type public Function Alloc(const mr:TVkMemoryRequirements;pr:TVkMemoryPropertyFlags):TvPointer; - Function Alloc(Size,Align:TVkDeviceSize;mtindex:Byte):TvPointer; + Function Alloc(Size,Align:TVkDeviceSize;mtindex:Byte;test_free:Boolean):TvPointer; Function Free(P:TvPointer):Boolean; private @@ -377,12 +377,13 @@ begin TAILQ_INIT(@FHosts); end; -function TvMemManager.findMemoryType(Filter:TVkUInt32;prop:TVkMemoryPropertyFlags):Integer; +function TvMemManager.findMemoryType(Filter:TVkUInt32;prop:TVkMemoryPropertyFlags;start:Integer):Integer; var - i:TVkUInt32; + i:Integer; begin Result:=-1; - For i:=0 to FProperties.memoryTypeCount-1 do + if (start<0) or (start>=FProperties.memoryTypeCount) then Exit; + For i:=start to FProperties.memoryTypeCount-1 do begin if ((Filter and (1 shl i))<>0) and ((FProperties.memoryTypes[i].propertyFlags and prop)=prop) then begin @@ -781,12 +782,26 @@ Function TvMemManager.Alloc(const mr:TVkMemoryRequirements;pr:TVkMemoryPropertyF var mt:Integer; begin - mt:=findMemoryType(mr.memoryTypeBits,pr); + Result:=Default(TvPointer); + mt:=-1; + + repeat + + mt:=findMemoryType(mr.memoryTypeBits,pr,mt+1); + if (mt=-1) then Break; + + Result:=Alloc(mr.size,mr.alignment,mt,True); + if (Result.FMemory<>nil) then Exit; + + until false; + + mt:=findMemoryType(mr.memoryTypeBits,pr,0); if (mt=-1) then Exit(Default(TvPointer)); - Result:=Alloc(mr.size,mr.alignment,mt); + + Result:=Alloc(mr.size,mr.alignment,mt,False); end; -Function TvMemManager.Alloc(Size,Align:TVkDeviceSize;mtindex:Byte):TvPointer; +Function TvMemManager.Alloc(Size,Align:TVkDeviceSize;mtindex:Byte;test_free:Boolean):TvPointer; var key:TDevNode; Offset:TVkDeviceSize; @@ -828,25 +843,28 @@ begin Result.FMemory:=FDevBlocks[key.FBlockId]; Result.FOffset:=key.FOffset; end else - if _AllcDevBlock(System.Align(Size,GRANULAR_DEV_BLOCK_SIZE),mtindex,key.FBlockId) then begin - //alloc save - key.Fisfree:=False; - key.FSize :=Size; - key.FOffset:=0; - key.FmType :=mtindex; - FAllcSet.Insert(key); - Result.FMemory:=FDevBlocks[key.FBlockId]; - Result.FOffset:=0; - //next free save - FSize:=FDevBlocks[key.FBlockId].FSize; - if (Size<>FSize) then + if not test_free then + if _AllcDevBlock(System.Align(Size,GRANULAR_DEV_BLOCK_SIZE),mtindex,key.FBlockId) then begin - key.Fisfree:=True; - key.FOffset:=Size; - key.FSize :=FSize-Size; - FFreeSet.Insert(key); + //alloc save + key.Fisfree:=False; + key.FSize :=Size; + key.FOffset:=0; + key.FmType :=mtindex; FAllcSet.Insert(key); + Result.FMemory:=FDevBlocks[key.FBlockId]; + Result.FOffset:=0; + //next free save + FSize:=FDevBlocks[key.FBlockId].FSize; + if (Size<>FSize) then + begin + key.Fisfree:=True; + key.FOffset:=Size; + key.FSize :=FSize-Size; + FFreeSet.Insert(key); + FAllcSet.Insert(key); + end; end; end; // diff --git a/vulkan/vRegs2Vulkan.pas b/vulkan/vRegs2Vulkan.pas index 61179134..c8fa293c 100644 --- a/vulkan/vRegs2Vulkan.pas +++ b/vulkan/vRegs2Vulkan.pas @@ -1229,36 +1229,26 @@ begin Assert(SHADER_CONTROL.Z_EXPORT_ENABLE=0 ,'Z_EXPORT_ENABLE'); Assert(SHADER_CONTROL.STENCIL_TEST_VAL_EXPORT_ENABLE=0,'STENCIL_TEST_VAL_EXPORT_ENABLE'); - - //SHADER_CONTROL.CONSERVATIVE_Z_EXPORT -> - //VkPhysicalDeviceConservativeRasterizationPropertiesEXT::conservativeRasterizationPostDepthCoverage - - Assert(SHADER_CONTROL.DEPTH_BEFORE_SHADER=0,'DEPTH_BEFORE_SHADER'); - Assert(CX_REG^.DB_RENDER_OVERRIDE.FORCE_SHADER_Z_ORDER=0,'FORCE_SHADER_Z_ORDER'); - - //SHADER_CONTROL.DEPTH_BEFORE_SHADER + //SHADER_CONTROL.CONSERVATIVE_Z_EXPORT -> SPIRV DepthGreater/DepthLess + //CX_REG^.PA_SU_VTX_CNTL.PIX_CENTER -> SPIRV PixelCenterInteger + //SHADER_CONTROL.DEPTH_BEFORE_SHADER -> SPIRV EarlyFragmentTests //CX_REG^.CB_COLOR_CONTROL //CX_REG^.DB_RENDER_OVERRIDE.FORCE_SHADER_Z_ORDER - {if (CX_REG^.DB_RENDER_OVERRIDE.FORCE_SHADER_Z_ORDER<>0) then - begin} - Case SHADER_CONTROL.Z_ORDER of - LATE_Z, - RE_Z :Result.zorder_stage:=ord(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); + Case SHADER_CONTROL.Z_ORDER of + LATE_Z, + RE_Z :Result.zorder_stage:=ord(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); + + EARLY_Z_THEN_LATE_Z, + EARLY_Z_THEN_RE_Z :Result.zorder_stage:=ord(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) or + ord(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); + end; - EARLY_Z_THEN_LATE_Z, - EARLY_Z_THEN_RE_Z :Result.zorder_stage:=ord(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) or - ord(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); - end; - {end else if (SHADER_CONTROL.DEPTH_BEFORE_SHADER<>0) then begin - Result.zorder_stage:=ord(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT); - end else - begin - Result.zorder_stage:=ord(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); - end;} + Result.zorder_stage:=Result.zorder_stage or ord(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT); + end; Result.FImageInfo.Addr:=Result.Z_READ_ADDR; @@ -1317,20 +1307,24 @@ end; Function TGPU_REGS.GET_RASTERIZATION:TVkPipelineRasterizationStateCreateInfo; var SU_SC_MODE_CNTL:TPA_SU_SC_MODE_CNTL; + PA_CL_CLIP_CNTL:TPA_CL_CLIP_CNTL; begin SU_SC_MODE_CNTL:=CX_REG^.PA_SU_SC_MODE_CNTL; + PA_CL_CLIP_CNTL:=CX_REG^.PA_CL_CLIP_CNTL; Result:=Default(TVkPipelineRasterizationStateCreateInfo); Result.sType:=VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - if (SH_REG^.SPI_SHADER_PGM_LO_PS<>0) and + if (SH_REG^.SPI_SHADER_PGM_LO_PS<>0) or (SH_REG^.SPI_SHADER_PGM_HI_PS.MEM_BASE<>0) then + if (CX_REG^.DB_RENDER_CONTROL.DEPTH_CLEAR_ENABLE=0) and + (CX_REG^.DB_RENDER_CONTROL.STENCIL_CLEAR_ENABLE=0) then begin - Result.rasterizerDiscardEnable:=CX_REG^.DB_SHADER_CONTROL.KILL_ENABLE; + Result.rasterizerDiscardEnable:=PA_CL_CLIP_CNTL.DX_RASTERIZATION_KILL; end; //VkPhysicalDeviceDepthClampZeroOneFeaturesEXT::depthClampZeroOne - Result.depthClampEnable :=CX_REG^.PA_CL_CLIP_CNTL.DX_CLIP_SPACE_DEF; + Result.depthClampEnable :=PA_CL_CLIP_CNTL.ZCLIP_NEAR_DISABLE or PA_CL_CLIP_CNTL.ZCLIP_FAR_DISABLE; Result.polygonMode :=get_polygon_mode(SU_SC_MODE_CNTL); Result.cullMode :=get_cull_mode (SU_SC_MODE_CNTL); Result.frontFace :=TVkFrontFace (SU_SC_MODE_CNTL.FACE); //1:1 @@ -1363,23 +1357,28 @@ begin end; Function TGPU_REGS.GET_MULTISAMPLE:TVkPipelineMultisampleStateCreateInfo; +var + ps_iter_samples,num_samples:Integer; begin Result:=Default(TVkPipelineMultisampleStateCreateInfo); Result.sType:=VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - if (CX_REG^.PA_SC_MODE_CNTL_1.PS_ITER_SAMPLE<>0) then + if (CX_REG^.DB_EQAA.PS_ITER_SAMPLES<>0) or + (CX_REG^.PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES<>0) then begin + ps_iter_samples:=1 shl CX_REG^.DB_EQAA.PS_ITER_SAMPLES; + num_samples :=1 shl CX_REG^.PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES; + Result.sampleShadingEnable :=VK_TRUE; - Result.rasterizationSamples :=TVkSampleCountFlagBits(1 shl CX_REG^.PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES); - Result.minSampleShading :=0.5; - Result.pSampleMask :=nil; + Result.rasterizationSamples :=TVkSampleCountFlagBits(num_samples); + Result.minSampleShading :=ps_iter_samples/num_samples; + Result.pSampleMask :=nil; //TODO Result.alphaToCoverageEnable:=CX_REG^.DB_ALPHA_TO_MASK.ALPHA_TO_MASK_ENABLE; Result.alphaToOneEnable :=VK_FALSE; end else begin Result.rasterizationSamples:=VK_SAMPLE_COUNT_1_BIT; end; - end; function TGPU_REGS.GET_PRIM_RESET:TVkBool32; diff --git a/vulkan/vShaderManager.pas b/vulkan/vShaderManager.pas index acddd296..fd37a292 100644 --- a/vulkan/vShaderManager.pas +++ b/vulkan/vShaderManager.pas @@ -292,6 +292,9 @@ begin GPU_REGS.CX_REG^.SPI_PS_INPUT_ENA); SprvEmit.SetUserData(GPU_REGS.get_user_data(FStage)); + + SprvEmit.SET_PIX_CENTER (GPU_REGS.CX_REG^.PA_SU_VTX_CNTL.PIX_CENTER); + SprvEmit.SET_SHADER_CONTROL(GPU_REGS.CX_REG^.DB_SHADER_CONTROL); end; vShaderStageVs: begin