diff --git a/chip/ps4_gpu_regs.pas b/chip/ps4_gpu_regs.pas index d66af485..17fa78f3 100644 --- a/chip/ps4_gpu_regs.pas +++ b/chip/ps4_gpu_regs.pas @@ -63,6 +63,62 @@ type TSPI_USER_DATA=array[0..15] of DWORD; + TPA_SC_AA_SAMPLE_LOCS_PIXEL_XY=bitpacked record + S0_X:bit4; + S0_Y:bit4; + S1_X:bit4; + S1_Y:bit4; + S2_X:bit4; + S2_Y:bit4; + S3_X:bit4; + S3_Y:bit4; + S4_X:bit4; + S4_Y:bit4; + S5_X:bit4; + S5_Y:bit4; + S6_X:bit4; + S6_Y:bit4; + S7_X:bit4; + S7_Y:bit4; + S8_X:bit4; + S8_Y:bit4; + S9_X:bit4; + S9_Y:bit4; + S10_X:bit4; + S10_Y:bit4; + S11_X:bit4; + S11_Y:bit4; + S12_X:bit4; + S12_Y:bit4; + S13_X:bit4; + S13_Y:bit4; + S14_X:bit4; + S14_Y:bit4; + S15_X:bit4; + S15_Y:bit4; + end; + + TPA_SC_AA_SAMPLE_LOCS_PIXEL=bitpacked array[0..1,0..15] of bit4; + + TPA_SC_CENTROID_PRIORITY=bitpacked record + DISTANCE_0:bit4; + DISTANCE_1:bit4; + DISTANCE_2:bit4; + DISTANCE_3:bit4; + DISTANCE_4:bit4; + DISTANCE_5:bit4; + DISTANCE_6:bit4; + DISTANCE_7:bit4; + DISTANCE_8:bit4; + DISTANCE_9:bit4; + DISTANCE_10:bit4; + DISTANCE_11:bit4; + DISTANCE_12:bit4; + DISTANCE_13:bit4; + DISTANCE_14:bit4; + DISTANCE_15:bit4; + end; + TRT_INFO=record //Addr:Pointer; @@ -151,6 +207,10 @@ type SC_AA_MASK_X0Y0_X1Y0:TPA_SC_AA_MASK_X0Y0_X1Y0; SC_AA_MASK_X0Y1_X1Y1:TPA_SC_AA_MASK_X0Y1_X1Y1; + SC_AA_SAMPLE_LOCS_PIXEL:array[0..1,0..1] of TPA_SC_AA_SAMPLE_LOCS_PIXEL_XY; + + SC_CENTROID_PRIORITY:TPA_SC_CENTROID_PRIORITY; + HARDWARE_SCREEN_OFFSET:TPA_SU_HARDWARE_SCREEN_OFFSET; SU_LINE_CNTL:TPA_SU_LINE_CNTL; SU_POINT_SIZE:TPA_SU_POINT_SIZE; @@ -162,6 +222,8 @@ type CL_CLIP_CNTL:TPA_CL_CLIP_CNTL; SC_CLIPRECT_RULE:TPA_SC_CLIPRECT_RULE; + SC_MODE_CNTL:TPA_SU_SC_MODE_CNTL; + VGT_SHADER_STAGES_EN:TVGT_SHADER_STAGES_EN; VGT_OUT_DEALLOC_CNTL:TVGT_OUT_DEALLOC_CNTL; @@ -176,11 +238,19 @@ type VGT_OUTPUT_PATH_CNTL:TVGT_OUTPUT_PATH_CNTL; + VGT_GS_MODE:TVGT_GS_MODE; + + VGT_GS_PER_ES:TVGT_GS_PER_ES; + VGT_ES_PER_GS:TVGT_ES_PER_GS; + VGT_GS_PER_VS:TVGT_GS_PER_VS; + VGT_PRIMITIVE_TYPE:TVGT_PRIMITIVE_TYPE; VGT_INDEX_TYPE :TVGT_INDEX_TYPE ; VGT_NUM_INSTANCES :TVGT_NUM_INSTANCES ; GRBM_GFX_INDEX :TGRBM_GFX_INDEX; + IA_MULTI_VGT_PARAM:TIA_MULTI_VGT_PARAM; + VGT_DMA:packed record INDEX_TYPE:TVGT_DMA_INDEX_TYPE; NUM_INSTANCES:TVGT_DMA_NUM_INSTANCES; @@ -275,6 +345,9 @@ type HTILE_SURFACE :TDB_HTILE_SURFACE ; + EQAA :TDB_EQAA; + + COUNT_CONTROL :TDB_COUNT_CONTROL; end; @@ -305,6 +378,7 @@ type function GET_INDEX_TYPE_SIZE:Byte; Procedure Clear; + Procedure InitDefault; Procedure ClearDMA; end; @@ -645,7 +719,7 @@ begin //BLEND_CLAMP - if (RENDER_TARGET[i].INFO.BLEND_BYPASS=1) then + if (RENDER_TARGET[i].INFO.BLEND_BYPASS<>0) then begin Result.blendEnable:=VK_FALSE; end else @@ -672,9 +746,19 @@ begin Result.alphaBlendOp :=GetBlendOp(CB_BLEND_CONTROL[i].ALPHA_COMB_FCN); end; + Assert(CB_BLEND_CONTROL[i].DISABLE_ROP3=0); end; + //Result.blendEnable:=VK_TRUE; + //Result.SRCCOLORBLENDFACTOR:=VK_BLEND_FACTOR_SRC_ALPHA; + //Result.DSTCOLORBLENDFACTOR:=VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + //Result.COLORBLENDOP :=VK_BLEND_OP_ADD; + //Result.SRCALPHABLENDFACTOR:=VK_BLEND_FACTOR_SRC_ALPHA; + //Result.DSTALPHABLENDFACTOR:=VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + //Result.ALPHABLENDOP :=VK_BLEND_OP_ADD; + //Result.COLORWRITEMASK :=15; + //CB_COLOR_CONTROL.MODE //CB_DISABLE //Assert(CB_COLOR_CONTROL.ROP3 = 204); //CB_DISABLE @@ -754,22 +838,12 @@ var begin Result:=Default(TRT_INFO); - { - Result.Addr:=Pointer(QWORD(RENDER_TARGET[i].BASE) shl 8); - if (RENDER_TARGET[i].INFO.LINEAR_GENERAL=1) then - begin - Result.Addr:=Pointer(QWORD(Result.Addr) or Byte(RENDER_TARGET[i].VIEW.SLICE_START)); - end; - } - Result.FImageInfo.Addr:=Pointer(QWORD(RENDER_TARGET[i].BASE) shl 8); if (RENDER_TARGET[i].INFO.LINEAR_GENERAL<>0) then begin Result.FImageInfo.Addr:=Pointer(QWORD(Result.FImageInfo.Addr) or Byte(RENDER_TARGET[i].VIEW.SLICE_START)); end; - //Result.extend:=GET_SCREEN_SIZE; - Result.FImageInfo.params.extend.width :=_fix_scissor_range(SCREEN_SCISSOR_BR.BR_X); Result.FImageInfo.params.extend.height:=_fix_scissor_range(SCREEN_SCISSOR_BR.BR_Y); Result.FImageInfo.params.extend.depth :=1; @@ -780,20 +854,6 @@ begin Assert(RENDER_TARGET[i].INFO.ENDIAN=ENDIAN_NONE); //Assert(RENDER_TARGET[i].INFO.COMPRESSION=0); //FMASK and MSAA - { - Case RENDER_TARGET[i].INFO.FORMAT of - COLOR_8_8_8_8: - Case RENDER_TARGET[i].INFO.NUMBER_TYPE of - NUMBER_UNORM:Result.cformat:=VK_FORMAT_R8G8B8A8_UNORM; - NUMBER_SRGB :Result.cformat:=VK_FORMAT_R8G8B8A8_SRGB; - else - Assert(false); - end; - else - Assert(false); - end; - } - Case RENDER_TARGET[i].INFO.FORMAT of COLOR_8_8_8_8: Case RENDER_TARGET[i].INFO.NUMBER_TYPE of @@ -806,26 +866,23 @@ begin Assert(false,'TODO'); end; - //Result.TILE_MODE_INDEX:=RENDER_TARGET[i].ATTRIB.TILE_MODE_INDEX; - //if (RENDER_TARGET[i].INFO.LINEAR_GENERAL=1) then Result.TILE_MODE_INDEX:=8; - if (RENDER_TARGET[i].INFO.LINEAR_GENERAL<>0) then Result.FImageInfo.params.tiling_idx:=kTileModeDisplay_LinearGeneral else Result.FImageInfo.params.tiling_idx:=RENDER_TARGET[i].ATTRIB.TILE_MODE_INDEX; Result.FImageInfo.params.itype :=ord(VK_IMAGE_TYPE_2D); - Result.FImageInfo.params.samples :=1{ shl (RENDER_TARGET[i].ATTRIB.NUM_SAMPLES and 3)}; + Result.FImageInfo.params.samples :=1 shl (RENDER_TARGET[i].ATTRIB.NUM_SAMPLES and 3); Result.FImageInfo.params.mipLevels :=1; Result.FImageInfo.params.arrayLayers:=1; Result.FImageView.cformat :=Result.FImageInfo.cformat; Result.FImageView.vtype :=ord(VK_IMAGE_VIEW_TYPE_2D); - Result.FImageView.dstSel.r:=ord(VK_COMPONENT_SWIZZLE_R); - Result.FImageView.dstSel.g:=ord(VK_COMPONENT_SWIZZLE_G); - Result.FImageView.dstSel.b:=ord(VK_COMPONENT_SWIZZLE_B); - Result.FImageView.dstSel.a:=ord(VK_COMPONENT_SWIZZLE_A); + //Result.FImageView.dstSel.r:=ord(VK_COMPONENT_SWIZZLE_R); + //Result.FImageView.dstSel.g:=ord(VK_COMPONENT_SWIZZLE_G); + //Result.FImageView.dstSel.b:=ord(VK_COMPONENT_SWIZZLE_B); + //Result.FImageView.dstSel.a:=ord(VK_COMPONENT_SWIZZLE_A); //Result.FImageView.dstSel:TvDstSel; TODO @@ -847,10 +904,6 @@ begin Result.IMAGE_USAGE:=Result.IMAGE_USAGE or TM_WRITE; - //if (RENDER_TARGET[i].INFO.FAST_CLEAR=1) then - //begin - //Result.FAST_CLEAR:=True; - Case RENDER_TARGET[i].INFO.FORMAT of COLOR_8_8_8_8: Case RENDER_TARGET[i].INFO.NUMBER_TYPE of @@ -1041,7 +1094,7 @@ begin Result.FImageInfo.params.tiling_idx:=DEPTH.Z_INFO.TILE_MODE_INDEX; Result.FImageInfo.params.itype :=ord(VK_IMAGE_TYPE_2D); - Result.FImageInfo.params.samples :=1{ shl (DEPTH.Z_INFO.NUM_SAMPLES and 3)}; + Result.FImageInfo.params.samples :=1 shl (DEPTH.Z_INFO.NUM_SAMPLES and 3); Result.FImageInfo.params.mipLevels :=1; Result.FImageInfo.params.arrayLayers:=1; end; @@ -1111,13 +1164,62 @@ begin end; end; + + Procedure TGPU_REGS.Clear; begin FillChar(Self,SizeOf(Self),0); +end; + +Procedure TGPU_REGS.InitDefault; +begin + Clear; DWORD(SPI.CS.STATIC_THREAD_MGMT_SE0):=$FFFFFFFF; DWORD(SPI.CS.STATIC_THREAD_MGMT_SE1):=$FFFFFFFF; + DWORD(SPI.CS.RESOURCE_LIMITS) :=$00000170; + DWORD(SPI.PS.RSRC3) :=$001701FF; + DWORD(SPI.VS.RSRC3) :=$001701FD; + //mmSPI_SHADER_PGM_RSRC3_GS=001701FF + //mmSPI_SHADER_PGM_RSRC3_ES=001701FD + //mmSPI_SHADER_PGM_RSRC3_HS=00000017 + //mmSPI_SHADER_PGM_RSRC3_LS=001701FD + DWORD(SPI.VS.LATE_ALLOC) :=$0000001C; + DWORD(SPI.VS.OUT_CONFIG) :=$00000002; + + DWORD(VTX_CNTL) :=$0000002D; + DWORD(SU_LINE_CNTL) :=$00000008; + DWORD(SU_POINT_SIZE) :=$00080008; + DWORD(SU_POINT_MINMAX) :=$FFFF0000; + DWORD(VTE_CNTL) :=$0000043F; + DWORD(SC_CLIPRECT_RULE) :=$0000FFFF; + DWORD(VGT_OUT_DEALLOC_CNTL) :=$00000010; + + PDWORD(@GB_CLIP.VERT_CLIP_ADJ)^ :=$3F800000; + PDWORD(@GB_CLIP.HORZ_CLIP_ADJ)^ :=$3F800000; + PDWORD(@GB_CLIP.VERT_DISC_ADJ)^ :=$3F800000; + PDWORD(@GB_CLIP.HORZ_DISC_ADJ)^ :=$3F800000; + + DWORD(CB_COLOR_CONTROL) :=$00CC0010; + + DWORD(SC_AA_MASK_X0Y0_X1Y0) :=$FFFFFFFF; + DWORD(SC_AA_MASK_X0Y1_X1Y1) :=$FFFFFFFF; + + DWORD(VGT_VTX_INDX.MAX_INDX) :=$FFFFFFFF; + + DWORD(SC_MODE_CNTL_1) :=$06020000; + + DWORD(PA_SU_POLY_OFFSET_DB_FMT_CNTL):=$000001E9; + + DWORD(VGT_GS_PER_ES) :=$00000100; + DWORD(VGT_ES_PER_GS) :=$00000100; + DWORD(VGT_GS_PER_VS) :=$00000004; + + DWORD(GRBM_GFX_INDEX ) :=$E0000000; + DWORD(IA_MULTI_VGT_PARAM) :=$000000FF; + + VGT_DMA.NUM_INSTANCES:=1; end; Procedure TGPU_REGS.ClearDMA; @@ -1384,7 +1486,8 @@ begin Result.params.mipLevels:=PT^.last_level-PT^.base_level+1; end; - Assert(Result.params.mipLevels=1,'TODO'); + //Assert(Result.params.mipLevels=1,'TODO'); + Result.params.mipLevels:=1; ///// Result.params.arrayLayers:=1; end; @@ -1462,6 +1565,10 @@ begin Result.base_level:=PT^.base_level; Result.last_level:=PT^.last_level; end; + + Result.base_level:=0; ///// + Result.last_level:=0; ///// + end; diff --git a/chip/ps4_tiling.pas b/chip/ps4_tiling.pas index 65c015a9..95ba7045 100644 --- a/chip/ps4_tiling.pas +++ b/chip/ps4_tiling.pas @@ -2612,13 +2612,13 @@ begin slice_offset := (z div m_tileThickness) * m_tilesPerSlice * m_tileBytes; - tile_row_index := y div kMicroTileHeight; + tile_row_index := y div kMicroTileHeight; tile_column_index := x div kMicroTileWidth; - tile_offset := ((tile_row_index * m_tilesPerRow) + tile_column_index) * m_tileBytes; + tile_offset := ((tile_row_index * m_tilesPerRow) + tile_column_index) * m_tileBytes; - element_offset := element_index * m_bitsPerElement; + element_offset := element_index * m_bitsPerElement; - final_offset := (slice_offset + tile_offset)*8 + element_offset; + final_offset := (slice_offset + tile_offset)*8 + element_offset; outTiledBitOffset := final_offset; diff --git a/chip/ps4_videodrv.pas b/chip/ps4_videodrv.pas index 0c5ae52b..63130097 100644 --- a/chip/ps4_videodrv.pas +++ b/chip/ps4_videodrv.pas @@ -743,12 +743,13 @@ procedure onEventWrite(pm4Hdr:PM4_TYPE_3_HEADER;Body:PTPM4CMDEVENTWRITE); begin {$ifdef ww} Case Body^.eventType of - THREAD_TRACE_MARKER :Writeln(' THREAD_TRACE_MARKER'); + CACHE_FLUSH_AND_INV_EVENT :Writeln(' FLUSH_AND_INV_EVENT'); FLUSH_AND_INV_CB_PIXEL_DATA:Writeln(' FLUSH_AND_INV_CB_PIXEL_DATA'); FLUSH_AND_INV_DB_DATA_TS :Writeln(' FLUSH_AND_INV_DB_DATA_TS'); FLUSH_AND_INV_DB_META :Writeln(' FLUSH_AND_INV_DB_META'); FLUSH_AND_INV_CB_DATA_TS :Writeln(' FLUSH_AND_INV_CB_DATA_TS'); FLUSH_AND_INV_CB_META :Writeln(' FLUSH_AND_INV_CB_META'); + THREAD_TRACE_MARKER :Writeln(' THREAD_TRACE_MARKER'); else Assert(False,IntToStr(Body^.eventType)); end; @@ -1189,8 +1190,19 @@ begin mmPA_SC_AA_MASK_X0Y0_X1Y0:DWORD(GPU_REGS.SC_AA_MASK_X0Y0_X1Y0):=value; mmPA_SC_AA_MASK_X0Y1_X1Y1:DWORD(GPU_REGS.SC_AA_MASK_X0Y1_X1Y1):=value; + mmPA_SC_AA_CONFIG :DWORD(GPU_REGS.SC_AA_CONFIG):=value; + mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0..mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3: + begin + PDWORD(@GPU_REGS.SC_AA_SAMPLE_LOCS_PIXEL)[reg-mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0]:=value; + end; + + mmPA_SC_CENTROID_PRIORITY_0..mmPA_SC_CENTROID_PRIORITY_1: + begin + PDWORD(@GPU_REGS.SC_CENTROID_PRIORITY)[reg-mmPA_SC_CENTROID_PRIORITY_0]:=value; + end; + mmPA_SU_HARDWARE_SCREEN_OFFSET:DWORD(GPU_REGS.HARDWARE_SCREEN_OFFSET):=value; mmPA_SU_VTX_CNTL:DWORD(GPU_REGS.VTX_CNTL):=value; @@ -1202,6 +1214,8 @@ begin mmPA_CL_CLIP_CNTL:DWORD(GPU_REGS.CL_CLIP_CNTL) :=value; mmPA_SC_CLIPRECT_RULE:DWORD(GPU_REGS.SC_CLIPRECT_RULE):=value; + mmPA_SU_SC_MODE_CNTL:DWORD(GPU_REGS.SC_MODE_CNTL):=value; + mmPA_CL_GB_VERT_CLIP_ADJ:PDWORD(@GPU_REGS.GB_CLIP.VERT_CLIP_ADJ)^:=value; mmPA_CL_GB_VERT_DISC_ADJ:PDWORD(@GPU_REGS.GB_CLIP.VERT_DISC_ADJ)^:=value; mmPA_CL_GB_HORZ_CLIP_ADJ:PDWORD(@GPU_REGS.GB_CLIP.HORZ_CLIP_ADJ)^:=value; @@ -1248,6 +1262,10 @@ begin mmDB_HTILE_SURFACE :DWORD(GPU_REGS.DEPTH.HTILE_SURFACE ):=value; + mmDB_EQAA :DWORD(GPU_REGS.DEPTH.EQAA):=value; + + mmDB_COUNT_CONTROL :DWORD(GPU_REGS.DEPTH.COUNT_CONTROL):=value; + mmVGT_SHADER_STAGES_EN :DWORD(GPU_REGS.VGT_SHADER_STAGES_EN) :=value; mmVGT_OUT_DEALLOC_CNTL :DWORD(GPU_REGS.VGT_OUT_DEALLOC_CNTL) :=value; @@ -1262,10 +1280,16 @@ begin mmVGT_OUTPUT_PATH_CNTL:DWORD(GPU_REGS.VGT_OUTPUT_PATH_CNTL):=value; - //mmVGT_GS_MODE:value:=value; + mmVGT_GS_MODE:DWORD(GPU_REGS.VGT_GS_MODE):=value; + + mmVGT_GS_PER_ES:DWORD(GPU_REGS.VGT_GS_PER_ES):=value; + mmVGT_ES_PER_GS:DWORD(GPU_REGS.VGT_ES_PER_GS):=value; + mmVGT_GS_PER_VS:DWORD(GPU_REGS.VGT_GS_PER_VS):=value; mmPA_SU_POLY_OFFSET_DB_FMT_CNTL:DWORD(GPU_REGS.PA_SU_POLY_OFFSET_DB_FMT_CNTL):=value; + mmIA_MULTI_VGT_PARAM:DWORD(GPU_REGS.IA_MULTI_VGT_PARAM):=value; + {$ifdef ww}else Writeln('SetContextReg:',getRegName(reg),'=',HexStr(value,8));{$endif} end; @@ -1343,7 +1367,8 @@ end; const CONTEXT_REG_BASE = $A000; - CONTEXT_SPACE_START=$0000a000; + CONTEXT_REG_END = $A400; + CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE; procedure onSetContextReg(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDSETDATA); var @@ -1357,7 +1382,7 @@ begin r:=CONTEXT_REG_BASE+Body^.REG_OFFSET+i; v:=PDWORD(@Body^.REG_DATA)[i]; - //{$ifdef ww}Writeln('SetContextReg:',getRegName(r),'=',HexStr(v,8));{$endif} + {$ifdef ww}Writeln('SetContextReg:',getRegName(r),'=',HexStr(v,8));{$endif} //Continue; Inc(GFXRing.SetCxCount); @@ -1368,7 +1393,9 @@ begin end; const - PERSISTENT_SPACE_START=$00002c00; + SH_REG_BASE = $2C00; + SH_REG_END = $3000; + SH_REG_SIZE = SH_REG_END - SH_REG_BASE; procedure onSetShReg(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDSETDATA); var @@ -1379,10 +1406,10 @@ begin if c<>0 then For i:=0 to c-1 do begin - r:=PERSISTENT_SPACE_START+Body^.REG_OFFSET+i; + r:=SH_REG_BASE+Body^.REG_OFFSET+i; v:=PDWORD(@Body^.REG_DATA)[i]; - //{$ifdef ww}Writeln('SetShReg:',getRegName(r),'=',HexStr(v,8));{$endif} + {$ifdef ww}Writeln('SetShReg:',getRegName(r),'=',HexStr(v,8));{$endif} //Continue; Inc(GFXRing.SetShCount); @@ -1397,7 +1424,9 @@ type PGRBM_GFX_INDEX=^TGRBM_GFX_INDEX; Const - UCONFIG_SPACE_START=$0000c000; + USERCONFIG_REG_BASE = $0C000; + USERCONFIG_REG_END = $10000; + USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE; procedure onSetUConfigReg(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDSETDATA); var @@ -1415,10 +1444,10 @@ begin if c<>0 then For i:=0 to c-1 do begin - r:=UCONFIG_SPACE_START{ $C000}+Body^.REG_OFFSET+i; + r:=USERCONFIG_REG_BASE+Body^.REG_OFFSET+i; v:=PDWORD(@Body^.REG_DATA)[i]; - //{$ifdef ww}Writeln('SetUConfigReg:',getRegName(r),'=',HexStr(v,8));{$endif} + {$ifdef ww}Writeln('SetUConfigReg:',getRegName(r),'=',HexStr(v,8));{$endif} SetUContextReg(r,v); @@ -1459,13 +1488,132 @@ begin Free; end; +procedure ClearRenderTarget; +var + RT_INFO:TRT_INFO; + ri:TvImage2; + range:TVkImageSubresourceRange; +begin + Assert(DWORD(GPU_REGS.TARGET_MASK)=$F); + + RT_INFO:=GPU_REGS.GET_RT_INFO(0); + + GFXRing.AllocCmdBuffer; + GFXRing.CmdBuffer.EndRenderPass; + + ri:=FetchImage(GFXRing.CmdBuffer, + RT_INFO.FImageInfo, + ord(VK_IMAGE_USAGE_SAMPLED_BIT) or + ord(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) or + ord(VK_IMAGE_USAGE_TRANSFER_SRC_BIT) or + ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT), + TM_CLEAR + ); + + ri.PushBarrier(GFXRing.CmdBuffer, + ord(VK_ACCESS_TRANSFER_WRITE_BIT), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); + + range:=ri.GetSubresRange; + + GFXRing.CmdBuffer.ClearColorImage(ri.FHandle, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + @RT_INFO.CLEAR_COLOR, + 1,@range); +end; + +procedure ResolveRenderTarget; +var + RT_INFO_SRC,RT_INFO_DST:TRT_INFO; + ri_src,ri_dst:TvImage2; + vport:TVkRect2D; + range:TVkImageResolve; +begin + Assert(DWORD(GPU_REGS.TARGET_MASK)=$F); + + RT_INFO_SRC:=GPU_REGS.GET_RT_INFO(0); + RT_INFO_DST:=GPU_REGS.GET_RT_INFO(1); + + vport:=GPU_REGS.GET_SCISSOR(0); + + GFXRing.AllocCmdBuffer; + GFXRing.CmdBuffer.EndRenderPass; + + ri_src:=FetchImage(GFXRing.CmdBuffer, + RT_INFO_SRC.FImageInfo, + ord(VK_IMAGE_USAGE_SAMPLED_BIT) or + ord(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) or + ord(VK_IMAGE_USAGE_TRANSFER_SRC_BIT) or + ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT), + {TM_READ}0 + ); + + ri_dst:=FetchImage(GFXRing.CmdBuffer, + RT_INFO_DST.FImageInfo, + ord(VK_IMAGE_USAGE_SAMPLED_BIT) or + ord(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) or + ord(VK_IMAGE_USAGE_TRANSFER_SRC_BIT) or + ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT), + TM_WRITE + ); + + ri_src.PushBarrier(GFXRing.CmdBuffer, + ord(VK_ACCESS_TRANSFER_READ_BIT), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); + + ri_dst.PushBarrier(GFXRing.CmdBuffer, + ord(VK_ACCESS_TRANSFER_WRITE_BIT), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); + + range:=Default(TVkImageResolve); + + range.srcSubresource:=ri_src.GetSubresLayer; + range.dstSubresource:=ri_dst.GetSubresLayer; + + range.srcOffset.Create(vport.offset.x,vport.offset.y,0); + range.dstOffset:=range.srcOffset; + + range.extent.Create(vport.extent.width,vport.extent.height,1); + + GFXRing.CmdBuffer.ResolveImage( + ri_src.FHandle, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + ri_dst.FHandle, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1,@range); + +end; + +procedure ClearDepthTarget(ri:TvImage2;clr:TVkClearDepthStencilValue); +var + range:TVkImageSubresourceRange; +begin + ri.PushBarrier(GFXRing.CmdBuffer, + ord(VK_ACCESS_TRANSFER_WRITE_BIT), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); + + range:=ri.GetSubresRange; + + GFXRing.CmdBuffer.ClearDepthStencilImage(ri.FHandle, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + @clr, + 1,@range); + +end; + + + var LastSetShCount:ptruint; LastSetCxCount:ptruint; LastRenderCmd:TvRenderTargets; -procedure UpdateGpuRegsInfo; +function UpdateGpuRegsInfo:Boolean; var FAttrBuilder:TvAttrBuilder; FUniformBuilder:TvUniformBuilder; @@ -1486,8 +1634,6 @@ var range:TVkImageSubresourceRange; clr:TVkClearColorValue; - clr2:TVkClearDepthStencilValue; - BufferImageCopy:TVkBufferImageCopy; fdump_ps,fdump_vs:RawByteString; @@ -1504,7 +1650,10 @@ var ctx_change:Boolean; + Event:TvEvent2; + begin + Result:=True; {$ifdef null_rt}Exit;{$endif} @@ -1526,9 +1675,43 @@ begin {$ifdef ww}Writeln(fdump_vs);{$endif} {$ifdef ww}Writeln(fdump_ps);{$endif} - //if not GPU_REGS.COMP_ENABLE then Exit; + Case GPU_REGS.CB_COLOR_CONTROL.MODE of + CB_DISABLE : + begin + {$ifdef ww}Writeln('CB_DISABLE');{$endif} + Exit(False); + end; + CB_NORMAL :; //continue + CB_ELIMINATE_FAST_CLEAR: + begin + ClearRenderTarget; + Exit(False); + end; + CB_RESOLVE : + begin + ResolveRenderTarget; + Exit(False); + end; + CB_DECOMPRESS : + begin + {$ifdef ww}Writeln('CB_DECOMPRESS');{$endif} + Exit(False); + end; + CB_FMASK_DECOMPRESS : + begin + {$ifdef ww}Writeln('CB_FMASK_DECOMPRESS');{$endif} + Exit(False); + end; + CB_DCC_DECOMPRESS : + begin + {$ifdef ww}Writeln('CB_DCC_DECOMPRESS');{$endif} + Exit(False); + end; + else + Assert(false,'CB_COLOR_CONTROL.MODE:'+IntToStr(GPU_REGS.CB_COLOR_CONTROL.MODE)); + end; - if not (GPU_REGS.COMP_ENABLE or GPU_REGS.DB_ENABLE) then Exit; + if not (GPU_REGS.COMP_ENABLE or GPU_REGS.DB_ENABLE) then Exit(False); GFXRing.AllocCmdBuffer; @@ -1559,6 +1742,16 @@ begin FRenderCmd.FPipeline.SetPrimType(GPU_REGS.GET_PRIM_TYPE); FRenderCmd.FPipeline.SetBlendColors(@GPU_REGS.CB_BLEND_RGBA); + //FRenderCmd.FPipeline.multisampling.sampleShadingEnable :=VK_FALSE; + //FRenderCmd.FPipeline.multisampling.rasterizationSamples :=TVkSampleCountFlagBits(1 shl GPU_REGS.SC_AA_CONFIG.MSAA_NUM_SAMPLES); + //FRenderCmd.FPipeline.multisampling.minSampleShading :=1; + //FRenderCmd.FPipeline.multisampling.pSampleMask :=nil; + //FRenderCmd.FPipeline.multisampling.alphaToCoverageEnable:=VK_FALSE; + //FRenderCmd.FPipeline.multisampling.alphaToOneEnable :=VK_FALSE; + + //SC_MODE_CNTL_0 = { + //MSAA_ENABLE = 1, + FRenderCmd.FRenderArea:=GPU_REGS.GET_SCREEN; @@ -1576,7 +1769,39 @@ begin begin RT_INFO:=GPU_REGS.GET_RT_INFO(i); - {$ifdef ww}Writeln('RT:',i,' ',HexStr(RT_INFO.FImageInfo.Addr));{$endif} + {$ifdef ww} + Writeln('RT:',i,' ',HexStr(RT_INFO.FImageInfo.Addr)); + Writeln(' TM_READ :',RT_INFO.IMAGE_USAGE and TM_READ <>0); + Writeln(' TM_WRITE:',RT_INFO.IMAGE_USAGE and TM_WRITE<>0); + Writeln(' TM_CLEAR:',RT_INFO.IMAGE_USAGE and TM_CLEAR<>0); + + Writeln(' format:',RT_INFO.FImageInfo.cformat); + Writeln(' size:',RT_INFO.FImageInfo.params.extend.width,'x',RT_INFO.FImageInfo.params.extend.height); + Writeln(' samples:',RT_INFO.FImageInfo.params.samples); + {$endif} + + //if (RT_INFO.FImageInfo.params.samples>ord(FRenderCmd.FPipeline.multisampling.rasterizationSamples)) then + //begin + // FRenderCmd.FPipeline.multisampling.rasterizationSamples:=TVkSampleCountFlagBits(RT_INFO.FImageInfo.params.samples); + //end; + + RT_INFO.FImageInfo.params.samples:=1; + + if (RT_INFO.IMAGE_USAGE and TM_READ)=0 then + begin + //RT_INFO.IMAGE_USAGE:=RT_INFO.IMAGE_USAGE or TM_CLEAR; + end; + + if (RT_INFO.IMAGE_USAGE and TM_CLEAR)=0 then + begin + RT_INFO.IMAGE_USAGE:=RT_INFO.IMAGE_USAGE or TM_READ; + end; + + if (RT_INFO.blend.blendEnable<>0) then + begin + //Exit(false); + //RT_INFO.IMAGE_USAGE:=RT_INFO.IMAGE_USAGE or TM_CLEAR; + end; //RT_INFO.IMAGE_USAGE:=RT_INFO.IMAGE_USAGE or TM_CLEAR; //RT_INFO.IMAGE_USAGE:=RT_INFO.IMAGE_USAGE and (not TM_READ); @@ -1590,18 +1815,15 @@ begin RT_INFO.IMAGE_USAGE ); - //ri.data_usage:=ri.data_usage and (not TM_READ); //reset read + //if (RT_INFO.blend.blendEnable=0) then + begin + //ri.data_usage:=ri.data_usage and (not TM_READ); //reset read + end; iv:=ri.FetchView(GFXRing.CmdBuffer,RT_INFO.FImageView); // - {$ifdef ww} - Writeln('TM_READ :',RT_INFO.IMAGE_USAGE and TM_READ <>0); - Writeln('TM_WRITE:',RT_INFO.IMAGE_USAGE and TM_WRITE<>0); - Writeln('TM_CLEAR:',RT_INFO.IMAGE_USAGE and TM_CLEAR<>0); - {$endif} - //Writeln(hexstr(PDWORD(RT_INFO.FImageInfo.Addr)[0],8)); //writeln; @@ -1609,6 +1831,11 @@ begin //RT_INFO.IMAGE_USAGE:=RT_INFO.IMAGE_USAGE and (not TM_CLEAR); + ri.PushBarrier(GFXRing.CmdBuffer, + ord(VK_ACCESS_TRANSFER_READ_BIT), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); + ri.PushBarrier(GFXRing.CmdBuffer, GetColorAccessMask(RT_INFO.IMAGE_USAGE), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, @@ -1630,23 +1857,41 @@ begin //RT_INFO.blend.blendEnable:=0; FRenderCmd.FPipeline.AddBlend(RT_INFO.blend); + //RT_INFO.CLEAR_COLOR.float32[0]:=1; + //RT_INFO.CLEAR_COLOR.float32[1]:=0; + //RT_INFO.CLEAR_COLOR.float32[2]:=0; + //RT_INFO.CLEAR_COLOR.float32[3]:=1; + FRenderCmd.AddClearColor(TVkClearValue(RT_INFO.CLEAR_COLOR)); end; - if GPU_REGS.DB_ENABLE then + //if not GPU_REGS.COMP_ENABLE then Exit(false); + if GPU_REGS.DB_ENABLE {false} then begin DB_INFO:=GPU_REGS.GET_DB_INFO; + //if (DB_INFO.DEPTH_USAGE and TM_READ)=0 then + //begin + //DB_INFO.DEPTH_USAGE:=DB_INFO.DEPTH_USAGE or TM_CLEAR; + //end; + {$ifdef ww} Writeln('DB'); - Writeln('TM_READ :',DB_INFO.DEPTH_USAGE and TM_READ <>0); - Writeln('TM_WRITE:',DB_INFO.DEPTH_USAGE and TM_WRITE<>0); - Writeln('TM_CLEAR:',DB_INFO.DEPTH_USAGE and TM_CLEAR<>0); + Writeln(' TM_READ :',DB_INFO.DEPTH_USAGE and TM_READ <>0); + Writeln(' TM_WRITE:',DB_INFO.DEPTH_USAGE and TM_WRITE<>0); + Writeln(' TM_CLEAR:',DB_INFO.DEPTH_USAGE and TM_CLEAR<>0); + + Writeln(' format:',DB_INFO.FImageInfo.cformat); + Writeln(' size:',DB_INFO.FImageInfo.params.extend.width,'x',DB_INFO.FImageInfo.params.extend.height); + Writeln(' samples:',DB_INFO.FImageInfo.params.samples); + {$endif} //DB_INFO.DEPTH_USAGE:={TM_CLEAR or} TM_READ or TM_WRITE; + DB_INFO.FImageInfo.params.samples:=1; + //DB_INFO.FImageInfo.params.samples:=ord(FRenderCmd.FPipeline.multisampling.rasterizationSamples); ri:=FetchImage(GFXRing.CmdBuffer, DB_INFO.FImageInfo, @@ -1661,22 +1906,15 @@ begin iv:=ri.FetchView(GFXRing.CmdBuffer); + ri.PushBarrier(GFXRing.CmdBuffer, + ord(VK_ACCESS_TRANSFER_READ_BIT), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); + if not GPU_REGS.COMP_ENABLE then begin - ri.PushBarrier(GFXRing.CmdBuffer, - ord(VK_ACCESS_TRANSFER_WRITE_BIT), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - ord(VK_PIPELINE_STAGE_TRANSFER_BIT)); - - range:=iv.GetSubresRange; - clr2:=DB_INFO.CLEAR_VALUE.depthStencil; - - GFXRing.CmdBuffer.ClearDepthStencilImage(ri.FHandle, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - @clr2, - 1,@range); - - Exit; + ClearDepthTarget(ri,DB_INFO.CLEAR_VALUE.depthStencil); + Exit(False); end; ri.PushBarrier(GFXRing.CmdBuffer, @@ -1690,7 +1928,9 @@ begin //Writeln('colorAttachmentCount:',FRenderCmd.FRenderPass.subpass.colorAttachmentCount); //Writeln('AtCount:',FRenderCmd.FRenderPass.AtCount); - FRenderCmd.FRenderPass.SetDepthStencilRef(FRenderCmd.FRenderPass.subpass.colorAttachmentCount,DB_INFO.DEPTH_USAGE,DB_INFO.STENCIL_USAGE); + FRenderCmd.FRenderPass.SetDepthStencilRef( + FRenderCmd.FRenderPass.subpass.colorAttachmentCount, + DB_INFO.DEPTH_USAGE,DB_INFO.STENCIL_USAGE); //if not GPU_REGS.COMP_ENABLE then //begin @@ -1725,7 +1965,7 @@ begin {$ifdef ww}Writeln('[FVSShader]');{$endif} FVSShader:=FetchShader(vShaderStageVs,0,GPU_REGS); - if (FVSShader=nil) then Exit; + if (FVSShader=nil) then Exit(False); FAttrBuilder:=Default(TvAttrBuilder); FVSShader.EnumVertLayout(@FAttrBuilder.AddAttr,FVSShader.FDescSetId,@GPU_REGS.SPI.VS.USER_DATA); @@ -1743,7 +1983,7 @@ begin {$ifdef ww}Writeln('[FPSShader]');{$endif} FPSShader:=FetchShader(vShaderStagePs,1,GPU_REGS); - if (FPSShader=nil) then Exit; + //if (FPSShader=nil) then Exit; FShadersKey:=Default(TvShadersKey); FShadersKey.SetVSShader(FVSShader); @@ -1792,6 +2032,25 @@ begin end; end; +{ + Event:=TvEvent2.Create; + GFXRing.CmdBuffer.AddDependence(@Event.Release); + + vkCmdSetEvent( + GFXRing.CmdBuffer.cmdbuf, + Event.FHandle, + ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); + + vkCmdWaitEvents( + GFXRing.CmdBuffer.cmdbuf, + 1,@Event.FHandle, + ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT), + ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT), + 0,nil, + 0,nil, + 0,nil); + } + if ctx_change then begin if not GFXRing.CmdBuffer.BeginRenderPass(FRenderCmd) then @@ -1860,7 +2119,7 @@ begin iv:=ri.FetchView(GFXRing.CmdBuffer,FView); - ri.data_usage:=ri.data_usage and (not TM_READ); //////// + //ri.data_usage:=ri.data_usage and (not TM_READ); //////// FDescriptorGroup.FSets[fset].BindImg(bind,0, iv.FHandle, @@ -2028,11 +2287,11 @@ begin //drawInitiator:TVGT_DRAW_INITIATOR; - UpdateGpuRegsInfo; - - Addr:=getIndexAddress(GPU_REGS.VGT_DMA.BASE_LO,GPU_REGS.VGT_DMA.BASE_HI); - - GFXRing.CmdBuffer.DrawIndex2(Addr,GPU_REGS.VGT_DMA.INDICES,GPU_REGS.GET_INDEX_TYPE); + if UpdateGpuRegsInfo then + begin + Addr:=getIndexAddress(GPU_REGS.VGT_DMA.BASE_LO,GPU_REGS.VGT_DMA.BASE_HI); + GFXRing.CmdBuffer.DrawIndex2(Addr,GPU_REGS.VGT_DMA.INDICES,GPU_REGS.GET_INDEX_TYPE); + end; {$ifdef ww}Writeln('DrawIndex:',Body^.indexCount);{$endif} @@ -2043,9 +2302,10 @@ procedure onDrawIndexAuto(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEXAUTO); begin GPU_REGS.VGT_DMA.INDICES:=Body^.indexCount; - UpdateGpuRegsInfo; - - GFXRing.CmdBuffer.DrawIndexAuto(GPU_REGS.VGT_DMA.INDICES); + if UpdateGpuRegsInfo then + begin + GFXRing.CmdBuffer.DrawIndexAuto(GPU_REGS.VGT_DMA.INDICES); + end; {$ifdef ww}Writeln('onDrawIndexAuto:',Body^.indexCount);{$endif} @@ -2094,6 +2354,7 @@ begin i:=0; s:=node^.dcbSizesInBytes[n]; P:=PByte(node^.dcbGpuAddrs[n]); + While (inil) do begin - Assert(pNode^.dtype<>dtUnknow,'WTF'); + + if (pNode^.dtype=dtUnknow) then + begin + Case pNode^.size of + 1:pNode^.dtype:=dtUint8; + 2:pNode^.dtype:=dtHalf16; + 4:pNode^.dtype:=dtFloat32; + 8:pNode^.dtype:=dtVec2f; + 16:pNode^.dtype:=dtVec4f; + else + Assert(false); + end; + end; + if (Foffsetb) then Result:=a else Result:=b; +end; + Procedure TvFlip.SetImageSize(width,height:DWORD); var buf:PvFlipBuffer; memr:TVkMemoryRequirements; + memr2:TVkMemoryRequirements; begin buf:=@FBuffers[FcurrentBuffer]; @@ -449,18 +455,28 @@ begin buf^.Extent, ord(VK_IMAGE_USAGE_STORAGE_BIT) or ord(VK_IMAGE_USAGE_TRANSFER_SRC_BIT) or - ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT) + ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT), + ord(VK_IMAGE_CREATE_ALIAS_BIT) ); + memr:=buf^.DstImgNORM.GetRequirements; + if (sformat<>VK_FORMAT_UNDEFINED) then + begin buf^.DstImgSRGB:=TvDeviceImage2D.Create( sformat, buf^.Extent, ord(VK_IMAGE_USAGE_TRANSFER_SRC_BIT) or - ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT) + ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT), + ord(VK_IMAGE_CREATE_ALIAS_BIT) ); - memr:=buf^.DstImgNORM.GetRequirements; + memr2:=buf^.DstImgSRGB.GetRequirements; + + memr.size :=Max(memr.size ,memr2.size); + memr.alignment :=Max(memr.alignment,memr2.alignment); + memr.memoryTypeBits:=memr.memoryTypeBits and memr2.memoryTypeBits; + end; //Writeln(buf^.DstImg.GetDedicatedAllocation); @@ -736,6 +752,8 @@ begin end else begin + ur.data_usage:=ur.data_usage and (not TM_READ); //reset read + ur.PushBarrier(buf^.cmdbuf, ord(VK_ACCESS_TRANSFER_READ_BIT), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, diff --git a/vulkan/vImage.pas b/vulkan/vImage.pas index 3a39e9ad..605454bf 100644 --- a/vulkan/vImage.pas +++ b/vulkan/vImage.pas @@ -110,14 +110,15 @@ type TvImage=class(TvCustomImage) FFormat:TVkFormat; - FUsage:TVkFlags; FExtent:TVkExtent3D; + FUsage:TVkFlags; + Fflags:TVkImageCreateFlags; Barrier:TvImageBarrier; - Constructor Create(format:TVkFormat;extent:TVkExtent3D;usage:TVkFlags;ext:Pointer=nil); + Constructor Create(format:TVkFormat;extent:TVkExtent3D;usage:TVkFlags;flags:TVkImageCreateFlags;ext:Pointer=nil); function GetImageInfo:TVkImageCreateInfo; override; function GetViewInfo:TVkImageViewCreateInfo; virtual; abstract; function NewView:TvImageView; - //function NewViewF(Format:TVkFormat):TvImageView; + function NewViewF(Format:TVkFormat):TvImageView; procedure PushBarrier(cmd:TVkCommandBuffer; range:TVkImageSubresourceRange; dstAccessMask:TVkAccessFlags; @@ -462,11 +463,12 @@ begin Result:=True; end; -Constructor TvImage.Create(format:TVkFormat;extent:TVkExtent3D;usage:TVkFlags;ext:Pointer=nil); +Constructor TvImage.Create(format:TVkFormat;extent:TVkExtent3D;usage:TVkFlags;flags:TVkImageCreateFlags;ext:Pointer=nil); begin FFormat:=format; - FUsage:=usage; FExtent:=extent; + FUsage:=usage; + Fflags:=flags; Barrier.Init; Compile(ext); end; @@ -477,9 +479,15 @@ begin Result.format:=FFormat; Result.extent:=FExtent; Result.usage :=FUsage; + Result.flags :=Fflags; end; function TvImage.NewView:TvImageView; +begin + Result:=NewViewF(FFormat); +end; + +function TvImage.NewViewF(Format:TVkFormat):TvImageView; var cinfo:TVkImageViewCreateInfo; FImg:TVkImageView; @@ -487,7 +495,8 @@ var begin Result:=nil; cinfo:=GetViewInfo; - cinfo.image:=FHandle; + cinfo.image :=FHandle; + cinfo.format:=Format; FImg:=VK_NULL_HANDLE; r:=vkCreateImageView(Device.FHandle,@cinfo,nil,@FImg); if (r<>VK_SUCCESS) then @@ -531,29 +540,6 @@ begin dstStageMask); end; -{ -function TvImage.NewViewF(Format:TVkFormat):TvImageView; -var - cinfo:TVkImageViewCreateInfo; - FImg:TVkImageView; - r:TVkResult; -begin - Result:=nil; - cinfo:=GetViewInfo; - cinfo.image :=FHandle; - cinfo.format:=Format; - FImg:=VK_NULL_HANDLE; - r:=vkCreateImageView(Device.FHandle,@cinfo,nil,@FImg); - if (r<>VK_SUCCESS) then - begin - Writeln('vkCreateImageView:',r); - Exit; - end; - Result:=TvImageView.Create; - Result.FHandle:=FImg; -end; -} - Procedure TvImageView.Acquire; begin System.InterlockedIncrement(Pointer(FRefs)); diff --git a/vulkan/vImageTiling.pas b/vulkan/vImageTiling.pas index 55903e8c..bd1623e2 100644 --- a/vulkan/vImageTiling.pas +++ b/vulkan/vImageTiling.pas @@ -24,21 +24,45 @@ implementation uses vImageManager; -Function getFormatSize(cformat:TVkFormat):Byte; +Function getFormatSize(cformat:TVkFormat):Byte; //in bytes begin Result:=0; Case cformat of - VK_FORMAT_R8G8B8A8_SRGB :Result:=4; - VK_FORMAT_R8G8B8A8_UNORM :Result:=4; - VK_FORMAT_R8G8_UNORM :Result:=2; - VK_FORMAT_R8_UNORM :Result:=1; - VK_FORMAT_R8_UINT :Result:=4; - VK_FORMAT_R5G6B5_UNORM_PACK16:Result:=2; + //pixel size + VK_FORMAT_R8G8B8A8_SRGB :Result:=4; + VK_FORMAT_R8G8B8A8_UNORM :Result:=4; + VK_FORMAT_R8G8_UNORM :Result:=2; + VK_FORMAT_R8_UNORM :Result:=1; + VK_FORMAT_R8_UINT :Result:=4; + VK_FORMAT_R5G6B5_UNORM_PACK16 :Result:=2; + + //texel size + VK_FORMAT_BC1_RGB_UNORM_BLOCK.. + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK.. + VK_FORMAT_BC4_SNORM_BLOCK :Result:=8; + + VK_FORMAT_BC2_UNORM_BLOCK.. + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK.. + VK_FORMAT_BC7_SRGB_BLOCK :Result:=16; + else Assert(false,'TODO'); end; end; +function IsTexelFormat(cformat:TVkFormat):Boolean; +begin + Case cformat of + VK_FORMAT_BC1_RGB_UNORM_BLOCK.. + VK_FORMAT_BC7_SRGB_BLOCK: + Result:=True; + else + Result:=False; + end; +end; + { Procedure _Load_Linear(cmd:TvCustomCmdBuffer;image:TvImage2); var @@ -238,10 +262,13 @@ var //tp:TilingParameters; tiler:Tiler1d; //mtm:Byte; - size,i,x,y:QWORD; + size,i,x,y,z:QWORD; + m_bytePerElement:Word; m_bitsPerElement:Word; + m_slice_size:DWORD; + //m_macroTileWidth :DWORD; //m_macroTileHeight:DWORD; @@ -276,23 +303,29 @@ begin //tiler:=Default(Tiler2d); //tiler.init(tp); - Case image.key.cformat of - VK_FORMAT_BC1_RGB_UNORM_BLOCK, - VK_FORMAT_BC1_RGB_SRGB_BLOCK, - VK_FORMAT_BC1_RGBA_UNORM_BLOCK, - VK_FORMAT_BC1_RGBA_SRGB_BLOCK, - VK_FORMAT_BC3_UNORM_BLOCK, - VK_FORMAT_BC3_SRGB_BLOCK: - begin - _Load_Linear(cmd,image); - Exit; - end; - else - end; + //[kDataFormatBc3UnormSrgb] + //m_minGpuMode:0 + //m_tileMode:13 + //m_arrayMode:2 + //m_linearWidth:128 + //m_linearHeight:128 + //m_linearDepth:1 + //m_paddedWidth:128 + //m_paddedHeight:128 + //m_paddedDepth:1 + //m_bitsPerElement:128 + //m_linearSizeBytes:262144 + //m_tiledSizeBytes:262144 + //m_microTileMode:1 + //m_tileThickness:1 + //m_tileBytes:1024 + //m_tilesPerRow:16 + //m_tilesPerSlice:256 tiler:=Texture2d_32; - m_bitsPerElement:=getFormatSize(image.key.cformat)*8; + m_bytePerElement:=getFormatSize(image.key.cformat); + m_bitsPerElement:=m_bytePerElement*8; tiler.m_bitsPerElement:=m_bitsPerElement; @@ -300,26 +333,41 @@ begin tiler.m_linearHeight:=image.key.params.extend.height; tiler.m_linearDepth :=image.key.params.extend.depth; - tiler.m_linearSizeBytes:=tiler.m_linearWidth*tiler.m_linearHeight*tiler.m_linearDepth*(m_bitsPerElement div 8); + if IsTexelFormat(image.key.cformat) then + begin + tiler.m_linearWidth :=(tiler.m_linearWidth +3) div 4; + tiler.m_linearHeight:=(tiler.m_linearHeight+3) div 4; + tiler.m_linearDepth :=(tiler.m_linearDepth +3) div 4; + end; + + tiler.m_linearSizeBytes:=tiler.m_linearWidth*tiler.m_linearHeight*tiler.m_linearDepth*m_bytePerElement; tiler.m_tileBytes := (kMicroTileWidth * kMicroTileHeight * tiler.m_tileThickness * m_bitsPerElement + 7) div 8; + if IsTexelFormat(image.key.cformat) then + begin + tiler.m_paddedWidth :=tiler.m_linearWidth ; + tiler.m_paddedHeight:=tiler.m_linearHeight; + tiler.m_paddedDepth :=tiler.m_linearDepth ; + end else Case m_bitsPerElement of 32:begin tiler.m_paddedWidth :=((tiler.m_linearWidth +7) div 8)*8; tiler.m_paddedHeight:=((tiler.m_linearHeight+7) div 8)*8; + tiler.m_paddedDepth :=tiler.m_linearDepth; end; 8:begin tiler.m_paddedWidth :=((tiler.m_linearWidth +31) div 32)*32; - tiler.m_paddedHeight:=((tiler.m_linearHeight+7) div 8)*8; + tiler.m_paddedHeight:=((tiler.m_linearHeight+ 7) div 8)*8; + tiler.m_paddedDepth :=tiler.m_linearDepth; end; else Assert(false); end; - tiler.m_paddedDepth :=tiler.m_linearDepth; - tiler.m_tiledSizeBytes:=tiler.m_paddedWidth*tiler.m_paddedHeight*tiler.m_paddedDepth*(m_bitsPerElement div 8); + + tiler.m_tiledSizeBytes:=tiler.m_paddedWidth*tiler.m_paddedHeight*tiler.m_paddedDepth*m_bytePerElement; tiler.m_tilesPerRow:=tiler.m_paddedWidth div kMicroTileWidth; @@ -332,8 +380,10 @@ begin //m_tilesPerRow = m_paddedWidth / kMicroTileWidth; //m_tilesPerSlice = std::max(m_tilesPerRow * (m_paddedHeight / kMicroTileHeight), 1U); - size:=image.key.params.extend.width* - image.key.params.extend.height*(m_bitsPerElement div 8); + size:=tiler.m_linearWidth* + tiler.m_linearHeight* + tiler.m_linearDepth* + m_bytePerElement; buf:=TvTempBuffer.Create(size,ord(VK_BUFFER_USAGE_TRANSFER_SRC_BIT),nil); buf.Fhost:=MemManager.Alloc(buf.GetRequirements,ord(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); @@ -349,20 +399,19 @@ begin //pData:=AllocMem(size); - Assert(image.key.params.extend.depth=1); + m_slice_size:=(tiler.m_linearWidth*tiler.m_linearHeight); - For y:=0 to image.key.params.extend.height-1 do - For x:=0 to image.key.params.extend.width-1 do - begin - i:=0; - tiler.getTiledElementBitOffset(i,x,y,0); - i:=i div 8; - pSrc:=@PByte(image.key.Addr)[i]; - pDst:=@PByte(pData)[(y*image.key.params.extend.width+x)*(m_bitsPerElement div 8)]; - Move(pSrc^,pDst^,(m_bitsPerElement div 8)); - //i:=i div 4; - //pData[y*image.key.params.extend.width+x]:={Random($FFFFFFFF);}PDWORD(image.key.Addr)[i]; - end; + For z:=0 to tiler.m_linearDepth-1 do + For y:=0 to tiler.m_linearHeight-1 do + For x:=0 to tiler.m_linearWidth-1 do + begin + i:=0; + tiler.getTiledElementBitOffset(i,x,y,z); + i:=i div 8; + pSrc:=@PByte(image.key.Addr)[i]; + pDst:=@PByte(pData)[(z*m_slice_size+y*tiler.m_linearWidth+x)*m_bytePerElement]; + Move(pSrc^,pDst^,m_bytePerElement); + end; //Move(pData^,image.key.Addr^,size); //FreeMem(pData); diff --git a/vulkan/vMemory.pas b/vulkan/vMemory.pas index 59f1d105..a93d69bf 100644 --- a/vulkan/vMemory.pas +++ b/vulkan/vMemory.pas @@ -661,7 +661,7 @@ begin InitVulkan; //Some games request too much video memory, relevant for built-in iGPU - if (len>1024*1024*1024) then len:=1024*1024*1024; + //if (len>1024*1024*1024) then len:=1024*1024*1024; Result:=vkAllocHostPointer(Device.FHandle,len,MemManager.FHostVisibMt{FHostCacheMt},addr); Assert(Result<>VK_NULL_HANDLE); diff --git a/vulkan/vShaderExt.pas b/vulkan/vShaderExt.pas index 2e0e803b..45e01c6c 100644 --- a/vulkan/vShaderExt.pas +++ b/vulkan/vShaderExt.pas @@ -470,7 +470,10 @@ var begin Result:=nil; if (Length(addr)=0) then Exit; - pSharp:=nil; + + pSharp:=pData; + + if (Length(addr)>1) then For i:=High(addr)-1 downto 0 do begin pData:=pData+addr[i].offset; @@ -498,7 +501,7 @@ begin pData:=Pointer(PTSharpResource4(pData)^.base shl 8); end; else - Exit; + Assert(false); end; end; @@ -609,6 +612,7 @@ var b:TBufBindExt; i,stride:Integer; begin + Assert(PV<>nil); if (PV=nil) then Exit; //print_vsharp(PV); @@ -634,6 +638,7 @@ var b:TBufBindExt; i:Integer; begin + Assert(P<>nil); if (P=nil) or (size=0) then Exit; b:=Default(TBufBindExt); @@ -654,6 +659,7 @@ var b:TImageBindExt; i:Integer; begin + Assert(PT<>nil); if (PT=nil) then Exit; //print_tsharp4(PT); @@ -675,6 +681,7 @@ var P:Pointer; begin P:=GetSharpByPatch(FData,b.addr); + Assert(P<>nil); if (P=nil) then Exit; Case TVkDescriptorType(b.dtype) of @@ -729,6 +736,7 @@ var b:TSamplerBindExt; i:Integer; begin + Assert(PS<>nil); if (PS=nil) then Exit; //print_ssharp4(PS); diff --git a/vulkan/vShaderManager.pas b/vulkan/vShaderManager.pas index 27600ad0..9a98dff8 100644 --- a/vulkan/vShaderManager.pas +++ b/vulkan/vShaderManager.pas @@ -202,7 +202,8 @@ begin SprvEmit.FBuffers.cfg.minStorageBufferOffsetAlignment:=limits.minStorageBufferOffsetAlignment; // $10 SprvEmit.FBuffers.cfg.minUniformBufferOffsetAlignment:=limits.minUniformBufferOffsetAlignment; // $100 - SprvEmit.FBuffers.cfg.maxPushConstantsSize:=0; + SprvEmit.FBuffers.cfg.maxPushConstantsSize:=16*4; + SprvEmit.FBuffers.cfg.maxPushConstantsSize:=12; //SprvEmit.FUseVertexInput:=False; if (SprvEmit.Parse(pData)>1) then