diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index cc1498fb..d84c41ef 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -1689,13 +1689,13 @@ begin pctx^.Flush_stream(stGfxDcb); end; -procedure onPushMarker(pctx:p_pfp_ctx;Body:PChar); +procedure onPushMarker(pctx:p_pfp_ctx;Body:PChar;size:Integer); {size: upper bound on the length of Body, forwarded to Hint} begin if p_print_gpu_hint then begin Writeln('\HINT_PUSH_MARKER:',Body); end; - pctx^.stream[pctx^.stream_type].Hint('\HINT_PUSH_MARKER:',Body); + pctx^.stream[pctx^.stream_type].Hint('\HINT_PUSH_MARKER:',Body,size); {NOTE(review): the Writeln above still prints Body up to its first #0 and does not honour size} end; procedure onPopMarker(pctx:p_pfp_ctx); @@ -1704,16 +1704,25 @@ begin begin Writeln('\HINT_POP_MARKER'); end; - pctx^.stream[pctx^.stream_type].Hint('\HINT_POP_MARKER',''); + pctx^.stream[pctx^.stream_type].Hint('\HINT_POP_MARKER','',0); {no payload: empty string with a zero length bound} end; -procedure onSetMarker(pctx:p_pfp_ctx;Body:PChar); +procedure onSetMarker(pctx:p_pfp_ctx;Body:PChar;size:Integer); {size: upper bound on the length of Body, forwarded to Hint} begin if p_print_gpu_hint then begin Writeln('\HINT_SET_MARKER:',Body); end; - pctx^.stream[pctx^.stream_type].Hint('\HINT_SET_MARKER:',Body); + pctx^.stream[pctx^.stream_type].Hint('\HINT_SET_MARKER:',Body,size); {NOTE(review): the Writeln above still prints Body up to its first #0 and does not honour size} +end; + +procedure onMarker(pctx:p_pfp_ctx;Body:PChar;size:Integer); {NOTE(review): Body and size are accepted but unused here; only the bare tag is printed and recorded - confirm this is intended} +begin + if p_print_gpu_hint then + begin + Writeln('\HINT_MARKER'); + end; + pctx^.stream[pctx^.stream_type].Hint('\HINT_MARKER','',0); end; procedure onWidthHeight(Body:PWORD); @@ -1796,7 +1805,7 @@ begin OP_HINT_PUSH_MARKER: begin - onPushMarker(pctx,@Body[2]); + onPushMarker(pctx,@Body[2],PM4_LENGTH(Body[0]) - 8); {NOTE(review): assumes PM4_LENGTH yields a byte count that includes the 8 bytes before Body[2] - confirm; a shorter packet would make this negative} end; OP_HINT_POP_MARKER: @@ -1804,10 +1813,14 @@ begin onPopMarker(pctx); end; - OP_HINT_SET_MARKER, + OP_HINT_SET_MARKER: + begin + onSetMarker(pctx,@Body[2],PM4_LENGTH(Body[0]) - 8); + end; + OP_HINT_MARKER: begin - onSetMarker(pctx,@Body[2]); + onMarker(pctx,@Body[2],PM4_LENGTH(Body[0]) - 8); {OP_HINT_MARKER no longer shares the onSetMarker path; it gets its own handler} end; OP_HINT_PREPARE_FLIP_LABEL: diff --git a/chip/pm4_stream.pas b/chip/pm4_stream.pas index d4695c1e..1b0d4039 100644 --- a/chip/pm4_stream.pas +++ b/chip/pm4_stream.pas @@ -226,14 +226,14 @@ type
init_scope:t_pm4_resource_init_scope; // function find_image_resource (const rkey:TvImageKey):p_pm4_resource; - function fetch_image_resource (const rkey:TvImageKey):p_pm4_resource; + function fetch_image_resource (const rkey:TvImageKey;hint:PChar):p_pm4_resource; function find_buffer_resource (addr:Pointer;size:DWORD):p_pm4_resource; - function fetch_buffer_resource (addr:Pointer;size:DWORD):p_pm4_resource; + function fetch_buffer_resource (addr:Pointer;size:DWORD;hint:PChar):p_pm4_resource; function find_htile_resource (addr:Pointer;size:DWORD):p_pm4_resource; function fetch_htile_resource (const rkey:TvImageKey;size:DWORD):p_pm4_resource; function fetch_resource_instance (scope:p_pm4_resource_curr_scope;r:p_pm4_resource;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance; - function insert_image_resource (scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance; - function insert_buffer_resource (scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer):p_pm4_resource_instance; + function insert_image_resource (scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage;hint:PChar):p_pm4_resource_instance; + function insert_buffer_resource (scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer;hint:PChar):p_pm4_resource_instance; function insert_htile_resource (scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;size:DWORD;mem_usage:Integer):p_pm4_resource_instance; procedure connect_resource_instance (i:p_pm4_resource_instance); procedure connect_resource_scope (scope:p_pm4_resource_curr_scope); @@ -390,7 +390,7 @@ type procedure Acquire; function Release:Boolean; // - procedure Hint (P1,P2:PChar); + procedure Hint (P1,P2:PChar;maxsize:Integer); procedure LoadConstRam (addr:Pointer;num_dw,offset:Word); procedure IncrementCE (); procedure WaitOnDECounterDiff(diff:DWORD); @@ -594,7 +594,7 @@ begin 
Result:=resource_set.Find(@tmp); end; -function t_pm4_resource_stream_scope.fetch_image_resource(const rkey:TvImageKey):p_pm4_resource; +function t_pm4_resource_stream_scope.fetch_image_resource(const rkey:TvImageKey;hint:PChar):p_pm4_resource; {hint: caller tag, used only in the p_print_gpu_ops trace line} var tmp:t_pm4_resource; begin @@ -608,6 +608,11 @@ begin begin tmp.rsize:=get_image_size(rkey); + if p_print_gpu_ops then + begin + Writeln('fetch_image_resource(',hint,'):0x',HexStr(rkey.Addr),' 0x',HexStr(tmp.rsize,4)); {print hint in the same format as fetch_buffer_resource; it was otherwise unused} + end; + Result:=allocator.Alloc(SizeOf(t_pm4_resource)); Result^:=tmp; @@ -627,7 +632,7 @@ begin Result:=resource_set.Find(@tmp); end; -function t_pm4_resource_stream_scope.fetch_buffer_resource(addr:Pointer;size:DWORD):p_pm4_resource; +function t_pm4_resource_stream_scope.fetch_buffer_resource(addr:Pointer;size:DWORD;hint:PChar):p_pm4_resource; {hint: caller tag, used only in the p_print_gpu_ops trace line} var tmp:t_pm4_resource; begin @@ -643,6 +648,11 @@ begin Result:=allocator.Alloc(SizeOf(t_pm4_resource)); Result^:=tmp; + if p_print_gpu_ops then + begin + Writeln('fetch_buffer_resource(',hint,'):0x',HexStr(addr),' 0x',HexStr(size,4)); + end; + resource_set.Insert(Result); end; end; @@ -717,14 +727,14 @@ begin Result^.curr:=Result^.curr + curr; end; -function t_pm4_resource_stream_scope.insert_image_resource(scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage):p_pm4_resource_instance; +function t_pm4_resource_stream_scope.insert_image_resource(scope:p_pm4_resource_curr_scope;const rkey:TvImageKey;mem_usage:Integer;img_usage:s_image_usage;hint:PChar):p_pm4_resource_instance; var r:p_pm4_resource; i:p_pm4_resource_instance; begin if (rkey.cformat=VK_FORMAT_UNDEFINED) then Exit(nil); - r:=fetch_image_resource(rkey); + r:=fetch_image_resource(rkey,hint); {hint is passed through unchanged} i:=fetch_resource_instance(scope,r,mem_usage,img_usage); if ((mem_usage and TM_READ)<>0) then @@ -739,12 +749,12 @@ begin Result:=i; end; -function
t_pm4_resource_stream_scope.insert_buffer_resource(scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer):p_pm4_resource_instance; +function t_pm4_resource_stream_scope.insert_buffer_resource(scope:p_pm4_resource_curr_scope;addr:Pointer;size:DWORD;mem_usage:Integer;hint:PChar):p_pm4_resource_instance; var r:p_pm4_resource; i:p_pm4_resource_instance; begin - r:=fetch_buffer_resource(addr,size); + r:=fetch_buffer_resource(addr,size,hint); i:=fetch_resource_instance(scope,r,mem_usage,[iu_buffer]); if ((mem_usage and TM_READ)<>0) then @@ -900,20 +910,32 @@ end; // -procedure t_pm4_stream.Hint(P1,P2:PChar); +procedure t_pm4_stream.Hint(P1,P2:PChar;maxsize:Integer); {adds an ntHint node whose data is P1 followed by at most maxsize bytes of P2 and a terminating #0} var - len1,len2:ptruint; + len1,len2:Integer; node:p_pm4_node_Hint; begin len1:=StrLen(P1); - len2:=StrLen(P2); + + len2:=0; {bounded length of P2: it may have no #0 within maxsize bytes; maxsize<=0 yields an empty payload} + while (len2<maxsize) do + begin + {test the current byte before counting it, so P2[0] is checked and P2[maxsize] is never read} + if (P2[len2]=#0) then + begin + Break; + end; + Inc(len2); + end; + node:=allocator.Alloc(SizeOf(t_pm4_node_Hint)+len1+len2+1); node^.ntype :=ntHint; node^.scope :=Default(t_pm4_resource_curr_scope); - Move(P1^,node^.data,len1); - Move(P2^,PChar(@node^.data)[len1],len2+1); + Move(P1^,PChar(@node^.data)[0] ,len1); + Move(P2^,PChar(@node^.data)[len1],len2); + PChar(@node^.data)[len1+len2]:=#0; {terminate explicitly: P2 is not guaranteed to supply the #0} add_node(node); end; @@ -933,7 +955,8 @@ begin insert_buffer_resource(@node^.scope, addr, num_dw*SizeOf(DWORD), - TM_READ); + TM_READ, + 'LoadConstRam'); add_node(node); end; @@ -1009,7 +1032,8 @@ begin insert_buffer_resource(@node^.scope, addr, get_data_size, - TM_WRITE); + TM_WRITE, + 'EventWriteEop'); end; add_node(node); @@ -1044,7 +1068,8 @@ begin insert_buffer_resource(@node^.scope, addr, get_data_size, - TM_WRITE); + TM_WRITE, + 'EventWriteEos'); end; add_node(node); @@ -1098,7 +1123,8 @@ begin insert_buffer_resource(@node^.scope, addr, get_data_size, - TM_WRITE); + TM_WRITE, + 'ReleaseMem'); end; add_node(node); @@ -1127,7 +1153,8 @@ begin insert_buffer_resource(@node^.scope, Pointer(srcOrData), numBytes, - TM_READ); +
TM_READ, + 'DmaData'); end; else; end; @@ -1140,7 +1167,8 @@ begin insert_buffer_resource(@node^.scope, Pointer(dst), numBytes, - TM_WRITE); + TM_WRITE, + 'DmaData'); end; end; @@ -1172,7 +1200,8 @@ begin insert_buffer_resource(@node^.scope, src, num_dw*SizeOf(DWORD), - TM_READ); + TM_READ, + 'WriteData'); end; case dstSel of @@ -1184,7 +1213,8 @@ begin insert_buffer_resource(@node^.scope, Pointer(dst), num_dw*SizeOf(DWORD), - TM_WRITE); + TM_WRITE, + 'WriteData'); end; else; end; @@ -1323,7 +1353,8 @@ begin insert_image_resource(@node^.scope, FImage, memuse, - [iu_sampled]); + [iu_sampled], + 'Init_Uniforms'); end; vbStorage, vbMipStorage: @@ -1331,7 +1362,8 @@ begin insert_image_resource(@node^.scope, FImage, memuse, - [iu_storage]); + [iu_storage], + 'Init_Uniforms'); end; else Assert(false); @@ -1351,7 +1383,8 @@ begin insert_buffer_resource(@node^.scope, addr, size, - memuse); + memuse, + 'Init_Uniforms'); end; end; @@ -1380,7 +1413,8 @@ begin insert_buffer_resource(@node^.scope, addr, Shader.FPushConst.size, - TM_READ); + TM_READ, + 'Init_Pushs'); end; end; @@ -1418,7 +1452,8 @@ begin insert_image_resource(@node^.scope, RT.FImageInfo, RT.IMAGE_USAGE, - [iu_attachment]); + [iu_attachment], + 'Build_rt_info'); // @@ -1438,7 +1473,8 @@ begin resource_instance:=insert_image_resource(@node^.scope, GetDepthOnly(rt_info.DB_INFO.FImageInfo), rt_info.DB_INFO.DEPTH_USAGE, - [iu_depthstenc]); + [iu_depthstenc], + 'Build_rt_info'); if (resource_instance<>nil) then with resource_instance^.resource^ do @@ -1449,7 +1485,8 @@ begin resource_instance:=insert_image_resource(@node^.scope, GetStencilOnly(rt_info.DB_INFO.FImageInfo), rt_info.DB_INFO.STENCIL_USAGE, - [iu_depthstenc]); + [iu_depthstenc], + 'Build_rt_info'); if (resource_instance<>nil) then with resource_instance^.resource^ do diff --git a/vulkan/vPipelineManager.pas b/vulkan/vPipelineManager.pas index 3f7e2f51..ba3c13ec 100644 --- a/vulkan/vPipelineManager.pas +++ b/vulkan/vPipelineManager.pas @@ -320,6 +320,20 @@ 
begin viewportState.scissorCount :=Key.viewportCount; viewportState.pScissors :=@Key.Scissors; + pFeature:=nil; //init ext + + if limits.VK_EXT_depth_clip_control then + if (Key.ClipSpace=VK_TRUE) then + begin + ClipSpace:=Default(TVkPipelineViewportDepthClipControlCreateInfoEXT); + ClipSpace.sType :=VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT; + ClipSpace.negativeOneToOne:=Key.ClipSpace; + // + add_feature(@ClipSpace); + end; + + viewportState.pNext:=pFeature; //save ext + colorBlending:=Default(TVkPipelineColorBlendStateCreateInfo); colorBlending.sType :=VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; colorBlending.logicOpEnable :=ord(Key.colorBlending.logicOp<>ord(VK_LOGIC_OP_COPY)); @@ -339,7 +353,8 @@ begin end; rasterizer:=Key.rasterizer; - pFeature:=nil; + + pFeature:=nil; //init ext if limits.VK_EXT_provoking_vertex then begin @@ -350,16 +365,6 @@ begin add_feature(@ProvokingVertex); end; - if limits.VK_EXT_depth_clip_control then - if (Key.ClipSpace=VK_TRUE) then - begin - ClipSpace:=Default(TVkPipelineViewportDepthClipControlCreateInfoEXT); - ClipSpace.sType :=VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT; - ClipSpace.negativeOneToOne:=Key.ClipSpace; - // - add_feature(@ClipSpace); - end; - if limits.VK_EXT_depth_clip_enable then begin DepthClip:=Default(TVkPipelineRasterizationDepthClipStateCreateInfoEXT); @@ -369,7 +374,7 @@ begin add_feature(@DepthClip); end; - rasterizer.pNext:=pFeature; + rasterizer.pNext:=pFeature; //save ext info.sType :=VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.pStages :=@Stages; diff --git a/vulkan/vRegs2Vulkan.pas b/vulkan/vRegs2Vulkan.pas index d8764a0d..0c2e9304 100644 --- a/vulkan/vRegs2Vulkan.pas +++ b/vulkan/vRegs2Vulkan.pas @@ -6,6 +6,7 @@ interface uses sysutils, + math, bittype, half16, sys_bootparam, @@ -203,7 +204,6 @@ Function TGPU_REGS.GET_VPORT(i:Byte):TVkViewport; //0..15 var V:TVPORT_SCALE_OFFSET; VTE_CNTL:TPA_CL_VTE_CNTL; - reduce_z:Single; 
begin Result:=Default(TVkViewport); V:=CX_REG^.PA_CL_VPORT_SCALE_OFFSET[i]; @@ -221,23 +221,33 @@ begin Assert(VTE_CNTL.VTX_Z_FMT =0,'VTE_CNTL.VTX_Z_FMT' ); Assert(VTE_CNTL.VTX_W0_FMT=1,'VTE_CNTL.VTX_W0_FMT'); - if limits.VK_EXT_depth_clip_control and - (CX_REG^.PA_CL_CLIP_CNTL.DX_CLIP_SPACE_DEF=0) then - begin - //[-1..1] - reduce_z:=1; - end else - begin - //[0..1] - reduce_z:=0; - end; - Result.x :=V.XOFFSET-V.XSCALE; Result.y :=V.YOFFSET-V.YSCALE; Result.width :=V.XSCALE*2; Result.height :=V.YSCALE*2; - Result.minDepth:=V.ZOFFSET - V.ZSCALE * reduce_z; - Result.maxDepth:=V.ZOFFSET + V.ZSCALE; + + if limits.VK_EXT_depth_clip_control and //or emulate in shader? + (CX_REG^.PA_CL_CLIP_CNTL.DX_CLIP_SPACE_DEF=0) then + begin + //[-1..1] + Result.minDepth:=V.ZOFFSET-V.ZSCALE; + end else + begin + //[0..1] + Result.minDepth:=V.ZOFFSET; + end; + Result.maxDepth:=V.ZOFFSET+V.ZSCALE; + + if (not SameValue(Result.minDepth,CX_REG^.PA_SC_VPORT_ZMIN_MAX[i].ZMIN)) or + (not SameValue(Result.maxDepth,CX_REG^.PA_SC_VPORT_ZMIN_MAX[i].ZMAX)) then + begin + //VK_EXT_depth_clamp_control + //VkPipelineViewportDepthClampControlCreateInfoEXT + Writeln(stderr,'TODO:VK_EXT_depth_clamp_control'); + Writeln(stderr,' minDepth:',Result.minDepth,' ZMIN:',CX_REG^.PA_SC_VPORT_ZMIN_MAX[i].ZMIN); + Writeln(stderr,' maxDepth:',Result.maxDepth,' ZMAX:',CX_REG^.PA_SC_VPORT_ZMIN_MAX[i].ZMAX); + Assert(false,'TODO:VK_EXT_depth_clamp_control'); + end; end; Function _fix_scissor_range(i:Word):Word; @@ -1206,15 +1216,19 @@ begin if ((Result.DEPTH_USAGE and TM_CLEAR)=0) then begin Result.ds_state.depthCompareOp:=TVkCompareOp(DEPTH_CONTROL.ZFUNC); //1:1 + // + Result.ds_state.minDepthBounds:=PSingle(@CX_REG^.DB_DEPTH_BOUNDS_MIN)^; + Result.ds_state.maxDepthBounds:=PSingle(@CX_REG^.DB_DEPTH_BOUNDS_MAX)^; end else begin //force clear all - Result.ds_state.depthCompareOp:=VK_COMPARE_OP_NEVER; + Result.ds_state.depthCompareOp:=VK_COMPARE_OP_ALWAYS; + // + Result.ds_state.depthBoundsTestEnable:=VK_TRUE; + 
Result.ds_state.minDepthBounds:=Result.CLEAR_VALUE.depthStencil.depth; + Result.ds_state.maxDepthBounds:=Result.CLEAR_VALUE.depthStencil.depth; end; - Result.ds_state.minDepthBounds:=PSingle(@CX_REG^.DB_DEPTH_BOUNDS_MIN)^; - Result.ds_state.maxDepthBounds:=PSingle(@CX_REG^.DB_DEPTH_BOUNDS_MAX)^; - Assert(DEPTH_CONTROL.ENABLE_COLOR_WRITES_ON_DEPTH_FAIL =0,'ENABLE_COLOR_WRITES_ON_DEPTH_FAIL' ); Assert(DEPTH_CONTROL.DISABLE_COLOR_WRITES_ON_DEPTH_PASS=0,'DISABLE_COLOR_WRITES_ON_DEPTH_PASS');