From 5edf682c81cd58d50bb9fd071a18a634612c8bc0 Mon Sep 17 00:00:00 2001 From: Pavel <68122101+red-prig@users.noreply.github.com> Date: Tue, 6 Dec 2022 21:12:24 +0300 Subject: [PATCH] VOP1?13 and etc --- spirv/emit_exp.pas | 16 +++++++++++++--- spirv/emit_op.pas | 10 ++++++++++ spirv/emit_post_op.pas | 37 +++++++++++++++++++++++++++++++++++++ spirv/emit_vop1.pas | 18 ++++++++++++++++++ spirv/emit_vop2.pas | 18 ++++++++++++++++++ spirv/emit_vop3.pas | 40 +++++++++++++++++++++++++++------------- spirv/srType.pas | 20 ++++++++++++++++++++ spirv/srTypes.pas | 1 + 8 files changed, 144 insertions(+), 16 deletions(-) diff --git a/spirv/emit_exp.pas b/spirv/emit_exp.pas index b8194374..3fa116d3 100644 --- a/spirv/emit_exp.pas +++ b/spirv/emit_exp.pas @@ -38,15 +38,20 @@ Var rtype:TsrDataType; f,i,p:DWORD; + push_count:DWORD; begin //if (VM<>0) and (EXEC<>0) = set pixel else (if DONE=1) discard pixel /(PS only) + push_count:=0; + pOpBlock:=nil; if (FSPI.EXP.VM<>0) and (FSPI.EXP.DONE<>0) then begin pOpBlock:=AllocBlockOp; pOpBlock^.SetInfo(btOther,Cursor.Adr,Cursor.Adr); + PushBlockOp(line,pOpBlock,nil); + Inc(push_count); exc:=MakeRead(get_exec0,dtBool); node:=AddSpirvOp(OpMakeExp); @@ -57,16 +62,21 @@ begin if (TpsslExportType(FSPI.EXP.TGT)=etNull) //only set kill mask or (FSPI.EXP.EN=0) then //nop begin - if (pOpBlock<>nil) then //is pushed + + While (push_count<>0) do begin Main^.PopBlock; + Dec(push_count); end; + Exit; end; pOpBlock:=AllocBlockOp; //down pOpBlock^.SetInfo(btOther,Cursor.Adr,Cursor.Adr); + PushBlockOp(line,pOpBlock,nil); + Inc(push_count); //output @@ -172,10 +182,10 @@ begin OpStore(line,dout,dst); end; - if (pOpBlock<>nil) then //is pushed + While (push_count<>0) do begin Main^.PopBlock; - Main^.PopBlock; + Dec(push_count); end; end; diff --git a/spirv/emit_op.pas b/spirv/emit_op.pas index 0f4f95cd..24b5671c 100644 --- a/spirv/emit_op.pas +++ b/spirv/emit_op.pas @@ -141,6 +141,8 @@ type function OpAndTo(src0,src1:PsrRegNode;ppLine:PPspirvOp=nil):PsrRegNode; function OpAndTo(src0:PsrRegNode;src1:QWORD;ppLine:PPspirvOp=nil):PsrRegNode; // + function OpBitCountTo(src:PsrRegNode;ppLine:PPspirvOp=nil):PsrRegNode; + // function OpImageSampleImplicitLod(pLine:PspirvOp;img:PsrNode;dst,coord:PsrRegNode):PSpirvOp; function OpImageSampleExplicitLod(pLine:PspirvOp;img:PsrNode;dst,coord:PsrRegNode):PSpirvOp; function OpImageSampleDrefImplicitLod(pLine:PspirvOp;img:PsrNode;dst,coord,pcf:PsrRegNode):PSpirvOp; @@ -1259,6 +1261,14 @@ end; // +function TEmitOp.OpBitCountTo(src:PsrRegNode;ppLine:PPspirvOp=nil):PsrRegNode; +begin + Result:=NewReg(src^.dtype); + _set_line(ppLine,_Op1(_get_line(ppLine),Op.OpBitCount,Result,src)); +end; + +// + function TEmitOp.OpImageSampleImplicitLod(pLine:PspirvOp;img:PsrNode;dst,coord:PsrRegNode):PSpirvOp; Var node:PSpirvOp; diff --git a/spirv/emit_post_op.pas b/spirv/emit_post_op.pas index 7f77f09d..d18ef686 100644 --- a/spirv/emit_post_op.pas +++ b/spirv/emit_post_op.pas @@ -49,6 +49,8 @@ type function OnNot1(node:PSpirvOp):Integer; function OnBranchConditional1(node:PSpirvOp):Integer; // + function OpBitCount1(node:PSpirvOp):Integer; + // function OnSelect1(node:PSpirvOp):Integer; // procedure MakeVecConst(rtype:TsrDataType;dst:PsrRegNode;src:PPsrRegNode); @@ -103,6 +105,8 @@ begin Op.OpBranchConditional:Result:=OnBranchConditional1(node); + Op.OpBitCount :Result:=OpBitCount1(node); + else; end; end; @@ -779,6 +783,39 @@ begin Inc(Result); end; +function TEmitPostOp.OpBitCount1(node:PSpirvOp):Integer; +var + dst,src:PsrRegNode; + data:QWORD; + + procedure _SetConst(dtype:TsrDataType;value:QWORD); + begin + dst^.pWriter:=ConstList.Fetch(dtype,value); + node^.mark_not_used; + node^.pDst:=nil; + Inc(Result); + end; + +begin + Result:=0; + dst:=node^.pDst^.AsType(ntReg); + src:=RegDown(node^.ParamNode(0)^.AsReg); + + if (dst=nil) or (src=nil) then Exit; + + if src^.is_const then + begin + //need a const calc + data:=src^.AsConst^.GetData; + data:=PopCnt(data); //BitCount + + _SetConst(dst^.dtype,data); + Exit; + end; + +end; + + function try_get_comp_bridge(var src:PsrRegNode):Integer; forward; function TEmitPostOp.OpConvert1(node:PSpirvOp):Integer; diff --git a/spirv/emit_vop1.pas b/spirv/emit_vop1.pas index 0b730475..163c89d1 100644 --- a/spirv/emit_vop1.pas +++ b/spirv/emit_vop1.pas @@ -21,6 +21,7 @@ type procedure emit_V_CVT_F16_F32; procedure emit_V_CVT_F32_F16; procedure emit_V_CVT_OFF_F32_I4; + procedure emit_V_CVT_FLR_I32_F32; procedure emit_V_CVT_F32_UBYTE0; procedure emit_V_EXT_F32(OpId:DWORD); procedure emit_V_RSQ_CLAMP_F32; @@ -109,6 +110,21 @@ begin Op2(Op.OpFDiv,dtFloat32,dst,src,num_16); end; +procedure TEmit_VOP1.emit_V_CVT_FLR_I32_F32; //ConvertFloatToSignedInt(floor(vsrc.f)) +Var + dst:PsrRegSlot; + src:PsrRegNode; + flr:PsrRegNode; +begin + dst:=get_vdst8(FSPI.VOP1.VDST); + src:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32); + + flr:=NewReg(dtFloat32); + _OpGlsl1(line,GlslOp.Floor,flr,src); + + Op1(Op.OpConvertFToS,dtInt32,dst,flr); +end; + procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE0; Var dst:PsrRegSlot; @@ -220,6 +236,8 @@ begin V_CVT_OFF_F32_I4: emit_V_CVT_OFF_F32_I4; + V_CVT_FLR_I32_F32: emit_V_CVT_FLR_I32_F32; + V_CVT_F32_UBYTE0: emit_V_CVT_F32_UBYTE0; V_FRACT_F32: emit_V_EXT_F32(GlslOp.Fract); diff --git a/spirv/emit_vop2.pas b/spirv/emit_vop2.pas index 00a0c0a5..493ada79 100644 --- a/spirv/emit_vop2.pas +++ b/spirv/emit_vop2.pas @@ -35,6 +35,7 @@ type procedure emit_V_MAC_LEGACY_F32; procedure emit_V_MADAK_F32; procedure emit_V_MADMK_F32; + procedure emit_V_BCNT_U32_B32; procedure emit_V_MMX(OpId:DWORD;rtype:TsrDataType); end; @@ -374,6 +375,21 @@ begin OpFmaF32(dst,src[0],src[1],src[2]); end; +procedure TEmit_VOP2.emit_V_BCNT_U32_B32; //vdst = bit_count(vsrc0) + vsrc1.u +Var + dst:PsrRegSlot; + src:array[0..1] of PsrRegNode; +begin + dst:=get_vdst8(FSPI.VOP2.VDST); + + src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); + src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); + + src[0]:=OpBitCountTo(src[0]); + + Op2(Op.OpIAdd,dtUint32,dst,src[0],src[1]); +end; + procedure TEmit_VOP2.emit_V_MMX(OpId:DWORD;rtype:TsrDataType); Var dst:PsrRegSlot; @@ -427,6 +443,8 @@ begin V_MADAK_F32: emit_V_MADAK_F32; V_MADMK_F32: emit_V_MADMK_F32; + V_BCNT_U32_B32: emit_V_BCNT_U32_B32; + V_MIN_LEGACY_F32: emit_V_MMX(GlslOp.NMin,dtFloat32); V_MAX_LEGACY_F32: emit_V_MMX(GlslOp.NMax,dtFloat32); diff --git a/spirv/emit_vop3.pas b/spirv/emit_vop3.pas index db491aad..9a54b31b 100644 --- a/spirv/emit_vop3.pas +++ b/spirv/emit_vop3.pas @@ -35,10 +35,10 @@ type procedure emit_V_SUBREV_F32; procedure emit_V_CVT_PKRTZ_F16_F32; procedure emit_V_MMX_F32(OpId:DWORD); - procedure emit_V_MUL_LO_I32; + procedure emit_V_MUL_LO(rtype:TsrDataType); procedure emit_V_MUL_I32_I24; procedure emit_V_MUL_U32_U24; - procedure emit_V_MUL_HI_U32; + procedure emit_V_MUL_HI(rtype:TsrDataType); procedure emit_V_MAC_F32; procedure emit_V_BFE_U32; @@ -303,7 +303,7 @@ begin emit_dst_clamp_f(dst); end; -procedure TEmit_VOP3.emit_V_MUL_LO_I32; +procedure TEmit_VOP3.emit_V_MUL_LO(rtype:TsrDataType); Var dst:PsrRegSlot; src:array[0..1] of PsrRegNode; @@ -315,8 +315,8 @@ begin Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP'); Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG'); - src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtInt32); - src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtInt32); + src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype); + src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype); OpIMul(dst,src[0],src[1]); end; @@ -375,11 +375,12 @@ begin OpIMul(dst,src[0],src[1]); end; -procedure TEmit_VOP3.emit_V_MUL_HI_U32; +procedure TEmit_VOP3.emit_V_MUL_HI(rtype:TsrDataType); Var dst:PsrRegSlot; src:array[0..1] of PsrRegNode; tmp_r,dst_r:PsrRegNode; + tst:TsrDataType; begin dst:=get_vdst8(FSPI.VOP3a.VDST); @@ -388,13 +389,23 @@ begin Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP'); Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG'); - src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUInt32); - src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUInt32); + src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype); + src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype); - tmp_r:=NewReg(dtStruct2u); - _Op2(line,Op.OpUMulExtended,tmp_r,src[0],src[1]); + tst:=rtype.AsStruct2; + Assert(tst<>dtUnknow); - dst_r:=dst^.New(line,dtUInt32); + tmp_r:=NewReg(tst); + + if (rtype.Sign=0) then + begin + _Op2(line,Op.OpUMulExtended,tmp_r,src[0],src[1]); + end else + begin + _Op2(line,Op.OpSMulExtended,tmp_r,src[0],src[1]); + end; + + dst_r:=dst^.New(line,rtype); OpExtract(line,dst_r,tmp_r,1); end; @@ -1025,8 +1036,11 @@ begin //VOP3 only - V_MUL_LO_I32: emit_V_MUL_LO_I32; - V_MUL_HI_U32: emit_V_MUL_HI_U32; + V_MUL_LO_U32: emit_V_MUL_LO(dtUint32); + V_MUL_HI_U32: emit_V_MUL_HI(dtUint32); + + V_MUL_LO_I32: emit_V_MUL_LO(dtInt32); + V_MUL_HI_I32: emit_V_MUL_HI(dtInt32); V_BFE_U32: emit_V_BFE_U32; V_BFI_B32: emit_V_BFI_B32; diff --git a/spirv/srType.pas b/spirv/srType.pas index fcb80576..e7161af7 100644 --- a/spirv/srType.pas +++ b/spirv/srType.pas @@ -36,6 +36,7 @@ type dtVec3b, dtVec4b, + dtStruct2i, dtStruct2u, dtVec2u8, @@ -92,6 +93,7 @@ type function BitSize:Byte; function High:QWORD; function AsVector(_count:Byte):TsrDataType; + function AsStruct2:TsrDataType; end; Pvec2f=^Tvec2f; @@ -163,6 +165,7 @@ begin dtVec3b :Result:='bvec3'; dtVec4b :Result:='bvec4'; + dtStruct2i :Result:='rec2i'; dtStruct2u :Result:='rec2u'; dtVec2u8 :Result:='u8vec2'; @@ -233,6 +236,7 @@ begin dtInt64, dtUint64, + dtStruct2i, dtStruct2u, dtVec2u8, @@ -305,6 +309,7 @@ begin dtVec3b, dtVec4b, + dtStruct2i, dtStruct2u, dtVec2u8, @@ -369,6 +374,7 @@ begin dtVec3u, dtVec4u:Result:=dtUint32; + dtStruct2i, dtVec2i, dtVec3i, dtVec4i:Result:=dtInt32; @@ -396,6 +402,7 @@ begin dtVec2i, dtVec2h, dtVec2f, + dtStruct2i, dtStruct2u:Result:=2; dtVec3b, @@ -425,6 +432,8 @@ begin dtInt32, dtInt64, + dtStruct2i, + dtHalf16, dtFloat32, dtFloat64, @@ -469,6 +478,7 @@ begin dtVec4i16, dtVec2f, dtVec4h, + dtStruct2i, dtStruct2u:Result:=64; dtVec3u, @@ -559,6 +569,16 @@ begin end; end; +function TsrDataTypeHelper.AsStruct2:TsrDataType; +begin + Result:=dtUnknow; + Case Self of + dtInt32 :Result:=dtStruct2i; + dtUint32:Result:=dtStruct2u; + else; + end; +end; + function CompareType(rtype1,rtype2:TsrDataType):Boolean; begin Case rtype1 of diff --git a/spirv/srTypes.pas b/spirv/srTypes.pas index 0ff40819..b086fe45 100644 --- a/spirv/srTypes.pas +++ b/spirv/srTypes.pas @@ -568,6 +568,7 @@ begin // + dtStruct2i, dtStruct2u: begin Result:=_FetchStruct2(dtype);