diff --git a/chip/pm4_pfp.pas b/chip/pm4_pfp.pas index f23931d9..d7d2ea27 100644 --- a/chip/pm4_pfp.pas +++ b/chip/pm4_pfp.pas @@ -2190,8 +2190,6 @@ var begin Result:=0; - Result:=0; - case PM4_TYPE(token) of 0:begin //PM4_TYPE_0 if p_print_gpu_ops then Writeln('[ASC]PM4_TYPE_0 len:',PM4_LENGTH(token)); diff --git a/chip/shader_dump.pas b/chip/shader_dump.pas index 2bd97c50..c1c672c8 100644 --- a/chip/shader_dump.pas +++ b/chip/shader_dump.pas @@ -64,7 +64,7 @@ function _calc_usage(info:PShaderBinaryInfo;USER_DATA:PDWORD):TUSER_DATA_USEAGE; var i:Integer; Slots:PInputUsageSlot; - r:Byte; + r,c,w:Byte; begin Result:=Default(TUSER_DATA_USEAGE); if (info<>nil) then @@ -77,7 +77,7 @@ begin begin r:=Slots[i].m_startRegister; Assert(r<15); - Result[r]:=2; //getFetchAddress + Result[r+0]:=2; //getFetchAddress Result[r+1]:=1; //skip end; kShaderInputUsagePtrResourceTable, @@ -95,9 +95,22 @@ begin begin r:=Slots[i].m_startRegister; Assert(r<15); - Result[r]:=3; //getBufferAddress + Result[r+0]:=3; //getBufferAddress Result[r+1]:=1; //skip end; + kShaderInputUsageImmShaderResourceTable: + begin + r:=Slots[i].m_startRegister; + Assert(r<15); + c:=Slots[i].m_srtSizeInDWordMinusOne+1; + Assert(c<=8); + c:=c div 2; + For w:=0 to c-1 do + begin + Result[r+w*2+0]:=3; //getBufferAddress + Result[r+w*2+1]:=1; //skip + end; + end; end; end; For i:=0 to 15 do @@ -125,6 +138,7 @@ begin begin Case USEAGE_DATA[i] of 0:DUMP_BLOCK(F,REG+i,@USER_DATA[i],SizeOf(DWORD)); + 1:; //skip 2: begin buf:=getFetchAddress(USER_DATA[i],USER_DATA[i+1]); diff --git a/spirv/emit_sop2.pas b/spirv/emit_sop2.pas index 193da206..5c339a41 100644 --- a/spirv/emit_sop2.pas +++ b/spirv/emit_sop2.pas @@ -22,6 +22,7 @@ type procedure emit_S_SUB_I32; procedure emit_S_SUB_U32; procedure emit_S_ADDC_U32; + procedure emit_S_MMX(OpId:DWORD;rtype:TsrDataType); procedure emit_S_MUL_I32; procedure OpISccNotZero(src:TsrRegNode); procedure OpISccNotZero2(src0,src1:TsrRegNode); @@ -158,6 +159,19 @@ begin OpBitwiseOr(car,src[1],src[0]); //SCC1 or SCC2 end; +procedure TEmit_SOP2.emit_S_MMX(OpId:DWORD;rtype:TsrDataType); +Var + dst:PsrRegSlot; + src:array[0..1] of TsrRegNode; +begin + dst:=get_sdst7(FSPI.SOP2.SDST); + + src[0]:=fetch_ssrc9(FSPI.SOP2.SSRC0,rtype); + src[1]:=fetch_ssrc9(FSPI.SOP2.SSRC1,rtype); + + OpGlsl2(OpId,rtype,dst,src[0],src[1]); +end; + procedure TEmit_SOP2.emit_S_MUL_I32; Var dst:PsrRegSlot; @@ -507,6 +521,12 @@ begin S_ADDC_U32: emit_S_ADDC_U32; + S_MIN_I32: emit_S_MMX(GlslOp.SMin,dtInt32); + S_MAX_I32: emit_S_MMX(GlslOp.SMax,dtInt32); + + S_MIN_U32: emit_S_MMX(GlslOp.UMin,dtUint32); + S_MAX_U32: emit_S_MMX(GlslOp.UMax,dtUint32); + S_MUL_I32: emit_S_MUL_I32; S_LSHL_B32: emit_S_SH(Op.OpShiftLeftLogical ,dtUInt32); diff --git a/spirv/emit_vop2.pas b/spirv/emit_vop2.pas index 1c888825..b1bf0cfd 100644 --- a/spirv/emit_vop2.pas +++ b/spirv/emit_vop2.pas @@ -21,14 +21,13 @@ type procedure emit_V_AND_B32; procedure emit_V_OR_B32; procedure emit_V_XOR_B32; - procedure emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType); - procedure emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType); + procedure emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean); procedure emit_V_ADD_I32; - procedure emit_V_SUB_I32; - procedure emit_V_SUBREV_I32; - procedure emit_V2_F32(OpId:DWORD); + procedure emit_V_SUB_I32(rev:Boolean); + procedure emit_V_ADDC_U32; + procedure emit_V_SUBB_U32(rev:Boolean); + procedure emit_V2_F32(OpId:DWORD;rev:Boolean); procedure emit_V_MUL_LEGACY_F32; - procedure emit_V_SUBREV_F32; procedure emit_V_CVT_PKRTZ_F16_F32; procedure emit_V_MUL_I32_I24; procedure emit_V_MUL_U32_U24; @@ -39,7 +38,6 @@ type procedure emit_V_BCNT_U32_B32; procedure emit_V_MMX(OpId:DWORD;rtype:TsrDataType); procedure emit_V_LDEXP_F32; - procedure emit_V_ADDC_U32; procedure emit_V_MBCNT_LO_U32_B32; procedure emit_V_MBCNT_HI_U32_B32; procedure emit_V_WRITELANE_B32; @@ -122,15 +120,25 @@ begin OpBitwiseXor(dst,src[0],src[1]); end; -procedure TEmit_VOP2.emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType); +procedure TEmit_VOP2.emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean); Var dst:PsrRegSlot; src:array[0..1] of TsrRegNode; begin dst:=get_vdst8(FSPI.VOP2.VDST); - src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,rtype); - src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUInt32); + case rev of + False: + begin + src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,rtype); + src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUInt32); + end; + True: + begin + src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32); + src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,rtype); + end; + end; src[1]:=OpAndTo(src[1],31); src[1].PrepType(ord(dtUInt32)); @@ -138,22 +146,6 @@ begin Op2(OpId,src[0].dtype,dst,src[0],src[1]); end; -procedure TEmit_VOP2.emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType); -Var - dst:PsrRegSlot; - src:array[0..1] of TsrRegNode; -begin - dst:=get_vdst8(FSPI.VOP2.VDST); - - src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32); - src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,rtype); - - src[0]:=OpAndTo(src[0],31); - src[0].PrepType(ord(dtUInt32)); - - Op2(OpId,src[1].dtype,dst,src[1],src[0]); -end; - procedure TEmit_VOP2.emit_V_ADD_I32; //vdst = vsrc0.s + vsrc1.s; sdst[thread_id:] = carry_out & EXEC Var dst,car:PsrRegSlot; @@ -183,7 +175,7 @@ begin //OpBitwiseAnd(car,car^.current,exc); //carry_out & EXEC end; -procedure TEmit_VOP2.emit_V_SUB_I32; //vdst = vsrc0.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC +procedure TEmit_VOP2.emit_V_SUB_I32(rev:Boolean); //vdst = vsrc0.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC Var dst,bor:PsrRegSlot; src:array[0..1] of TsrRegNode; @@ -192,8 +184,18 @@ begin dst:=get_vdst8(FSPI.VOP2.VDST); bor:=get_vcc0; - src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); - src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); + case rev of + False: + begin + src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); + src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); + end; + True: + begin + src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); + src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); + end; + end; OpISubExt(dst,bor,src[0],src[1],dtUint32); @@ -212,25 +214,40 @@ begin //OpBitwiseAnd(bor,bor^.current,exc); //borrow_out & EXEC end; -procedure TEmit_VOP2.emit_V_SUBREV_I32; //vdst = vsrc1.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC +procedure TEmit_VOP2.emit_V_ADDC_U32; Var - dst,bor:PsrRegSlot; - src:array[0..1] of TsrRegNode; + dst,car:PsrRegSlot; + src:array[0..2] of TsrRegNode; //exc:TsrRegNode; begin dst:=get_vdst8(FSPI.VOP2.VDST); - bor:=get_vcc0; + car:=get_vcc0; src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); + src[2]:=MakeRead(get_vcc0,dtUInt32); - OpISubExt(dst,bor,src[1],src[0],dtUint32); + src[2]:=OpAndTo(src[2],1); + src[2].PrepType(ord(dtUInt32)); + + OpIAddExt(dst,car,src[0],src[1],dtUint32); //src0+src1 + + src[0]:=MakeRead(dst,dtUInt32); + src[1]:=MakeRead(car,dtUInt32); //save car1 + + OpIAddExt(dst,car,src[0],src[2],dtUint32); //(src0+src1)+src2 + + src[0]:=MakeRead(car,dtUInt32); + + OpBitwiseOr(car,src[1],src[0]); //car1 or car2 + + src[0]:=MakeRead(car,dtUInt32); { TODO: if (EXEC[i]) { - V_SUBREV_I32 - VCC[i] = bor; + V_ADDC_U32 + VCC[i] = car; } else { VCC[i] = 0; @@ -238,18 +255,87 @@ begin } //exc:=MakeRead(get_exec0,dtUnknow); - //OpBitwiseAnd(bor,bor^.current,exc); //borrow_out & EXEC + //OpBitwiseAnd(car,src[0],exc); //carry_out & EXEC end; -procedure TEmit_VOP2.emit_V2_F32(OpId:DWORD); +//v_subbrev_u32 +//vdst = vsrc1.u - vsub.u - sborrow[thread_id:]; sdst[thread_id:] = borrow_out & EXEC + +procedure TEmit_VOP2.emit_V_SUBB_U32(rev:Boolean); //vdst = vsrc0.u - vsub.u - sborrow[thread_id:]; sdst[thread_id:] = borrow_out & EXEC +Var + dst,bor:PsrRegSlot; + src:array[0..2] of TsrRegNode; + //exc:TsrRegNode; +begin + dst:=get_vdst8(FSPI.VOP2.VDST); + bor:=get_vcc0; + + case rev of + False: + begin + src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); + src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); + end; + True: + begin + src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); + src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); + end; + end; + + src[2]:=MakeRead(get_vcc0,dtUInt32); + + src[2]:=OpAndTo(src[2],1); + src[2].PrepType(ord(dtUInt32)); + + OpISubExt(dst,bor,src[0],src[1],dtUInt32); //src0-src1 + + src[0]:=MakeRead(dst,dtUInt32); + src[1]:=MakeRead(bor,dtUInt32); //save car1 + + OpISubExt(dst,bor,src[0],src[2],dtUInt32); //(src0-src1)-src2 + + src[0]:=MakeRead(bor,dtUInt32); + + //Or??? And??? + OpBitwiseOr(bor,src[1],src[0]); //car1 or car2 + + { + TODO: + if (EXEC[i]) { + V_SUBB_U32 + SDST[i] = bor; + } + else { + SDST[i] = 0; + } + } + + //src[0]:=MakeRead(bor,dtUInt32); + + //exc:=MakeRead(get_exec0,dtUnknow); + //OpBitwiseAnd(bor,src[0],exc); //borrow_out & EXEC +end; + +procedure TEmit_VOP2.emit_V2_F32(OpId:DWORD;rev:Boolean); Var dst:PsrRegSlot; src:array[0..1] of TsrRegNode; begin dst:=get_vdst8(FSPI.VOP2.VDST); - src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32); - src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32); + case rev of + False: + begin + src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32); + src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32); + end; + True: + begin + src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32); + src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32); + end; + end; Op2(OpId,dtFloat32,dst,src[0],src[1]); end; @@ -277,19 +363,6 @@ begin OpSelect(dst,mul,zero,cmp); //false,true,cond end; -procedure TEmit_VOP2.emit_V_SUBREV_F32; -Var - dst:PsrRegSlot; - src:array[0..1] of TsrRegNode; -begin - dst:=get_vdst8(FSPI.VOP2.VDST); - - src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32); - src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32); - - Op2(Op.OpFSub,dtFloat32,dst,src[1],src[0]); -end; - procedure TEmit_VOP2.emit_V_CVT_PKRTZ_F16_F32; Var dst:PsrRegSlot; @@ -462,50 +535,6 @@ begin Op2(Op.OpFMul,dtFloat32,dst,src[0],src[1]); end; -procedure TEmit_VOP2.emit_V_ADDC_U32; -Var - dst,car:PsrRegSlot; - src:array[0..2] of TsrRegNode; - //exc:TsrRegNode; -begin - dst:=get_vdst8(FSPI.VOP2.VDST); - car:=get_vcc0; - - src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32); - src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32); - src[2]:=MakeRead(get_vcc0,dtUInt32); - - src[2]:=OpAndTo(src[2],1); - src[2].PrepType(ord(dtUInt32)); - - OpIAddExt(dst,car,src[0],src[1],dtUint32); //src0+src1 - - src[0]:=MakeRead(dst,dtUInt32); - src[1]:=MakeRead(car,dtUInt32); //save car1 - - OpIAddExt(dst,car,src[0],src[2],dtUint32); //(src0+src1)+src2 - - src[0]:=MakeRead(car,dtUInt32); - - OpBitwiseOr(car,src[1],src[0]); //car1 or car2 - - src[0]:=MakeRead(car,dtUInt32); - - { - TODO: - if (EXEC[i]) { - V_ADDC_U32 - VCC[i] = car; - } - else { - VCC[i] = 0; - } - } - - //exc:=MakeRead(get_exec0,dtUnknow); - //OpBitwiseAnd(car,src[0],exc); //carry_out & EXEC -end; - //V_MBCNT_LO_U32_B32 v1, -1, v1 procedure TEmit_VOP2.emit_V_MBCNT_LO_U32_B32; @@ -609,22 +638,27 @@ begin V_OR_B32 : emit_V_OR_B32; V_XOR_B32 : emit_V_XOR_B32; - V_LSHL_B32 : emit_V_SH_NRM(Op.OpShiftLeftLogical ,dtUint32); - V_LSHLREV_B32: emit_V_SH_REV(Op.OpShiftLeftLogical ,dtUint32); - V_LSHR_B32 : emit_V_SH_NRM(Op.OpShiftRightLogical ,dtUint32); - V_LSHRREV_B32: emit_V_SH_REV(Op.OpShiftRightLogical ,dtUint32); - V_ASHR_I32 : emit_V_SH_NRM(Op.OpShiftRightArithmetic,dtInt32); - V_ASHRREV_I32: emit_V_SH_REV(Op.OpShiftRightArithmetic,dtInt32); + V_LSHL_B32 : emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,False); + V_LSHLREV_B32: emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,True); + V_LSHR_B32 : emit_V_SH(Op.OpShiftRightLogical ,dtUint32,False); + V_LSHRREV_B32: emit_V_SH(Op.OpShiftRightLogical ,dtUint32,True); + V_ASHR_I32 : emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,False); + V_ASHRREV_I32: emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,True); V_ADD_I32 : emit_V_ADD_I32; - V_SUB_I32 : emit_V_SUB_I32; - V_SUBREV_I32 : emit_V_SUBREV_I32; + V_SUB_I32 : emit_V_SUB_I32(False); + V_SUBREV_I32 : emit_V_SUB_I32(True ); - V_ADD_F32 : emit_V2_F32(Op.OpFAdd); - V_SUB_F32 : emit_V2_F32(Op.OpFSub); - V_SUBREV_F32 : emit_V_SUBREV_F32; + V_ADD_F32 : emit_V2_F32(Op.OpFAdd,False); + V_SUB_F32 : emit_V2_F32(Op.OpFSub,False); + V_SUBREV_F32 : emit_V2_F32(Op.OpFSub,True ); - V_MUL_F32 : emit_V2_F32(Op.OpFMul); + V_ADDC_U32: emit_V_ADDC_U32; + + V_SUBB_U32 :emit_V_SUBB_U32(False); + V_SUBBREV_U32:emit_V_SUBB_U32(True); + + V_MUL_F32 : emit_V2_F32(Op.OpFMul,False); V_MUL_LEGACY_F32: emit_V_MUL_LEGACY_F32; V_CVT_PKRTZ_F16_F32: emit_V_CVT_PKRTZ_F16_F32; @@ -654,8 +688,6 @@ begin V_LDEXP_F32: emit_V_LDEXP_F32; - V_ADDC_U32: emit_V_ADDC_U32; - V_MBCNT_LO_U32_B32: emit_V_MBCNT_LO_U32_B32; V_MBCNT_HI_U32_B32: emit_V_MBCNT_HI_U32_B32; diff --git a/spirv/emit_vop3.pas b/spirv/emit_vop3.pas index db964829..b944c7cc 100644 --- a/spirv/emit_vop3.pas +++ b/spirv/emit_vop3.pas @@ -31,23 +31,23 @@ type procedure emit_V_ADDC_U32; procedure emit_V_SUBB_U32; + procedure emit_V_ADD_I32; procedure emit_V_SUB_I32; procedure emit_V_CNDMASK_B32; procedure emit_V_MUL_LEGACY_F32; - procedure emit_V2_F32(OpId:DWORD); - procedure emit_V2_REV_F32(OpId:DWORD); + procedure emit_V2_F32(OpId:DWORD;rev:Boolean); procedure emit_V_CVT_PKRTZ_F16_F32; procedure emit_V_MMX(OpId:DWORD;rtype:TsrDataType); procedure emit_V_MMX3(OpId:DWORD;rtype:TsrDataType); - procedure emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType); - procedure emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType); + procedure emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean); procedure emit_V_MUL_LO(rtype:TsrDataType); procedure emit_V_MUL_I32_I24; procedure emit_V_MUL_U32_U24; procedure emit_V_MUL_HI(rtype:TsrDataType); procedure emit_V_MAC_F32; procedure emit_V_LDEXP_F32; + procedure emit_V_BCNT_U32_B32; procedure emit_V_MBCNT_LO_U32_B32; procedure emit_V_MBCNT_HI_U32_B32; @@ -271,7 +271,7 @@ begin emit_dst_clamp_f(dst); end; -procedure TEmit_VOP3.emit_V2_F32(OpId:DWORD); +procedure TEmit_VOP3.emit_V2_F32(OpId:DWORD;rev:Boolean); Var dst:PsrRegSlot; src:array[0..1] of TsrRegNode; @@ -284,26 +284,10 @@ begin emit_src_abs_bit(@src,2); emit_src_neg_bit(@src,2); - Op2(OpId,dtFloat32,dst,src[0],src[1]); - - emit_dst_omod_f(dst); - emit_dst_clamp_f(dst); -end; - -procedure TEmit_VOP3.emit_V2_REV_F32(OpId:DWORD); -Var - dst:PsrRegSlot; - src:array[0..1] of TsrRegNode; -begin - dst:=get_vdst8(FSPI.VOP3a.VDST); - - src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtFloat32); - src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtFloat32); - - emit_src_abs_bit(@src,2); - emit_src_neg_bit(@src,2); - - Op2(OpId,dtFloat32,dst,src[1],src[0]); + case rev of + False:Op2(OpId,dtFloat32,dst,src[0],src[1]); + True :Op2(OpId,dtFloat32,dst,src[1],src[0]); + end; emit_dst_omod_f(dst); emit_dst_clamp_f(dst); @@ -335,8 +319,8 @@ Var begin dst:=get_vdst8(FSPI.VOP3a.VDST); - src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtFloat32); - src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtFloat32); + src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype); + src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype); if rtype.isFloat then begin @@ -361,9 +345,9 @@ Var begin dst:=get_vdst8(FSPI.VOP3a.VDST); - src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtFloat32); - src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtFloat32); - src[2]:=fetch_ssrc9(FSPI.VOP3a.SRC2,dtFloat32); + src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype); + src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype); + src[2]:=fetch_ssrc9(FSPI.VOP3a.SRC2,rtype); if rtype.isFloat then begin @@ -384,8 +368,7 @@ begin end; end; - -procedure TEmit_VOP3.emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType); +procedure TEmit_VOP3.emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean); Var dst:PsrRegSlot; src:array[0..1] of TsrRegNode; @@ -397,8 +380,18 @@ begin Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP'); Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG'); - src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype); - src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUInt32); + case rev of + False: + begin + src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype); + src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUInt32); + end; + True: + begin + src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUInt32); + src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype); + end; + end; src[1]:=OpAndTo(src[1],31); src[1].PrepType(ord(dtUInt32)); @@ -406,27 +399,6 @@ begin Op2(OpId,src[0].dtype,dst,src[0],src[1]); end; -procedure TEmit_VOP3.emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType); -Var - dst:PsrRegSlot; - src:array[0..1] of TsrRegNode; -begin - dst:=get_vdst8(FSPI.VOP3a.VDST); - - Assert(FSPI.VOP3a.OMOD =0,'FSPI.VOP3a.OMOD'); - Assert(FSPI.VOP3a.ABS =0,'FSPI.VOP3a.ABS'); - Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP'); - Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG'); - - src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUInt32); - src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype); - - src[0]:=OpAndTo(src[0],31); - src[0].PrepType(ord(dtUInt32)); - - Op2(OpId,src[1].dtype,dst,src[1],src[0]); -end; - procedure TEmit_VOP3.emit_V_MUL_LO(rtype:TsrDataType); Var dst:PsrRegSlot; @@ -579,6 +551,26 @@ begin emit_dst_clamp_f(dst); end; +procedure TEmit_VOP3.emit_V_BCNT_U32_B32; //vdst = bit_count(vsrc0) + vsrc1.u +Var + dst:PsrRegSlot; + src:array[0..1] of TsrRegNode; +begin + dst:=get_vdst8(FSPI.VOP3a.VDST); + + Assert(FSPI.VOP3a.OMOD =0,'FSPI.VOP3a.OMOD'); + Assert(FSPI.VOP3a.ABS =0,'FSPI.VOP3a.ABS'); + Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP'); + Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG'); + + src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUint32); + src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUint32); + + src[0]:=OpBitCountTo(src[0]); + + Op2(Op.OpIAdd,dtUint32,dst,src[0],src[1]); +end; + procedure TEmit_VOP3.emit_V_MBCNT_LO_U32_B32; Var dst:PsrRegSlot; @@ -1348,6 +1340,38 @@ begin //OpBitwiseAnd(bor,src[0],exc); //borrow_out & EXEC end; +procedure TEmit_VOP3.emit_V_ADD_I32; +Var + dst,car:PsrRegSlot; + src:array[0..1] of TsrRegNode; + //exc:TsrRegNode; +begin + dst:=get_vdst8(FSPI.VOP3b.VDST); + car:=get_sdst7(FSPI.VOP3b.SDST); + + Assert(FSPI.VOP3b.OMOD=0,'FSPI.VOP3b.OMOD'); + Assert(FSPI.VOP3b.NEG =0,'FSPI.VOP3b.NEG'); + + src[0]:=fetch_ssrc9(FSPI.VOP3b.SRC0,dtUInt32); + src[1]:=fetch_ssrc9(FSPI.VOP3b.SRC1,dtUInt32); + + OpIAddExt(dst,car,src[0],src[1],dtUint32); + + { + TODO: + if (EXEC[i]) { + V_ADD_I32 + VCC[i] = car; + } + else { + VCC[i] = 0; + } + } + + //exc:=MakeRead(get_exec0,dtUnknow); + //OpBitwiseAnd(car,car^.current,exc); //carry_out & EXEC +end; + procedure TEmit_VOP3.emit_V_SUB_I32; Var dst,bor:PsrRegSlot; @@ -1386,6 +1410,7 @@ begin 256+V_ADDC_U32: emit_V_ADDC_U32; 256+V_SUBB_U32: emit_V_SUBB_U32; + 256+V_ADD_I32 : emit_V_ADD_I32; 256+V_SUB_I32 : emit_V_SUB_I32; else @@ -1401,16 +1426,16 @@ begin 256+V_CNDMASK_B32: emit_V_CNDMASK_B32; - 256+V_ADD_F32 : emit_V2_F32(Op.OpFAdd); - 256+V_SUB_F32 : emit_V2_F32(Op.OpFSub); - 256+V_SUBREV_F32 : emit_V2_REV_F32(Op.OpFSub); + 256+V_ADD_F32 : emit_V2_F32(Op.OpFAdd,False); + 256+V_SUB_F32 : emit_V2_F32(Op.OpFSub,False); + 256+V_SUBREV_F32 : emit_V2_F32(Op.OpFSub,True ); - 256+V_LSHL_B32 : emit_V_SH_NRM(Op.OpShiftLeftLogical ,dtUint32); - 256+V_LSHLREV_B32: emit_V_SH_REV(Op.OpShiftLeftLogical ,dtUint32); - 256+V_LSHR_B32 : emit_V_SH_NRM(Op.OpShiftRightLogical ,dtUint32); - 256+V_LSHRREV_B32: emit_V_SH_REV(Op.OpShiftRightLogical ,dtUint32); - 256+V_ASHR_I32 : emit_V_SH_NRM(Op.OpShiftRightArithmetic,dtInt32); - 256+V_ASHRREV_I32: emit_V_SH_REV(Op.OpShiftRightArithmetic,dtInt32); + 256+V_LSHL_B32 : emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,False); + 256+V_LSHLREV_B32: emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,True ); + 256+V_LSHR_B32 : emit_V_SH(Op.OpShiftRightLogical ,dtUint32,False); + 256+V_LSHRREV_B32: emit_V_SH(Op.OpShiftRightLogical ,dtUint32,True ); + 256+V_ASHR_I32 : emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,False); + 256+V_ASHRREV_I32: emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,True ); 256+V_CVT_PKRTZ_F16_F32: emit_V_CVT_PKRTZ_F16_F32; @@ -1428,7 +1453,7 @@ begin 256+V_MUL_LEGACY_F32: emit_V_MUL_LEGACY_F32; - 256+V_MUL_F32: emit_V2_F32(Op.OpFMul); + 256+V_MUL_F32: emit_V2_F32(Op.OpFMul,False); 256+V_MUL_I32_I24: emit_V_MUL_I32_I24; 256+V_MUL_U32_U24: emit_V_MUL_U32_U24; @@ -1437,6 +1462,8 @@ begin 256+V_LDEXP_F32: emit_V_LDEXP_F32; + 256+V_BCNT_U32_B32: emit_V_BCNT_U32_B32; + 256+V_MBCNT_LO_U32_B32: emit_V_MBCNT_LO_U32_B32; 256+V_MBCNT_HI_U32_B32: emit_V_MBCNT_HI_U32_B32; diff --git a/spirv/pssl-spirv.lpr b/spirv/pssl-spirv.lpr index 79084e0e..72b469c7 100644 --- a/spirv/pssl-spirv.lpr +++ b/spirv/pssl-spirv.lpr @@ -354,7 +354,16 @@ begin Writeln(' apiSlot=',Slots[i].m_apiSlot); Writeln(' startRegister=',Slots[i].m_startRegister); - Writeln(' param=',HexStr(Slots[i].m_srtSizeInDWordMinusOne,2)); + if (Slots[i].m_usageType=kShaderInputUsageImmShaderResourceTable) then + begin + Writeln(' srtSizeInDWordMinusOne=',HexStr(Slots[i].m_srtSizeInDWordMinusOne,2)); + end else + begin + Writeln(' registerCount=',Slots[i].b.m_registerCount); + Writeln(' resourceType=',Slots[i].b.m_resourceType); + Writeln(' chunkMask=',Slots[i].b.m_chunkMask); + end; + end; Writeln; end;