This commit is contained in:
Pavel 2025-03-28 11:53:39 +03:00
parent 7f42739963
commit eadc7c5f56
6 changed files with 282 additions and 182 deletions

View File

@ -2190,8 +2190,6 @@ var
begin
Result:=0;
Result:=0;
case PM4_TYPE(token) of
0:begin //PM4_TYPE_0
if p_print_gpu_ops then Writeln('[ASC]PM4_TYPE_0 len:',PM4_LENGTH(token));

View File

@ -64,7 +64,7 @@ function _calc_usage(info:PShaderBinaryInfo;USER_DATA:PDWORD):TUSER_DATA_USEAGE;
var
i:Integer;
Slots:PInputUsageSlot;
r:Byte;
r,c,w:Byte;
begin
Result:=Default(TUSER_DATA_USEAGE);
if (info<>nil) then
@ -77,7 +77,7 @@ begin
begin
r:=Slots[i].m_startRegister;
Assert(r<15);
Result[r]:=2; //getFetchAddress
Result[r+0]:=2; //getFetchAddress
Result[r+1]:=1; //skip
end;
kShaderInputUsagePtrResourceTable,
@ -95,9 +95,22 @@ begin
begin
r:=Slots[i].m_startRegister;
Assert(r<15);
Result[r]:=3; //getBufferAddress
Result[r+0]:=3; //getBufferAddress
Result[r+1]:=1; //skip
end;
kShaderInputUsageImmShaderResourceTable:
begin
r:=Slots[i].m_startRegister;
Assert(r<15);
c:=Slots[i].m_srtSizeInDWordMinusOne+1;
Assert(c<=8);
c:=c div 2;
For w:=0 to c-1 do
begin
Result[r+w*2+0]:=3; //getBufferAddress
Result[r+w*2+1]:=1; //skip
end;
end;
end;
end;
For i:=0 to 15 do
@ -125,6 +138,7 @@ begin
begin
Case USEAGE_DATA[i] of
0:DUMP_BLOCK(F,REG+i,@USER_DATA[i],SizeOf(DWORD));
1:; //skip
2:
begin
buf:=getFetchAddress(USER_DATA[i],USER_DATA[i+1]);

View File

@ -22,6 +22,7 @@ type
procedure emit_S_SUB_I32;
procedure emit_S_SUB_U32;
procedure emit_S_ADDC_U32;
procedure emit_S_MMX(OpId:DWORD;rtype:TsrDataType);
procedure emit_S_MUL_I32;
procedure OpISccNotZero(src:TsrRegNode);
procedure OpISccNotZero2(src0,src1:TsrRegNode);
@ -158,6 +159,19 @@ begin
OpBitwiseOr(car,src[1],src[0]); //SCC1 or SCC2
end;
//Generic emitter for two-operand scalar (SOP2) instructions that map onto a
//single OpGlsl2 call.
// OpId  - extended-instruction id forwarded to OpGlsl2
// rtype - data type used to fetch both sources and passed as the result type
procedure TEmit_SOP2.emit_S_MMX(OpId:DWORD;rtype:TsrDataType);
Var
 dstSlot:PsrRegSlot;
 lhs,rhs:TsrRegNode;
begin
 dstSlot:=get_sdst7(FSPI.SOP2.SDST);

 //SSRC0 is fetched before SSRC1, both interpreted as rtype
 lhs:=fetch_ssrc9(FSPI.SOP2.SSRC0,rtype);
 rhs:=fetch_ssrc9(FSPI.SOP2.SSRC1,rtype);

 OpGlsl2(OpId,rtype,dstSlot,lhs,rhs);
end;
procedure TEmit_SOP2.emit_S_MUL_I32;
Var
dst:PsrRegSlot;
@ -507,6 +521,12 @@ begin
S_ADDC_U32: emit_S_ADDC_U32;
S_MIN_I32: emit_S_MMX(GlslOp.SMin,dtInt32);
S_MAX_I32: emit_S_MMX(GlslOp.SMax,dtInt32);
S_MIN_U32: emit_S_MMX(GlslOp.UMin,dtUint32);
S_MAX_U32: emit_S_MMX(GlslOp.UMax,dtUint32);
S_MUL_I32: emit_S_MUL_I32;
S_LSHL_B32: emit_S_SH(Op.OpShiftLeftLogical ,dtUInt32);

View File

@ -21,14 +21,13 @@ type
procedure emit_V_AND_B32;
procedure emit_V_OR_B32;
procedure emit_V_XOR_B32;
procedure emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean);
procedure emit_V_ADD_I32;
procedure emit_V_SUB_I32;
procedure emit_V_SUBREV_I32;
procedure emit_V2_F32(OpId:DWORD);
procedure emit_V_SUB_I32(rev:Boolean);
procedure emit_V_ADDC_U32;
procedure emit_V_SUBB_U32(rev:Boolean);
procedure emit_V2_F32(OpId:DWORD;rev:Boolean);
procedure emit_V_MUL_LEGACY_F32;
procedure emit_V_SUBREV_F32;
procedure emit_V_CVT_PKRTZ_F16_F32;
procedure emit_V_MUL_I32_I24;
procedure emit_V_MUL_U32_U24;
@ -39,7 +38,6 @@ type
procedure emit_V_BCNT_U32_B32;
procedure emit_V_MMX(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_LDEXP_F32;
procedure emit_V_ADDC_U32;
procedure emit_V_MBCNT_LO_U32_B32;
procedure emit_V_MBCNT_HI_U32_B32;
procedure emit_V_WRITELANE_B32;
@ -122,15 +120,25 @@ begin
OpBitwiseXor(dst,src[0],src[1]);
end;
procedure TEmit_VOP2.emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType);
procedure TEmit_VOP2.emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean);
Var
dst:PsrRegSlot;
src:array[0..1] of TsrRegNode;
begin
dst:=get_vdst8(FSPI.VOP2.VDST);
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,rtype);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUInt32);
case rev of
False:
begin
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,rtype);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUInt32);
end;
True:
begin
src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32);
src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,rtype);
end;
end;
src[1]:=OpAndTo(src[1],31);
src[1].PrepType(ord(dtUInt32));
@ -138,22 +146,6 @@ begin
Op2(OpId,src[0].dtype,dst,src[0],src[1]);
end;
//Emitter for the "reversed" VOP2 shifts: SRC0 supplies the shift amount and
//VSRC1 supplies the value being shifted.
// OpId  - shift opcode forwarded to Op2
// rtype - data type used to fetch the shifted value (VSRC1)
procedure TEmit_VOP2.emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType);
Var
 dstSlot:PsrRegSlot;
 shAmount,shValue:TsrRegNode;
begin
 dstSlot:=get_vdst8(FSPI.VOP2.VDST);

 //SRC0 is fetched first, then VSRC1
 shAmount:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32);
 shValue :=fetch_vsrc8(FSPI.VOP2.VSRC1,rtype);

 //only the low five bits of the shift amount are used
 shAmount:=OpAndTo(shAmount,31);
 shAmount.PrepType(ord(dtUInt32));

 //result type follows the type the shifted value ended up with
 Op2(OpId,shValue.dtype,dstSlot,shValue,shAmount);
end;
procedure TEmit_VOP2.emit_V_ADD_I32; //vdst = vsrc0.s + vsrc1.s; sdst[thread_id:] = carry_out & EXEC
Var
dst,car:PsrRegSlot;
@ -183,7 +175,7 @@ begin
//OpBitwiseAnd(car,car^.current,exc); //carry_out & EXEC
end;
procedure TEmit_VOP2.emit_V_SUB_I32; //vdst = vsrc0.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC
procedure TEmit_VOP2.emit_V_SUB_I32(rev:Boolean); //vdst = vsrc0.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC
Var
dst,bor:PsrRegSlot;
src:array[0..1] of TsrRegNode;
@ -192,8 +184,18 @@ begin
dst:=get_vdst8(FSPI.VOP2.VDST);
bor:=get_vcc0;
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
case rev of
False:
begin
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
end;
True:
begin
src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
end;
end;
OpISubExt(dst,bor,src[0],src[1],dtUint32);
@ -212,25 +214,40 @@ begin
//OpBitwiseAnd(bor,bor^.current,exc); //borrow_out & EXEC
end;
procedure TEmit_VOP2.emit_V_SUBREV_I32; //vdst = vsrc1.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC
procedure TEmit_VOP2.emit_V_ADDC_U32;
Var
dst,bor:PsrRegSlot;
src:array[0..1] of TsrRegNode;
dst,car:PsrRegSlot;
src:array[0..2] of TsrRegNode;
//exc:TsrRegNode;
begin
dst:=get_vdst8(FSPI.VOP2.VDST);
bor:=get_vcc0;
car:=get_vcc0;
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
src[2]:=MakeRead(get_vcc0,dtUInt32);
OpISubExt(dst,bor,src[1],src[0],dtUint32);
src[2]:=OpAndTo(src[2],1);
src[2].PrepType(ord(dtUInt32));
OpIAddExt(dst,car,src[0],src[1],dtUint32); //src0+src1
src[0]:=MakeRead(dst,dtUInt32);
src[1]:=MakeRead(car,dtUInt32); //save car1
OpIAddExt(dst,car,src[0],src[2],dtUint32); //(src0+src1)+src2
src[0]:=MakeRead(car,dtUInt32);
OpBitwiseOr(car,src[1],src[0]); //car1 or car2
src[0]:=MakeRead(car,dtUInt32);
{
TODO:
if (EXEC[i]) {
V_SUBREV_I32
VCC[i] = bor;
V_ADDC_U32
VCC[i] = car;
}
else {
VCC[i] = 0;
@ -238,18 +255,87 @@ begin
}
//exc:=MakeRead(get_exec0,dtUnknow);
//OpBitwiseAnd(bor,bor^.current,exc); //borrow_out & EXEC
//OpBitwiseAnd(car,src[0],exc); //carry_out & EXEC
end;
procedure TEmit_VOP2.emit_V2_F32(OpId:DWORD);
//Emits V_SUBB_U32 (rev=False) and V_SUBBREV_U32 (rev=True):
//unsigned 32-bit subtract with borrow-in and borrow-out.
// rev=False: vdst = SRC0  - VSRC1 - borrow_in
// rev=True : vdst = VSRC1 - SRC0  - borrow_in
//borrow_in is bit 0 of the value read from the get_vcc0 slot; the combined
//borrow-out is written back to the same slot.
//ISA form: vdst = vsrc0.u - vsub.u - sborrow[thread_id:]; sdst[thread_id:] = borrow_out & EXEC
procedure TEmit_VOP2.emit_V_SUBB_U32(rev:Boolean); //vdst = vsrc0.u - vsub.u - sborrow[thread_id:]; sdst[thread_id:] = borrow_out & EXEC
Var
dst,bor:PsrRegSlot;
src:array[0..2] of TsrRegNode;
//exc:TsrRegNode;
begin
dst:=get_vdst8(FSPI.VOP2.VDST);
bor:=get_vcc0;
//rev only decides which operand lands in src[0] (minuend) and src[1] (subtrahend);
//SRC0 is fetched before VSRC1 in both cases
case rev of
False:
begin
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
end;
True:
begin
src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
end;
end;
//borrow-in: read the current borrow slot and keep only bit 0
src[2]:=MakeRead(get_vcc0,dtUInt32);
src[2]:=OpAndTo(src[2],1);
src[2].PrepType(ord(dtUInt32));
//first step: dst = src0-src1, bor = borrow of that subtraction
OpISubExt(dst,bor,src[0],src[1],dtUInt32); //src0-src1
src[0]:=MakeRead(dst,dtUInt32);
src[1]:=MakeRead(bor,dtUInt32); //save the borrow of the first subtraction (bor1)
//second step: dst = (src0-src1)-borrow_in, bor = borrow of that subtraction
OpISubExt(dst,bor,src[0],src[2],dtUInt32); //(src0-src1)-src2
src[0]:=MakeRead(bor,dtUInt32);
//The two partial borrows are merged with OR (the original note here was "Or??? And???").
//NOTE(review): assuming OpISubExt yields a 0/1 borrow flag, at most one of the two
//steps can borrow (the second only borrows when the first result is 0, which cannot
//happen after a borrowing first step), so OR gives the overall borrow-out - confirm.
OpBitwiseOr(bor,src[1],src[0]); //bor1 or bor2
{
TODO:
if (EXEC[i]) {
V_SUBB_U32
SDST[i] = bor;
}
else {
SDST[i] = 0;
}
}
//EXEC masking of the borrow-out is not implemented yet; the disabled code below sketches it
//src[0]:=MakeRead(bor,dtUInt32);
//exc:=MakeRead(get_exec0,dtUnknow);
//OpBitwiseAnd(bor,src[0],exc); //borrow_out & EXEC
end;
//Common emitter for two-operand VOP2 float instructions.
// OpId - binary opcode forwarded to Op2
// rev  - False: Op2 receives (SRC0, VSRC1)
//        True : Op2 receives (VSRC1, SRC0), i.e. the operands are swapped
//Each operand is fetched exactly once. Previously SRC0/VSRC1 were fetched
//unconditionally and then fetched a second time inside the rev selection,
//leaving the first pair of fetch results unused.
procedure TEmit_VOP2.emit_V2_F32(OpId:DWORD;rev:Boolean);
Var
 dst:PsrRegSlot;
 src:array[0..1] of TsrRegNode;
begin
 dst:=get_vdst8(FSPI.VOP2.VDST);

 //SRC0 is always fetched before VSRC1; rev only selects the slot it is stored in
 if rev then
 begin
  src[1]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
  src[0]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
 end else
 begin
  src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
  src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
 end;

 Op2(OpId,dtFloat32,dst,src[0],src[1]);
end;
@ -277,19 +363,6 @@ begin
OpSelect(dst,mul,zero,cmp); //false,true,cond
end;
//Emitter for V_SUBREV_F32: an OpFSub whose operands are handed to Op2 in
//swapped order (VSRC1 first, SRC0 second).
procedure TEmit_VOP2.emit_V_SUBREV_F32;
Var
 dstSlot:PsrRegSlot;
 opSrc0,opVsrc1:TsrRegNode;
begin
 dstSlot:=get_vdst8(FSPI.VOP2.VDST);

 //fetch order is SRC0 then VSRC1, both as float32
 opSrc0 :=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
 opVsrc1:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);

 Op2(Op.OpFSub,dtFloat32,dstSlot,opVsrc1,opSrc0);
end;
procedure TEmit_VOP2.emit_V_CVT_PKRTZ_F16_F32;
Var
dst:PsrRegSlot;
@ -462,50 +535,6 @@ begin
Op2(Op.OpFMul,dtFloat32,dst,src[0],src[1]);
end;
//Emits V_ADDC_U32: vdst = SRC0 + VSRC1 + carry_in, with the carry-out written
//to the get_vcc0 slot. carry_in is bit 0 of the value read from that same slot.
//The sum is built as two chained OpIAddExt steps whose carries are OR-ed together.
procedure TEmit_VOP2.emit_V_ADDC_U32;
Var
dst,car:PsrRegSlot;
src:array[0..2] of TsrRegNode;
//exc:TsrRegNode;
begin
dst:=get_vdst8(FSPI.VOP2.VDST);
car:=get_vcc0;
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
//carry-in: read the current carry slot and keep only bit 0
src[2]:=MakeRead(get_vcc0,dtUInt32);
src[2]:=OpAndTo(src[2],1);
src[2].PrepType(ord(dtUInt32));
//first step: dst = src0+src1, car = carry of that addition
OpIAddExt(dst,car,src[0],src[1],dtUint32); //src0+src1
src[0]:=MakeRead(dst,dtUInt32);
src[1]:=MakeRead(car,dtUInt32); //save car1
//second step: dst = (src0+src1)+carry_in, car = carry of that addition
OpIAddExt(dst,car,src[0],src[2],dtUint32); //(src0+src1)+src2
src[0]:=MakeRead(car,dtUInt32);
OpBitwiseOr(car,src[1],src[0]); //car1 or car2
//read of the merged carry; its only consumer is the disabled EXEC masking below
src[0]:=MakeRead(car,dtUInt32);
{
TODO:
if (EXEC[i]) {
V_ADDC_U32
VCC[i] = car;
}
else {
VCC[i] = 0;
}
}
//EXEC masking of the carry-out is not implemented yet; the disabled code below sketches it
//exc:=MakeRead(get_exec0,dtUnknow);
//OpBitwiseAnd(car,src[0],exc); //carry_out & EXEC
end;
//V_MBCNT_LO_U32_B32 v1, -1, v1
procedure TEmit_VOP2.emit_V_MBCNT_LO_U32_B32;
@ -609,22 +638,27 @@ begin
V_OR_B32 : emit_V_OR_B32;
V_XOR_B32 : emit_V_XOR_B32;
V_LSHL_B32 : emit_V_SH_NRM(Op.OpShiftLeftLogical ,dtUint32);
V_LSHLREV_B32: emit_V_SH_REV(Op.OpShiftLeftLogical ,dtUint32);
V_LSHR_B32 : emit_V_SH_NRM(Op.OpShiftRightLogical ,dtUint32);
V_LSHRREV_B32: emit_V_SH_REV(Op.OpShiftRightLogical ,dtUint32);
V_ASHR_I32 : emit_V_SH_NRM(Op.OpShiftRightArithmetic,dtInt32);
V_ASHRREV_I32: emit_V_SH_REV(Op.OpShiftRightArithmetic,dtInt32);
V_LSHL_B32 : emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,False);
V_LSHLREV_B32: emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,True);
V_LSHR_B32 : emit_V_SH(Op.OpShiftRightLogical ,dtUint32,False);
V_LSHRREV_B32: emit_V_SH(Op.OpShiftRightLogical ,dtUint32,True);
V_ASHR_I32 : emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,False);
V_ASHRREV_I32: emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,True);
V_ADD_I32 : emit_V_ADD_I32;
V_SUB_I32 : emit_V_SUB_I32;
V_SUBREV_I32 : emit_V_SUBREV_I32;
V_SUB_I32 : emit_V_SUB_I32(False);
V_SUBREV_I32 : emit_V_SUB_I32(True );
V_ADD_F32 : emit_V2_F32(Op.OpFAdd);
V_SUB_F32 : emit_V2_F32(Op.OpFSub);
V_SUBREV_F32 : emit_V_SUBREV_F32;
V_ADD_F32 : emit_V2_F32(Op.OpFAdd,False);
V_SUB_F32 : emit_V2_F32(Op.OpFSub,False);
V_SUBREV_F32 : emit_V2_F32(Op.OpFSub,True );
V_MUL_F32 : emit_V2_F32(Op.OpFMul);
V_ADDC_U32: emit_V_ADDC_U32;
V_SUBB_U32 :emit_V_SUBB_U32(False);
V_SUBBREV_U32:emit_V_SUBB_U32(True);
V_MUL_F32 : emit_V2_F32(Op.OpFMul,False);
V_MUL_LEGACY_F32: emit_V_MUL_LEGACY_F32;
V_CVT_PKRTZ_F16_F32: emit_V_CVT_PKRTZ_F16_F32;
@ -654,8 +688,6 @@ begin
V_LDEXP_F32: emit_V_LDEXP_F32;
V_ADDC_U32: emit_V_ADDC_U32;
V_MBCNT_LO_U32_B32: emit_V_MBCNT_LO_U32_B32;
V_MBCNT_HI_U32_B32: emit_V_MBCNT_HI_U32_B32;

View File

@ -31,23 +31,23 @@ type
procedure emit_V_ADDC_U32;
procedure emit_V_SUBB_U32;
procedure emit_V_ADD_I32;
procedure emit_V_SUB_I32;
procedure emit_V_CNDMASK_B32;
procedure emit_V_MUL_LEGACY_F32;
procedure emit_V2_F32(OpId:DWORD);
procedure emit_V2_REV_F32(OpId:DWORD);
procedure emit_V2_F32(OpId:DWORD;rev:Boolean);
procedure emit_V_CVT_PKRTZ_F16_F32;
procedure emit_V_MMX(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_MMX3(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType);
procedure emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean);
procedure emit_V_MUL_LO(rtype:TsrDataType);
procedure emit_V_MUL_I32_I24;
procedure emit_V_MUL_U32_U24;
procedure emit_V_MUL_HI(rtype:TsrDataType);
procedure emit_V_MAC_F32;
procedure emit_V_LDEXP_F32;
procedure emit_V_BCNT_U32_B32;
procedure emit_V_MBCNT_LO_U32_B32;
procedure emit_V_MBCNT_HI_U32_B32;
@ -271,7 +271,7 @@ begin
emit_dst_clamp_f(dst);
end;
procedure TEmit_VOP3.emit_V2_F32(OpId:DWORD);
procedure TEmit_VOP3.emit_V2_F32(OpId:DWORD;rev:Boolean);
Var
dst:PsrRegSlot;
src:array[0..1] of TsrRegNode;
@ -284,26 +284,10 @@ begin
emit_src_abs_bit(@src,2);
emit_src_neg_bit(@src,2);
Op2(OpId,dtFloat32,dst,src[0],src[1]);
emit_dst_omod_f(dst);
emit_dst_clamp_f(dst);
end;
procedure TEmit_VOP3.emit_V2_REV_F32(OpId:DWORD);
Var
dst:PsrRegSlot;
src:array[0..1] of TsrRegNode;
begin
dst:=get_vdst8(FSPI.VOP3a.VDST);
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtFloat32);
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtFloat32);
emit_src_abs_bit(@src,2);
emit_src_neg_bit(@src,2);
Op2(OpId,dtFloat32,dst,src[1],src[0]);
case rev of
False:Op2(OpId,dtFloat32,dst,src[0],src[1]);
True :Op2(OpId,dtFloat32,dst,src[1],src[0]);
end;
emit_dst_omod_f(dst);
emit_dst_clamp_f(dst);
@ -335,8 +319,8 @@ Var
begin
dst:=get_vdst8(FSPI.VOP3a.VDST);
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtFloat32);
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtFloat32);
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype);
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype);
if rtype.isFloat then
begin
@ -361,9 +345,9 @@ Var
begin
dst:=get_vdst8(FSPI.VOP3a.VDST);
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtFloat32);
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtFloat32);
src[2]:=fetch_ssrc9(FSPI.VOP3a.SRC2,dtFloat32);
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype);
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype);
src[2]:=fetch_ssrc9(FSPI.VOP3a.SRC2,rtype);
if rtype.isFloat then
begin
@ -384,8 +368,7 @@ begin
end;
end;
procedure TEmit_VOP3.emit_V_SH_NRM(OpId:DWORD;rtype:TsrDataType);
procedure TEmit_VOP3.emit_V_SH(OpId:DWORD;rtype:TsrDataType;rev:Boolean);
Var
dst:PsrRegSlot;
src:array[0..1] of TsrRegNode;
@ -397,8 +380,18 @@ begin
Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP');
Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG');
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype);
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUInt32);
case rev of
False:
begin
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC0,rtype);
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUInt32);
end;
True:
begin
src[1]:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUInt32);
src[0]:=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype);
end;
end;
src[1]:=OpAndTo(src[1],31);
src[1].PrepType(ord(dtUInt32));
@ -406,27 +399,6 @@ begin
Op2(OpId,src[0].dtype,dst,src[0],src[1]);
end;
//VOP3a form of the "reversed" shifts: SRC0 supplies the shift amount and
//SRC1 supplies the value being shifted.
// OpId  - shift opcode forwarded to Op2
// rtype - data type used to fetch the shifted value (SRC1)
//The OMOD/ABS/CLAMP/NEG modifiers are not supported and are asserted to be zero.
procedure TEmit_VOP3.emit_V_SH_REV(OpId:DWORD;rtype:TsrDataType);
Var
 dstSlot:PsrRegSlot;
 shAmount,shValue:TsrRegNode;
begin
 dstSlot:=get_vdst8(FSPI.VOP3a.VDST);

 Assert(FSPI.VOP3a.OMOD =0,'FSPI.VOP3a.OMOD');
 Assert(FSPI.VOP3a.ABS =0,'FSPI.VOP3a.ABS');
 Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP');
 Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG');

 //SRC0 is fetched first, then SRC1
 shAmount:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUInt32);
 shValue :=fetch_ssrc9(FSPI.VOP3a.SRC1,rtype);

 //only the low five bits of the shift amount are used
 shAmount:=OpAndTo(shAmount,31);
 shAmount.PrepType(ord(dtUInt32));

 //result type follows the type the shifted value ended up with
 Op2(OpId,shValue.dtype,dstSlot,shValue,shAmount);
end;
procedure TEmit_VOP3.emit_V_MUL_LO(rtype:TsrDataType);
Var
dst:PsrRegSlot;
@ -579,6 +551,26 @@ begin
emit_dst_clamp_f(dst);
end;
//VOP3a form of V_BCNT_U32_B32: vdst = bit_count(vsrc0) + vsrc1.u
//The OMOD/ABS/CLAMP/NEG modifiers are not supported and are asserted to be zero.
procedure TEmit_VOP3.emit_V_BCNT_U32_B32;
Var
 dstSlot:PsrRegSlot;
 bitCnt,addend:TsrRegNode;
begin
 dstSlot:=get_vdst8(FSPI.VOP3a.VDST);

 Assert(FSPI.VOP3a.OMOD =0,'FSPI.VOP3a.OMOD');
 Assert(FSPI.VOP3a.ABS =0,'FSPI.VOP3a.ABS');
 Assert(FSPI.VOP3a.CLAMP=0,'FSPI.VOP3a.CLAMP');
 Assert(FSPI.VOP3a.NEG =0,'FSPI.VOP3a.NEG');

 //SRC0 is fetched first, then SRC1, both as uint32
 bitCnt:=fetch_ssrc9(FSPI.VOP3a.SRC0,dtUint32);
 addend:=fetch_ssrc9(FSPI.VOP3a.SRC1,dtUint32);

 //replace SRC0 by its bit count, then add SRC1
 bitCnt:=OpBitCountTo(bitCnt);

 Op2(Op.OpIAdd,dtUint32,dstSlot,bitCnt,addend);
end;
procedure TEmit_VOP3.emit_V_MBCNT_LO_U32_B32;
Var
dst:PsrRegSlot;
@ -1348,6 +1340,38 @@ begin
//OpBitwiseAnd(bor,src[0],exc); //borrow_out & EXEC
end;
//VOP3b form of V_ADD_I32: vdst = SRC0 + SRC1 with the carry-out written to the
//scalar destination selected by the SDST field.
//The OMOD/NEG modifiers are not supported and are asserted to be zero.
procedure TEmit_VOP3.emit_V_ADD_I32;
Var
 sumSlot,carrySlot:PsrRegSlot;
 lhs,rhs:TsrRegNode;
 //exc:TsrRegNode;
begin
 sumSlot  :=get_vdst8(FSPI.VOP3b.VDST);
 carrySlot:=get_sdst7(FSPI.VOP3b.SDST);

 Assert(FSPI.VOP3b.OMOD=0,'FSPI.VOP3b.OMOD');
 Assert(FSPI.VOP3b.NEG =0,'FSPI.VOP3b.NEG');

 //SRC0 is fetched first, then SRC1, both as uint32
 lhs:=fetch_ssrc9(FSPI.VOP3b.SRC0,dtUInt32);
 rhs:=fetch_ssrc9(FSPI.VOP3b.SRC1,dtUInt32);

 OpIAddExt(sumSlot,carrySlot,lhs,rhs,dtUint32);

 //TODO: EXEC masking of the carry-out is not implemented yet:
 // if EXEC[i] then SDST[i] = carry of V_ADD_I32
 //            else SDST[i] = 0
 //Disabled sketch of that masking:
 //exc:=MakeRead(get_exec0,dtUnknow);
 //OpBitwiseAnd(carrySlot,carrySlot^.current,exc); //carry_out & EXEC
end;
procedure TEmit_VOP3.emit_V_SUB_I32;
Var
dst,bor:PsrRegSlot;
@ -1386,6 +1410,7 @@ begin
256+V_ADDC_U32: emit_V_ADDC_U32;
256+V_SUBB_U32: emit_V_SUBB_U32;
256+V_ADD_I32 : emit_V_ADD_I32;
256+V_SUB_I32 : emit_V_SUB_I32;
else
@ -1401,16 +1426,16 @@ begin
256+V_CNDMASK_B32: emit_V_CNDMASK_B32;
256+V_ADD_F32 : emit_V2_F32(Op.OpFAdd);
256+V_SUB_F32 : emit_V2_F32(Op.OpFSub);
256+V_SUBREV_F32 : emit_V2_REV_F32(Op.OpFSub);
256+V_ADD_F32 : emit_V2_F32(Op.OpFAdd,False);
256+V_SUB_F32 : emit_V2_F32(Op.OpFSub,False);
256+V_SUBREV_F32 : emit_V2_F32(Op.OpFSub,True );
256+V_LSHL_B32 : emit_V_SH_NRM(Op.OpShiftLeftLogical ,dtUint32);
256+V_LSHLREV_B32: emit_V_SH_REV(Op.OpShiftLeftLogical ,dtUint32);
256+V_LSHR_B32 : emit_V_SH_NRM(Op.OpShiftRightLogical ,dtUint32);
256+V_LSHRREV_B32: emit_V_SH_REV(Op.OpShiftRightLogical ,dtUint32);
256+V_ASHR_I32 : emit_V_SH_NRM(Op.OpShiftRightArithmetic,dtInt32);
256+V_ASHRREV_I32: emit_V_SH_REV(Op.OpShiftRightArithmetic,dtInt32);
256+V_LSHL_B32 : emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,False);
256+V_LSHLREV_B32: emit_V_SH(Op.OpShiftLeftLogical ,dtUint32,True );
256+V_LSHR_B32 : emit_V_SH(Op.OpShiftRightLogical ,dtUint32,False);
256+V_LSHRREV_B32: emit_V_SH(Op.OpShiftRightLogical ,dtUint32,True );
256+V_ASHR_I32 : emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,False);
256+V_ASHRREV_I32: emit_V_SH(Op.OpShiftRightArithmetic,dtInt32 ,True );
256+V_CVT_PKRTZ_F16_F32: emit_V_CVT_PKRTZ_F16_F32;
@ -1428,7 +1453,7 @@ begin
256+V_MUL_LEGACY_F32: emit_V_MUL_LEGACY_F32;
256+V_MUL_F32: emit_V2_F32(Op.OpFMul);
256+V_MUL_F32: emit_V2_F32(Op.OpFMul,False);
256+V_MUL_I32_I24: emit_V_MUL_I32_I24;
256+V_MUL_U32_U24: emit_V_MUL_U32_U24;
@ -1437,6 +1462,8 @@ begin
256+V_LDEXP_F32: emit_V_LDEXP_F32;
256+V_BCNT_U32_B32: emit_V_BCNT_U32_B32;
256+V_MBCNT_LO_U32_B32: emit_V_MBCNT_LO_U32_B32;
256+V_MBCNT_HI_U32_B32: emit_V_MBCNT_HI_U32_B32;

View File

@ -354,7 +354,16 @@ begin
Writeln(' apiSlot=',Slots[i].m_apiSlot);
Writeln(' startRegister=',Slots[i].m_startRegister);
Writeln(' param=',HexStr(Slots[i].m_srtSizeInDWordMinusOne,2));
if (Slots[i].m_usageType=kShaderInputUsageImmShaderResourceTable) then
begin
Writeln(' srtSizeInDWordMinusOne=',HexStr(Slots[i].m_srtSizeInDWordMinusOne,2));
end else
begin
Writeln(' registerCount=',Slots[i].b.m_registerCount);
Writeln(' resourceType=',Slots[i].b.m_resourceType);
Writeln(' chunkMask=',Slots[i].b.m_chunkMask);
end;
end;
Writeln;
end;