mirror of https://github.com/red-prig/fpPS4.git
374 lines
8.5 KiB
Plaintext
374 lines
8.5 KiB
Plaintext
unit emit_VOP2;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
interface
|
|
|
|
uses
|
|
sysutils,
|
|
ps4_pssl,
|
|
spirv,
|
|
srType,
|
|
srReg,
|
|
emit_fetch;
|
|
|
|
type
|
|
TEmit_VOP2=class(TEmitFetch)
|
|
procedure emit_VOP2;
|
|
procedure emit_V_CNDMASK_B32;
|
|
procedure emit_V_AND_B32;
|
|
procedure emit_V_OR_B32;
|
|
procedure emit_V_XOR_B32;
|
|
procedure emit_V_SH(OpId:DWORD;rtype:TsrDataType);
|
|
procedure emit_V_SHREV(OpId:DWORD;rtype:TsrDataType);
|
|
procedure emit_V_ADD_I32;
|
|
procedure emit_V_SUB_I32;
|
|
procedure emit_V_SUBREV_I32;
|
|
procedure emit_V2_F32(OpId:DWORD);
|
|
procedure emit_V_SUBREV_F32;
|
|
procedure emit_V_CVT_PKRTZ_F16_F32;
|
|
procedure emit_V_MUL_I32_I24;
|
|
procedure emit_V_MUL_U32_U24;
|
|
procedure emit_V_MAC_F32;
|
|
procedure emit_V_MADAK_F32;
|
|
procedure emit_V_MADMK_F32;
|
|
procedure emit_V_MMX(OpId:DWORD;rtype:TsrDataType);
|
|
end;
|
|
|
|
implementation
|
|
|
|
procedure TEmit_VOP2.emit_V_CNDMASK_B32;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..2] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUnknow);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUnknow);
|
|
src[2]:=MakeRead(get_vcc0,dtBool);
|
|
|
|
OpSelect(dst,src[0],src[1],src[2]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_AND_B32;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUnknow);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUnknow);
|
|
|
|
OpBitwiseAnd(dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_OR_B32;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUnknow);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUnknow);
|
|
|
|
OpBitwiseOr(dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_XOR_B32;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUInt32);
|
|
|
|
OpBitwiseXor(dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_SH(OpId:DWORD;rtype:TsrDataType);
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,rtype);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
|
|
|
|
src[1]:=OpBitwiseAndTo(src[1],31);
|
|
src[1]^.PrepType(ord(dtUInt32));
|
|
|
|
Op2(OpId,src[0]^.dtype,dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_SHREV(OpId:DWORD;rtype:TsrDataType);
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,rtype);
|
|
|
|
src[0]:=OpBitwiseAndTo(src[0],31);
|
|
src[0]^.PrepType(ord(dtUInt32));
|
|
|
|
Op2(OpId,src[1]^.dtype,dst,src[1],src[0]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_ADD_I32; //vdst = vsrc0.s + vsrc1.s; sdst[thread_id:] = carry_out & EXEC
|
|
Var
|
|
dst,car:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
exc:PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
car:=get_vcc0;
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
|
|
|
|
OpIAddExt(dst,car,src[0],src[1]);
|
|
|
|
exc:=MakeRead(get_exec0,dtUnknow);
|
|
OpBitwiseAnd(car,car^.current,exc); //carry_out & EXEC
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_SUB_I32; //vdst = vsrc0.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC
|
|
Var
|
|
dst,bor:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
exc:PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
bor:=get_vcc0;
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
|
|
|
|
OpISubExt(dst,bor,src[0],src[1]);
|
|
|
|
exc:=MakeRead(get_exec0,dtUnknow);
|
|
OpBitwiseAnd(bor,bor^.current,exc); //borrow_out & EXEC
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_SUBREV_I32; //vdst = vsrc1.u - vsub.u; sdst[thread_id:] = borrow_out & EXEC
|
|
Var
|
|
dst,bor:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
exc:PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
bor:=get_vcc0;
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUint32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUint32);
|
|
|
|
OpISubExt(dst,bor,src[1],src[0]);
|
|
|
|
exc:=MakeRead(get_exec0,dtUnknow);
|
|
OpBitwiseAnd(bor,bor^.current,exc); //borrow_out & EXEC
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V2_F32(OpId:DWORD);
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
|
|
|
|
Op2(OpId,dtFloat32,dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_SUBREV_F32;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
|
|
|
|
Op2(Op.OpFSub,dtFloat32,dst,src[1],src[0]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_CVT_PKRTZ_F16_F32;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
|
|
|
|
OpConvFloatToHalf2(dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_MUL_I32_I24;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
bit24:PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtInt32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtInt32);
|
|
|
|
bit24:=NewReg_q(dtUInt32,$FFFFFF);
|
|
|
|
src[0]:=OpBitwiseAndTo(src[0],bit24);
|
|
src[0]^.PrepType(ord(dtInt32));
|
|
|
|
src[1]:=OpBitwiseAndTo(src[1],bit24);
|
|
src[1]^.PrepType(ord(dtInt32));
|
|
|
|
OpIMul(dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_MUL_U32_U24;
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
bit24:PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtUInt32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtUInt32);
|
|
|
|
bit24:=NewReg_q(dtUInt32,$FFFFFF);
|
|
|
|
src[0]:=OpBitwiseAndTo(src[0],bit24);
|
|
src[0]^.PrepType(ord(dtUInt32));
|
|
|
|
src[1]:=OpBitwiseAndTo(src[1],bit24);
|
|
src[1]^.PrepType(ord(dtUInt32));
|
|
|
|
OpIMul(dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_MAC_F32; //vdst = vsrc0.f * vsrc1.f + vdst.f -> fma
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..2] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
|
|
src[2]:=MakeRead(dst,dtFloat32);
|
|
|
|
OpFmaF32(dst,src[0],src[1],src[2]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_MADAK_F32; //vdst = vsrc0.f * vsrc1.f + kadd.f
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..2] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
|
|
src[2]:=NewReg_q(dtFloat32,FSPI.INLINE32);
|
|
|
|
OpFmaF32(dst,src[0],src[1],src[2]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_MADMK_F32; //vdst = vsrc0.f * kmul.f + vadd.f
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..2] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,dtFloat32);
|
|
src[1]:=NewReg_q(dtFloat32,FSPI.INLINE32);
|
|
src[2]:=fetch_vsrc8(FSPI.VOP2.VSRC1,dtFloat32);
|
|
|
|
OpFmaF32(dst,src[0],src[1],src[2]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_V_MMX(OpId:DWORD;rtype:TsrDataType);
|
|
Var
|
|
dst:PsrRegSlot;
|
|
src:array[0..1] of PsrRegNode;
|
|
begin
|
|
dst:=get_vdst8(FSPI.VOP2.VDST);
|
|
|
|
src[0]:=fetch_ssrc9(FSPI.VOP2.SRC0 ,rtype);
|
|
src[1]:=fetch_vsrc8(FSPI.VOP2.VSRC1,rtype);
|
|
|
|
OpGlsl2(OpId,rtype,dst,src[0],src[1]);
|
|
end;
|
|
|
|
procedure TEmit_VOP2.emit_VOP2;
|
|
begin
|
|
|
|
Case FSPI.VOP2.OP of
|
|
|
|
V_CNDMASK_B32: emit_V_CNDMASK_B32;
|
|
|
|
V_AND_B32 : emit_V_AND_B32;
|
|
V_OR_B32 : emit_V_OR_B32;
|
|
V_XOR_B32 : emit_V_XOR_B32;
|
|
|
|
V_LSHL_B32 : emit_V_SH(Op.OpShiftLeftLogical,dtUint32);
|
|
V_LSHLREV_B32: emit_V_SHREV(Op.OpShiftLeftLogical,dtUint32);
|
|
V_LSHR_B32 : emit_V_SH(Op.OpShiftRightLogical,dtUint32);
|
|
V_LSHRREV_B32: emit_V_SHREV(Op.OpShiftRightLogical,dtUint32);
|
|
V_ASHR_I32 : emit_V_SH(Op.OpShiftRightLogical,dtInt32);
|
|
V_ASHRREV_I32: emit_V_SHREV(Op.OpShiftRightLogical,dtInt32);
|
|
|
|
V_ADD_I32 : emit_V_ADD_I32;
|
|
V_SUB_I32 : emit_V_SUB_I32;
|
|
V_SUBREV_I32 : emit_V_SUBREV_I32;
|
|
|
|
V_ADD_F32 : emit_V2_F32(Op.OpFAdd);
|
|
V_SUB_F32 : emit_V2_F32(Op.OpFSub);
|
|
V_SUBREV_F32 : emit_V_SUBREV_F32;
|
|
|
|
V_MUL_F32 :emit_V2_F32(Op.OpFMul);
|
|
|
|
V_CVT_PKRTZ_F16_F32: emit_V_CVT_PKRTZ_F16_F32;
|
|
|
|
V_MUL_I32_I24: emit_V_MUL_I32_I24;
|
|
V_MUL_U32_U24: emit_V_MUL_U32_U24;
|
|
|
|
V_MAC_F32 : emit_V_MAC_F32;
|
|
V_MADAK_F32: emit_V_MADAK_F32;
|
|
V_MADMK_F32: emit_V_MADMK_F32;
|
|
|
|
V_MIN_LEGACY_F32: emit_V_MMX(GlslOp.NMin,dtFloat32);
|
|
V_MAX_LEGACY_F32: emit_V_MMX(GlslOp.NMax,dtFloat32);
|
|
|
|
V_MIN_F32: emit_V_MMX(GlslOp.FMin,dtFloat32);
|
|
V_MAX_F32: emit_V_MMX(GlslOp.FMax,dtFloat32);
|
|
|
|
V_MIN_I32: emit_V_MMX(GlslOp.SMin,dtInt32);
|
|
V_MAX_I32: emit_V_MMX(GlslOp.SMax,dtInt32);
|
|
|
|
V_MIN_U32: emit_V_MMX(GlslOp.UMin,dtUint32);
|
|
V_MAX_U32: emit_V_MMX(GlslOp.UMax,dtUint32);
|
|
|
|
else
|
|
Assert(false,'VOP2?'+IntToStr(FSPI.VOP2.OP));
|
|
end;
|
|
|
|
end;
|
|
|
|
end.
|
|
|