mirror of https://github.com/red-prig/fpPS4.git
554 lines
11 KiB
Plaintext
554 lines
11 KiB
Plaintext
unit emit_VOP1;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
interface
|
|
|
|
uses
|
|
sysutils,
|
|
ps4_pssl,
|
|
spirv,
|
|
srNode,
|
|
srType,
|
|
srTypes,
|
|
srReg,
|
|
srConst,
|
|
emit_fetch;
|
|
|
|
type
|
|
//Emitter for the VOP1 instruction group.
//emit_VOP1 is the dispatcher: it selects one of the handlers below
//according to FSPI.VOP1.OP.
TEmit_VOP1=class(TEmitFetch)
 //dispatcher
 procedure emit_VOP1;

 //moves / lane access / relative addressing through M0
 procedure emit_V_MOV_B32;
 procedure emit_V_READFIRSTLANE_B32;
 procedure emit_V_MOVRELS_B32;
 procedure emit_V_MOVRELD_B32;

 //conversions
 procedure emit_V_CVT(OpId:DWORD;dst_type,src_type:TsrDataType); //generic single-opcode conversion
 procedure emit_V_CVT_F16_F32;
 procedure emit_V_CVT_F32_F16;
 procedure emit_V_CVT_OFF_F32_I4;
 procedure emit_V_CVT_FLR_I32_F32;
 procedure emit_V_CVT_RPI_I32_F32;
 procedure emit_V_CVT_F32_UBYTE0;
 procedure emit_V_CVT_F32_UBYTE1;
 procedure emit_V_CVT_F32_UBYTE2;
 procedure emit_V_CVT_F32_UBYTE3;

 //float math
 procedure emit_V_EXT_F32(OpId:DWORD); //one-operand GLSL extended instruction on a float32
 procedure emit_V_RSQ_CLAMP_F32;
 procedure emit_V_SIN_COS(OpId:DWORD);
 procedure emit_V_RCP_F32;

 //bit operations
 procedure emit_V_NOT_B32;
 procedure emit_V_FFBH_U32;
 procedure emit_V_FFBL_B32;
 procedure emit_V_BFREV_B32;
end;
|
|
|
|
implementation
|
|
|
|
uses
|
|
srPrivate;
|
|
|
|
//V_MOV_B32: copies SRC0 into the VGPR selected by VDST.
//The operand is fetched as dtUnknow, i.e. no particular data type is requested.
procedure TEmit_VOP1.emit_V_MOV_B32;
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUnknow);

 MakeCopy(target,value);
end;
|
|
|
|
//V_READFIRSTLANE_B32 (sdst, vsrc).
//TODO: V_READFIRSTLANE_B32
//The current implementation reads the source slot and copies it to the
//destination slot; no lane selection is emitted here.
procedure TEmit_VOP1.emit_V_READFIRSTLANE_B32; //sdst, vsrc
var
 target:PsrRegSlot;
 source:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_sdst8(FSPI.VOP1.VDST); //NOTE: SDST - the VDST field is resolved through get_sdst8
 source:=get_ssrc9(FSPI.VOP1.SRC0);

 //sources of category cVectorArray are not supported yet
 Assert(source^.Category<>cVectorArray,'TODO: arrayed V_READFIRSTLANE_B32');

 value:=MakeRead(source,dtUnknow);

 MakeCopy(target,value);
end;
|
|
|
|
//V_MOVRELS_B32: vdst = VGPR[vgpr_index_of(vsrc) + M0.u] OOB:VGPR0
//
//Two strategies, chosen by whether M0 resolves to a constant:
// * constant M0 - the offset is folded into the SRC0 operand index and a
//                 plain copy is emitted;
// * dynamic M0  - VGPRs [SRC0-256 .. FVGPRS-1] are stored into a private
//                 float32 array, which is then read back at index M0.
//NOTE(review): no explicit out-of-bounds handling (the "OOB:VGPR0" case) is
//visible in this procedure - confirm whether it is handled elsewhere.
procedure TEmit_VOP1.emit_V_MOVRELS_B32; //vdst = VGPR[vgpr_index_of(vsrc) + M0.u] OOB:VGPR0
var
 reg,first,limit:WORD;

 scratch :TsrPrivate;
 elemType:TsrType;
 m0      :TsrRegNode;
 slotIdx :TsrRegNode;
 chain   :TsrNode;

 target:PsrRegSlot;
 value :TsrRegNode;
begin
 m0:=MakeRead(get_m0,dtUnknow);
 m0:=RegDown(m0);

 if m0.is_const then
 begin
  //constant offset: address the shifted source register directly
  reg:=m0.AsConst.AsInt32;

  target:=get_vdst8  (FSPI.VOP1.VDST);
  value :=fetch_ssrc9(FSPI.VOP1.SRC0+reg,dtUnknow); //vgpr_index_of(vsrc) + M0.u
  MakeCopy(target,value);

  Exit;
 end;

 //dynamic offset: go through a private array
 first:=FSPI.VOP1.SRC0-256; //SRC0 operand code minus 256 is used as the first VGPR index
 limit:=FVGPRS;
 Assert(first<limit);

 scratch :=PrivateList.FetchArray(dtFloat32,limit-first);
 elemType:=TypeList.Fetch(dtFloat32);

 //spill: store every VGPR of the window into the private array
 for reg:=first to limit-1 do
 begin
  slotIdx:=NewImm_i(dtInt32,(reg-first));

  chain:=OpAccessChainTo(elemType,scratch.pVar,slotIdx);

  value:=fetch_vsrc8(reg,dtFloat32);

  OpStore(line,chain,value);
 end;

 //indexed read: element M0 of the private array becomes the result
 m0:=MakeRead(get_m0,dtInt32);

 chain:=OpAccessChainTo(elemType,scratch.pVar,m0);

 value:=OpLoadTo(elemType,chain);

 target:=get_vdst8(FSPI.VOP1.VDST);

 MakeCopy(target,value);
end;
|
|
|
|
//V_MOVRELD_B32: VGPR[vgpr_index_of(vdst0) + M0.u] = vsrc OOB:discard
//
//Two strategies, chosen by whether M0 resolves to a constant:
// * constant M0 - the offset is folded into the VDST index and a plain
//                 copy is emitted;
// * dynamic M0  - VGPRs [VDST .. FVGPRS-1] are stored into a private float32
//                 array, SRC0 is stored at index M0, and then every element
//                 is loaded back into its VGPR.
//NOTE(review): no explicit out-of-bounds handling (the "OOB:discard" case) is
//visible in this procedure - confirm whether it is handled elsewhere.
procedure TEmit_VOP1.emit_V_MOVRELD_B32; //VGPR[vgpr_index_of(vdst0) + M0.u] = vsrc OOB:discard
var
 reg,first,limit:WORD;

 scratch :TsrPrivate;
 elemType:TsrType;
 m0      :TsrRegNode;
 slotIdx :TsrRegNode;
 chain   :TsrNode;

 target:PsrRegSlot;
 value :TsrRegNode;

 //access chain of every spilled register, indexed by VGPR number
 chains:array[0..255] of TsrNode;
begin
 m0:=MakeRead(get_m0,dtUnknow);
 m0:=RegDown(m0);

 if m0.is_const then
 begin
  //constant offset: address the shifted destination register directly
  reg:=m0.AsConst.AsInt32;

  target:=get_vdst8  (FSPI.VOP1.VDST+reg); //vgpr_index_of(vdst0) + M0.u
  value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUnknow);
  MakeCopy(target,value);

  Exit;
 end;

 //dynamic offset: go through a private array
 first:=FSPI.VOP1.VDST;
 limit:=FVGPRS;
 Assert(first<limit);

 scratch :=PrivateList.FetchArray(dtFloat32,limit-first);
 elemType:=TypeList.Fetch(dtFloat32);

 //spill: store every VGPR of the window and remember its access chain
 for reg:=first to limit-1 do
 begin
  slotIdx:=NewImm_i(dtInt32,(reg-first));

  chain:=OpAccessChainTo(elemType,scratch.pVar,slotIdx);

  chains[reg]:=chain;

  value:=fetch_vsrc8(reg,dtFloat32);

  OpStore(line,chain,value);
 end;

 //indexed write: overwrite element M0 with the source operand
 m0:=MakeRead(get_m0,dtInt32);

 chain:=OpAccessChainTo(elemType,scratch.pVar,m0);

 value:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);
 OpStore(line,chain,value);

 //restore: load every element back into its VGPR
 for reg:=first to limit-1 do
 begin
  chain:=chains[reg];

  value:=OpLoadTo(elemType,chain);

  target:=get_vdst8(reg);

  MakeCopy(target,value);
 end;
end;
|
|
|
|
//Generic one-opcode conversion.
// OpId     - opcode passed to Op1
// dst_type - data type of the result written to VDST
// src_type - data type requested when fetching SRC0
procedure TEmit_VOP1.emit_V_CVT(OpId:DWORD;dst_type,src_type:TsrDataType);
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,src_type);

 Op1(OpId,dst_type,target,value);
end;
|
|
|
|
//V_CVT_F16_F32: vdst[15:0].hf = ConvertFloatToHalfFloat(vsrc.f)
//The result is written as a dtVec2h whose element 0 is the converted value
//and whose element 1 is a zero constant.
procedure TEmit_VOP1.emit_V_CVT_F16_F32; //vdst[15:0].hf = ConvertFloatToHalfFloat(vsrc.f)
var
 target:PsrRegSlot;
 halves:array[0..1] of TsrRegNode;
 packed2h:TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);

 //element 0: the float32 operand narrowed to half
 halves[0]:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);
 halves[0]:=OpFToF(halves[0],dtHalf16);

 //element 1: constant zero
 halves[1]:=NewImm_s(dtHalf16,0);

 packed2h:=OpMakeVec(line,dtVec2h,@halves);

 target^.New(dtVec2h).pWriter:=packed2h;
end;
|
|
|
|
//V_CVT_F32_F16: vdst.f = ConvertHalfFloatToFloat(vsrc[15:0].hf)
//The operand is fetched as a dtVec2h, element 0 is extracted and then
//widened to float32 with OpFConvert.
procedure TEmit_VOP1.emit_V_CVT_F32_F16; //vdst.f = ConvertHalfFloatToFloat(vsrc[15:0].hf)
var
 target:PsrRegSlot;
 pair  :TsrRegNode;
 lowHalf:TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 pair  :=fetch_ssrc9(FSPI.VOP1.SRC0,dtVec2h);

 //take element 0 of the half vector
 lowHalf:=NewReg(dtHalf16);
 OpExtract(line,lowHalf,pair,0);

 Op1(Op.OpFConvert,dtFloat32,target,lowHalf);
end;
|
|
|
|
//V_CVT_OFF_F32_I4
//vdst.f = sign_extend(src[3:0]) / 16
//
//The low 4 bits of the operand are a two's-complement integer (-8..7):
// 0000 ->  0.0     0111 ->  0.4375
// 1000 -> -0.5     1111 -> -0.0625
//
//Sign extension is done with the identity
// sext4(x) = (((x and 15) - 8) and 15) - 8
//which needs only AND and SUB. The earlier form ((x and 15) - 8) / 16 was
//biased by 8: it mapped 0 to -0.5 and 8 to 0.0.
procedure TEmit_VOP1.emit_V_CVT_OFF_F32_I4;
Var
 dst:PsrRegSlot;
 src:TsrRegNode;
 num_16:TsrRegNode;
begin
 dst:=get_vdst8(FSPI.VOP1.VDST);
 src:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

 //t = src[3:0]                       (0..15)
 src:=OpAndTo(src,15);
 src.PrepType(ord(dtInt32));

 //u = (t - 8) and 15 = (t + 8) mod 16  (rotates the range by half)
 src:=OpISubTo(src,8);
 src:=OpAndTo(src,15);
 src.PrepType(ord(dtInt32));

 //s = u - 8                          (-8..7, the sign-extended nibble)
 src:=OpISubTo(src,8);

 src:=OpSToF(src,dtFloat32);

 //scale to [-0.5 .. 0.4375]
 num_16:=NewImm_s(dtFloat32,16);
 Op2(Op.OpFDiv,dtFloat32,dst,src,num_16);
end;
|
|
|
|
//V_CVT_FLR_I32_F32: ConvertFloatToSignedInt(floor(vsrc.f))
procedure TEmit_VOP1.emit_V_CVT_FLR_I32_F32; //ConvertFloatToSignedInt(floor(vsrc.f))
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

 //floor first, then convert to signed int32
 value:=OpFloorTo(value);

 Op1(Op.OpConvertFToS,dtInt32,target,value);
end;
|
|
|
|
//V_CVT_RPI_I32_F32: ConvertFloatToSignedInt(floor(vsrc.f+0.5))
procedure TEmit_VOP1.emit_V_CVT_RPI_I32_F32; //ConvertFloatToSignedInt(floor(vsrc.f+0.5))
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

 //add one half, floor, then convert to signed int32
 value:=OpFAddToS(value,0.5);
 value:=OpFloorTo(value);

 Op1(Op.OpConvertFToS,dtInt32,target,value);
end;
|
|
|
|
//V_CVT_F32_UBYTE0: vdst.f = float(src and $FF), i.e. bits [7:0] of the operand
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE0;
var
 target :PsrRegSlot;
 byteVal:TsrRegNode;
begin
 target :=get_vdst8(FSPI.VOP1.VDST);
 byteVal:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

 //isolate the byte and keep it typed as unsigned
 byteVal:=OpAndTo(byteVal,$FF);
 byteVal.PrepType(ord(dtUInt32));

 Op1(Op.OpConvertUToF,dtFloat32,target,byteVal);
end;
|
|
|
|
//V_CVT_F32_UBYTE1: vdst.f = float((src shr 8) and $FF), i.e. bits [15:8] of the operand
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE1;
var
 target :PsrRegSlot;
 byteVal:TsrRegNode;
begin
 target :=get_vdst8(FSPI.VOP1.VDST);
 byteVal:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

 //move the byte down, isolate it and keep it typed as unsigned
 byteVal:=OpShrTo(byteVal,8);
 byteVal:=OpAndTo(byteVal,$FF);
 byteVal.PrepType(ord(dtUInt32));

 Op1(Op.OpConvertUToF,dtFloat32,target,byteVal);
end;
|
|
|
|
//V_CVT_F32_UBYTE2: vdst.f = float((src shr 16) and $FF), i.e. bits [23:16] of the operand
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE2;
var
 target :PsrRegSlot;
 byteVal:TsrRegNode;
begin
 target :=get_vdst8(FSPI.VOP1.VDST);
 byteVal:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

 //move the byte down, isolate it and keep it typed as unsigned
 byteVal:=OpShrTo(byteVal,16);
 byteVal:=OpAndTo(byteVal,$FF);
 byteVal.PrepType(ord(dtUInt32));

 Op1(Op.OpConvertUToF,dtFloat32,target,byteVal);
end;
|
|
|
|
//V_CVT_F32_UBYTE3: vdst.f = float((src shr 24) and $FF), i.e. bits [31:24] of the operand
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE3;
var
 target :PsrRegSlot;
 byteVal:TsrRegNode;
begin
 target :=get_vdst8(FSPI.VOP1.VDST);
 byteVal:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

 //move the byte down, isolate it and keep it typed as unsigned
 byteVal:=OpShrTo(byteVal,24);
 byteVal:=OpAndTo(byteVal,$FF);
 byteVal.PrepType(ord(dtUInt32));

 Op1(Op.OpConvertUToF,dtFloat32,target,byteVal);
end;
|
|
|
|
//Emits a one-operand GLSL extended instruction on a float32 operand.
// OpId - GLSL extended opcode passed to OpGlsl1 (callers pass GlslOp.* values)
procedure TEmit_VOP1.emit_V_EXT_F32(OpId:DWORD);
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

 OpGlsl1(OpId,dtFloat32,target,value);
end;
|
|
|
|
//V_RSQ_CLAMP_F32: vdst.f = NMin(InverseSqrt(src.f), FLT_MAX)
//Emitted in two steps through the destination slot: the inverse square root
//is written to VDST, read back, and then limited from above by FLT_MAX.
procedure TEmit_VOP1.emit_V_RSQ_CLAMP_F32;
var
 target  :PsrRegSlot;
 value   :TsrRegNode;
 ceiling :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

 //step 1: unclamped reciprocal square root
 OpGlsl1(GlslOp.InverseSqrt,dtFloat32,target,value);

 //step 2: re-read the intermediate result and take the minimum with FLT_MAX
 value  :=MakeRead(target,dtFloat32);
 ceiling:=NewImm_s(dtFloat32,FLT_MAX);

 OpGlsl2(GlslOp.NMin,dtFloat32,target,value,ceiling);
end;
|
|
|
|
//Shared emitter for V_SIN_F32 / V_COS_F32.
// OpId - GLSL extended opcode (callers pass GlslOp.Sin or GlslOp.Cos)
//The operand is multiplied by 2*PI before the GLSL instruction is applied.
//NOTE(review): presumably the hardware operand is expressed in turns rather
//than radians - confirm against the ISA documentation.
procedure TEmit_VOP1.emit_V_SIN_COS(OpId:DWORD);
const
 TWO_PI:Single=2*PI;
var
 target:PsrRegSlot;
 angle :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 angle :=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

 angle:=OpFMulToS(angle,TWO_PI);

 OpGlsl1(OpId,dtFloat32,target,angle);
end;
|
|
|
|
//V_NOT_B32: applies OpNot to SRC0 and writes the result to VDST.
//The operand is fetched as dtUnknow, i.e. no particular data type is requested.
procedure TEmit_VOP1.emit_V_NOT_B32;
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUnknow);

 OpNot(target,value);
end;
|
|
|
|
//V_RCP_F32: vdst.f = 1.0 / src.f, emitted as an OpFDiv with a constant numerator.
//emit_VOP1 also routes V_RCP_IFLAG_F32 here.
procedure TEmit_VOP1.emit_V_RCP_F32;
var
 target     :PsrRegSlot;
 denominator:TsrRegNode;
 numerator  :TsrRegNode;
begin
 target     :=get_vdst8(FSPI.VOP1.VDST);
 denominator:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

 numerator:=NewImm_s(dtFloat32,1);

 Op2(Op.OpFDiv,dtFloat32,target,numerator,denominator);
end;
|
|
|
|
//V_FFBH_U32: position of the highest set bit, counted from the left.
//
//Gcn wants the MSB position counting from the left, but SPIR-V (FindUMsb)
//counts from the rightmost (LSB) position, so the result is 31-FindUMsb(src).
//When src is zero the result is 0xFFFFFFFF.
procedure TEmit_VOP1.emit_V_FFBH_U32;
var
 target :PsrRegSlot;
 value  :TsrRegNode;
 fromLsb:TsrRegNode;
 fromMsb:TsrRegNode;
 nonZero:TsrRegNode;
 imm    :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUint32);

 //bit index of the most significant set bit, counted from bit 0
 fromLsb:=NewReg(dtUint32);
 _OpGlsl1(line,GlslOp.FindUMsb,fromLsb,value);

 //convert to an index counted from bit 31
 imm    :=NewImm_i(dtUint32,31);
 fromMsb:=OpISubTo(imm,fromLsb);

 //condition: src <> 0
 imm    :=NewImm_i(dtUint32,0);
 nonZero:=OpINotEqualTo(value,imm);

 //dst,cond,src_true,src_false: select 0xFFFFFFFF if src was 0
 imm:=NewImm_q(dtUint32,High(DWORD));
 OpSelect(target,nonZero,fromMsb,imm);
end;
|
|
|
|
//V_FFBL_B32: emitted directly as GLSL FindILsb on the operand typed as int32.
procedure TEmit_VOP1.emit_V_FFBL_B32;
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtInt32);

 OpGlsl1(GlslOp.FindILsb,dtInt32,target,value);
end;
|
|
|
|
//V_BFREV_B32: emitted as OpBitReverse on the operand typed as uint32.
procedure TEmit_VOP1.emit_V_BFREV_B32;
var
 target:PsrRegSlot;
 value :TsrRegNode;
begin
 target:=get_vdst8(FSPI.VOP1.VDST);
 value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

 Op1(Op.OpBitReverse,dtUInt32,target,value);
end;
|
|
|
|
//Dispatcher for the VOP1 instruction group: selects the emitter that matches
//FSPI.VOP1.OP. An opcode without a handler trips an assertion that reports
//the opcode number and the textual form of the instruction.
procedure TEmit_VOP1.emit_VOP1;
begin
 case FSPI.VOP1.OP of

  //nothing to emit
  V_NOP:;

  //moves and lane access
  V_MOV_B32          :emit_V_MOV_B32;
  V_READFIRSTLANE_B32:emit_V_READFIRSTLANE_B32;

  //int <-> float conversions handled by a single opcode
  V_CVT_F32_I32:emit_V_CVT(Op.OpConvertSToF,dtFloat32,dtInt32);
  V_CVT_F32_U32:emit_V_CVT(Op.OpConvertUToF,dtFloat32,dtUInt32);
  V_CVT_U32_F32:emit_V_CVT(Op.OpConvertFToU,dtUInt32 ,dtFloat32);
  V_CVT_I32_F32:emit_V_CVT(Op.OpConvertFToS,dtInt32 ,dtFloat32);

  //half <-> float conversions
  V_CVT_F16_F32:emit_V_CVT_F16_F32;
  V_CVT_F32_F16:emit_V_CVT_F32_F16;

  //conversions that need extra arithmetic
  V_CVT_OFF_F32_I4 :emit_V_CVT_OFF_F32_I4;
  V_CVT_FLR_I32_F32:emit_V_CVT_FLR_I32_F32;
  V_CVT_RPI_I32_F32:emit_V_CVT_RPI_I32_F32;

  //byte N of the operand -> float
  V_CVT_F32_UBYTE0:emit_V_CVT_F32_UBYTE0;
  V_CVT_F32_UBYTE1:emit_V_CVT_F32_UBYTE1;
  V_CVT_F32_UBYTE2:emit_V_CVT_F32_UBYTE2;
  V_CVT_F32_UBYTE3:emit_V_CVT_F32_UBYTE3;

  //float math mapped one-to-one onto GLSL extended instructions
  V_FRACT_F32:emit_V_EXT_F32(GlslOp.Fract);
  V_TRUNC_F32:emit_V_EXT_F32(GlslOp.Trunc);
  V_CEIL_F32 :emit_V_EXT_F32(GlslOp.Ceil);
  V_RNDNE_F32:emit_V_EXT_F32(GlslOp.RoundEven);
  V_FLOOR_F32:emit_V_EXT_F32(GlslOp.Floor);
  V_EXP_F32  :emit_V_EXT_F32(GlslOp.Exp2);
  V_LOG_F32  :emit_V_EXT_F32(GlslOp.Log2);
  V_RSQ_F32  :emit_V_EXT_F32(GlslOp.InverseSqrt);
  V_SQRT_F32 :emit_V_EXT_F32(GlslOp.Sqrt);

  //float math with dedicated emitters
  V_RSQ_CLAMP_F32:emit_V_RSQ_CLAMP_F32;
  V_SIN_F32      :emit_V_SIN_COS(GlslOp.Sin);
  V_COS_F32      :emit_V_SIN_COS(GlslOp.Cos);
  V_RCP_F32      :emit_V_RCP_F32;
  V_RCP_IFLAG_F32:emit_V_RCP_F32; //shares the V_RCP_F32 emitter

  //bit operations
  V_NOT_B32  :emit_V_NOT_B32;
  V_FFBH_U32 :emit_V_FFBH_U32;
  V_FFBL_B32 :emit_V_FFBL_B32;
  V_BFREV_B32:emit_V_BFREV_B32;

  //relative addressing through M0
  V_MOVRELS_B32:emit_V_MOVRELS_B32;
  V_MOVRELD_B32:emit_V_MOVRELD_B32;

  else
   Assert(false,'VOP1?'+IntToStr(FSPI.VOP1.OP)+' '+get_str_spi(FSPI));
 end;
end;
|
|
|
|
end.
|
|
|