// FPPS4/spirv/emit_vop1.pas
//
// 554 lines
// 11 KiB
// Plaintext

unit emit_VOP1;
{$mode objfpc}{$H+}
interface
uses
sysutils,
ps4_pssl,
spirv,
srNode,
srType,
srTypes,
srReg,
srConst,
emit_fetch;
type
  // Emitter for the VOP1 instruction family; extends TEmitFetch.
  // emit_VOP1 is the dispatcher, every other method handles one opcode
  // (or one group of opcodes that share an implementation).
  TEmit_VOP1=class(TEmitFetch)
    // dispatcher
    procedure emit_VOP1;

    // moves
    procedure emit_V_MOV_B32;
    procedure emit_V_READFIRSTLANE_B32;
    procedure emit_V_MOVRELS_B32;
    procedure emit_V_MOVRELD_B32;

    // conversions
    procedure emit_V_CVT(OpId:DWORD;dst_type,src_type:TsrDataType);
    procedure emit_V_CVT_F16_F32;
    procedure emit_V_CVT_F32_F16;
    procedure emit_V_CVT_OFF_F32_I4;
    procedure emit_V_CVT_FLR_I32_F32;
    procedure emit_V_CVT_RPI_I32_F32;
    procedure emit_V_CVT_F32_UBYTE0;
    procedure emit_V_CVT_F32_UBYTE1;
    procedure emit_V_CVT_F32_UBYTE2;
    procedure emit_V_CVT_F32_UBYTE3;

    // float math
    procedure emit_V_EXT_F32(OpId:DWORD);
    procedure emit_V_RSQ_CLAMP_F32;
    procedure emit_V_SIN_COS(OpId:DWORD);

    // bit operations and reciprocal
    procedure emit_V_NOT_B32;
    procedure emit_V_RCP_F32;
    procedure emit_V_FFBH_U32;
    procedure emit_V_FFBL_B32;
    procedure emit_V_BFREV_B32;
  end;
implementation
uses
srPrivate;
// V_MOV_B32: copy SRC0 into VDST without imposing a data type.
procedure TEmit_VOP1.emit_V_MOV_B32;
var
  target:PsrRegSlot;
  value :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  // dtUnknow: the source is fetched untyped
  value:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUnknow);
  MakeCopy(target,value);
end;
// V_READFIRSTLANE_B32 (sdst, vsrc)
// TODO: V_READFIRSTLANE_B32 - currently emitted as a plain copy of the
// source slot into the scalar destination slot.
procedure TEmit_VOP1.emit_V_READFIRSTLANE_B32; //sdst, vsrc
var
  target:PsrRegSlot;
  origin:PsrRegSlot;
  value :TsrRegNode;
begin
  // NOTE: the VDST field is decoded as a scalar destination (SDST) here
  target:=get_sdst8(FSPI.VOP1.VDST);
  origin:=get_ssrc9(FSPI.VOP1.SRC0);

  // array-backed vector sources are not supported yet
  if (origin^.Category=cVectorArray) then
  begin
    Assert(false,'TODO: arrayed V_READFIRSTLANE_B32');
  end;

  value:=MakeRead(origin,dtUnknow);
  MakeCopy(target,value);
end;
// V_MOVRELS_B32: vdst = VGPR[vgpr_index_of(vsrc) + M0.u]
// When M0 resolves to a constant the relative source is addressed
// directly. Otherwise the VGPRs from the source register up to FVGPRS-1
// are spilled into a private float array and one element is loaded back
// using M0 as a dynamic index.
// NOTE(review): the "OOB:VGPR0" fallback from the instruction summary is
// not implemented here; only Assert(first<limit) guards the range.
procedure TEmit_VOP1.emit_V_MOVRELS_B32; //vdst = VGPR[vgpr_index_of(vsrc) + M0.u] OOB:VGPR0
var
  n,first,limit:WORD;
  arr     :TsrPrivate;
  elemType:TsrType;
  m0reg   :TsrRegNode;
  slotIdx :TsrRegNode;
  chain   :TsrNode;
  target  :PsrRegSlot;
  value   :TsrRegNode;
begin
  m0reg:=MakeRead(get_m0,dtUnknow);
  m0reg:=RegDown(m0reg);

  // static case: M0 is a known constant
  if m0reg.is_const then
  begin
    n:=m0reg.AsConst.AsInt32;
    target:=get_vdst8 (FSPI.VOP1.VDST);
    value :=fetch_ssrc9(FSPI.VOP1.SRC0+n,dtUnknow); //vgpr_index_of(vsrc) + M0.u
    MakeCopy(target,value);
    Exit;
  end;

  // dynamic case
  first:=FSPI.VOP1.SRC0-256; // 9-bit source operand -> VGPR number
  limit:=FVGPRS;
  Assert(first<limit);

  arr     :=PrivateList.FetchArray(dtFloat32,limit-first);
  elemType:=TypeList.Fetch(dtFloat32);

  // spill: store every candidate VGPR into the private array
  for n:=first to limit-1 do
  begin
    slotIdx:=NewImm_i(dtInt32,(n-first));
    chain  :=OpAccessChainTo(elemType,arr.pVar,slotIdx);
    value  :=fetch_vsrc8(n,dtFloat32);
    OpStore(line,chain,value);
  end;

  // load the element selected by M0
  slotIdx:=MakeRead(get_m0,dtInt32);
  chain  :=OpAccessChainTo(elemType,arr.pVar,slotIdx);
  value  :=OpLoadTo(elemType,chain);

  target:=get_vdst8(FSPI.VOP1.VDST);
  MakeCopy(target,value);
end;
// V_MOVRELD_B32: VGPR[vgpr_index_of(vdst0) + M0.u] = vsrc
// When M0 resolves to a constant the relative destination is addressed
// directly. Otherwise the VGPRs from VDST up to FVGPRS-1 are spilled into
// a private float array, the source is stored at the M0-selected element,
// and every register is reloaded from the array.
// NOTE(review): the "OOB:discard" behaviour from the instruction summary
// is not implemented here; only Assert(first<limit) guards the range.
procedure TEmit_VOP1.emit_V_MOVRELD_B32; //VGPR[vgpr_index_of(vdst0) + M0.u] = vsrc OOB:discard
var
  n,first,limit:WORD;
  arr     :TsrPrivate;
  elemType:TsrType;
  m0reg   :TsrRegNode;
  slotIdx :TsrRegNode;
  chain   :TsrNode;
  target  :PsrRegSlot;
  value   :TsrRegNode;
  chains  :array[0..255] of TsrNode; // access chains, indexed by VGPR number
begin
  m0reg:=MakeRead(get_m0,dtUnknow);
  m0reg:=RegDown(m0reg);

  // static case: M0 is a known constant
  if m0reg.is_const then
  begin
    n:=m0reg.AsConst.AsInt32;
    target:=get_vdst8 (FSPI.VOP1.VDST+n); //vgpr_index_of(vdst0) + M0.u
    value :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUnknow);
    MakeCopy(target,value);
    Exit;
  end;

  // dynamic case
  first:=FSPI.VOP1.VDST;
  limit:=FVGPRS;
  Assert(first<limit);

  arr     :=PrivateList.FetchArray(dtFloat32,limit-first);
  elemType:=TypeList.Fetch(dtFloat32);

  // spill: store every candidate VGPR into the private array and
  // remember its access chain for the reload pass
  for n:=first to limit-1 do
  begin
    slotIdx  :=NewImm_i(dtInt32,(n-first));
    chain    :=OpAccessChainTo(elemType,arr.pVar,slotIdx);
    chains[n]:=chain;
    value    :=fetch_vsrc8(n,dtFloat32);
    OpStore(line,chain,value);
  end;

  // write the source value at the element selected by M0
  slotIdx:=MakeRead(get_m0,dtInt32);
  chain  :=OpAccessChainTo(elemType,arr.pVar,slotIdx);
  value  :=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);
  OpStore(line,chain,value);

  // reload: copy every array element back into its VGPR
  for n:=first to limit-1 do
  begin
    chain :=chains[n];
    value :=OpLoadTo(elemType,chain);
    target:=get_vdst8(n);
    MakeCopy(target,value);
  end;
end;
// Generic one-operand conversion.
//  OpId     - opcode handed to Op1
//  dst_type - data type of the result written to VDST
//  src_type - data type SRC0 is fetched as
procedure TEmit_VOP1.emit_V_CVT(OpId:DWORD;dst_type,src_type:TsrDataType);
var
  target:PsrRegSlot;
  value :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  value :=fetch_ssrc9(FSPI.VOP1.SRC0,src_type);
  Op1(OpId,dst_type,target,value);
end;
// V_CVT_F16_F32: vdst[15:0].hf = ConvertFloatToHalfFloat(vsrc.f)
// The destination is written as a two-component half vector: component 0
// holds the converted value, component 1 is an immediate zero.
procedure TEmit_VOP1.emit_V_CVT_F16_F32; //vdst[15:0].hf = ConvertFloatToHalfFloat(vsrc.f)
var
  target:PsrRegSlot;
  halves:array[0..1] of TsrRegNode;
  packed2:TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);

  // component 0: the float source narrowed to half
  halves[0]:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);
  halves[0]:=OpFToF(halves[0],dtHalf16);

  // component 1: zero
  halves[1]:=NewImm_s(dtHalf16,0);

  packed2:=OpMakeVec(line,dtVec2h,@halves);

  target^.New(dtVec2h).pWriter:=packed2;
end;
// V_CVT_F32_F16: vdst.f = ConvertHalfFloatToFloat(vsrc[15:0].hf)
// The source is fetched as a two-component half vector; component 0 is
// extracted and widened with OpFConvert.
procedure TEmit_VOP1.emit_V_CVT_F32_F16; //vdst.f = ConvertHalfFloatToFloat(vsrc[15:0].hf)
var
  target :PsrRegSlot;
  packed2:TsrRegNode;
  lowHalf:TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  packed2:=fetch_ssrc9(FSPI.VOP1.SRC0,dtVec2h{dtUnknow});

  // pull out component 0 as a half value
  lowHalf:=NewReg(dtHalf16);
  OpExtract(line,lowHalf,packed2,0);

  Op1(Op.OpFConvert,dtFloat32,target,lowHalf);
end;
//V_CVT_OFF_F32_I4
//vdst.f = sign_extend(vsrc[3:0])/16
// The low nibble is a SIGNED 4-bit integer:
//  0000..0111 ->  0.0    .. +0.4375
//  1000..1111 -> -0.5    .. -0.0625
// The previous formula ((x and 15)-8)/16 treated the nibble as a biased
// unsigned value and therefore mapped 0 to -0.5 and 8 to 0.0.
procedure TEmit_VOP1.emit_V_CVT_OFF_F32_I4;
Var
 dst:PsrRegSlot;
 src:TsrRegNode;
 num_16:TsrRegNode;
begin
 dst:=get_vdst8(FSPI.VOP1.VDST);
 src:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

 //sign-extend the nibble: ((x-8) and 15)-8
 // (x-8) and (x+8) are congruent modulo 16, so the masked value equals
 // (nibble xor 8); subtracting 8 again yields the value in -8..7
 src:=OpISubTo(src,8);
 src:=OpAndTo(src,15);
 src.PrepType(ord(dtInt32));
 src:=OpISubTo(src,8);

 src:=OpSToF(src,dtFloat32);

 num_16:=NewImm_s(dtFloat32,16);
 Op2(Op.OpFDiv,dtFloat32,dst,src,num_16);
end;
// V_CVT_FLR_I32_F32: vdst.i = ConvertFloatToSignedInt(floor(vsrc.f))
procedure TEmit_VOP1.emit_V_CVT_FLR_I32_F32; //ConvertFloatToSignedInt(floor(vsrc.f))
var
  target :PsrRegSlot;
  operand:TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  operand:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

  // round toward negative infinity before the signed conversion
  operand:=OpFloorTo(operand);

  Op1(Op.OpConvertFToS,dtInt32,target,operand);
end;
// V_CVT_RPI_I32_F32: vdst.i = ConvertFloatToSignedInt(floor(vsrc.f+0.5))
procedure TEmit_VOP1.emit_V_CVT_RPI_I32_F32; //ConvertFloatToSignedInt(floor(vsrc.f+0.5))
var
  target :PsrRegSlot;
  operand:TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  operand:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

  // bias by one half, then floor
  operand:=OpFAddToS(operand,0.5);
  operand:=OpFloorTo(operand);

  Op1(Op.OpConvertFToS,dtInt32,target,operand);
end;
// V_CVT_F32_UBYTE0: vdst.f = UnsignedToFloat(vsrc and $FF)
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE0;
var
  target:PsrRegSlot;
  octet :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  octet :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

  // isolate the lowest byte; no shift is needed for byte 0
  octet:=OpAndTo(octet,$FF);
  octet.PrepType(ord(dtUInt32));

  Op1(Op.OpConvertUToF,dtFloat32,target,octet);
end;
// V_CVT_F32_UBYTE1: vdst.f = UnsignedToFloat((vsrc shr 8) and $FF)
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE1;
var
  target:PsrRegSlot;
  octet :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  octet :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

  // move byte 1 down to the low bits and mask it
  octet:=OpShrTo(octet,8);
  octet:=OpAndTo(octet,$FF);
  octet.PrepType(ord(dtUInt32));

  Op1(Op.OpConvertUToF,dtFloat32,target,octet);
end;
// V_CVT_F32_UBYTE2: vdst.f = UnsignedToFloat((vsrc shr 16) and $FF)
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE2;
var
  target:PsrRegSlot;
  octet :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  octet :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

  // move byte 2 down to the low bits and mask it
  octet:=OpShrTo(octet,16);
  octet:=OpAndTo(octet,$FF);
  octet.PrepType(ord(dtUInt32));

  Op1(Op.OpConvertUToF,dtFloat32,target,octet);
end;
// V_CVT_F32_UBYTE3: vdst.f = UnsignedToFloat((vsrc shr 24) and $FF)
procedure TEmit_VOP1.emit_V_CVT_F32_UBYTE3;
var
  target:PsrRegSlot;
  octet :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  octet :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);

  // move byte 3 down to the low bits and mask it
  octet:=OpShrTo(octet,24);
  octet:=OpAndTo(octet,$FF);
  octet.PrepType(ord(dtUInt32));

  Op1(Op.OpConvertUToF,dtFloat32,target,octet);
end;
// Generic float32 -> float32 operation emitted through OpGlsl1.
//  OpId - extended-instruction opcode supplied by the dispatcher
//         (GlslOp.Fract, GlslOp.Floor, GlslOp.Sqrt, ...)
procedure TEmit_VOP1.emit_V_EXT_F32(OpId:DWORD);
var
  target :PsrRegSlot;
  operand:TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  operand:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);
  OpGlsl1(OpId,dtFloat32,target,operand);
end;
// V_RSQ_CLAMP_F32: vdst.f = NMin(InverseSqrt(vsrc.f), FLT_MAX)
// NOTE(review): only the upper bound is applied here; whether a matching
// lower clamp at -FLT_MAX is required should be confirmed against the ISA.
procedure TEmit_VOP1.emit_V_RSQ_CLAMP_F32;
var
  target :PsrRegSlot;
  operand:TsrRegNode;
  ceiling:TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  operand:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

  // step 1: unclamped reciprocal square root, written to the destination
  OpGlsl1(GlslOp.InverseSqrt,dtFloat32,target,operand);

  // step 2: read the result back and clamp it from above
  operand:=MakeRead(target,dtFloat32);
  ceiling:=NewImm_s(dtFloat32,FLT_MAX);

  OpGlsl2(GlslOp.NMin,dtFloat32,target,operand,ceiling);
end;
// Shared emitter for V_SIN_F32 / V_COS_F32.
// The source is multiplied by 2*PI before the trigonometric opcode is
// applied.
//  OpId - GlslOp.Sin or GlslOp.Cos, chosen by the dispatcher
procedure TEmit_VOP1.emit_V_SIN_COS(OpId:DWORD);
const
  TwoPi:Single=2*PI;
var
  target:PsrRegSlot;
  angle :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  angle :=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

  // scale the operand by 2*PI
  angle:=OpFMulToS(angle,TwoPi);

  OpGlsl1(OpId,dtFloat32,target,angle);
end;
// V_NOT_B32: emits OpNot on SRC0 into VDST; the source is fetched untyped.
procedure TEmit_VOP1.emit_V_NOT_B32;
var
  target:PsrRegSlot;
  bits  :TsrRegNode;
begin
  target:=get_vdst8(FSPI.VOP1.VDST);
  bits  :=fetch_ssrc9(FSPI.VOP1.SRC0,dtUnknow);
  OpNot(target,bits);
end;
// V_RCP_F32 (also used for V_RCP_IFLAG_F32): vdst.f = 1.0 / vsrc.f
procedure TEmit_VOP1.emit_V_RCP_F32;
var
  target :PsrRegSlot;
  divisor:TsrRegNode;
  unity  :TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  divisor:=fetch_ssrc9(FSPI.VOP1.SRC0,dtFloat32);

  // numerator: immediate 1.0
  unity:=NewImm_s(dtFloat32,1);

  Op2(Op.OpFDiv,dtFloat32,target,unity,divisor);
end;
// V_FFBH_U32: position of the highest set bit, counted from the left.
// Gcn wants the MSB position counting from the left, but SPIR-V counts
// from the rightmost (LSB) position, so the FindUMsb result is subtracted
// from 31. A zero source selects 0xFFFFFFFF instead.
procedure TEmit_VOP1.emit_V_FFBH_U32;
var
  target  :PsrRegSlot;
  operand :TsrRegNode;
  topBit  :TsrRegNode;
  fromLeft:TsrRegNode;
  nonZero :TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  operand:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUint32);

  // highest set bit, LSB-relative
  topBit:=NewReg(dtUint32);
  _OpGlsl1(line,GlslOp.FindUMsb,topBit,operand);

  // convert to an MSB-relative position
  fromLeft:=OpISubTo(NewImm_i(dtUint32,31),topBit);

  // condition: source is not zero
  nonZero:=OpINotEqualTo(operand,NewImm_i(dtUint32,0));

  // OpSelect(dst, cond, value_if_true, value_if_false)
  OpSelect(target,nonZero,fromLeft,NewImm_q(dtUint32,High(DWORD)));
end;
// V_FFBL_B32: emitted as GlslOp.FindILsb on the source read as a signed
// 32-bit integer; the result is written as dtInt32.
procedure TEmit_VOP1.emit_V_FFBL_B32;
var
  target :PsrRegSlot;
  operand:TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  operand:=fetch_ssrc9(FSPI.VOP1.SRC0,dtInt32);
  OpGlsl1(GlslOp.FindILsb,dtInt32,target,operand);
end;
// V_BFREV_B32: emitted as OpBitReverse on the source read as an unsigned
// 32-bit integer.
procedure TEmit_VOP1.emit_V_BFREV_B32;
var
  target :PsrRegSlot;
  operand:TsrRegNode;
begin
  target :=get_vdst8(FSPI.VOP1.VDST);
  operand:=fetch_ssrc9(FSPI.VOP1.SRC0,dtUInt32);
  Op1(Op.OpBitReverse,dtUInt32,target,operand);
end;
// Dispatcher: selects the emitter for the VOP1 opcode held in
// FSPI.VOP1.OP. Opcodes without an emitter trigger an assertion that
// reports the opcode number and the decoded instruction text.
procedure TEmit_VOP1.emit_VOP1;
begin
  case FSPI.VOP1.OP of

    V_NOP: ; // nothing to emit

    // ---- moves ----
    V_MOV_B32          : emit_V_MOV_B32;
    V_READFIRSTLANE_B32: emit_V_READFIRSTLANE_B32;
    V_MOVRELS_B32      : emit_V_MOVRELS_B32;
    V_MOVRELD_B32      : emit_V_MOVRELD_B32;

    // ---- int <-> float conversions ----
    V_CVT_F32_I32: emit_V_CVT(Op.OpConvertSToF,dtFloat32,dtInt32);
    V_CVT_F32_U32: emit_V_CVT(Op.OpConvertUToF,dtFloat32,dtUInt32);
    V_CVT_U32_F32: emit_V_CVT(Op.OpConvertFToU,dtUInt32 ,dtFloat32);
    V_CVT_I32_F32: emit_V_CVT(Op.OpConvertFToS,dtInt32  ,dtFloat32);

    // ---- half <-> float conversions ----
    V_CVT_F16_F32: emit_V_CVT_F16_F32;
    V_CVT_F32_F16: emit_V_CVT_F32_F16;

    // ---- special conversions ----
    V_CVT_OFF_F32_I4 : emit_V_CVT_OFF_F32_I4;
    V_CVT_FLR_I32_F32: emit_V_CVT_FLR_I32_F32;
    V_CVT_RPI_I32_F32: emit_V_CVT_RPI_I32_F32;

    // ---- byte -> float conversions ----
    V_CVT_F32_UBYTE0: emit_V_CVT_F32_UBYTE0;
    V_CVT_F32_UBYTE1: emit_V_CVT_F32_UBYTE1;
    V_CVT_F32_UBYTE2: emit_V_CVT_F32_UBYTE2;
    V_CVT_F32_UBYTE3: emit_V_CVT_F32_UBYTE3;

    // ---- rounding ----
    V_FRACT_F32: emit_V_EXT_F32(GlslOp.Fract);
    V_TRUNC_F32: emit_V_EXT_F32(GlslOp.Trunc);
    V_CEIL_F32 : emit_V_EXT_F32(GlslOp.Ceil);
    V_RNDNE_F32: emit_V_EXT_F32(GlslOp.RoundEven);
    V_FLOOR_F32: emit_V_EXT_F32(GlslOp.Floor);

    // ---- transcendental ----
    V_EXP_F32      : emit_V_EXT_F32(GlslOp.Exp2);
    V_LOG_F32      : emit_V_EXT_F32(GlslOp.Log2);
    V_RSQ_F32      : emit_V_EXT_F32(GlslOp.InverseSqrt);
    V_RSQ_CLAMP_F32: emit_V_RSQ_CLAMP_F32;
    V_SQRT_F32     : emit_V_EXT_F32(GlslOp.Sqrt);
    V_SIN_F32      : emit_V_SIN_COS(GlslOp.Sin);
    V_COS_F32      : emit_V_SIN_COS(GlslOp.Cos);

    // ---- reciprocal (both variants share one emitter) ----
    V_RCP_F32      : emit_V_RCP_F32;
    V_RCP_IFLAG_F32: emit_V_RCP_F32;

    // ---- bit operations ----
    V_NOT_B32  : emit_V_NOT_B32;
    V_FFBH_U32 : emit_V_FFBH_U32;
    V_FFBL_B32 : emit_V_FFBL_B32;
    V_BFREV_B32: emit_V_BFREV_B32;

  else
    Assert(false,'VOP1?'+IntToStr(FSPI.VOP1.OP)+' '+get_str_spi(FSPI));
  end;
end;
end.