mirror of https://github.com/red-prig/fpPS4.git
517 lines
11 KiB
Plaintext
517 lines
11 KiB
Plaintext
unit emit_vbuf_store;
|
|
|
|
{$mode ObjFPC}{$H+}
|
|
|
|
interface
|
|
|
|
uses
|
|
sysutils,
|
|
spirv,
|
|
ps4_pssl,
|
|
srNode,
|
|
srType,
|
|
srReg,
|
|
srLayout,
|
|
emit_fetch,
|
|
srVBufInfo,
|
|
emit_vbuf_chain;
|
|
|
|
type
|
|
//Working state shared by the make_store_* helpers for one buffer store.
Tstore_cache=record
 info:TBuf_info;                  //descriptor/instruction info of the store (DFMT,NFMT,dsel,...)
 v:TvarChain;                     //resolved destination; v.vType selects the make_store_* path
 elem_orig,                       //element type taken from info.GetElemType
 elem_resl:TsrDataType;           //element type taken from info.GetResultType (type of fetched values)
 elem_count:ptruint;              //number of entries of elm that are in use
 elm:array[0..3] of TsrRegNode;   //per-component source values; nil = no value assigned
end;
|
|
|
|
//Emitter for buffer store instructions.
TEmit_vbuf_store=class(TEmitFetch)
 //entry point: resolves the chain for "info" and forwards to buf_store_cv
 procedure buf_store(info:TBuf_info);

 //source value helpers
 function  fetch_id(var lc:Tstore_cache;i:Byte):TsrRegNode; //register VDATA+i typed as lc.elem_resl
 function  fetch_zero(var lc:Tstore_cache):TsrRegNode;      //constant 0 of type lc.elem_resl
 function  fetch_one(var lc:Tstore_cache):TsrRegNode;       //constant 1 of type lc.elem_resl

 //value conversion helpers
 function  ClampTo(src:TsrRegNode;min_s,max_s:Single):TsrRegNode; //FClamp(src,min_s,max_s)
 function  F32ToF10(reg:TsrRegNode):TsrRegNode;                   //float32 -> 10 bit float pattern
 function  F32ToF11(reg:TsrRegNode):TsrRegNode;                   //float32 -> 11 bit float pattern

 //one store routine per TvarChain.vType
 procedure make_store_cv(var lc:Tstore_cache); //vcChainVector
 procedure make_store_ce(var lc:Tstore_cache); //vcChainElement
 procedure make_store_uv(var lc:Tstore_cache); //vcUniformVector
 procedure make_store_ue(var lc:Tstore_cache); //vcUniformElement

 //collects the source components and dispatches to a make_store_* routine
 procedure buf_store_cv(info:TBuf_info;v:TvarChain);
end;
|
|
|
|
implementation
|
|
|
|
//Entry point of a buffer store: looks up the destination chain for "info",
//adjusts the destination selector for the kind of chain that was found and
//hands both over to buf_store_cv.
procedure TEmit_vbuf_store.buf_store(info:TBuf_info);
var
 chain:TvarChain;
begin
 chain:=TEmit_vbuf_chain(TObject(Self)).get_chain(info);

 Case chain.vType of
  vcUniformVector:
   begin
    //reset dst sel
    info.dsel:=dst_sel_identity;
   end;
  else
   begin
    //every other chain kind uses the reversed selector
    info.dsel:=get_reverse_dst_sel(info.dsel);
   end;
 end;

 buf_store_cv(info,chain);
end;
|
|
|
|
//Fetches source register VDATA+i of the current MUBUF instruction, typed as
//lc.elem_resl. A nil result trips an assertion.
function TEmit_vbuf_store.fetch_id(var lc:Tstore_cache;i:Byte):TsrRegNode;
begin
 Result:=fetch_vdst8(FSPI.MUBUF.VDATA+i,lc.elem_resl);
 Assert(Result<>nil);
end;
|
|
|
|
//Returns an immediate with value 0 whose type is the result element type.
function TEmit_vbuf_store.fetch_zero(var lc:Tstore_cache):TsrRegNode;
var
 dtype:TsrDataType;
begin
 dtype :=lc.elem_resl;
 Result:=NewImm_q(dtype,0);
end;
|
|
|
|
//Returns an immediate with value 1 of the result element type:
//a float immediate for dtFloat32, an integer immediate for everything else.
function TEmit_vbuf_store.fetch_one(var lc:Tstore_cache):TsrRegNode;
begin
 Case lc.elem_resl of
  dtFloat32:Result:=NewImm_s(lc.elem_resl,1);
  else
   Result:=NewImm_i(lc.elem_resl,1);
 end;
end;
|
|
|
|
//Smaller of two unsigned pointer-sized integers (b when both are equal).
function Min(a,b:PtrUInt):PtrUInt; inline;
begin
 Result:=b;
 if (a<b) then
 begin
  Result:=a;
 end;
end;
|
|
|
|
//Emits GLSL FClamp(src,min_s,max_s) and returns the new dtFloat32 register
//that receives the clamped value.
function TEmit_vbuf_store.ClampTo(src:TsrRegNode;min_s,max_s:Single):TsrRegNode;
var
 bound_min:TsrRegNode;
 bound_max:TsrRegNode;
begin
 //destination first, then the two bounds as float immediates
 Result   :=NewReg(dtFloat32);
 bound_min:=NewImm_s(dtFloat32,min_s);
 bound_max:=NewImm_s(dtFloat32,max_s);

 _OpGlsl3(line,GlslOp.FClamp,Result,src,bound_min,bound_max);
end;
|
|
|
|
{
|
|
|
|
uint F32ToF10(float f) {
|
|
uint i = *reinterpret_cast<uint*>(&f);
|
|
|
|
uint t1 = i & 0x7fffffff; // Non-sign bits
|
|
uint t3 = i & 0xff800000; // Exponent + sign
|
|
|
|
t1 = t1 >> 18; // Align mantissa on MSB
|
|
|
|
t1 = t1 - 0xE00; // Adjust bias
|
|
|
|
if (t3 < 0x38800000) t1 = 0; // Flush-to-zero
|
|
if (t3 > 0x47000000) t1 = 0x3FF; // Clamp-to-max
|
|
|
|
return t1;
|
|
}
|
|
|
|
uint F32ToF11(float f) {
 uint i = *reinterpret_cast<uint*>(&f);

 uint t1 = i & 0x7fffffff; // Non-sign bits
 uint t3 = i & 0xff800000; // Exponent + sign

 //     [S|E|M]     shift   bias       max       clamp threshold
 //F32  [1|8|23]
 //F16  [1|5|10] -> 13 -> 0x1C000 -> 0x7BFF -> 0x47000000
 //F11  [0|5|6]  -> 17 -> 0x1C00  -> 0x7FF  -> 0x47000000
 //F10  [0|5|5]  -> 18 -> 0xE00   -> 0x3FF  -> 0x47000000
 //
 //shift = 23 - mantissa bits, bias = (127-15) << mantissa bits

 t1 = t1 >> 17; // Align mantissa on MSB

 t1 = t1 - 0x1C00; // Adjust bias

 if (t3 < 0x38800000) t1 = 0; // Flush-to-zero
 if (t3 > 0x47000000) t1 = 0x7FF; // Clamp-to-max

 return t1;
}
|
|
|
|
//10_11_11
|
|
// RR GG BB
|
|
//high [10][11][11] low
|
|
|
|
//11_11_10
|
|
// RR GG BB
|
|
//high [11][11][10] low
|
|
|
|
}
|
|
|
|
//Emits the integer sequence that turns a 32 bit float register into the
//10 bit float bit pattern (5 bit exponent, 5 bit mantissa, no sign):
// - values whose sign+exponent field is below $38800000 become 0
// - values whose sign+exponent field is above $47000000 become $3FF
//Both comparisons are signed, so inputs with the sign bit set end up as 0.
function TEmit_vbuf_store.F32ToF10(reg:TsrRegNode):TsrRegNode;
var
 raw  :TsrRegNode; //float bits reinterpreted as uint32
 body :TsrRegNode; //value being built
 sexp :TsrRegNode; //sign + exponent bits, compared as signed
 flag :TsrRegNode; //result of the current comparison
begin
 raw:=BitcastList.FetchRead(dtUint32,reg);

 body:=OpAndTo(raw,$7fffffff); // Non-sign bits
 sexp:=OpAndTo(raw,$ff800000); // Exponent + sign

 body.PrepType(ord(dtUint32));
 sexp.PrepType(ord(dtInt32 ));

 body:=OpShrTo (body,18);   // Align mantissa on MSB
 body:=OpISubTo(body,$E00); // Adjust bias

 //if (sexp < 0x38800000) body = 0; // Flush-to-zero
 flag:=OpCmpTo(Op.OpSLessThan,sexp,NewImm_q(dtUint32,$38800000));
 body:=OpSelectTo(flag,NewImm_q(dtUint32,0),body); //cond,src_true,src_false

 //if (sexp > 0x47000000) body = 0x3FF; // Clamp-to-max
 flag:=OpCmpTo(Op.OpSGreaterThan,sexp,NewImm_q(dtUint32,$47000000));
 Result:=OpSelectTo(flag,NewImm_q(dtUint32,$3FF),body); //cond,src_true,src_false
end;
|
|
|
|
//Emits the integer sequence that turns a 32 bit float register into the
//11 bit float bit pattern.
//
//The unsigned 11 bit float has a 5 bit exponent and a 6 bit mantissa:
// - the float32 mantissa is shifted by 23-6=17 bits
// - the exponent re-bias is (127-15) shl 6 = $1C00
// - the exponent range is the same as for the 10 bit float, so the
//   flush/clamp thresholds are the same ($38800000 / $47000000)
//
//The comparisons are signed (t3 is prepared as dtInt32), therefore the clamp
//threshold must be a positive constant; inputs with the sign bit set compare
//below $38800000 and are flushed to 0.
function TEmit_vbuf_store.F32ToF11(reg:TsrRegNode):TsrRegNode;
var
 i,t1,t3,cond:TsrRegNode;
begin
 i:=BitcastList.FetchRead(dtUint32,reg);

 t1:=OpAndTo(i,$7fffffff); // Non-sign bits
 t3:=OpAndTo(i,$ff800000); // Exponent + sign

 t1.PrepType(ord(dtUint32));
 t3.PrepType(ord(dtInt32 ));

 t1:=OpShrTo(t1,17);       // Align mantissa on MSB (6 mantissa bits are kept)

 t1:=OpISubTo(t1,$1C00);   // Adjust bias

 cond:=OpCmpTo(Op.OpSLessThan,t3,NewImm_q(dtUint32,$38800000)); //(t3 < 0x38800000)

 //cond,src_true,src_false
 t1:=OpSelectTo(cond,NewImm_q(dtUint32,0),t1); //if (t3 < 0x38800000) t1 = 0; // Flush-to-zero

 cond:=OpCmpTo(Op.OpSGreaterThan,t3,NewImm_q(dtUint32,$47000000)); //(t3 > 0x47000000)

 //cond,src_true,src_false
 t1:=OpSelectTo(cond,NewImm_q(dtUint32,$7FF),t1); //if (t3 > 0x47000000) t1 = 0x7FF; // Clamp-to-max

 Result:=t1;
end;
|
|
|
|
//Store path for vcChainVector destinations.
//
//Steps:
// 1. components without a source value are replaced by the constant picked
//    by the destination selector (1 -> one, anything else -> zero)
// 2. packed formats are written as a single dtUint32 element
// 3. when the value type differs from the buffer element type the
//    components are converted (packed floats, float->float, float->unorm)
// 4. the components are combined into one scalar/vector value
// 5. the destination chain is re-fetched when its size does not match the
//    size of the value that is written, then the store is emitted
procedure TEmit_vbuf_store.make_store_cv(var lc:Tstore_cache);
var
 rsl:TsrRegNode;
 i:Byte;
 csize:PtrUInt;
 orig:TsrChain;
 lvl_0:TsrChainLvl_0;
 lvl_1:TsrChainLvl_1;

 //Assigns a constant to every component in [0..count-1] that is still nil.
 procedure fill_missing(count:PtrUInt);
 var
  n:Byte;
 begin
  if (count=0) then Exit;
  For n:=0 to count-1 do
   if (lc.elm[n]=nil) then
   begin
    Case lc.info.dsel[n] of
     1:lc.elm[n]:=fetch_one(lc);
     else
      lc.elm[n]:=fetch_zero(lc);
    end;
   end;
 end;

begin

 fill_missing(lc.elem_count); //fill

 //special types
 Case lc.info.DFMT of
  BUF_DATA_FORMAT_10_11_11  ,
  BUF_DATA_FORMAT_11_11_10  ,
  BUF_DATA_FORMAT_10_10_10_2,
  BUF_DATA_FORMAT_2_10_10_10:
   begin
    lc.elem_orig :=dtUint32;
    lc.elem_count:=1;
   end;
  else;
 end;

 if (lc.elem_resl<>lc.elem_orig) then
 begin

  Case lc.info.DFMT of
   BUF_DATA_FORMAT_10_11_11:
    begin
     //10_11_11
     //      RR  GG  BB
     //high [10][11][11] low

     //The caller trims trailing components that have no source register,
     //so G and/or B may still be nil here; packing always needs all three.
     fill_missing(3);

     lc.elm[0]:=F32ToF10(lc.elm[0]); //R
     lc.elm[1]:=F32ToF11(lc.elm[1]); //G
     lc.elm[2]:=F32ToF11(lc.elm[2]); //B

     lc.elm[0]:=OpShlTo(lc.elm[0],11+11);
     lc.elm[1]:=OpShlTo(lc.elm[1],11);

     lc.elm[2]:=OpOrTo(lc.elm[2],lc.elm[1]); //G|B

     lc.elm[0]:=OpOrTo(lc.elm[2],lc.elm[0]); //R|G|B
     lc.elm[1]:=nil;
     lc.elm[2]:=nil;
    end;
   BUF_DATA_FORMAT_11_11_10:
    begin
     //11_11_10
     //      RR  GG  BB
     //high [11][11][10] low

     //see 10_11_11: make sure R,G,B all carry a value before packing
     fill_missing(3);

     lc.elm[0]:=F32ToF11(lc.elm[0]); //R
     lc.elm[1]:=F32ToF11(lc.elm[1]); //G
     lc.elm[2]:=F32ToF10(lc.elm[2]); //B

     lc.elm[0]:=OpShlTo(lc.elm[0],11+10);
     lc.elm[1]:=OpShlTo(lc.elm[1],10);

     lc.elm[2]:=OpOrTo(lc.elm[2],lc.elm[1]); //G|B

     lc.elm[0]:=OpOrTo(lc.elm[2],lc.elm[0]); //R|G|B
     lc.elm[1]:=nil;
     lc.elm[2]:=nil;
    end;
   else

    case lc.elem_resl of
     dtFloat32: //isScalar
      begin

       Case lc.info.NFMT of
        BUF_NUM_FORMAT_FLOAT:
         begin
          //float->float
          For i:=0 to lc.elem_count-1 do
          begin
           lc.elm[i]:=OpFToF(lc.elm[i],lc.elem_orig);
          end;
         end;
        BUF_NUM_FORMAT_UNORM:
         begin
          //float->byte
          //scale to the integer range, clamp to [0..High], convert to unsigned
          For i:=0 to lc.elem_count-1 do
          begin
           lc.elm[i]:=OpFMulToS(lc.elm[i],lc.elem_orig.High);
           lc.elm[i]:=ClampTo  (lc.elm[i],0,lc.elem_orig.High);
           lc.elm[i]:=OpFToU   (lc.elm[i],lc.elem_orig);
          end;
         end;
        else
         Assert(false,'TODO CONVERT:Float32->'+IntToStr(lc.info.NFMT));
       end;

      end;
     dtUint32,
     dtInt32 : //isInt
      begin
       Assert(false,'TODO CONVERT:Int32->'+IntToStr(lc.info.NFMT));
      end;
     else
      Assert(False);
    end;

  end; //Case lc.info.DFMT of

 end; //if (lc.elem_resl<>lc.elem_orig) then

 Case lc.elem_count of
  1:rsl:=lc.elm[0];
  else
   begin
    rsl:=OpMakeVec(line,lc.elem_orig.AsVector(lc.elem_count),@lc.elm);
   end;
 end;

 csize:=Min(lc.info.GetElemSize*lc.elem_count,lc.info.GetSizeFormat);
 orig:=TsrChain(lc.v.data[0]);

 if (orig.size<>csize) then //refetch
 begin
  //same offset as the original chain, but with the size that is written
  lvl_0.offset:=orig.offset;
  lvl_0.size  :=csize;

  if (orig.pIndex<>nil) then
  begin
   //indexed chain: keep index and stride
   lvl_1.pIndex:=orig.pIndex;
   lvl_1.stride:=orig.stride;

   orig:=lc.info.grp.Fetch(@lvl_0,@lvl_1,cflags(dtUnknow,lc.info.GLC,lc.info.SLC));
  end else
  begin
   orig:=lc.info.grp.Fetch(@lvl_0,nil,cflags(dtUnknow,lc.info.GLC,lc.info.SLC));
  end;
 end;

 FetchStore(orig,rsl);
end;
|
|
|
|
//Store path for vcChainElement destinations: every component that has a
//source value is written through its own chain.
//
//Component 0 uses the chain stored in lc.v.data[0]; component i>0 uses a
//chain fetched with index (base index + i), the same addressing that
//make_store_ue uses. The index is always derived from the base index of the
//original chain, it must not be carried over from the previous component
//(that would address base+1, base+3, base+6 for components 1..3).
//
//NOTE(review): components i>0 are fetched with offset 0, not orig.offset,
//and the base index is assumed to be non-nil - confirm against get_chain.
procedure TEmit_vbuf_store.make_store_ce(var lc:Tstore_cache);
var
 orig,elm:TsrChain;
 base_idx,sum_d:TsrRegNode;
 lvl_0:TsrChainLvl_0;
 lvl_1:TsrChainLvl_1;
 i:Byte;
begin
 orig    :=TsrChain(lc.v.data[0]);
 base_idx:=orig.pIndex;

 For i:=0 to lc.elem_count-1 do
  if (lc.elm[i]<>nil) then
  begin

   if (i=0) then
   begin
    elm:=orig;
   end else
   begin
    sum_d:=OpIAddTo(base_idx,i); //base index + component number

    lvl_0.offset:=0;
    lvl_0.size  :=orig.size;

    lvl_1.pIndex:=sum_d;
    lvl_1.stride:=orig.stride;

    elm:=lc.info.grp.Fetch(@lvl_0,@lvl_1,cflags(dtUnknow,lc.info.GLC,lc.info.SLC));
   end;

   //no type conversion is implemented on this path
   Assert(lc.elem_resl=lc.elem_orig,'TODO CONVERT:make_store_ce');

   FetchStore(elm,lc.elm[i]);

  end;

end;
|
|
|
|
//Store path for vcUniformVector destinations: all components are combined
//into one value that is written with a single image write.
//lc.v.data[0] is the image node, lc.v.data[1] the coordinate register.
procedure TEmit_vbuf_store.make_store_uv(var lc:Tstore_cache);
var
 texel,coord:TsrRegNode;
 n:Byte;
begin

 //components without a source value get the constant chosen by dsel
 For n:=0 to lc.elem_count-1 do
 begin
  if (lc.elm[n]<>nil) then Continue;

  if (lc.info.dsel[n]=1) then
   lc.elm[n]:=fetch_one(lc)
  else
   lc.elm[n]:=fetch_zero(lc);
 end;

 //a single component is written as scalar, otherwise build a vector
 if (lc.elem_count=1) then
 begin
  texel:=lc.elm[0];
 end else
 begin
  texel:=OpMakeVec(line,lc.elem_resl.AsVector(lc.elem_count),@lc.elm);
 end;

 coord:=TsrRegNode(lc.v.data[1]);

 OpImageWrite(line,TsrNode(lc.v.data[0]),coord,texel);
end;
|
|
|
|
//Store path for vcUniformElement destinations: every component that has a
//source value is written with its own image write at coordinate
//(base coordinate + component number). Components without a value are skipped.
//lc.v.data[0] is the image node, lc.v.data[1] the base coordinate register.
procedure TEmit_vbuf_store.make_store_ue(var lc:Tstore_cache);
var
 base,coord:TsrRegNode;
 n:Byte;
begin
 base:=TsrRegNode(lc.v.data[1]);

 For n:=0 to lc.elem_count-1 do
 begin
  if (lc.elm[n]=nil) then Continue;

  //component 0 uses the base coordinate unchanged
  coord:=base;
  if (n<>0) then
  begin
   coord:=OpIAddTo(base,n);
  end;

  OpImageWrite(line,TsrNode(lc.v.data[0]),coord,lc.elm[n]);
 end;
end;
|
|
|
|
//Collects the source components of a buffer store and dispatches to the
//make_store_* routine that matches the kind of destination chain.
//
// info - descriptor/instruction info; info.dsel selects per component either
//        a source register (4..7 = VDATA+0..3) or a constant
// v    - resolved destination; nothing is emitted for vcInvalid
//
//Trailing components that have no source register are dropped; when no
//component is left nothing is stored.
procedure TEmit_vbuf_store.buf_store_cv(info:TBuf_info;v:TvarChain);
var
 lc:Tstore_cache;

 i:Byte;
begin

 Case v.vType of
  vcInvalid:Exit;
  else;
 end;

 Case info.DFMT of
  BUF_DATA_FORMAT_10_10_10_2:Assert(false,'TODO: STORE:10_10_10_2');
  BUF_DATA_FORMAT_2_10_10_10:Assert(false,'TODO: STORE:2_10_10_10');
  else;
 end;

 lc:=Default(Tstore_cache);
 lc.info      :=info;
 lc.v         :=v;
 lc.elem_resl :=info.GetResultType;
 lc.elem_orig :=info.GetElemType;
 lc.elem_count:=info.GetElemCount;

 //elem_count is unsigned: with a count of 0 the bound "elem_count-1" of the
 //loop below would wrap around, so leave before it is evaluated
 if (lc.elem_count=0) then Exit;

 For i:=0 to lc.elem_count-1 do
 begin
  lc.elm[i]:=nil;
  Case lc.info.dsel[i] of
   4..7:
    begin //RGBA
     lc.elm[i]:=fetch_id(lc,lc.info.dsel[i]-4);
    end;
   else;
  end;
 end;

 While (lc.elem_count<>0) do //trim count
 begin
  i:=lc.elem_count-1;
  if (lc.elm[i]<>nil) then Break;
  Dec(lc.elem_count);
 end;

 if (lc.elem_count=0) then Exit;

 Case v.vType of
  vcChainVector   :make_store_cv(lc);
  vcChainElement  :make_store_ce(lc);
  vcUniformVector :make_store_uv(lc);
  vcUniformElement:make_store_ue(lc)
  else;
 end;

end;
|
|
|
|
end.
|
|
|