diff --git a/spirv/emit_op.pas b/spirv/emit_op.pas
index 1a5dfe06..df9b8d67 100644
--- a/spirv/emit_op.pas
+++ b/spirv/emit_op.pas
@@ -73,8 +73,8 @@ type
   procedure OpFmaI32(dst:PsrRegSlot;src0,src1,src2:TsrRegNode);
   procedure OpFmaU32(dst:PsrRegSlot;src0,src1,src2:TsrRegNode);
   //
-  procedure OpSelect(dst:PsrRegSlot;src0,src1,cond:TsrRegNode);
-  function OpSelectTo(src0,src1,cond:TsrRegNode):TsrRegNode;
+  procedure OpSelect(dst:PsrRegSlot;src_false,src_true,cond:TsrRegNode); //dst:=cond ? src_true : src_false
+  function OpSelectTo(src_false,src_true,cond:TsrRegNode):TsrRegNode; //Result:=cond ? src_true : src_false
   //
   procedure OpIAddCar(pLine:TspirvOp;dst,car,src0,src1:TsrRegNode);
   procedure OpIAddExt(dst,car:PsrRegSlot;src0,src1:TsrRegNode;rtype:TsrDataType);
@@ -171,6 +171,7 @@ type
   function OpNotEqualTo(src0,src1:TsrRegNode;ppLine:PPspirvOp=nil):TsrRegNode;
   //
   function OpBitCountTo(src:TsrRegNode;ppLine:PPspirvOp=nil):TsrRegNode;
+  function OpCmpTo(OpId:DWORD;src0,src1:TsrRegNode;ppLine:PPspirvOp=nil):TsrRegNode; //emits OpId(src0,src1) into a new dtBool register
   //
   function OpImageSampleImplicitLod(pLine:TspirvOp;img:TsrNode;dst,coord:TsrRegNode):TspirvOp;
   function OpImageSampleExplicitLod(pLine:TspirvOp;img:TsrNode;dst,coord:TsrRegNode):TspirvOp;
@@ -639,16 +640,16 @@ end;
 
 //
 
-procedure TEmitOp.OpSelect(dst:PsrRegSlot;src0,src1,cond:TsrRegNode);
+procedure TEmitOp.OpSelect(dst:PsrRegSlot;src_false,src_true,cond:TsrRegNode); //operands are passed to OpSelect as (cond,src_true,src_false)
 begin
- Op3(Op.OpSelect,LazyType2(src0.dtype,src1.dtype),dst,cond,src1,src0);
+ Op3(Op.OpSelect,LazyType2(src_false.dtype,src_true.dtype),dst,cond,src_true,src_false);
 end;
 
-function TEmitOp.OpSelectTo(src0,src1,cond:TsrRegNode):TsrRegNode;
+function TEmitOp.OpSelectTo(src_false,src_true,cond:TsrRegNode):TsrRegNode; //same as OpSelect, but returns a newly created register
 begin
- Result:=NewReg(LazyType2(src0.dtype,src1.dtype));
+ Result:=NewReg(LazyType2(src_false.dtype,src_true.dtype));
  //
- _Op3(line,Op.OpSelect,Result,cond,src1,src0);
+ _Op3(line,Op.OpSelect,Result,cond,src_true,src_false);
 end;
 
 procedure TEmitOp.OpIAddCar(pLine:TspirvOp;dst,car,src0,src1:TsrRegNode);
@@ -1479,6 +1480,12 @@ begin
  _set_line(ppLine,_Op1(_get_line(ppLine),Op.OpBitCount,Result,src));
 end;
 
+function TEmitOp.OpCmpTo(OpId:DWORD;src0,src1:TsrRegNode;ppLine:PPspirvOp=nil):TsrRegNode; //OpId is supplied by the caller (e.g. Op.OpSLessThan)
+begin
+ Result:=NewReg(dtBool); //the result register is always dtBool
+ _set_line(ppLine,_Op2(_get_line(ppLine),OpId,Result,src0,src1));
+end;
+
 //
 
 function TEmitOp.OpImageSampleImplicitLod(pLine:TspirvOp;img:TsrNode;dst,coord:TsrRegNode):TspirvOp;
diff --git a/spirv/emit_vbuf_store.pas b/spirv/emit_vbuf_store.pas
index ff6496ad..ead3fa87 100644
--- a/spirv/emit_vbuf_store.pas
+++ b/spirv/emit_vbuf_store.pas
@@ -32,6 +32,8 @@ type
   function fetch_zero(var lc:Tstore_cache):TsrRegNode;
   function fetch_one(var lc:Tstore_cache):TsrRegNode;
   function ClampTo(src:TsrRegNode;min_s,max_s:Single):TsrRegNode;
+  function F32ToF10(reg:TsrRegNode):TsrRegNode; //float32 bits -> 10-bit unsigned float (5 exp, 5 mantissa)
+  function F32ToF11(reg:TsrRegNode):TsrRegNode; //float32 bits -> 11-bit unsigned float (5 exp, 6 mantissa)
   procedure make_store_cv(var lc:Tstore_cache);
   procedure make_store_ce(var lc:Tstore_cache);
   procedure make_store_uv(var lc:Tstore_cache);
@@ -98,6 +100,110 @@ begin
  _OpGlsl3(line,GlslOp.FClamp,Result,src,min,max);
 end;
 
+{
+
+uint F32ToF10(float f) {
+ uint i = *reinterpret_cast(&f);
+
+ uint t1 = i & 0x7fffffff; // Non-sign bits
+ uint t3 = i & 0xff800000; // Exponent + sign
+
+ t1 = t1 >> 18; // Align mantissa on MSB
+
+ t1 = t1 - 0xE00; // Adjust bias
+
+ if (t3 < 0x38800000) t1 = 0; // Flush-to-zero
+ if (t3 > 0x47000000) t1 = 0x3FF; // Clamp-to-max
+
+ return t1;
+}
+
+uint F32ToF11(float f) {
+ uint i = *reinterpret_cast(&f);
+
+ uint t1 = i & 0x7fffffff; // Non-sign bits
+ uint t3 = i & 0xff800000; // Exponent + sign
+
+ // [S|E|M] -> shift -> bias ((127-15) shl M) -> max -> clamp threshold
+ //F32 [1|8|23]
+ //F16 [1|5|10] -> 13 -> 0x1C000 -> 0x7BFF -> 0x47000000
+ //F11 [0|5|6] -> 17 -> 0x1C00 -> 0x7FF -> 0x47000000
+ //F10 [0|5|5] -> 18 -> 0xE00 -> 0x3FF -> 0x47000000
+
+ t1 = t1 >> 17; // Align mantissa on MSB
+
+ t1 = t1 - 0x1C00; // Adjust bias
+
+ if (t3 < 0x38800000) t1 = 0; // Flush-to-zero
+ if (t3 > 0x47000000) t1 = 0x7FF; // Clamp-to-max
+
+ return t1;
+}
+
+//10_11_11
+// RR GG BB
+//high [10][11][11] low
+
+//11_11_10
+// RR GG BB
+//high [11][11][10] low
+
+}
+
+function TEmit_vbuf_store.F32ToF10(reg:TsrRegNode):TsrRegNode; //emits ops converting float32 bits to a 10-bit float; Result holds the packed value
+var
+ i,t1,t3,cond:TsrRegNode;
+begin
+ i:=BitcastList.FetchRead(dtUint32,reg);
+
+ t1:=OpAndTo(i,$7fffffff); // Non-sign bits
+ t3:=OpAndTo(i,$ff800000); // Exponent + sign
+
+ t1.PrepType(ord(dtUint32));
+ t3.PrepType(ord(dtInt32 )); // signed compares below: a set sign bit makes t3 negative, so it takes the flush-to-zero path
+
+ t1:=OpShrTo(t1,18); // Align mantissa on MSB
+
+ t1:=OpISubTo(t1,$E00); // Adjust bias
+
+ cond:=OpCmpTo(Op.OpSLessThan,t3,NewImm_q(dtUint32,$38800000)); //(t3 < 0x38800000)
+
+ t1:=OpSelectTo(t1,NewImm_q(dtUint32,0),cond); //if (t3 < 0x38800000) t1 = 0; // Flush-to-zero
+
+ cond:=OpCmpTo(Op.OpSGreaterThan,t3,NewImm_q(dtUint32,$47000000)); //(t3 > 0x47000000)
+
+ t1:=OpSelectTo(t1,NewImm_q(dtUint32,$3FF),cond); //if (t3 > 0x47000000) t1 = 0x3FF; // Clamp-to-max
+
+ Result:=t1;
+end;
+
+function TEmit_vbuf_store.F32ToF11(reg:TsrRegNode):TsrRegNode; //emits ops converting float32 bits to an 11-bit float; Result holds the packed value
+var
+ i,t1,t3,cond:TsrRegNode;
+begin
+ i:=BitcastList.FetchRead(dtUint32,reg);
+
+ t1:=OpAndTo(i,$7fffffff); // Non-sign bits
+ t3:=OpAndTo(i,$ff800000); // Exponent + sign
+
+ t1.PrepType(ord(dtUint32));
+ t3.PrepType(ord(dtInt32 )); // signed compares below: thresholds must stay below $80000000 or they turn negative
+
+ t1:=OpShrTo(t1,17); // Align mantissa on MSB (keeps 6 mantissa bits)
+
+ t1:=OpISubTo(t1,$1C00); // Adjust bias ((127-15) shl 6)
+
+ cond:=OpCmpTo(Op.OpSLessThan,t3,NewImm_q(dtUint32,$38800000)); //(t3 < 0x38800000)
+
+ t1:=OpSelectTo(t1,NewImm_q(dtUint32,0),cond); //if (t3 < 0x38800000) t1 = 0; // Flush-to-zero
+
+ cond:=OpCmpTo(Op.OpSGreaterThan,t3,NewImm_q(dtUint32,$47000000)); //(t3 > 0x47000000)
+
+ t1:=OpSelectTo(t1,NewImm_q(dtUint32,$7FF),cond); //if (t3 > 0x47000000) t1 = 0x7FF; // Clamp-to-max
+
+ Result:=t1;
+end;
+
 procedure TEmit_vbuf_store.make_store_cv(var lc:Tstore_cache);
 var
  rsl:TsrRegNode;
@@ -119,57 +225,112 @@ begin
   end;
  end;
 
+ //special types: packed formats are stored as a single dtUint32 element
+ Case lc.info.DFMT of
+  BUF_DATA_FORMAT_10_11_11 ,
+  BUF_DATA_FORMAT_11_11_10 ,
+  BUF_DATA_FORMAT_10_10_10_2,
+  BUF_DATA_FORMAT_2_10_10_10:
+   begin
+    lc.elem_orig :=dtUint32;
+    lc.elem_count:=1;
+   end;
+  else;
+ end;
+
  if (lc.elem_resl<>lc.elem_orig) then
  begin
-  case lc.elem_resl of
-   dtFloat32: //isScalar
+
+  Case lc.info.DFMT of
+   BUF_DATA_FORMAT_10_11_11:
     begin
+     //10_11_11
+     // RR GG BB
+     //high [10][11][11] low
 
-     Case lc.info.NFMT of
-      BUF_NUM_FORMAT_FLOAT:
-       begin
-        //float->float
-        For i:=0 to lc.elem_count-1 do
-        begin
-         lc.elm[i]:=OpFToF(lc.elm[i],lc.elem_orig);
-        end;
-       end;
-      BUF_NUM_FORMAT_UNORM:
-       begin
-        //float->byte
-        For i:=0 to lc.elem_count-1 do
-        begin
-         lc.elm[i]:=OpFMulToS(lc.elm[i],lc.elem_orig.High);
-         lc.elm[i]:=ClampTo (lc.elm[i],0,lc.elem_orig.High);
-         lc.elm[i]:=OpFToU (lc.elm[i],lc.elem_orig);
-        end;
-       end;
-      else
-       Assert(false,'TODO CONVERT:Float32->'+IntToStr(lc.info.NFMT));
-     end;
+     lc.elm[0]:=F32ToF10(lc.elm[0]); //R
+     lc.elm[1]:=F32ToF11(lc.elm[1]); //G
+     lc.elm[2]:=F32ToF11(lc.elm[2]); //B
 
+     lc.elm[0]:=OpShlTo(lc.elm[0],11+11); //R -> bits 22..31; NOTE(review): confirm component order against the hardware 10_11_11 layout
+     lc.elm[1]:=OpShlTo(lc.elm[1],11); //G -> bits 11..21
+
+     lc.elm[2]:=OpOrTo(lc.elm[2],lc.elm[1]); //G|B
+
+     lc.elm[0]:=OpOrTo(lc.elm[2],lc.elm[0]); //R|G|B
+     lc.elm[1]:=nil; //only elm[0] is stored (elem_count=1)
+     lc.elm[2]:=nil;
     end;
-   dtUint32,
-   dtInt32 : //isInt
+   BUF_DATA_FORMAT_11_11_10:
     begin
-     Assert(false,'TODO CONVERT:Int32->'+IntToStr(lc.info.NFMT));
+     //11_11_10
+     // RR GG BB
+     //high [11][11][10] low
+
+     lc.elm[0]:=F32ToF11(lc.elm[0]); //R
+     lc.elm[1]:=F32ToF11(lc.elm[1]); //G
+     lc.elm[2]:=F32ToF10(lc.elm[2]); //B
+
+     lc.elm[0]:=OpShlTo(lc.elm[0],11+10); //R -> bits 21..31
+     lc.elm[1]:=OpShlTo(lc.elm[1],10); //G -> bits 10..20
+
+     lc.elm[2]:=OpOrTo(lc.elm[2],lc.elm[1]); //G|B
+
+     lc.elm[0]:=OpOrTo(lc.elm[2],lc.elm[0]); //R|G|B
+     lc.elm[1]:=nil; //only elm[0] is stored (elem_count=1)
+     lc.elm[2]:=nil;
     end;
    else
-    Assert(False);
-  end;
- end;
+
+    case lc.elem_resl of
+     dtFloat32: //isScalar
+      begin
+
+       Case lc.info.NFMT of
+        BUF_NUM_FORMAT_FLOAT:
+         begin
+          //float->float
+          For i:=0 to lc.elem_count-1 do
+          begin
+           lc.elm[i]:=OpFToF(lc.elm[i],lc.elem_orig);
+          end;
+         end;
+        BUF_NUM_FORMAT_UNORM:
+         begin
+          //float->byte
+          For i:=0 to lc.elem_count-1 do
+          begin
+           lc.elm[i]:=OpFMulToS(lc.elm[i],lc.elem_orig.High);
+           lc.elm[i]:=ClampTo (lc.elm[i],0,lc.elem_orig.High);
+           lc.elm[i]:=OpFToU (lc.elm[i],lc.elem_orig);
+          end;
+         end;
+        else
+         Assert(false,'TODO CONVERT:Float32->'+IntToStr(lc.info.NFMT));
+       end;
+
+      end;
+     dtUint32,
+     dtInt32 : //isInt
+      begin
+       Assert(false,'TODO CONVERT:Int32->'+IntToStr(lc.info.NFMT));
+      end;
+     else
+      Assert(False);
+    end;
+
+  end; //Case lc.info.DFMT of
+
+ end; //if (lc.elem_resl<>lc.elem_orig) then
 
  Case lc.elem_count of
   1:rsl:=lc.elm[0];
   else
    begin
     rsl:=OpMakeVec(line,lc.elem_orig.AsVector(lc.elem_count),@lc.elm);
-    //rsl:=OpMakeVec(line,lc.elem_resl.AsVector(lc.elem_count),@lc.elm);
    end;
  end;
 
- //Assert(lc.elem_resl=lc.elem_orig,'TODO CONVERT');
-
  csize:=Min(lc.info.GetElemSize*lc.elem_count,lc.info.GetSizeFormat);
 
  orig:=TsrChain(lc.v.data[0]);
@@ -303,10 +464,10 @@ begin
   else;
  end;
 
- if info.IsExtFormat then
- begin
-  //TODO: ExtFormat
-  Assert(false,'TODO: ExtFormat='+IntToStr(info.DFMT));
+ Case info.DFMT of
+  BUF_DATA_FORMAT_10_10_10_2:Assert(false,'TODO: STORE:10_10_10_2'); //packed integer formats are not implemented for store yet
+  BUF_DATA_FORMAT_2_10_10_10:Assert(false,'TODO: STORE:2_10_10_10');
+  else;
  end;
 
  lc:=Default(Tstore_cache);