From b4706054d0bed837fac65ff4e94ab19c424d7892 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 13 Nov 2018 22:00:10 +0300 Subject: [PATCH] [D3D12] U11V11W10 texture loading shader --- ...texture_load_r11g11b10_rgba16_snorm_cs.cso | Bin 0 -> 8092 bytes .../texture_load_r11g11b10_rgba16_snorm_cs.h | 679 ++++++++++++++++++ ...texture_load_r11g11b10_rgba16_snorm_cs.txt | 214 ++++++ .../gpu/d3d12/shaders/pixel_formats.hlsli | 26 + ...exture_load_r11g11b10_rgba16_snorm.cs.hlsl | 33 + 5 files changed, 952 insertions(+) create mode 100644 src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.cso create mode 100644 src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.h create mode 100644 src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.txt create mode 100644 src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.cso b/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.cso new file mode 100644 index 0000000000000000000000000000000000000000..452df6fa7032ea2d88ce1e629bb067cb44a21120 GIT binary patch literal 8092 zcmbW6&5u<{8OF~o=mr|bLFPjxDpwej88RARCTJ#RQV!6L364tRh^tORU+7lo+xER= zCMJG_sEMB|cWN?YA~7K`E{yJ6u`tm;z-32WxG*kQnAzd;JLf#z@2S3*R*Sq;)mv{p z?`PFH=l1z4ubeyn&JXVY`2%nL=Iyg*|9P-l{_G9nl`qCq%uOg>!8Bwbq_^;{1o>?m;r&Up zU$mRtrer2H@FM1$gT?XS&GGGx!Q#!~((>YHczfg8peotRgVDIUGPvA`UX78PF899@|NY_{@BS3Ie~90g9C)}o4R5c2p4YnZrp4u$1Wri~pTWDx1~uE| zeqH>Ji{CC!jq{HrkR`X9eP!}*+*bA%uP%)S)$$v|jpgdzy?d%?)lsY@0f+WC89W(h zeRX_oWyjje&??GZ^~&T;qt&m~BeC3jn(Dx%Hqmk z^~TD$_T!BGAnTLc1?95zqd~ns+_<$gp0aprb$xMo{rYGyu0FGRZDTkZULW_Lz4ToF z)r$)k`wJ^e%Y%*nx#6w#)tefeKK0SlO7^E0KJ^+kNTdI3F<*G~!j=2)tE$iIviqto zrZ4ZWdSB9YHveO)ce`<2(%#nfHUEw78vLKb|JP}Towlg*-_QwIQGY-yKUJw?M_)vy zp3}Y~^+~R(Imz{Mb6(C$?rO__eKv779oW#EJ@e_X{#Soz+YX|wJ9^)pcF)mPcCPK? 
z+R2vpQY(9`+PlZ&mu1#dq7O>nU)%NB1C74f?``Yjy!Iu7?bvkmTia`z)_!ot!L|(= z&%6}m*e5-64_b`ZXBs^zYe9{5{NhA>L2~%d{CDr#mSc`B$j~Hr=#1o;KmDC;o0zBE z$U2u|RVU=b@fH_5!J|i>7&4cQ`|y)7v9k>Q5B2zXrRDWjQb`%j?({i9N9v8Am)#JFU!@^U#-6Bx98F*qids*w`=Ii;c*|+F}gv zG4+wyYk4hon)S1eoa>8BL^6i!%XnoDQ{I^;_WG={uf&*CU%a13iuE-K174(*bqR?V zDc08{44&1Iz#xB-@P+xbc$D=BM%mcwTE&hdGTqNaVzJvZ(Psi2=gW7Qd+xq7%3wTK zDK~c>d)%b=0M9by=E8FtAA7m}T;v(H&oVpw+95 z@xY8UiC5y|3wd&$jkT$TNcP3&{yowa30<)!89T1@;FcKp5-DXdipE~zN006jgE&M= zxr+&YJWpVdXMFY@X`f^47+V)|DR=$FcX)@?M~XhPuf&M{GETXhGxSEECt*Z?8K>O6 zKkza7JP9M(%s6HJS2QrN5qrRJ+{#)hYuG;HOUjeu7Vk{PF>bL|j4>&0rQR5~m_K7o ziW|Bj#kjFgd+J(YY#US7gdhjyYtSxOIQ7_im$xi)AuBYyzwK3 z?+C@ae(yGJ-D}TQFO29T<7kJbomT3yPvAMXuDcR9f7!_`@E1?3^!!t|M>X+JbM0Vg0cGWWLeU|FRI=0pRPg&<#$2zu8 z+JC2A$NzF&HCcaKqAY&*I=;w%F~~R$J7lNiko|mLd!O$P_^yMLcfPY~eJjq69dgyf z?mwTYJzDy2@GM84@(#Ei=gW2?Pfj&|WW0!5^w;(ohxL@Z@2XUx&(pN^Nqg+C5)F%r$t%rM@j|-pQiA%RJh@hq?$NmETUp zD^kkn?UMcPi~T^pa{e5Plr@17=wt1-sdK?n%F?4jBx6Xv{+>e)9yO41*KT}(S2mrQ z`u7p*au1g8RrA2LPM;6!L?-3!Z-3r_pt7!}$?=;Sv9T`q;7|2We?8zY2&R-JT(tk! zzcSC|^FQV8ca+}=s2lIa_LRHzVgGdoIHy}gT-n=iYlPUvT*mrfteA9uzd%0aZhcTI z#&8UcYujRN?^Ywuzk9G;Bj$l?ovsne4X}i3#&%sWMX&SR6C;vsa@=Cx zQr4N0L6P8<&2h6X_h310=0*Qp7uJbP$~)ir92;X;4y;JdLzJ`cQ~moN>S7Y^qx_RS z^*ztN?>@Gjdqwn{k@R> 9u; + // -512 and -511 are both -1.0, but with -512 the conversion will overflow. + s10 = s10 == 0x200u ? 0x201u : s10; + // Take the absolute value. + s10 = (s10 ^ (signs ? 0x3FFu : 0u)) + signs; + // Expand the 9-bit absolute value to 15 bits like unorm. + s10 = (s10 << 6u) | (s10 >> 3u); + // Apply the sign. + return (s10 ^ (signs ? 0xFFFFu : 0u)) + signs; +} + +// Assuming the original number has only 11 bits. +uint4 XeSNorm11To16(uint4 s11) { + uint4 signs = s11 >> 10u; + // -1024 and -1023 are both -1.0, but with -1024 the conversion will overflow. + s11 = s11 == 0x400u ? 0x401u : s11; + // Take the absolute value. + s11 = (s11 ^ (signs ? 0x7FFu : 0u)) + signs; + // Expand the 10-bit absolute value to 15 bits like unorm. 
+  s11 = (s11 << 5u) | (s11 >> 5u);
+  // Apply the sign.
+  return (s11 ^ (signs ? 0xFFFFu : 0u)) + signs;
+}
+
 // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
 uint XeFloat16To7e3(uint4 rgba_f16u32) {
diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl b/src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl
new file mode 100644
index 000000000..6acc16b15
--- /dev/null
+++ b/src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl
@@ -0,0 +1,33 @@
+#include "pixel_formats.hlsli"
+#include "texture_load.hlsli"
+
+[numthreads(8, 32, 1)]
+void main(uint3 xe_thread_id : SV_DispatchThreadID) {
+  // 1 thread = 4 uint R11G11B10_SNORM texels (4 along X) to RGBA16.
+  uint3 block_index = xe_thread_id;
+  block_index.x <<= 2u;
+  [branch] if (any(block_index >= xe_texture_load_size_blocks)) {
+    return;
+  }
+  uint4 block_offsets_guest =
+      XeTextureLoadGuestBlockOffsets(block_index, 4u, 2u);
+  uint4 blocks = uint4(xe_texture_load_source.Load(block_offsets_guest.x),
+                       xe_texture_load_source.Load(block_offsets_guest.y),
+                       xe_texture_load_source.Load(block_offsets_guest.z),
+                       xe_texture_load_source.Load(block_offsets_guest.w));
+  blocks = XeByteSwap(blocks, xe_texture_load_endianness);
+  uint block_offset_host = XeTextureHostLinearOffset(
+      block_index, xe_texture_load_size_blocks.y, xe_texture_load_host_pitch,
+      8u) + xe_texture_load_host_base;
+
+  // Expand two's complement: rg = R | (G << 16), ba = B | (0x7FFF << 16).
+  uint4 blocks_host_rg = XeSNorm11To16(blocks & 2047u) |
+                         (XeSNorm11To16((blocks >> 11u) & 2047u) << 16u);
+  uint4 blocks_host_ba = XeSNorm10To16(blocks >> 22u) | 0x7FFF0000u;
+
+  // Store the texels as interleaved (rg, ba) pairs, 2 texels per Store4.
+  xe_texture_load_dest.Store4(block_offset_host,
+                              uint4(blocks_host_rg.xy, blocks_host_ba.xy).xzyw);
+  xe_texture_load_dest.Store4(block_offset_host + 16u,
+                              uint4(blocks_host_rg.zw, blocks_host_ba.zw).xzyw);
+}