[D3D12] Fix a storing typo in the DXT3 decompression shader

This commit is contained in:
Triang3l 2020-07-14 17:45:41 +03:00
parent c99c430665
commit 9418f3f2c5
4 changed files with 854 additions and 875 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -2,10 +2,6 @@
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
//
-// Note: shader requires additional functionality:
-// Typed UAV Load Additional Formats
-//
-//
// Buffer Definitions:
//
// cbuffer XeTextureLoadConstants
@@ -216,8 +212,6 @@ if_nz r3.y
or r5.xyzw, r5.xyzw, r6.xyzw
ubfe r6.xyzw, l(4, 4, 4, 4), l(16, 20, 24, 28), r3.xxxx
imad r5.xyzw, r6.xyzw, l(0x11000000, 0x11000000, 0x11000000, 0x11000000), r5.xyzw
-store_uav_typed U0[0].xyzw, r0.yyyy, r5.xyzw
-ld_uav_typed r5.xyzw, r0.yyyy, U0[0].xyzw
store_uav_typed U0[0].xyzw, r3.zzzz, r5.xyzw
imad r3.x, vThreadID.y, l(4), l(2)
ilt r3.x, r3.x, CB0[0][2].z
@@ -353,8 +347,6 @@ if_nz r3.y
or r3.xyzw, r3.xyzw, r5.xyzw
ubfe r5.xyzw, l(4, 4, 4, 4), l(16, 20, 24, 28), r1.xxxx
imad r3.xyzw, r5.xyzw, l(0x11000000, 0x11000000, 0x11000000, 0x11000000), r3.xyzw
-store_uav_typed U0[0].xyzw, r0.yyyy, r3.xyzw
-ld_uav_typed r3.xyzw, r0.yyyy, U0[0].xyzw
store_uav_typed U0[0].xyzw, r1.yyyy, r3.xyzw
imad r1.x, vThreadID.y, l(4), l(2)
ilt r1.x, r1.x, CB0[0][2].z
@@ -410,4 +402,4 @@ if_nz r3.y
endif
endif
ret
-// Approximately 358 instruction slots used
+// Approximately 354 instruction slots used

View File

@@ -39,9 +39,8 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
((block.xxxx >> uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u;
[branch] if (texel_index_host.y + 1 < int(xe_texture_load_height_texels)) {
xe_texture_load_dest[block_offset_host + elements_pitch_host] =
-xe_texture_load_dest[block_offset_host] =
-XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
-((block.xxxx >> uint4(16u, 20u, 24u, 28u)) & 0xFu) * 0x11000000u;
+XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
+((block.xxxx >> uint4(16u, 20u, 24u, 28u)) & 0xFu) * 0x11000000u;
[branch] if (texel_index_host.y + 2 <
int(xe_texture_load_height_texels)) {
xe_texture_load_dest[block_offset_host + 2 * elements_pitch_host] =