[D3D12] Fix a storing typo in the DXT3 decompression shader

This commit is contained in:
Triang3l 2020-07-14 17:45:41 +03:00
parent c99c430665
commit 9418f3f2c5
4 changed files with 854 additions and 875 deletions

File diff suppressed because it is too large Load Diff

View File

@ -2,10 +2,6 @@
// Generated by Microsoft (R) HLSL Shader Compiler 10.1 // Generated by Microsoft (R) HLSL Shader Compiler 10.1
// //
// //
// Note: shader requires additional functionality:
// Typed UAV Load Additional Formats
//
//
// Buffer Definitions: // Buffer Definitions:
// //
// cbuffer XeTextureLoadConstants // cbuffer XeTextureLoadConstants
@ -216,8 +212,6 @@ if_nz r3.y
or r5.xyzw, r5.xyzw, r6.xyzw or r5.xyzw, r5.xyzw, r6.xyzw
ubfe r6.xyzw, l(4, 4, 4, 4), l(16, 20, 24, 28), r3.xxxx ubfe r6.xyzw, l(4, 4, 4, 4), l(16, 20, 24, 28), r3.xxxx
imad r5.xyzw, r6.xyzw, l(0x11000000, 0x11000000, 0x11000000, 0x11000000), r5.xyzw imad r5.xyzw, r6.xyzw, l(0x11000000, 0x11000000, 0x11000000, 0x11000000), r5.xyzw
store_uav_typed U0[0].xyzw, r0.yyyy, r5.xyzw
ld_uav_typed r5.xyzw, r0.yyyy, U0[0].xyzw
store_uav_typed U0[0].xyzw, r3.zzzz, r5.xyzw store_uav_typed U0[0].xyzw, r3.zzzz, r5.xyzw
imad r3.x, vThreadID.y, l(4), l(2) imad r3.x, vThreadID.y, l(4), l(2)
ilt r3.x, r3.x, CB0[0][2].z ilt r3.x, r3.x, CB0[0][2].z
@ -353,8 +347,6 @@ if_nz r3.y
or r3.xyzw, r3.xyzw, r5.xyzw or r3.xyzw, r3.xyzw, r5.xyzw
ubfe r5.xyzw, l(4, 4, 4, 4), l(16, 20, 24, 28), r1.xxxx ubfe r5.xyzw, l(4, 4, 4, 4), l(16, 20, 24, 28), r1.xxxx
imad r3.xyzw, r5.xyzw, l(0x11000000, 0x11000000, 0x11000000, 0x11000000), r3.xyzw imad r3.xyzw, r5.xyzw, l(0x11000000, 0x11000000, 0x11000000, 0x11000000), r3.xyzw
store_uav_typed U0[0].xyzw, r0.yyyy, r3.xyzw
ld_uav_typed r3.xyzw, r0.yyyy, U0[0].xyzw
store_uav_typed U0[0].xyzw, r1.yyyy, r3.xyzw store_uav_typed U0[0].xyzw, r1.yyyy, r3.xyzw
imad r1.x, vThreadID.y, l(4), l(2) imad r1.x, vThreadID.y, l(4), l(2)
ilt r1.x, r1.x, CB0[0][2].z ilt r1.x, r1.x, CB0[0][2].z
@ -410,4 +402,4 @@ if_nz r3.y
endif endif
endif endif
ret ret
// Approximately 358 instruction slots used // Approximately 354 instruction slots used

View File

@ -39,9 +39,8 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
((block.xxxx >> uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u; ((block.xxxx >> uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u;
[branch] if (texel_index_host.y + 1 < int(xe_texture_load_height_texels)) { [branch] if (texel_index_host.y + 1 < int(xe_texture_load_height_texels)) {
xe_texture_load_dest[block_offset_host + elements_pitch_host] = xe_texture_load_dest[block_offset_host + elements_pitch_host] =
xe_texture_load_dest[block_offset_host] = XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) + ((block.xxxx >> uint4(16u, 20u, 24u, 28u)) & 0xFu) * 0x11000000u;
((block.xxxx >> uint4(16u, 20u, 24u, 28u)) & 0xFu) * 0x11000000u;
[branch] if (texel_index_host.y + 2 < [branch] if (texel_index_host.y + 2 <
int(xe_texture_load_height_texels)) { int(xe_texture_load_height_texels)) {
xe_texture_load_dest[block_offset_host + 2 * elements_pitch_host] = xe_texture_load_dest[block_offset_host + 2 * elements_pitch_host] =