From 7935bcf49f80947addcbbaccb03164e1cbcc3639 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 2 Nov 2018 20:21:33 +0300 Subject: [PATCH] [D3D12] Fix resolving of non-top-left sample --- .../dxbc/edram_tile_sample_32bpp_cs.cso | Bin 5212 -> 5228 bytes .../shaders/dxbc/edram_tile_sample_32bpp_cs.h | 523 +++++++++--------- .../dxbc/edram_tile_sample_32bpp_cs.txt | 13 +- .../dxbc/edram_tile_sample_64bpp_cs.cso | Bin 5500 -> 5500 bytes .../shaders/dxbc/edram_tile_sample_64bpp_cs.h | 14 +- .../dxbc/edram_tile_sample_64bpp_cs.txt | 4 +- .../shaders/edram_tile_sample_32bpp.cs.hlsl | 4 +- .../shaders/edram_tile_sample_64bpp.cs.hlsl | 4 +- 8 files changed, 282 insertions(+), 280 deletions(-) diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_32bpp_cs.cso b/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_32bpp_cs.cso index 87d909fba6f703db65fccf2b4a40e090adbe2be7..9f1f3c2156efa35e46ac97cc5a96b66b886300e7 100644 GIT binary patch delta 221 zcmcbk@kYbYCBn&>;hNvffUT#so!j{Dn!f*ijFEwXAxDIPffY!b0C5ZxkPpN)Kzw1N zp+6I+k01j>AW%Mdb0?EDv!o^i1G^D}00T2nu^W_r#s%b0UdSrW%Eky}ZNAC+i*d3t zm*C_J94x8=K!r;g7&w%Gih+hQSOIAeU@K4n(;z}_vLcr-NVO;jkOxxjgsghACdYhk V(FULtClG@G$aWA=oqSig9snJVBxV2r delta 234 zcmaE(aYw_@CBn(M@AT+}g9i!g|lyJB$nr3^5`M46H!f1jJ@yV8{XD8X!Kf z(a@iX(?*biArL6$Ncdjagj@WC67y|zS->6yHR8UtE46a1G^D}00T1v1A`lse#Qml TPhQ9>&dSCJWNp65x|kmT4|Ebj delta 72 zcmeyP^+!v@CBn&hvq;q1y~f4-OBc;`eKT8m&PI_5tdkG0iYPAQ7GPjzU|`T>U|>&T W5MW>eve_6RVqt7R{^pyki}?XoM-oc_ diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.h b/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.h index bf0a7fe99..73e646743 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.h +++ b/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.h @@ -1,8 +1,8 @@ // generated from `xb buildhlsl` // source: edram_tile_sample_64bpp.cs.hlsl const uint8_t edram_tile_sample_64bpp_cs[] = { - 0x44, 0x58, 0x42, 0x43, 0xB3, 0x14, 0x5A, 0xAD, 0xBD, 0x33, 0x73, 0x0F, - 0xA5, 0xA2, 0x9D, 0x45, 0xEC, 0x9B, 0x23, 0x9C, 0x01, 0x00, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0x0C, 0x37, 0xE3, 0xC2, 0x17, 0x14, 0xD2, 0x0E, + 0x74, 0xE3, 0x75, 0x95, 0xEC, 0xB3, 0x4A, 0x8D, 0x01, 0x00, 0x00, 0x00, 0x7C, 0x15, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x5C, 0x02, 0x00, 0x00, 0x6C, 0x02, 0x00, 0x00, 0x7C, 0x02, 0x00, 0x00, 0xE0, 0x14, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x20, 0x02, 0x00, 0x00, @@ -121,11 +121,11 @@ const uint8_t edram_tile_sample_64bpp_cs[] = { 0x1E, 0x00, 0x00, 0x0A, 0xC2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x04, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x62, 0x00, 0x10, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xA6, 0x0B, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, - 0xA6, 0x0B, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, - 0x62, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x06, 0x01, 0x10, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x56, 0x06, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x32, 0x00, 0x10, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xE6, 0x0A, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, + 0x62, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0xA6, 0x0B, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x06, 0x01, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x09, 0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0xFF, 0x07, 0x00, 0x00, diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.txt b/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.txt index 42d2a3bca..4dc3146af 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.txt +++ b/src/xenia/gpu/d3d12/shaders/dxbc/edram_tile_sample_64bpp_cs.txt @@ -67,8 +67,8 @@ ishl r3.zw, vThreadGroupID.xxxy, r2.xxxy imad r1.yz, r1.yyzy, r2.xxyx, r3.zzwz imad r3.xy, -r3.xyxx, l(10, 8, 0, 0), vThreadIDInGroup.xyxx iadd r3.zw, r2.xxxy, l(0, 0, 2, 0) -iadd r2.yz, r2.zzwz, r3.zzwz -ishl r2.yz, r3.xxyx, r2.yyzy +ishl r3.xy, r3.xyxx, r3.zwzz +iadd r2.yz, r2.zzwz, r3.xxyx and r0.w, CB0[0][1].x, l(2047) ushr r1.w, CB0[0][1].x, l(12) imad r0.w, r1.z, r1.w, r0.w diff --git a/src/xenia/gpu/d3d12/shaders/edram_tile_sample_32bpp.cs.hlsl b/src/xenia/gpu/d3d12/shaders/edram_tile_sample_32bpp.cs.hlsl index 6242bfd94..545ee5d22 100644 --- a/src/xenia/gpu/d3d12/shaders/edram_tile_sample_32bpp.cs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/edram_tile_sample_32bpp.cs.hlsl @@ -26,8 +26,8 @@ void main(uint3 xe_group_id : SV_GroupID, uint2(uint2(10u, 8u) <= xe_group_thread_id.xy) * sample_info.xy; uint edram_offset = XeEDRAMOffset32bpp( (xe_group_id.xy << sample_info.xy) + edram_tile_quarter, - (xe_group_thread_id.xy - edram_tile_quarter * uint2(10u, 8u)) << - (sample_info.xy + uint2(2u, 0u)) + sample_info.zw); + ((xe_group_thread_id.xy - edram_tile_quarter * uint2(10u, 8u)) << + (sample_info.xy + uint2(2u, 0u))) + sample_info.zw); // At 1x and 2x, this contains samples of 4 pixels. At 4x, this contains // samples of 2, need to load 2 more. uint4 pixels = xe_edram_load_store_source.Load4(edram_offset); diff --git a/src/xenia/gpu/d3d12/shaders/edram_tile_sample_64bpp.cs.hlsl b/src/xenia/gpu/d3d12/shaders/edram_tile_sample_64bpp.cs.hlsl index 1b3a0edd3..afa832039 100644 --- a/src/xenia/gpu/d3d12/shaders/edram_tile_sample_64bpp.cs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/edram_tile_sample_64bpp.cs.hlsl @@ -26,8 +26,8 @@ void main(uint3 xe_group_id : SV_GroupID, uint2(uint2(10u, 8u) <= xe_group_thread_id.xy) * sample_info.xy; uint edram_offset = XeEDRAMOffset64bpp( (xe_group_id.xy << sample_info.xy) + edram_tile_quarter, - (xe_group_thread_id.xy - edram_tile_quarter * uint2(10u, 8u)) << - (sample_info.xy + uint2(2u, 0u)) + sample_info.zw); + ((xe_group_thread_id.xy - edram_tile_quarter * uint2(10u, 8u)) << + (sample_info.xy + uint2(2u, 0u))) + sample_info.zw); // Loaded with the first 2 pixels at 1x and 2x, or the first 1 pixel at 4x. uint4 pixels_01 = xe_edram_load_store_source.Load4(edram_offset); // Loaded with the second 2 pixels at 1x and 2x, or the second 1 pixel at 4x.