diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index 45f2c3284..6f91693bb 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -575,29 +575,16 @@ bool D3D12RenderTargetCache::Initialize() { // and pipelines. D3D12_ROOT_PARAMETER host_depth_store_root_parameters[kHostDepthStoreRootParameterCount]; - // Rectangle constant. - D3D12_ROOT_PARAMETER& host_depth_store_root_rectangle_constant = - host_depth_store_root_parameters - [kHostDepthStoreRootParameterRectangleConstant]; - host_depth_store_root_rectangle_constant.ParameterType = + // Constants. + D3D12_ROOT_PARAMETER& host_depth_store_root_constants = + host_depth_store_root_parameters[kHostDepthStoreRootParameterConstants]; + host_depth_store_root_constants.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - host_depth_store_root_rectangle_constant.Constants.ShaderRegister = 0; - host_depth_store_root_rectangle_constant.Constants.RegisterSpace = 0; - host_depth_store_root_rectangle_constant.Constants.Num32BitValues = - sizeof(HostDepthStoreRectangleConstant) / sizeof(uint32_t); - host_depth_store_root_rectangle_constant.ShaderVisibility = - D3D12_SHADER_VISIBILITY_ALL; - // Render target constant. - D3D12_ROOT_PARAMETER& host_depth_store_root_render_target_constant = - host_depth_store_root_parameters - [kHostDepthStoreRootParameterRenderTargetConstant]; - host_depth_store_root_render_target_constant.ParameterType = - D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - host_depth_store_root_render_target_constant.Constants.ShaderRegister = 1; - host_depth_store_root_render_target_constant.Constants.RegisterSpace = 0; - host_depth_store_root_render_target_constant.Constants.Num32BitValues = - sizeof(HostDepthStoreRenderTargetConstant) / sizeof(uint32_t); - host_depth_store_root_render_target_constant.ShaderVisibility = + host_depth_store_root_constants.Constants.ShaderRegister = 0; + host_depth_store_root_constants.Constants.RegisterSpace = 0; + host_depth_store_root_constants.Constants.Num32BitValues = + sizeof(HostDepthStoreConstants) / sizeof(uint32_t); + host_depth_store_root_constants.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // Source. D3D12_DESCRIPTOR_RANGE host_depth_store_root_source_range; @@ -4599,9 +4586,11 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears( host_depth_store_render_target_constant.msaa_2x_supported = uint32_t(msaa_2x_supported_); command_list.D3DSetComputeRoot32BitConstants( - kHostDepthStoreRootParameterRenderTargetConstant, + kHostDepthStoreRootParameterConstants, sizeof(host_depth_store_render_target_constant) / sizeof(uint32_t), - &host_depth_store_render_target_constant, 0); + &host_depth_store_render_target_constant, + offsetof(HostDepthStoreConstants, render_target) / + sizeof(uint32_t)); // Barriers - don't need to try to combine them with the rest of // render target transfer barriers now - if this happens, after host // depth storing, NON_PIXEL_SHADER_RESOURCE -> DEPTH_WRITE will be done @@ -4649,9 +4638,10 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears( host_depth_store_rectangle_constant.width_pixels_div_8_minus_1 = (transfer_rectangle.width_pixels >> 3) - 1; command_list.D3DSetComputeRoot32BitConstants( - kHostDepthStoreRootParameterRectangleConstant, + kHostDepthStoreRootParameterConstants, sizeof(host_depth_store_rectangle_constant) / sizeof(uint32_t), - &host_depth_store_rectangle_constant, 0); + &host_depth_store_rectangle_constant, + offsetof(HostDepthStoreConstants, rectangle) / sizeof(uint32_t)); command_processor_.SubmitBarriers(); command_list.D3DDispatch( (transfer_rectangle.width_pixels * pixel_size_x + 63) >> 6, diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.h b/src/xenia/gpu/d3d12/d3d12_render_target_cache.h index 8b5b17310..1ccb7eab5 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.h @@ -527,39 +527,8 @@ class D3D12RenderTargetCache final : public RenderTargetCache { } }; - union HostDepthStoreRectangleConstant { - uint32_t constant; - struct { - // - 1 because the maximum is 0x1FFF / 8, not 0x2000 / 8. - uint32_t x_pixels_div_8 : xenos::kResolveSizeBits - 1 - - xenos::kResolveAlignmentPixelsLog2; - uint32_t y_pixels_div_8 : xenos::kResolveSizeBits - 1 - - xenos::kResolveAlignmentPixelsLog2; - uint32_t width_pixels_div_8_minus_1 : xenos::kResolveSizeBits - 1 - - xenos::kResolveAlignmentPixelsLog2; - }; - HostDepthStoreRectangleConstant() : constant(0) { - static_assert_size(*this, sizeof(constant)); - } - }; - - union HostDepthStoreRenderTargetConstant { - uint32_t constant; - struct { - uint32_t pitch_tiles : xenos::kEdramPitchTilesBits; - uint32_t resolution_scale_x : 2; - uint32_t resolution_scale_y : 2; - // Whether 2x MSAA is supported natively rather than through 4x. - uint32_t msaa_2x_supported : 1; - }; - HostDepthStoreRenderTargetConstant() : constant(0) { - static_assert_size(*this, sizeof(constant)); - } - }; - enum { - kHostDepthStoreRootParameterRectangleConstant, - kHostDepthStoreRootParameterRenderTargetConstant, + kHostDepthStoreRootParameterConstants, kHostDepthStoreRootParameterSource, kHostDepthStoreRootParameterDest, kHostDepthStoreRootParameterCount, diff --git a/src/xenia/gpu/render_target_cache.h b/src/xenia/gpu/render_target_cache.h index 743946438..9c745fb05 100644 --- a/src/xenia/gpu/render_target_cache.h +++ b/src/xenia/gpu/render_target_cache.h @@ -391,6 +391,41 @@ class RenderTargetCache { const Rectangle* cutout = nullptr); }; + union HostDepthStoreRectangleConstant { + uint32_t constant; + struct { + // - 1 because the maximum is 0x1FFF / 8, not 0x2000 / 8. + uint32_t x_pixels_div_8 : xenos::kResolveSizeBits - 1 - + xenos::kResolveAlignmentPixelsLog2; + uint32_t y_pixels_div_8 : xenos::kResolveSizeBits - 1 - + xenos::kResolveAlignmentPixelsLog2; + uint32_t width_pixels_div_8_minus_1 : xenos::kResolveSizeBits - 1 - + xenos::kResolveAlignmentPixelsLog2; + }; + HostDepthStoreRectangleConstant() : constant(0) { + static_assert_size(*this, sizeof(constant)); + } + }; + + union HostDepthStoreRenderTargetConstant { + uint32_t constant; + struct { + uint32_t pitch_tiles : xenos::kEdramPitchTilesBits; + uint32_t resolution_scale_x : 2; + uint32_t resolution_scale_y : 2; + // Whether 2x MSAA is supported natively rather than through 4x. + uint32_t msaa_2x_supported : 1; + }; + HostDepthStoreRenderTargetConstant() : constant(0) { + static_assert_size(*this, sizeof(constant)); + } + }; + + struct HostDepthStoreConstants { + HostDepthStoreRectangleConstant rectangle; + HostDepthStoreRenderTargetConstant render_target; + }; + struct ResolveCopyDumpRectangle { RenderTarget* render_target; // If rows == 1: diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_1xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_1xmsaa_cs.h index 3e43288af..e8a23943a 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_1xmsaa_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_1xmsaa_cs.h @@ -5,17 +5,11 @@ // // Buffer Definitions: // -// cbuffer XeHostDepthStoreRectangleConstants +// cbuffer xesl_push_constants // { // // uint xe_host_depth_store_rectangle;// Offset: 0 Size: 4 -// -// } -// -// cbuffer XeHostDepthStoreRenderTargetConstants -// { -// -// uint xe_host_depth_store_render_target;// Offset: 0 Size: 4 +// uint xe_host_depth_store_render_target;// Offset: 4 Size: 4 // // } // @@ -24,10 +18,9 @@ // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ -// xe_host_depth_store_source texture float 2d T0 t0 1 +// xe_host_depth_store_source texture float4 2d T0 t0 1 // xe_host_depth_store_dest UAV uint4 buf U0 u0 1 -// XeHostDepthStoreRectangleConstants cbuffer NA NA CB0 cb0 1 -// XeHostDepthStoreRenderTargetConstants cbuffer NA NA CB1 cb1 1 +// xesl_push_constants cbuffer NA NA CB0 cb0 1 // // // @@ -45,15 +38,13 @@ cs_5_1 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[0:0][1], immediateIndexed, space=0 -dcl_constantbuffer CB1[1:1][1], immediateIndexed, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_uav_typed_buffer (uint,uint,uint,uint) U0[0:0], space=0 dcl_input vThreadID.xy -dcl_temps 4 +dcl_temps 5 dcl_thread_group 8, 8, 1 -ubfe r0.x, l(10), l(20), CB0[0][0].x +ubfe r0.xy, l(10, 2, 0, 0), l(20, 10, 0, 0), CB0[0][0].xyxx iadd r0.x, r0.x, l(1) -ubfe r0.y, l(2), l(10), CB1[1][0].x imul null, r0.x, r0.y, r0.x uge r0.x, vThreadID.x, r0.x if_nz r0.x @@ -62,11 +53,11 @@ endif ushr r0.y, CB0[0][0].x, l(10) mov r0.x, CB0[0][0].x bfi r0.xy, l(10, 10, 0, 0), l(3, 3, 0, 0), r0.xyxx, l(0, 0, 0, 0) -ubfe r0.zw, l(0, 0, 2, 2), l(0, 0, 10, 12), CB1[1][0].xxxx +ubfe r0.zw, l(0, 0, 2, 2), l(0, 0, 10, 12), CB0[0][0].yyyy ishl r1.x, vThreadID.x, l(3) mov r1.y, vThreadID.y imad r1.xy, r0.xyxx, r0.zwzz, r1.xyxx -and r0.x, CB1[1][0].x, l(1023) +and r0.x, CB0[0][0].y, l(1023) imul null, r0.yz, r0.zzwz, l(0, 80, 16, 0) udiv r2.xy, null, r1.xyxx, r0.yzyy imad r0.x, r2.y, r0.x, r2.x @@ -76,134 +67,121 @@ imad r0.y, r2.y, r0.y, r2.x imad r0.x, r0.x, r0.z, r0.y ushr r0.x, r0.x, l(2) mov r1.zw, l(0,0,0,0) -ld r2.x, r1.xyww, T0[0].xyzw -iadd r3.xyzw, r1.xyww, l(1, 0, 0, 0) -ld r2.y, r3.xyzw, T0[0].yxzw -iadd r3.xyzw, r1.xyww, l(2, 0, 0, 0) +ld r2.x, r1.xyzw, T0[0].xyzw +iadd r3.xyzw, r1.xyxy, l(2, 0, 1, 0) +mov r4.xy, r3.zwzz +mov r4.zw, l(0,0,0,0) +ld r2.y, r4.xyzw, T0[0].yxzw +mov r3.zw, l(0,0,0,0) ld r2.z, r3.xyzw, T0[0].yzxw -iadd r3.xyzw, r1.xyww, l(3, 0, 0, 0) -ld r2.w, r3.xyzw, T0[0].yzwx +iadd r3.xyzw, r1.xyxy, l(4, 0, 3, 0) +mov r4.xy, r3.zwzz +mov r4.zw, l(0,0,0,0) +ld r2.w, r4.xyzw, T0[0].yzwx store_uav_typed U0[0].xyzw, r0.xxxx, r2.xyzw iadd r0.y, r0.x, l(1) -iadd r2.xyzw, r1.xyww, l(4, 0, 0, 0) -ld r2.x, r2.xyzw, T0[0].xyzw -iadd r3.xyzw, r1.xyww, l(5, 0, 0, 0) -ld r2.y, r3.xyzw, T0[0].yxzw -iadd r3.xyzw, r1.xyww, l(6, 0, 0, 0) +mov r3.zw, l(0,0,0,0) +ld r2.x, r3.xyzw, T0[0].xyzw +iadd r3.xyzw, r1.xyxy, l(6, 0, 5, 0) +mov r4.xy, r3.zwzz +mov r4.zw, l(0,0,0,0) +ld r2.y, r4.xyzw, T0[0].yxzw +mov r3.zw, l(0,0,0,0) ld r2.z, r3.xyzw, T0[0].yzxw -iadd r1.xyzw, r1.xyzw, l(7, 0, 0, 0) +iadd r1.xy, r1.xyxx, l(7, 0, 0, 0) +mov r1.zw, l(0,0,0,0) ld r2.w, r1.xyzw, T0[0].yzwx store_uav_typed U0[0].xyzw, r0.yyyy, r2.xyzw ret -// Approximately 44 instruction slots used +// Approximately 50 instruction slots used #endif const BYTE host_depth_store_1xmsaa_cs[] = { - 68, 88, 66, 67, 240, 197, - 52, 115, 102, 12, 206, 220, - 26, 47, 204, 31, 163, 187, - 103, 82, 1, 0, 0, 0, - 196, 9, 0, 0, 5, 0, + 68, 88, 66, 67, 245, 164, + 50, 194, 210, 99, 179, 114, + 212, 91, 203, 114, 230, 214, + 20, 69, 1, 0, 0, 0, + 192, 9, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, - 172, 2, 0, 0, 188, 2, - 0, 0, 204, 2, 0, 0, - 40, 9, 0, 0, 82, 68, - 69, 70, 112, 2, 0, 0, - 2, 0, 0, 0, 92, 1, - 0, 0, 4, 0, 0, 0, + 52, 2, 0, 0, 68, 2, + 0, 0, 84, 2, 0, 0, + 36, 9, 0, 0, 82, 68, + 69, 70, 248, 1, 0, 0, + 1, 0, 0, 0, 252, 0, + 0, 0, 3, 0, 0, 0, 60, 0, 0, 0, 1, 5, 83, 67, 0, 5, 0, 0, - 70, 2, 0, 0, 19, 19, + 206, 1, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, - 220, 0, 0, 0, 2, 0, + 180, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 247, 0, + 0, 0, 0, 0, 207, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 16, 1, 0, 0, + 0, 0, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 51, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 120, 101, - 95, 104, 111, 115, 116, 95, - 100, 101, 112, 116, 104, 95, - 115, 116, 111, 114, 101, 95, - 115, 111, 117, 114, 99, 101, - 0, 120, 101, 95, 104, 111, - 115, 116, 95, 100, 101, 112, - 116, 104, 95, 115, 116, 111, - 114, 101, 95, 100, 101, 115, - 116, 0, 88, 101, 72, 111, - 115, 116, 68, 101, 112, 116, - 104, 83, 116, 111, 114, 101, - 82, 101, 99, 116, 97, 110, - 103, 108, 101, 67, 111, 110, - 115, 116, 97, 110, 116, 115, - 0, 88, 101, 72, 111, 115, - 116, 68, 101, 112, 116, 104, - 83, 116, 111, 114, 101, 82, - 101, 110, 100, 101, 114, 84, - 97, 114, 103, 101, 116, 67, - 111, 110, 115, 116, 97, 110, - 116, 115, 0, 171, 171, 171, - 16, 1, 0, 0, 1, 0, - 0, 0, 140, 1, 0, 0, + 120, 101, 95, 104, 111, 115, + 116, 95, 100, 101, 112, 116, + 104, 95, 115, 116, 111, 114, + 101, 95, 115, 111, 117, 114, + 99, 101, 0, 120, 101, 95, + 104, 111, 115, 116, 95, 100, + 101, 112, 116, 104, 95, 115, + 116, 111, 114, 101, 95, 100, + 101, 115, 116, 0, 120, 101, + 115, 108, 95, 112, 117, 115, + 104, 95, 99, 111, 110, 115, + 116, 97, 110, 116, 115, 0, + 232, 0, 0, 0, 2, 0, + 0, 0, 20, 1, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 51, 1, 0, 0, 1, 0, - 0, 0, 252, 1, 0, 0, - 16, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 180, 1, 0, 0, 0, 0, + 100, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, - 2, 0, 0, 0, 216, 1, + 2, 0, 0, 0, 136, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, - 0, 0, 0, 0, 120, 101, - 95, 104, 111, 115, 116, 95, - 100, 101, 112, 116, 104, 95, - 115, 116, 111, 114, 101, 95, - 114, 101, 99, 116, 97, 110, - 103, 108, 101, 0, 100, 119, - 111, 114, 100, 0, 0, 0, - 19, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 210, 1, 0, 0, 36, 2, - 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 172, 1, + 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, - 0, 0, 216, 1, 0, 0, + 0, 0, 136, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 120, 101, 95, 104, 111, 115, 116, 95, 100, 101, 112, 116, 104, 95, 115, 116, + 111, 114, 101, 95, 114, 101, + 99, 116, 97, 110, 103, 108, + 101, 0, 100, 119, 111, 114, + 100, 0, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 130, 1, + 0, 0, 120, 101, 95, 104, + 111, 115, 116, 95, 100, 101, + 112, 116, 104, 95, 115, 116, 111, 114, 101, 95, 114, 101, 110, 100, 101, 114, 95, 116, 97, 114, 103, 101, 116, 0, @@ -220,52 +198,44 @@ const BYTE host_depth_store_1xmsaa_cs[] = 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, - 84, 6, 0, 0, 81, 0, - 5, 0, 149, 1, 0, 0, + 200, 6, 0, 0, 81, 0, + 5, 0, 178, 1, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 89, 0, 0, 7, - 70, 142, 48, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, + 0, 0, 88, 24, 0, 7, + 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 88, 24, 0, 7, 70, 126, + 0, 0, 0, 0, 85, 85, + 0, 0, 0, 0, 0, 0, + 156, 8, 0, 7, 70, 238, + 49, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 68, 68, 0, 0, + 0, 0, 0, 0, 95, 0, + 0, 2, 50, 0, 2, 0, + 104, 0, 0, 2, 5, 0, + 0, 0, 155, 0, 0, 4, + 8, 0, 0, 0, 8, 0, + 0, 0, 1, 0, 0, 0, + 138, 0, 0, 17, 50, 0, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 10, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 20, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 70, 128, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 85, 85, 0, 0, - 0, 0, 0, 0, 156, 8, - 0, 7, 70, 238, 49, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 68, 68, 0, 0, 0, 0, - 0, 0, 95, 0, 0, 2, - 50, 0, 2, 0, 104, 0, - 0, 2, 4, 0, 0, 0, - 155, 0, 0, 4, 8, 0, - 0, 0, 8, 0, 0, 0, - 1, 0, 0, 0, 138, 0, - 0, 11, 18, 0, 16, 0, + 0, 0, 30, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, - 0, 0, 10, 0, 0, 0, - 1, 64, 0, 0, 20, 0, - 0, 0, 10, 128, 48, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 30, 0, 0, 7, 18, 0, - 16, 0, 0, 0, 0, 0, - 10, 0, 16, 0, 0, 0, - 0, 0, 1, 64, 0, 0, - 1, 0, 0, 0, 138, 0, - 0, 11, 34, 0, 16, 0, - 0, 0, 0, 0, 1, 64, - 0, 0, 2, 0, 0, 0, - 1, 64, 0, 0, 10, 0, - 0, 0, 10, 128, 48, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 38, 0, 0, 8, 0, 208, 0, 0, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, @@ -311,8 +281,8 @@ const BYTE host_depth_store_1xmsaa_cs[] = 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 12, 0, - 0, 0, 6, 128, 48, 0, - 1, 0, 0, 0, 1, 0, + 0, 0, 86, 133, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 6, 18, 0, 16, 0, 1, 0, 0, 0, @@ -328,9 +298,9 @@ const BYTE host_depth_store_1xmsaa_cs[] = 0, 0, 70, 0, 16, 0, 1, 0, 0, 0, 1, 0, 0, 9, 18, 0, 16, 0, - 0, 0, 0, 0, 10, 128, - 48, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 26, 128, + 48, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 64, 0, 0, 255, 3, 0, 0, 38, 0, 0, 11, 0, 208, 0, 0, @@ -387,44 +357,60 @@ const BYTE host_depth_store_1xmsaa_cs[] = 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 8, 18, 0, 16, 0, 2, 0, - 0, 0, 70, 15, 16, 0, + 0, 0, 70, 14, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 10, 242, 0, 16, 0, - 3, 0, 0, 0, 70, 15, - 16, 0, 1, 0, 0, 0, - 2, 64, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 45, 0, 0, 8, - 34, 0, 16, 0, 2, 0, - 0, 0, 70, 14, 16, 0, - 3, 0, 0, 0, 22, 126, - 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 30, 0, - 0, 10, 242, 0, 16, 0, - 3, 0, 0, 0, 70, 15, + 3, 0, 0, 0, 70, 4, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 54, 0, 0, 5, + 50, 0, 16, 0, 4, 0, + 0, 0, 230, 10, 16, 0, + 3, 0, 0, 0, 54, 0, + 0, 8, 194, 0, 16, 0, + 4, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, - 0, 0, 45, 0, 0, 8, - 66, 0, 16, 0, 2, 0, - 0, 0, 70, 14, 16, 0, - 3, 0, 0, 0, 150, 124, - 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 30, 0, - 0, 10, 242, 0, 16, 0, - 3, 0, 0, 0, 70, 15, - 16, 0, 1, 0, 0, 0, - 2, 64, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 45, 0, 0, 8, 34, 0, + 16, 0, 2, 0, 0, 0, + 70, 14, 16, 0, 4, 0, + 0, 0, 22, 126, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 54, 0, 0, 8, + 194, 0, 16, 0, 3, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 45, 0, + 0, 8, 66, 0, 16, 0, + 2, 0, 0, 0, 70, 14, + 16, 0, 3, 0, 0, 0, + 150, 124, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 30, 0, 0, 10, 242, 0, + 16, 0, 3, 0, 0, 0, + 70, 4, 16, 0, 1, 0, + 0, 0, 2, 64, 0, 0, + 4, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 54, 0, + 0, 5, 50, 0, 16, 0, + 4, 0, 0, 0, 230, 10, + 16, 0, 3, 0, 0, 0, + 54, 0, 0, 8, 194, 0, + 16, 0, 4, 0, 0, 0, + 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 8, 130, 0, 16, 0, 2, 0, 0, 0, 70, 14, 16, 0, - 3, 0, 0, 0, 150, 115, + 4, 0, 0, 0, 150, 115, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 164, 0, 0, 8, 242, 224, 33, 0, @@ -436,35 +422,41 @@ const BYTE host_depth_store_1xmsaa_cs[] = 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, - 1, 0, 0, 0, 30, 0, - 0, 10, 242, 0, 16, 0, - 2, 0, 0, 0, 70, 15, - 16, 0, 1, 0, 0, 0, - 2, 64, 0, 0, 4, 0, + 1, 0, 0, 0, 54, 0, + 0, 8, 194, 0, 16, 0, + 3, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 45, 0, 0, 8, - 18, 0, 16, 0, 2, 0, - 0, 0, 70, 14, 16, 0, - 2, 0, 0, 0, 70, 126, - 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 30, 0, - 0, 10, 242, 0, 16, 0, - 3, 0, 0, 0, 70, 15, - 16, 0, 1, 0, 0, 0, - 2, 64, 0, 0, 5, 0, + 0, 0, 0, 0, 0, 0, + 45, 0, 0, 8, 18, 0, + 16, 0, 2, 0, 0, 0, + 70, 14, 16, 0, 3, 0, + 0, 0, 70, 126, 32, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 30, 0, 0, 10, + 242, 0, 16, 0, 3, 0, + 0, 0, 70, 4, 16, 0, + 1, 0, 0, 0, 2, 64, + 0, 0, 6, 0, 0, 0, + 0, 0, 0, 0, 5, 0, + 0, 0, 0, 0, 0, 0, + 54, 0, 0, 5, 50, 0, + 16, 0, 4, 0, 0, 0, + 230, 10, 16, 0, 3, 0, + 0, 0, 54, 0, 0, 8, + 194, 0, 16, 0, 4, 0, + 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 45, 0, 0, 8, - 34, 0, 16, 0, 2, 0, - 0, 0, 70, 14, 16, 0, - 3, 0, 0, 0, 22, 126, - 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 30, 0, - 0, 10, 242, 0, 16, 0, - 3, 0, 0, 0, 70, 15, - 16, 0, 1, 0, 0, 0, - 2, 64, 0, 0, 6, 0, + 0, 0, 0, 0, 45, 0, + 0, 8, 34, 0, 16, 0, + 2, 0, 0, 0, 70, 14, + 16, 0, 4, 0, 0, 0, + 22, 126, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 54, 0, 0, 8, 194, 0, + 16, 0, 3, 0, 0, 0, + 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 8, @@ -473,38 +465,44 @@ const BYTE host_depth_store_1xmsaa_cs[] = 3, 0, 0, 0, 150, 124, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, - 0, 10, 242, 0, 16, 0, - 1, 0, 0, 0, 70, 14, + 0, 10, 50, 0, 16, 0, + 1, 0, 0, 0, 70, 0, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 45, 0, 0, 8, - 130, 0, 16, 0, 2, 0, - 0, 0, 70, 14, 16, 0, - 1, 0, 0, 0, 150, 115, - 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 164, 0, - 0, 8, 242, 224, 33, 0, + 0, 0, 54, 0, 0, 8, + 194, 0, 16, 0, 1, 0, + 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 86, 5, 16, 0, - 0, 0, 0, 0, 70, 14, - 16, 0, 2, 0, 0, 0, - 62, 0, 0, 1, 83, 84, - 65, 84, 148, 0, 0, 0, - 44, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, - 0, 0, 18, 0, 0, 0, - 5, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 45, 0, + 0, 8, 130, 0, 16, 0, + 2, 0, 0, 0, 70, 14, + 16, 0, 1, 0, 0, 0, + 150, 115, 32, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 164, 0, 0, 8, 242, 224, + 33, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 86, 5, + 16, 0, 0, 0, 0, 0, + 70, 14, 16, 0, 2, 0, + 0, 0, 62, 0, 0, 1, + 83, 84, 65, 84, 148, 0, + 0, 0, 50, 0, 0, 0, + 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 15, 0, + 0, 0, 5, 0, 0, 0, + 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, + 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -515,6 +513,5 @@ const BYTE host_depth_store_1xmsaa_cs[] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0 + 0, 0, 2, 0, 0, 0 }; diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_2xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_2xmsaa_cs.h index 16d7d51b2..d428ec7d1 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_2xmsaa_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_2xmsaa_cs.h @@ -5,17 +5,11 @@ // // Buffer Definitions: // -// cbuffer XeHostDepthStoreRectangleConstants +// cbuffer xesl_push_constants // { // // uint xe_host_depth_store_rectangle;// Offset: 0 Size: 4 -// -// } -// -// cbuffer XeHostDepthStoreRenderTargetConstants -// { -// -// uint xe_host_depth_store_render_target;// Offset: 0 Size: 4 +// uint xe_host_depth_store_render_target;// Offset: 4 Size: 4 // // } // @@ -24,10 +18,9 @@ // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ -// xe_host_depth_store_source texture float 2dMS T0 t0 1 +// xe_host_depth_store_source texture float4 2dMS T0 t0 1 // xe_host_depth_store_dest UAV uint4 buf U0 u0 1 -// XeHostDepthStoreRectangleConstants cbuffer NA NA CB0 cb0 1 -// XeHostDepthStoreRenderTargetConstants cbuffer NA NA CB1 cb1 1 +// xesl_push_constants cbuffer NA NA CB0 cb0 1 // // // @@ -45,15 +38,13 @@ cs_5_1 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[0:0][1], immediateIndexed, space=0 -dcl_constantbuffer CB1[1:1][1], immediateIndexed, space=0 dcl_resource_texture2dms(0) (float,float,float,float) T0[0:0], space=0 dcl_uav_typed_buffer (uint,uint,uint,uint) U0[0:0], space=0 dcl_input vThreadID.xy dcl_temps 5 dcl_thread_group 8, 8, 1 -ubfe r0.x, l(10), l(20), CB0[0][0].x +ubfe r0.xy, l(10, 2, 0, 0), l(20, 10, 0, 0), CB0[0][0].xyxx iadd r0.x, r0.x, l(1) -ubfe r0.y, l(2), l(10), CB1[1][0].x imul null, r0.x, r0.y, r0.x uge r0.x, vThreadID.x, r0.x if_nz r0.x @@ -62,12 +53,12 @@ endif ushr r0.y, CB0[0][0].x, l(10) mov r0.x, CB0[0][0].x bfi r0.xy, l(10, 10, 0, 0), l(3, 3, 0, 0), r0.xyxx, l(0, 0, 0, 0) -ubfe r1.xyz, l(2, 2, 1, 0), l(10, 12, 14, 0), CB1[1][0].xxxx +ubfe r1.xyz, l(2, 2, 1, 0), l(10, 12, 14, 0), CB0[0][0].yyyy ishl r2.x, vThreadID.x, l(3) ushr r2.y, vThreadID.y, l(1) imad r0.xy, r0.xyxx, r1.xyxx, r2.xyxx and r2.y, vThreadID.y, l(1) -and r1.w, CB1[1][0].x, l(1023) +and r1.w, CB0[0][0].y, l(1023) ishl r0.z, r0.y, l(1) mov r2.x, l(0) iadd r2.xz, r0.xxzx, r2.xxyx @@ -108,115 +99,95 @@ mov r3.zw, l(0,0,0,0) ldms r2.w, r3.xyzw, T0[0].yzwx, r1.x store_uav_typed U0[0].xyzw, r0.wwww, r2.xyzw ret -// Approximately 57 instruction slots used +// Approximately 56 instruction slots used #endif const BYTE host_depth_store_2xmsaa_cs[] = { - 68, 88, 66, 67, 220, 153, - 96, 168, 160, 176, 100, 61, - 221, 246, 187, 57, 87, 53, - 74, 27, 1, 0, 0, 0, - 104, 11, 0, 0, 5, 0, + 68, 88, 66, 67, 93, 44, + 123, 4, 117, 130, 24, 111, + 100, 32, 239, 250, 72, 125, + 9, 219, 1, 0, 0, 0, + 192, 10, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, - 172, 2, 0, 0, 188, 2, - 0, 0, 204, 2, 0, 0, - 204, 10, 0, 0, 82, 68, - 69, 70, 112, 2, 0, 0, - 2, 0, 0, 0, 92, 1, - 0, 0, 4, 0, 0, 0, + 52, 2, 0, 0, 68, 2, + 0, 0, 84, 2, 0, 0, + 36, 10, 0, 0, 82, 68, + 69, 70, 248, 1, 0, 0, + 1, 0, 0, 0, 252, 0, + 0, 0, 3, 0, 0, 0, 60, 0, 0, 0, 1, 5, 83, 67, 0, 5, 0, 0, - 70, 2, 0, 0, 19, 19, + 206, 1, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, - 220, 0, 0, 0, 2, 0, + 180, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 247, 0, + 0, 0, 0, 0, 207, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 16, 1, 0, 0, + 0, 0, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 51, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 120, 101, - 95, 104, 111, 115, 116, 95, - 100, 101, 112, 116, 104, 95, - 115, 116, 111, 114, 101, 95, - 115, 111, 117, 114, 99, 101, - 0, 120, 101, 95, 104, 111, - 115, 116, 95, 100, 101, 112, - 116, 104, 95, 115, 116, 111, - 114, 101, 95, 100, 101, 115, - 116, 0, 88, 101, 72, 111, - 115, 116, 68, 101, 112, 116, - 104, 83, 116, 111, 114, 101, - 82, 101, 99, 116, 97, 110, - 103, 108, 101, 67, 111, 110, - 115, 116, 97, 110, 116, 115, - 0, 88, 101, 72, 111, 115, - 116, 68, 101, 112, 116, 104, - 83, 116, 111, 114, 101, 82, - 101, 110, 100, 101, 114, 84, - 97, 114, 103, 101, 116, 67, - 111, 110, 115, 116, 97, 110, - 116, 115, 0, 171, 171, 171, - 16, 1, 0, 0, 1, 0, - 0, 0, 140, 1, 0, 0, + 120, 101, 95, 104, 111, 115, + 116, 95, 100, 101, 112, 116, + 104, 95, 115, 116, 111, 114, + 101, 95, 115, 111, 117, 114, + 99, 101, 0, 120, 101, 95, + 104, 111, 115, 116, 95, 100, + 101, 112, 116, 104, 95, 115, + 116, 111, 114, 101, 95, 100, + 101, 115, 116, 0, 120, 101, + 115, 108, 95, 112, 117, 115, + 104, 95, 99, 111, 110, 115, + 116, 97, 110, 116, 115, 0, + 232, 0, 0, 0, 2, 0, + 0, 0, 20, 1, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 51, 1, 0, 0, 1, 0, - 0, 0, 252, 1, 0, 0, - 16, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 180, 1, 0, 0, 0, 0, + 100, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, - 2, 0, 0, 0, 216, 1, + 2, 0, 0, 0, 136, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, - 0, 0, 0, 0, 120, 101, - 95, 104, 111, 115, 116, 95, - 100, 101, 112, 116, 104, 95, - 115, 116, 111, 114, 101, 95, - 114, 101, 99, 116, 97, 110, - 103, 108, 101, 0, 100, 119, - 111, 114, 100, 0, 0, 0, - 19, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 210, 1, 0, 0, 36, 2, - 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 172, 1, + 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, - 0, 0, 216, 1, 0, 0, + 0, 0, 136, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 120, 101, 95, 104, 111, 115, 116, 95, 100, 101, 112, 116, 104, 95, 115, 116, + 111, 114, 101, 95, 114, 101, + 99, 116, 97, 110, 103, 108, + 101, 0, 100, 119, 111, 114, + 100, 0, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 130, 1, + 0, 0, 120, 101, 95, 104, + 111, 115, 116, 95, 100, 101, + 112, 116, 104, 95, 115, 116, 111, 114, 101, 95, 114, 101, 110, 100, 101, 114, 95, 116, 97, 114, 103, 101, 116, 0, @@ -233,52 +204,44 @@ const BYTE host_depth_store_2xmsaa_cs[] = 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, - 248, 7, 0, 0, 81, 0, - 5, 0, 254, 1, 0, 0, + 200, 7, 0, 0, 81, 0, + 5, 0, 242, 1, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 89, 0, 0, 7, - 70, 142, 48, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, + 0, 0, 88, 32, 0, 7, + 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 88, 32, 0, 7, 70, 126, + 0, 0, 0, 0, 85, 85, + 0, 0, 0, 0, 0, 0, + 156, 8, 0, 7, 70, 238, + 49, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 68, 68, 0, 0, + 0, 0, 0, 0, 95, 0, + 0, 2, 50, 0, 2, 0, + 104, 0, 0, 2, 5, 0, + 0, 0, 155, 0, 0, 4, + 8, 0, 0, 0, 8, 0, + 0, 0, 1, 0, 0, 0, + 138, 0, 0, 17, 50, 0, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 10, 0, + 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 20, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 70, 128, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 85, 85, 0, 0, - 0, 0, 0, 0, 156, 8, - 0, 7, 70, 238, 49, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 68, 68, 0, 0, 0, 0, - 0, 0, 95, 0, 0, 2, - 50, 0, 2, 0, 104, 0, - 0, 2, 5, 0, 0, 0, - 155, 0, 0, 4, 8, 0, - 0, 0, 8, 0, 0, 0, - 1, 0, 0, 0, 138, 0, - 0, 11, 18, 0, 16, 0, + 0, 0, 30, 0, 0, 7, + 18, 0, 16, 0, 0, 0, + 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, - 0, 0, 10, 0, 0, 0, - 1, 64, 0, 0, 20, 0, - 0, 0, 10, 128, 48, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 30, 0, 0, 7, 18, 0, - 16, 0, 0, 0, 0, 0, - 10, 0, 16, 0, 0, 0, - 0, 0, 1, 64, 0, 0, - 1, 0, 0, 0, 138, 0, - 0, 11, 34, 0, 16, 0, - 0, 0, 0, 0, 1, 64, - 0, 0, 2, 0, 0, 0, - 1, 64, 0, 0, 10, 0, - 0, 0, 10, 128, 48, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 38, 0, 0, 8, 0, 208, 0, 0, 18, 0, 16, 0, 0, 0, 0, 0, 26, 0, @@ -324,8 +287,8 @@ const BYTE host_depth_store_2xmsaa_cs[] = 2, 64, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14, 0, 0, 0, 0, 0, - 0, 0, 6, 128, 48, 0, - 1, 0, 0, 0, 1, 0, + 0, 0, 86, 133, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 6, 18, 0, 16, 0, 2, 0, 0, 0, @@ -347,8 +310,8 @@ const BYTE host_depth_store_2xmsaa_cs[] = 0, 0, 1, 0, 0, 0, 1, 0, 0, 9, 130, 0, 16, 0, 1, 0, 0, 0, - 10, 128, 48, 0, 1, 0, - 0, 0, 1, 0, 0, 0, + 26, 128, 48, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 64, 0, 0, 255, 3, 0, 0, 41, 0, 0, 7, 66, 0, @@ -575,7 +538,7 @@ const BYTE host_depth_store_2xmsaa_cs[] = 16, 0, 2, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, - 57, 0, 0, 0, 5, 0, + 56, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_4xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_4xmsaa_cs.h index 0ff471cc5..03e8bf7be 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_4xmsaa_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/host_depth_store_4xmsaa_cs.h @@ -5,17 +5,11 @@ // // Buffer Definitions: // -// cbuffer XeHostDepthStoreRectangleConstants +// cbuffer xesl_push_constants // { // // uint xe_host_depth_store_rectangle;// Offset: 0 Size: 4 -// -// } -// -// cbuffer XeHostDepthStoreRenderTargetConstants -// { -// -// uint xe_host_depth_store_render_target;// Offset: 0 Size: 4 +// uint xe_host_depth_store_render_target;// Offset: 4 Size: 4 // // } // @@ -24,10 +18,9 @@ // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ -// xe_host_depth_store_source texture float 2dMS T0 t0 1 +// xe_host_depth_store_source texture float4 2dMS T0 t0 1 // xe_host_depth_store_dest UAV uint4 buf U0 u0 1 -// XeHostDepthStoreRectangleConstants cbuffer NA NA CB0 cb0 1 -// XeHostDepthStoreRenderTargetConstants cbuffer NA NA CB1 cb1 1 +// xesl_push_constants cbuffer NA NA CB0 cb0 1 // // // @@ -45,16 +38,14 @@ cs_5_1 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[0:0][1], immediateIndexed, space=0 -dcl_constantbuffer CB1[1:1][1], immediateIndexed, space=0 dcl_resource_texture2dms(0) (float,float,float,float) T0[0:0], space=0 dcl_uav_typed_buffer (uint,uint,uint,uint) U0[0:0], space=0 dcl_input vThreadID.xy dcl_temps 5 dcl_thread_group 8, 8, 1 ushr r0.x, vThreadID.x, l(1) -ubfe r0.y, l(10), l(20), CB0[0][0].x +ubfe r0.yz, l(0, 10, 2, 0), l(0, 20, 10, 0), CB0[0][0].xxyx iadd r0.y, r0.y, l(1) -ubfe r0.z, l(2), l(10), CB1[1][0].x imul null, r0.y, r0.z, r0.y uge r0.x, r0.x, r0.y if_nz r0.x @@ -63,12 +54,12 @@ endif ushr r0.y, CB0[0][0].x, l(10) mov r0.x, CB0[0][0].x bfi r0.xy, l(10, 10, 0, 0), l(3, 3, 0, 0), r0.xyxx, l(0, 0, 0, 0) -ubfe r0.zw, l(0, 0, 2, 2), l(0, 0, 10, 12), CB1[1][0].xxxx +ubfe r0.zw, l(0, 0, 2, 2), l(0, 0, 10, 12), CB0[0][0].yyyy ishl r1.x, vThreadID.x, l(2) ushr r1.y, vThreadID.y, l(1) imad r1.xy, r0.xyxx, r0.zwzz, r1.xyxx bfi r0.xy, l(31, 31, 0, 0), l(1, 1, 0, 0), r1.xyxx, vThreadID.xyxx -and r2.x, CB1[1][0].x, l(1023) +and r2.x, CB0[0][0].y, l(1023) imul null, r0.zw, r0.zzzw, l(0, 0, 80, 16) udiv r2.yz, null, r0.xxyx, r0.zzwz imad r2.x, r2.z, r2.x, r2.y @@ -97,115 +88,95 @@ ldms r2.z, r1.xyww, T0[0].yzxw, r0.y ldms r2.w, r1.xyzw, T0[0].yzwx, r0.z store_uav_typed U0[0].xyzw, r0.wwww, r2.xyzw ret -// Approximately 46 instruction slots used +// Approximately 45 instruction slots used #endif const BYTE host_depth_store_4xmsaa_cs[] = { - 68, 88, 66, 67, 255, 93, - 213, 231, 110, 151, 46, 65, - 140, 74, 54, 254, 196, 135, - 241, 89, 1, 0, 0, 0, - 72, 10, 0, 0, 5, 0, + 68, 88, 66, 67, 40, 223, + 121, 252, 88, 5, 117, 190, + 136, 7, 3, 6, 127, 125, + 212, 65, 1, 0, 0, 0, + 160, 9, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, - 172, 2, 0, 0, 188, 2, - 0, 0, 204, 2, 0, 0, - 172, 9, 0, 0, 82, 68, - 69, 70, 112, 2, 0, 0, - 2, 0, 0, 0, 92, 1, - 0, 0, 4, 0, 0, 0, + 52, 2, 0, 0, 68, 2, + 0, 0, 84, 2, 0, 0, + 4, 9, 0, 0, 82, 68, + 69, 70, 248, 1, 0, 0, + 1, 0, 0, 0, 252, 0, + 0, 0, 3, 0, 0, 0, 60, 0, 0, 0, 1, 5, 83, 67, 0, 5, 0, 0, - 70, 2, 0, 0, 19, 19, + 206, 1, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, - 220, 0, 0, 0, 2, 0, + 180, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 247, 0, + 0, 0, 0, 0, 207, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 16, 1, 0, 0, + 0, 0, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 51, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 120, 101, - 95, 104, 111, 115, 116, 95, - 100, 101, 112, 116, 104, 95, - 115, 116, 111, 114, 101, 95, - 115, 111, 117, 114, 99, 101, - 0, 120, 101, 95, 104, 111, - 115, 116, 95, 100, 101, 112, - 116, 104, 95, 115, 116, 111, - 114, 101, 95, 100, 101, 115, - 116, 0, 88, 101, 72, 111, - 115, 116, 68, 101, 112, 116, - 104, 83, 116, 111, 114, 101, - 82, 101, 99, 116, 97, 110, - 103, 108, 101, 67, 111, 110, - 115, 116, 97, 110, 116, 115, - 0, 88, 101, 72, 111, 115, - 116, 68, 101, 112, 116, 104, - 83, 116, 111, 114, 101, 82, - 101, 110, 100, 101, 114, 84, - 97, 114, 103, 101, 116, 67, - 111, 110, 115, 116, 97, 110, - 116, 115, 0, 171, 171, 171, - 16, 1, 0, 0, 1, 0, - 0, 0, 140, 1, 0, 0, + 120, 101, 95, 104, 111, 115, + 116, 95, 100, 101, 112, 116, + 104, 95, 115, 116, 111, 114, + 101, 95, 115, 111, 117, 114, + 99, 101, 0, 120, 101, 95, + 104, 111, 115, 116, 95, 100, + 101, 112, 116, 104, 95, 115, + 116, 111, 114, 101, 95, 100, + 101, 115, 116, 0, 120, 101, + 115, 108, 95, 112, 117, 115, + 104, 95, 99, 111, 110, 115, + 116, 97, 110, 116, 115, 0, + 232, 0, 0, 0, 2, 0, + 0, 0, 20, 1, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 51, 1, 0, 0, 1, 0, - 0, 0, 252, 1, 0, 0, - 16, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 180, 1, 0, 0, 0, 0, + 100, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, - 2, 0, 0, 0, 216, 1, + 2, 0, 0, 0, 136, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, - 0, 0, 0, 0, 120, 101, - 95, 104, 111, 115, 116, 95, - 100, 101, 112, 116, 104, 95, - 115, 116, 111, 114, 101, 95, - 114, 101, 99, 116, 97, 110, - 103, 108, 101, 0, 100, 119, - 111, 114, 100, 0, 0, 0, - 19, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 210, 1, 0, 0, 36, 2, - 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 172, 1, + 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, - 0, 0, 216, 1, 0, 0, + 0, 0, 136, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 120, 101, 95, 104, 111, 115, 116, 95, 100, 101, 112, 116, 104, 95, 115, 116, + 111, 114, 101, 95, 114, 101, + 99, 116, 97, 110, 103, 108, + 101, 0, 100, 119, 111, 114, + 100, 0, 0, 0, 19, 0, + 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 130, 1, + 0, 0, 120, 101, 95, 104, + 111, 115, 116, 95, 100, 101, + 112, 116, 104, 95, 115, 116, 111, 114, 101, 95, 114, 101, 110, 100, 101, 114, 95, 116, 97, 114, 103, 101, 116, 0, @@ -222,56 +193,48 @@ const BYTE host_depth_store_4xmsaa_cs[] = 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, - 216, 6, 0, 0, 81, 0, - 5, 0, 182, 1, 0, 0, + 168, 6, 0, 0, 81, 0, + 5, 0, 170, 1, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 89, 0, 0, 7, - 70, 142, 48, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, + 0, 0, 88, 32, 0, 7, + 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 88, 32, 0, 7, 70, 126, + 0, 0, 0, 0, 85, 85, + 0, 0, 0, 0, 0, 0, + 156, 8, 0, 7, 70, 238, + 49, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 68, 68, 0, 0, + 0, 0, 0, 0, 95, 0, + 0, 2, 50, 0, 2, 0, + 104, 0, 0, 2, 5, 0, + 0, 0, 155, 0, 0, 4, + 8, 0, 0, 0, 8, 0, + 0, 0, 1, 0, 0, 0, + 85, 0, 0, 6, 18, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 2, 0, 1, 64, + 0, 0, 1, 0, 0, 0, + 138, 0, 0, 17, 98, 0, + 16, 0, 0, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 10, 0, 0, 0, + 2, 0, 0, 0, 0, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 20, 0, + 0, 0, 10, 0, 0, 0, + 0, 0, 0, 0, 6, 129, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 85, 85, 0, 0, - 0, 0, 0, 0, 156, 8, - 0, 7, 70, 238, 49, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 68, 68, 0, 0, 0, 0, - 0, 0, 95, 0, 0, 2, - 50, 0, 2, 0, 104, 0, - 0, 2, 5, 0, 0, 0, - 155, 0, 0, 4, 8, 0, - 0, 0, 8, 0, 0, 0, - 1, 0, 0, 0, 85, 0, - 0, 6, 18, 0, 16, 0, - 0, 0, 0, 0, 10, 0, - 2, 0, 1, 64, 0, 0, - 1, 0, 0, 0, 138, 0, - 0, 11, 34, 0, 16, 0, + 0, 0, 30, 0, 0, 7, + 34, 0, 16, 0, 0, 0, + 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, - 0, 0, 10, 0, 0, 0, - 1, 64, 0, 0, 20, 0, - 0, 0, 10, 128, 48, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 30, 0, 0, 7, 34, 0, - 16, 0, 0, 0, 0, 0, - 26, 0, 16, 0, 0, 0, - 0, 0, 1, 64, 0, 0, - 1, 0, 0, 0, 138, 0, - 0, 11, 66, 0, 16, 0, - 0, 0, 0, 0, 1, 64, - 0, 0, 2, 0, 0, 0, - 1, 64, 0, 0, 10, 0, - 0, 0, 10, 128, 48, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 38, 0, 0, 8, 0, 208, 0, 0, 34, 0, 16, 0, 0, 0, 0, 0, 42, 0, @@ -318,8 +281,8 @@ const BYTE host_depth_store_4xmsaa_cs[] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, - 6, 128, 48, 0, 1, 0, - 0, 0, 1, 0, 0, 0, + 86, 133, 48, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 6, 18, 0, 16, 0, 1, 0, 0, 0, 10, 0, @@ -347,8 +310,8 @@ const BYTE host_depth_store_4xmsaa_cs[] = 1, 0, 0, 0, 70, 0, 2, 0, 1, 0, 0, 9, 18, 0, 16, 0, 2, 0, - 0, 0, 10, 128, 48, 0, - 1, 0, 0, 0, 1, 0, + 0, 0, 26, 128, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 64, 0, 0, 255, 3, 0, 0, 38, 0, 0, 11, @@ -516,7 +479,7 @@ const BYTE host_depth_store_4xmsaa_cs[] = 16, 0, 2, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, - 46, 0, 0, 0, 5, 0, + 45, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.h index cfeca33cb..5e7150be1 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.h @@ -1329,11 +1329,12 @@ switch r0.y imad r0.yz, r1.yywy, l(0, 0x00010000, 0x00010000, 0), r1.xxzx break default - f32tof16 r1.x, r4.x - f32tof16 r1.y, r7.x - f32tof16 r3.x, r5.x - f32tof16 r3.y, r2.x - imad r0.yz, r3.xxyx, l(0, 0x00010000, 0x00010000, 0), r1.xxyx + f32tof16 r0.w, r4.x + f32tof16 r1.x, r5.x + imad r0.y, r1.x, l(0x00010000), r0.w + f32tof16 r0.w, r7.x + f32tof16 r1.x, r2.x + imad r0.z, r1.x, l(0x00010000), r0.w break endswitch ieq r0.w, r3.w, l(1) @@ -1345,20 +1346,20 @@ if_nz r0.w endif store_uav_typed U0[0].xyzw, r0.xxxx, r0.yzyy ret -// Approximately 1299 instruction slots used +// Approximately 1300 instruction slots used #endif const BYTE resolve_full_16bpp_cs[] = { - 68, 88, 66, 67, 122, 248, - 161, 239, 38, 34, 59, 122, - 64, 201, 159, 30, 45, 236, - 145, 6, 1, 0, 0, 0, - 112, 158, 0, 0, 5, 0, + 68, 88, 66, 67, 161, 122, + 199, 225, 156, 78, 85, 181, + 38, 69, 188, 217, 160, 206, + 17, 122, 1, 0, 0, 0, + 136, 158, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 208, 2, 0, 0, 224, 2, 0, 0, 240, 2, 0, 0, - 212, 157, 0, 0, 82, 68, + 236, 157, 0, 0, 82, 68, 69, 70, 148, 2, 0, 0, 1, 0, 0, 0, 236, 0, 0, 0, 3, 0, 0, 0, @@ -1476,8 +1477,8 @@ const BYTE resolve_full_16bpp_cs[] = 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, - 220, 154, 0, 0, 81, 0, - 5, 0, 183, 38, 0, 0, + 244, 154, 0, 0, 81, 0, + 5, 0, 189, 38, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 0, 0, @@ -8024,27 +8025,31 @@ const BYTE resolve_full_16bpp_cs[] = 16, 0, 1, 0, 0, 0, 2, 0, 0, 1, 10, 0, 0, 1, 130, 0, 0, 5, - 18, 0, 16, 0, 1, 0, + 130, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 4, 0, 0, 0, 130, 0, - 0, 5, 34, 0, 16, 0, + 0, 5, 18, 0, 16, 0, 1, 0, 0, 0, 10, 0, - 16, 0, 7, 0, 0, 0, - 130, 0, 0, 5, 18, 0, - 16, 0, 3, 0, 0, 0, - 10, 0, 16, 0, 5, 0, + 16, 0, 5, 0, 0, 0, + 35, 0, 0, 9, 34, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 1, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 1, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 130, 0, 0, 5, 130, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 7, 0, 0, 0, 130, 0, 0, 5, - 34, 0, 16, 0, 3, 0, + 18, 0, 16, 0, 1, 0, 0, 0, 10, 0, 16, 0, 2, 0, 0, 0, 35, 0, - 0, 12, 98, 0, 16, 0, - 0, 0, 0, 0, 6, 1, - 16, 0, 3, 0, 0, 0, - 2, 64, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 0, 0, 6, 1, 16, 0, - 1, 0, 0, 0, 2, 0, + 0, 9, 66, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 1, 0, 58, 0, 16, 0, + 0, 0, 0, 0, 2, 0, 0, 1, 23, 0, 0, 1, 32, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, @@ -8085,10 +8090,10 @@ const BYTE resolve_full_16bpp_cs[] = 150, 5, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, - 0, 0, 19, 5, 0, 0, + 0, 0, 20, 5, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 133, 0, 0, 0, 30, 1, + 133, 0, 0, 0, 31, 1, 0, 0, 172, 0, 0, 0, 61, 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_scaled_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_scaled_cs.h index 189c65905..14e340d1a 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_scaled_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_scaled_cs.h @@ -1360,11 +1360,12 @@ switch r0.y imad r0.yz, r1.yywy, l(0, 0x00010000, 0x00010000, 0), r1.xxzx break default - f32tof16 r1.x, r5.x - f32tof16 r1.y, r8.x - f32tof16 r2.x, r6.x - f32tof16 r2.y, r7.x - imad r0.yz, r2.xxyx, l(0, 0x00010000, 0x00010000, 0), r1.xxyx + f32tof16 r0.w, r5.x + f32tof16 r1.x, r6.x + imad r0.y, r1.x, l(0x00010000), r0.w + f32tof16 r0.w, r8.x + f32tof16 r1.x, r7.x + imad r0.z, r1.x, l(0x00010000), r0.w break endswitch and r0.w, CB0[0][0].z, l(7) @@ -1377,20 +1378,20 @@ if_nz r0.w endif store_uav_typed U0[0].xyzw, r0.xxxx, r0.yzyy ret -// Approximately 1323 instruction slots used +// Approximately 1324 instruction slots used #endif const BYTE resolve_full_16bpp_scaled_cs[] = { - 68, 88, 66, 67, 17, 196, - 248, 162, 148, 187, 123, 170, - 150, 149, 87, 23, 157, 34, - 89, 51, 1, 0, 0, 0, - 188, 160, 0, 0, 5, 0, + 68, 88, 66, 67, 12, 60, + 201, 113, 72, 169, 62, 4, + 36, 236, 72, 168, 110, 92, + 28, 108, 1, 0, 0, 0, + 212, 160, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 56, 3, 0, 0, 72, 3, 0, 0, 88, 3, 0, 0, - 32, 160, 0, 0, 82, 68, + 56, 160, 0, 0, 82, 68, 69, 70, 252, 2, 0, 0, 2, 0, 0, 0, 52, 1, 0, 0, 4, 0, 0, 0, @@ -1525,8 +1526,8 @@ const BYTE resolve_full_16bpp_scaled_cs[] = 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, - 69, 88, 192, 156, 0, 0, - 81, 0, 5, 0, 48, 39, + 69, 88, 216, 156, 0, 0, + 81, 0, 5, 0, 54, 39, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, @@ -8148,27 +8149,31 @@ const BYTE resolve_full_16bpp_scaled_cs[] = 16, 0, 1, 0, 0, 0, 2, 0, 0, 1, 10, 0, 0, 1, 130, 0, 0, 5, - 18, 0, 16, 0, 1, 0, + 130, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 5, 0, 0, 0, 130, 0, - 0, 5, 34, 0, 16, 0, + 0, 5, 18, 0, 16, 0, 1, 0, 0, 0, 10, 0, - 16, 0, 8, 0, 0, 0, - 130, 0, 0, 5, 18, 0, - 16, 0, 2, 0, 0, 0, - 10, 0, 16, 0, 6, 0, + 16, 0, 6, 0, 0, 0, + 35, 0, 0, 9, 34, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 1, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 1, 0, 58, 0, + 16, 0, 0, 0, 0, 0, + 130, 0, 0, 5, 130, 0, + 16, 0, 0, 0, 0, 0, + 10, 0, 16, 0, 8, 0, 0, 0, 130, 0, 0, 5, - 34, 0, 16, 0, 2, 0, + 18, 0, 16, 0, 1, 0, 0, 0, 10, 0, 16, 0, 7, 0, 0, 0, 35, 0, - 0, 12, 98, 0, 16, 0, - 0, 0, 0, 0, 6, 1, - 16, 0, 2, 0, 0, 0, - 2, 64, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 0, 0, 6, 1, 16, 0, - 1, 0, 0, 0, 2, 0, + 0, 9, 66, 0, 16, 0, + 0, 0, 0, 0, 10, 0, + 16, 0, 1, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 1, 0, 58, 0, 16, 0, + 0, 0, 0, 0, 2, 0, 0, 1, 23, 0, 0, 1, 1, 0, 0, 9, 130, 0, 16, 0, 0, 0, 0, 0, @@ -8215,10 +8220,10 @@ const BYTE resolve_full_16bpp_scaled_cs[] = 150, 5, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, - 0, 0, 43, 5, 0, 0, + 0, 0, 44, 5, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 133, 0, 0, 0, 39, 1, + 133, 0, 0, 0, 40, 1, 0, 0, 180, 0, 0, 0, 63, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.h index 3c923d804..21539f855 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.h @@ -1318,15 +1318,15 @@ switch r0.y imad r6.x, r0.z, l(0x00010000), r0.y break case l(31) - f32tof16 r1.x, r4.x - f32tof16 r1.y, r5.x - f32tof16 r1.z, r7.x - f32tof16 r1.w, r6.x - f32tof16 r8.x, r4.y - f32tof16 r8.y, r5.y - f32tof16 r8.z, r7.y - f32tof16 r8.w, r2.y - imad r6.xyzw, r8.wyxz, l(0x00010000, 0x00010000, 0x00010000, 0x00010000), r1.wyxz + f32tof16 r0.yz, r4.xxyx + imad r6.z, r0.z, l(0x00010000), r0.y + f32tof16 r0.yz, r5.xxyx + imad r6.y, r0.z, l(0x00010000), r0.y + f32tof16 r0.yz, r7.xxyx + imad r6.w, r0.z, l(0x00010000), r0.y + f32tof16 r0.y, r6.x + f32tof16 r0.z, r2.y + imad r6.x, r0.z, l(0x00010000), r0.y break default mov r6.z, r4.x @@ -1354,15 +1354,15 @@ ret const BYTE resolve_full_32bpp_cs[] = { - 68, 88, 66, 67, 6, 223, - 221, 81, 201, 228, 242, 38, - 30, 228, 108, 198, 29, 216, - 108, 219, 1, 0, 0, 0, - 220, 157, 0, 0, 5, 0, + 68, 88, 66, 67, 190, 127, + 1, 51, 115, 7, 103, 223, + 70, 14, 127, 63, 222, 233, + 22, 41, 1, 0, 0, 0, + 0, 158, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 208, 2, 0, 0, 224, 2, 0, 0, 240, 2, 0, 0, - 64, 157, 0, 0, 82, 68, + 100, 157, 0, 0, 82, 68, 69, 70, 148, 2, 0, 0, 1, 0, 0, 0, 236, 0, 0, 0, 3, 0, 0, 0, @@ -1480,8 +1480,8 @@ const BYTE resolve_full_32bpp_cs[] = 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, - 72, 154, 0, 0, 81, 0, - 5, 0, 146, 38, 0, 0, + 108, 154, 0, 0, 81, 0, + 5, 0, 155, 38, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 0, 0, @@ -7944,41 +7944,47 @@ const BYTE resolve_full_32bpp_cs[] = 0, 0, 2, 0, 0, 1, 6, 0, 0, 3, 1, 64, 0, 0, 31, 0, 0, 0, - 130, 0, 0, 5, 18, 0, - 16, 0, 1, 0, 0, 0, - 10, 0, 16, 0, 4, 0, + 130, 0, 0, 5, 98, 0, + 16, 0, 0, 0, 0, 0, + 6, 1, 16, 0, 4, 0, + 0, 0, 35, 0, 0, 9, + 66, 0, 16, 0, 6, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 1, 0, + 26, 0, 16, 0, 0, 0, 0, 0, 130, 0, 0, 5, - 34, 0, 16, 0, 1, 0, - 0, 0, 10, 0, 16, 0, - 5, 0, 0, 0, 130, 0, - 0, 5, 66, 0, 16, 0, - 1, 0, 0, 0, 10, 0, + 98, 0, 16, 0, 0, 0, + 0, 0, 6, 1, 16, 0, + 5, 0, 0, 0, 35, 0, + 0, 9, 34, 0, 16, 0, + 6, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 1, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 130, 0, + 0, 5, 98, 0, 16, 0, + 0, 0, 0, 0, 6, 1, 16, 0, 7, 0, 0, 0, - 130, 0, 0, 5, 130, 0, - 16, 0, 1, 0, 0, 0, + 35, 0, 0, 9, 130, 0, + 16, 0, 6, 0, 0, 0, + 42, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 1, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 130, 0, 0, 5, 34, 0, + 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 6, 0, 0, 0, 130, 0, 0, 5, - 18, 0, 16, 0, 8, 0, - 0, 0, 26, 0, 16, 0, - 4, 0, 0, 0, 130, 0, - 0, 5, 34, 0, 16, 0, - 8, 0, 0, 0, 26, 0, - 16, 0, 5, 0, 0, 0, - 130, 0, 0, 5, 66, 0, - 16, 0, 8, 0, 0, 0, - 26, 0, 16, 0, 7, 0, - 0, 0, 130, 0, 0, 5, - 130, 0, 16, 0, 8, 0, + 66, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 2, 0, 0, 0, 35, 0, - 0, 12, 242, 0, 16, 0, - 6, 0, 0, 0, 118, 8, - 16, 0, 8, 0, 0, 0, - 2, 64, 0, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 118, 8, 16, 0, - 1, 0, 0, 0, 2, 0, + 0, 9, 18, 0, 16, 0, + 6, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 1, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 2, 0, 0, 1, 10, 0, 0, 1, 54, 0, 0, 5, 66, 0, 16, 0, 6, 0, 0, 0, @@ -8068,7 +8074,7 @@ const BYTE resolve_full_32bpp_cs[] = 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 134, 0, 0, 0, - 12, 1, 0, 0, 173, 0, + 15, 1, 0, 0, 173, 0, 0, 0, 62, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_scaled_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_scaled_cs.h index 89ca2467e..7016585d7 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_scaled_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_scaled_cs.h @@ -1360,15 +1360,15 @@ switch r0.y imad r9.x, r0.z, l(0x00010000), r0.y break case l(31) - f32tof16 r1.x, r5.x - f32tof16 r1.y, r6.x - f32tof16 r1.z, r8.x - f32tof16 r1.w, r9.x - f32tof16 r2.x, r5.y - f32tof16 r2.y, r6.y - f32tof16 r2.z, r8.y - f32tof16 r2.w, r7.y - imad r9.xyzw, r2.wyxz, l(0x00010000, 0x00010000, 0x00010000, 0x00010000), r1.wyxz + f32tof16 r0.yz, r5.xxyx + imad r9.z, r0.z, l(0x00010000), r0.y + f32tof16 r0.yz, r6.xxyx + imad r9.y, r0.z, l(0x00010000), r0.y + f32tof16 r0.yz, r8.xxyx + imad r9.w, r0.z, l(0x00010000), r0.y + f32tof16 r0.y, r9.x + f32tof16 r0.z, r7.y + imad r9.x, r0.z, l(0x00010000), r0.y break default mov r9.z, r5.x @@ -1397,15 +1397,15 @@ ret const BYTE resolve_full_32bpp_scaled_cs[] = { - 68, 88, 66, 67, 19, 170, - 1, 90, 249, 15, 80, 164, - 255, 208, 31, 117, 114, 147, - 62, 94, 1, 0, 0, 0, - 4, 162, 0, 0, 5, 0, + 68, 88, 66, 67, 116, 11, + 112, 163, 68, 217, 200, 16, + 201, 60, 125, 107, 17, 99, + 192, 101, 1, 0, 0, 0, + 40, 162, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 56, 3, 0, 0, 72, 3, 0, 0, 88, 3, 0, 0, - 104, 161, 0, 0, 82, 68, + 140, 161, 0, 0, 82, 68, 69, 70, 252, 2, 0, 0, 2, 0, 0, 0, 52, 1, 0, 0, 4, 0, 0, 0, @@ -1540,8 +1540,8 @@ const BYTE resolve_full_32bpp_scaled_cs[] = 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, - 69, 88, 8, 158, 0, 0, - 81, 0, 5, 0, 130, 39, + 69, 88, 44, 158, 0, 0, + 81, 0, 5, 0, 139, 39, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, @@ -8159,40 +8159,46 @@ const BYTE resolve_full_32bpp_scaled_cs[] = 0, 1, 6, 0, 0, 3, 1, 64, 0, 0, 31, 0, 0, 0, 130, 0, 0, 5, - 18, 0, 16, 0, 1, 0, - 0, 0, 10, 0, 16, 0, - 5, 0, 0, 0, 130, 0, - 0, 5, 34, 0, 16, 0, - 1, 0, 0, 0, 10, 0, + 98, 0, 16, 0, 0, 0, + 0, 0, 6, 1, 16, 0, + 5, 0, 0, 0, 35, 0, + 0, 9, 66, 0, 16, 0, + 9, 0, 0, 0, 42, 0, + 16, 0, 0, 0, 0, 0, + 1, 64, 0, 0, 0, 0, + 1, 0, 26, 0, 16, 0, + 0, 0, 0, 0, 130, 0, + 0, 5, 98, 0, 16, 0, + 0, 0, 0, 0, 6, 1, 16, 0, 6, 0, 0, 0, - 130, 0, 0, 5, 66, 0, - 16, 0, 1, 0, 0, 0, - 10, 0, 16, 0, 8, 0, + 35, 0, 0, 9, 34, 0, + 16, 0, 9, 0, 0, 0, + 42, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 1, 0, 26, 0, + 16, 0, 0, 0, 0, 0, + 130, 0, 0, 5, 98, 0, + 16, 0, 0, 0, 0, 0, + 6, 1, 16, 0, 8, 0, + 0, 0, 35, 0, 0, 9, + 130, 0, 16, 0, 9, 0, + 0, 0, 42, 0, 16, 0, + 0, 0, 0, 0, 1, 64, + 0, 0, 0, 0, 1, 0, + 26, 0, 16, 0, 0, 0, 0, 0, 130, 0, 0, 5, - 130, 0, 16, 0, 1, 0, + 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 9, 0, 0, 0, 130, 0, - 0, 5, 18, 0, 16, 0, - 2, 0, 0, 0, 26, 0, - 16, 0, 5, 0, 0, 0, - 130, 0, 0, 5, 34, 0, - 16, 0, 2, 0, 0, 0, - 26, 0, 16, 0, 6, 0, - 0, 0, 130, 0, 0, 5, - 66, 0, 16, 0, 2, 0, - 0, 0, 26, 0, 16, 0, - 8, 0, 0, 0, 130, 0, - 0, 5, 130, 0, 16, 0, - 2, 0, 0, 0, 26, 0, + 0, 5, 66, 0, 16, 0, + 0, 0, 0, 0, 26, 0, 16, 0, 7, 0, 0, 0, - 35, 0, 0, 12, 242, 0, + 35, 0, 0, 9, 18, 0, 16, 0, 9, 0, 0, 0, - 118, 8, 16, 0, 2, 0, - 0, 0, 2, 64, 0, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 118, 8, - 16, 0, 1, 0, 0, 0, + 42, 0, 16, 0, 0, 0, + 0, 0, 1, 64, 0, 0, + 0, 0, 1, 0, 26, 0, + 16, 0, 0, 0, 0, 0, 2, 0, 0, 1, 10, 0, 0, 1, 54, 0, 0, 5, 66, 0, 16, 0, 9, 0, @@ -8288,7 +8294,7 @@ const BYTE resolve_full_32bpp_scaled_cs[] = 58, 5, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 134, 0, - 0, 0, 24, 1, 0, 0, + 0, 0, 27, 1, 0, 0, 183, 0, 0, 0, 64, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.h index eb712a645..524659ba0 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.h @@ -1151,16 +1151,18 @@ switch r1.y imad r2.zw, r9.yyyw, l(0, 0, 0x00010000, 0x00010000), r9.xxxz break case l(32) - f32tof16 r9.xy, r4.xzxx - f32tof16 r9.zw, r6.xxxz - f32tof16 r10.xy, r4.ywyy - f32tof16 r10.zw, r5.wwwz - imad r5.xyzw, r10.xyzw, l(0x00010000, 0x00010000, 0x00010000, 0x00010000), r9.xyzw - f32tof16 r9.xy, r7.xzxx - f32tof16 r9.zw, r8.xxxz - f32tof16 r10.xy, r7.ywyy - f32tof16 r10.zw, r2.wwwz - imad r2.xyzw, r10.xyzw, l(0x00010000, 0x00010000, 0x00010000, 0x00010000), r9.xyzw + f32tof16 r1.yw, r4.xxxz + f32tof16 r3.yz, r4.yywy + imad r5.xy, r3.yzyy, l(0x00010000, 0x00010000, 0, 0), r1.ywyy + f32tof16 r1.yw, r6.xxxz + f32tof16 r3.yz, r5.wwzw + imad r5.zw, r3.yyyz, l(0, 0, 0x00010000, 0x00010000), r1.yyyw + f32tof16 r1.yw, r7.xxxz + f32tof16 r3.yz, r7.yywy + imad r2.xy, r3.yzyy, l(0x00010000, 0x00010000, 0, 0), r1.ywyy + f32tof16 r1.yw, r8.xxxz + f32tof16 r3.yz, r2.wwzw + imad r2.zw, r3.yyyz, l(0, 0, 0x00010000, 0x00010000), r1.yyyw break default mov r5.xy, r4.xyxx @@ -1270,20 +1272,20 @@ if_nz r0.z endif store_uav_typed U0[0].xyzw, r0.xxxx, r2.xyzw ret -// Approximately 1224 instruction slots used +// Approximately 1226 instruction slots used #endif const BYTE resolve_full_64bpp_cs[] = { - 68, 88, 66, 67, 87, 220, - 183, 187, 206, 232, 244, 147, - 255, 195, 21, 134, 69, 222, - 224, 255, 1, 0, 0, 0, - 232, 147, 0, 0, 5, 0, + 68, 88, 66, 67, 100, 233, + 159, 181, 66, 54, 89, 165, + 83, 0, 68, 153, 219, 84, + 76, 22, 1, 0, 0, 0, + 72, 148, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 208, 2, 0, 0, 224, 2, 0, 0, 240, 2, 0, 0, - 76, 147, 0, 0, 82, 68, + 172, 147, 0, 0, 82, 68, 69, 70, 148, 2, 0, 0, 1, 0, 0, 0, 236, 0, 0, 0, 3, 0, 0, 0, @@ -1401,8 +1403,8 @@ const BYTE resolve_full_64bpp_cs[] = 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, - 84, 144, 0, 0, 81, 0, - 5, 0, 21, 36, 0, 0, + 180, 144, 0, 0, 81, 0, + 5, 0, 45, 36, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 0, 0, @@ -6941,49 +6943,65 @@ const BYTE resolve_full_64bpp_cs[] = 0, 0, 2, 0, 0, 1, 6, 0, 0, 3, 1, 64, 0, 0, 32, 0, 0, 0, - 130, 0, 0, 5, 50, 0, - 16, 0, 9, 0, 0, 0, - 134, 0, 16, 0, 4, 0, + 130, 0, 0, 5, 162, 0, + 16, 0, 1, 0, 0, 0, + 6, 8, 16, 0, 4, 0, 0, 0, 130, 0, 0, 5, - 194, 0, 16, 0, 9, 0, - 0, 0, 6, 8, 16, 0, - 6, 0, 0, 0, 130, 0, - 0, 5, 50, 0, 16, 0, - 10, 0, 0, 0, 214, 5, - 16, 0, 4, 0, 0, 0, - 130, 0, 0, 5, 194, 0, - 16, 0, 10, 0, 0, 0, - 246, 11, 16, 0, 5, 0, - 0, 0, 35, 0, 0, 12, - 242, 0, 16, 0, 5, 0, - 0, 0, 70, 14, 16, 0, - 10, 0, 0, 0, 2, 64, - 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 70, 14, 16, 0, 9, 0, - 0, 0, 130, 0, 0, 5, - 50, 0, 16, 0, 9, 0, - 0, 0, 134, 0, 16, 0, - 7, 0, 0, 0, 130, 0, - 0, 5, 194, 0, 16, 0, - 9, 0, 0, 0, 6, 8, - 16, 0, 8, 0, 0, 0, - 130, 0, 0, 5, 50, 0, - 16, 0, 10, 0, 0, 0, - 214, 5, 16, 0, 7, 0, - 0, 0, 130, 0, 0, 5, - 194, 0, 16, 0, 10, 0, - 0, 0, 246, 11, 16, 0, - 2, 0, 0, 0, 35, 0, - 0, 12, 242, 0, 16, 0, - 2, 0, 0, 0, 70, 14, - 16, 0, 10, 0, 0, 0, + 98, 0, 16, 0, 3, 0, + 0, 0, 86, 7, 16, 0, + 4, 0, 0, 0, 35, 0, + 0, 12, 50, 0, 16, 0, + 5, 0, 0, 0, 150, 5, + 16, 0, 3, 0, 0, 0, 2, 64, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 214, 5, 16, 0, + 1, 0, 0, 0, 130, 0, + 0, 5, 162, 0, 16, 0, + 1, 0, 0, 0, 6, 8, + 16, 0, 6, 0, 0, 0, + 130, 0, 0, 5, 98, 0, + 16, 0, 3, 0, 0, 0, + 246, 14, 16, 0, 5, 0, + 0, 0, 35, 0, 0, 12, + 194, 0, 16, 0, 5, 0, + 0, 0, 86, 9, 16, 0, + 3, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, + 86, 13, 16, 0, 1, 0, + 0, 0, 130, 0, 0, 5, + 162, 0, 16, 0, 1, 0, + 0, 0, 6, 8, 16, 0, + 7, 0, 0, 0, 130, 0, + 0, 5, 98, 0, 16, 0, + 3, 0, 0, 0, 86, 7, + 16, 0, 7, 0, 0, 0, + 35, 0, 0, 12, 50, 0, + 16, 0, 2, 0, 0, 0, + 150, 5, 16, 0, 3, 0, + 0, 0, 2, 64, 0, 0, 0, 0, 1, 0, 0, 0, - 1, 0, 70, 14, 16, 0, - 9, 0, 0, 0, 2, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 214, 5, + 16, 0, 1, 0, 0, 0, + 130, 0, 0, 5, 162, 0, + 16, 0, 1, 0, 0, 0, + 6, 8, 16, 0, 8, 0, + 0, 0, 130, 0, 0, 5, + 98, 0, 16, 0, 3, 0, + 0, 0, 246, 14, 16, 0, + 2, 0, 0, 0, 35, 0, + 0, 12, 194, 0, 16, 0, + 2, 0, 0, 0, 86, 9, + 16, 0, 3, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 86, 13, 16, 0, + 1, 0, 0, 0, 2, 0, 0, 1, 10, 0, 0, 1, 54, 0, 0, 5, 50, 0, 16, 0, 5, 0, 0, 0, @@ -7561,10 +7579,10 @@ const BYTE resolve_full_64bpp_cs[] = 16, 0, 2, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, - 200, 4, 0, 0, 23, 0, + 202, 4, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 118, 0, - 0, 0, 234, 0, 0, 0, + 0, 0, 236, 0, 0, 0, 178, 0, 0, 0, 59, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_scaled_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_scaled_cs.h index 3bd227749..16212b77a 100644 --- a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_scaled_cs.h +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_scaled_cs.h @@ -1174,16 +1174,18 @@ switch r0.y imad r7.zw, r11.yyyw, l(0, 0, 0x00010000, 0x00010000), r11.xxxz break case l(32) - f32tof16 r11.xy, r5.xzxx - f32tof16 r11.zw, r9.xxxz - f32tof16 r12.xy, r5.ywyy - f32tof16 r12.zw, r6.wwwz - imad r6.xyzw, r12.xyzw, l(0x00010000, 0x00010000, 0x00010000, 0x00010000), r11.xyzw - f32tof16 r11.xy, r8.xzxx - f32tof16 r11.zw, r10.xxxz - f32tof16 r12.xy, r8.ywyy - f32tof16 r12.zw, r7.wwwz - imad r7.xyzw, r12.xyzw, l(0x00010000, 0x00010000, 0x00010000, 0x00010000), r11.xyzw + f32tof16 r0.yw, r5.xxxz + f32tof16 r1.yz, r5.yywy + imad r6.xy, r1.yzyy, l(0x00010000, 0x00010000, 0, 0), r0.ywyy + f32tof16 r0.yw, r9.xxxz + f32tof16 r1.yz, r6.wwzw + imad r6.zw, r1.yyyz, l(0, 0, 0x00010000, 0x00010000), r0.yyyw + f32tof16 r0.yw, r8.xxxz + f32tof16 r1.yz, r8.yywy + imad r7.xy, r1.yzyy, l(0x00010000, 0x00010000, 0, 0), r0.ywyy + f32tof16 r0.yw, r10.xxxz + f32tof16 r1.yz, r7.wwzw + imad r7.zw, r1.yyyz, l(0, 0, 0x00010000, 0x00010000), r0.yyyw break default mov r6.xy, r5.xyxx @@ -1333,20 +1335,20 @@ if_nz r0.z endif store_uav_typed U0[0].xyzw, r0.xxxx, r7.xyzw ret -// Approximately 1279 instruction slots used +// Approximately 1281 instruction slots used #endif const BYTE resolve_full_64bpp_scaled_cs[] = { - 68, 88, 66, 67, 60, 118, - 161, 100, 5, 152, 135, 130, - 31, 41, 169, 237, 170, 21, - 198, 182, 1, 0, 0, 0, - 216, 153, 0, 0, 5, 0, + 68, 88, 66, 67, 209, 138, + 12, 24, 59, 191, 97, 5, + 235, 179, 86, 139, 240, 215, + 9, 54, 1, 0, 0, 0, + 56, 154, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 56, 3, 0, 0, 72, 3, 0, 0, 88, 3, 0, 0, - 60, 153, 0, 0, 82, 68, + 156, 153, 0, 0, 82, 68, 69, 70, 252, 2, 0, 0, 2, 0, 0, 0, 52, 1, 0, 0, 4, 0, 0, 0, @@ -1481,8 +1483,8 @@ const BYTE resolve_full_64bpp_scaled_cs[] = 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, - 69, 88, 220, 149, 0, 0, - 81, 0, 5, 0, 119, 37, + 69, 88, 60, 150, 0, 0, + 81, 0, 5, 0, 143, 37, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, @@ -7094,48 +7096,64 @@ const BYTE resolve_full_64bpp_scaled_cs[] = 0, 1, 6, 0, 0, 3, 1, 64, 0, 0, 32, 0, 0, 0, 130, 0, 0, 5, - 50, 0, 16, 0, 11, 0, - 0, 0, 134, 0, 16, 0, + 162, 0, 16, 0, 0, 0, + 0, 0, 6, 8, 16, 0, 5, 0, 0, 0, 130, 0, - 0, 5, 194, 0, 16, 0, - 11, 0, 0, 0, 6, 8, - 16, 0, 9, 0, 0, 0, - 130, 0, 0, 5, 50, 0, - 16, 0, 12, 0, 0, 0, - 214, 5, 16, 0, 5, 0, - 0, 0, 130, 0, 0, 5, - 194, 0, 16, 0, 12, 0, - 0, 0, 246, 11, 16, 0, - 6, 0, 0, 0, 35, 0, - 0, 12, 242, 0, 16, 0, - 6, 0, 0, 0, 70, 14, - 16, 0, 12, 0, 0, 0, - 2, 64, 0, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 1, 0, 70, 14, 16, 0, - 11, 0, 0, 0, 130, 0, - 0, 5, 50, 0, 16, 0, - 11, 0, 0, 0, 134, 0, - 16, 0, 8, 0, 0, 0, - 130, 0, 0, 5, 194, 0, - 16, 0, 11, 0, 0, 0, - 6, 8, 16, 0, 10, 0, - 0, 0, 130, 0, 0, 5, - 50, 0, 16, 0, 12, 0, - 0, 0, 214, 5, 16, 0, - 8, 0, 0, 0, 130, 0, - 0, 5, 194, 0, 16, 0, - 12, 0, 0, 0, 246, 11, - 16, 0, 7, 0, 0, 0, - 35, 0, 0, 12, 242, 0, - 16, 0, 7, 0, 0, 0, - 70, 14, 16, 0, 12, 0, + 0, 5, 98, 0, 16, 0, + 1, 0, 0, 0, 86, 7, + 16, 0, 5, 0, 0, 0, + 35, 0, 0, 12, 50, 0, + 16, 0, 6, 0, 0, 0, + 150, 5, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 0, 0, 1, 0, 0, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 70, 14, - 16, 0, 11, 0, 0, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 214, 5, + 16, 0, 0, 0, 0, 0, + 130, 0, 0, 5, 162, 0, + 16, 0, 0, 0, 0, 0, + 6, 8, 16, 0, 9, 0, + 0, 0, 130, 0, 0, 5, + 98, 0, 16, 0, 1, 0, + 0, 0, 246, 14, 16, 0, + 6, 0, 0, 0, 35, 0, + 0, 12, 194, 0, 16, 0, + 6, 0, 0, 0, 86, 9, + 16, 0, 1, 0, 0, 0, + 2, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, + 1, 0, 86, 13, 16, 0, + 0, 0, 0, 0, 130, 0, + 0, 5, 162, 0, 16, 0, + 0, 0, 0, 0, 6, 8, + 16, 0, 8, 0, 0, 0, + 130, 0, 0, 5, 98, 0, + 16, 0, 1, 0, 0, 0, + 86, 7, 16, 0, 8, 0, + 0, 0, 35, 0, 0, 12, + 50, 0, 16, 0, 7, 0, + 0, 0, 150, 5, 16, 0, + 1, 0, 0, 0, 2, 64, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 214, 5, 16, 0, 0, 0, + 0, 0, 130, 0, 0, 5, + 162, 0, 16, 0, 0, 0, + 0, 0, 6, 8, 16, 0, + 10, 0, 0, 0, 130, 0, + 0, 5, 98, 0, 16, 0, + 1, 0, 0, 0, 246, 14, + 16, 0, 7, 0, 0, 0, + 35, 0, 0, 12, 194, 0, + 16, 0, 7, 0, 0, 0, + 86, 9, 16, 0, 1, 0, + 0, 0, 2, 64, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 86, 13, + 16, 0, 0, 0, 0, 0, 2, 0, 0, 1, 10, 0, 0, 1, 54, 0, 0, 5, 50, 0, 16, 0, 6, 0, @@ -7877,10 +7895,10 @@ const BYTE resolve_full_64bpp_scaled_cs[] = 70, 14, 16, 0, 7, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, - 0, 0, 255, 4, 0, 0, + 0, 0, 1, 5, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 118, 0, 0, 0, 252, 0, + 118, 0, 0, 0, 254, 0, 0, 0, 192, 0, 0, 0, 63, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_1xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_1xmsaa_cs.h new file mode 100644 index 000000000..f1aa0adf6 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_1xmsaa_cs.h @@ -0,0 +1,333 @@ +// Generated with `xb buildshaders`. +#if 0 +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 25265 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %5663 "main" %gl_GlobalInvocationID + OpExecutionMode %5663 LocalSize 8 8 1 + OpMemberDecorate %_struct_990 0 Offset 0 + OpMemberDecorate %_struct_990 1 Offset 4 + OpDecorate %_struct_990 Block + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_runtimearr_v4uint ArrayStride 16 + OpMemberDecorate %_struct_1972 0 NonReadable + OpMemberDecorate %_struct_1972 0 Offset 0 + OpDecorate %_struct_1972 BufferBlock + OpDecorate %4790 DescriptorSet 0 + OpDecorate %4790 Binding 0 + OpDecorate %3709 DescriptorSet 1 + OpDecorate %3709 Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %1282 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %bool = OpTypeBool + %uint_2 = OpConstant %uint 2 + %uint_1 = OpConstant %uint 1 + %1837 = OpConstantComposite %v2uint %uint_2 %uint_1 + %v2bool = OpTypeVector %bool 2 + %uint_0 = OpConstant %uint 0 + %1807 = OpConstantComposite %v2uint %uint_0 %uint_0 + %1828 = OpConstantComposite %v2uint %uint_1 %uint_1 + %1816 = OpConstantComposite %v2uint %uint_1 %uint_0 + %uint_80 = OpConstant %uint 80 + %uint_16 = OpConstant %uint 16 + %2719 = OpConstantComposite %v2uint %uint_80 %uint_16 + %int = OpTypeInt 32 1 +%_struct_990 = OpTypeStruct %uint %uint +%_ptr_PushConstant__struct_990 = OpTypePointer PushConstant %_struct_990 + %3052 = OpVariable %_ptr_PushConstant__struct_990 PushConstant + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint + %uint_10 = OpConstant %uint 10 + %uint_12 = OpConstant %uint 12 + %2041 = OpConstantComposite %v2uint %uint_10 %uint_12 + %uint_3 = OpConstant %uint 3 + %int_0 = OpConstant %int 0 + %1927 = OpConstantComposite %v2uint %uint_0 %uint_10 + %uint_1023 = OpConstant %uint 1023 + %uint_20 = OpConstant %uint 20 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %v2int = OpTypeVector %int 2 + %1834 = OpConstantComposite %v2uint %uint_3 %uint_0 + %v4uint = OpTypeVector %uint 4 +%_runtimearr_v4uint = OpTypeRuntimeArray %v4uint +%_struct_1972 = OpTypeStruct %_runtimearr_v4uint +%_ptr_Uniform__struct_1972 = OpTypePointer Uniform %_struct_1972 + %4790 = OpVariable %_ptr_Uniform__struct_1972 Uniform + %float = OpTypeFloat 32 + %150 = OpTypeImage %float 2D 0 0 0 1 Unknown +%_ptr_UniformConstant_150 = OpTypePointer UniformConstant %150 + %3709 = OpVariable %_ptr_UniformConstant_150 UniformConstant + %v4float = OpTypeVector %float 4 + %1824 = OpConstantComposite %v2int %int_1 %int_0 + %int_2 = OpConstant %int 2 + %1833 = OpConstantComposite %v2int %int_2 %int_0 + %int_3 = OpConstant %int 3 + %1842 = OpConstantComposite %v2int %int_3 %int_0 +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %int_4 = OpConstant %int 4 + %1851 = OpConstantComposite %v2int %int_4 %int_0 + %int_5 = OpConstant %int 5 + %1860 = OpConstantComposite %v2int %int_5 %int_0 + %int_6 = OpConstant %int 6 + %1869 = OpConstantComposite %v2int %int_6 %int_0 + %int_7 = OpConstant %int 7 + %1878 = OpConstantComposite %v2int %int_7 %int_0 + %uint_8 = OpConstant %uint 8 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_8 %uint_8 %uint_1 + %1870 = OpConstantComposite %v2uint %uint_3 %uint_3 + %2213 = OpConstantComposite %v2uint %uint_1023 %uint_1023 + %5663 = OpFunction %void None %1282 + %15110 = OpLabel + OpSelectionMerge %19578 None + OpSwitch %uint_0 %11880 + %11880 = OpLabel + %22245 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %15627 = OpLoad %uint %22245 + %22605 = OpAccessChain %_ptr_PushConstant_uint %3052 %int_0 + %21784 = OpLoad %uint %22605 + %21170 = OpShiftRightLogical %uint %21784 %uint_20 + %15618 = OpBitwiseAnd %uint %21170 %uint_1023 + %10265 = OpIAdd %uint %15618 %uint_1 + %19929 = OpAccessChain %_ptr_PushConstant_uint %3052 %int_1 + %15334 = OpLoad %uint %19929 + %10293 = OpCompositeConstruct %v2uint %15334 %15334 + %24634 = OpShiftRightLogical %v2uint %10293 %2041 + %24203 = OpBitwiseAnd %v2uint %24634 %1870 + %10929 = OpCompositeExtract %uint %24203 0 + %7670 = OpIMul %uint %10265 %10929 + %7287 = OpUGreaterThanEqual %bool %15627 %7670 + OpSelectionMerge %16345 DontFlatten + OpBranchConditional %7287 %21992 %16345 + %21992 = OpLabel + OpBranch %19578 + %16345 = OpLabel + %10771 = OpCompositeConstruct %v2uint %21784 %21784 + %13581 = OpShiftRightLogical %v2uint %10771 %1927 + %23379 = OpBitwiseAnd %v2uint %13581 %2213 + %13680 = OpShiftLeftLogical %v2uint %23379 %1870 + %24677 = OpIMul %v2uint %13680 %24203 + %7005 = OpLoad %v3uint %gl_GlobalInvocationID + %22399 = OpVectorShuffle %v2uint %7005 %7005 0 1 + %21597 = OpShiftLeftLogical %v2uint %22399 %1834 + %9038 = OpIAdd %v2uint %24677 %21597 + %24559 = OpBitcast %v2int %9038 + %8919 = OpBitcast %v2uint %24559 + %18334 = OpBitwiseAnd %uint %15334 %uint_1023 + %7195 = OpUGreaterThanEqual %v2bool %1807 %1837 + %17737 = OpSelect %v2uint %7195 %1828 %1807 + %10430 = OpShiftLeftLogical %v2uint %8919 %17737 + %16475 = OpShiftRightLogical %v2uint %1807 %1816 + %13071 = OpBitwiseAnd %v2uint %16475 %1828 + %20272 = OpIAdd %v2uint %10430 %13071 + %21145 = OpIMul %v2uint %2719 %24203 + %14725 = OpShiftRightLogical %v2uint %21145 %1807 + %19799 = OpUDiv %v2uint %20272 %14725 + %20390 = OpCompositeExtract %uint %19799 1 + %11046 = OpIMul %uint %20390 %18334 + %24741 = OpCompositeExtract %uint %19799 0 + %20806 = OpIAdd %uint %11046 %24741 + %13527 = OpIMul %v2uint %19799 %14725 + %20715 = OpISub %v2uint %20272 %13527 + %7303 = OpCompositeExtract %uint %21145 0 + %22882 = OpCompositeExtract %uint %21145 1 + %13170 = OpIMul %uint %7303 %22882 + %14551 = OpIMul %uint %20806 %13170 + %6805 = OpCompositeExtract %uint %20715 1 + %23526 = OpCompositeExtract %uint %14725 0 + %22886 = OpIMul %uint %6805 %23526 + %6886 = OpCompositeExtract %uint %20715 0 + %9696 = OpIAdd %uint %22886 %6886 + %19199 = OpShiftLeftLogical %uint %9696 %uint_0 + %25264 = OpIAdd %uint %14551 %19199 + %6574 = OpShiftRightLogical %uint %25264 %uint_2 + %7456 = OpLoad %150 %3709 + %17822 = OpImageFetch %v4float %7456 %24559 Lod %int_0 + %11864 = OpCompositeExtract %float %17822 0 + %19035 = OpIAdd %v2int %24559 %1824 + %20902 = OpImageFetch %v4float %7456 %19035 Lod %int_0 + %17472 = OpCompositeExtract %float %20902 0 + %19036 = OpIAdd %v2int %24559 %1833 + %20903 = OpImageFetch %v4float %7456 %19036 Lod %int_0 + %17473 = OpCompositeExtract %float %20903 0 + %19037 = OpIAdd %v2int %24559 %1842 + %19990 = OpImageFetch %v4float %7456 %19037 Lod %int_0 + %7256 = OpCompositeExtract %float %19990 0 + %6487 = OpCompositeConstruct %v4float %11864 %17472 %17473 %7256 + %20366 = OpBitcast %v4uint %6487 + %12860 = OpAccessChain %_ptr_Uniform_v4uint %4790 %int_0 %6574 + OpStore %12860 %20366 + %20256 = OpIAdd %uint %6574 %uint_1 + %8574 = OpIAdd %v2int %24559 %1851 + %10680 = OpImageFetch %v4float %7456 %8574 Lod %int_0 + %17474 = OpCompositeExtract %float %10680 0 + %19038 = OpIAdd %v2int %24559 %1860 + %20904 = OpImageFetch %v4float %7456 %19038 Lod %int_0 + %17475 = OpCompositeExtract %float %20904 0 + %19039 = OpIAdd %v2int %24559 %1869 + %20905 = OpImageFetch %v4float %7456 %19039 Lod %int_0 + %17476 = OpCompositeExtract %float %20905 0 + %19040 = OpIAdd %v2int %24559 %1878 + %19991 = OpImageFetch %v4float %7456 %19040 Lod %int_0 + %7257 = OpCompositeExtract %float %19991 0 + %6488 = OpCompositeConstruct %v4float %17474 %17475 %17476 %7257 + %20367 = OpBitcast %v4uint %6488 + %15159 = OpAccessChain %_ptr_Uniform_v4uint %4790 %int_0 %20256 + OpStore %15159 %20367 + OpBranch %19578 + %19578 = OpLabel + OpReturn + OpFunctionEnd +#endif + +const uint32_t host_depth_store_1xmsaa_cs[] = { + 0x07230203, 0x00010000, 0x0008000A, 0x000062B1, 0x00000000, 0x00020011, + 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, + 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0006000F, 0x00000005, + 0x0000161F, 0x6E69616D, 0x00000000, 0x00000F48, 0x00060010, 0x0000161F, + 0x00000011, 0x00000008, 0x00000008, 0x00000001, 0x00050048, 0x000003DE, + 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x000003DE, 0x00000001, + 0x00000023, 0x00000004, 0x00030047, 0x000003DE, 0x00000002, 0x00040047, + 0x00000F48, 0x0000000B, 0x0000001C, 0x00040047, 0x000007DC, 0x00000006, + 0x00000010, 0x00040048, 0x000007B4, 0x00000000, 0x00000019, 0x00050048, + 0x000007B4, 0x00000000, 0x00000023, 0x00000000, 0x00030047, 0x000007B4, + 0x00000003, 0x00040047, 0x000012B6, 0x00000022, 0x00000000, 0x00040047, + 0x000012B6, 0x00000021, 0x00000000, 0x00040047, 0x00000E7D, 0x00000022, + 0x00000001, 0x00040047, 0x00000E7D, 0x00000021, 0x00000000, 0x00040047, + 0x00000AC7, 0x0000000B, 0x00000019, 0x00020013, 0x00000008, 0x00030021, + 0x00000502, 0x00000008, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, + 0x00040017, 0x00000011, 0x0000000B, 0x00000002, 0x00020014, 0x00000009, + 0x0004002B, 0x0000000B, 0x00000A10, 0x00000002, 0x0004002B, 0x0000000B, + 0x00000A0D, 0x00000001, 0x0005002C, 0x00000011, 0x0000072D, 0x00000A10, + 0x00000A0D, 0x00040017, 0x0000000F, 0x00000009, 0x00000002, 0x0004002B, + 0x0000000B, 0x00000A0A, 0x00000000, 0x0005002C, 0x00000011, 0x0000070F, + 0x00000A0A, 0x00000A0A, 0x0005002C, 0x00000011, 0x00000724, 0x00000A0D, + 0x00000A0D, 0x0005002C, 0x00000011, 0x00000718, 0x00000A0D, 0x00000A0A, + 0x0004002B, 0x0000000B, 0x00000AFA, 0x00000050, 0x0004002B, 0x0000000B, + 0x00000A3A, 0x00000010, 0x0005002C, 0x00000011, 0x00000A9F, 0x00000AFA, + 0x00000A3A, 0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004001E, + 0x000003DE, 0x0000000B, 0x0000000B, 0x00040020, 0x0000065B, 0x00000009, + 0x000003DE, 0x0004003B, 0x0000065B, 0x00000BEC, 0x00000009, 0x0004002B, + 0x0000000C, 0x00000A0E, 0x00000001, 0x00040020, 0x00000288, 0x00000009, + 0x0000000B, 0x0004002B, 0x0000000B, 0x00000A28, 0x0000000A, 0x0004002B, + 0x0000000B, 0x00000A2E, 0x0000000C, 0x0005002C, 0x00000011, 0x000007F9, + 0x00000A28, 0x00000A2E, 0x0004002B, 0x0000000B, 0x00000A13, 0x00000003, + 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x0005002C, 0x00000011, + 0x00000787, 0x00000A0A, 0x00000A28, 0x0004002B, 0x0000000B, 0x00000A44, + 0x000003FF, 0x0004002B, 0x0000000B, 0x00000A46, 0x00000014, 0x00040017, + 0x00000014, 0x0000000B, 0x00000003, 0x00040020, 0x00000291, 0x00000001, + 0x00000014, 0x0004003B, 0x00000291, 0x00000F48, 0x00000001, 0x00040020, + 0x00000289, 0x00000001, 0x0000000B, 0x00040017, 0x00000012, 0x0000000C, + 0x00000002, 0x0005002C, 0x00000011, 0x0000072A, 0x00000A13, 0x00000A0A, + 0x00040017, 0x00000017, 0x0000000B, 0x00000004, 0x0003001D, 0x000007DC, + 0x00000017, 0x0003001E, 0x000007B4, 0x000007DC, 0x00040020, 0x00000A31, + 0x00000002, 0x000007B4, 0x0004003B, 0x00000A31, 0x000012B6, 0x00000002, + 0x00030016, 0x0000000D, 0x00000020, 0x00090019, 0x00000096, 0x0000000D, + 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, + 0x00040020, 0x00000313, 0x00000000, 0x00000096, 0x0004003B, 0x00000313, + 0x00000E7D, 0x00000000, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, + 0x0005002C, 0x00000012, 0x00000720, 0x00000A0E, 0x00000A0B, 0x0004002B, + 0x0000000C, 0x00000A11, 0x00000002, 0x0005002C, 0x00000012, 0x00000729, + 0x00000A11, 0x00000A0B, 0x0004002B, 0x0000000C, 0x00000A14, 0x00000003, + 0x0005002C, 0x00000012, 0x00000732, 0x00000A14, 0x00000A0B, 0x00040020, + 0x00000294, 0x00000002, 0x00000017, 0x0004002B, 0x0000000C, 0x00000A17, + 0x00000004, 0x0005002C, 0x00000012, 0x0000073B, 0x00000A17, 0x00000A0B, + 0x0004002B, 0x0000000C, 0x00000A1A, 0x00000005, 0x0005002C, 0x00000012, + 0x00000744, 0x00000A1A, 0x00000A0B, 0x0004002B, 0x0000000C, 0x00000A1D, + 0x00000006, 0x0005002C, 0x00000012, 0x0000074D, 0x00000A1D, 0x00000A0B, + 0x0004002B, 0x0000000C, 0x00000A20, 0x00000007, 0x0005002C, 0x00000012, + 0x00000756, 0x00000A20, 0x00000A0B, 0x0004002B, 0x0000000B, 0x00000A22, + 0x00000008, 0x0006002C, 0x00000014, 0x00000AC7, 0x00000A22, 0x00000A22, + 0x00000A0D, 0x0005002C, 0x00000011, 0x0000074E, 0x00000A13, 0x00000A13, + 0x0005002C, 0x00000011, 0x000008A5, 0x00000A44, 0x00000A44, 0x00050036, + 0x00000008, 0x0000161F, 0x00000000, 0x00000502, 0x000200F8, 0x00003B06, + 0x000300F7, 0x00004C7A, 0x00000000, 0x000300FB, 0x00000A0A, 0x00002E68, + 0x000200F8, 0x00002E68, 0x00050041, 0x00000289, 0x000056E5, 0x00000F48, + 0x00000A0A, 0x0004003D, 0x0000000B, 0x00003D0B, 0x000056E5, 0x00050041, + 0x00000288, 0x0000584D, 0x00000BEC, 0x00000A0B, 0x0004003D, 0x0000000B, + 0x00005518, 0x0000584D, 0x000500C2, 0x0000000B, 0x000052B2, 0x00005518, + 0x00000A46, 0x000500C7, 0x0000000B, 0x00003D02, 0x000052B2, 0x00000A44, + 0x00050080, 0x0000000B, 0x00002819, 0x00003D02, 0x00000A0D, 0x00050041, + 0x00000288, 0x00004DD9, 0x00000BEC, 0x00000A0E, 0x0004003D, 0x0000000B, + 0x00003BE6, 0x00004DD9, 0x00050050, 0x00000011, 0x00002835, 0x00003BE6, + 0x00003BE6, 0x000500C2, 0x00000011, 0x0000603A, 0x00002835, 0x000007F9, + 0x000500C7, 0x00000011, 0x00005E8B, 0x0000603A, 0x0000074E, 0x00050051, + 0x0000000B, 0x00002AB1, 0x00005E8B, 0x00000000, 0x00050084, 0x0000000B, + 0x00001DF6, 0x00002819, 0x00002AB1, 0x000500AE, 0x00000009, 0x00001C77, + 0x00003D0B, 0x00001DF6, 0x000300F7, 0x00003FD9, 0x00000002, 0x000400FA, + 0x00001C77, 0x000055E8, 0x00003FD9, 0x000200F8, 0x000055E8, 0x000200F9, + 0x00004C7A, 0x000200F8, 0x00003FD9, 0x00050050, 0x00000011, 0x00002A13, + 0x00005518, 0x00005518, 0x000500C2, 0x00000011, 0x0000350D, 0x00002A13, + 0x00000787, 0x000500C7, 0x00000011, 0x00005B53, 0x0000350D, 0x000008A5, + 0x000500C4, 0x00000011, 0x00003570, 0x00005B53, 0x0000074E, 0x00050084, + 0x00000011, 0x00006065, 0x00003570, 0x00005E8B, 0x0004003D, 0x00000014, + 0x00001B5D, 0x00000F48, 0x0007004F, 0x00000011, 0x0000577F, 0x00001B5D, + 0x00001B5D, 0x00000000, 0x00000001, 0x000500C4, 0x00000011, 0x0000545D, + 0x0000577F, 0x0000072A, 0x00050080, 0x00000011, 0x0000234E, 0x00006065, + 0x0000545D, 0x0004007C, 0x00000012, 0x00005FEF, 0x0000234E, 0x0004007C, + 0x00000011, 0x000022D7, 0x00005FEF, 0x000500C7, 0x0000000B, 0x0000479E, + 0x00003BE6, 0x00000A44, 0x000500AE, 0x0000000F, 0x00001C1B, 0x0000070F, + 0x0000072D, 0x000600A9, 0x00000011, 0x00004549, 0x00001C1B, 0x00000724, + 0x0000070F, 0x000500C4, 0x00000011, 0x000028BE, 0x000022D7, 0x00004549, + 0x000500C2, 0x00000011, 0x0000405B, 0x0000070F, 0x00000718, 0x000500C7, + 0x00000011, 0x0000330F, 0x0000405B, 0x00000724, 0x00050080, 0x00000011, + 0x00004F30, 0x000028BE, 0x0000330F, 0x00050084, 0x00000011, 0x00005299, + 0x00000A9F, 0x00005E8B, 0x000500C2, 0x00000011, 0x00003985, 0x00005299, + 0x0000070F, 0x00050086, 0x00000011, 0x00004D57, 0x00004F30, 0x00003985, + 0x00050051, 0x0000000B, 0x00004FA6, 0x00004D57, 0x00000001, 0x00050084, + 0x0000000B, 0x00002B26, 0x00004FA6, 0x0000479E, 0x00050051, 0x0000000B, + 0x000060A5, 0x00004D57, 0x00000000, 0x00050080, 0x0000000B, 0x00005146, + 0x00002B26, 0x000060A5, 0x00050084, 0x00000011, 0x000034D7, 0x00004D57, + 0x00003985, 0x00050082, 0x00000011, 0x000050EB, 0x00004F30, 0x000034D7, + 0x00050051, 0x0000000B, 0x00001C87, 0x00005299, 0x00000000, 0x00050051, + 0x0000000B, 0x00005962, 0x00005299, 0x00000001, 0x00050084, 0x0000000B, + 0x00003372, 0x00001C87, 0x00005962, 0x00050084, 0x0000000B, 0x000038D7, + 0x00005146, 0x00003372, 0x00050051, 0x0000000B, 0x00001A95, 0x000050EB, + 0x00000001, 0x00050051, 0x0000000B, 0x00005BE6, 0x00003985, 0x00000000, + 0x00050084, 0x0000000B, 0x00005966, 0x00001A95, 0x00005BE6, 0x00050051, + 0x0000000B, 0x00001AE6, 0x000050EB, 0x00000000, 0x00050080, 0x0000000B, + 0x000025E0, 0x00005966, 0x00001AE6, 0x000500C4, 0x0000000B, 0x00004AFF, + 0x000025E0, 0x00000A0A, 0x00050080, 0x0000000B, 0x000062B0, 0x000038D7, + 0x00004AFF, 0x000500C2, 0x0000000B, 0x000019AE, 0x000062B0, 0x00000A10, + 0x0004003D, 0x00000096, 0x00001D20, 0x00000E7D, 0x0007005F, 0x0000001D, + 0x0000459E, 0x00001D20, 0x00005FEF, 0x00000002, 0x00000A0B, 0x00050051, + 0x0000000D, 0x00002E58, 0x0000459E, 0x00000000, 0x00050080, 0x00000012, + 0x00004A5B, 0x00005FEF, 0x00000720, 0x0007005F, 0x0000001D, 0x000051A6, + 0x00001D20, 0x00004A5B, 0x00000002, 0x00000A0B, 0x00050051, 0x0000000D, + 0x00004440, 0x000051A6, 0x00000000, 0x00050080, 0x00000012, 0x00004A5C, + 0x00005FEF, 0x00000729, 0x0007005F, 0x0000001D, 0x000051A7, 0x00001D20, + 0x00004A5C, 0x00000002, 0x00000A0B, 0x00050051, 0x0000000D, 0x00004441, + 0x000051A7, 0x00000000, 0x00050080, 0x00000012, 0x00004A5D, 0x00005FEF, + 0x00000732, 0x0007005F, 0x0000001D, 0x00004E16, 0x00001D20, 0x00004A5D, + 0x00000002, 0x00000A0B, 0x00050051, 0x0000000D, 0x00001C58, 0x00004E16, + 0x00000000, 0x00070050, 0x0000001D, 0x00001957, 0x00002E58, 0x00004440, + 0x00004441, 0x00001C58, 0x0004007C, 0x00000017, 0x00004F8E, 0x00001957, + 0x00060041, 0x00000294, 0x0000323C, 0x000012B6, 0x00000A0B, 0x000019AE, + 0x0003003E, 0x0000323C, 0x00004F8E, 0x00050080, 0x0000000B, 0x00004F20, + 0x000019AE, 0x00000A0D, 0x00050080, 0x00000012, 0x0000217E, 0x00005FEF, + 0x0000073B, 0x0007005F, 0x0000001D, 0x000029B8, 0x00001D20, 0x0000217E, + 0x00000002, 0x00000A0B, 0x00050051, 0x0000000D, 0x00004442, 0x000029B8, + 0x00000000, 0x00050080, 0x00000012, 0x00004A5E, 0x00005FEF, 0x00000744, + 0x0007005F, 0x0000001D, 0x000051A8, 0x00001D20, 0x00004A5E, 0x00000002, + 0x00000A0B, 0x00050051, 0x0000000D, 0x00004443, 0x000051A8, 0x00000000, + 0x00050080, 0x00000012, 0x00004A5F, 0x00005FEF, 0x0000074D, 0x0007005F, + 0x0000001D, 0x000051A9, 0x00001D20, 0x00004A5F, 0x00000002, 0x00000A0B, + 0x00050051, 0x0000000D, 0x00004444, 0x000051A9, 0x00000000, 0x00050080, + 0x00000012, 0x00004A60, 0x00005FEF, 0x00000756, 0x0007005F, 0x0000001D, + 0x00004E17, 0x00001D20, 0x00004A60, 0x00000002, 0x00000A0B, 0x00050051, + 0x0000000D, 0x00001C59, 0x00004E17, 0x00000000, 0x00070050, 0x0000001D, + 0x00001958, 0x00004442, 0x00004443, 0x00004444, 0x00001C59, 0x0004007C, + 0x00000017, 0x00004F8F, 0x00001958, 0x00060041, 0x00000294, 0x00003B37, + 0x000012B6, 0x00000A0B, 0x00004F20, 0x0003003E, 0x00003B37, 0x00004F8F, + 0x000200F9, 0x00004C7A, 0x000200F8, 0x00004C7A, 0x000100FD, 0x00010038, +}; diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h new file mode 100644 index 000000000..652edce67 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h @@ -0,0 +1,365 @@ +// Generated with `xb buildshaders`. +#if 0 +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 24742 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %5663 "main" %gl_GlobalInvocationID + OpExecutionMode %5663 LocalSize 8 8 1 + OpMemberDecorate %_struct_990 0 Offset 0 + OpMemberDecorate %_struct_990 1 Offset 4 + OpDecorate %_struct_990 Block + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_runtimearr_v4uint ArrayStride 16 + OpMemberDecorate %_struct_1972 0 NonReadable + OpMemberDecorate %_struct_1972 0 Offset 0 + OpDecorate %_struct_1972 BufferBlock + OpDecorate %4790 DescriptorSet 0 + OpDecorate %4790 Binding 0 + OpDecorate %3709 DescriptorSet 1 + OpDecorate %3709 Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %1282 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %bool = OpTypeBool + %uint_2 = OpConstant %uint 2 + %uint_1 = OpConstant %uint 1 + %1837 = OpConstantComposite %v2uint %uint_2 %uint_1 + %v2bool = OpTypeVector %bool 2 + %uint_0 = OpConstant %uint 0 + %1807 = OpConstantComposite %v2uint %uint_0 %uint_0 + %1828 = OpConstantComposite %v2uint %uint_1 %uint_1 + %1816 = OpConstantComposite %v2uint %uint_1 %uint_0 + %uint_80 = OpConstant %uint 80 + %uint_16 = OpConstant %uint 16 + %2719 = OpConstantComposite %v2uint %uint_80 %uint_16 + %int = OpTypeInt 32 1 +%_struct_990 = OpTypeStruct %uint %uint +%_ptr_PushConstant__struct_990 = OpTypePointer PushConstant %_struct_990 + %3052 = OpVariable %_ptr_PushConstant__struct_990 PushConstant + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint + %uint_10 = OpConstant %uint 10 + %uint_12 = OpConstant %uint 12 + %2041 = OpConstantComposite %v2uint %uint_10 %uint_12 + %uint_3 = OpConstant %uint 3 + %int_0 = OpConstant %int 0 + %1927 = OpConstantComposite %v2uint %uint_0 %uint_10 + %uint_1023 = OpConstant %uint 1023 + %uint_20 = OpConstant %uint 20 + %uint_14 = OpConstant %uint 14 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %v2int = OpTypeVector %int 2 + %int_3 = OpConstant %int 3 + %v4uint = OpTypeVector %uint 4 +%_runtimearr_v4uint = OpTypeRuntimeArray %v4uint +%_struct_1972 = OpTypeStruct %_runtimearr_v4uint +%_ptr_Uniform__struct_1972 = OpTypePointer Uniform %_struct_1972 + %4790 = OpVariable %_ptr_Uniform__struct_1972 Uniform + %float = OpTypeFloat 32 + %182 = OpTypeImage %float 2D 0 0 1 1 Unknown +%_ptr_UniformConstant_182 = OpTypePointer UniformConstant %182 + %3709 = OpVariable %_ptr_UniformConstant_182 UniformConstant + %v4float = OpTypeVector %float 4 + %1824 = OpConstantComposite %v2int %int_1 %int_0 + %int_2 = OpConstant %int 2 + %1833 = OpConstantComposite %v2int %int_2 %int_0 + %1842 = OpConstantComposite %v2int %int_3 %int_0 +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %int_4 = OpConstant %int 4 + %1851 = OpConstantComposite %v2int %int_4 %int_0 + %int_5 = OpConstant %int 5 + %1860 = OpConstantComposite %v2int %int_5 %int_0 + %int_6 = OpConstant %int 6 + %1869 = OpConstantComposite %v2int %int_6 %int_0 + %int_7 = OpConstant %int 7 + %1878 = OpConstantComposite %v2int %int_7 %int_0 + %uint_8 = OpConstant %uint 8 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_8 %uint_8 %uint_1 + %1870 = OpConstantComposite %v2uint %uint_3 %uint_3 + %2213 = OpConstantComposite %v2uint %uint_1023 %uint_1023 + %5663 = OpFunction %void None %1282 + %15110 = OpLabel + OpSelectionMerge %19578 None + OpSwitch %uint_0 %11880 + %11880 = OpLabel + %22245 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %15627 = OpLoad %uint %22245 + %22605 = OpAccessChain %_ptr_PushConstant_uint %3052 %int_0 + %21784 = OpLoad %uint %22605 + %21170 = OpShiftRightLogical %uint %21784 %uint_20 + %15618 = OpBitwiseAnd %uint %21170 %uint_1023 + %10265 = OpIAdd %uint %15618 %uint_1 + %19929 = OpAccessChain %_ptr_PushConstant_uint %3052 %int_1 + %15334 = OpLoad %uint %19929 + %10293 = OpCompositeConstruct %v2uint %15334 %15334 + %24634 = OpShiftRightLogical %v2uint %10293 %2041 + %24203 = OpBitwiseAnd %v2uint %24634 %1870 + %10929 = OpCompositeExtract %uint %24203 0 + %7670 = OpIMul %uint %10265 %10929 + %7287 = OpUGreaterThanEqual %bool %15627 %7670 + OpSelectionMerge %16345 DontFlatten + OpBranchConditional %7287 %21992 %16345 + %21992 = OpLabel + OpBranch %19578 + %16345 = OpLabel + %10771 = OpCompositeConstruct %v2uint %21784 %21784 + %13581 = OpShiftRightLogical %v2uint %10771 %1927 + %23379 = OpBitwiseAnd %v2uint %13581 %2213 + %16245 = OpShiftLeftLogical %v2uint %23379 %1870 + %20127 = OpIMul %v2uint %16245 %24203 + %19539 = OpShiftLeftLogical %uint %15627 %uint_3 + %17126 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1 + %22160 = OpLoad %uint %17126 + %22686 = OpShiftRightLogical %uint %22160 %uint_1 + %6471 = OpCompositeConstruct %v2uint %19539 %22686 + %9483 = OpIAdd %v2uint %20127 %6471 + %13160 = OpBitcast %v2int %9483 + %11709 = OpBitwiseAnd %uint %22160 %uint_1 + %10746 = OpBitcast %v2uint %13160 + %11323 = OpBitwiseAnd %uint %15334 %uint_1023 + %7195 = OpUGreaterThanEqual %v2bool %1828 %1837 + %15571 = OpSelect %v2uint %7195 %1828 %1807 + %10986 = OpShiftLeftLogical %v2uint %10746 %15571 + %24669 = OpCompositeConstruct %v2uint %11709 %11709 + %9093 = OpShiftRightLogical %v2uint %24669 %1816 + %16072 = OpBitwiseAnd %v2uint %9093 %1828 + %20272 = OpIAdd %v2uint %10986 %16072 + %21145 = OpIMul %v2uint %2719 %24203 + %14725 = OpShiftRightLogical %v2uint %21145 %1807 + %19799 = OpUDiv %v2uint %20272 %14725 + %20390 = OpCompositeExtract %uint %19799 1 + %11046 = OpIMul %uint %20390 %11323 + %24741 = OpCompositeExtract %uint %19799 0 + %20806 = OpIAdd %uint %11046 %24741 + %13527 = OpIMul %v2uint %19799 %14725 + %20715 = OpISub %v2uint %20272 %13527 + %7303 = OpCompositeExtract %uint %21145 0 + %22882 = OpCompositeExtract %uint %21145 1 + %13170 = OpIMul %uint %7303 %22882 + %14551 = OpIMul %uint %20806 %13170 + %6805 = OpCompositeExtract %uint %20715 1 + %23526 = OpCompositeExtract %uint %14725 0 + %22886 = OpIMul %uint %6805 %23526 + %6886 = OpCompositeExtract %uint %20715 0 + %9696 = OpIAdd %uint %22886 %6886 + %19199 = OpShiftLeftLogical %uint %9696 %uint_0 + %8720 = OpIAdd %uint %14551 %19199 + %23683 = OpShiftRightLogical %uint %8720 %uint_2 + %13780 = OpShiftRightLogical %uint %15334 %uint_14 + %12661 = OpBitwiseAnd %uint %13780 %uint_1 + %11491 = OpINotEqual %bool %12661 %uint_0 + OpSelectionMerge %8129 None + OpBranchConditional %11491 %22983 %14914 + %14914 = OpLabel + %24444 = OpINotEqual %bool %11709 %uint_0 + %20803 = OpSelect %int %24444 %int_3 %int_0 + OpBranch %8129 + %22983 = OpLabel + %7795 = OpINotEqual %bool %11709 %uint_0 + %20804 = OpSelect %int %7795 %int_0 %int_1 + OpBranch %8129 + %8129 = OpLabel + %21086 = OpPhi %int %20804 %22983 %20803 %14914 + %12853 = OpLoad %182 %3709 + %23440 = OpImageFetch %v4float %12853 %13160 Sample %21086 + %11864 = OpCompositeExtract %float %23440 0 + %19035 = OpIAdd %v2int %13160 %1824 + %20902 = OpImageFetch %v4float %12853 %19035 Sample %21086 + %17472 = OpCompositeExtract %float %20902 0 + %19036 = OpIAdd %v2int %13160 %1833 + %20903 = OpImageFetch %v4float %12853 %19036 Sample %21086 + %17473 = OpCompositeExtract %float %20903 0 + %19037 = OpIAdd %v2int %13160 %1842 + %19990 = OpImageFetch %v4float %12853 %19037 Sample %21086 + %7256 = OpCompositeExtract %float %19990 0 + %6487 = OpCompositeConstruct %v4float %11864 %17472 %17473 %7256 + %20366 = OpBitcast %v4uint %6487 + %12860 = OpAccessChain %_ptr_Uniform_v4uint %4790 %int_0 %23683 + OpStore %12860 %20366 + %20256 = OpIAdd %uint %23683 %uint_1 + %8574 = OpIAdd %v2int %13160 %1851 + %10680 = OpImageFetch %v4float %12853 %8574 Sample %21086 + %17474 = OpCompositeExtract %float %10680 0 + %19038 = OpIAdd %v2int %13160 %1860 + %20904 = OpImageFetch %v4float %12853 %19038 Sample %21086 + %17475 = OpCompositeExtract %float %20904 0 + %19039 = OpIAdd %v2int %13160 %1869 + %20905 = OpImageFetch %v4float %12853 %19039 Sample %21086 + %17476 = OpCompositeExtract %float %20905 0 + %19040 = OpIAdd %v2int %13160 %1878 + %19991 = OpImageFetch %v4float %12853 %19040 Sample %21086 + %7257 = OpCompositeExtract %float %19991 0 + %6488 = OpCompositeConstruct %v4float %17474 %17475 %17476 %7257 + %20367 = OpBitcast %v4uint %6488 + %15159 = OpAccessChain %_ptr_Uniform_v4uint %4790 %int_0 %20256 + OpStore %15159 %20367 + OpBranch %19578 + %19578 = OpLabel + OpReturn + OpFunctionEnd +#endif + +const uint32_t host_depth_store_2xmsaa_cs[] = { + 0x07230203, 0x00010000, 0x0008000A, 0x000060A6, 0x00000000, 0x00020011, + 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, + 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0006000F, 0x00000005, + 0x0000161F, 0x6E69616D, 0x00000000, 0x00000F48, 0x00060010, 0x0000161F, + 0x00000011, 0x00000008, 0x00000008, 0x00000001, 0x00050048, 0x000003DE, + 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x000003DE, 0x00000001, + 0x00000023, 0x00000004, 0x00030047, 0x000003DE, 0x00000002, 0x00040047, + 0x00000F48, 0x0000000B, 0x0000001C, 0x00040047, 0x000007DC, 0x00000006, + 0x00000010, 0x00040048, 0x000007B4, 0x00000000, 0x00000019, 0x00050048, + 0x000007B4, 0x00000000, 0x00000023, 0x00000000, 0x00030047, 0x000007B4, + 0x00000003, 0x00040047, 0x000012B6, 0x00000022, 0x00000000, 0x00040047, + 0x000012B6, 0x00000021, 0x00000000, 0x00040047, 0x00000E7D, 0x00000022, + 0x00000001, 0x00040047, 0x00000E7D, 0x00000021, 0x00000000, 0x00040047, + 0x00000AC7, 0x0000000B, 0x00000019, 0x00020013, 0x00000008, 0x00030021, + 0x00000502, 0x00000008, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, + 0x00040017, 0x00000011, 0x0000000B, 0x00000002, 0x00020014, 0x00000009, + 0x0004002B, 0x0000000B, 0x00000A10, 0x00000002, 0x0004002B, 0x0000000B, + 0x00000A0D, 0x00000001, 0x0005002C, 0x00000011, 0x0000072D, 0x00000A10, + 0x00000A0D, 0x00040017, 0x0000000F, 0x00000009, 0x00000002, 0x0004002B, + 0x0000000B, 0x00000A0A, 0x00000000, 0x0005002C, 0x00000011, 0x0000070F, + 0x00000A0A, 0x00000A0A, 0x0005002C, 0x00000011, 0x00000724, 0x00000A0D, + 0x00000A0D, 0x0005002C, 0x00000011, 0x00000718, 0x00000A0D, 0x00000A0A, + 0x0004002B, 0x0000000B, 0x00000AFA, 0x00000050, 0x0004002B, 0x0000000B, + 0x00000A3A, 0x00000010, 0x0005002C, 0x00000011, 0x00000A9F, 0x00000AFA, + 0x00000A3A, 0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004001E, + 0x000003DE, 0x0000000B, 0x0000000B, 0x00040020, 0x0000065B, 0x00000009, + 0x000003DE, 0x0004003B, 0x0000065B, 0x00000BEC, 0x00000009, 0x0004002B, + 0x0000000C, 0x00000A0E, 0x00000001, 0x00040020, 0x00000288, 0x00000009, + 0x0000000B, 0x0004002B, 0x0000000B, 0x00000A28, 0x0000000A, 0x0004002B, + 0x0000000B, 0x00000A2E, 0x0000000C, 0x0005002C, 0x00000011, 0x000007F9, + 0x00000A28, 0x00000A2E, 0x0004002B, 0x0000000B, 0x00000A13, 0x00000003, + 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x0005002C, 0x00000011, + 0x00000787, 0x00000A0A, 0x00000A28, 0x0004002B, 0x0000000B, 0x00000A44, + 0x000003FF, 0x0004002B, 0x0000000B, 0x00000A46, 0x00000014, 0x0004002B, + 0x0000000B, 0x00000A34, 0x0000000E, 0x00040017, 0x00000014, 0x0000000B, + 0x00000003, 0x00040020, 0x00000291, 0x00000001, 0x00000014, 0x0004003B, + 0x00000291, 0x00000F48, 0x00000001, 0x00040020, 0x00000289, 0x00000001, + 0x0000000B, 0x00040017, 0x00000012, 0x0000000C, 0x00000002, 0x0004002B, + 0x0000000C, 0x00000A14, 0x00000003, 0x00040017, 0x00000017, 0x0000000B, + 0x00000004, 0x0003001D, 0x000007DC, 0x00000017, 0x0003001E, 0x000007B4, + 0x000007DC, 0x00040020, 0x00000A31, 0x00000002, 0x000007B4, 0x0004003B, + 0x00000A31, 0x000012B6, 0x00000002, 0x00030016, 0x0000000D, 0x00000020, + 0x00090019, 0x000000B6, 0x0000000D, 0x00000001, 0x00000000, 0x00000000, + 0x00000001, 0x00000001, 0x00000000, 0x00040020, 0x00000333, 0x00000000, + 0x000000B6, 0x0004003B, 0x00000333, 0x00000E7D, 0x00000000, 0x00040017, + 0x0000001D, 0x0000000D, 0x00000004, 0x0005002C, 0x00000012, 0x00000720, + 0x00000A0E, 0x00000A0B, 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, + 0x0005002C, 0x00000012, 0x00000729, 0x00000A11, 0x00000A0B, 0x0005002C, + 0x00000012, 0x00000732, 0x00000A14, 0x00000A0B, 0x00040020, 0x00000294, + 0x00000002, 0x00000017, 0x0004002B, 0x0000000C, 0x00000A17, 0x00000004, + 0x0005002C, 0x00000012, 0x0000073B, 0x00000A17, 0x00000A0B, 0x0004002B, + 0x0000000C, 0x00000A1A, 0x00000005, 0x0005002C, 0x00000012, 0x00000744, + 0x00000A1A, 0x00000A0B, 0x0004002B, 0x0000000C, 0x00000A1D, 0x00000006, + 0x0005002C, 0x00000012, 0x0000074D, 0x00000A1D, 0x00000A0B, 0x0004002B, + 0x0000000C, 0x00000A20, 0x00000007, 0x0005002C, 0x00000012, 0x00000756, + 0x00000A20, 0x00000A0B, 0x0004002B, 0x0000000B, 0x00000A22, 0x00000008, + 0x0006002C, 0x00000014, 0x00000AC7, 0x00000A22, 0x00000A22, 0x00000A0D, + 0x0005002C, 0x00000011, 0x0000074E, 0x00000A13, 0x00000A13, 0x0005002C, + 0x00000011, 0x000008A5, 0x00000A44, 0x00000A44, 0x00050036, 0x00000008, + 0x0000161F, 0x00000000, 0x00000502, 0x000200F8, 0x00003B06, 0x000300F7, + 0x00004C7A, 0x00000000, 0x000300FB, 0x00000A0A, 0x00002E68, 0x000200F8, + 0x00002E68, 0x00050041, 0x00000289, 0x000056E5, 0x00000F48, 0x00000A0A, + 0x0004003D, 0x0000000B, 0x00003D0B, 0x000056E5, 0x00050041, 0x00000288, + 0x0000584D, 0x00000BEC, 0x00000A0B, 0x0004003D, 0x0000000B, 0x00005518, + 0x0000584D, 0x000500C2, 0x0000000B, 0x000052B2, 0x00005518, 0x00000A46, + 0x000500C7, 0x0000000B, 0x00003D02, 0x000052B2, 0x00000A44, 0x00050080, + 0x0000000B, 0x00002819, 0x00003D02, 0x00000A0D, 0x00050041, 0x00000288, + 0x00004DD9, 0x00000BEC, 0x00000A0E, 0x0004003D, 0x0000000B, 0x00003BE6, + 0x00004DD9, 0x00050050, 0x00000011, 0x00002835, 0x00003BE6, 0x00003BE6, + 0x000500C2, 0x00000011, 0x0000603A, 0x00002835, 0x000007F9, 0x000500C7, + 0x00000011, 0x00005E8B, 0x0000603A, 0x0000074E, 0x00050051, 0x0000000B, + 0x00002AB1, 0x00005E8B, 0x00000000, 0x00050084, 0x0000000B, 0x00001DF6, + 0x00002819, 0x00002AB1, 0x000500AE, 0x00000009, 0x00001C77, 0x00003D0B, + 0x00001DF6, 0x000300F7, 0x00003FD9, 0x00000002, 0x000400FA, 0x00001C77, + 0x000055E8, 0x00003FD9, 0x000200F8, 0x000055E8, 0x000200F9, 0x00004C7A, + 0x000200F8, 0x00003FD9, 0x00050050, 0x00000011, 0x00002A13, 0x00005518, + 0x00005518, 0x000500C2, 0x00000011, 0x0000350D, 0x00002A13, 0x00000787, + 0x000500C7, 0x00000011, 0x00005B53, 0x0000350D, 0x000008A5, 0x000500C4, + 0x00000011, 0x00003F75, 0x00005B53, 0x0000074E, 0x00050084, 0x00000011, + 0x00004E9F, 0x00003F75, 0x00005E8B, 0x000500C4, 0x0000000B, 0x00004C53, + 0x00003D0B, 0x00000A13, 0x00050041, 0x00000289, 0x000042E6, 0x00000F48, + 0x00000A0D, 0x0004003D, 0x0000000B, 0x00005690, 0x000042E6, 0x000500C2, + 0x0000000B, 0x0000589E, 0x00005690, 0x00000A0D, 0x00050050, 0x00000011, + 0x00001947, 0x00004C53, 0x0000589E, 0x00050080, 0x00000011, 0x0000250B, + 0x00004E9F, 0x00001947, 0x0004007C, 0x00000012, 0x00003368, 0x0000250B, + 0x000500C7, 0x0000000B, 0x00002DBD, 0x00005690, 0x00000A0D, 0x0004007C, + 0x00000011, 0x000029FA, 0x00003368, 0x000500C7, 0x0000000B, 0x00002C3B, + 0x00003BE6, 0x00000A44, 0x000500AE, 0x0000000F, 0x00001C1B, 0x00000724, + 0x0000072D, 0x000600A9, 0x00000011, 0x00003CD3, 0x00001C1B, 0x00000724, + 0x0000070F, 0x000500C4, 0x00000011, 0x00002AEA, 0x000029FA, 0x00003CD3, + 0x00050050, 0x00000011, 0x0000605D, 0x00002DBD, 0x00002DBD, 0x000500C2, + 0x00000011, 0x00002385, 0x0000605D, 0x00000718, 0x000500C7, 0x00000011, + 0x00003EC8, 0x00002385, 0x00000724, 0x00050080, 0x00000011, 0x00004F30, + 0x00002AEA, 0x00003EC8, 0x00050084, 0x00000011, 0x00005299, 0x00000A9F, + 0x00005E8B, 0x000500C2, 0x00000011, 0x00003985, 0x00005299, 0x0000070F, + 0x00050086, 0x00000011, 0x00004D57, 0x00004F30, 0x00003985, 0x00050051, + 0x0000000B, 0x00004FA6, 0x00004D57, 0x00000001, 0x00050084, 0x0000000B, + 0x00002B26, 0x00004FA6, 0x00002C3B, 0x00050051, 0x0000000B, 0x000060A5, + 0x00004D57, 0x00000000, 0x00050080, 0x0000000B, 0x00005146, 0x00002B26, + 0x000060A5, 0x00050084, 0x00000011, 0x000034D7, 0x00004D57, 0x00003985, + 0x00050082, 0x00000011, 0x000050EB, 0x00004F30, 0x000034D7, 0x00050051, + 0x0000000B, 0x00001C87, 0x00005299, 0x00000000, 0x00050051, 0x0000000B, + 0x00005962, 0x00005299, 0x00000001, 0x00050084, 0x0000000B, 0x00003372, + 0x00001C87, 0x00005962, 0x00050084, 0x0000000B, 0x000038D7, 0x00005146, + 0x00003372, 0x00050051, 0x0000000B, 0x00001A95, 0x000050EB, 0x00000001, + 0x00050051, 0x0000000B, 0x00005BE6, 0x00003985, 0x00000000, 0x00050084, + 0x0000000B, 0x00005966, 0x00001A95, 0x00005BE6, 0x00050051, 0x0000000B, + 0x00001AE6, 0x000050EB, 0x00000000, 0x00050080, 0x0000000B, 0x000025E0, + 0x00005966, 0x00001AE6, 0x000500C4, 0x0000000B, 0x00004AFF, 0x000025E0, + 0x00000A0A, 0x00050080, 0x0000000B, 0x00002210, 0x000038D7, 0x00004AFF, + 0x000500C2, 0x0000000B, 0x00005C83, 0x00002210, 0x00000A10, 0x000500C2, + 0x0000000B, 0x000035D4, 0x00003BE6, 0x00000A34, 0x000500C7, 0x0000000B, + 0x00003175, 0x000035D4, 0x00000A0D, 0x000500AB, 0x00000009, 0x00002CE3, + 0x00003175, 0x00000A0A, 0x000300F7, 0x00001FC1, 0x00000000, 0x000400FA, + 0x00002CE3, 0x000059C7, 0x00003A42, 0x000200F8, 0x00003A42, 0x000500AB, + 0x00000009, 0x00005F7C, 0x00002DBD, 0x00000A0A, 0x000600A9, 0x0000000C, + 0x00005143, 0x00005F7C, 0x00000A14, 0x00000A0B, 0x000200F9, 0x00001FC1, + 0x000200F8, 0x000059C7, 0x000500AB, 0x00000009, 0x00001E73, 0x00002DBD, + 0x00000A0A, 0x000600A9, 0x0000000C, 0x00005144, 0x00001E73, 0x00000A0B, + 0x00000A0E, 0x000200F9, 0x00001FC1, 0x000200F8, 0x00001FC1, 0x000700F5, + 0x0000000C, 0x0000525E, 0x00005144, 0x000059C7, 0x00005143, 0x00003A42, + 0x0004003D, 0x000000B6, 0x00003235, 0x00000E7D, 0x0007005F, 0x0000001D, + 0x00005B90, 0x00003235, 0x00003368, 0x00000040, 0x0000525E, 0x00050051, + 0x0000000D, 0x00002E58, 0x00005B90, 0x00000000, 0x00050080, 0x00000012, + 0x00004A5B, 0x00003368, 0x00000720, 0x0007005F, 0x0000001D, 0x000051A6, + 0x00003235, 0x00004A5B, 0x00000040, 0x0000525E, 0x00050051, 0x0000000D, + 0x00004440, 0x000051A6, 0x00000000, 0x00050080, 0x00000012, 0x00004A5C, + 0x00003368, 0x00000729, 0x0007005F, 0x0000001D, 0x000051A7, 0x00003235, + 0x00004A5C, 0x00000040, 0x0000525E, 0x00050051, 0x0000000D, 0x00004441, + 0x000051A7, 0x00000000, 0x00050080, 0x00000012, 0x00004A5D, 0x00003368, + 0x00000732, 0x0007005F, 0x0000001D, 0x00004E16, 0x00003235, 0x00004A5D, + 0x00000040, 0x0000525E, 0x00050051, 0x0000000D, 0x00001C58, 0x00004E16, + 0x00000000, 0x00070050, 0x0000001D, 0x00001957, 0x00002E58, 0x00004440, + 0x00004441, 0x00001C58, 0x0004007C, 0x00000017, 0x00004F8E, 0x00001957, + 0x00060041, 0x00000294, 0x0000323C, 0x000012B6, 0x00000A0B, 0x00005C83, + 0x0003003E, 0x0000323C, 0x00004F8E, 0x00050080, 0x0000000B, 0x00004F20, + 0x00005C83, 0x00000A0D, 0x00050080, 0x00000012, 0x0000217E, 0x00003368, + 0x0000073B, 0x0007005F, 0x0000001D, 0x000029B8, 0x00003235, 0x0000217E, + 0x00000040, 0x0000525E, 0x00050051, 0x0000000D, 0x00004442, 0x000029B8, + 0x00000000, 0x00050080, 0x00000012, 0x00004A5E, 0x00003368, 0x00000744, + 0x0007005F, 0x0000001D, 0x000051A8, 0x00003235, 0x00004A5E, 0x00000040, + 0x0000525E, 0x00050051, 0x0000000D, 0x00004443, 0x000051A8, 0x00000000, + 0x00050080, 0x00000012, 0x00004A5F, 0x00003368, 0x0000074D, 0x0007005F, + 0x0000001D, 0x000051A9, 0x00003235, 0x00004A5F, 0x00000040, 0x0000525E, + 0x00050051, 0x0000000D, 0x00004444, 0x000051A9, 0x00000000, 0x00050080, + 0x00000012, 0x00004A60, 0x00003368, 0x00000756, 0x0007005F, 0x0000001D, + 0x00004E17, 0x00003235, 0x00004A60, 0x00000040, 0x0000525E, 0x00050051, + 0x0000000D, 0x00001C59, 0x00004E17, 0x00000000, 0x00070050, 0x0000001D, + 0x00001958, 0x00004442, 0x00004443, 0x00004444, 0x00001C59, 0x0004007C, + 0x00000017, 0x00004F8F, 0x00001958, 0x00060041, 0x00000294, 0x00003B37, + 0x000012B6, 0x00000A0B, 0x00004F20, 0x0003003E, 0x00003B37, 0x00004F8F, + 0x000200F9, 0x00004C7A, 0x000200F8, 0x00004C7A, 0x000100FD, 0x00010038, +}; diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h new file mode 100644 index 000000000..b26844c81 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h @@ -0,0 +1,332 @@ +// Generated with `xb buildshaders`. +#if 0 +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 24815 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %5663 "main" %gl_GlobalInvocationID + OpExecutionMode %5663 LocalSize 8 8 1 + OpMemberDecorate %_struct_990 0 Offset 0 + OpMemberDecorate %_struct_990 1 Offset 4 + OpDecorate %_struct_990 Block + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_runtimearr_v4uint ArrayStride 16 + OpMemberDecorate %_struct_1972 0 NonReadable + OpMemberDecorate %_struct_1972 0 Offset 0 + OpDecorate %_struct_1972 BufferBlock + OpDecorate %4790 DescriptorSet 0 + OpDecorate %4790 Binding 0 + OpDecorate %3709 DescriptorSet 1 + OpDecorate %3709 Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %1282 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %bool = OpTypeBool + %uint_2 = OpConstant %uint 2 + %uint_1 = OpConstant %uint 1 + %1837 = OpConstantComposite %v2uint %uint_2 %uint_1 + %v2bool = OpTypeVector %bool 2 + %uint_0 = OpConstant %uint 0 + %1807 = OpConstantComposite %v2uint %uint_0 %uint_0 + %1828 = OpConstantComposite %v2uint %uint_1 %uint_1 + %1816 = OpConstantComposite %v2uint %uint_1 %uint_0 + %uint_80 = OpConstant %uint 80 + %uint_16 = OpConstant %uint 16 + %2719 = OpConstantComposite %v2uint %uint_80 %uint_16 + %int = OpTypeInt 32 1 +%_struct_990 = OpTypeStruct %uint %uint +%_ptr_PushConstant__struct_990 = OpTypePointer PushConstant %_struct_990 + %3052 = OpVariable %_ptr_PushConstant__struct_990 PushConstant + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint + %uint_10 = OpConstant %uint 10 + %uint_12 = OpConstant %uint 12 + %2041 = OpConstantComposite %v2uint %uint_10 %uint_12 + %uint_3 = OpConstant %uint 3 + %int_0 = OpConstant %int 0 + %1927 = OpConstantComposite %v2uint %uint_0 %uint_10 + %uint_1023 = OpConstant %uint 1023 + %uint_20 = OpConstant %uint 20 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %v2int = OpTypeVector %int 2 + %v4uint = OpTypeVector %uint 4 +%_runtimearr_v4uint = OpTypeRuntimeArray %v4uint +%_struct_1972 = OpTypeStruct %_runtimearr_v4uint +%_ptr_Uniform__struct_1972 = OpTypePointer Uniform %_struct_1972 + %4790 = OpVariable %_ptr_Uniform__struct_1972 Uniform + %float = OpTypeFloat 32 + %182 = OpTypeImage %float 2D 0 0 1 1 Unknown +%_ptr_UniformConstant_182 = OpTypePointer UniformConstant %182 + %3709 = OpVariable %_ptr_UniformConstant_182 UniformConstant + %v4float = OpTypeVector %float 4 + %1824 = OpConstantComposite %v2int %int_1 %int_0 +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %int_2 = OpConstant %int 2 + %1833 = OpConstantComposite %v2int %int_2 %int_0 + %int_3 = OpConstant %int 3 + %1842 = OpConstantComposite %v2int %int_3 %int_0 + %uint_8 = OpConstant %uint 8 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_8 %uint_8 %uint_1 + %1870 = OpConstantComposite %v2uint %uint_3 %uint_3 + %2213 = OpConstantComposite %v2uint %uint_1023 %uint_1023 + %5663 = OpFunction %void None %1282 + %15110 = OpLabel + OpSelectionMerge %19578 None + OpSwitch %uint_0 %11880 + %11880 = OpLabel + %24696 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %12457 = OpLoad %uint %24696 + %24008 = OpShiftRightLogical %uint %12457 %uint_1 + %8790 = OpAccessChain %_ptr_PushConstant_uint %3052 %int_0 + %22808 = OpLoad %uint %8790 + %21170 = OpShiftRightLogical %uint %22808 %uint_20 + %15618 = OpBitwiseAnd %uint %21170 %uint_1023 + %10265 = OpIAdd %uint %15618 %uint_1 + %19929 = OpAccessChain %_ptr_PushConstant_uint %3052 %int_1 + %15334 = OpLoad %uint %19929 + %10293 = OpCompositeConstruct %v2uint %15334 %15334 + %24634 = OpShiftRightLogical %v2uint %10293 %2041 + %24203 = OpBitwiseAnd %v2uint %24634 %1870 + %10929 = OpCompositeExtract %uint %24203 0 + %7670 = OpIMul %uint %10265 %10929 + %7287 = OpUGreaterThanEqual %bool %24008 %7670 + OpSelectionMerge %16345 DontFlatten + OpBranchConditional %7287 %21992 %16345 + %21992 = OpLabel + OpBranch %19578 + %16345 = OpLabel + %10771 = OpCompositeConstruct %v2uint %22808 %22808 + %13581 = OpShiftRightLogical %v2uint %10771 %1927 + %23379 = OpBitwiseAnd %v2uint %13581 %2213 + %16245 = OpShiftLeftLogical %v2uint %23379 %1870 + %20127 = OpIMul %v2uint %16245 %24203 + %19539 = OpShiftLeftLogical %uint %12457 %uint_2 + %17126 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1 + %22160 = OpLoad %uint %17126 + %22686 = OpShiftRightLogical %uint %22160 %uint_1 + %6471 = OpCompositeConstruct %v2uint %19539 %22686 + %8058 = OpIAdd %v2uint %20127 %6471 + %8432 = OpBitcast %v2int %8058 + %7291 = OpBitcast %v2uint %8432 + %22610 = OpShiftLeftLogical %v2uint %7291 %1828 + %8742 = OpLoad %v3uint %gl_GlobalInvocationID + %16994 = OpVectorShuffle %v2uint %8742 %8742 0 1 + %24648 = OpBitwiseAnd %v2uint %16994 %1828 + %15693 = OpBitwiseOr %v2uint %22610 %24648 + %23331 = OpBitwiseAnd %uint %15334 %uint_1023 + %17642 = OpUGreaterThanEqual %v2bool %1807 %1837 + %17737 = OpSelect %v2uint %17642 %1828 %1807 + %10430 = OpShiftLeftLogical %v2uint %15693 %17737 + %16475 = OpShiftRightLogical %v2uint %1807 %1816 + %13071 = OpBitwiseAnd %v2uint %16475 %1828 + %20272 = OpIAdd %v2uint %10430 %13071 + %21145 = OpIMul %v2uint %2719 %24203 + %14725 = OpShiftRightLogical %v2uint %21145 %1807 + %19799 = OpUDiv %v2uint %20272 %14725 + %20390 = OpCompositeExtract %uint %19799 1 + %11046 = OpIMul %uint %20390 %23331 + %24741 = OpCompositeExtract %uint %19799 0 + %20806 = OpIAdd %uint %11046 %24741 + %13527 = OpIMul %v2uint %19799 %14725 + %20715 = OpISub %v2uint %20272 %13527 + %7303 = OpCompositeExtract %uint %21145 0 + %22882 = OpCompositeExtract %uint %21145 1 + %13170 = OpIMul %uint %7303 %22882 + %14551 = OpIMul %uint %20806 %13170 + %6805 = OpCompositeExtract %uint %20715 1 + %23526 = OpCompositeExtract %uint %14725 0 + %22886 = OpIMul %uint %6805 %23526 + %6886 = OpCompositeExtract %uint %20715 0 + %9696 = OpIAdd %uint %22886 %6886 + %19199 = OpShiftLeftLogical %uint %9696 %uint_0 + %8815 = OpIAdd %uint %14551 %19199 + %22761 = OpShiftRightLogical %uint %8815 %uint_2 + %10583 = OpBitwiseAnd %uint %22160 %uint_1 + %16337 = OpShiftLeftLogical %uint %10583 %uint_1 + %21075 = OpBitcast %int %16337 + %8163 = OpIAdd %int %21075 %int_1 + %22164 = OpLoad %182 %3709 + %9165 = OpImageFetch %v4float %22164 %8432 Sample %21075 + %17307 = OpCompositeExtract %float %9165 0 + %6654 = OpImageFetch %v4float %22164 %8432 Sample %8163 + %15080 = OpCompositeExtract %float %6654 0 + %19035 = OpIAdd %v2int %8432 %1824 + %20275 = OpImageFetch %v4float %22164 %19035 Sample %21075 + %22915 = OpCompositeExtract %float %20275 0 + %24813 = OpImageFetch %v4float %22164 %19035 Sample %8163 + %23935 = OpCompositeExtract %float %24813 0 + %6487 = OpCompositeConstruct %v4float %17307 %15080 %22915 %23935 + %20366 = OpBitcast %v4uint %6487 + %12860 = OpAccessChain %_ptr_Uniform_v4uint %4790 %int_0 %22761 + OpStore %12860 %20366 + %20256 = OpIAdd %uint %22761 %uint_1 + %8574 = OpIAdd %v2int %8432 %1833 + %10053 = OpImageFetch %v4float %22164 %8574 Sample %21075 + %22916 = OpCompositeExtract %float %10053 0 + %6655 = OpImageFetch %v4float %22164 %8574 Sample %8163 + %15081 = OpCompositeExtract %float %6655 0 + %19036 = OpIAdd %v2int %8432 %1842 + %20276 = OpImageFetch %v4float %22164 %19036 Sample %21075 + %22917 = OpCompositeExtract %float %20276 0 + %24814 = OpImageFetch %v4float %22164 %19036 Sample %8163 + %23936 = OpCompositeExtract %float %24814 0 + %6488 = OpCompositeConstruct %v4float %22916 %15081 %22917 %23936 + %20367 = OpBitcast %v4uint %6488 + %15159 = OpAccessChain %_ptr_Uniform_v4uint %4790 %int_0 %20256 + OpStore %15159 %20367 + OpBranch %19578 + %19578 = OpLabel + OpReturn + OpFunctionEnd +#endif + +const uint32_t host_depth_store_4xmsaa_cs[] = { + 0x07230203, 0x00010000, 0x0008000A, 0x000060EF, 0x00000000, 0x00020011, + 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, + 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0006000F, 0x00000005, + 0x0000161F, 0x6E69616D, 0x00000000, 0x00000F48, 0x00060010, 0x0000161F, + 0x00000011, 0x00000008, 0x00000008, 0x00000001, 0x00050048, 0x000003DE, + 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x000003DE, 0x00000001, + 0x00000023, 0x00000004, 0x00030047, 0x000003DE, 0x00000002, 0x00040047, + 0x00000F48, 0x0000000B, 0x0000001C, 0x00040047, 0x000007DC, 0x00000006, + 0x00000010, 0x00040048, 0x000007B4, 0x00000000, 0x00000019, 0x00050048, + 0x000007B4, 0x00000000, 0x00000023, 0x00000000, 0x00030047, 0x000007B4, + 0x00000003, 0x00040047, 0x000012B6, 0x00000022, 0x00000000, 0x00040047, + 0x000012B6, 0x00000021, 0x00000000, 0x00040047, 0x00000E7D, 0x00000022, + 0x00000001, 0x00040047, 0x00000E7D, 0x00000021, 0x00000000, 0x00040047, + 0x00000AC7, 0x0000000B, 0x00000019, 0x00020013, 0x00000008, 0x00030021, + 0x00000502, 0x00000008, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, + 0x00040017, 0x00000011, 0x0000000B, 0x00000002, 0x00020014, 0x00000009, + 0x0004002B, 0x0000000B, 0x00000A10, 0x00000002, 0x0004002B, 0x0000000B, + 0x00000A0D, 0x00000001, 0x0005002C, 0x00000011, 0x0000072D, 0x00000A10, + 0x00000A0D, 0x00040017, 0x0000000F, 0x00000009, 0x00000002, 0x0004002B, + 0x0000000B, 0x00000A0A, 0x00000000, 0x0005002C, 0x00000011, 0x0000070F, + 0x00000A0A, 0x00000A0A, 0x0005002C, 0x00000011, 0x00000724, 0x00000A0D, + 0x00000A0D, 0x0005002C, 0x00000011, 0x00000718, 0x00000A0D, 0x00000A0A, + 0x0004002B, 0x0000000B, 0x00000AFA, 0x00000050, 0x0004002B, 0x0000000B, + 0x00000A3A, 0x00000010, 0x0005002C, 0x00000011, 0x00000A9F, 0x00000AFA, + 0x00000A3A, 0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004001E, + 0x000003DE, 0x0000000B, 0x0000000B, 0x00040020, 0x0000065B, 0x00000009, + 0x000003DE, 0x0004003B, 0x0000065B, 0x00000BEC, 0x00000009, 0x0004002B, + 0x0000000C, 0x00000A0E, 0x00000001, 0x00040020, 0x00000288, 0x00000009, + 0x0000000B, 0x0004002B, 0x0000000B, 0x00000A28, 0x0000000A, 0x0004002B, + 0x0000000B, 0x00000A2E, 0x0000000C, 0x0005002C, 0x00000011, 0x000007F9, + 0x00000A28, 0x00000A2E, 0x0004002B, 0x0000000B, 0x00000A13, 0x00000003, + 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x0005002C, 0x00000011, + 0x00000787, 0x00000A0A, 0x00000A28, 0x0004002B, 0x0000000B, 0x00000A44, + 0x000003FF, 0x0004002B, 0x0000000B, 0x00000A46, 0x00000014, 0x00040017, + 0x00000014, 0x0000000B, 0x00000003, 0x00040020, 0x00000291, 0x00000001, + 0x00000014, 0x0004003B, 0x00000291, 0x00000F48, 0x00000001, 0x00040020, + 0x00000289, 0x00000001, 0x0000000B, 0x00040017, 0x00000012, 0x0000000C, + 0x00000002, 0x00040017, 0x00000017, 0x0000000B, 0x00000004, 0x0003001D, + 0x000007DC, 0x00000017, 0x0003001E, 0x000007B4, 0x000007DC, 0x00040020, + 0x00000A31, 0x00000002, 0x000007B4, 0x0004003B, 0x00000A31, 0x000012B6, + 0x00000002, 0x00030016, 0x0000000D, 0x00000020, 0x00090019, 0x000000B6, + 0x0000000D, 0x00000001, 0x00000000, 0x00000000, 0x00000001, 0x00000001, + 0x00000000, 0x00040020, 0x00000333, 0x00000000, 0x000000B6, 0x0004003B, + 0x00000333, 0x00000E7D, 0x00000000, 0x00040017, 0x0000001D, 0x0000000D, + 0x00000004, 0x0005002C, 0x00000012, 0x00000720, 0x00000A0E, 0x00000A0B, + 0x00040020, 0x00000294, 0x00000002, 0x00000017, 0x0004002B, 0x0000000C, + 0x00000A11, 0x00000002, 0x0005002C, 0x00000012, 0x00000729, 0x00000A11, + 0x00000A0B, 0x0004002B, 0x0000000C, 0x00000A14, 0x00000003, 0x0005002C, + 0x00000012, 0x00000732, 0x00000A14, 0x00000A0B, 0x0004002B, 0x0000000B, + 0x00000A22, 0x00000008, 0x0006002C, 0x00000014, 0x00000AC7, 0x00000A22, + 0x00000A22, 0x00000A0D, 0x0005002C, 0x00000011, 0x0000074E, 0x00000A13, + 0x00000A13, 0x0005002C, 0x00000011, 0x000008A5, 0x00000A44, 0x00000A44, + 0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502, 0x000200F8, + 0x00003B06, 0x000300F7, 0x00004C7A, 0x00000000, 0x000300FB, 0x00000A0A, + 0x00002E68, 0x000200F8, 0x00002E68, 0x00050041, 0x00000289, 0x00006078, + 0x00000F48, 0x00000A0A, 0x0004003D, 0x0000000B, 0x000030A9, 0x00006078, + 0x000500C2, 0x0000000B, 0x00005DC8, 0x000030A9, 0x00000A0D, 0x00050041, + 0x00000288, 0x00002256, 0x00000BEC, 0x00000A0B, 0x0004003D, 0x0000000B, + 0x00005918, 0x00002256, 0x000500C2, 0x0000000B, 0x000052B2, 0x00005918, + 0x00000A46, 0x000500C7, 0x0000000B, 0x00003D02, 0x000052B2, 0x00000A44, + 0x00050080, 0x0000000B, 0x00002819, 0x00003D02, 0x00000A0D, 0x00050041, + 0x00000288, 0x00004DD9, 0x00000BEC, 0x00000A0E, 0x0004003D, 0x0000000B, + 0x00003BE6, 0x00004DD9, 0x00050050, 0x00000011, 0x00002835, 0x00003BE6, + 0x00003BE6, 0x000500C2, 0x00000011, 0x0000603A, 0x00002835, 0x000007F9, + 0x000500C7, 0x00000011, 0x00005E8B, 0x0000603A, 0x0000074E, 0x00050051, + 0x0000000B, 0x00002AB1, 0x00005E8B, 0x00000000, 0x00050084, 0x0000000B, + 0x00001DF6, 0x00002819, 0x00002AB1, 0x000500AE, 0x00000009, 0x00001C77, + 0x00005DC8, 0x00001DF6, 0x000300F7, 0x00003FD9, 0x00000002, 0x000400FA, + 0x00001C77, 0x000055E8, 0x00003FD9, 0x000200F8, 0x000055E8, 0x000200F9, + 0x00004C7A, 0x000200F8, 0x00003FD9, 0x00050050, 0x00000011, 0x00002A13, + 0x00005918, 0x00005918, 0x000500C2, 0x00000011, 0x0000350D, 0x00002A13, + 0x00000787, 0x000500C7, 0x00000011, 0x00005B53, 0x0000350D, 0x000008A5, + 0x000500C4, 0x00000011, 0x00003F75, 0x00005B53, 0x0000074E, 0x00050084, + 0x00000011, 0x00004E9F, 0x00003F75, 0x00005E8B, 0x000500C4, 0x0000000B, + 0x00004C53, 0x000030A9, 0x00000A10, 0x00050041, 0x00000289, 0x000042E6, + 0x00000F48, 0x00000A0D, 0x0004003D, 0x0000000B, 0x00005690, 0x000042E6, + 0x000500C2, 0x0000000B, 0x0000589E, 0x00005690, 0x00000A0D, 0x00050050, + 0x00000011, 0x00001947, 0x00004C53, 0x0000589E, 0x00050080, 0x00000011, + 0x00001F7A, 0x00004E9F, 0x00001947, 0x0004007C, 0x00000012, 0x000020F0, + 0x00001F7A, 0x0004007C, 0x00000011, 0x00001C7B, 0x000020F0, 0x000500C4, + 0x00000011, 0x00005852, 0x00001C7B, 0x00000724, 0x0004003D, 0x00000014, + 0x00002226, 0x00000F48, 0x0007004F, 0x00000011, 0x00004262, 0x00002226, + 0x00002226, 0x00000000, 0x00000001, 0x000500C7, 0x00000011, 0x00006048, + 0x00004262, 0x00000724, 0x000500C5, 0x00000011, 0x00003D4D, 0x00005852, + 0x00006048, 0x000500C7, 0x0000000B, 0x00005B23, 0x00003BE6, 0x00000A44, + 0x000500AE, 0x0000000F, 0x000044EA, 0x0000070F, 0x0000072D, 0x000600A9, + 0x00000011, 0x00004549, 0x000044EA, 0x00000724, 0x0000070F, 0x000500C4, + 0x00000011, 0x000028BE, 0x00003D4D, 0x00004549, 0x000500C2, 0x00000011, + 0x0000405B, 0x0000070F, 0x00000718, 0x000500C7, 0x00000011, 0x0000330F, + 0x0000405B, 0x00000724, 0x00050080, 0x00000011, 0x00004F30, 0x000028BE, + 0x0000330F, 0x00050084, 0x00000011, 0x00005299, 0x00000A9F, 0x00005E8B, + 0x000500C2, 0x00000011, 0x00003985, 0x00005299, 0x0000070F, 0x00050086, + 0x00000011, 0x00004D57, 0x00004F30, 0x00003985, 0x00050051, 0x0000000B, + 0x00004FA6, 0x00004D57, 0x00000001, 0x00050084, 0x0000000B, 0x00002B26, + 0x00004FA6, 0x00005B23, 0x00050051, 0x0000000B, 0x000060A5, 0x00004D57, + 0x00000000, 0x00050080, 0x0000000B, 0x00005146, 0x00002B26, 0x000060A5, + 0x00050084, 0x00000011, 0x000034D7, 0x00004D57, 0x00003985, 0x00050082, + 0x00000011, 0x000050EB, 0x00004F30, 0x000034D7, 0x00050051, 0x0000000B, + 0x00001C87, 0x00005299, 0x00000000, 0x00050051, 0x0000000B, 0x00005962, + 0x00005299, 0x00000001, 0x00050084, 0x0000000B, 0x00003372, 0x00001C87, + 0x00005962, 0x00050084, 0x0000000B, 0x000038D7, 0x00005146, 0x00003372, + 0x00050051, 0x0000000B, 0x00001A95, 0x000050EB, 0x00000001, 0x00050051, + 0x0000000B, 0x00005BE6, 0x00003985, 0x00000000, 0x00050084, 0x0000000B, + 0x00005966, 0x00001A95, 0x00005BE6, 0x00050051, 0x0000000B, 0x00001AE6, + 0x000050EB, 0x00000000, 0x00050080, 0x0000000B, 0x000025E0, 0x00005966, + 0x00001AE6, 0x000500C4, 0x0000000B, 0x00004AFF, 0x000025E0, 0x00000A0A, + 0x00050080, 0x0000000B, 0x0000226F, 0x000038D7, 0x00004AFF, 0x000500C2, + 0x0000000B, 0x000058E9, 0x0000226F, 0x00000A10, 0x000500C7, 0x0000000B, + 0x00002957, 0x00005690, 0x00000A0D, 0x000500C4, 0x0000000B, 0x00003FD1, + 0x00002957, 0x00000A0D, 0x0004007C, 0x0000000C, 0x00005253, 0x00003FD1, + 0x00050080, 0x0000000C, 0x00001FE3, 0x00005253, 0x00000A0E, 0x0004003D, + 0x000000B6, 0x00005694, 0x00000E7D, 0x0007005F, 0x0000001D, 0x000023CD, + 0x00005694, 0x000020F0, 0x00000040, 0x00005253, 0x00050051, 0x0000000D, + 0x0000439B, 0x000023CD, 0x00000000, 0x0007005F, 0x0000001D, 0x000019FE, + 0x00005694, 0x000020F0, 0x00000040, 0x00001FE3, 0x00050051, 0x0000000D, + 0x00003AE8, 0x000019FE, 0x00000000, 0x00050080, 0x00000012, 0x00004A5B, + 0x000020F0, 0x00000720, 0x0007005F, 0x0000001D, 0x00004F33, 0x00005694, + 0x00004A5B, 0x00000040, 0x00005253, 0x00050051, 0x0000000D, 0x00005983, + 0x00004F33, 0x00000000, 0x0007005F, 0x0000001D, 0x000060ED, 0x00005694, + 0x00004A5B, 0x00000040, 0x00001FE3, 0x00050051, 0x0000000D, 0x00005D7F, + 0x000060ED, 0x00000000, 0x00070050, 0x0000001D, 0x00001957, 0x0000439B, + 0x00003AE8, 0x00005983, 0x00005D7F, 0x0004007C, 0x00000017, 0x00004F8E, + 0x00001957, 0x00060041, 0x00000294, 0x0000323C, 0x000012B6, 0x00000A0B, + 0x000058E9, 0x0003003E, 0x0000323C, 0x00004F8E, 0x00050080, 0x0000000B, + 0x00004F20, 0x000058E9, 0x00000A0D, 0x00050080, 0x00000012, 0x0000217E, + 0x000020F0, 0x00000729, 0x0007005F, 0x0000001D, 0x00002745, 0x00005694, + 0x0000217E, 0x00000040, 0x00005253, 0x00050051, 0x0000000D, 0x00005984, + 0x00002745, 0x00000000, 0x0007005F, 0x0000001D, 0x000019FF, 0x00005694, + 0x0000217E, 0x00000040, 0x00001FE3, 0x00050051, 0x0000000D, 0x00003AE9, + 0x000019FF, 0x00000000, 0x00050080, 0x00000012, 0x00004A5C, 0x000020F0, + 0x00000732, 0x0007005F, 0x0000001D, 0x00004F34, 0x00005694, 0x00004A5C, + 0x00000040, 0x00005253, 0x00050051, 0x0000000D, 0x00005985, 0x00004F34, + 0x00000000, 0x0007005F, 0x0000001D, 0x000060EE, 0x00005694, 0x00004A5C, + 0x00000040, 0x00001FE3, 0x00050051, 0x0000000D, 0x00005D80, 0x000060EE, + 0x00000000, 0x00070050, 0x0000001D, 0x00001958, 0x00005984, 0x00003AE9, + 0x00005985, 0x00005D80, 0x0004007C, 0x00000017, 0x00004F8F, 0x00001958, + 0x00060041, 0x00000294, 0x00003B37, 0x000012B6, 0x00000A0B, 0x00004F20, + 0x0003003E, 0x00003B37, 0x00004F8F, 0x000200F9, 0x00004C7A, 0x000200F8, + 0x00004C7A, 0x000100FD, 0x00010038, +}; diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h new file mode 100644 index 000000000..043da3951 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h @@ -0,0 +1,76 @@ +// Generated with `xb buildshaders`. +#if 0 +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 24988 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %5663 "main" %4930 %5474 + OpMemberDecorate %_struct_2935 0 BuiltIn Position + OpMemberDecorate %_struct_2935 1 BuiltIn PointSize + OpMemberDecorate %_struct_2935 2 BuiltIn ClipDistance + OpMemberDecorate %_struct_2935 3 BuiltIn CullDistance + OpDecorate %_struct_2935 Block + OpDecorate %5474 Location 0 + %void = OpTypeVoid + %1282 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_struct_2935 = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output__struct_2935 = OpTypePointer Output %_struct_2935 + %4930 = OpVariable %_ptr_Output__struct_2935 Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %5474 = OpVariable %_ptr_Input_v2float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %5663 = OpFunction %void None %1282 + %24987 = OpLabel + %17674 = OpLoad %v2float %5474 + %21995 = OpCompositeExtract %float %17674 0 + %23384 = OpCompositeExtract %float %17674 1 + %18260 = OpCompositeConstruct %v4float %21995 %23384 %float_0 %float_1 + %12055 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 + OpStore %12055 %18260 + OpReturn + OpFunctionEnd +#endif + +const uint32_t passthrough_position_xy_vs[] = { + 0x07230203, 0x00010000, 0x0008000A, 0x0000619C, 0x00000000, 0x00020011, + 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, + 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0007000F, 0x00000000, + 0x0000161F, 0x6E69616D, 0x00000000, 0x00001342, 0x00001562, 0x00050048, + 0x00000B77, 0x00000000, 0x0000000B, 0x00000000, 0x00050048, 0x00000B77, + 0x00000001, 0x0000000B, 0x00000001, 0x00050048, 0x00000B77, 0x00000002, + 0x0000000B, 0x00000003, 0x00050048, 0x00000B77, 0x00000003, 0x0000000B, + 0x00000004, 0x00030047, 0x00000B77, 0x00000002, 0x00040047, 0x00001562, + 0x0000001E, 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502, + 0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, + 0x0000000D, 0x00000004, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, + 0x0004002B, 0x0000000B, 0x00000A0D, 0x00000001, 0x0004001C, 0x0000022A, + 0x0000000D, 0x00000A0D, 0x0006001E, 0x00000B77, 0x0000001D, 0x0000000D, + 0x0000022A, 0x0000022A, 0x00040020, 0x00000231, 0x00000003, 0x00000B77, + 0x0004003B, 0x00000231, 0x00001342, 0x00000003, 0x00040015, 0x0000000C, + 0x00000020, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, + 0x00040017, 0x00000013, 0x0000000D, 0x00000002, 0x00040020, 0x00000290, + 0x00000001, 0x00000013, 0x0004003B, 0x00000290, 0x00001562, 0x00000001, + 0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000, 0x0004002B, 0x0000000D, + 0x0000008A, 0x3F800000, 0x00040020, 0x0000029A, 0x00000003, 0x0000001D, + 0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502, 0x000200F8, + 0x0000619B, 0x0004003D, 0x00000013, 0x0000450A, 0x00001562, 0x00050051, + 0x0000000D, 0x000055EB, 0x0000450A, 0x00000000, 0x00050051, 0x0000000D, + 0x00005B58, 0x0000450A, 0x00000001, 0x00070050, 0x0000001D, 0x00004754, + 0x000055EB, 0x00005B58, 0x00000A0C, 0x0000008A, 0x00050041, 0x0000029A, + 0x00002F17, 0x00001342, 0x00000A0B, 0x0003003E, 0x00002F17, 0x00004754, + 0x000100FD, 0x00010038, +}; diff --git a/src/xenia/gpu/shaders/edram.hlsli b/src/xenia/gpu/shaders/edram.hlsli deleted file mode 100644 index 51415d501..000000000 --- a/src/xenia/gpu/shaders/edram.hlsli +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef XENIA_GPU_D3D12_SHADERS_EDRAM_HLSLI_ -#define XENIA_GPU_D3D12_SHADERS_EDRAM_HLSLI_ - -#include "pixel_formats.hlsli" - -#define kXenosMsaaSamples_1X 0u -#define kXenosMsaaSamples_2X 1u -#define kXenosMsaaSamples_4X 2u - -uint XeEdramOffsetInts(uint2 pixel_index, uint base_tiles, uint pitch_tiles, - uint msaa_samples, bool is_depth, uint format_ints_log2, - uint pixel_sample_index, uint2 resolution_scale) { - uint2 rt_sample_index = - pixel_index << - uint2(msaa_samples >= uint2(kXenosMsaaSamples_4X, kXenosMsaaSamples_2X)); - rt_sample_index += (pixel_sample_index >> uint2(1u, 0u)) & 1u; - // For now, while the actual storage of 64bpp render targets in comparison to - // 32bpp is not known, storing 40x16 64bpp samples per tile for simplicity of - // addressing in different scenarios. - uint2 tile_size_at_32bpp = uint2(80u, 16u) * resolution_scale; - uint2 tile_size_samples = tile_size_at_32bpp >> uint2(format_ints_log2, 0u); - uint2 tile_offset_xy = rt_sample_index / tile_size_samples; - base_tiles += tile_offset_xy.y * pitch_tiles + tile_offset_xy.x; - rt_sample_index -= tile_offset_xy * tile_size_samples; - if (is_depth) { - uint tile_width_half = tile_size_samples.x >> 1u; - rt_sample_index.x = - uint(int(rt_sample_index.x) + - ((rt_sample_index.x >= tile_width_half) ? -int(tile_width_half) - : int(tile_width_half))); - } - return base_tiles * (tile_size_at_32bpp.x * tile_size_at_32bpp.y) + - ((rt_sample_index.y * tile_size_samples.x + rt_sample_index.x) << - format_ints_log2); -} - -#endif // XENIA_GPU_D3D12_SHADERS_EDRAM_HLSLI_ diff --git a/src/xenia/gpu/shaders/edram.xesli b/src/xenia/gpu/shaders/edram.xesli new file mode 100644 index 000000000..4fd61eefd --- /dev/null +++ b/src/xenia/gpu/shaders/edram.xesli @@ -0,0 +1,50 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SHADERS_EDRAM_XESLI_ +#define XENIA_GPU_SHADERS_EDRAM_XESLI_ + +#include "pixel_formats.xesli" + +#define kXenosMsaaSamples_1X 0u +#define kXenosMsaaSamples_2X 1u +#define kXenosMsaaSamples_4X 2u + +uint XeEdramOffsetInts(xesl_uint2 pixel_index, uint base_tiles, + uint pitch_tiles, uint msaa_samples, bool is_depth, + uint format_ints_log2, uint pixel_sample_index, + xesl_uint2 resolution_scale) { + xesl_uint2 rt_sample_index = + pixel_index << + xesl_uint2(xesl_greaterThanEqual( + msaa_samples.xx, + xesl_uint2(kXenosMsaaSamples_4X, kXenosMsaaSamples_2X))); + rt_sample_index += (pixel_sample_index.xx >> xesl_uint2(1u, 0u)) & 1u; + // For now, while the actual storage of 64bpp render targets in comparison to + // 32bpp is not known, storing 40x16 64bpp samples per tile for simplicity of + // addressing in different scenarios. + xesl_uint2 tile_size_at_32bpp = xesl_uint2(80u, 16u) * resolution_scale; + xesl_uint2 tile_size_samples = + tile_size_at_32bpp >> xesl_uint2(format_ints_log2, 0u); + xesl_uint2 tile_offset_xy = rt_sample_index / tile_size_samples; + base_tiles += tile_offset_xy.y * pitch_tiles + tile_offset_xy.x; + rt_sample_index -= tile_offset_xy * tile_size_samples; + if (is_depth) { + uint tile_width_half = tile_size_samples.x >> 1u; + rt_sample_index.x = + uint(int(rt_sample_index.x) + + ((rt_sample_index.x >= tile_width_half) ? -int(tile_width_half) + : int(tile_width_half))); + } + return base_tiles * (tile_size_at_32bpp.x * tile_size_at_32bpp.y) + + ((rt_sample_index.y * tile_size_samples.x + rt_sample_index.x) << + format_ints_log2); +} + +#endif // XENIA_GPU_SHADERS_EDRAM_XESLI_ diff --git a/src/xenia/gpu/shaders/float24_round.ps.hlsl b/src/xenia/gpu/shaders/float24_round.ps.hlsl index 10840173c..87f0ae56a 100644 --- a/src/xenia/gpu/shaders/float24_round.ps.hlsl +++ b/src/xenia/gpu/shaders/float24_round.ps.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "xenos_draw.hlsli" struct XePSInput { diff --git a/src/xenia/gpu/shaders/float24_truncate.ps.hlsl b/src/xenia/gpu/shaders/float24_truncate.ps.hlsl index 29d4e6761..110215e51 100644 --- a/src/xenia/gpu/shaders/float24_truncate.ps.hlsl +++ b/src/xenia/gpu/shaders/float24_truncate.ps.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "xenos_draw.hlsli" struct XePSInput { diff --git a/src/xenia/gpu/shaders/host_depth_store.hlsli b/src/xenia/gpu/shaders/host_depth_store.hlsli deleted file mode 100644 index b722ee22a..000000000 --- a/src/xenia/gpu/shaders/host_depth_store.hlsli +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef XENIA_GPU_D3D12_SHADERS_HOST_DEPTH_STORE_HLSLI_ -#define XENIA_GPU_D3D12_SHADERS_HOST_DEPTH_STORE_HLSLI_ - -cbuffer XeHostDepthStoreRectangleConstants : register(b0) { - uint xe_host_depth_store_rectangle; -}; - -cbuffer XeHostDepthStoreRenderTargetConstants : register(b1) { - uint xe_host_depth_store_render_target; -}; -RWBuffer xe_host_depth_store_dest : register(u0); - -uint2 XeHostDepthStoreResolutionScale() { - return (xe_host_depth_store_render_target.xx >> uint2(10u, 12u)) & 0x3u; -} - -uint2 XeHostDepthStoreUnscaledOrigin() { - return ((xe_host_depth_store_rectangle.xx >> uint2(0u, 10u)) & 0x3FFu) << 3u; -} - -uint2 XeHostDepthStoreScaledOrigin() { - return XeHostDepthStoreUnscaledOrigin() * XeHostDepthStoreResolutionScale(); -} - -uint XeHostDepthStoreUnscaledWidthDiv8Minus1() { - return (xe_host_depth_store_rectangle >> 20u) & 0x3FFu; -} - -uint XeHostDepthStoreScaledWidthDiv8() { - return (XeHostDepthStoreUnscaledWidthDiv8Minus1() + 1u) * - XeHostDepthStoreResolutionScale().x; -} - -// As host depth is needed for at most one transfer destination per update, base -// is not passed to the shader - (0, 0) of the render target is at 0 of the -// destination buffer. - -uint XeHostDepthStorePitchTiles() { - return xe_host_depth_store_render_target & 0x3FFu; -} - -bool XeHostDepthStoreMsaa2xSupported() { - return bool((xe_host_depth_store_render_target >> 14u) & 0x1u); -} - -// 40-sample columns are not swapped for addressing simplicity (because this is -// used for depth -> depth transfers, where swapping isn't needed). - -#endif // XENIA_GPU_D3D12_SHADERS_HOST_DEPTH_STORE_HLSLI_ \ No newline at end of file diff --git a/src/xenia/gpu/shaders/host_depth_store.xesli b/src/xenia/gpu/shaders/host_depth_store.xesli new file mode 100644 index 000000000..366cdebeb --- /dev/null +++ b/src/xenia/gpu/shaders/host_depth_store.xesli @@ -0,0 +1,61 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SHADERS_HOST_DEPTH_STORE_XESLI_ +#define XENIA_GPU_SHADERS_HOST_DEPTH_STORE_XESLI_ + +#include "../../ui/shaders/xesl.xesli" + +xesl_push_constants_begin(b0, space0) + uint xe_host_depth_store_rectangle; + uint xe_host_depth_store_render_target; +xesl_push_constants_end + +xesl_uint2 XeHostDepthStoreResolutionScale() { + uint rt_constant = xesl_push_constant(xe_host_depth_store_render_target); + return (rt_constant.xx >> xesl_uint2(10u, 12u)) & 0x3u; +} + +xesl_uint2 XeHostDepthStoreUnscaledOrigin() { + uint rectangle_constant = xesl_push_constant(xe_host_depth_store_rectangle); + return ((rectangle_constant.xx >> xesl_uint2(0u, 10u)) & 0x3FFu) << 3u; +} + +xesl_uint2 XeHostDepthStoreScaledOrigin() { + return XeHostDepthStoreUnscaledOrigin() * XeHostDepthStoreResolutionScale(); +} + +uint XeHostDepthStoreUnscaledWidthDiv8Minus1() { + uint rectangle_constant = xesl_push_constant(xe_host_depth_store_rectangle); + return (rectangle_constant >> 20u) & 0x3FFu; +} + +uint XeHostDepthStoreScaledWidthDiv8() { + return (XeHostDepthStoreUnscaledWidthDiv8Minus1() + 1u) * + XeHostDepthStoreResolutionScale().x; +} + +// As host depth is needed for at most one transfer destination per update, base +// is not passed to the shader - (0, 0) of the render target is at 0 of the +// destination buffer. + +uint XeHostDepthStorePitchTiles() { + uint rt_constant = xesl_push_constant(xe_host_depth_store_render_target); + return rt_constant & 0x3FFu; +} + +bool XeHostDepthStoreMsaa2xSupported() { + uint rt_constant = xesl_push_constant(xe_host_depth_store_render_target); + return bool((rt_constant >> 14u) & 0x1u); +} + +// 40-sample columns are not swapped for addressing simplicity (because this is +// used for depth -> depth transfers, where swapping isn't needed). + +#endif // XENIA_GPU_SHADERS_HOST_DEPTH_STORE_XESLI_ diff --git a/src/xenia/gpu/shaders/host_depth_store_1xmsaa.cs.hlsl b/src/xenia/gpu/shaders/host_depth_store_1xmsaa.cs.hlsl deleted file mode 100644 index 95b99dba9..000000000 --- a/src/xenia/gpu/shaders/host_depth_store_1xmsaa.cs.hlsl +++ /dev/null @@ -1,32 +0,0 @@ -#include "edram.hlsli" -#include "host_depth_store.hlsli" - -Texture2D xe_host_depth_store_source : register(t0); - -[numthreads(8, 8, 1)] -void main(uint3 xe_thread_id : SV_DispatchThreadID) { - // 1 thread = 8 samples (same as resolve granularity). - uint2 resolution_scale = XeHostDepthStoreResolutionScale(); - // Group height can't cross resolve granularity, Y overflow check not needed. - [branch] if (xe_thread_id.x >= XeHostDepthStoreScaledWidthDiv8()) { - return; - } - uint2 pixel_index = - XeHostDepthStoreScaledOrigin() + (xe_thread_id.xy << uint2(3u, 0u)); - uint edram_address_int4s = - XeEdramOffsetInts(pixel_index, 0u, XeHostDepthStorePitchTiles(), - kXenosMsaaSamples_1X, false, 0u, 0u, - XeHostDepthStoreResolutionScale()) - >> 2u; - int3 source_pixel_index = int3(pixel_index, 0); - xe_host_depth_store_dest[edram_address_int4s] = asuint(float4( - xe_host_depth_store_source.Load(source_pixel_index), - xe_host_depth_store_source.Load(source_pixel_index + int3(1, 0, 0)), - xe_host_depth_store_source.Load(source_pixel_index + int3(2, 0, 0)), - xe_host_depth_store_source.Load(source_pixel_index + int3(3, 0, 0)))); - xe_host_depth_store_dest[edram_address_int4s + 1u] = asuint(float4( - xe_host_depth_store_source.Load(source_pixel_index + int3(4, 0, 0)), - xe_host_depth_store_source.Load(source_pixel_index + int3(5, 0, 0)), - xe_host_depth_store_source.Load(source_pixel_index + int3(6, 0, 0)), - xe_host_depth_store_source.Load(source_pixel_index + int3(7, 0, 0)))); -} diff --git a/src/xenia/gpu/shaders/host_depth_store_1xmsaa.cs.xesl b/src/xenia/gpu/shaders/host_depth_store_1xmsaa.cs.xesl new file mode 100644 index 000000000..d0310cb6a --- /dev/null +++ b/src/xenia/gpu/shaders/host_depth_store_1xmsaa.cs.xesl @@ -0,0 +1,59 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "../../ui/shaders/xesl.xesli" + +#include "edram.xesli" +#include "host_depth_store.xesli" + +xesl_entry + xesl_writeTypedStorageBuffer(xesl_uint4, xe_host_depth_store_dest, set=0, + binding=0, u0, space0) + xesl_entry_binding_next + xesl_texture(xesl_texture2D, xe_host_depth_store_source, set=1, binding=0, t0, + space0) +xesl_entry_bindings_end_local_size(8, 8, 1) + xesl_input_global_invocation_id +xesl_entry_signature_end + // 1 thread = 8 samples (same as resolve granularity). + // Group height can't cross resolve granularity, Y overflow check not needed. + xesl_dont_flatten + if (xesl_GlobalInvocationID.x >= XeHostDepthStoreScaledWidthDiv8()) { + return; + } + xesl_int2 pixel_index = + xesl_int2(XeHostDepthStoreScaledOrigin() + + (xesl_GlobalInvocationID.xy << xesl_uint2(3u, 0u))); + uint edram_address_int4s = + XeEdramOffsetInts(xesl_uint2(pixel_index), 0u, + XeHostDepthStorePitchTiles(), kXenosMsaaSamples_1X, + false, 0u, 0u, XeHostDepthStoreResolutionScale()) + >> 2u; + xesl_writeTypedStorageBufferStore( + xe_host_depth_store_dest, edram_address_int4s, + xesl_floatBitsToUint(xesl_float4( + xesl_texelFetch2D(xe_host_depth_store_source, pixel_index, 0).r, + xesl_texelFetch2D(xe_host_depth_store_source, + pixel_index + xesl_int2(1, 0), 0).r, + xesl_texelFetch2D(xe_host_depth_store_source, + pixel_index + xesl_int2(2, 0), 0).r, + xesl_texelFetch2D(xe_host_depth_store_source, + pixel_index + xesl_int2(3, 0), 0).r))); + xesl_writeTypedStorageBufferStore( + xe_host_depth_store_dest, edram_address_int4s + 1u, + xesl_floatBitsToUint(xesl_float4( + xesl_texelFetch2D(xe_host_depth_store_source, + pixel_index + xesl_int2(4, 0), 0).r, + xesl_texelFetch2D(xe_host_depth_store_source, + pixel_index + xesl_int2(5, 0), 0).r, + xesl_texelFetch2D(xe_host_depth_store_source, + pixel_index + xesl_int2(6, 0), 0).r, + xesl_texelFetch2D(xe_host_depth_store_source, + pixel_index + xesl_int2(7, 0), 0).r))); +xesl_entry_end diff --git a/src/xenia/gpu/shaders/host_depth_store_2xmsaa.cs.hlsl b/src/xenia/gpu/shaders/host_depth_store_2xmsaa.cs.hlsl deleted file mode 100644 index fb71781a2..000000000 --- a/src/xenia/gpu/shaders/host_depth_store_2xmsaa.cs.hlsl +++ /dev/null @@ -1,43 +0,0 @@ -#include "edram.hlsli" -#include "host_depth_store.hlsli" - -Texture2DMS xe_host_depth_store_source : register(t0); - -[numthreads(8, 8, 1)] -void main(uint3 xe_thread_id : SV_DispatchThreadID) { - // 1 thread = 8 samples (8x0.5 pixels, resolve granularity is 8 pixels). - // Group height can't cross resolve granularity, Y overflow check not needed. - [branch] if (xe_thread_id.x >= XeHostDepthStoreScaledWidthDiv8()) { - return; - } - uint2 pixel_index = XeHostDepthStoreScaledOrigin() + - uint2(xe_thread_id.x << 3u, xe_thread_id.y >> 1u); - uint dest_sample_index = xe_thread_id.y & 1u; - uint edram_address_int4s = - XeEdramOffsetInts(pixel_index, 0u, XeHostDepthStorePitchTiles(), - kXenosMsaaSamples_2X, false, 0u, dest_sample_index, - XeHostDepthStoreResolutionScale()) - >> 2u; - // Top and bottom to Direct3D 10.1+ top 1 and bottom 0 (for 2x) or top-left 0 - // and bottom-right 3 (for 4x). - int source_sample_index = - XeHostDepthStoreMsaa2xSupported() ? (dest_sample_index ? 0u : 1u) - : (dest_sample_index ? 3u : 0u); - xe_host_depth_store_dest[edram_address_int4s] = asuint(float4( - xe_host_depth_store_source.Load(int2(pixel_index), source_sample_index), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(1, 0), - source_sample_index), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(2, 0), - source_sample_index), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(3, 0), - source_sample_index))); - xe_host_depth_store_dest[edram_address_int4s + 1u] = asuint(float4( - xe_host_depth_store_source.Load(int2(pixel_index) + int2(4, 0), - source_sample_index), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(5, 0), - source_sample_index), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(6, 0), - source_sample_index), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(7, 0), - source_sample_index))); -} diff --git a/src/xenia/gpu/shaders/host_depth_store_2xmsaa.cs.xesl b/src/xenia/gpu/shaders/host_depth_store_2xmsaa.cs.xesl new file mode 100644 index 000000000..8210a6de3 --- /dev/null +++ b/src/xenia/gpu/shaders/host_depth_store_2xmsaa.cs.xesl @@ -0,0 +1,75 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "../../ui/shaders/xesl.xesli" + +#include "edram.xesli" +#include "host_depth_store.xesli" + +xesl_entry + xesl_writeTypedStorageBuffer(xesl_uint4, xe_host_depth_store_dest, set=0, + binding=0, u0, space0) + xesl_entry_binding_next + xesl_texture(xesl_texture2DMS, xe_host_depth_store_source, set=1, binding=0, + t0, space0) +xesl_entry_bindings_end_local_size(8, 8, 1) + xesl_input_global_invocation_id +xesl_entry_signature_end + // 1 thread = 8 samples (8x0.5 pixels, resolve granularity is 8 pixels). + // Group height can't cross resolve granularity, Y overflow check not needed. + xesl_dont_flatten + if (xesl_GlobalInvocationID.x >= XeHostDepthStoreScaledWidthDiv8()) { + return; + } + xesl_int2 pixel_index = + xesl_int2(XeHostDepthStoreScaledOrigin() + + xesl_uint2(xesl_GlobalInvocationID.x << 3u, + xesl_GlobalInvocationID.y >> 1u)); + uint dest_sample_index = xesl_GlobalInvocationID.y & 1u; + uint edram_address_int4s = + XeEdramOffsetInts(xesl_uint2(pixel_index), 0u, + XeHostDepthStorePitchTiles(), kXenosMsaaSamples_2X, + false, 0u, dest_sample_index, + XeHostDepthStoreResolutionScale()) + >> 2u; + // Top and bottom to Direct3D 10.1+ and Vulkan top 1 and bottom 0 (for 2x) or + // top-left 0 and bottom-right 3 (for 4x). + int source_sample_index = + XeHostDepthStoreMsaa2xSupported() ? (bool(dest_sample_index) ? 0 : 1) + : (bool(dest_sample_index) ? 3 : 0); + xesl_writeTypedStorageBufferStore( + xe_host_depth_store_dest, edram_address_int4s, + xesl_floatBitsToUint(xesl_float4( + xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index, + source_sample_index).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(1, 0), + source_sample_index).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(2, 0), + source_sample_index).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(3, 0), + source_sample_index).r))); + xesl_writeTypedStorageBufferStore( + xe_host_depth_store_dest, edram_address_int4s + 1u, + xesl_floatBitsToUint(xesl_float4( + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(4, 0), + source_sample_index).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(5, 0), + source_sample_index).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(6, 0), + source_sample_index).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(7, 0), + source_sample_index).r))); +xesl_entry_end diff --git a/src/xenia/gpu/shaders/host_depth_store_4xmsaa.cs.hlsl b/src/xenia/gpu/shaders/host_depth_store_4xmsaa.cs.hlsl deleted file mode 100644 index ce8e94568..000000000 --- a/src/xenia/gpu/shaders/host_depth_store_4xmsaa.cs.hlsl +++ /dev/null @@ -1,40 +0,0 @@ -#include "edram.hlsli" -#include "host_depth_store.hlsli" - -Texture2DMS xe_host_depth_store_source : register(t0); - -[numthreads(8, 8, 1)] -void main(uint3 xe_thread_id : SV_DispatchThreadID) { - // 1 thread = 8 samples (4x0.5 pixels, resolve granularity is 8 pixels). - // Group height can't cross resolve granularity, Y overflow check not needed. - [branch] if ((xe_thread_id.x >> 1u) >= XeHostDepthStoreScaledWidthDiv8()) { - return; - } - uint2 pixel_index = XeHostDepthStoreScaledOrigin() + - uint2(xe_thread_id.x << 2u, xe_thread_id.y >> 1u); - // For simplicity, passing samples directly, not pixels, to XeEdramOffsetInts. - uint edram_address_int4s = - XeEdramOffsetInts((pixel_index << 1u) | (xe_thread_id.xy & 1u), 0u, - XeHostDepthStorePitchTiles(), kXenosMsaaSamples_1X, - false, 0u, 0u, XeHostDepthStoreResolutionScale()) - >> 2u; - // Render target horizontal sample in bit 0, vertical sample in bit 1. - int source_sample_left = int((xe_thread_id.y & 1u) << 1u); - int source_sample_right = source_sample_left + 1; - xe_host_depth_store_dest[edram_address_int4s] = asuint(float4( - xe_host_depth_store_source.Load(int2(pixel_index), source_sample_left), - xe_host_depth_store_source.Load(int2(pixel_index), source_sample_right), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(1, 0), - source_sample_left), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(1, 0), - source_sample_right))); - xe_host_depth_store_dest[edram_address_int4s + 1u] = asuint(float4( - xe_host_depth_store_source.Load(int2(pixel_index) + int2(2, 0), - source_sample_left), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(2, 0), - source_sample_right), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(3, 0), - source_sample_left), - xe_host_depth_store_source.Load(int2(pixel_index) + int2(3, 0), - source_sample_right))); -} diff --git a/src/xenia/gpu/shaders/host_depth_store_4xmsaa.cs.xesl b/src/xenia/gpu/shaders/host_depth_store_4xmsaa.cs.xesl new file mode 100644 index 000000000..05fac4584 --- /dev/null +++ b/src/xenia/gpu/shaders/host_depth_store_4xmsaa.cs.xesl @@ -0,0 +1,72 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "../../ui/shaders/xesl.xesli" + +#include "edram.xesli" +#include "host_depth_store.xesli" + +xesl_entry + xesl_writeTypedStorageBuffer(xesl_uint4, xe_host_depth_store_dest, set=0, + binding=0, u0, space0) + xesl_entry_binding_next + xesl_texture(xesl_texture2DMS, xe_host_depth_store_source, set=1, binding=0, + t0, space0) +xesl_entry_bindings_end_local_size(8, 8, 1) + xesl_input_global_invocation_id +xesl_entry_signature_end + // 1 thread = 8 samples (4x0.5 pixels, resolve granularity is 8 pixels). + // Group height can't cross resolve granularity, Y overflow check not needed. + xesl_dont_flatten + if ((xesl_GlobalInvocationID.x >> 1u) >= XeHostDepthStoreScaledWidthDiv8()) { + return; + } + xesl_int2 pixel_index = + xesl_int2(XeHostDepthStoreScaledOrigin() + + xesl_uint2(xesl_GlobalInvocationID.x << 2u, + xesl_GlobalInvocationID.y >> 1u)); + // For simplicity, passing samples directly, not pixels, to XeEdramOffsetInts. + uint edram_address_int4s = + XeEdramOffsetInts( + (xesl_uint2(pixel_index) << 1u) | (xesl_GlobalInvocationID.xy & 1u), + 0u, XeHostDepthStorePitchTiles(), kXenosMsaaSamples_1X, false, 0u, 0u, + XeHostDepthStoreResolutionScale()) + >> 2u; + // Render target horizontal sample in bit 0, vertical sample in bit 1. + int source_sample_left = int((xesl_GlobalInvocationID.y & 1u) << 1u); + int source_sample_right = source_sample_left + 1; + xesl_writeTypedStorageBufferStore( + xe_host_depth_store_dest, edram_address_int4s, + xesl_floatBitsToUint(xesl_float4( + xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index, + source_sample_left).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index, + source_sample_right).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(1, 0), + source_sample_left).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(1, 0), + source_sample_right).r))); + xesl_writeTypedStorageBufferStore( + xe_host_depth_store_dest, edram_address_int4s + 1u, + xesl_floatBitsToUint(xesl_float4( + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(2, 0), + source_sample_left).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(2, 0), + source_sample_right).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(3, 0), + source_sample_left).r, + xesl_texelFetch2DMS(xe_host_depth_store_source, + pixel_index + xesl_int2(3, 0), + source_sample_right).r))); +xesl_entry_end diff --git a/src/xenia/gpu/shaders/passthrough_position_xy.vs.hlsl b/src/xenia/gpu/shaders/passthrough_position_xy.vs.hlsl deleted file mode 100644 index 97f4f0e1d..000000000 --- a/src/xenia/gpu/shaders/passthrough_position_xy.vs.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -float4 main(float2 position : POSITION) : SV_Position { - return float4(position, 0.0, 1.0); -} diff --git a/src/xenia/gpu/shaders/passthrough_position_xy.vs.xesl b/src/xenia/gpu/shaders/passthrough_position_xy.vs.xesl new file mode 100644 index 000000000..292e64107 --- /dev/null +++ b/src/xenia/gpu/shaders/passthrough_position_xy.vs.xesl @@ -0,0 +1,19 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "../../ui/shaders/xesl.xesli" + +xesl_entry +xesl_entry_bindings_end + xesl_input(xesl_float2, xe_in_position, 0, POSITION) + xesl_entry_signature_next + xesl_output_position +xesl_entry_signature_end + xesl_Position = xesl_float4(xe_in_position, 0.0, 1.0); +xesl_entry_end diff --git a/src/xenia/gpu/shaders/pixel_formats.hlsli b/src/xenia/gpu/shaders/pixel_formats.xesli similarity index 55% rename from src/xenia/gpu/shaders/pixel_formats.hlsli rename to src/xenia/gpu/shaders/pixel_formats.xesli index 473a0620d..e2d350a8e 100644 --- a/src/xenia/gpu/shaders/pixel_formats.hlsli +++ b/src/xenia/gpu/shaders/pixel_formats.xesli @@ -1,5 +1,16 @@ -#ifndef XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_ -#define XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SHADERS_PIXEL_FORMATS_XESLI_ +#define XENIA_GPU_SHADERS_PIXEL_FORMATS_XESLI_ + +#include "../../ui/shaders/xesl.xesli" #define kXenosFormat_1_REVERSE 0u #define kXenosFormat_1 1u @@ -84,212 +95,66 @@ // ColorFormat packing, according to the Direct3D 11.3 functional specification. -uint XePackR8UNorm(float f) { - return uint(saturate(f) * 255.0f + 0.5f); -} - -uint XePackR5G5B5A1UNorm(float4 f) { - uint4 n = uint4(saturate(f) * float2(31.0f, 1.0f).xxxy + 0.5f); +uint XePackR5G5B5A1UNorm(xesl_float4 f) { + xesl_uint4 n = + xesl_uint4(xesl_saturate(f) * xesl_float2(31.0, 1.0).xxxy + 0.5); return n.r | (n.g << 5) | (n.b << 10) | (n.a << 15); } -uint XePackR5G6B5UNorm(float3 f) { - uint3 n = uint3(saturate(f) * float3(31.0f, 63.0f, 31.0f) + 0.5f); +uint XePackR5G6B5UNorm(xesl_float3 f) { + xesl_uint3 n = + xesl_uint3(xesl_saturate(f) * xesl_float3(31.0, 63.0, 31.0) + 0.5); return n.r | (n.g << 5) | (n.b << 11); } -uint XePackR5G5B6UNorm(float3 f) { - uint3 n = uint3(saturate(f) * float3(31.0f, 31.0f, 63.0f) + 0.5f); +uint XePackR5G5B6UNorm(xesl_float3 f) { + xesl_uint3 n = + xesl_uint3(xesl_saturate(f) * xesl_float3(31.0, 31.0, 63.0) + 0.5); return n.r | (n.g << 5) | (n.b << 10); } -uint XePackR8G8UNorm(float2 f) { - uint2 n = uint2(saturate(f) * 255.0f + 0.5f); - return n.r | (n.g << 8); -} - -uint XePackR8G8B8UNorm(float3 f) { - uint3 n = uint3(saturate(f) * 255.0f + 0.5f); - return n.r | (n.g << 8) | (n.b << 16); -} - -uint XePackR8G8B8A8UNorm(float4 f) { - uint4 n = uint4(saturate(f) * 255.0f + 0.5f); +uint XePackR8G8B8A8UNorm(xesl_float4 f) { + xesl_uint4 n = xesl_uint4(xesl_saturate(f) * 255.0 + 0.5); return n.r | (n.g << 8) | (n.b << 16) | (n.a << 24); } -uint XePackR10G10B10A2UNorm(float4 f) { - uint4 n = uint4(saturate(f) * float2(1023.0f, 3.0f).xxxy + 0.5f); +uint XePackR10G10B10A2UNorm(xesl_float4 f) { + xesl_uint4 n = + xesl_uint4(xesl_saturate(f) * xesl_float2(1023.0, 3.0).xxxy + 0.5); return n.r | (n.g << 10) | (n.b << 20) | (n.a << 30); } -uint XePackR4G4B4A4UNorm(float4 f) { - uint4 n = uint4(saturate(f) * 15.0f + 0.5f); +uint XePackR4G4B4A4UNorm(xesl_float4 f) { + xesl_uint4 n = xesl_uint4(xesl_saturate(f) * 15.0 + 0.5); return n.r | (n.g << 4) | (n.b << 8) | (n.a << 12); } -uint XePackR11G11B10UNorm(float3 f) { - uint3 n = uint3(saturate(f) * float3(2047.0f, 2047.0f, 1023.0f) + 0.5f); +uint XePackR11G11B10UNorm(xesl_float3 f) { + xesl_uint3 n = + xesl_uint3(xesl_saturate(f) * xesl_float3(2047.0, 2047.0, 1023.0) + 0.5); return n.r | (n.g << 11) | (n.b << 22); } -uint XePackR10G11B11UNorm(float3 f) { - uint3 n = uint3(saturate(f) * float3(1023.0f, 2047.0f, 2047.0f) + 0.5f); +uint XePackR10G11B11UNorm(xesl_float3 f) { + xesl_uint3 n = + xesl_uint3(xesl_saturate(f) * xesl_float3(1023.0, 2047.0, 2047.0) + 0.5); return n.r | (n.g << 10) | (n.b << 21); } -uint XePackR16UNorm(float f) { - return uint(saturate(f) * 65535.0f + 0.5f); -} - -uint XePackR16G16UNorm(float2 f) { - uint2 n = uint2(saturate(f) * 65535.0f + 0.5f); +uint XePackR16G16UNorm(xesl_float2 f) { + xesl_uint2 n = xesl_uint2(xesl_saturate(f) * 65535.0 + 0.5); return n.r | (n.g << 16); } -uint2 XePackR16G16B16A16UNorm(float4 f) { - uint4 n = uint4(saturate(f) * 65535.0f + 0.5f); +xesl_uint2 XePackR16G16B16A16UNorm(xesl_float4 f) { + xesl_uint4 n = xesl_uint4(xesl_saturate(f) * 65535.0 + 0.5); return n.rb | (n.ga << 16); } -uint4 XePack16bpp4PixelsInUInt4(float4 pixel_0, float4 pixel_1, float4 pixel_2, - float4 pixel_3, uint format) { - uint4 packed; - switch (format) { - case kXenosFormat_1_5_5_5: - packed.x = XePackR5G5B5A1UNorm(pixel_0); - packed.y = XePackR5G5B5A1UNorm(pixel_1); - packed.z = XePackR5G5B5A1UNorm(pixel_2); - packed.w = XePackR5G5B5A1UNorm(pixel_3); - break; - case kXenosFormat_5_6_5: - packed.x = XePackR5G6B5UNorm(pixel_0.rgb); - packed.y = XePackR5G6B5UNorm(pixel_1.rgb); - packed.z = XePackR5G6B5UNorm(pixel_2.rgb); - packed.w = XePackR5G6B5UNorm(pixel_3.rgb); - break; - case kXenosFormat_6_5_5: - packed.x = XePackR5G5B6UNorm(pixel_0.rgb); - packed.y = XePackR5G5B6UNorm(pixel_1.rgb); - packed.z = XePackR5G5B6UNorm(pixel_2.rgb); - packed.w = XePackR5G5B6UNorm(pixel_3.rgb); - break; - case kXenosFormat_8_8: - packed.x = XePackR8G8UNorm(pixel_0.rg); - packed.y = XePackR8G8UNorm(pixel_1.rg); - packed.z = XePackR8G8UNorm(pixel_2.rg); - packed.w = XePackR8G8UNorm(pixel_3.rg); - break; - case kXenosFormat_4_4_4_4: - packed.x = XePackR4G4B4A4UNorm(pixel_0); - packed.y = XePackR4G4B4A4UNorm(pixel_1); - packed.z = XePackR4G4B4A4UNorm(pixel_2); - packed.w = XePackR4G4B4A4UNorm(pixel_3); - break; - case kXenosFormat_16: - packed.x = XePackR16UNorm(pixel_0.r); - packed.y = XePackR16UNorm(pixel_1.r); - packed.z = XePackR16UNorm(pixel_2.r); - packed.w = XePackR16UNorm(pixel_3.r); - break; - default: - // Treat as something (16_FLOAT). - packed = f32tof16(float4(pixel_0.r, pixel_1.r, pixel_2.r, pixel_3.r)); - break; - } - return packed; -} - -void XePack16bpp5PixelsInUInt41(float4 pixel_0, float4 pixel_1, float4 pixel_2, - float4 pixel_3, float4 pixel_4, uint format, - out uint4 packed, out uint packed_4) { - switch (format) { - case kXenosFormat_1_5_5_5: - packed.x = XePackR5G5B5A1UNorm(pixel_0); - packed.y = XePackR5G5B5A1UNorm(pixel_1); - packed.z = XePackR5G5B5A1UNorm(pixel_2); - packed.w = XePackR5G5B5A1UNorm(pixel_3); - packed_4 = XePackR5G5B5A1UNorm(pixel_4); - break; - case kXenosFormat_5_6_5: - packed.x = XePackR5G6B5UNorm(pixel_0.rgb); - packed.y = XePackR5G6B5UNorm(pixel_1.rgb); - packed.z = XePackR5G6B5UNorm(pixel_2.rgb); - packed.w = XePackR5G6B5UNorm(pixel_3.rgb); - packed_4 = XePackR5G6B5UNorm(pixel_4.rgb); - break; - case kXenosFormat_6_5_5: - packed.x = XePackR5G5B6UNorm(pixel_0.rgb); - packed.y = XePackR5G5B6UNorm(pixel_1.rgb); - packed.z = XePackR5G5B6UNorm(pixel_2.rgb); - packed.w = XePackR5G5B6UNorm(pixel_3.rgb); - packed_4 = XePackR5G5B6UNorm(pixel_4.rgb); - break; - case kXenosFormat_8_8: - packed.x = XePackR8G8UNorm(pixel_0.rg); - packed.y = XePackR8G8UNorm(pixel_1.rg); - packed.z = XePackR8G8UNorm(pixel_2.rg); - packed.w = XePackR8G8UNorm(pixel_3.rg); - packed_4 = XePackR8G8UNorm(pixel_4.rg); - break; - case kXenosFormat_4_4_4_4: - packed.x = XePackR4G4B4A4UNorm(pixel_0); - packed.y = XePackR4G4B4A4UNorm(pixel_1); - packed.z = XePackR4G4B4A4UNorm(pixel_2); - packed.w = XePackR4G4B4A4UNorm(pixel_3); - packed_4 = XePackR4G4B4A4UNorm(pixel_4); - break; - case kXenosFormat_16: - packed.x = XePackR16UNorm(pixel_0.r); - packed.y = XePackR16UNorm(pixel_1.r); - packed.z = XePackR16UNorm(pixel_2.r); - packed.w = XePackR16UNorm(pixel_3.r); - packed_4 = XePackR16UNorm(pixel_4.r); - break; - default: - // Treat as something (16_FLOAT). - packed = f32tof16(float4(pixel_0.r, pixel_1.r, pixel_2.r, pixel_3.r)); - packed_4 = f32tof16(pixel_4.r); - break; - } -} - -uint XePack16bpp2PixelsInUInt(float4 pixel_0, float4 pixel_1, uint format) { - uint packed; - switch (format) { - case kXenosFormat_1_5_5_5: - packed = XePackR5G5B5A1UNorm(pixel_0) | - (XePackR5G5B5A1UNorm(pixel_1) << 16u); - break; - case kXenosFormat_5_6_5: - packed = XePackR5G6B5UNorm(pixel_0.rgb) | - (XePackR5G6B5UNorm(pixel_1.rgb) << 16u); - break; - case kXenosFormat_6_5_5: - packed = XePackR5G5B6UNorm(pixel_0.rgb) | - (XePackR5G5B6UNorm(pixel_1.rgb) << 16u); - break; - case kXenosFormat_8_8: - packed = XePackR8G8B8A8UNorm(float4(pixel_0.rg, pixel_1.rg)); - break; - case kXenosFormat_4_4_4_4: - packed = XePackR4G4B4A4UNorm(pixel_0) | - (XePackR4G4B4A4UNorm(pixel_1) << 16u); - break; - case kXenosFormat_16: - packed = XePackR16G16UNorm(float2(pixel_0.r, pixel_1.r)); - break; - default: - // Treat as something (16_FLOAT). - packed = f32tof16(pixel_0.r) | (f32tof16(pixel_1.r) << 16u); - break; - } - return packed; -} - -uint2 XePack16bpp4PixelsInUInt2(float4 pixel_0, float4 pixel_1, float4 pixel_2, - float4 pixel_3, uint format) { - uint2 packed; +xesl_uint2 XePack16bpp4PixelsInUInt2(xesl_float4 pixel_0, xesl_float4 pixel_1, + xesl_float4 pixel_2, xesl_float4 pixel_3, + uint format) { + xesl_uint2 packed; switch (format) { case kXenosFormat_1_5_5_5: packed.x = XePackR5G5B5A1UNorm(pixel_0) | @@ -310,8 +175,8 @@ uint2 XePack16bpp4PixelsInUInt2(float4 pixel_0, float4 pixel_1, float4 pixel_2, (XePackR5G5B6UNorm(pixel_3.rgb) << 16u); break; case kXenosFormat_8_8: - packed.x = XePackR8G8B8A8UNorm(float4(pixel_0.rg, pixel_1.rg)); - packed.y = XePackR8G8B8A8UNorm(float4(pixel_2.rg, pixel_3.rg)); + packed.x = XePackR8G8B8A8UNorm(xesl_float4(pixel_0.rg, pixel_1.rg)); + packed.y = XePackR8G8B8A8UNorm(xesl_float4(pixel_2.rg, pixel_3.rg)); break; case kXenosFormat_4_4_4_4: packed.x = XePackR4G4B4A4UNorm(pixel_0) | @@ -320,63 +185,22 @@ uint2 XePack16bpp4PixelsInUInt2(float4 pixel_0, float4 pixel_1, float4 pixel_2, (XePackR4G4B4A4UNorm(pixel_3) << 16u); break; case kXenosFormat_16: - packed = XePackR16G16B16A16UNorm(float4(pixel_0.r, pixel_1.r, - pixel_2.r, pixel_3.r)); + packed = XePackR16G16B16A16UNorm(xesl_float4(pixel_0.r, pixel_1.r, + pixel_2.r, pixel_3.r)); break; default: // Treat as something (16_FLOAT). - packed = f32tof16(float2(pixel_0.r, pixel_2.r)) | - (f32tof16(float2(pixel_1.r, pixel_3.r)) << 16u); + packed.x = xesl_packHalf2x16(xesl_float2(pixel_0.r, pixel_1.r)); + packed.y = xesl_packHalf2x16(xesl_float2(pixel_2.r, pixel_3.r)); break; } return packed; } -uint2 XePack32bpp2Pixels(float4 pixel_0, float4 pixel_1, uint format) { - uint2 packed; - switch (format) { - case kXenosFormat_8_8_8_8: - // TODO(Triang3l): Investigate 8_8_8_8_A. - case kXenosFormat_8_8_8_8_A: - case kXenosFormat_8_8_8_8_AS_16_16_16_16: - packed.x = XePackR8G8B8A8UNorm(pixel_0); - packed.y = XePackR8G8B8A8UNorm(pixel_1); - break; - case kXenosFormat_2_10_10_10: - case kXenosFormat_2_10_10_10_AS_16_16_16_16: - packed.x = XePackR10G10B10A2UNorm(pixel_0); - packed.y = XePackR10G10B10A2UNorm(pixel_1); - break; - case kXenosFormat_10_11_11: - case kXenosFormat_10_11_11_AS_16_16_16_16: - packed.x = XePackR11G11B10UNorm(pixel_0.rgb); - packed.y = XePackR11G11B10UNorm(pixel_1.rgb); - break; - case kXenosFormat_11_11_10: - case kXenosFormat_11_11_10_AS_16_16_16_16: - packed.x = XePackR10G11B11UNorm(pixel_0.rgb); - packed.y = XePackR10G11B11UNorm(pixel_1.rgb); - break; - case kXenosFormat_16_16: - packed.x = XePackR16G16UNorm(pixel_0.rg); - packed.y = XePackR16G16UNorm(pixel_1.rg); - break; - case kXenosFormat_16_16_FLOAT: - packed = f32tof16(float2(pixel_0.r, pixel_1.r)) | - (f32tof16(float2(pixel_0.g, pixel_1.g)) << 16u); - break; - default: - // Treat as 32_FLOAT. - packed.x = asuint(pixel_0.r); - packed.y = asuint(pixel_1.r); - break; - } - return packed; -} - -uint4 XePack32bpp4Pixels(float4 pixel_0, float4 pixel_1, float4 pixel_2, - float4 pixel_3, uint format) { - uint4 packed; +xesl_uint4 XePack32bpp4Pixels(xesl_float4 pixel_0, xesl_float4 pixel_1, + xesl_float4 pixel_2, xesl_float4 pixel_3, + uint format) { + xesl_uint4 packed; switch (format) { case kXenosFormat_8_8_8_8: // TODO(Triang3l): Investigate 8_8_8_8_A. @@ -415,103 +239,25 @@ uint4 XePack32bpp4Pixels(float4 pixel_0, float4 pixel_1, float4 pixel_2, packed.w = XePackR16G16UNorm(pixel_3.rg); break; case kXenosFormat_16_16_FLOAT: - packed = - f32tof16(float4(pixel_0.r, pixel_1.r, pixel_2.r, pixel_3.r)) | - (f32tof16(float4(pixel_0.g, pixel_1.g, pixel_2.g, pixel_3.g)) << 16u); + packed.x = xesl_packHalf2x16(xesl_float2(pixel_0.r, pixel_0.g)); + packed.y = xesl_packHalf2x16(xesl_float2(pixel_1.r, pixel_1.g)); + packed.z = xesl_packHalf2x16(xesl_float2(pixel_2.r, pixel_2.g)); + packed.w = xesl_packHalf2x16(xesl_float2(pixel_3.r, pixel_3.g)); break; default: // Treat as 32_FLOAT. - packed.x = asuint(pixel_0.r); - packed.y = asuint(pixel_1.r); - packed.z = asuint(pixel_2.r); - packed.w = asuint(pixel_3.r); + packed.x = xesl_floatBitsToUint(pixel_0.r); + packed.y = xesl_floatBitsToUint(pixel_1.r); + packed.z = xesl_floatBitsToUint(pixel_2.r); + packed.w = xesl_floatBitsToUint(pixel_3.r); break; } return packed; } -void XePack32bpp5Pixels(float4 pixel_0, float4 pixel_1, float4 pixel_2, - float4 pixel_3, float4 pixel_4, uint format, - out uint4 packed, out uint packed_4) { - switch (format) { - case kXenosFormat_8_8_8_8: - // TODO(Triang3l): Investigate 8_8_8_8_A. - case kXenosFormat_8_8_8_8_A: - case kXenosFormat_8_8_8_8_AS_16_16_16_16: - packed.x = XePackR8G8B8A8UNorm(pixel_0); - packed.y = XePackR8G8B8A8UNorm(pixel_1); - packed.z = XePackR8G8B8A8UNorm(pixel_2); - packed.w = XePackR8G8B8A8UNorm(pixel_3); - packed_4 = XePackR8G8B8A8UNorm(pixel_4); - break; - case kXenosFormat_2_10_10_10: - case kXenosFormat_2_10_10_10_AS_16_16_16_16: - packed.x = XePackR10G10B10A2UNorm(pixel_0); - packed.y = XePackR10G10B10A2UNorm(pixel_1); - packed.z = XePackR10G10B10A2UNorm(pixel_2); - packed.w = XePackR10G10B10A2UNorm(pixel_3); - packed_4 = XePackR10G10B10A2UNorm(pixel_4); - break; - case kXenosFormat_10_11_11: - case kXenosFormat_10_11_11_AS_16_16_16_16: - packed.x = XePackR11G11B10UNorm(pixel_0.rgb); - packed.y = XePackR11G11B10UNorm(pixel_1.rgb); - packed.z = XePackR11G11B10UNorm(pixel_2.rgb); - packed.w = XePackR11G11B10UNorm(pixel_3.rgb); - packed_4 = XePackR11G11B10UNorm(pixel_4.rgb); - break; - case kXenosFormat_11_11_10: - case kXenosFormat_11_11_10_AS_16_16_16_16: - packed.x = XePackR10G11B11UNorm(pixel_0.rgb); - packed.y = XePackR10G11B11UNorm(pixel_1.rgb); - packed.z = XePackR10G11B11UNorm(pixel_2.rgb); - packed.w = XePackR10G11B11UNorm(pixel_3.rgb); - packed_4 = XePackR10G11B11UNorm(pixel_4.rgb); - break; - case kXenosFormat_16_16: - packed.x = XePackR16G16UNorm(pixel_0.rg); - packed.y = XePackR16G16UNorm(pixel_1.rg); - packed.z = XePackR16G16UNorm(pixel_2.rg); - packed.w = XePackR16G16UNorm(pixel_3.rg); - packed_4 = XePackR16G16UNorm(pixel_4.rg); - break; - case kXenosFormat_16_16_FLOAT: - packed = - f32tof16(float4(pixel_0.r, pixel_1.r, pixel_2.r, pixel_3.r)) | - (f32tof16(float4(pixel_0.g, pixel_1.g, pixel_2.g, pixel_3.g)) << 16u); - packed_4 = f32tof16(pixel_4.r) | (f32tof16(pixel_4.g) << 16u); - break; - default: - // Treat as 32_FLOAT. - packed.x = asuint(pixel_0.r); - packed.y = asuint(pixel_1.r); - packed.z = asuint(pixel_2.r); - packed.w = asuint(pixel_3.r); - packed_4 = asuint(pixel_4.r); - break; - } -} - -uint2 XePack64bppPixel(float4 pixel, uint format) { - uint2 packed; - switch (format) { - case kXenosFormat_16_16_16_16: - packed = XePackR16G16B16A16UNorm(pixel); - break; - case kXenosFormat_16_16_16_16_FLOAT: - packed = f32tof16(pixel.rb) | (f32tof16(pixel.ga) << 16u); - break; - default: - // Treat as 32_32_FLOAT. - packed = asuint(pixel.rg); - break; - } - return packed; -} - -void XePack64bpp4Pixels(float4 pixel_0, float4 pixel_1, float4 pixel_2, - float4 pixel_3, uint format, out uint4 packed_01, - out uint4 packed_23) { +void XePack64bpp4Pixels(xesl_float4 pixel_0, xesl_float4 pixel_1, + xesl_float4 pixel_2, xesl_float4 pixel_3, uint format, + out xesl_uint4 packed_01, out xesl_uint4 packed_23) { switch (format) { case kXenosFormat_16_16_16_16: packed_01.xy = XePackR16G16B16A16UNorm(pixel_0); @@ -520,120 +266,84 @@ void XePack64bpp4Pixels(float4 pixel_0, float4 pixel_1, float4 pixel_2, packed_23.zw = XePackR16G16B16A16UNorm(pixel_3); break; case kXenosFormat_16_16_16_16_FLOAT: - packed_01 = - f32tof16(float4(pixel_0.r, pixel_0.b, pixel_1.r, pixel_1.b)) | - (f32tof16(float4(pixel_0.g, pixel_0.a, pixel_1.g, pixel_1.a)) << 16u); - packed_23 = - f32tof16(float4(pixel_2.r, pixel_2.b, pixel_3.r, pixel_3.b)) | - (f32tof16(float4(pixel_2.g, pixel_2.a, pixel_3.g, pixel_3.a)) << 16u); + packed_01.x = xesl_packHalf2x16(xesl_float2(pixel_0.r, pixel_0.g)); + packed_01.y = xesl_packHalf2x16(xesl_float2(pixel_0.b, pixel_0.a)); + packed_01.z = xesl_packHalf2x16(xesl_float2(pixel_1.r, pixel_1.g)); + packed_01.w = xesl_packHalf2x16(xesl_float2(pixel_1.b, pixel_1.a)); + packed_23.x = xesl_packHalf2x16(xesl_float2(pixel_2.r, pixel_2.g)); + packed_23.y = xesl_packHalf2x16(xesl_float2(pixel_2.b, pixel_2.a)); + packed_23.z = xesl_packHalf2x16(xesl_float2(pixel_3.r, pixel_3.g)); + packed_23.w = xesl_packHalf2x16(xesl_float2(pixel_3.b, pixel_3.a)); break; default: // Treat as 32_32_FLOAT. - packed_01 = asuint(float4(pixel_0.rg, pixel_1.rg)); - packed_23 = asuint(float4(pixel_2.rg, pixel_3.rg)); - break; - } -} - -void XePack64bpp5Pixels(float4 pixel_0, float4 pixel_1, float4 pixel_2, - float4 pixel_3, float4 pixel_4, uint format, - out uint4 packed_01, out uint4 packed_23, - out uint2 packed_4) { - switch (format) { - case kXenosFormat_16_16_16_16: - packed_01.xy = XePackR16G16B16A16UNorm(pixel_0); - packed_01.zw = XePackR16G16B16A16UNorm(pixel_1); - packed_23.xy = XePackR16G16B16A16UNorm(pixel_2); - packed_23.zw = XePackR16G16B16A16UNorm(pixel_3); - packed_4 = XePackR16G16B16A16UNorm(pixel_4); - break; - case kXenosFormat_16_16_16_16_FLOAT: - packed_01 = - f32tof16(float4(pixel_0.r, pixel_0.b, pixel_1.r, pixel_1.b)) | - (f32tof16(float4(pixel_0.g, pixel_0.a, pixel_1.g, pixel_1.a)) << 16u); - packed_23 = - f32tof16(float4(pixel_2.r, pixel_2.b, pixel_3.r, pixel_3.b)) | - (f32tof16(float4(pixel_2.g, pixel_2.a, pixel_3.g, pixel_3.a)) << 16u); - packed_4 = f32tof16(float2(pixel_4.r, pixel_4.b)) | - (f32tof16(float2(pixel_4.g, pixel_4.a)) << 16u); - break; - default: - // Treat as 32_32_FLOAT. - packed_01 = asuint(float4(pixel_0.rg, pixel_1.rg)); - packed_23 = asuint(float4(pixel_2.rg, pixel_3.rg)); - packed_4 = asuint(pixel_4.rg); + packed_01 = xesl_floatBitsToUint(xesl_float4(pixel_0.rg, pixel_1.rg)); + packed_23 = xesl_floatBitsToUint(xesl_float4(pixel_2.rg, pixel_3.rg)); break; } } // EDRAM color format unpacking. -float XeUnpackR8UNorm(uint p) { - return float(p & 255u) * (1.0f / 255.0f); +xesl_float4 XeUnpackR8UNormX4(xesl_uint4 p) { + return xesl_float4(p & 255u) * (1.0 / 255.0); } -float4 XeUnpackR8UNormX4(uint4 p) { - return float4(p & 255u) * (1.0f / 255.0f); +xesl_float4 XeUnpackR8G8B8A8UNorm(uint p) { + return xesl_float4((p.xxxx >> xesl_uint4(0u, 8u, 16u, 24u)) & 255u) * + (1.0 / 255.0); } -float4 XeUnpackR8G8B8A8UNorm(uint p) { - return float4((p >> uint4(0u, 8u, 16u, 24u)) & 255u) * (1.0f / 255.0f); +xesl_float4 XeUnpackR10UNormX4(xesl_uint4 p) { + return xesl_float4(p & 1023u) * (1.0 / 1023.0); } -float XeUnpackR10UNorm(uint p) { - return float(p & 1023u) * (1.0f / 1023.0f); +xesl_float4 XeUnpackR10G10B10A2UNorm(uint p) { + return xesl_float4((p.xxxx >> xesl_uint4(0u, 10u, 20u, 30u)) & + xesl_uint2(1023u, 3u).xxxy) * + xesl_float2(1.0 / 1023.0, 1.0 / 3.0).xxxy; } -float4 XeUnpackR10UNormX4(uint4 p) { - return float4(p & 1023u) * (1.0f / 1023.0f); -} - -float4 XeUnpackR10G10B10A2UNorm(uint p) { - return float4((p >> uint4(0u, 10u, 20u, 30u)) & uint2(1023u, 3u).xxxy) * - float2(1.0f / 1023.0f, 1.0f / 3.0f).xxxy; -} - -float4 XeUnpackR10FloatX4(uint4 p) { +xesl_float4 XeUnpackR10FloatX4(xesl_uint4 p) { // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp - uint4 f10u32 = p & 0x3FFu; - uint4 mantissa = f10u32 & 0x7Fu; - uint4 exponent = f10u32 >> 7u; + xesl_uint4 f10u32 = p & 0x3FFu; + xesl_uint4 mantissa = f10u32 & 0x7Fu; + xesl_uint4 exponent = f10u32 >> 7u; // Normalize the values for the denormalized components. // Exponent = 1; // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x80) == 0); - bool4 is_denormalized = exponent == 0u; - uint4 mantissa_lzcnt = (7u).xxxx - firstbithigh(mantissa); - exponent = is_denormalized ? ((1u).xxxx - mantissa_lzcnt) : exponent; - mantissa = - is_denormalized ? ((mantissa << mantissa_lzcnt) & 0x7Fu) : mantissa; + xesl_bool4 is_denormalized = xesl_equal(exponent, (0u).xxxx); + xesl_uint4 mantissa_lzcnt = (7u).xxxx - xesl_findMSB(mantissa); + exponent = + xesl_select(is_denormalized, ((1u).xxxx - mantissa_lzcnt), exponent); + mantissa = xesl_select(is_denormalized, + ((mantissa << mantissa_lzcnt) & 0x7Fu), mantissa); // Combine into 32-bit float bits and clear zeros. - return asfloat( - (f10u32 != 0u) ? (((exponent + 124u) << 23u) | (mantissa << 16u)) - : (0u).xxxx); + return xesl_uintBitsToFloat(xesl_select( + xesl_equal(f10u32, (0u).xxxx), (0u).xxxx, + ((exponent + 124u) << 23u) | (mantissa << 16u))); } -float XeUnpackR10Float(uint p) { - return XeUnpackR10FloatX4(p.xxxx).x; -} - -float4 XeUnpackR10G10B10A2Float(uint p) { +xesl_float4 XeUnpackR10G10B10A2Float(uint p) { // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp - uint3 rgb_f10u32 = (p.xxx >> uint3(0u, 10u, 20u)) & 0x3FFu; - uint3 mantissa = rgb_f10u32 & 0x7Fu; - uint3 exponent = rgb_f10u32 >> 7u; + xesl_uint3 rgb_f10u32 = (p.xxx >> xesl_uint3(0u, 10u, 20u)) & 0x3FFu; + xesl_uint3 mantissa = rgb_f10u32 & 0x7Fu; + xesl_uint3 exponent = rgb_f10u32 >> 7u; // Normalize the values for the denormalized components. // Exponent = 1; // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x80) == 0); - bool3 is_denormalized = exponent == 0u; - uint3 mantissa_lzcnt = (7u).xxx - firstbithigh(mantissa); - exponent = is_denormalized ? ((1u).xxx - mantissa_lzcnt) : exponent; - mantissa = - is_denormalized ? ((mantissa << mantissa_lzcnt) & 0x7Fu) : mantissa; + xesl_bool3 is_denormalized = xesl_equal(exponent, (0u).xxx); + xesl_uint3 mantissa_lzcnt = (7u).xxx - xesl_findMSB(mantissa); + exponent = + xesl_select(is_denormalized, ((1u).xxx - mantissa_lzcnt), exponent); + mantissa = xesl_select(is_denormalized, + ((mantissa << mantissa_lzcnt) & 0x7Fu), mantissa); // Combine into 32-bit float bits and clear zeros. - uint3 rgb_f32u32 = - (rgb_f10u32 != 0u) ? (((exponent + 124u) << 23u) | (mantissa << 16u)) - : (0u).xxx; - return float4(asfloat(rgb_f32u32), float(p >> 30u) * (1.0f / 3.0f)); + xesl_uint3 rgb_f32u32 = xesl_select( + xesl_equal(rgb_f10u32, (0u).xxx), (0u).xxx, + ((exponent + 124u) << 23u) | (mantissa << 16u)); + return xesl_float4(xesl_uintBitsToFloat(rgb_f32u32), + float(p >> 30u) * (1.0 / 3.0)); } // http://web.archive.org/web/20180826210254/https://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf @@ -645,69 +355,67 @@ float4 XeUnpackR10G10B10A2Float(uint p) { // to 1. When sampling from this texture in a shader, the results must be // scaled to a 0 to 1 range." -// Upper 16 bits are ignored by XeUnpackR16Edram. +// Upper 16 bits are ignored by XeUnpackR16EdramX4. -float XeUnpackR16Edram(uint p) { - return max(float(asint(p) << 16 >> 16) * (32.0f / 32767.0f), -1.0f); +xesl_float4 XeUnpackR16EdramX4(xesl_uint4 p) { + return max(xesl_float4(xesl_int4(p) << 16 >> 16) * (32.0 / 32767.0), -1.0); } -float4 XeUnpackR16EdramX4(uint4 p) { - return max(float4(asint(p) << 16 >> 16) * (32.0f / 32767.0f), -1.0f); -} - -float2 XeUnpackR16G16Edram(uint p) { - return max(float2(asint(p) << int2(16, 0) >> 16) * (32.0f / 32767.0f), - -1.0f); -} - -float4 XeUnpackR16G16B16A16Edram(uint2 p) { +xesl_float2 XeUnpackR16G16Edram(uint p) { return max( - float4(asint(p).xxyy << int2(16, 0).xyxy >> 16) * (32.0f / 32767.0f), - -1.0f); + xesl_float2(int(p).xx << xesl_int2(16, 0) >> 16) * (32.0 / 32767.0), + -1.0); +} + +xesl_float4 XeUnpackR16G16B16A16Edram(xesl_uint2 p) { + return max(xesl_float4(xesl_int2(p).xxyy << xesl_int2(16, 0).xyxy >> 16) * + (32.0 / 32767.0), + -1.0); } // Xenos 16-bit packed textures are RGBA, but in Direct3D 12 they are BGRA. -uint4 XeR5G5B5A1ToB5G5R5A1(uint4 packed_texels) { +xesl_uint4 XeR5G5B5A1ToB5G5R5A1(xesl_uint4 packed_texels) { return (packed_texels & 0x83E083E0u) | ((packed_texels & 0x001F001Fu) << 10) | ((packed_texels & 0x7C007C00u) >> 10); } -uint2 XeR5G5B5A1ToB5G5R5A1(uint2 packed_texels) { +xesl_uint2 XeR5G5B5A1ToB5G5R5A1(xesl_uint2 packed_texels) { return XeR5G5B5A1ToB5G5R5A1(packed_texels.xyxx).xy; } -uint4 XeR5G6B5ToB5G6R5(uint4 packed_texels) { +xesl_uint4 XeR5G6B5ToB5G6R5(xesl_uint4 packed_texels) { return (packed_texels & 0x07E007E0u) | ((packed_texels & 0x001F001Fu) << 11) | ((packed_texels & 0xF800F800u) >> 11); } -uint2 XeR5G6B5ToB5G6R5(uint2 packed_texels) { +xesl_uint2 XeR5G6B5ToB5G6R5(xesl_uint2 packed_texels) { return XeR5G6B5ToB5G6R5(packed_texels.xyxx).xy; } -uint4 XeR4G4B4A4ToB4G4R4A4(uint4 packed_texels) { +xesl_uint4 XeR4G4B4A4ToB4G4R4A4(xesl_uint4 packed_texels) { return (packed_texels & 0xF0F0F0F0u) | ((packed_texels & 0x000F000Fu) << 8) | ((packed_texels & 0x0F000F00u) >> 8); } -uint2 XeR4G4B4A4ToB4G4R4A4(uint2 packed_texels) { +xesl_uint2 XeR4G4B4A4ToB4G4R4A4(xesl_uint2 packed_texels) { return XeR4G4B4A4ToB4G4R4A4(packed_texels.xyxx).xy; } // RRRRR GGGGG BBBBBB to GGGGG BBBBBB RRRRR (use RBGA swizzle when reading). -uint4 XeR5G5B6ToB5G6R5WithRBGASwizzle(uint4 packed_texels) { +xesl_uint4 XeR5G5B6ToB5G6R5WithRBGASwizzle(xesl_uint4 packed_texels) { return ((packed_texels & 0x001F001Fu) << 11) | - ((packed_texels & 0xFFE0FFE0) >> 5); + ((packed_texels & 0xFFE0FFE0u) >> 5); } -uint2 XeR5G5B6ToB5G6R5WithRBGASwizzle(uint2 packed_texels) { +xesl_uint2 XeR5G5B6ToB5G6R5WithRBGASwizzle(xesl_uint2 packed_texels) { return XeR5G5B6ToB5G6R5WithRBGASwizzle(packed_texels.xyxx).xy; } -uint4 XeR10G11B11UNormToRGBA16(uint2 packed_texels) { +xesl_uint4 XeR10G11B11UNormToRGBA16(xesl_uint2 packed_texels) { // Red and blue. - uint4 result = - (((packed_texels.xxyy >> uint2(0u, 21u).xyxy) & - uint2(1023u, 2047u).xyxy) << - uint2(6u, 5u).xyxy) | - ((packed_texels.xxyy >> uint2(4u, 27u).xyxy) & uint2(63u, 31u).xyxy); + xesl_uint4 result = + (((packed_texels.xxyy >> xesl_uint2(0u, 21u).xyxy) & + xesl_uint2(1023u, 2047u).xyxy) << + xesl_uint2(6u, 5u).xyxy) | + ((packed_texels.xxyy >> xesl_uint2(4u, 27u).xyxy) & + xesl_uint2(63u, 31u).xyxy); // Green. The 5 bits to be duplicated to the bottom are already at 16. result.xz |= ((packed_texels & (2047u << 10u)) << (21u - 10u)) | (packed_texels & (31u << 16u)); @@ -715,22 +423,23 @@ uint4 XeR10G11B11UNormToRGBA16(uint2 packed_texels) { result.yw |= 0xFFFF0000u; return result; } -uint2 XeR10G11B11UNormToRGBA16(uint packed_texel) { +xesl_uint2 XeR10G11B11UNormToRGBA16(uint packed_texel) { return XeR10G11B11UNormToRGBA16(packed_texel.xx).xy; } -void XeR10G11B11UNormToRGBA16(uint4 packed_texels, out uint4 out_01, - out uint4 out_23) { +void XeR10G11B11UNormToRGBA16(xesl_uint4 packed_texels, out xesl_uint4 out_01, + out xesl_uint4 out_23) { out_01 = XeR10G11B11UNormToRGBA16(packed_texels.xy); out_23 = XeR10G11B11UNormToRGBA16(packed_texels.zw); } -uint4 XeR11G11B10UNormToRGBA16(uint2 packed_texels) { +xesl_uint4 XeR11G11B10UNormToRGBA16(xesl_uint2 packed_texels) { // Red and blue. - uint4 result = - (((packed_texels.xxyy >> uint2(0u, 22u).xyxy) & - uint2(2047u, 1023u).xyxy) << - uint2(5u, 6u).xyxy) | - ((packed_texels.xxyy >> uint2(6u, 26u).xyxy) & uint2(31u, 63u).xyxy); + xesl_uint4 result = + (((packed_texels.xxyy >> xesl_uint2(0u, 22u).xyxy) & + xesl_uint2(2047u, 1023u).xyxy) << + xesl_uint2(5u, 6u).xyxy) | + ((packed_texels.xxyy >> xesl_uint2(6u, 26u).xyxy) & + xesl_uint2(31u, 63u).xyxy); // Green. result.xz |= ((packed_texels & (2047u << 11u)) << (21u - 11u)) | ((packed_texels & (31u << 17u)) >> (17u - 16u)); @@ -738,67 +447,69 @@ uint4 XeR11G11B10UNormToRGBA16(uint2 packed_texels) { result.yw |= 0xFFFF0000u; return result; } -uint2 XeR11G11B10UNormToRGBA16(uint packed_texel) { +xesl_uint2 XeR11G11B10UNormToRGBA16(uint packed_texel) { return XeR11G11B10UNormToRGBA16(packed_texel.xx).xy; } -void XeR11G11B10UNormToRGBA16(uint4 packed_texels, out uint4 out_01, - out uint4 out_23) { +void XeR11G11B10UNormToRGBA16(xesl_uint4 packed_texels, out xesl_uint4 out_01, + out xesl_uint4 out_23) { out_01 = XeR11G11B10UNormToRGBA16(packed_texels.xy); out_23 = XeR11G11B10UNormToRGBA16(packed_texels.zw); } // Assuming the original number has only 10 bits. -uint2 XeSNorm10To16(uint2 s10) { - uint2 signs = s10 >> 9u; +xesl_uint2 XeSNorm10To16(xesl_uint2 s10) { + xesl_uint2 signs = s10 >> 9u; + xesl_bool2 is_negative = xesl_notEqual(signs, (0u).xx); // -512 and -511 are both -1.0, but with -512 the conversion will overflow. - s10 = s10 == 0x200u ? 0x201u : s10; + s10 = xesl_select(xesl_equal(s10, (0x200u).xx), (0x201u).xx, s10); // Take the absolute value. - s10 = (s10 ^ (signs ? 0x3FFu : 0u)) + signs; + s10 = (s10 ^ xesl_select(is_negative, (0x3FFu).xx, (0u).xx)) + signs; // Expand the 9-bit absolute value to 15 bits like unorm. s10 = (s10 << 6u) | (s10 >> 3u); // Apply the sign. - return (s10 ^ (signs ? 0xFFFFu : 0u)) + signs; + return (s10 ^ xesl_select(is_negative, (0xFFFFu).xx, (0u).xx)) + signs; } // Assuming the original number has only 11 bits. -uint2 XeSNorm11To16(uint2 s11) { - uint2 signs = s11 >> 10u; +xesl_uint2 XeSNorm11To16(xesl_uint2 s11) { + xesl_uint2 signs = s11 >> 10u; + xesl_bool2 is_negative = xesl_notEqual(signs, (0u).xx); // -1024 and -1023 are both -1.0, but with -1024 the conversion will overflow. - s11 = s11 == 0x400u ? 0x401u : s11; + s11 = xesl_select(xesl_equal(s11, (0x400u).xx), (0x401u).xx, s11); // Take the absolute value. - s11 = (s11 ^ (signs ? 0x7FFu : 0u)) + signs; + s11 = (s11 ^ xesl_select(is_negative, (0x7FFu).xx, (0u).xx)) + signs; // Expand the 10-bit absolute value to 15 bits like unorm. s11 = (s11 << 5u) | (s11 >> 5u); // Apply the sign. - return (s11 ^ (signs ? 0xFFFFu : 0u)) + signs; + return (s11 ^ xesl_select(is_negative, (0xFFFFu).xx, (0u).xx)) + signs; } -uint4 XeR10G11B11SNormToRGBA16(uint2 packed_texels) { +xesl_uint4 XeR10G11B11SNormToRGBA16(xesl_uint2 packed_texels) { // uint4(RG0, RG1, BA0, BA1).xzyw == uint4(RG0, BA0, RG1, BA1). - return uint4(XeSNorm10To16(packed_texels & 1023u) | - (XeSNorm11To16((packed_texels >> 10u) & 2047u) << 16u), - XeSNorm11To16(packed_texels >> 21u) | 0x7FFF0000u).xzyw; + return xesl_uint4(XeSNorm10To16(packed_texels & 1023u) | + (XeSNorm11To16((packed_texels >> 10u) & 2047u) << 16u), + XeSNorm11To16(packed_texels >> 21u) | 0x7FFF0000u).xzyw; } -uint2 XeR10G11B11SNormToRGBA16(uint packed_texel) { +xesl_uint2 XeR10G11B11SNormToRGBA16(uint packed_texel) { return XeR10G11B11SNormToRGBA16(packed_texel.xx).xy; } -void XeR10G11B11SNormToRGBA16(uint4 packed_texels, out uint4 out_01, - out uint4 out_23) { +void XeR10G11B11SNormToRGBA16(xesl_uint4 packed_texels, out xesl_uint4 out_01, + out xesl_uint4 out_23) { out_01 = XeR10G11B11SNormToRGBA16(packed_texels.xy); out_23 = XeR10G11B11SNormToRGBA16(packed_texels.zw); } -uint4 XeR11G11B10SNormToRGBA16(uint2 packed_texels) { +xesl_uint4 XeR11G11B10SNormToRGBA16(xesl_uint2 packed_texels) { // uint4(RG0, RG1, BA0, BA1).xzyw == uint4(RG0, BA0, RG1, BA1). - return uint4(XeSNorm11To16(packed_texels & 2047u) | - (XeSNorm11To16((packed_texels >> 11u) & 2047u) << 16u), - XeSNorm10To16(packed_texels >> 22u) | 0x7FFF0000u).xzyw; + return xesl_uint4(XeSNorm11To16(packed_texels & 2047u) | + (XeSNorm11To16((packed_texels >> 11u) & 2047u) << 16u), + XeSNorm10To16(packed_texels >> 22u) | 0x7FFF0000u).xzyw; } -uint2 XeR11G11B10SNormToRGBA16(uint packed_texel) { +xesl_uint2 XeR11G11B10SNormToRGBA16(uint packed_texel) { return XeR11G11B10SNormToRGBA16(packed_texel.xx).xy; } -void XeR11G11B10SNormToRGBA16(uint4 packed_texels, out uint4 out_01, - out uint4 out_23) { +void XeR11G11B10SNormToRGBA16(xesl_uint4 packed_texels, out xesl_uint4 out_01, + out xesl_uint4 out_23) { out_01 = XeR11G11B10SNormToRGBA16(packed_texels.xy); out_23 = XeR11G11B10SNormToRGBA16(packed_texels.zw); } @@ -818,14 +529,14 @@ uint XeFloat32To20e4(uint f32u32) { return ((f24u32 + 3u + ((f24u32 >> 3u) & 1u)) >> 3u) & 0xFFFFFFu; } -uint XeFloat20e4To32(uint f24u32, bool remap_to_0_to_0_5 = false) { +uint XeFloat20e4To32(uint f24u32, bool remap_to_0_to_0_5) { uint mantissa = f24u32 & 0xFFFFFu; uint exponent = f24u32 >> 20u; // Normalize the values for the denormalized components. // Exponent = 1; // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x100000) == 0); bool is_denormalized = exponent == 0u; - uint mantissa_lzcnt = 20u - firstbithigh(mantissa); + uint mantissa_lzcnt = 20u - xesl_findMSB(mantissa); exponent = is_denormalized ? (1u - mantissa_lzcnt) : exponent; mantissa = is_denormalized ? ((mantissa << mantissa_lzcnt) & 0xFFFFFu) : mantissa; @@ -837,35 +548,35 @@ uint XeFloat20e4To32(uint f24u32, bool remap_to_0_to_0_5 = false) { : 0u; } -uint4 XeFloat20e4To32(uint4 f24u32) { - uint4 mantissa = f24u32 & 0xFFFFFu; - uint4 exponent = f24u32 >> 20u; +xesl_uint4 XeFloat20e4To32(xesl_uint4 f24u32) { + xesl_uint4 mantissa = f24u32 & 0xFFFFFu; + xesl_uint4 exponent = f24u32 >> 20u; // Normalize the values for the denormalized components. // Exponent = 1; // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x100000) == 0); - bool4 is_denormalized = exponent == 0u; - uint4 mantissa_lzcnt = (20u).xxxx - firstbithigh(mantissa); - exponent = is_denormalized ? ((1u).xxxx - mantissa_lzcnt) : exponent; - mantissa = - is_denormalized ? ((mantissa << mantissa_lzcnt) & 0xFFFFFu) : mantissa; + xesl_bool4 is_denormalized = xesl_equal(exponent, (0u).xxxx); + xesl_uint4 mantissa_lzcnt = (20u).xxxx - xesl_findMSB(mantissa); + exponent = xesl_select(is_denormalized, (1u).xxxx - mantissa_lzcnt, exponent); + mantissa = xesl_select( + is_denormalized, (mantissa << mantissa_lzcnt) & 0xFFFFFu, mantissa); // Combine into 32-bit float bits and clear zeros. - return (f24u32 != 0u) ? (((exponent + 112u) << 23u) | (mantissa << 3u)) - : (0u).xxxx; + return xesl_select(xesl_equal(f24u32, (0u).xxxx), (0u).xxxx, + ((exponent + 112u) << 23u) | (mantissa << 3u)); } -uint2 XeFloat20e4To32(uint2 f24u32) { +xesl_uint2 XeFloat20e4To32(xesl_uint2 f24u32) { return XeFloat20e4To32(f24u32.xyxx).xy; } -float4 XeUNorm24To32(uint4 n24) { - // Not 1.0f / 16777215.0f as that gives an incorrect result (like for a very +xesl_float4 XeUNorm24To32(xesl_uint4 n24) { + // Not 1.0 / 16777215.0 as that gives an incorrect result (like for a very // common 0xC00000 which clears 2_10_10_10 to 0001). Division by 2^24 is just // an exponent shift though, thus exact. - // Division by 16777215.0f behaves this way. - return float4(n24 + (n24 >> 23u)) * (1.0f / 16777216.0f); + // Division by 16777215.0 behaves this way. + return xesl_float4(n24 + (n24 >> 23u)) * (1.0 / 16777216.0); } -float2 XeUNorm24To32(uint2 n24) { +xesl_float2 XeUNorm24To32(xesl_uint2 n24) { return XeUNorm24To32(n24.xyxx).xy; } @@ -876,17 +587,17 @@ float2 XeUNorm24To32(uint2 n24) { // Relative ordering between endpoints is preserved, so result.x > result.y // (color0 > color1) and result.x <= result.y (color0 <= color1) can be used for // choosing the DXT1 mode. -uint2 XeDXTColorEndpointsToBGR8In10(uint bgr_end_565) { +xesl_uint2 XeDXTColorEndpointsToBGR8In10(uint bgr_end_565) { // Converting 5:6:5 to 8:8:8 similar to how Compressonator does that. // https://github.com/GPUOpen-Tools/compressonator/blob/master/CMP_CompressonatorLib/DXTC/Codec_DXTC_RGBA.cpp#L340 - uint2 bgr_end_8in10 = + xesl_uint2 bgr_end_8in10 = // Blue in 0:4 and 16:20 - to 3:7. - (uint2(bgr_end_565 << 3u, bgr_end_565 >> (16u - 3u)) & (31u << 3u)) | + (xesl_uint2(bgr_end_565 << 3u, bgr_end_565 >> (16u - 3u)) & (31u << 3u)) | // Green in 5:10 and 21:26 - to 12:17. - (uint2(bgr_end_565 << (12u - 5u), bgr_end_565 >> (21u - 12u)) & + (xesl_uint2(bgr_end_565 << (12u - 5u), bgr_end_565 >> (21u - 12u)) & (63u << 12u)) | // Red in 11:15 and 27:31 - to 23:27. - (uint2(bgr_end_565 << (23u - 11u), bgr_end_565 >> (27u - 23u)) & + (xesl_uint2(bgr_end_565 << (23u - 11u), bgr_end_565 >> (27u - 23u)) & (31u << 23u)); // Apply the lower bit replication to give full dynamic range. // Blue and red. @@ -899,14 +610,14 @@ uint2 XeDXTColorEndpointsToBGR8In10(uint bgr_end_565) { // Sorts the color indices of a DXT3/DXT5 or a DXT1 opaque block so they can be // used as the weights for the second endpoint, from 0 to 3. To get the weights // for the first endpoint, apply bitwise NOT to the result. -uint4 XeDXTHighColorWeights(uint4 codes) { +xesl_uint4 XeDXTHighColorWeights(xesl_uint4 codes) { // Initially 00 = 3:0, 01 = 0:3, 10 = 2:1, 11 = 1:2. // Swap bits. 00 = 3:0, 01 = 2:1, 10 = 0:3, 11 = 1:2. codes = ((codes & 0x55555555u) << 1u) | ((codes & 0xAAAAAAAAu) >> 1u); // Swap 10 and 11. 00 = 3:0, 01 = 2:1, 10 = 1:2, 11 = 0:3. return codes ^ ((codes & 0xAAAAAAAAu) >> 1u); } -uint2 XeDXTHighColorWeights(uint2 codes) { +xesl_uint2 XeDXTHighColorWeights(xesl_uint2 codes) { return XeDXTHighColorWeights(codes.xyxx).xy; } uint XeDXTHighColorWeights(uint codes) { @@ -918,9 +629,9 @@ uint XeDXTHighColorWeights(uint codes) { // space between each), weights can be obtained using XeDXTHighColorWeights. // Alpha is set to 0 in the result. Weights must be shifted right by 8 * row // index before calling. -uint4 XeDXTOpaqueRowToRGB8(uint2 bgr_end_8in10, uint weights_high) { - const uint4 weights_shifts = uint4(0u, 2u, 4u, 6u); - uint4 bgr_row_8in10_3x = +xesl_uint4 XeDXTOpaqueRowToRGB8(xesl_uint2 bgr_end_8in10, uint weights_high) { + const xesl_uint4 weights_shifts = xesl_uint4(0u, 2u, 4u, 6u); + xesl_uint4 bgr_row_8in10_3x = (((~weights_high).xxxx >> weights_shifts) & 3u) * bgr_end_8in10.x + ((weights_high.xxxx >> weights_shifts) & 3u) * bgr_end_8in10.y; return (((bgr_row_8in10_3x & 1023u) / 3u) << 16u) | @@ -933,7 +644,7 @@ uint4 XeDXTOpaqueRowToRGB8(uint2 bgr_end_8in10, uint weights_high) { // high endpoint, and both bits for 1/2 of each, AND of those bits can be used // as the right shift amount for mixing the two colors in the punchthrough // mode). Zero for the punchthrough alpha texels. -uint4 XeDXT1TransWeights(uint4 codes) { +xesl_uint4 XeDXT1TransWeights(xesl_uint4 codes) { // Initially 00 = 1:0, 01 = 0:1, 10 = 1:1, 11 = 0:0. // 00 = 0:0, 01 = 1:1, 10 = 0:1, 11 = 1:0. codes = ~codes; @@ -945,18 +656,18 @@ uint4 XeDXT1TransWeights(uint4 codes) { // can be obtained using XeDXTColorEndpointsToBGR8In10 (8 bits with 2 bits of // free space between each), weights can be obtained using XeDXT1TransWeights // and must be shifted right by 8 * row index before calling. -uint4 XeDXT1TransRowToRGBA8(uint2 bgr_end_8in10, uint weights) { - const uint4 weights_shifts_low = uint4(0u, 2u, 4u, 6u); - const uint4 weights_shifts_high = uint4(1u, 3u, 5u, 7u); - uint4 bgr_row_8in10_scaled = +xesl_uint4 XeDXT1TransRowToRGBA8(xesl_uint2 bgr_end_8in10, uint weights) { + const xesl_uint4 weights_shifts_low = xesl_uint4(0u, 2u, 4u, 6u); + const xesl_uint4 weights_shifts_high = xesl_uint4(1u, 3u, 5u, 7u); + xesl_uint4 bgr_row_8in10_scaled = ((weights.xxxx >> weights_shifts_low) & 1u) * bgr_end_8in10.x + ((weights.xxxx >> weights_shifts_high) & 1u) * bgr_end_8in10.y; // Whether the texel is (RGB0+RGB1)/2 - divide the weighted sum by 2 (shift // right by 1) if it is. - uint4 weights_sums_log2 = weights & ((weights & 0xAAAAAAAAu) >> 1u); - uint4 bgr_shift = (weights_sums_log2.xxxx >> weights_shifts_low) & 1u; + uint weights_sums_log2 = weights & ((weights & 0xAAAAAAAAu) >> 1u); + xesl_uint4 bgr_shift = (weights_sums_log2.xxxx >> weights_shifts_low) & 1u; // Whether the texel is opaque. - uint4 weights_alpha = + uint weights_alpha = (weights & 0x55555555u) | ((weights & 0xAAAAAAAAu) >> 1u); return (((bgr_row_8in10_scaled & 1023u) >> bgr_shift) << 16u) + ((((bgr_row_8in10_scaled >> 10u) & 1023u) >> bgr_shift) << 8u) + @@ -968,7 +679,7 @@ uint4 XeDXT1TransRowToRGBA8(uint2 bgr_end_8in10, uint weights) { // converting DXT3A. Only 16 bits of alpha half-blocks are used. Alpha is from // word 0 for rows 0 and 1, from word 1 for rows 2 and 3, must be shifted right // by 16 * (row index & 1) before calling. -uint4 XeDXT3FourBlocksRowToA8(uint4 alphas) { +xesl_uint4 XeDXT3FourBlocksRowToA8(xesl_uint4 alphas) { // (alphas & 0xFu) | ((alphas & 0xFu) << 4u) | // ((alphas & 0xF0u) << (8u - 4u)) | ((alphas & 0xF0u) << (12u - 4u)) | // ((alphas & 0xF00u) << (16u - 8u)) | ((alphas & 0xF00u) << (20u - 8u)) | @@ -978,7 +689,7 @@ uint4 XeDXT3FourBlocksRowToA8(uint4 alphas) { ((alphas & 0xF000u) << 16u); } -uint4 XeDXT3AAs1111TwoBlocksRowToBGRA4(uint2 halfblocks) { +xesl_uint4 XeDXT3AAs1111TwoBlocksRowToBGRA4(xesl_uint2 halfblocks) { // Only 16 bits of half-blocks are used. X contains pixels 0123, Y - 4567 (in // the image, halfblocks.y is halfblocks.x + 8). // In the row, X contains pixels 01, Y - 23, Z - 45, W - 67. @@ -991,14 +702,15 @@ uint4 XeDXT3AAs1111TwoBlocksRowToBGRA4(uint2 halfblocks) { // is the T-shaped (or somewhat H-shaped) metal beams in the beginning of the // first mission), however the contents don't say anything about the channel // order. - uint4 row = (((halfblocks.xxyy >> uint2(3u, 11u).xyxy) & 1u) << 8u) | - (((halfblocks.xxyy >> uint2(7u, 15u).xyxy) & 1u) << 24u) | - (((halfblocks.xxyy >> uint2(2u, 10u).xyxy) & 1u) << 4u) | - (((halfblocks.xxyy >> uint2(6u, 14u).xyxy) & 1u) << 20u) | - ((halfblocks.xxyy >> uint2(1u, 9u).xyxy) & 1u) | - (((halfblocks.xxyy >> uint2(5u, 13u).xyxy) & 1u) << 16u) | - (((halfblocks.xxyy >> uint2(0u, 8u).xyxy) & 1u) << 12u) | - (((halfblocks.xxyy >> uint2(4u, 12u).xyxy) & 1u) << 28u); + xesl_uint4 row = + (((halfblocks.xxyy >> xesl_uint2(3u, 11u).xyxy) & 1u) << 8u) | + (((halfblocks.xxyy >> xesl_uint2(7u, 15u).xyxy) & 1u) << 24u) | + (((halfblocks.xxyy >> xesl_uint2(2u, 10u).xyxy) & 1u) << 4u) | + (((halfblocks.xxyy >> xesl_uint2(6u, 14u).xyxy) & 1u) << 20u) | + ((halfblocks.xxyy >> xesl_uint2(1u, 9u).xyxy) & 1u) | + (((halfblocks.xxyy >> xesl_uint2(5u, 13u).xyxy) & 1u) << 16u) | + (((halfblocks.xxyy >> xesl_uint2(0u, 8u).xyxy) & 1u) << 12u) | + (((halfblocks.xxyy >> xesl_uint2(4u, 12u).xyxy) & 1u) << 28u); row |= row << 1u; row |= row << 2u; return row; @@ -1066,7 +778,7 @@ uint XeDXT5High6StepAlphaWeights(uint codes_24b) { // Sorts half (24 bits) of the codes of a DXT5 alpha block so they can be used // as weights for XeDXT5RowToA8. -uint XeDXT5HighAlphaWeights(uint2 end, uint codes_24b) { +uint XeDXT5HighAlphaWeights(xesl_uint2 end, uint codes_24b) { return (end.x <= end.y) ? XeDXT5High6StepAlphaWeights(codes_24b) : XeDXT5High8StepAlphaWeights(codes_24b); } @@ -1075,7 +787,7 @@ uint XeDXT5HighAlphaWeights(uint2 end, uint codes_24b) { // in bits 0:7 and 8:15 of the first dword, weights can be obtained using // XeDXT5High8StepAlphaWeights and must be shifted right by 12 * (row index & 1) // before calling. -uint XeDXT58StepRowToA8(uint2 end, uint weights_high) { +uint XeDXT58StepRowToA8(xesl_uint2 end, uint weights_high) { uint weights_low = ~weights_high; return ((end.x * (weights_low & 7u) + end.y * (weights_high & 7u)) / 7u) | @@ -1089,9 +801,9 @@ uint XeDXT58StepRowToA8(uint2 end, uint weights_high) { // Version of XeDXT58StepRowToA8 that returns values packed in low 8 bits of // 16-bit parts, for DXN decompression. -uint2 XeDXT58StepRowToA8In16(uint2 end, uint weights_high) { +xesl_uint2 XeDXT58StepRowToA8In16(xesl_uint2 end, uint weights_high) { uint weights_low = ~weights_high; - return uint2( + return xesl_uint2( ((end.x * (weights_low & 7u) + end.y * (weights_high & 7u)) / 7u) | (((end.x * ((weights_low >> 3u) & 7u) + end.y * ((weights_high >> 3u) & 7u)) / 7u) << 16u), @@ -1105,7 +817,7 @@ uint2 XeDXT58StepRowToA8In16(uint2 end, uint weights_high) { // in bits 0:7 and 8:15 of the first dword, weights can be obtained using // XeDXT5High6StepAlphaWeights and must be shifted right by 12 * (row index & 1) // before calling. -uint XeDXT56StepRowToA8(uint2 end, uint weights_6step) { +uint XeDXT56StepRowToA8(xesl_uint2 end, uint weights_6step) { // Make a mask for whether the weights are constants. uint is_constant = weights_6step & 0x492u & ((weights_6step & 0x924u) >> 1u); is_constant |= (is_constant << 1u) | (is_constant >> 1u); @@ -1136,7 +848,7 @@ uint XeDXT56StepRowToA8(uint2 end, uint weights_6step) { // Version of XeDXT56StepRowToA8 that returns values packed in low 8 bits of // 16-bit parts, for DXN decompression. -uint2 XeDXT56StepRowToA8In16(uint2 end, uint weights_6step) { +xesl_uint2 XeDXT56StepRowToA8In16(xesl_uint2 end, uint weights_6step) { // Make a mask for whether the weights are constants. uint is_constant = weights_6step & 0x492u & ((weights_6step & 0x924u) >> 1u); is_constant |= (is_constant << 1u) | (is_constant >> 1u); @@ -1147,7 +859,7 @@ uint2 XeDXT56StepRowToA8In16(uint2 end, uint weights_6step) { uint weights_high = weights_6step & ~is_constant; uint weights_low = ((5u * 0x249u) - weights_high) & ~is_constant; // Interpolate. - uint2 row = uint2( + xesl_uint2 row = xesl_uint2( ((end.x * (weights_low & 7u) + end.y * (weights_high & 7u)) / 5u) | (((end.x * ((weights_low >> 3u) & 7u) + end.y * ((weights_high >> 3u) & 7u)) / 5u) << 16u), @@ -1157,7 +869,7 @@ uint2 XeDXT56StepRowToA8In16(uint2 end, uint weights_6step) { end.y * ((weights_high >> 9u) & 7u)) / 5u) << 16u)); // Get the constant values as 1 bit per pixel separated by 7 bits. uint constant_weights = weights_6step & is_constant; - uint2 constant_values = uint2( + xesl_uint2 constant_values = xesl_uint2( (constant_weights & 1u) | ((constant_weights & (1u << 3u)) << (16u - 3u)), ((constant_weights >> 6u) & 1u) | ((constant_weights & (1u << 9u)) << (16u - 9u))); @@ -1168,14 +880,14 @@ uint2 XeDXT56StepRowToA8In16(uint2 end, uint weights_6step) { // Get alphas of a DXT5 alpha row. Endpoint alphas are in bits 0:7 and 8:15 of // the first dword, weights can be obtained using XeDXT5HighAlphaWeights and // must be shifted right by 12 * (row index & 1) before calling. -uint XeDXT5RowToA8(uint2 end, uint weights) { +uint XeDXT5RowToA8(xesl_uint2 end, uint weights) { return (end.x <= end.y) ? XeDXT56StepRowToA8(end, weights) : XeDXT58StepRowToA8(end, weights); } // Version of XeDXT5RowToA8 that returns values packed in low 8 bits of 16-bit // parts, for DXN decompression. -uint2 XeDXT5RowToA8In16(uint2 end, uint weights) { +xesl_uint2 XeDXT5RowToA8In16(xesl_uint2 end, uint weights) { return (end.x <= end.y) ? XeDXT56StepRowToA8In16(end, weights) : XeDXT58StepRowToA8In16(end, weights); } @@ -1185,12 +897,14 @@ uint2 XeDXT5RowToA8In16(uint2 end, uint weights) { // they can be multiplied by weights with room for overflow. Weights can be // obtained using XeDXTHighColorWeights and must be shifted right by 8 * row // index before calling. -uint4 XeCTX1TwoBlocksRowToR8G8(uint4 end_8in16, uint2 weights_high) { - uint2 weights_low = ~weights_high; - const uint4 weights_shifts = uint4(0u, 2u, 4u, 6u); - uint4 row_8in16 = ((weights_low.xxxx >> weights_shifts) & 3u) * end_8in16.x + - ((weights_high.xxxx >> weights_shifts) & 3u) * end_8in16.y; - uint4 result; +xesl_uint4 XeCTX1TwoBlocksRowToR8G8(xesl_uint4 end_8in16, + xesl_uint2 weights_high) { + xesl_uint2 weights_low = ~weights_high; + const xesl_uint4 weights_shifts = xesl_uint4(0u, 2u, 4u, 6u); + xesl_uint4 row_8in16 = + ((weights_low.xxxx >> weights_shifts) & 3u) * end_8in16.x + + ((weights_high.xxxx >> weights_shifts) & 3u) * end_8in16.y; + xesl_uint4 result; result.xy = ((row_8in16.xz & 0xFFFFu) / 3u) | (((row_8in16.xz >> 16u) / 3u) << 8u) | (((row_8in16.yw & 0xFFFFu) / 3u) << 16u) | @@ -1204,4 +918,4 @@ uint4 XeCTX1TwoBlocksRowToR8G8(uint4 end_8in16, uint2 weights_high) { return result; } -#endif // XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_ +#endif // XENIA_GPU_SHADERS_PIXEL_FORMATS_XESLI_ diff --git a/src/xenia/gpu/shaders/resolve.hlsli b/src/xenia/gpu/shaders/resolve.hlsli index 2b91be03e..bc0b36712 100644 --- a/src/xenia/gpu/shaders/resolve.hlsli +++ b/src/xenia/gpu/shaders/resolve.hlsli @@ -1,8 +1,8 @@ #ifndef XENIA_GPU_D3D12_SHADERS_RESOLVE_HLSLI_ #define XENIA_GPU_D3D12_SHADERS_RESOLVE_HLSLI_ -#include "edram.hlsli" -#include "pixel_formats.hlsli" +#include "edram.xesli" +#include "pixel_formats.xesli" #include "texture_address.hlsli" cbuffer XeResolveConstants : register(b0) { diff --git a/src/xenia/gpu/shaders/resolve_full_16bpp.hlsli b/src/xenia/gpu/shaders/resolve_full_16bpp.hlsli index a1a8a50c2..29aa9dcbc 100644 --- a/src/xenia/gpu/shaders/resolve_full_16bpp.hlsli +++ b/src/xenia/gpu/shaders/resolve_full_16bpp.hlsli @@ -1,5 +1,5 @@ #include "endian.hlsli" -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "resolve.hlsli" RWBuffer xe_resolve_dest : register(u0); diff --git a/src/xenia/gpu/shaders/resolve_full_32bpp.hlsli b/src/xenia/gpu/shaders/resolve_full_32bpp.hlsli index a6e010bff..1c96dd64c 100644 --- a/src/xenia/gpu/shaders/resolve_full_32bpp.hlsli +++ b/src/xenia/gpu/shaders/resolve_full_32bpp.hlsli @@ -1,5 +1,5 @@ #include "endian.hlsli" -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "resolve.hlsli" RWBuffer xe_resolve_dest : register(u0); diff --git a/src/xenia/gpu/shaders/resolve_full_64bpp.hlsli b/src/xenia/gpu/shaders/resolve_full_64bpp.hlsli index 9299df19e..d66079e17 100644 --- a/src/xenia/gpu/shaders/resolve_full_64bpp.hlsli +++ b/src/xenia/gpu/shaders/resolve_full_64bpp.hlsli @@ -1,5 +1,5 @@ #include "endian.hlsli" -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "resolve.hlsli" RWBuffer xe_resolve_dest : register(u0); diff --git a/src/xenia/gpu/shaders/resolve_full_8bpp.hlsli b/src/xenia/gpu/shaders/resolve_full_8bpp.hlsli index d048c902b..ccb3ac63c 100644 --- a/src/xenia/gpu/shaders/resolve_full_8bpp.hlsli +++ b/src/xenia/gpu/shaders/resolve_full_8bpp.hlsli @@ -1,5 +1,5 @@ #include "endian.hlsli" -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "resolve.hlsli" RWBuffer xe_resolve_dest : register(u0); diff --git a/src/xenia/gpu/shaders/texture_load_ctx1.cs.hlsl b/src/xenia/gpu/shaders/texture_load_ctx1.cs.hlsl index 56c75c49a..59b934715 100644 --- a/src/xenia/gpu/shaders/texture_load_ctx1.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_ctx1.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_depth_float.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_float.cs.hlsl index 2dcd17d22..41dcddf10 100644 --- a/src/xenia/gpu/shaders/texture_load_depth_float.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_depth_float.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TRANSFORM(blocks) \ (XeFloat20e4To32((blocks) >> 8u)) #include "texture_load_32bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_depth_float_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_float_scaled.cs.hlsl index 0dbc12654..608a639b2 100644 --- a/src/xenia/gpu/shaders/texture_load_depth_float_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_depth_float_scaled.cs.hlsl @@ -1,5 +1,5 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TRANSFORM(blocks) \ (XeFloat20e4To32((blocks) >> 8u)) #include "texture_load_32bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_depth_unorm.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_unorm.cs.hlsl index 7272b3c82..34ef357b4 100644 --- a/src/xenia/gpu/shaders/texture_load_depth_unorm.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_depth_unorm.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TRANSFORM(blocks) \ (asuint(XeUNorm24To32((blocks) >> 8u))) #include "texture_load_32bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_depth_unorm_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_unorm_scaled.cs.hlsl index 38ef57827..27cdf741e 100644 --- a/src/xenia/gpu/shaders/texture_load_depth_unorm_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_depth_unorm_scaled.cs.hlsl @@ -1,5 +1,5 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TRANSFORM(blocks) \ (asuint(XeUNorm24To32((blocks) >> 8u))) #include "texture_load_32bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_dxn_rg8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxn_rg8.cs.hlsl index 69e193595..460957fd5 100644 --- a/src/xenia/gpu/shaders/texture_load_dxn_rg8.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_dxn_rg8.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_dxt1_rgba8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt1_rgba8.cs.hlsl index efe6b51cd..530228745 100644 --- a/src/xenia/gpu/shaders/texture_load_dxt1_rgba8.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_dxt1_rgba8.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_dxt3_rgba8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt3_rgba8.cs.hlsl index e320fb9c5..f20082369 100644 --- a/src/xenia/gpu/shaders/texture_load_dxt3_rgba8.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_dxt3_rgba8.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_dxt3a.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt3a.cs.hlsl index be0e1bb08..165c6da62 100644 --- a/src/xenia/gpu/shaders/texture_load_dxt3a.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_dxt3a.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_dxt3aas1111.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt3aas1111.cs.hlsl index 0ec95ecf7..f1337e717 100644 --- a/src/xenia/gpu/shaders/texture_load_dxt3aas1111.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_dxt3aas1111.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_dxt5_rgba8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt5_rgba8.cs.hlsl index e886a4811..e51347e1f 100644 --- a/src/xenia/gpu/shaders/texture_load_dxt5_rgba8.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_dxt5_rgba8.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_dxt5a_r8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt5a_r8.cs.hlsl index a35be6b3f..911696bc3 100644 --- a/src/xenia/gpu/shaders/texture_load_dxt5a_r8.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_dxt5a_r8.cs.hlsl @@ -1,4 +1,4 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #include "texture_load.hlsli" Buffer xe_texture_load_source : register(t0); diff --git a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16.cs.hlsl index 9f06e12cb..2671aee5b 100644 --- a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR10G11B11UNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_scaled.cs.hlsl index ba5e3bcd8..eeff0d5bd 100644 --- a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR10G11B11UNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl index 4acb7631b..ad9e373b4 100644 --- a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR10G11B11SNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm_scaled.cs.hlsl index 1436bfb8c..448c90955 100644 --- a/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR10G11B11SNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16.cs.hlsl index e4f3628dc..3a0b79fcd 100644 --- a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR11G11B10UNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_scaled.cs.hlsl index 104596699..8b98f06b2 100644 --- a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR11G11B10UNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl index 65ee26ece..315a73991 100644 --- a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR11G11B10SNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm_scaled.cs.hlsl index 0a44707f5..0d0922019 100644 --- a/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_32BPB_TO_64BPB XeR11G11B10SNormToRGBA16 #include "texture_load_32bpb_64bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl index 1416cc291..58ff0e9ef 100644 --- a/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR4G4B4A4ToB4G4R4A4 #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4_scaled.cs.hlsl index 2e1058998..e2a5f89f5 100644 --- a/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR4G4B4A4ToB4G4R4A4 #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl index 3ab63c5c7..4afd83d63 100644 --- a/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR5G5B5A1ToB5G5R5A1 #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1_scaled.cs.hlsl index f991ee80b..c17db7707 100644 --- a/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR5G5B5A1ToB5G5R5A1 #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl index 1c2c112cb..075a497c6 100644 --- a/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR5G5B6ToB5G6R5WithRBGASwizzle #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled.cs.hlsl index 67d1186cb..ce9b7b48a 100644 --- a/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR5G5B6ToB5G6R5WithRBGASwizzle #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl index 5530095c0..e198774d0 100644 --- a/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl @@ -1,3 +1,3 @@ -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR5G6B5ToB5G6R5 #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5_scaled.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5_scaled.cs.hlsl index 6e5d3011c..825f3760b 100644 --- a/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5_scaled.cs.hlsl +++ b/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5_scaled.cs.hlsl @@ -1,4 +1,4 @@ #define XE_TEXTURE_LOAD_RESOLUTION_SCALED -#include "pixel_formats.hlsli" +#include "pixel_formats.xesli" #define XE_TEXTURE_LOAD_16BPB_TRANSFORM XeR5G6B5ToB5G6R5 #include "texture_load_16bpb.hlsli" diff --git a/src/xenia/ui/shaders/xesl.xesli b/src/xenia/ui/shaders/xesl.xesli index f8ef66ea2..8101fa480 100644 --- a/src/xenia/ui/shaders/xesl.xesli +++ b/src/xenia/ui/shaders/xesl.xesli @@ -14,6 +14,7 @@ // arguments. // Required GLSL extensions: +// - GL_EXT_control_flow_attributes // - GL_EXT_samplerless_texture_functions // For functions, it's preferable to take the identifiers here from an existing @@ -85,7 +86,7 @@ // Everything here must be separated with xesl_entry_binding_next, with no // leading or trailing separators. // - Texel buffer, texture, sampler bindings. -// xesl_entry_bindings_end +// xesl_entry_bindings_end or (for CS) xesl_entry_bindings_end_local_size // Everything here must be separated with xesl_entry_signature_next, with no // leading or trailing separators. // - Linked stage inputs. @@ -103,9 +104,14 @@ // Both binding and signature entry names may be placed in the global scope in // the target language, make sure they don't collide with anything there. #if XESL_LANGUAGE_GLSL + #define xesl_entry_bindings_end_local_size(x, y, z) \ + layout(local_size_x=(x), local_size_y=(y), local_size_z=(z)) in; #define xesl_entry_signature_end void main() { #elif XESL_LANGUAGE_HLSL #define xesl_entry_bindings_end void main( + #define xesl_entry_bindings_end_local_size(x, y, z) \ + [numthreads(x, y, z)] \ + xesl_entry_bindings_end #define xesl_entry_signature_next , #define xesl_entry_signature_end ) { #else @@ -196,8 +202,12 @@ #define xesl_push_constant(name) xesl_push_constants.name #endif // XESL_PUSH_CONSTANTS_GLOBAL -// Texture, sampler and image declarations must be in the entry point bindings -// declaration. +// Buffer, texture, sampler and image declarations must be in the entry point +// bindings declaration. +// - xesl_typedStorageBuffer is a buffer limited to 1/2/4-component vectors of +// 32-bit integers and floats, a typed buffer on Direct3D, but a storage +// buffer (as opposed to a texel buffer, which has a very small minimum +// requirement for the maximum size) on Vulkan. // - xesl_texture is a separate texture. // - xesl_samplerState is a separate sampler. // - xesl_sampler is a combined texture / sampler where available, internally @@ -206,8 +216,21 @@ #define XESL_COMBINED_TEXTURE_SAMPLER 1 // Types. #define xesl_texture2D texture2D + #define xesl_texture2DMS texture2DMS #define xesl_sampler2D sampler2D // Binding declarations. + #define xesl_typedStorageBuffer(value_type, name, glsl_set, glsl_binding, \ + hlsl_t, hlsl_t_space) \ + layout(std430, glsl_set, glsl_binding) \ + readonly buffer xesl_id_buffer_##name { \ + value_type data[]; \ + } name; + #define xesl_writeTypedStorageBuffer(value_type, name, glsl_set, \ + glsl_binding, hlsl_u, hlsl_u_space) \ + layout(std430, glsl_set, glsl_binding) \ + writeonly buffer xesl_id_buffer_##name { \ + value_type data[]; \ + } name; #define xesl_texture(texture_type, name, glsl_set, glsl_binding, hlsl_t, \ hlsl_t_space) \ layout(glsl_set, glsl_binding) uniform texture_type name; @@ -217,9 +240,15 @@ #define xesl_sampler(sampler_type, name, glsl_set, glsl_binding, hlsl_t, \ hlsl_t_space, hlsl_s, hlsl_s_space) \ layout(glsl_set, glsl_binding) uniform sampler_type name; - // Fetching. + // Fetching and storing. + #define xesl_typedStorageBufferLoad(name, position) \ + ((name).data[uint(position)]) + #define xesl_writeTypedStorageBufferStore(name, position, value) \ + ((name).data[uint(position)] = (value)) #define xesl_texelFetch2D(texture_name, position, lod) \ texelFetch(texture_name, position, lod) + #define xesl_texelFetch2DMS(texture_name, position, sample_index) \ + texelFetch(texture_name, position, sample_index) #define xesl_textureSampleLod2D_sep(texture_name, sampler_name, position, \ lod) \ textureLod(sampler2D(texture_name, sampler_name), position, lod) @@ -243,17 +272,29 @@ textureGather(texture_sampler_name, position, 3) #elif XESL_LANGUAGE_HLSL // Types. - #define xesl_texture2D Texture2D + #define xesl_texture2D Texture2D + #define xesl_texture2DMS Texture2DMS // Binding declarations. + #define xesl_typedStorageBuffer(value_type, name, glsl_set, glsl_binding, \ + hlsl_t, hlsl_t_space) \ + Buffer name : register(hlsl_t, hlsl_t_space); + #define xesl_writeTypedStorageBuffer(value_type, name, glsl_set, \ + glsl_binding, hlsl_u, hlsl_u_space) \ + RWBuffer name : register(hlsl_u, hlsl_u_space); #define xesl_texture(texture_type, name, glsl_set, glsl_binding, hlsl_t, \ hlsl_t_space) \ texture_type name : register(hlsl_t, hlsl_t_space); #define xesl_samplerState(name, glsl_set, glsl_binding, hlsl_s, \ hlsl_s_space) \ SamplerState name : register(hlsl_s, hlsl_s_space); - // Fetching. + // Fetching and storing. + #define xesl_typedStorageBufferLoad(name, position) ((name)[uint(position)]) + #define xesl_writeTypedStorageBufferStore(name, position, value) \ + ((name)[uint(position)] = (value)) #define xesl_texelFetch2D(texture_name, position, lod) \ ((texture_name).Load(int3(position, lod))) + #define xesl_texelFetch2DMS(texture_name, position, sample_index) \ + ((texture_name).Load(position, sample_index)) #define xesl_textureSampleLod2D_sep(texture_name, sampler_name, position, \ lod) \ ((texture_name).SampleLevel(sampler_name, position, lod)) @@ -266,7 +307,7 @@ #define xesl_textureGatherAlpha2D_sep(texture_name, sampler_name, position) \ ((texture_name).GatherAlpha(sampler_name, position)) #else - #error xesl_texture and xesl_samplerState not defined for the target language. + #error Buffers and textures not defined for the target language. #endif // XESL_LANGUAGE // If there's no language specialization doing this already, implement combined // textures / samplers as separate, with `xesl_id_texture_` and @@ -324,11 +365,19 @@ #define XESL_FRAG_COORD_W_IS_INVERSE 1 #define xesl_VertexID gl_VertexIndex #define xesl_FragCoord gl_FragCoord + #define xesl_WorkGroupID gl_WorkGroupID + #define xesl_LocalInvocationID gl_LocalInvocationID + #define xesl_GlobalInvocationID gl_GlobalInvocationID + #define xesl_LocalInvocationIndex gl_LocalInvocationIndex #define xesl_Position gl_Position #define xesl_input(type, name, index, hlsl_semantic) \ layout(location=index) in type name; #define xesl_input_vertex_id #define xesl_input_frag_coord + #define xesl_input_work_group_id + #define xesl_input_local_invocation_id + #define xesl_input_global_invocation_id + #define xesl_input_local_invocation_index #define xesl_output(type, name, index, hlsl_semantic) \ layout(location=index) out type name; #define xesl_output_position @@ -342,6 +391,14 @@ uint xesl_id_vertex_id : SV_VertexID #define xesl_input_frag_coord \ xesl_float4 xesl_FragCoord : SV_Position + #define xesl_input_work_group_id \ + xesl_uint3 xesl_WorkGroupID : SV_GroupID + #define xesl_input_local_invocation_id \ + xesl_uint3 xesl_LocalInvocationID : SV_GroupThreadID + #define xesl_input_global_invocation_id \ + xesl_uint3 xesl_GlobalInvocationID : SV_DispatchThreadID + #define xesl_input_local_invocation_index \ + uint xesl_LocalInvocationIndex : SV_GroupIndex #define xesl_output(type, name, index, hlsl_semantic) \ out type name : hlsl_semantic #define xesl_output_position \ @@ -361,9 +418,54 @@ #define XESL_Y_SCREEN_DIRECTION -1.0 #endif // XESL_LANGUAGE_GLSL +// Attributes. + +#if XESL_LANGUAGE_GLSL + #define xesl_flatten [[flatten]] + #define xesl_dont_flatten [[dont_flatten]] +#elif XESL_LANGUAGE_HLSL + #define xesl_flatten [flatten] + #define xesl_dont_flatten [branch] +#endif // XESL_LANGUAGE +#ifndef xesl_flatten + #define xesl_flatten +#endif // !xesl_flatten +#ifndef xesl_dont_flatten + #define xesl_dont_flatten +#endif // !xesl_dont_flatten + // Function aliases. -#if XESL_LANGUAGE_HLSL +#if XESL_LANGUAGE_GLSL + #define xesl_lessThan lessThan + #define xesl_lessThanEqual lessThanEqual + #define xesl_greaterThan greaterThan + #define xesl_greaterThanEqual greaterThanEqual + #define xesl_equal equal + #define xesl_notEqual notEqual + #define xesl_not not + #define xesl_select(condition, true_result, false_result) \ + mix(false_result, true_result, condition) +#elif XESL_LANGUAGE_HLSL + #define xesl_lessThan(x, y) ((x) < (y)) + #define xesl_lessThanEqual(x, y) ((x) <= (y)) + #define xesl_greaterThan(x, y) ((x) > (y)) + #define xesl_greaterThanEqual(x, y) ((x) >= (y)) + #define xesl_equal(x, y) ((x) == (y)) + #define xesl_notEqual(x, y) ((x) != (y)) + #define xesl_not(x) (!(x)) + #define xesl_select(condition, true_result, false_result) \ + ((condition) ? (true_result) : (false_result)) +#else + #error Comparison operations not defined for the target language. +#endif + +#if XESL_LANGUAGE_GLSL + #define xesl_floatBitsToInt floatBitsToInt + #define xesl_floatBitsToUint floatBitsToUint + #define xesl_intBitsToFloat intBitsToFloat + #define xesl_uintBitsToFloat uintBitsToFloat +#elif XESL_LANGUAGE_HLSL // Using functions instead of #define for implicit argument conversion. int xesl_floatBitsToInt(float value) { return asint(value); } xesl_int2 xesl_floatBitsToInt(xesl_float2 value) { return asint(value); } @@ -382,11 +484,8 @@ xesl_float3 xesl_uintBitsToFloat(xesl_uint3 value) { return asfloat(value); } xesl_float4 xesl_uintBitsToFloat(xesl_uint4 value) { return asfloat(value); } #else - #define xesl_floatBitsToInt floatBitsToInt - #define xesl_floatBitsToUint floatBitsToUint - #define xesl_intBitsToFloat intBitsToFloat - #define xesl_uintBitsToFloat uintBitsToFloat -#endif // XESL_LANGUAGE_HLSL + #error Float bit casting not defined for the target language. +#endif // XESL_LANGUAGE #if XESL_LANGUAGE_GLSL float xesl_saturate(float value) { @@ -406,6 +505,16 @@ #define xesl_saturate saturate #endif // XESL_LANGUAGE_GLSL +#if XESL_LANGUAGE_GLSL + #define xesl_findLSB findLSB + #define xesl_findMSB findMSB +#elif XESL_LANGUAGE_HLSL + #define xesl_findLSB firstbitlow + #define xesl_findMSB firstbithigh +#else + #error Bit count operations not defined for the target language. +#endif // XESL_LANGUAGE + #if XESL_LANGUAGE_GLSL #define xesl_packHalf2x16 packHalf2x16 #elif XESL_LANGUAGE_HLSL diff --git a/xenia-build b/xenia-build index 50a42416e..d41b9f4b0 100755 --- a/xenia-build +++ b/xenia-build @@ -1020,6 +1020,7 @@ class BuildShadersCommand(Command): # preserves line numbers in error and warning messages. spirv_xesl_wrapper = \ '#version 460\n' + \ + '#extension GL_EXT_control_flow_attributes : require\n' + \ '#extension GL_EXT_samplerless_texture_functions : require\n' + \ '#extension GL_GOOGLE_include_directive : require\n' + \ '#include "%s"\n'