[XeSL] Rename xesl_write*Store to xesl_*Store

This commit is contained in:
Triang3l 2022-06-24 23:37:29 +03:00
parent 7a4732e14f
commit f4a634c617
29 changed files with 89 additions and 95 deletions

View File

@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
kXenosMsaaSamples_1X, false, 0u, 0u,
resolution_scale)
>> 2u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s,
xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2D(xe_host_depth_store_source, pixel_index, 0).r,
@ -56,7 +56,7 @@ xesl_entry_inputs_end_code_begin_compute
pixel_index + xesl_int2(2, 0), 0).r,
xesl_texelFetch2D(xe_host_depth_store_source,
pixel_index + xesl_int2(3, 0), 0).r)));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s + 1u,
xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2D(xe_host_depth_store_source,

View File

@ -54,7 +54,7 @@ xesl_entry_inputs_end_code_begin_compute
XeHostDepthStoreRTMsaa2xSupported(rt_constant)
? (bool(dest_sample_index) ? 0 : 1)
: (bool(dest_sample_index) ? 3 : 0);
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s,
xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
@ -68,7 +68,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_texelFetch2DMS(xe_host_depth_store_source,
pixel_index + xesl_int2(3, 0),
source_sample_index).r)));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s + 1u,
xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source,

View File

@ -51,7 +51,7 @@ xesl_entry_inputs_end_code_begin_compute
// Render target horizontal sample in bit 0, vertical sample in bit 1.
int source_sample_left = int((xesl_GlobalInvocationID.y & 1u) << 1u);
int source_sample_right = source_sample_left + 1;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s,
xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
@ -64,7 +64,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_texelFetch2DMS(xe_host_depth_store_source,
pixel_index + xesl_int2(1, 0),
source_sample_right).r)));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s + 1u,
xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source,

View File

@ -43,8 +43,8 @@ xesl_entry_inputs_end_code_begin_compute
kXenosMsaaSamples_1X, resolve_info.edram_is_depth, 0u, 0u,
resolve_info.resolution_scale)
>> 2u;
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s,
resolve_info.clear_value.xxxx);
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + 1u,
resolve_info.clear_value.xxxx);
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s,
resolve_info.clear_value.xxxx);
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + 1u,
resolve_info.clear_value.xxxx);
xesl_entry_code_end_compute

View File

@ -44,7 +44,7 @@ xesl_entry_inputs_end_code_begin_compute
>> 2u;
uint i;
xesl_unroll for (i = 0u; i < 4u; ++i) {
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + i,
resolve_info.clear_value.xyxy);
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + i,
resolve_info.clear_value.xyxy);
}
xesl_entry_code_end_compute

View File

@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
xesl_entry_code_end_compute

View File

@ -74,12 +74,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
xesl_entry_code_end_compute

View File

@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
xesl_entry_code_end_compute

View File

@ -61,12 +61,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
xesl_entry_code_end_compute

View File

@ -49,13 +49,13 @@ xesl_entry_inputs_end_code_begin_compute
// Only 32_32_32_32_FLOAT color format is 128bpp.
uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 4u) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap128(xesl_floatBitsToUint(pixel_0),
resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 4u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap128(xesl_floatBitsToUint(pixel_1),
resolve_info.dest_endian_128));

View File

@ -47,7 +47,7 @@ xesl_entry_inputs_end_code_begin_compute
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
pixel_0 = pixel_1;
}
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest,
XeResolveDestPixelAddress(resolve_info, pixel_index, 1u) >> 3u,
XeEndianSwap16(XePack16bpp4PixelsInUInt2(pixel_0, pixel_1, pixel_2,

View File

@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
pixel_0 = pixel_1;
}
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest,
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u,
XeEndianSwap32(XePack32bpp4Pixels(pixel_0, pixel_1, pixel_2, pixel_3,

View File

@ -52,12 +52,12 @@ xesl_entry_inputs_end_code_begin_compute
}
uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap64(packed_01, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap64(packed_23, resolve_info.dest_endian_128));
xesl_entry_code_end_compute

View File

@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
}
// Convert to R8.
// TODO(Triang3l): Investigate formats 8_A and 8_B.
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_resolve_dest,
XeResolveDestPixelAddress(resolve_info, pixel_index, 0u) >> 3u,
xesl_uint2(XePackR8G8B8A8UNorm(pixels_0123),

View File

@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
load_info.host_offset) >> 4u);
uint block_offset_guest =
XeTextureLoadGuestBlockOffset(load_info, block_index, 16u, 4u) >> 4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
block_offset_guest),
@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 4u) >>
4u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
block_offset_guest),

View File

@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 guest_blocks = XeEndianSwap16(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
++block_offset_host;
block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 1u) >>
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
guest_blocks = XeEndianSwap16(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
xesl_entry_code_end_compute

View File

@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
++block_offset_host;
block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
xesl_entry_code_end_compute

View File

@ -44,10 +44,10 @@ xesl_entry_inputs_end_code_begin_compute
load_info.endian_32);
xesl_uint4 block_0, block_1;
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest, block_offset_host,
block_0);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
block_offset_host + 1u, block_1);
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
block_0);
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 1u,
block_1);
block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
4u;
@ -55,8 +55,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
block_offset_host + 2u, block_0);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
block_offset_host + 3u, block_1);
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 2u,
block_0);
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 3u,
block_1);
xesl_entry_code_end_compute

View File

@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
++block_offset_host;
block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 3u) >>
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
xesl_entry_code_end_compute

View File

@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
load_info.host_offset) >> 4u);
uint block_offset_guest =
XeTextureLoadGuestBlockOffset(load_info, block_index, 1u, 0u) >> 3u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(
xesl_typedStorageBufferLoad(xe_texture_load_source,

View File

@ -72,20 +72,20 @@ xesl_entry_inputs_end_code_begin_compute
end_8in16.xz = ((blocks.xz >> 8u) & 0xFFu) | ((blocks.xz & 0xFFu) << 16u);
end_8in16.yw = (blocks.xz >> 24u) | (blocks.xz & 0xFF0000u);
xesl_uint2 weights_high = XeDXTHighColorWeights(blocks.yw);
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high));
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + elements_pitch_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 8u));
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 16u));
xesl_dont_flatten
if (texel_index_host.y + 3u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest,
block_offset_host + 3u * elements_pitch_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 24u));

View File

@ -57,7 +57,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
@ -66,7 +66,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
weights >>= 12u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
@ -79,7 +79,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
@ -88,7 +88,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
weights >>= 12u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),

View File

@ -70,22 +70,22 @@ xesl_entry_inputs_end_code_begin_compute
block_offset_host += elements_pitch_host;
weights >>= 8u;
}
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
is_trans.x ? XeDXT1TransRowToRGBA8(end_8in10_01.xy, weights.x)
: (XeDXTOpaqueRowToRGB8(end_8in10_01.xy, weights.x) |
0xFF000000u));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u,
is_trans.y ? XeDXT1TransRowToRGBA8(end_8in10_01.zw, weights.y)
: (XeDXTOpaqueRowToRGB8(end_8in10_01.zw, weights.y) |
0xFF000000u));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u,
is_trans.z ? XeDXT1TransRowToRGBA8(end_8in10_23.xy, weights.z)
: (XeDXTOpaqueRowToRGB8(end_8in10_23.xy, weights.z) |
0xFF000000u));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 3u,
is_trans.w ? XeDXT1TransRowToRGBA8(end_8in10_23.zw, weights.w)
: (XeDXTOpaqueRowToRGB8(end_8in10_23.zw, weights.w) |

View File

@ -55,25 +55,25 @@ xesl_entry_inputs_end_code_begin_compute
// Sort the color indices so they can be used as weights for the second
// endpoint.
uint bgr_weights = XeDXTHighColorWeights(block.w);
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) +
((block.xxxx >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u);
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
((block.xxxx >> xesl_uint4(16u, 20u, 24u, 28u)) & 0xFu) *
0x11000000u);
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) +
((block.yyyy >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) *
0x11000000u);
xesl_dont_flatten
if (texel_index_host.y + 3u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest,
block_offset_host + 3u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) +

View File

@ -51,22 +51,22 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 blocks_23 = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz)));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.yw, blocks_23.yw)));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8(
xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u));

View File

@ -51,36 +51,36 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 blocks_23 = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32);
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.xz));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.xz));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
xesl_uint4 high_halfblocks = xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.yw));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.yw));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
high_halfblocks = xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
}

View File

@ -58,14 +58,14 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint2 alpha_end = (block.xx >> xesl_uint2(0u, 8u)) & 0xFFu;
uint alpha_weights = XeDXT5HighAlphaWeights(
alpha_end, (block.x >> 16u) | ((block.y & 0xFFu) << 16u));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) |
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
xesl_uint4(24u, 16u, 8u, 0u))
& 0xFF000000u));
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) |
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights >> 12u)) <<
@ -73,7 +73,7 @@ xesl_entry_inputs_end_code_begin_compute
& 0xFF000000u));
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
alpha_weights = XeDXT5HighAlphaWeights(alpha_end, block.y >> 8u);
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) |
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
@ -81,7 +81,7 @@ xesl_entry_inputs_end_code_begin_compute
& 0xFF000000u));
xesl_dont_flatten
if (texel_index_host.y + 3u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest,
block_offset_host + 3u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) |

View File

@ -58,7 +58,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y),
@ -67,7 +67,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
weights >>= 12u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y),
@ -80,7 +80,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y),
@ -89,7 +89,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host;
weights >>= 12u;
xesl_writeTypedStorageBufferStore(
xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y),

View File

@ -558,7 +558,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
// Loading and storing.
#define xesl_typedStorageBufferLoad(name, position) \
((name).xesl_id_data[uint(position)])
#define xesl_writeTypedStorageBufferStore(name, position, value) \
#define xesl_typedStorageBufferStore(name, position, value) \
((name).xesl_id_data[uint(position)] = (value))
#define xesl_uintVectorBufferLoad1(name, position) \
((name).xesl_id_data[uint(position)])
@ -582,7 +582,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
ByteAddressBuffer name : register(hlsl_t, hlsl_t_space);
// Loading and storing.
#define xesl_typedStorageBufferLoad(name, position) ((name)[uint(position)])
#define xesl_writeTypedStorageBufferStore(name, position, value) \
#define xesl_typedStorageBufferStore(name, position, value) \
((name)[uint(position)] = (value))
#define xesl_uintVectorBufferLoad1(name, position) \
((name).Load(int(position) << 2))
@ -602,7 +602,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
const device uint* name [[msl_buffer]]
// Loading and storing.
#define xesl_typedStorageBufferLoad(name, position) ((name)[size_t(position)])
#define xesl_writeTypedStorageBufferStore(name, position, value) \
#define xesl_typedStorageBufferStore(name, position, value) \
((name)[size_t(position)] = (value))
#define xesl_uintVectorBufferLoad1(name, position) ((name)[size_t(position)])
#define xesl_uintVectorBufferLoad2(name, position) \