[XeSL] Rename xesl_write*Store to xesl_*Store

This commit is contained in:
Triang3l 2022-06-24 23:37:29 +03:00
parent 7a4732e14f
commit f4a634c617
29 changed files with 89 additions and 95 deletions

View File

@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
kXenosMsaaSamples_1X, false, 0u, 0u, kXenosMsaaSamples_1X, false, 0u, 0u,
resolution_scale) resolution_scale)
>> 2u; >> 2u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s, xe_host_depth_store_dest, edram_address_int4s,
xesl_floatBitsToUint(xesl_float4( xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2D(xe_host_depth_store_source, pixel_index, 0).r, xesl_texelFetch2D(xe_host_depth_store_source, pixel_index, 0).r,
@ -56,7 +56,7 @@ xesl_entry_inputs_end_code_begin_compute
pixel_index + xesl_int2(2, 0), 0).r, pixel_index + xesl_int2(2, 0), 0).r,
xesl_texelFetch2D(xe_host_depth_store_source, xesl_texelFetch2D(xe_host_depth_store_source,
pixel_index + xesl_int2(3, 0), 0).r))); pixel_index + xesl_int2(3, 0), 0).r)));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s + 1u, xe_host_depth_store_dest, edram_address_int4s + 1u,
xesl_floatBitsToUint(xesl_float4( xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2D(xe_host_depth_store_source, xesl_texelFetch2D(xe_host_depth_store_source,

View File

@ -54,7 +54,7 @@ xesl_entry_inputs_end_code_begin_compute
XeHostDepthStoreRTMsaa2xSupported(rt_constant) XeHostDepthStoreRTMsaa2xSupported(rt_constant)
? (bool(dest_sample_index) ? 0 : 1) ? (bool(dest_sample_index) ? 0 : 1)
: (bool(dest_sample_index) ? 3 : 0); : (bool(dest_sample_index) ? 3 : 0);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s, xe_host_depth_store_dest, edram_address_int4s,
xesl_floatBitsToUint(xesl_float4( xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index, xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
@ -68,7 +68,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_texelFetch2DMS(xe_host_depth_store_source, xesl_texelFetch2DMS(xe_host_depth_store_source,
pixel_index + xesl_int2(3, 0), pixel_index + xesl_int2(3, 0),
source_sample_index).r))); source_sample_index).r)));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s + 1u, xe_host_depth_store_dest, edram_address_int4s + 1u,
xesl_floatBitsToUint(xesl_float4( xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source, xesl_texelFetch2DMS(xe_host_depth_store_source,

View File

@ -51,7 +51,7 @@ xesl_entry_inputs_end_code_begin_compute
// Render target horizontal sample in bit 0, vertical sample in bit 1. // Render target horizontal sample in bit 0, vertical sample in bit 1.
int source_sample_left = int((xesl_GlobalInvocationID.y & 1u) << 1u); int source_sample_left = int((xesl_GlobalInvocationID.y & 1u) << 1u);
int source_sample_right = source_sample_left + 1; int source_sample_right = source_sample_left + 1;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s, xe_host_depth_store_dest, edram_address_int4s,
xesl_floatBitsToUint(xesl_float4( xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index, xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
@ -64,7 +64,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_texelFetch2DMS(xe_host_depth_store_source, xesl_texelFetch2DMS(xe_host_depth_store_source,
pixel_index + xesl_int2(1, 0), pixel_index + xesl_int2(1, 0),
source_sample_right).r))); source_sample_right).r)));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_host_depth_store_dest, edram_address_int4s + 1u, xe_host_depth_store_dest, edram_address_int4s + 1u,
xesl_floatBitsToUint(xesl_float4( xesl_floatBitsToUint(xesl_float4(
xesl_texelFetch2DMS(xe_host_depth_store_source, xesl_texelFetch2DMS(xe_host_depth_store_source,

View File

@ -43,8 +43,8 @@ xesl_entry_inputs_end_code_begin_compute
kXenosMsaaSamples_1X, resolve_info.edram_is_depth, 0u, 0u, kXenosMsaaSamples_1X, resolve_info.edram_is_depth, 0u, 0u,
resolve_info.resolution_scale) resolve_info.resolution_scale)
>> 2u; >> 2u;
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s, xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s,
resolve_info.clear_value.xxxx); resolve_info.clear_value.xxxx);
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + 1u, xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + 1u,
resolve_info.clear_value.xxxx); resolve_info.clear_value.xxxx);
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -44,7 +44,7 @@ xesl_entry_inputs_end_code_begin_compute
>> 2u; >> 2u;
uint i; uint i;
xesl_unroll for (i = 0u; i < 4u; ++i) { xesl_unroll for (i = 0u; i < 4u; ++i) {
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + i, xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + i,
resolve_info.clear_value.xyxy); resolve_info.clear_value.xyxy);
} }
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567); XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
uint dest_address = uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u; XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128)); XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset( dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u; pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128)); XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -74,12 +74,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567); XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
uint dest_address = uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u; XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128)); XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset( dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u; pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128)); XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23); XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
uint dest_address = uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u; XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128)); XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset( dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u; pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128)); XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -61,12 +61,12 @@ xesl_entry_inputs_end_code_begin_compute
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23); XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
uint dest_address = uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u; XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128)); XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset( dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u; pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128)); XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -49,13 +49,13 @@ xesl_entry_inputs_end_code_begin_compute
// Only 32_32_32_32_FLOAT color format is 128bpp. // Only 32_32_32_32_FLOAT color format is 128bpp.
uint dest_address = uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 4u) >> 4u; XeResolveDestPixelAddress(resolve_info, pixel_index, 4u) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap128(xesl_floatBitsToUint(pixel_0), XeEndianSwap128(xesl_floatBitsToUint(pixel_0),
resolve_info.dest_endian_128)); resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset( dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 4u, resolve_info.resolution_scale) >> 4u; pixel_index.x, 4u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap128(xesl_floatBitsToUint(pixel_1), XeEndianSwap128(xesl_floatBitsToUint(pixel_1),
resolve_info.dest_endian_128)); resolve_info.dest_endian_128));

View File

@ -47,7 +47,7 @@ xesl_entry_inputs_end_code_begin_compute
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) { if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
pixel_0 = pixel_1; pixel_0 = pixel_1;
} }
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, xe_resolve_dest,
XeResolveDestPixelAddress(resolve_info, pixel_index, 1u) >> 3u, XeResolveDestPixelAddress(resolve_info, pixel_index, 1u) >> 3u,
XeEndianSwap16(XePack16bpp4PixelsInUInt2(pixel_0, pixel_1, pixel_2, XeEndianSwap16(XePack16bpp4PixelsInUInt2(pixel_0, pixel_1, pixel_2,

View File

@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) { if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
pixel_0 = pixel_1; pixel_0 = pixel_1;
} }
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, xe_resolve_dest,
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u, XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u,
XeEndianSwap32(XePack32bpp4Pixels(pixel_0, pixel_1, pixel_2, pixel_3, XeEndianSwap32(XePack32bpp4Pixels(pixel_0, pixel_1, pixel_2, pixel_3,

View File

@ -52,12 +52,12 @@ xesl_entry_inputs_end_code_begin_compute
} }
uint dest_address = uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u; XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap64(packed_01, resolve_info.dest_endian_128)); XeEndianSwap64(packed_01, resolve_info.dest_endian_128));
dest_address += XeResolveDestRightConsecutiveBlocksOffset( dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u; pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, dest_address, xe_resolve_dest, dest_address,
XeEndianSwap64(packed_23, resolve_info.dest_endian_128)); XeEndianSwap64(packed_23, resolve_info.dest_endian_128));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
} }
// Convert to R8. // Convert to R8.
// TODO(Triang3l): Investigate formats 8_A and 8_B. // TODO(Triang3l): Investigate formats 8_A and 8_B.
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_resolve_dest, xe_resolve_dest,
XeResolveDestPixelAddress(resolve_info, pixel_index, 0u) >> 3u, XeResolveDestPixelAddress(resolve_info, pixel_index, 0u) >> 3u,
xesl_uint2(XePackR8G8B8A8UNorm(pixels_0123), xesl_uint2(XePackR8G8B8A8UNorm(pixels_0123),

View File

@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
load_info.host_offset) >> 4u); load_info.host_offset) >> 4u);
uint block_offset_guest = uint block_offset_guest =
XeTextureLoadGuestBlockOffset(load_info, block_index, 16u, 4u) >> 4u; XeTextureLoadGuestBlockOffset(load_info, block_index, 16u, 4u) >> 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source, XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
block_offset_guest), block_offset_guest),
@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
block_offset_guest += block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 4u) >> XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 4u) >>
4u; 4u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source, XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
block_offset_guest), block_offset_guest),

View File

@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 guest_blocks = XeEndianSwap16( xesl_uint4 guest_blocks = XeEndianSwap16(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
xe_texture_load_dest, block_offset_host, XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
++block_offset_host; ++block_offset_host;
block_offset_guest += block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 1u) >> XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 1u) >>
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
guest_blocks = XeEndianSwap16( guest_blocks = XeEndianSwap16(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
xe_texture_load_dest, block_offset_host, XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 guest_blocks = XeEndianSwap32( xesl_uint4 guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
xe_texture_load_dest, block_offset_host, XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
++block_offset_host; ++block_offset_host;
block_offset_guest += block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >> XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
guest_blocks = XeEndianSwap32( guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
xe_texture_load_dest, block_offset_host, XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -44,10 +44,10 @@ xesl_entry_inputs_end_code_begin_compute
load_info.endian_32); load_info.endian_32);
xesl_uint4 block_0, block_1; xesl_uint4 block_0, block_1;
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1); XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest, block_offset_host, xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
block_0); block_0);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest, xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 1u,
block_offset_host + 1u, block_1); block_1);
block_offset_guest += block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >> XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
4u; 4u;
@ -55,8 +55,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1); XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest, xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 2u,
block_offset_host + 2u, block_0); block_0);
xesl_writeTypedStorageBufferStore(xe_texture_load_dest, xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 3u,
block_offset_host + 3u, block_1); block_1);
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 guest_blocks = XeEndianSwap32( xesl_uint4 guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
xe_texture_load_dest, block_offset_host, XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
++block_offset_host; ++block_offset_host;
block_offset_guest += block_offset_guest +=
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 3u) >> XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 3u) >>
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
guest_blocks = XeEndianSwap32( guest_blocks = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
xe_texture_load_dest, block_offset_host, XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
xesl_entry_code_end_compute xesl_entry_code_end_compute

View File

@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
load_info.host_offset) >> 4u); load_info.host_offset) >> 4u);
uint block_offset_guest = uint block_offset_guest =
XeTextureLoadGuestBlockOffset(load_info, block_index, 1u, 0u) >> 3u; XeTextureLoadGuestBlockOffset(load_info, block_index, 1u, 0u) >> 3u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4( xesl_uint4(
xesl_typedStorageBufferLoad(xe_texture_load_source, xesl_typedStorageBufferLoad(xe_texture_load_source,

View File

@ -72,20 +72,20 @@ xesl_entry_inputs_end_code_begin_compute
end_8in16.xz = ((blocks.xz >> 8u) & 0xFFu) | ((blocks.xz & 0xFFu) << 16u); end_8in16.xz = ((blocks.xz >> 8u) & 0xFFu) | ((blocks.xz & 0xFFu) << 16u);
end_8in16.yw = (blocks.xz >> 24u) | (blocks.xz & 0xFF0000u); end_8in16.yw = (blocks.xz >> 24u) | (blocks.xz & 0xFF0000u);
xesl_uint2 weights_high = XeDXTHighColorWeights(blocks.yw); xesl_uint2 weights_high = XeDXTHighColorWeights(blocks.yw);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high)); XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high));
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) { xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + elements_pitch_host, xe_texture_load_dest, block_offset_host + elements_pitch_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 8u)); XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 8u));
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) { xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host, xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 16u)); XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 16u));
xesl_dont_flatten xesl_dont_flatten
if (texel_index_host.y + 3u < load_info.height_texels) { if (texel_index_host.y + 3u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, xe_texture_load_dest,
block_offset_host + 3u * elements_pitch_host, block_offset_host + 3u * elements_pitch_host,
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 24u)); XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 24u));

View File

@ -57,7 +57,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_0.zw, weights.y), XeDXT5HighAlphaWeights(end_0.zw, weights.y),
XeDXT5HighAlphaWeights(end_1.xy, weights.z), XeDXT5HighAlphaWeights(end_1.xy, weights.z),
XeDXT5HighAlphaWeights(end_1.zw, weights.w)); XeDXT5HighAlphaWeights(end_1.zw, weights.w));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) | xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u), (XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
@ -66,7 +66,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
weights >>= 12u; weights >>= 12u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) | xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u), (XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
@ -79,7 +79,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_0.zw, weights.y), XeDXT5HighAlphaWeights(end_0.zw, weights.y),
XeDXT5HighAlphaWeights(end_1.xy, weights.z), XeDXT5HighAlphaWeights(end_1.xy, weights.z),
XeDXT5HighAlphaWeights(end_1.zw, weights.w)); XeDXT5HighAlphaWeights(end_1.zw, weights.w));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) | xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u), (XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
@ -88,7 +88,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
weights >>= 12u; weights >>= 12u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) | xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u), (XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),

View File

@ -70,22 +70,22 @@ xesl_entry_inputs_end_code_begin_compute
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
weights >>= 8u; weights >>= 8u;
} }
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
is_trans.x ? XeDXT1TransRowToRGBA8(end_8in10_01.xy, weights.x) is_trans.x ? XeDXT1TransRowToRGBA8(end_8in10_01.xy, weights.x)
: (XeDXTOpaqueRowToRGB8(end_8in10_01.xy, weights.x) | : (XeDXTOpaqueRowToRGB8(end_8in10_01.xy, weights.x) |
0xFF000000u)); 0xFF000000u));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u, xe_texture_load_dest, block_offset_host + 1u,
is_trans.y ? XeDXT1TransRowToRGBA8(end_8in10_01.zw, weights.y) is_trans.y ? XeDXT1TransRowToRGBA8(end_8in10_01.zw, weights.y)
: (XeDXTOpaqueRowToRGB8(end_8in10_01.zw, weights.y) | : (XeDXTOpaqueRowToRGB8(end_8in10_01.zw, weights.y) |
0xFF000000u)); 0xFF000000u));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u, xe_texture_load_dest, block_offset_host + 2u,
is_trans.z ? XeDXT1TransRowToRGBA8(end_8in10_23.xy, weights.z) is_trans.z ? XeDXT1TransRowToRGBA8(end_8in10_23.xy, weights.z)
: (XeDXTOpaqueRowToRGB8(end_8in10_23.xy, weights.z) | : (XeDXTOpaqueRowToRGB8(end_8in10_23.xy, weights.z) |
0xFF000000u)); 0xFF000000u));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 3u, xe_texture_load_dest, block_offset_host + 3u,
is_trans.w ? XeDXT1TransRowToRGBA8(end_8in10_23.zw, weights.w) is_trans.w ? XeDXT1TransRowToRGBA8(end_8in10_23.zw, weights.w)
: (XeDXTOpaqueRowToRGB8(end_8in10_23.zw, weights.w) | : (XeDXTOpaqueRowToRGB8(end_8in10_23.zw, weights.w) |

View File

@ -55,25 +55,25 @@ xesl_entry_inputs_end_code_begin_compute
// Sort the color indices so they can be used as weights for the second // Sort the color indices so they can be used as weights for the second
// endpoint. // endpoint.
uint bgr_weights = XeDXTHighColorWeights(block.w); uint bgr_weights = XeDXTHighColorWeights(block.w);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) + XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) +
((block.xxxx >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u); ((block.xxxx >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u);
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) { xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + elements_pitch_host, xe_texture_load_dest, block_offset_host + elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) + XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
((block.xxxx >> xesl_uint4(16u, 20u, 24u, 28u)) & 0xFu) * ((block.xxxx >> xesl_uint4(16u, 20u, 24u, 28u)) & 0xFu) *
0x11000000u); 0x11000000u);
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) { xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host, xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) + XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) +
((block.yyyy >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) * ((block.yyyy >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) *
0x11000000u); 0x11000000u);
xesl_dont_flatten xesl_dont_flatten
if (texel_index_host.y + 3u < load_info.height_texels) { if (texel_index_host.y + 3u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, xe_texture_load_dest,
block_offset_host + 3u * elements_pitch_host, block_offset_host + 3u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) + XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) +

View File

@ -51,22 +51,22 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 blocks_23 = XeEndianSwap32( xesl_uint4 blocks_23 = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz))); XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz)));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u)); XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.yw, blocks_23.yw))); XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.yw, blocks_23.yw)));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeDXT3FourBlocksRowToA8( XeDXT3FourBlocksRowToA8(
xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u)); xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u));

View File

@ -51,36 +51,36 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint4 blocks_23 = XeEndianSwap32( xesl_uint4 blocks_23 = XeEndianSwap32(
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest), xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
load_info.endian_32); load_info.endian_32);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.xz)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.xz));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u, xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.xz)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.xz));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
xesl_uint4 high_halfblocks = xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u; xesl_uint4 high_halfblocks = xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u, xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.yw)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.yw));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u, xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.yw)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.yw));
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
high_halfblocks = xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u; high_halfblocks = xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 1u, xe_texture_load_dest, block_offset_host + 1u,
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw)); XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
} }

View File

@ -58,14 +58,14 @@ xesl_entry_inputs_end_code_begin_compute
xesl_uint2 alpha_end = (block.xx >> xesl_uint2(0u, 8u)) & 0xFFu; xesl_uint2 alpha_end = (block.xx >> xesl_uint2(0u, 8u)) & 0xFFu;
uint alpha_weights = XeDXT5HighAlphaWeights( uint alpha_weights = XeDXT5HighAlphaWeights(
alpha_end, (block.x >> 16u) | ((block.y & 0xFFu) << 16u)); alpha_end, (block.x >> 16u) | ((block.y & 0xFFu) << 16u));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) | XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) |
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) << ((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
xesl_uint4(24u, 16u, 8u, 0u)) xesl_uint4(24u, 16u, 8u, 0u))
& 0xFF000000u)); & 0xFF000000u));
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) { xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + elements_pitch_host, xe_texture_load_dest, block_offset_host + elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) | XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) |
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights >> 12u)) << ((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights >> 12u)) <<
@ -73,7 +73,7 @@ xesl_entry_inputs_end_code_begin_compute
& 0xFF000000u)); & 0xFF000000u));
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) { xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
alpha_weights = XeDXT5HighAlphaWeights(alpha_end, block.y >> 8u); alpha_weights = XeDXT5HighAlphaWeights(alpha_end, block.y >> 8u);
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host, xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) | XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) |
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) << ((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
@ -81,7 +81,7 @@ xesl_entry_inputs_end_code_begin_compute
& 0xFF000000u)); & 0xFF000000u));
xesl_dont_flatten xesl_dont_flatten
if (texel_index_host.y + 3u < load_info.height_texels) { if (texel_index_host.y + 3u < load_info.height_texels) {
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, xe_texture_load_dest,
block_offset_host + 3u * elements_pitch_host, block_offset_host + 3u * elements_pitch_host,
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) | XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) |

View File

@ -58,7 +58,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_01.zw, weights.y), XeDXT5HighAlphaWeights(end_01.zw, weights.y),
XeDXT5HighAlphaWeights(end_23.xy, weights.z), XeDXT5HighAlphaWeights(end_23.xy, weights.z),
XeDXT5HighAlphaWeights(end_23.zw, weights.w)); XeDXT5HighAlphaWeights(end_23.zw, weights.w));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x), xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y), XeDXT5RowToA8(end_01.zw, weights.y),
@ -67,7 +67,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
weights >>= 12u; weights >>= 12u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x), xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y), XeDXT5RowToA8(end_01.zw, weights.y),
@ -80,7 +80,7 @@ xesl_entry_inputs_end_code_begin_compute
XeDXT5HighAlphaWeights(end_01.zw, weights.y), XeDXT5HighAlphaWeights(end_01.zw, weights.y),
XeDXT5HighAlphaWeights(end_23.xy, weights.z), XeDXT5HighAlphaWeights(end_23.xy, weights.z),
XeDXT5HighAlphaWeights(end_23.zw, weights.w)); XeDXT5HighAlphaWeights(end_23.zw, weights.w));
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x), xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y), XeDXT5RowToA8(end_01.zw, weights.y),
@ -89,7 +89,7 @@ xesl_entry_inputs_end_code_begin_compute
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) { xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
block_offset_host += elements_pitch_host; block_offset_host += elements_pitch_host;
weights >>= 12u; weights >>= 12u;
xesl_writeTypedStorageBufferStore( xesl_typedStorageBufferStore(
xe_texture_load_dest, block_offset_host, xe_texture_load_dest, block_offset_host,
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x), xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
XeDXT5RowToA8(end_01.zw, weights.y), XeDXT5RowToA8(end_01.zw, weights.y),

View File

@ -558,7 +558,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
// Loading and storing. // Loading and storing.
// Typed storage buffer read: yields the element of the xesl_id_data member of
// `name` at index `position`, with the index converted to uint. The whole
// expansion is parenthesized so it can be used inside a larger expression.
#define xesl_typedStorageBufferLoad(name, position) \ #define xesl_typedStorageBufferLoad(name, position) \
((name).xesl_id_data[uint(position)]) ((name).xesl_id_data[uint(position)])
// Typed storage buffer write: assigns `value` to the element of the
// xesl_id_data member of `name` at index `position` (converted to uint). The
// expansion is a parenthesized assignment expression. Both spellings on the
// line below - xesl_writeTypedStorageBufferStore and
// xesl_typedStorageBufferStore - have the same parameters and expansion.
#define xesl_writeTypedStorageBufferStore(name, position, value) \ #define xesl_typedStorageBufferStore(name, position, value) \
((name).xesl_id_data[uint(position)] = (value)) ((name).xesl_id_data[uint(position)] = (value))
// Reads the element of the xesl_id_data member of `name` at index `position`
// (converted to uint).
// NOTE(review): the "1" suffix presumably means one uint per load - confirm
// against the other xesl_uintVectorBufferLoad* variants.
#define xesl_uintVectorBufferLoad1(name, position) \ #define xesl_uintVectorBufferLoad1(name, position) \
((name).xesl_id_data[uint(position)]) ((name).xesl_id_data[uint(position)])
@ -582,7 +582,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
ByteAddressBuffer name : register(hlsl_t, hlsl_t_space); ByteAddressBuffer name : register(hlsl_t, hlsl_t_space);
// Loading and storing. // Loading and storing.
// Typed storage buffer read: indexes `name` directly with `position`
// converted to uint; the expansion is parenthesized for use in expressions.
#define xesl_typedStorageBufferLoad(name, position) ((name)[uint(position)]) #define xesl_typedStorageBufferLoad(name, position) ((name)[uint(position)])
// Typed storage buffer write: assigns `value` to the element of `name` at
// index `position` (converted to uint). Both spellings on the line below -
// xesl_writeTypedStorageBufferStore and xesl_typedStorageBufferStore - have
// the same parameters and expansion.
#define xesl_writeTypedStorageBufferStore(name, position, value) \ #define xesl_typedStorageBufferStore(name, position, value) \
((name)[uint(position)] = (value)) ((name)[uint(position)] = (value))
// Calls the Load method of `name` with `position` converted to int and
// shifted left by 2 (i.e. multiplied by 4).
// NOTE(review): presumably this turns a 32-bit element index into the byte
// address expected by a ByteAddressBuffer - confirm against the buffer
// declaration macro.
#define xesl_uintVectorBufferLoad1(name, position) \ #define xesl_uintVectorBufferLoad1(name, position) \
((name).Load(int(position) << 2)) ((name).Load(int(position) << 2))
@ -602,7 +602,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
const device uint* name [[msl_buffer]] const device uint* name [[msl_buffer]]
// Loading and storing. // Loading and storing.
// Typed storage buffer read: indexes `name` directly with `position`
// converted to size_t; the expansion is parenthesized for use in expressions.
#define xesl_typedStorageBufferLoad(name, position) ((name)[size_t(position)]) #define xesl_typedStorageBufferLoad(name, position) ((name)[size_t(position)])
// Typed storage buffer write: assigns `value` to the element of `name` at
// index `position` (converted to size_t). Both spellings on the line below -
// xesl_writeTypedStorageBufferStore and xesl_typedStorageBufferStore - have
// the same parameters and expansion.
#define xesl_writeTypedStorageBufferStore(name, position, value) \ #define xesl_typedStorageBufferStore(name, position, value) \
((name)[size_t(position)] = (value)) ((name)[size_t(position)] = (value))
// Reads the element of `name` at index `position` (converted to size_t).
// NOTE(review): presumably `name` is the `const device uint*` parameter
// declared by the buffer macro in this section - confirm.
#define xesl_uintVectorBufferLoad1(name, position) ((name)[size_t(position)]) #define xesl_uintVectorBufferLoad1(name, position) ((name)[size_t(position)])
#define xesl_uintVectorBufferLoad2(name, position) \ #define xesl_uintVectorBufferLoad2(name, position) \