[XeSL] xesl_write*Store > xesl_*Store
This commit is contained in:
parent
7a4732e14f
commit
f4a634c617
|
@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
kXenosMsaaSamples_1X, false, 0u, 0u,
|
kXenosMsaaSamples_1X, false, 0u, 0u,
|
||||||
resolution_scale)
|
resolution_scale)
|
||||||
>> 2u;
|
>> 2u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_host_depth_store_dest, edram_address_int4s,
|
xe_host_depth_store_dest, edram_address_int4s,
|
||||||
xesl_floatBitsToUint(xesl_float4(
|
xesl_floatBitsToUint(xesl_float4(
|
||||||
xesl_texelFetch2D(xe_host_depth_store_source, pixel_index, 0).r,
|
xesl_texelFetch2D(xe_host_depth_store_source, pixel_index, 0).r,
|
||||||
|
@ -56,7 +56,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
pixel_index + xesl_int2(2, 0), 0).r,
|
pixel_index + xesl_int2(2, 0), 0).r,
|
||||||
xesl_texelFetch2D(xe_host_depth_store_source,
|
xesl_texelFetch2D(xe_host_depth_store_source,
|
||||||
pixel_index + xesl_int2(3, 0), 0).r)));
|
pixel_index + xesl_int2(3, 0), 0).r)));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
||||||
xesl_floatBitsToUint(xesl_float4(
|
xesl_floatBitsToUint(xesl_float4(
|
||||||
xesl_texelFetch2D(xe_host_depth_store_source,
|
xesl_texelFetch2D(xe_host_depth_store_source,
|
||||||
|
|
|
@ -54,7 +54,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeHostDepthStoreRTMsaa2xSupported(rt_constant)
|
XeHostDepthStoreRTMsaa2xSupported(rt_constant)
|
||||||
? (bool(dest_sample_index) ? 0 : 1)
|
? (bool(dest_sample_index) ? 0 : 1)
|
||||||
: (bool(dest_sample_index) ? 3 : 0);
|
: (bool(dest_sample_index) ? 3 : 0);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_host_depth_store_dest, edram_address_int4s,
|
xe_host_depth_store_dest, edram_address_int4s,
|
||||||
xesl_floatBitsToUint(xesl_float4(
|
xesl_floatBitsToUint(xesl_float4(
|
||||||
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
|
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
|
||||||
|
@ -68,7 +68,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||||
pixel_index + xesl_int2(3, 0),
|
pixel_index + xesl_int2(3, 0),
|
||||||
source_sample_index).r)));
|
source_sample_index).r)));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
||||||
xesl_floatBitsToUint(xesl_float4(
|
xesl_floatBitsToUint(xesl_float4(
|
||||||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||||
|
|
|
@ -51,7 +51,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
// Render target horizontal sample in bit 0, vertical sample in bit 1.
|
// Render target horizontal sample in bit 0, vertical sample in bit 1.
|
||||||
int source_sample_left = int((xesl_GlobalInvocationID.y & 1u) << 1u);
|
int source_sample_left = int((xesl_GlobalInvocationID.y & 1u) << 1u);
|
||||||
int source_sample_right = source_sample_left + 1;
|
int source_sample_right = source_sample_left + 1;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_host_depth_store_dest, edram_address_int4s,
|
xe_host_depth_store_dest, edram_address_int4s,
|
||||||
xesl_floatBitsToUint(xesl_float4(
|
xesl_floatBitsToUint(xesl_float4(
|
||||||
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
|
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
|
||||||
|
@ -64,7 +64,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||||
pixel_index + xesl_int2(1, 0),
|
pixel_index + xesl_int2(1, 0),
|
||||||
source_sample_right).r)));
|
source_sample_right).r)));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
||||||
xesl_floatBitsToUint(xesl_float4(
|
xesl_floatBitsToUint(xesl_float4(
|
||||||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||||
|
|
|
@ -43,8 +43,8 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
kXenosMsaaSamples_1X, resolve_info.edram_is_depth, 0u, 0u,
|
kXenosMsaaSamples_1X, resolve_info.edram_is_depth, 0u, 0u,
|
||||||
resolve_info.resolution_scale)
|
resolve_info.resolution_scale)
|
||||||
>> 2u;
|
>> 2u;
|
||||||
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s,
|
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s,
|
||||||
resolve_info.clear_value.xxxx);
|
resolve_info.clear_value.xxxx);
|
||||||
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + 1u,
|
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + 1u,
|
||||||
resolve_info.clear_value.xxxx);
|
resolve_info.clear_value.xxxx);
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -44,7 +44,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
>> 2u;
|
>> 2u;
|
||||||
uint i;
|
uint i;
|
||||||
xesl_unroll for (i = 0u; i < 4u; ++i) {
|
xesl_unroll for (i = 0u; i < 4u; ++i) {
|
||||||
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + i,
|
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + i,
|
||||||
resolve_info.clear_value.xyxy);
|
resolve_info.clear_value.xyxy);
|
||||||
}
|
}
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
|
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
|
||||||
uint dest_address =
|
uint dest_address =
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
|
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
|
||||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||||
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
|
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
|
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -74,12 +74,12 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
|
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
|
||||||
uint dest_address =
|
uint dest_address =
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
|
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
|
||||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||||
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
|
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
|
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
|
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
|
||||||
uint dest_address =
|
uint dest_address =
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
|
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
|
||||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||||
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
|
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -61,12 +61,12 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
|
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
|
||||||
uint dest_address =
|
uint dest_address =
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
|
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
|
||||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||||
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
|
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -49,13 +49,13 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
// Only 32_32_32_32_FLOAT color format is 128bpp.
|
// Only 32_32_32_32_FLOAT color format is 128bpp.
|
||||||
uint dest_address =
|
uint dest_address =
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 4u) >> 4u;
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 4u) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap128(xesl_floatBitsToUint(pixel_0),
|
XeEndianSwap128(xesl_floatBitsToUint(pixel_0),
|
||||||
resolve_info.dest_endian_128));
|
resolve_info.dest_endian_128));
|
||||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||||
pixel_index.x, 4u, resolve_info.resolution_scale) >> 4u;
|
pixel_index.x, 4u, resolve_info.resolution_scale) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap128(xesl_floatBitsToUint(pixel_1),
|
XeEndianSwap128(xesl_floatBitsToUint(pixel_1),
|
||||||
resolve_info.dest_endian_128));
|
resolve_info.dest_endian_128));
|
||||||
|
|
|
@ -47,7 +47,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
|
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
|
||||||
pixel_0 = pixel_1;
|
pixel_0 = pixel_1;
|
||||||
}
|
}
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest,
|
xe_resolve_dest,
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 1u) >> 3u,
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 1u) >> 3u,
|
||||||
XeEndianSwap16(XePack16bpp4PixelsInUInt2(pixel_0, pixel_1, pixel_2,
|
XeEndianSwap16(XePack16bpp4PixelsInUInt2(pixel_0, pixel_1, pixel_2,
|
||||||
|
|
|
@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
|
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
|
||||||
pixel_0 = pixel_1;
|
pixel_0 = pixel_1;
|
||||||
}
|
}
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest,
|
xe_resolve_dest,
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u,
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u,
|
||||||
XeEndianSwap32(XePack32bpp4Pixels(pixel_0, pixel_1, pixel_2, pixel_3,
|
XeEndianSwap32(XePack32bpp4Pixels(pixel_0, pixel_1, pixel_2, pixel_3,
|
||||||
|
|
|
@ -52,12 +52,12 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
}
|
}
|
||||||
uint dest_address =
|
uint dest_address =
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap64(packed_01, resolve_info.dest_endian_128));
|
XeEndianSwap64(packed_01, resolve_info.dest_endian_128));
|
||||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||||
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest, dest_address,
|
xe_resolve_dest, dest_address,
|
||||||
XeEndianSwap64(packed_23, resolve_info.dest_endian_128));
|
XeEndianSwap64(packed_23, resolve_info.dest_endian_128));
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
}
|
}
|
||||||
// Convert to R8.
|
// Convert to R8.
|
||||||
// TODO(Triang3l): Investigate formats 8_A and 8_B.
|
// TODO(Triang3l): Investigate formats 8_A and 8_B.
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_resolve_dest,
|
xe_resolve_dest,
|
||||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 0u) >> 3u,
|
XeResolveDestPixelAddress(resolve_info, pixel_index, 0u) >> 3u,
|
||||||
xesl_uint2(XePackR8G8B8A8UNorm(pixels_0123),
|
xesl_uint2(XePackR8G8B8A8UNorm(pixels_0123),
|
||||||
|
|
|
@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
load_info.host_offset) >> 4u);
|
load_info.host_offset) >> 4u);
|
||||||
uint block_offset_guest =
|
uint block_offset_guest =
|
||||||
XeTextureLoadGuestBlockOffset(load_info, block_index, 16u, 4u) >> 4u;
|
XeTextureLoadGuestBlockOffset(load_info, block_index, 16u, 4u) >> 4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
|
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
|
||||||
block_offset_guest),
|
block_offset_guest),
|
||||||
|
@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
block_offset_guest +=
|
block_offset_guest +=
|
||||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 4u) >>
|
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 4u) >>
|
||||||
4u;
|
4u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
|
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
|
||||||
block_offset_guest),
|
block_offset_guest),
|
||||||
|
|
|
@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_uint4 guest_blocks = XeEndianSwap16(
|
xesl_uint4 guest_blocks = XeEndianSwap16(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||||
xe_texture_load_dest, block_offset_host,
|
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
||||||
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
|
||||||
++block_offset_host;
|
++block_offset_host;
|
||||||
block_offset_guest +=
|
block_offset_guest +=
|
||||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 1u) >>
|
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 1u) >>
|
||||||
|
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
guest_blocks = XeEndianSwap16(
|
guest_blocks = XeEndianSwap16(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||||
xe_texture_load_dest, block_offset_host,
|
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
||||||
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_uint4 guest_blocks = XeEndianSwap32(
|
xesl_uint4 guest_blocks = XeEndianSwap32(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||||
xe_texture_load_dest, block_offset_host,
|
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
||||||
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
|
||||||
++block_offset_host;
|
++block_offset_host;
|
||||||
block_offset_guest +=
|
block_offset_guest +=
|
||||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
|
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
|
||||||
|
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
guest_blocks = XeEndianSwap32(
|
guest_blocks = XeEndianSwap32(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||||
xe_texture_load_dest, block_offset_host,
|
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
||||||
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -44,10 +44,10 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_uint4 block_0, block_1;
|
xesl_uint4 block_0, block_1;
|
||||||
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
|
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
|
||||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||||
block_0);
|
block_0);
|
||||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 1u,
|
||||||
block_offset_host + 1u, block_1);
|
block_1);
|
||||||
block_offset_guest +=
|
block_offset_guest +=
|
||||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
|
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
|
||||||
4u;
|
4u;
|
||||||
|
@ -55,8 +55,8 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
|
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
|
||||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 2u,
|
||||||
block_offset_host + 2u, block_0);
|
block_0);
|
||||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 3u,
|
||||||
block_offset_host + 3u, block_1);
|
block_1);
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_uint4 guest_blocks = XeEndianSwap32(
|
xesl_uint4 guest_blocks = XeEndianSwap32(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||||
xe_texture_load_dest, block_offset_host,
|
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
||||||
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
|
||||||
++block_offset_host;
|
++block_offset_host;
|
||||||
block_offset_guest +=
|
block_offset_guest +=
|
||||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 3u) >>
|
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 3u) >>
|
||||||
|
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
guest_blocks = XeEndianSwap32(
|
guest_blocks = XeEndianSwap32(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||||
xe_texture_load_dest, block_offset_host,
|
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
||||||
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
|
||||||
xesl_entry_code_end_compute
|
xesl_entry_code_end_compute
|
||||||
|
|
|
@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
load_info.host_offset) >> 4u);
|
load_info.host_offset) >> 4u);
|
||||||
uint block_offset_guest =
|
uint block_offset_guest =
|
||||||
XeTextureLoadGuestBlockOffset(load_info, block_index, 1u, 0u) >> 3u;
|
XeTextureLoadGuestBlockOffset(load_info, block_index, 1u, 0u) >> 3u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(
|
xesl_uint4(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source,
|
xesl_typedStorageBufferLoad(xe_texture_load_source,
|
||||||
|
|
|
@ -72,20 +72,20 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
end_8in16.xz = ((blocks.xz >> 8u) & 0xFFu) | ((blocks.xz & 0xFFu) << 16u);
|
end_8in16.xz = ((blocks.xz >> 8u) & 0xFFu) | ((blocks.xz & 0xFFu) << 16u);
|
||||||
end_8in16.yw = (blocks.xz >> 24u) | (blocks.xz & 0xFF0000u);
|
end_8in16.yw = (blocks.xz >> 24u) | (blocks.xz & 0xFF0000u);
|
||||||
xesl_uint2 weights_high = XeDXTHighColorWeights(blocks.yw);
|
xesl_uint2 weights_high = XeDXTHighColorWeights(blocks.yw);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high));
|
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high));
|
||||||
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
||||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 8u));
|
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 8u));
|
||||||
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
||||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 16u));
|
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 16u));
|
||||||
xesl_dont_flatten
|
xesl_dont_flatten
|
||||||
if (texel_index_host.y + 3u < load_info.height_texels) {
|
if (texel_index_host.y + 3u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest,
|
xe_texture_load_dest,
|
||||||
block_offset_host + 3u * elements_pitch_host,
|
block_offset_host + 3u * elements_pitch_host,
|
||||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 24u));
|
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 24u));
|
||||||
|
|
|
@ -57,7 +57,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
|
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
|
||||||
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
|
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
|
||||||
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
|
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||||
|
@ -66,7 +66,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
weights >>= 12u;
|
weights >>= 12u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||||
|
@ -79,7 +79,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
|
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
|
||||||
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
|
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
|
||||||
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
|
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||||
|
@ -88,7 +88,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
weights >>= 12u;
|
weights >>= 12u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||||
|
|
|
@ -70,22 +70,22 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
weights >>= 8u;
|
weights >>= 8u;
|
||||||
}
|
}
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
is_trans.x ? XeDXT1TransRowToRGBA8(end_8in10_01.xy, weights.x)
|
is_trans.x ? XeDXT1TransRowToRGBA8(end_8in10_01.xy, weights.x)
|
||||||
: (XeDXTOpaqueRowToRGB8(end_8in10_01.xy, weights.x) |
|
: (XeDXTOpaqueRowToRGB8(end_8in10_01.xy, weights.x) |
|
||||||
0xFF000000u));
|
0xFF000000u));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 1u,
|
xe_texture_load_dest, block_offset_host + 1u,
|
||||||
is_trans.y ? XeDXT1TransRowToRGBA8(end_8in10_01.zw, weights.y)
|
is_trans.y ? XeDXT1TransRowToRGBA8(end_8in10_01.zw, weights.y)
|
||||||
: (XeDXTOpaqueRowToRGB8(end_8in10_01.zw, weights.y) |
|
: (XeDXTOpaqueRowToRGB8(end_8in10_01.zw, weights.y) |
|
||||||
0xFF000000u));
|
0xFF000000u));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 2u,
|
xe_texture_load_dest, block_offset_host + 2u,
|
||||||
is_trans.z ? XeDXT1TransRowToRGBA8(end_8in10_23.xy, weights.z)
|
is_trans.z ? XeDXT1TransRowToRGBA8(end_8in10_23.xy, weights.z)
|
||||||
: (XeDXTOpaqueRowToRGB8(end_8in10_23.xy, weights.z) |
|
: (XeDXTOpaqueRowToRGB8(end_8in10_23.xy, weights.z) |
|
||||||
0xFF000000u));
|
0xFF000000u));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 3u,
|
xe_texture_load_dest, block_offset_host + 3u,
|
||||||
is_trans.w ? XeDXT1TransRowToRGBA8(end_8in10_23.zw, weights.w)
|
is_trans.w ? XeDXT1TransRowToRGBA8(end_8in10_23.zw, weights.w)
|
||||||
: (XeDXTOpaqueRowToRGB8(end_8in10_23.zw, weights.w) |
|
: (XeDXTOpaqueRowToRGB8(end_8in10_23.zw, weights.w) |
|
||||||
|
|
|
@ -55,25 +55,25 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
// Sort the color indices so they can be used as weights for the second
|
// Sort the color indices so they can be used as weights for the second
|
||||||
// endpoint.
|
// endpoint.
|
||||||
uint bgr_weights = XeDXTHighColorWeights(block.w);
|
uint bgr_weights = XeDXTHighColorWeights(block.w);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) +
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) +
|
||||||
((block.xxxx >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u);
|
((block.xxxx >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u);
|
||||||
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
|
||||||
((block.xxxx >> xesl_uint4(16u, 20u, 24u, 28u)) & 0xFu) *
|
((block.xxxx >> xesl_uint4(16u, 20u, 24u, 28u)) & 0xFu) *
|
||||||
0x11000000u);
|
0x11000000u);
|
||||||
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) +
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) +
|
||||||
((block.yyyy >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) *
|
((block.yyyy >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) *
|
||||||
0x11000000u);
|
0x11000000u);
|
||||||
xesl_dont_flatten
|
xesl_dont_flatten
|
||||||
if (texel_index_host.y + 3u < load_info.height_texels) {
|
if (texel_index_host.y + 3u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest,
|
xe_texture_load_dest,
|
||||||
block_offset_host + 3u * elements_pitch_host,
|
block_offset_host + 3u * elements_pitch_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) +
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) +
|
||||||
|
|
|
@ -51,22 +51,22 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_uint4 blocks_23 = XeEndianSwap32(
|
xesl_uint4 blocks_23 = XeEndianSwap32(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz)));
|
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz)));
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u));
|
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u));
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.yw, blocks_23.yw)));
|
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.yw, blocks_23.yw)));
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeDXT3FourBlocksRowToA8(
|
XeDXT3FourBlocksRowToA8(
|
||||||
xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u));
|
xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u));
|
||||||
|
|
|
@ -51,36 +51,36 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_uint4 blocks_23 = XeEndianSwap32(
|
xesl_uint4 blocks_23 = XeEndianSwap32(
|
||||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||||
load_info.endian_32);
|
load_info.endian_32);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.xz));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.xz));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 1u,
|
xe_texture_load_dest, block_offset_host + 1u,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.xz));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.xz));
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
xesl_uint4 high_halfblocks = xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u;
|
xesl_uint4 high_halfblocks = xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 1u,
|
xe_texture_load_dest, block_offset_host + 1u,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.yw));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.yw));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 1u,
|
xe_texture_load_dest, block_offset_host + 1u,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.yw));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.yw));
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
high_halfblocks = xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u;
|
high_halfblocks = xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 1u,
|
xe_texture_load_dest, block_offset_host + 1u,
|
||||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
|
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,14 +58,14 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_uint2 alpha_end = (block.xx >> xesl_uint2(0u, 8u)) & 0xFFu;
|
xesl_uint2 alpha_end = (block.xx >> xesl_uint2(0u, 8u)) & 0xFFu;
|
||||||
uint alpha_weights = XeDXT5HighAlphaWeights(
|
uint alpha_weights = XeDXT5HighAlphaWeights(
|
||||||
alpha_end, (block.x >> 16u) | ((block.y & 0xFFu) << 16u));
|
alpha_end, (block.x >> 16u) | ((block.y & 0xFFu) << 16u));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) |
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) |
|
||||||
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
|
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
|
||||||
xesl_uint4(24u, 16u, 8u, 0u))
|
xesl_uint4(24u, 16u, 8u, 0u))
|
||||||
& 0xFF000000u));
|
& 0xFF000000u));
|
||||||
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) |
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) |
|
||||||
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights >> 12u)) <<
|
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights >> 12u)) <<
|
||||||
|
@ -73,7 +73,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
& 0xFF000000u));
|
& 0xFF000000u));
|
||||||
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
||||||
alpha_weights = XeDXT5HighAlphaWeights(alpha_end, block.y >> 8u);
|
alpha_weights = XeDXT5HighAlphaWeights(alpha_end, block.y >> 8u);
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) |
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) |
|
||||||
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
|
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
|
||||||
|
@ -81,7 +81,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
& 0xFF000000u));
|
& 0xFF000000u));
|
||||||
xesl_dont_flatten
|
xesl_dont_flatten
|
||||||
if (texel_index_host.y + 3u < load_info.height_texels) {
|
if (texel_index_host.y + 3u < load_info.height_texels) {
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest,
|
xe_texture_load_dest,
|
||||||
block_offset_host + 3u * elements_pitch_host,
|
block_offset_host + 3u * elements_pitch_host,
|
||||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) |
|
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) |
|
||||||
|
|
|
@ -58,7 +58,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
|
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
|
||||||
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
|
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
|
||||||
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
|
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||||
|
@ -67,7 +67,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
weights >>= 12u;
|
weights >>= 12u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||||
|
@ -80,7 +80,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
|
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
|
||||||
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
|
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
|
||||||
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
|
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||||
|
@ -89,7 +89,7 @@ xesl_entry_inputs_end_code_begin_compute
|
||||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||||
block_offset_host += elements_pitch_host;
|
block_offset_host += elements_pitch_host;
|
||||||
weights >>= 12u;
|
weights >>= 12u;
|
||||||
xesl_writeTypedStorageBufferStore(
|
xesl_typedStorageBufferStore(
|
||||||
xe_texture_load_dest, block_offset_host,
|
xe_texture_load_dest, block_offset_host,
|
||||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||||
|
|
|
@ -558,7 +558,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
|
||||||
// Loading and storing.
|
// Loading and storing.
|
||||||
#define xesl_typedStorageBufferLoad(name, position) \
|
#define xesl_typedStorageBufferLoad(name, position) \
|
||||||
((name).xesl_id_data[uint(position)])
|
((name).xesl_id_data[uint(position)])
|
||||||
#define xesl_writeTypedStorageBufferStore(name, position, value) \
|
#define xesl_typedStorageBufferStore(name, position, value) \
|
||||||
((name).xesl_id_data[uint(position)] = (value))
|
((name).xesl_id_data[uint(position)] = (value))
|
||||||
#define xesl_uintVectorBufferLoad1(name, position) \
|
#define xesl_uintVectorBufferLoad1(name, position) \
|
||||||
((name).xesl_id_data[uint(position)])
|
((name).xesl_id_data[uint(position)])
|
||||||
|
@ -582,7 +582,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
|
||||||
ByteAddressBuffer name : register(hlsl_t, hlsl_t_space);
|
ByteAddressBuffer name : register(hlsl_t, hlsl_t_space);
|
||||||
// Loading and storing.
|
// Loading and storing.
|
||||||
#define xesl_typedStorageBufferLoad(name, position) ((name)[uint(position)])
|
#define xesl_typedStorageBufferLoad(name, position) ((name)[uint(position)])
|
||||||
#define xesl_writeTypedStorageBufferStore(name, position, value) \
|
#define xesl_typedStorageBufferStore(name, position, value) \
|
||||||
((name)[uint(position)] = (value))
|
((name)[uint(position)] = (value))
|
||||||
#define xesl_uintVectorBufferLoad1(name, position) \
|
#define xesl_uintVectorBufferLoad1(name, position) \
|
||||||
((name).Load(int(position) << 2))
|
((name).Load(int(position) << 2))
|
||||||
|
@ -602,7 +602,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
|
||||||
const device uint* name [[msl_buffer]]
|
const device uint* name [[msl_buffer]]
|
||||||
// Loading and storing.
|
// Loading and storing.
|
||||||
#define xesl_typedStorageBufferLoad(name, position) ((name)[size_t(position)])
|
#define xesl_typedStorageBufferLoad(name, position) ((name)[size_t(position)])
|
||||||
#define xesl_writeTypedStorageBufferStore(name, position, value) \
|
#define xesl_typedStorageBufferStore(name, position, value) \
|
||||||
((name)[size_t(position)] = (value))
|
((name)[size_t(position)] = (value))
|
||||||
#define xesl_uintVectorBufferLoad1(name, position) ((name)[size_t(position)])
|
#define xesl_uintVectorBufferLoad1(name, position) ((name)[size_t(position)])
|
||||||
#define xesl_uintVectorBufferLoad2(name, position) \
|
#define xesl_uintVectorBufferLoad2(name, position) \
|
||||||
|
|
Loading…
Reference in New Issue