[XeSL] xesl_write*Store > xesl_*Store
This commit is contained in:
parent
7a4732e14f
commit
f4a634c617
|
@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
kXenosMsaaSamples_1X, false, 0u, 0u,
|
||||
resolution_scale)
|
||||
>> 2u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_host_depth_store_dest, edram_address_int4s,
|
||||
xesl_floatBitsToUint(xesl_float4(
|
||||
xesl_texelFetch2D(xe_host_depth_store_source, pixel_index, 0).r,
|
||||
|
@ -56,7 +56,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
pixel_index + xesl_int2(2, 0), 0).r,
|
||||
xesl_texelFetch2D(xe_host_depth_store_source,
|
||||
pixel_index + xesl_int2(3, 0), 0).r)));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
||||
xesl_floatBitsToUint(xesl_float4(
|
||||
xesl_texelFetch2D(xe_host_depth_store_source,
|
||||
|
|
|
@ -54,7 +54,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeHostDepthStoreRTMsaa2xSupported(rt_constant)
|
||||
? (bool(dest_sample_index) ? 0 : 1)
|
||||
: (bool(dest_sample_index) ? 3 : 0);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_host_depth_store_dest, edram_address_int4s,
|
||||
xesl_floatBitsToUint(xesl_float4(
|
||||
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
|
||||
|
@ -68,7 +68,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||
pixel_index + xesl_int2(3, 0),
|
||||
source_sample_index).r)));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
||||
xesl_floatBitsToUint(xesl_float4(
|
||||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||
|
|
|
@ -51,7 +51,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
// Render target horizontal sample in bit 0, vertical sample in bit 1.
|
||||
int source_sample_left = int((xesl_GlobalInvocationID.y & 1u) << 1u);
|
||||
int source_sample_right = source_sample_left + 1;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_host_depth_store_dest, edram_address_int4s,
|
||||
xesl_floatBitsToUint(xesl_float4(
|
||||
xesl_texelFetch2DMS(xe_host_depth_store_source, pixel_index,
|
||||
|
@ -64,7 +64,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||
pixel_index + xesl_int2(1, 0),
|
||||
source_sample_right).r)));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_host_depth_store_dest, edram_address_int4s + 1u,
|
||||
xesl_floatBitsToUint(xesl_float4(
|
||||
xesl_texelFetch2DMS(xe_host_depth_store_source,
|
||||
|
|
|
@ -43,8 +43,8 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
kXenosMsaaSamples_1X, resolve_info.edram_is_depth, 0u, 0u,
|
||||
resolve_info.resolution_scale)
|
||||
>> 2u;
|
||||
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s,
|
||||
resolve_info.clear_value.xxxx);
|
||||
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + 1u,
|
||||
resolve_info.clear_value.xxxx);
|
||||
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s,
|
||||
resolve_info.clear_value.xxxx);
|
||||
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + 1u,
|
||||
resolve_info.clear_value.xxxx);
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -44,7 +44,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
>> 2u;
|
||||
uint i;
|
||||
xesl_unroll for (i = 0u; i < 4u; ++i) {
|
||||
xesl_writeTypedStorageBufferStore(xe_resolve_edram, address_int4s + i,
|
||||
resolve_info.clear_value.xyxy);
|
||||
xesl_typedStorageBufferStore(xe_resolve_edram, address_int4s + i,
|
||||
resolve_info.clear_value.xyxy);
|
||||
}
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
|
||||
uint dest_address =
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
|
||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -74,12 +74,12 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
|
||||
uint dest_address =
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
|
||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -56,12 +56,12 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
|
||||
uint dest_address =
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
|
||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -61,12 +61,12 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeResolveSwap4PixelsRedBlue64bpp(resolve_info, pixels_01, pixels_23);
|
||||
uint dest_address =
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap64(pixels_01, resolve_info.dest_endian_128));
|
||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap64(pixels_23, resolve_info.dest_endian_128));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -49,13 +49,13 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
// Only 32_32_32_32_FLOAT color format is 128bpp.
|
||||
uint dest_address =
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 4u) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap128(xesl_floatBitsToUint(pixel_0),
|
||||
resolve_info.dest_endian_128));
|
||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||
pixel_index.x, 4u, resolve_info.resolution_scale) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap128(xesl_floatBitsToUint(pixel_1),
|
||||
resolve_info.dest_endian_128));
|
||||
|
|
|
@ -47,7 +47,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
|
||||
pixel_0 = pixel_1;
|
||||
}
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest,
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 1u) >> 3u,
|
||||
XeEndianSwap16(XePack16bpp4PixelsInUInt2(pixel_0, pixel_1, pixel_2,
|
||||
|
|
|
@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
|
||||
pixel_0 = pixel_1;
|
||||
}
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest,
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u,
|
||||
XeEndianSwap32(XePack32bpp4Pixels(pixel_0, pixel_1, pixel_2, pixel_3,
|
||||
|
|
|
@ -52,12 +52,12 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
}
|
||||
uint dest_address =
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 3u) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap64(packed_01, resolve_info.dest_endian_128));
|
||||
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
|
||||
pixel_index.x, 3u, resolve_info.resolution_scale) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest, dest_address,
|
||||
XeEndianSwap64(packed_23, resolve_info.dest_endian_128));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -49,7 +49,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
}
|
||||
// Convert to R8.
|
||||
// TODO(Triang3l): Investigate formats 8_A and 8_B.
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_resolve_dest,
|
||||
XeResolveDestPixelAddress(resolve_info, pixel_index, 0u) >> 3u,
|
||||
xesl_uint2(XePackR8G8B8A8UNorm(pixels_0123),
|
||||
|
|
|
@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
load_info.host_offset) >> 4u);
|
||||
uint block_offset_guest =
|
||||
XeTextureLoadGuestBlockOffset(load_info, block_index, 16u, 4u) >> 4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
|
||||
block_offset_guest),
|
||||
|
@ -46,7 +46,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
block_offset_guest +=
|
||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 4u) >>
|
||||
4u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeEndianSwap32(xesl_typedStorageBufferLoad(xe_texture_load_source,
|
||||
block_offset_guest),
|
||||
|
|
|
@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_uint4 guest_blocks = XeEndianSwap16(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
||||
++block_offset_host;
|
||||
block_offset_guest +=
|
||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 1u) >>
|
||||
|
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
guest_blocks = XeEndianSwap16(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_16BPB_TRANSFORM(guest_blocks));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_uint4 guest_blocks = XeEndianSwap32(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
||||
++block_offset_host;
|
||||
block_offset_guest +=
|
||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
|
||||
|
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
guest_blocks = XeEndianSwap32(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_32BPB_TRANSFORM(guest_blocks));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -44,10 +44,10 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
load_info.endian_32);
|
||||
xesl_uint4 block_0, block_1;
|
||||
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
|
||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
block_0);
|
||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
|
||||
block_offset_host + 1u, block_1);
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
block_0);
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 1u,
|
||||
block_1);
|
||||
block_offset_guest +=
|
||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 2u) >>
|
||||
4u;
|
||||
|
@ -55,8 +55,8 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
XE_TEXTURE_LOAD_32BPB_TO_64BPB(guest_blocks, block_0, block_1);
|
||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
|
||||
block_offset_host + 2u, block_0);
|
||||
xesl_writeTypedStorageBufferStore(xe_texture_load_dest,
|
||||
block_offset_host + 3u, block_1);
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 2u,
|
||||
block_0);
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host + 3u,
|
||||
block_1);
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -41,9 +41,8 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_uint4 guest_blocks = XeEndianSwap32(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
||||
++block_offset_host;
|
||||
block_offset_guest +=
|
||||
XeTextureLoadRightConsecutiveBlocksOffset(load_info, block_index.x, 3u) >>
|
||||
|
@ -51,7 +50,6 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
guest_blocks = XeEndianSwap32(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
||||
xesl_typedStorageBufferStore(xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_64BPB_TRANSFORM(guest_blocks));
|
||||
xesl_entry_code_end_compute
|
||||
|
|
|
@ -37,7 +37,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
load_info.host_offset) >> 4u);
|
||||
uint block_offset_guest =
|
||||
XeTextureLoadGuestBlockOffset(load_info, block_index, 1u, 0u) >> 3u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source,
|
||||
|
|
|
@ -72,20 +72,20 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
end_8in16.xz = ((blocks.xz >> 8u) & 0xFFu) | ((blocks.xz & 0xFFu) << 16u);
|
||||
end_8in16.yw = (blocks.xz >> 24u) | (blocks.xz & 0xFF0000u);
|
||||
xesl_uint2 weights_high = XeDXTHighColorWeights(blocks.yw);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high));
|
||||
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 8u));
|
||||
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 16u));
|
||||
xesl_dont_flatten
|
||||
if (texel_index_host.y + 3u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest,
|
||||
block_offset_host + 3u * elements_pitch_host,
|
||||
XeCTX1TwoBlocksRowToR8G8(end_8in16, weights_high >> 24u));
|
||||
|
|
|
@ -57,7 +57,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
|
||||
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
|
||||
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||
|
@ -66,7 +66,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
weights >>= 12u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||
|
@ -79,7 +79,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeDXT5HighAlphaWeights(end_0.zw, weights.y),
|
||||
XeDXT5HighAlphaWeights(end_1.xy, weights.z),
|
||||
XeDXT5HighAlphaWeights(end_1.zw, weights.w));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||
|
@ -88,7 +88,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
weights >>= 12u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8In16(end_0.xy, weights.x) |
|
||||
(XeDXT5RowToA8In16(end_0.zw, weights.y) << 8u),
|
||||
|
|
|
@ -70,22 +70,22 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
block_offset_host += elements_pitch_host;
|
||||
weights >>= 8u;
|
||||
}
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
is_trans.x ? XeDXT1TransRowToRGBA8(end_8in10_01.xy, weights.x)
|
||||
: (XeDXTOpaqueRowToRGB8(end_8in10_01.xy, weights.x) |
|
||||
0xFF000000u));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 1u,
|
||||
is_trans.y ? XeDXT1TransRowToRGBA8(end_8in10_01.zw, weights.y)
|
||||
: (XeDXTOpaqueRowToRGB8(end_8in10_01.zw, weights.y) |
|
||||
0xFF000000u));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 2u,
|
||||
is_trans.z ? XeDXT1TransRowToRGBA8(end_8in10_23.xy, weights.z)
|
||||
: (XeDXTOpaqueRowToRGB8(end_8in10_23.xy, weights.z) |
|
||||
0xFF000000u));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 3u,
|
||||
is_trans.w ? XeDXT1TransRowToRGBA8(end_8in10_23.zw, weights.w)
|
||||
: (XeDXTOpaqueRowToRGB8(end_8in10_23.zw, weights.w) |
|
||||
|
|
|
@ -55,25 +55,25 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
// Sort the color indices so they can be used as weights for the second
|
||||
// endpoint.
|
||||
uint bgr_weights = XeDXTHighColorWeights(block.w);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) +
|
||||
((block.xxxx >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) * 0x11000000u);
|
||||
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) +
|
||||
((block.xxxx >> xesl_uint4(16u, 20u, 24u, 28u)) & 0xFu) *
|
||||
0x11000000u);
|
||||
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) +
|
||||
((block.yyyy >> xesl_uint4(0u, 4u, 8u, 12u)) & 0xFu) *
|
||||
0x11000000u);
|
||||
xesl_dont_flatten
|
||||
if (texel_index_host.y + 3u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest,
|
||||
block_offset_host + 3u * elements_pitch_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) +
|
||||
|
|
|
@ -51,22 +51,22 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_uint4 blocks_23 = XeEndianSwap32(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz)));
|
||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u));
|
||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeDXT3FourBlocksRowToA8(xesl_uint4(blocks_01.yw, blocks_23.yw)));
|
||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeDXT3FourBlocksRowToA8(
|
||||
xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u));
|
||||
|
|
|
@ -51,36 +51,36 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_uint4 blocks_23 = XeEndianSwap32(
|
||||
xesl_typedStorageBufferLoad(xe_texture_load_source, block_offset_guest),
|
||||
load_info.endian_32);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.xz));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 1u,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.xz));
|
||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
xesl_uint4 high_halfblocks = xesl_uint4(blocks_01.xz, blocks_23.xz) >> 16u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 1u,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
|
||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_01.yw));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 1u,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(blocks_23.yw));
|
||||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
high_halfblocks = xesl_uint4(blocks_01.yw, blocks_23.yw) >> 16u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.xy));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 1u,
|
||||
XE_TEXTURE_LOAD_DXT3A_AS_1_1_1_1_TO_16BPP(high_halfblocks.zw));
|
||||
}
|
||||
|
|
|
@ -58,14 +58,14 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_uint2 alpha_end = (block.xx >> xesl_uint2(0u, 8u)) & 0xFFu;
|
||||
uint alpha_weights = XeDXT5HighAlphaWeights(
|
||||
alpha_end, (block.x >> 16u) | ((block.y & 0xFFu) << 16u));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights) |
|
||||
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
|
||||
xesl_uint4(24u, 16u, 8u, 0u))
|
||||
& 0xFF000000u));
|
||||
xesl_dont_flatten if (texel_index_host.y + 1u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + elements_pitch_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 8u) |
|
||||
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights >> 12u)) <<
|
||||
|
@ -73,7 +73,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
& 0xFF000000u));
|
||||
xesl_dont_flatten if (texel_index_host.y + 2u < load_info.height_texels) {
|
||||
alpha_weights = XeDXT5HighAlphaWeights(alpha_end, block.y >> 8u);
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host + 2u * elements_pitch_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 16u) |
|
||||
((xesl_uint_x4(XeDXT5RowToA8(alpha_end, alpha_weights)) <<
|
||||
|
@ -81,7 +81,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
& 0xFF000000u));
|
||||
xesl_dont_flatten
|
||||
if (texel_index_host.y + 3u < load_info.height_texels) {
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest,
|
||||
block_offset_host + 3u * elements_pitch_host,
|
||||
XeDXTOpaqueRowToRGB8(bgr_end_8in10, bgr_weights >> 24u) |
|
||||
|
|
|
@ -58,7 +58,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
|
||||
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
|
||||
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||
|
@ -67,7 +67,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
weights >>= 12u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||
|
@ -80,7 +80,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
XeDXT5HighAlphaWeights(end_01.zw, weights.y),
|
||||
XeDXT5HighAlphaWeights(end_23.xy, weights.z),
|
||||
XeDXT5HighAlphaWeights(end_23.zw, weights.w));
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||
|
@ -89,7 +89,7 @@ xesl_entry_inputs_end_code_begin_compute
|
|||
xesl_dont_flatten if (++texel_index_host.y < load_info.height_texels) {
|
||||
block_offset_host += elements_pitch_host;
|
||||
weights >>= 12u;
|
||||
xesl_writeTypedStorageBufferStore(
|
||||
xesl_typedStorageBufferStore(
|
||||
xe_texture_load_dest, block_offset_host,
|
||||
xesl_uint4(XeDXT5RowToA8(end_01.xy, weights.x),
|
||||
XeDXT5RowToA8(end_01.zw, weights.y),
|
||||
|
|
|
@ -558,7 +558,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
|
|||
// Loading and storing.
|
||||
#define xesl_typedStorageBufferLoad(name, position) \
|
||||
((name).xesl_id_data[uint(position)])
|
||||
#define xesl_writeTypedStorageBufferStore(name, position, value) \
|
||||
#define xesl_typedStorageBufferStore(name, position, value) \
|
||||
((name).xesl_id_data[uint(position)] = (value))
|
||||
#define xesl_uintVectorBufferLoad1(name, position) \
|
||||
((name).xesl_id_data[uint(position)])
|
||||
|
@ -582,7 +582,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
|
|||
ByteAddressBuffer name : register(hlsl_t, hlsl_t_space);
|
||||
// Loading and storing.
|
||||
#define xesl_typedStorageBufferLoad(name, position) ((name)[uint(position)])
|
||||
#define xesl_writeTypedStorageBufferStore(name, position, value) \
|
||||
#define xesl_typedStorageBufferStore(name, position, value) \
|
||||
((name)[uint(position)] = (value))
|
||||
#define xesl_uintVectorBufferLoad1(name, position) \
|
||||
((name).Load(int(position) << 2))
|
||||
|
@ -602,7 +602,7 @@ xesl_float4 xesl_float_x4(float xesl_var_value) {
|
|||
const device uint* name [[msl_buffer]]
|
||||
// Loading and storing.
|
||||
#define xesl_typedStorageBufferLoad(name, position) ((name)[size_t(position)])
|
||||
#define xesl_writeTypedStorageBufferStore(name, position, value) \
|
||||
#define xesl_typedStorageBufferStore(name, position, value) \
|
||||
((name)[size_t(position)] = (value))
|
||||
#define xesl_uintVectorBufferLoad1(name, position) ((name)[size_t(position)])
|
||||
#define xesl_uintVectorBufferLoad2(name, position) \
|
||||
|
|
Loading…
Reference in New Issue