// HLSL shader include (733 lines, 28 KiB).
#ifndef XENIA_GPU_D3D12_SHADERS_RESOLVE_HLSLI_
|
|
#define XENIA_GPU_D3D12_SHADERS_RESOLVE_HLSLI_
|
|
|
|
#include "edram.xesli"
|
|
#include "pixel_formats.xesli"
|
|
#include "texture_address.xesli"
|
|
|
|
// Constants for a resolve dispatch, bound at b0. The set of members depends on
// XE_RESOLVE_CLEAR and XE_RESOLVE_RESOLUTION_SCALED; the declaration order
// defines the buffer layout and must stay as it is.
cbuffer XeResolveConstants : register(b0) {
#ifdef XE_RESOLVE_CLEAR
  // Only declared when XE_RESOLVE_CLEAR is defined.
  uint2 xe_resolve_clear_value;
#endif
  // Packed xe::gpu::draw_util::ResolveSourcePackedInfo.
  uint xe_resolve_edram_info;
  // Packed xe::gpu::draw_util::ResolveAddressPackedInfo.
  uint xe_resolve_address_info;
#ifndef XE_RESOLVE_CLEAR
  // RB_COPY_DEST_INFO after sanitization.
  uint xe_resolve_dest_info;
  // Packed xe::gpu::draw_util::ResolveCopyDestPitchPackedInfo.
  uint xe_resolve_dest_pitch_aligned;
#endif
#if !defined(XE_RESOLVE_CLEAR) && !defined(XE_RESOLVE_RESOLUTION_SCALED)
  // Added to the tiled offset computed by XeResolveDestPixelAddress when
  // resolution scaling is not compiled in.
  uint xe_resolve_dest_base;
#endif
};
|
|
|
|
#ifdef XE_RESOLVE_RESOLUTION_SCALED
|
|
// Resolution scale constant, bound at b1. Only present when
// XE_RESOLVE_RESOLUTION_SCALED is defined.
cbuffer XeResolveResolutionScaleConstant : register(b1) {
  // Packed xe::gpu::draw_util::ResolveResolutionScaleConstant. Decoded by
  // XeResolveResolutionScale as two 2-bit fields (X in bits 0-1, Y in bits
  // 2-3).
  uint xe_resolve_resolution_scale;
}
|
|
#endif
|
|
|
|
// Extracts bits 0-9 of xe_resolve_edram_info (the pitch in tiles, going by the
// function name).
uint XeResolveEdramPitchTiles() {
  uint pitch_tiles = xe_resolve_edram_info & 0x3FFu;
  return pitch_tiles;
}
|
|
|
|
// Extracts the 2-bit field at bits 10-11 of xe_resolve_edram_info. Callers in
// this file compare the result against kXenosMsaaSamples_4X.
uint XeResolveEdramMsaaSamples() {
  uint msaa_samples = (xe_resolve_edram_info >> 10u) & 0x3u;
  return msaa_samples;
}
|
|
|
|
// Tests bit 12 of xe_resolve_edram_info.
// Always false for non-one-to-one resolve.
bool XeResolveEdramIsDepth() {
  uint depth_bit = (xe_resolve_edram_info >> 12u) & 1u;
  return depth_bit != 0u;
}
|
|
|
|
// Extracts the 12-bit field at bits 13-24 of xe_resolve_edram_info (the EDRAM
// base in tiles, going by the function name).
uint XeResolveEdramBaseTiles() {
  uint base_tiles = (xe_resolve_edram_info >> 13u) & 0xFFFu;
  return base_tiles;
}
|
|
|
|
// Extracts the 4-bit field at bits 25-28 of xe_resolve_edram_info. The unpack
// functions in this file switch on it against the
// kXenosColorRenderTargetFormat_* constants.
uint XeResolveEdramFormat() {
  uint format = (xe_resolve_edram_info >> 25u) & 0xFu;
  return format;
}
|
|
|
|
// Returns bit 29 of xe_resolve_edram_info as 0 or 1. A value of 1 is treated
// as "64bpp" by XeResolveEdramFormatIs64bpp.
uint XeResolveEdramFormatIntsLog2() {
  bool bit_29_set = (xe_resolve_edram_info & (1u << 29u)) != 0u;
  return bit_29_set ? 1u : 0u;
}
|
|
|
|
// True when XeResolveEdramFormatIntsLog2() is non-zero.
bool XeResolveEdramFormatIs64bpp() {
  uint format_ints_log2 = XeResolveEdramFormatIntsLog2();
  return format_ints_log2 != 0u;
}
|
|
|
|
// Returns 1 << (format ints log2 + extra), where extra is 1 when the MSAA
// sample count field is at least kXenosMsaaSamples_4X and 0 otherwise. The
// callers multiply the result by 4 to get a stride in bytes.
uint XeResolveEdramPixelStrideInts() {
  uint msaa_4x_log2 =
      XeResolveEdramMsaaSamples() >= kXenosMsaaSamples_4X ? 1u : 0u;
  uint stride_log2 = XeResolveEdramFormatIntsLog2() + msaa_4x_log2;
  return 1u << stride_log2;
}
|
|
|
|
// Returns the resolution scale as (x, y). With XE_RESOLVE_RESOLUTION_SCALED
// defined, X is bits 0-1 and Y is bits 2-3 of xe_resolve_resolution_scale;
// otherwise the scale is the constant (1, 1).
uint2 XeResolveResolutionScale() {
#ifdef XE_RESOLVE_RESOLUTION_SCALED
  uint scale_x = xe_resolve_resolution_scale & 3u;
  uint scale_y = (xe_resolve_resolution_scale >> 2u) & 3u;
  return uint2(scale_x, scale_y);
#else
  return uint2(1u, 1u);
#endif
}
|
|
|
|
// Product of the X and Y components of XeResolveResolutionScale().
uint XeResolveResolutionScaleArea() {
  uint2 resolution_scale = XeResolveResolutionScale();
  uint area = resolution_scale.x;
  area *= resolution_scale.y;
  return area;
}
|
|
|
|
// Per-axis flag. With XE_RESOLVE_RESOLUTION_SCALED defined, an axis is flagged
// when bit 30 of xe_resolve_edram_info is set and the resolution scale along
// that axis is greater than 1. Without resolution scaling, both axes are
// always false.
bool2 XeResolveDuplicateSecondHostPixel() {
#ifdef XE_RESOLVE_RESOLUTION_SCALED
  bool2 duplicate = bool2(false, false);
  uint duplicate_bit = (xe_resolve_edram_info >> 30u) & 1u;
  if (duplicate_bit != 0u) {
    duplicate = XeResolveResolutionScale() > uint2(1u, 1u);
  }
  return duplicate;
#else
  return bool2(false, false);
#endif
}
|
|
|
|
// Offset in units of 8 guest pixels: X from bits 0-4 and Y from bits 5-6 of
// xe_resolve_address_info.
// The offset is within 160x32 guest pixels. The total value is relative to the
// source EDRAM base; relative to the destination texture base, only
// (offset * 8) & 31 is used.
uint2 XeResolveUnscaledOffsetDiv8() {
  uint offset_x_div_8 = xe_resolve_address_info & 0x1Fu;
  uint offset_y_div_8 = (xe_resolve_address_info >> 5u) & 0x3u;
  return uint2(offset_x_div_8, offset_y_div_8);
}
|
|
|
|
// XeResolveUnscaledOffsetDiv8() multiplied by 8.
uint2 XeResolveUnscaledOffset() {
  uint2 offset_div_8 = XeResolveUnscaledOffsetDiv8();
  return offset_div_8 * 8u;
}
|
|
|
|
// XeResolveUnscaledOffsetDiv8() multiplied per axis by the resolution scale.
uint2 XeResolveScaledOffsetDiv8() {
  uint2 unscaled_div_8 = XeResolveUnscaledOffsetDiv8();
  uint2 resolution_scale = XeResolveResolutionScale();
  return unscaled_div_8 * resolution_scale;
}
|
|
|
|
// XeResolveScaledOffsetDiv8() multiplied by 8.
uint2 XeResolveScaledOffset() {
  uint2 scaled_div_8 = XeResolveScaledOffsetDiv8();
  return scaled_div_8 * 8u;
}
|
|
|
|
// Size in units of 8 guest pixels: X is the 11-bit field at bits 7-17 and Y is
// the 11-bit field at bits 18-28 of xe_resolve_address_info.
uint2 XeResolveUnscaledSizeDiv8() {
  uint size_x_div_8 = (xe_resolve_address_info >> 7u) & 0x7FFu;
  uint size_y_div_8 = (xe_resolve_address_info >> 18u) & 0x7FFu;
  return uint2(size_x_div_8, size_y_div_8);
}
|
|
|
|
// XeResolveUnscaledSizeDiv8() multiplied per axis by the resolution scale.
uint2 XeResolveScaledSizeDiv8() {
  uint2 unscaled_div_8 = XeResolveUnscaledSizeDiv8();
  uint2 resolution_scale = XeResolveResolutionScale();
  return unscaled_div_8 * resolution_scale;
}
|
|
|
|
// XeResolveScaledSizeDiv8() multiplied by 8.
uint2 XeResolveScaledSize() {
  uint2 scaled_div_8 = XeResolveScaledSizeDiv8();
  return scaled_div_8 * 8u;
}
|
|
|
|
#ifndef XE_RESOLVE_CLEAR
|
|
// Extracts bits 0-2 of xe_resolve_dest_info.
uint XeResolveDestEndian128() {
  uint endian = xe_resolve_dest_info & 0x7u;
  return endian;
}
|
|
|
|
// Tests bit 3 of xe_resolve_dest_info. XeResolveDestPixelAddress uses it to
// choose between the 3D and 2D tiled offset functions.
bool XeResolveDestIsArray() {
  uint array_bit = (xe_resolve_dest_info >> 3u) & 1u;
  return array_bit != 0u;
}
|
|
|
|
// Extracts the 3-bit field at bits 4-6 of xe_resolve_dest_info.
uint XeResolveDestSlice() {
  uint slice = (xe_resolve_dest_info >> 4u) & 0x7u;
  return slice;
}
|
|
|
|
// Extracts the 6-bit field at bits 7-12 of xe_resolve_dest_info.
uint XeResolveDestFormat() {
  uint dest_format = (xe_resolve_dest_info >> 7u) & 0x3Fu;
  return dest_format;
}
|
|
|
|
// Extracts the 6-bit field at bits 16-21 of xe_resolve_dest_info as a signed
// value: the field is shifted up to the top of the 32-bit integer and then
// shifted back down as a signed int so its top bit is sign-extended.
int XeResolveDestExpBias() {
  int field_at_top = int(xe_resolve_dest_info) << 10;
  return field_at_top >> 26;
}
|
|
|
|
// Builds a float by adding XeResolveDestExpBias() (shifted into bit 23 and up,
// the exponent field of a 32-bit float) to the bit pattern of 1.0f.
float XeResolveDestExpBiasFactor() {
  int exponent_delta = XeResolveDestExpBias() << 23;
  int factor_bits = asint(1.0f) + exponent_delta;
  return asfloat(factor_bits);
}
|
|
|
|
// Tests bit 24 of xe_resolve_dest_info. When set, the color loading and
// swapping functions in this file exchange the red and blue channels.
bool XeResolveDestSwap() {
  uint swap_bit = (xe_resolve_dest_info >> 24u) & 1u;
  return swap_bit != 0u;
}
|
|
|
|
// Extracts bits 0-9 of xe_resolve_dest_pitch_aligned.
uint XeResolveDestRowPitchAlignedDiv32() {
  uint row_pitch_div_32 = xe_resolve_dest_pitch_aligned & 0x3FFu;
  return row_pitch_div_32;
}
|
|
|
|
// XeResolveDestRowPitchAlignedDiv32() multiplied by 32.
uint XeResolveDestRowPitchAligned() {
  uint row_pitch_div_32 = XeResolveDestRowPitchAlignedDiv32();
  return row_pitch_div_32 * 32u;
}
|
|
|
|
// Extracts the 10-bit field at bits 10-19 of xe_resolve_dest_pitch_aligned.
uint XeResolveDestSlicePitchAlignedDiv32() {
  uint slice_pitch_div_32 = (xe_resolve_dest_pitch_aligned >> 10u) & 0x3FFu;
  return slice_pitch_div_32;
}
|
|
|
|
// XeResolveDestSlicePitchAlignedDiv32() multiplied by 32.
uint XeResolveDestSlicePitchAligned() {
  uint slice_pitch_div_32 = XeResolveDestSlicePitchAlignedDiv32();
  return slice_pitch_div_32 * 32u;
}
|
|
|
|
// Computes the destination address of pixel p for a texel size of
// (1 << bpp_log2) bytes.
//
// p is first advanced by the low 5 bits of the unscaled resolve offset,
// multiplied by the resolution scale. With XE_RESOLVE_RESOLUTION_SCALED the
// result of XeTextureScaledTiledOffset is returned as is; otherwise the 3D or
// 2D tiled offset is chosen by XeResolveDestIsArray() and xe_resolve_dest_base
// is added to it.
uint XeResolveDestPixelAddress(uint2 p, uint bpp_log2) {
  uint2 scale = XeResolveResolutionScale();
  uint2 dest_pixel = p + (XeResolveUnscaledOffset() & 31u) * scale;
  uint pitch = XeResolveDestRowPitchAligned();
  uint dest_address;
#ifdef XE_RESOLVE_RESOLUTION_SCALED
  dest_address = XeTextureScaledTiledOffset(
      XeResolveDestIsArray(), uint3(dest_pixel, XeResolveDestSlice()), pitch,
      XeResolveDestSlicePitchAligned(), bpp_log2, scale);
#else
  [branch] if (XeResolveDestIsArray()) {
    int3 dest_pixel_3d = int3(dest_pixel, XeResolveDestSlice());
    dest_address = uint(XeTextureTiledOffset3D(
        dest_pixel_3d, pitch, XeResolveDestSlicePitchAligned(), bpp_log2));
  } else {
    dest_address =
        uint(XeTextureTiledOffset2D(int2(dest_pixel), pitch, bpp_log2));
  }
  dest_address += xe_resolve_dest_base;
#endif
  return dest_address;
}
|
|
|
|
|
|
// Offset of the beginning of the odd R32G32/R32G32B32A32 store address from
// the address of the even store.
// x is only used when XE_RESOLVE_RESOLUTION_SCALED is defined; without it the
// offset depends on bpp_log2 alone.
uint XeResolveDestRightConsecutiveBlocksOffset(uint x, uint bpp_log2) {
  uint odd_store_offset;
#ifdef XE_RESOLVE_RESOLUTION_SCALED
  odd_store_offset = XeTextureScaledRightSubUnitOffsetInConsecutivePair(
      x, bpp_log2, XeResolveResolutionScale());
#else
  odd_store_offset = XeTextureTiledOddConsecutiveBlocksOffset(bpp_log2);
#endif
  return odd_store_offset;
}
|
|
|
|
// Values of the sample select field returned by XeResolveSampleSelect().
// 0-3 name a single sample index (see XeResolveFirstSampleIndex). For values
// of _01 and above, the XeResolveLoad*Colors functions average more than one
// sample.
#define kXenosCopySampleSelect_0    0u
#define kXenosCopySampleSelect_1    1u
#define kXenosCopySampleSelect_2    2u
#define kXenosCopySampleSelect_3    3u
#define kXenosCopySampleSelect_01   4u
#define kXenosCopySampleSelect_23   5u
#define kXenosCopySampleSelect_0123 6u
|
|
|
|
// Returns the top 3 bits (29-31) of xe_resolve_address_info, to be compared
// against the kXenosCopySampleSelect_* values.
uint XeResolveSampleSelect() {
  uint sample_select = xe_resolve_address_info >> 29u;
  return sample_select;
}
|
|
|
|
// Index of the first sample selected by XeResolveSampleSelect():
// - single-sample selections (0-3) return that sample index;
// - kXenosCopySampleSelect_23 returns 2;
// - anything else returns 0.
uint XeResolveFirstSampleIndex() {
  uint select = XeResolveSampleSelect();
  uint first_sample = 0u;
  if (select <= kXenosCopySampleSelect_3) {
    first_sample = select;
  } else if (select == kXenosCopySampleSelect_23) {
    first_sample = 2u;
  }
  return first_sample;
}
|
|
|
|
// Offset to the first sample to participate in averaging (or the sample to be
// copied if not averaging).
// pixel_index.y is raised to at least 1 when the Y component of
// XeResolveDuplicateSecondHostPixel() is set, then the scaled resolve offset
// is added before the position is passed to XeEdramOffsetInts.
uint XeResolveColorCopySourcePixelAddressIntsYDuplicating(uint2 pixel_index) {
  uint minimum_y = uint(XeResolveDuplicateSecondHostPixel().y);
  uint2 source_pixel = uint2(pixel_index.x, max(pixel_index.y, minimum_y));
  source_pixel += XeResolveScaledOffset();
  return XeEdramOffsetInts(
      source_pixel, XeResolveEdramBaseTiles(), XeResolveEdramPitchTiles(),
      XeResolveEdramMsaaSamples(), false, XeResolveEdramFormatIntsLog2(),
      XeResolveFirstSampleIndex(), XeResolveResolutionScale());
}
|
|
|
|
// Not using arrays for multi-pixel functions because they are compiled to
|
|
// indexable temps by FXC.
|
|
|
|
// Unpacks two 32-bit packed samples (packed.x, packed.y) into float4 colors
// according to format. 8_8_8_8_GAMMA goes through the same unpacking as
// 8_8_8_8. Two-component formats fill the last two components with zero, and
// any format without its own case is read as a single 32-bit float followed by
// zeros.
void XeResolveUnpack32bpp2Samples(uint2 packed, uint format,
                                  out float4 sample_0, out float4 sample_1) {
  uint raw_0 = packed.x;
  uint raw_1 = packed.y;
  switch (format) {
    case kXenosColorRenderTargetFormat_8_8_8_8:
    case kXenosColorRenderTargetFormat_8_8_8_8_GAMMA: {
      sample_0 = XeUnpackR8G8B8A8UNorm(raw_0);
      sample_1 = XeUnpackR8G8B8A8UNorm(raw_1);
    } break;
    case kXenosColorRenderTargetFormat_2_10_10_10:
    case kXenosColorRenderTargetFormat_2_10_10_10_AS_10_10_10_10: {
      sample_0 = XeUnpackR10G10B10A2UNorm(raw_0);
      sample_1 = XeUnpackR10G10B10A2UNorm(raw_1);
    } break;
    case kXenosColorRenderTargetFormat_2_10_10_10_FLOAT:
    case kXenosColorRenderTargetFormat_2_10_10_10_FLOAT_AS_16_16_16_16: {
      sample_0 = XeUnpackR10G10B10A2Float(raw_0);
      sample_1 = XeUnpackR10G10B10A2Float(raw_1);
    } break;
    case kXenosColorRenderTargetFormat_16_16: {
      sample_0 = float4(XeUnpackR16G16Edram(raw_0), 0.0f, 0.0f);
      sample_1 = float4(XeUnpackR16G16Edram(raw_1), 0.0f, 0.0f);
    } break;
    case kXenosColorRenderTargetFormat_16_16_FLOAT: {
      // f16tof32 reads the low 16 bits of each component.
      sample_0 = float4(f16tof32(uint2(raw_0, raw_0 >> 16u)), 0.0f, 0.0f);
      sample_1 = float4(f16tof32(uint2(raw_1, raw_1 >> 16u)), 0.0f, 0.0f);
    } break;
    default: {
      // Treat as 32_FLOAT.
      sample_0 = float4(asfloat(raw_0), 0.0f, 0.0f, 0.0f);
      sample_1 = float4(asfloat(raw_1), 0.0f, 0.0f, 0.0f);
    } break;
  }
}
|
|
|
|
// Four-sample version of the 32bpp unpacking: packed.xyzw hold one 32-bit
// packed sample each and are unpacked into sample_0..sample_3 according to
// format. 8_8_8_8_GAMMA goes through the same unpacking as 8_8_8_8, and any
// format without its own case is read as a single 32-bit float followed by
// zeros.
void XeResolveUnpack32bpp4Samples(uint4 packed, uint format,
                                  out float4 sample_0, out float4 sample_1,
                                  out float4 sample_2, out float4 sample_3) {
  uint raw_0 = packed.x;
  uint raw_1 = packed.y;
  uint raw_2 = packed.z;
  uint raw_3 = packed.w;
  switch (format) {
    case kXenosColorRenderTargetFormat_8_8_8_8:
    case kXenosColorRenderTargetFormat_8_8_8_8_GAMMA: {
      sample_0 = XeUnpackR8G8B8A8UNorm(raw_0);
      sample_1 = XeUnpackR8G8B8A8UNorm(raw_1);
      sample_2 = XeUnpackR8G8B8A8UNorm(raw_2);
      sample_3 = XeUnpackR8G8B8A8UNorm(raw_3);
    } break;
    case kXenosColorRenderTargetFormat_2_10_10_10:
    case kXenosColorRenderTargetFormat_2_10_10_10_AS_10_10_10_10: {
      sample_0 = XeUnpackR10G10B10A2UNorm(raw_0);
      sample_1 = XeUnpackR10G10B10A2UNorm(raw_1);
      sample_2 = XeUnpackR10G10B10A2UNorm(raw_2);
      sample_3 = XeUnpackR10G10B10A2UNorm(raw_3);
    } break;
    case kXenosColorRenderTargetFormat_2_10_10_10_FLOAT:
    case kXenosColorRenderTargetFormat_2_10_10_10_FLOAT_AS_16_16_16_16: {
      sample_0 = XeUnpackR10G10B10A2Float(raw_0);
      sample_1 = XeUnpackR10G10B10A2Float(raw_1);
      sample_2 = XeUnpackR10G10B10A2Float(raw_2);
      sample_3 = XeUnpackR10G10B10A2Float(raw_3);
    } break;
    case kXenosColorRenderTargetFormat_16_16: {
      sample_0 = float4(XeUnpackR16G16Edram(raw_0), 0.0f, 0.0f);
      sample_1 = float4(XeUnpackR16G16Edram(raw_1), 0.0f, 0.0f);
      sample_2 = float4(XeUnpackR16G16Edram(raw_2), 0.0f, 0.0f);
      sample_3 = float4(XeUnpackR16G16Edram(raw_3), 0.0f, 0.0f);
    } break;
    case kXenosColorRenderTargetFormat_16_16_FLOAT: {
      // f16tof32 reads the low 16 bits of each component.
      sample_0 = float4(f16tof32(uint2(raw_0, raw_0 >> 16u)), 0.0f, 0.0f);
      sample_1 = float4(f16tof32(uint2(raw_1, raw_1 >> 16u)), 0.0f, 0.0f);
      sample_2 = float4(f16tof32(uint2(raw_2, raw_2 >> 16u)), 0.0f, 0.0f);
      sample_3 = float4(f16tof32(uint2(raw_3, raw_3 >> 16u)), 0.0f, 0.0f);
    } break;
    default: {
      // Treat as 32_FLOAT.
      sample_0 = float4(asfloat(raw_0), 0.0f, 0.0f, 0.0f);
      sample_1 = float4(asfloat(raw_1), 0.0f, 0.0f, 0.0f);
      sample_2 = float4(asfloat(raw_2), 0.0f, 0.0f, 0.0f);
      sample_3 = float4(asfloat(raw_3), 0.0f, 0.0f, 0.0f);
    } break;
  }
}
|
|
|
|
// Unpacks one channel from each of eight 32-bit packed samples, four per
// output vector. For the 8_8_8_8 and 2_10_10_10 format families, swap selects
// the channel stored 16 or 20 bits higher instead of the lowest one. For all
// other formats swap is ignored here.
void XeResolveUnpack32bpp8RedSamples(uint4 packed_0123, uint4 packed_4567,
                                     uint format, bool swap,
                                     out float4 samples_0123,
                                     out float4 samples_4567) {
  switch (format) {
    case kXenosColorRenderTargetFormat_8_8_8_8:
    case kXenosColorRenderTargetFormat_8_8_8_8_GAMMA: {
      uint channel_shift = swap ? 16u : 0u;
      uint4 shifted_0123 = packed_0123 >> channel_shift;
      uint4 shifted_4567 = packed_4567 >> channel_shift;
      samples_0123 = XeUnpackR8UNormX4(shifted_0123);
      samples_4567 = XeUnpackR8UNormX4(shifted_4567);
    } break;
    case kXenosColorRenderTargetFormat_2_10_10_10:
    case kXenosColorRenderTargetFormat_2_10_10_10_AS_10_10_10_10: {
      uint channel_shift = swap ? 20u : 0u;
      uint4 shifted_0123 = packed_0123 >> channel_shift;
      uint4 shifted_4567 = packed_4567 >> channel_shift;
      samples_0123 = XeUnpackR10UNormX4(shifted_0123);
      samples_4567 = XeUnpackR10UNormX4(shifted_4567);
    } break;
    case kXenosColorRenderTargetFormat_2_10_10_10_FLOAT:
    case kXenosColorRenderTargetFormat_2_10_10_10_FLOAT_AS_16_16_16_16: {
      uint channel_shift = swap ? 20u : 0u;
      uint4 shifted_0123 = packed_0123 >> channel_shift;
      uint4 shifted_4567 = packed_4567 >> channel_shift;
      samples_0123 = XeUnpackR10FloatX4(shifted_0123);
      samples_4567 = XeUnpackR10FloatX4(shifted_4567);
    } break;
    case kXenosColorRenderTargetFormat_16_16: {
      samples_0123 = XeUnpackR16EdramX4(packed_0123);
      samples_4567 = XeUnpackR16EdramX4(packed_4567);
    } break;
    case kXenosColorRenderTargetFormat_16_16_FLOAT: {
      samples_0123 = f16tof32(packed_0123);
      samples_4567 = f16tof32(packed_4567);
    } break;
    default: {
      // Treat as 32_FLOAT.
      samples_0123 = asfloat(packed_0123);
      samples_4567 = asfloat(packed_4567);
    } break;
  }
}
|
|
|
|
// Unpacks two 64-bit packed samples (packed.xy and packed.zw) into float4
// colors according to format. Any format without its own case is read as two
// 32-bit floats followed by two zeros.
void XeResolveUnpack64bpp2Samples(uint4 packed, uint format,
                                  out float4 sample_0, out float4 sample_1) {
  uint2 raw_0 = packed.xy;
  uint2 raw_1 = packed.zw;
  switch (format) {
    case kXenosColorRenderTargetFormat_16_16_16_16: {
      sample_0 = XeUnpackR16G16B16A16Edram(raw_0);
      sample_1 = XeUnpackR16G16B16A16Edram(raw_1);
    } break;
    case kXenosColorRenderTargetFormat_16_16_16_16_FLOAT: {
      // f16tof32 reads the low 16 bits of each component.
      sample_0 = f16tof32(
          uint4(raw_0.x, raw_0.x >> 16u, raw_0.y, raw_0.y >> 16u));
      sample_1 = f16tof32(
          uint4(raw_1.x, raw_1.x >> 16u, raw_1.y, raw_1.y >> 16u));
    } break;
    default: {
      // Treat as 32_32_FLOAT.
      sample_0 = float4(asfloat(raw_0), 0.0f, 0.0f);
      sample_1 = float4(asfloat(raw_1), 0.0f, 0.0f);
    } break;
  }
}
|
|
|
|
// Four-sample version of the 64bpp unpacking: packed_01.xy, packed_01.zw,
// packed_23.xy and packed_23.zw hold one 64-bit packed sample each. Any format
// without its own case is read as two 32-bit floats followed by two zeros.
void XeResolveUnpack64bpp4Samples(uint4 packed_01, uint4 packed_23,
                                  uint format, out float4 sample_0,
                                  out float4 sample_1, out float4 sample_2,
                                  out float4 sample_3) {
  uint2 raw_0 = packed_01.xy;
  uint2 raw_1 = packed_01.zw;
  uint2 raw_2 = packed_23.xy;
  uint2 raw_3 = packed_23.zw;
  switch (format) {
    case kXenosColorRenderTargetFormat_16_16_16_16: {
      sample_0 = XeUnpackR16G16B16A16Edram(raw_0);
      sample_1 = XeUnpackR16G16B16A16Edram(raw_1);
      sample_2 = XeUnpackR16G16B16A16Edram(raw_2);
      sample_3 = XeUnpackR16G16B16A16Edram(raw_3);
    } break;
    case kXenosColorRenderTargetFormat_16_16_16_16_FLOAT: {
      // f16tof32 reads the low 16 bits of each component.
      sample_0 = f16tof32(
          uint4(raw_0.x, raw_0.x >> 16u, raw_0.y, raw_0.y >> 16u));
      sample_1 = f16tof32(
          uint4(raw_1.x, raw_1.x >> 16u, raw_1.y, raw_1.y >> 16u));
      sample_2 = f16tof32(
          uint4(raw_2.x, raw_2.x >> 16u, raw_2.y, raw_2.y >> 16u));
      sample_3 = f16tof32(
          uint4(raw_3.x, raw_3.x >> 16u, raw_3.y, raw_3.y >> 16u));
    } break;
    default: {
      // Treat as 32_32_FLOAT.
      sample_0 = float4(asfloat(raw_0), 0.0f, 0.0f);
      sample_1 = float4(asfloat(raw_1), 0.0f, 0.0f);
      sample_2 = float4(asfloat(raw_2), 0.0f, 0.0f);
      sample_3 = float4(asfloat(raw_3), 0.0f, 0.0f);
    } break;
  }
}
|
|
|
|
// Unpacks one channel from each of eight 32-bit words, four per output
// vector, for the 64bpp formats. No swap parameter is taken here; the word to
// read is chosen by the caller through the load address (see
// XeResolveLoad8RedColors). Any format without its own case is read as a
// 32-bit float.
void XeResolveUnpack64bpp8RedUnswappedSamples(
    uint4 packed_0123, uint4 packed_4567, uint format,
    out float4 samples_0123, out float4 samples_4567) {
  switch (format) {
    case kXenosColorRenderTargetFormat_16_16_16_16: {
      samples_0123 = XeUnpackR16EdramX4(packed_0123);
      samples_4567 = XeUnpackR16EdramX4(packed_4567);
    } break;
    case kXenosColorRenderTargetFormat_16_16_16_16_FLOAT: {
      samples_0123 = f16tof32(packed_0123);
      samples_4567 = f16tof32(packed_4567);
    } break;
    default: {
      // Treat as 32_32_FLOAT.
      samples_0123 = asfloat(packed_0123);
      samples_4567 = asfloat(packed_4567);
    } break;
  }
}
|
|
|
|
// Loads one sample for each of two pixels from source and unpacks them as
// RGBA without red/blue swapping. The second sample is read
// pixel_stride_bytes after the first. When the stride equals the sample size
// (8 bytes for 64bpp, 4 bytes for 32bpp) both samples are fetched with a
// single wider load.
void XeResolveLoad2RGBAUnswappedPixelSamplesFromRaw(
    ByteAddressBuffer source, uint sample_address_bytes,
    uint pixel_stride_bytes, uint format_ints_log2, uint format,
    out float4 pixel_0, out float4 pixel_1) {
  uint address_0 = sample_address_bytes;
  uint address_1 = sample_address_bytes + pixel_stride_bytes;
  [branch] if (format_ints_log2 != 0u) {
    uint4 packed_64bpp;
    [branch] if (pixel_stride_bytes == 8u) {
      packed_64bpp = source.Load4(address_0);
    } else {
      packed_64bpp = uint4(source.Load2(address_0), source.Load2(address_1));
    }
    XeResolveUnpack64bpp2Samples(packed_64bpp, format, pixel_0, pixel_1);
  } else {
    uint2 packed_32bpp;
    [branch] if (pixel_stride_bytes == 4u) {
      packed_32bpp = source.Load2(address_0);
    } else {
      packed_32bpp = uint2(source.Load(address_0), source.Load(address_1));
    }
    XeResolveUnpack32bpp2Samples(packed_32bpp, format, pixel_0, pixel_1);
  }
}
|
|
|
|
// Loads one sample for each of four pixels from source and unpacks them as
// RGBA without red/blue swapping. Consecutive samples are pixel_stride_bytes
// apart. When the stride equals the sample size (8 bytes for 64bpp, 4 bytes
// for 32bpp) the samples are fetched with 16-byte loads instead of one load
// per sample.
void XeResolveLoad4RGBAUnswappedPixelSamplesFromRaw(
    ByteAddressBuffer source, uint sample_address_bytes,
    uint pixel_stride_bytes, uint format_ints_log2, uint format,
    out float4 pixel_0, out float4 pixel_1, out float4 pixel_2,
    out float4 pixel_3) {
  uint address_0 = sample_address_bytes;
  uint address_1 = sample_address_bytes + pixel_stride_bytes;
  uint address_2 = sample_address_bytes + 2u * pixel_stride_bytes;
  uint address_3 = sample_address_bytes + 3u * pixel_stride_bytes;
  [branch] if (format_ints_log2 != 0u) {
    uint4 raw_01, raw_23;
    [branch] if (pixel_stride_bytes == 8u) {
      raw_01 = source.Load4(address_0);
      raw_23 = source.Load4(address_0 + 16u);
    } else {
      raw_01 = uint4(source.Load2(address_0), source.Load2(address_1));
      raw_23 = uint4(source.Load2(address_2), source.Load2(address_3));
    }
    XeResolveUnpack64bpp4Samples(raw_01, raw_23, format, pixel_0, pixel_1,
                                 pixel_2, pixel_3);
  } else {
    uint4 raw_0123;
    [branch] if (pixel_stride_bytes == 4u) {
      raw_0123 = source.Load4(address_0);
    } else {
      raw_0123 = uint4(source.Load(address_0), source.Load(address_1),
                       source.Load(address_2), source.Load(address_3));
    }
    XeResolveUnpack32bpp4Samples(raw_0123, format, pixel_0, pixel_1, pixel_2,
                                 pixel_3);
  }
}
|
|
|
|
// Loads one 32-bit word for each of eight pixels from source and unpacks a
// single channel from each. Consecutive words are pixel_stride_bytes apart; a
// stride of 4 bytes is fetched with two 16-byte loads.
// For red/blue swapping for 64bpp, pre-add 4 to sample_address_bytes.
// swap_32bpp is only forwarded to the 32bpp unpacking.
void XeResolveLoad8RedPixelSamplesFromRaw(
    ByteAddressBuffer source, uint sample_address_bytes,
    uint pixel_stride_bytes, uint format_ints_log2, uint format,
    bool swap_32bpp, out float4 pixels_0123, out float4 pixels_4567) {
  uint base = sample_address_bytes;
  uint stride = pixel_stride_bytes;
  uint4 raw_0123, raw_4567;
  [branch] if (stride == 4u) {
    raw_0123 = source.Load4(base);
    raw_4567 = source.Load4(base + 16u);
  } else {
    raw_0123 = uint4(source.Load(base), source.Load(base + stride),
                     source.Load(base + 2u * stride),
                     source.Load(base + 3u * stride));
    raw_4567 = uint4(source.Load(base + 4u * stride),
                     source.Load(base + 5u * stride),
                     source.Load(base + 6u * stride),
                     source.Load(base + 7u * stride));
  }
  [branch] if (format_ints_log2 != 0u) {
    XeResolveUnpack64bpp8RedUnswappedSamples(raw_0123, raw_4567, format,
                                             pixels_0123, pixels_4567);
  } else {
    XeResolveUnpack32bpp8RedSamples(raw_0123, raw_4567, format, swap_32bpp,
                                    pixels_0123, pixels_4567);
  }
}
|
|
|
|
// Loads two resolved RGBA pixels starting at address_ints (in 32-bit units)
// in source.
//
// The sample at the given address is always loaded. For sample selections of
// kXenosCopySampleSelect_01 and above, the sample one tile row stride further
// is added. For kXenosCopySampleSelect_0123 and above, two more samples are
// added, offset by (4 << format_ints_log2) bytes. The sum is scaled by the
// destination exponent bias factor, halved once per doubling of the sample
// count, and red/blue are exchanged when XeResolveDestSwap() is set.
void XeResolveLoad2RGBAColors(ByteAddressBuffer source, uint address_ints,
                              out float4 pixel_0, out float4 pixel_1) {
  uint format = XeResolveEdramFormat();
  uint format_ints_log2 = XeResolveEdramFormatIntsLog2();
  uint pixel_stride_bytes = XeResolveEdramPixelStrideInts() * 4u;
  uint address_bytes = address_ints * 4u;
  XeResolveLoad2RGBAUnswappedPixelSamplesFromRaw(
      source, address_bytes, pixel_stride_bytes, format_ints_log2, format,
      pixel_0, pixel_1);
  float scale = XeResolveDestExpBiasFactor();
  uint sample_select = XeResolveSampleSelect();
  [branch] if (sample_select >= kXenosCopySampleSelect_01) {
    // TODO(Triang3l): Gamma-correct resolve for 8_8_8_8_GAMMA.
    uint row_stride_bytes = 320u * XeResolveResolutionScale().x;
    float4 extra_0, extra_1;
    XeResolveLoad2RGBAUnswappedPixelSamplesFromRaw(
        source, address_bytes + row_stride_bytes, pixel_stride_bytes,
        format_ints_log2, format, extra_0, extra_1);
    pixel_0 += extra_0;
    pixel_1 += extra_1;
    scale *= 0.5f;
    [branch] if (sample_select >= kXenosCopySampleSelect_0123) {
      uint sample_pair_offset_bytes = 4u << format_ints_log2;
      XeResolveLoad2RGBAUnswappedPixelSamplesFromRaw(
          source, address_bytes + sample_pair_offset_bytes,
          pixel_stride_bytes, format_ints_log2, format, extra_0, extra_1);
      pixel_0 += extra_0;
      pixel_1 += extra_1;
      XeResolveLoad2RGBAUnswappedPixelSamplesFromRaw(
          source,
          address_bytes + (row_stride_bytes + sample_pair_offset_bytes),
          pixel_stride_bytes, format_ints_log2, format, extra_0, extra_1);
      pixel_0 += extra_0;
      pixel_1 += extra_1;
      scale *= 0.5f;
    }
  }
  pixel_0 *= scale;
  pixel_1 *= scale;
  [branch] if (XeResolveDestSwap()) {
    pixel_0 = pixel_0.zyxw;
    pixel_1 = pixel_1.zyxw;
  }
}
|
|
|
|
// Loads four resolved RGBA pixels starting at address_ints (in 32-bit units)
// in source.
//
// The sample at the given address is always loaded. For sample selections of
// kXenosCopySampleSelect_01 and above, the sample one tile row stride further
// is added. For kXenosCopySampleSelect_0123 and above, two more samples are
// added, offset by (4 << format_ints_log2) bytes. The sum is scaled by the
// destination exponent bias factor, halved once per doubling of the sample
// count, and red/blue are exchanged when XeResolveDestSwap() is set.
void XeResolveLoad4RGBAColors(ByteAddressBuffer source, uint address_ints,
                              out float4 pixel_0, out float4 pixel_1,
                              out float4 pixel_2, out float4 pixel_3) {
  uint format = XeResolveEdramFormat();
  uint format_ints_log2 = XeResolveEdramFormatIntsLog2();
  uint pixel_stride_bytes = XeResolveEdramPixelStrideInts() * 4u;
  uint address_bytes = address_ints * 4u;
  XeResolveLoad4RGBAUnswappedPixelSamplesFromRaw(
      source, address_bytes, pixel_stride_bytes, format_ints_log2, format,
      pixel_0, pixel_1, pixel_2, pixel_3);
  float scale = XeResolveDestExpBiasFactor();
  uint sample_select = XeResolveSampleSelect();
  [branch] if (sample_select >= kXenosCopySampleSelect_01) {
    // TODO(Triang3l): Gamma-correct resolve for 8_8_8_8_GAMMA.
    uint row_stride_bytes = 320u * XeResolveResolutionScale().x;
    float4 extra_0, extra_1, extra_2, extra_3;
    XeResolveLoad4RGBAUnswappedPixelSamplesFromRaw(
        source, address_bytes + row_stride_bytes, pixel_stride_bytes,
        format_ints_log2, format, extra_0, extra_1, extra_2, extra_3);
    pixel_0 += extra_0;
    pixel_1 += extra_1;
    pixel_2 += extra_2;
    pixel_3 += extra_3;
    scale *= 0.5f;
    [branch] if (sample_select >= kXenosCopySampleSelect_0123) {
      uint sample_pair_offset_bytes = 4u << format_ints_log2;
      XeResolveLoad4RGBAUnswappedPixelSamplesFromRaw(
          source, address_bytes + sample_pair_offset_bytes,
          pixel_stride_bytes, format_ints_log2, format, extra_0, extra_1,
          extra_2, extra_3);
      pixel_0 += extra_0;
      pixel_1 += extra_1;
      pixel_2 += extra_2;
      pixel_3 += extra_3;
      XeResolveLoad4RGBAUnswappedPixelSamplesFromRaw(
          source,
          address_bytes + (row_stride_bytes + sample_pair_offset_bytes),
          pixel_stride_bytes, format_ints_log2, format, extra_0, extra_1,
          extra_2, extra_3);
      pixel_0 += extra_0;
      pixel_1 += extra_1;
      pixel_2 += extra_2;
      pixel_3 += extra_3;
      scale *= 0.5f;
    }
  }
  pixel_0 *= scale;
  pixel_1 *= scale;
  pixel_2 *= scale;
  pixel_3 *= scale;
  [branch] if (XeResolveDestSwap()) {
    pixel_0 = pixel_0.zyxw;
    pixel_1 = pixel_1.zyxw;
    pixel_2 = pixel_2.zyxw;
    pixel_3 = pixel_3.zyxw;
  }
}
|
|
|
|
// Loads a single channel of eight resolved pixels starting at address_ints
// (in 32-bit units) in source.
//
// For 64bpp formats with red/blue swapping, the base address is advanced by 4
// bytes before any load; every later load uses the advanced address. Samples
// are accumulated the same way as in the RGBA loaders: one extra sample a tile
// row stride further for kXenosCopySampleSelect_01 and above, and two more at
// a (4 << format_ints_log2) byte offset for kXenosCopySampleSelect_0123 and
// above. The sum is scaled by the destination exponent bias factor, halved
// once per doubling of the sample count.
void XeResolveLoad8RedColors(ByteAddressBuffer source, uint address_ints,
                             out float4 pixels_0123, out float4 pixels_4567) {
  uint format = XeResolveEdramFormat();
  uint format_ints_log2 = XeResolveEdramFormatIntsLog2();
  uint pixel_stride_bytes = XeResolveEdramPixelStrideInts() * 4u;
  uint address_bytes = address_ints * 4u;
  bool swap = XeResolveDestSwap();
  [branch] if (format_ints_log2 != 0u && swap) {
    // Likely want to load the blue part from the right half.
    address_bytes += 4u;
  }
  XeResolveLoad8RedPixelSamplesFromRaw(
      source, address_bytes, pixel_stride_bytes, format_ints_log2, format,
      swap, pixels_0123, pixels_4567);
  float scale = XeResolveDestExpBiasFactor();
  uint sample_select = XeResolveSampleSelect();
  [branch] if (sample_select >= kXenosCopySampleSelect_01) {
    // TODO(Triang3l): Gamma-correct resolve for 8_8_8_8_GAMMA.
    uint row_stride_bytes = 320u * XeResolveResolutionScale().x;
    float4 extra_0123, extra_4567;
    XeResolveLoad8RedPixelSamplesFromRaw(
        source, address_bytes + row_stride_bytes, pixel_stride_bytes,
        format_ints_log2, format, swap, extra_0123, extra_4567);
    pixels_0123 += extra_0123;
    pixels_4567 += extra_4567;
    scale *= 0.5f;
    [branch] if (sample_select >= kXenosCopySampleSelect_0123) {
      uint sample_pair_offset_bytes = 4u << format_ints_log2;
      XeResolveLoad8RedPixelSamplesFromRaw(
          source, address_bytes + sample_pair_offset_bytes,
          pixel_stride_bytes, format_ints_log2, format, swap, extra_0123,
          extra_4567);
      pixels_0123 += extra_0123;
      pixels_4567 += extra_4567;
      XeResolveLoad8RedPixelSamplesFromRaw(
          source,
          address_bytes + (row_stride_bytes + sample_pair_offset_bytes),
          pixel_stride_bytes, format_ints_log2, format, swap, extra_0123,
          extra_4567);
      pixels_0123 += extra_0123;
      pixels_4567 += extra_4567;
      scale *= 0.5f;
    }
  }
  pixels_0123 *= scale;
  pixels_4567 *= scale;
}
|
|
|
|
// For each of the four packed pixels, exchanges the byte in bits 0-7 with the
// byte in bits 16-23 and leaves bits 8-15 and 24-31 untouched.
uint4 XeResolveSwapRedBlue_8_8_8_8(uint4 pixels) {
  uint4 low_channel = pixels & 0xFFu;
  uint4 high_channel = (pixels >> 16u) & 0xFFu;
  uint4 untouched = pixels & 0xFF00FF00u;
  return untouched | (low_channel << 16u) | high_channel;
}
|
|
|
|
// For each of the four packed pixels, exchanges the 10-bit field in bits 0-9
// with the 10-bit field in bits 20-29 and leaves bits 10-19 and 30-31
// untouched.
uint4 XeResolveSwapRedBlue_2_10_10_10(uint4 pixels) {
  // The mask literal is unsigned, like the one in XeResolveSwapRedBlue_8_8_8_8,
  // so that the bit math does not mix signed and unsigned operands.
  return (pixels & ~0x3FF003FFu) | ((pixels & 0x3FFu) << 20u) |
         ((pixels >> 20u) & 0x3FFu);
}
|
|
|
|
// Exchanges red and blue in eight packed 32bpp pixels in place when
// XeResolveDestSwap() is set. Only the 8_8_8_8 and 2_10_10_10 format families
// are modified; pixels in any other format are left as they are.
void XeResolveSwap8PixelsRedBlue32bpp(inout uint4 pixels_0123,
                                      inout uint4 pixels_4567) {
  [branch] if (XeResolveDestSwap()) {
    uint format = XeResolveEdramFormat();
    bool is_8_8_8_8 =
        format == kXenosColorRenderTargetFormat_8_8_8_8 ||
        format == kXenosColorRenderTargetFormat_8_8_8_8_GAMMA;
    bool is_2_10_10_10 =
        format == kXenosColorRenderTargetFormat_2_10_10_10 ||
        format == kXenosColorRenderTargetFormat_2_10_10_10_FLOAT ||
        format == kXenosColorRenderTargetFormat_2_10_10_10_AS_10_10_10_10 ||
        format ==
            kXenosColorRenderTargetFormat_2_10_10_10_FLOAT_AS_16_16_16_16;
    if (is_8_8_8_8) {
      pixels_0123 = XeResolveSwapRedBlue_8_8_8_8(pixels_0123);
      pixels_4567 = XeResolveSwapRedBlue_8_8_8_8(pixels_4567);
    } else if (is_2_10_10_10) {
      pixels_0123 = XeResolveSwapRedBlue_2_10_10_10(pixels_0123);
      pixels_4567 = XeResolveSwapRedBlue_2_10_10_10(pixels_4567);
    }
  }
}
|
|
|
|
// Exchanges red and blue in four packed 64bpp pixels in place when
// XeResolveDestSwap() is set and the format is 16_16_16_16 or
// 16_16_16_16_FLOAT. Each pixel occupies two consecutive components; the low
// 16 bits of the two components are exchanged and the high 16 bits are kept.
void XeResolveSwap4PixelsRedBlue64bpp(inout uint4 pixels_01,
                                      inout uint4 pixels_23) {
  [branch] if (XeResolveDestSwap()) {
    uint format = XeResolveEdramFormat();
    bool is_16_16_16_16 =
        format == kXenosColorRenderTargetFormat_16_16_16_16 ||
        format == kXenosColorRenderTargetFormat_16_16_16_16_FLOAT;
    [branch] if (is_16_16_16_16) {
      uint4 swapped_low_01 = pixels_01.yxwz & 0xFFFFu;
      uint4 swapped_low_23 = pixels_23.yxwz & 0xFFFFu;
      pixels_01 = (pixels_01 & 0xFFFF0000u) | swapped_low_01;
      pixels_23 = (pixels_23 & 0xFFFF0000u) | swapped_low_23;
    }
  }
}
|
|
#endif
|
|
|
|
#endif // XENIA_GPU_D3D12_SHADERS_RESOLVE_HLSLI_
|