[D3D12] Fix and enable triangle adaptive tessellation

This commit is contained in:
Triang3l 2020-04-08 00:26:49 +03:00
parent 8123978c49
commit 7cf3a75ab1
5 changed files with 43 additions and 25 deletions

View File

@ -56,11 +56,9 @@ DEFINE_bool(d3d12_submit_on_primary_buffer_end, true,
"possible to submit immediately to try to reduce frame latency.", "possible to submit immediately to try to reduce frame latency.",
"D3D12"); "D3D12");
DEFINE_bool( DEFINE_bool(
d3d12_tessellation_adaptive, false, d3d12_tessellation_adaptive, true,
"Allow games to use adaptive tessellation - may be disabled if the game " "Allow games to use adaptive tessellation - may be disabled if the game "
"has issues with memexport, the maximum factor will be used in this case. " "has issues with memexport, the maximum factor will be used in this case.",
"Temporarily disabled by default since there are visible cracks currently "
"in Halo 3.",
"D3D12"); "D3D12");
namespace xe { namespace xe {

View File

@ -20,12 +20,28 @@ XeHSConstantDataOutput XePatchConstant(
// has already been added on the CPU. // has already been added on the CPU.
// Fork phase. // Fork phase.
// UVW are taken with ZYX swizzle (when r1.y is 0) in the vertex (domain) // It appears that on the Xbox 360:
// shader. Edge 0 is with U = 0, edge 1 is with V = 0, edge 2 is with W = 0. // - [0] is the factor for the v0->v1 edge.
// TODO(Triang3l): Verify this order. There are still cracks. // - [1] is the factor for the v1->v2 edge.
// - [2] is the factor for the v2->v0 edge.
// Where v0 is the U1V0W0 vertex, v1 is the U0V1W0 vertex, and v2 is the
// U0V0W1 vertex.
// A hint about this order is given in Code Listing 15 of:
// http://www.uraldev.ru/files/download/21/Real-Time_Tessellation_on_GPU.pdf
// In Direct3D 12:
// - [0] is the factor for the U0 edge (v1->v2).
// - [1] is the factor for the V0 edge (v2->v0).
// - [2] is the factor for the W0 edge (v0->v1).
// Direct3D 12 provides barycentrics as X for v0, Y for v1, Z for v2.
// In Xenia's domain shaders, the barycentric coordinates are handled as:
// 1) vDomain.xyz -> r0.zyx by Xenia.
// 2) r0.zyx -> r0.zyx by the guest (because r1.y is set to 0 by Xenia, which
// apparently means identity swizzle to games).
// 3) r0.z * v0 + r0.y * v1 + r0.x * v2 by the guest.
// With this order, there are no cracks in Halo 3 water.
[unroll] for (i = 0u; i < 3u; ++i) { [unroll] for (i = 0u; i < 3u; ++i) {
output.edges[i] = output.edges[i] = clamp(
clamp(asfloat(xe_input_patch[2u - i].index_or_edge_factor) + 1.0f, asfloat(xe_input_patch[(i + 1u) % 3u].index_or_edge_factor) + 1.0f,
xe_tessellation_factor_range.x, xe_tessellation_factor_range.y); xe_tessellation_factor_range.x, xe_tessellation_factor_range.y);
} }

View File

@ -1,11 +1,11 @@
// generated from `xb buildhlsl` // generated from `xb buildhlsl`
// source: adaptive_triangle.hs.hlsl // source: adaptive_triangle.hs.hlsl
const uint8_t adaptive_triangle_hs[] = { const uint8_t adaptive_triangle_hs[] = {
0x44, 0x58, 0x42, 0x43, 0x72, 0x34, 0xB9, 0xC2, 0xEC, 0x61, 0xB3, 0x84, 0x44, 0x58, 0x42, 0x43, 0x11, 0x2F, 0xB7, 0xDC, 0x3A, 0xC8, 0x6E, 0xE8,
0x40, 0x92, 0xAA, 0x70, 0x58, 0xC9, 0x88, 0x7D, 0x01, 0x00, 0x00, 0x00, 0x46, 0xFA, 0x34, 0x10, 0x8D, 0x43, 0xC9, 0x2E, 0x01, 0x00, 0x00, 0x00,
0x88, 0x0D, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0xAC, 0x0D, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
0x30, 0x0A, 0x00, 0x00, 0x64, 0x0A, 0x00, 0x00, 0x74, 0x0A, 0x00, 0x00, 0x30, 0x0A, 0x00, 0x00, 0x64, 0x0A, 0x00, 0x00, 0x74, 0x0A, 0x00, 0x00,
0x08, 0x0B, 0x00, 0x00, 0xEC, 0x0C, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x08, 0x0B, 0x00, 0x00, 0x10, 0x0D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46,
0xF0, 0x09, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0xF0, 0x09, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x48, 0x01, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x48,
0x00, 0x05, 0x00, 0x00, 0xC6, 0x09, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x00, 0x05, 0x00, 0x00, 0xC6, 0x09, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25,
@ -236,8 +236,8 @@ const uint8_t adaptive_triangle_hs[] = {
0x01, 0x0E, 0x00, 0x00, 0x53, 0x56, 0x5F, 0x54, 0x65, 0x73, 0x73, 0x46, 0x01, 0x0E, 0x00, 0x00, 0x53, 0x56, 0x5F, 0x54, 0x65, 0x73, 0x73, 0x46,
0x61, 0x63, 0x74, 0x6F, 0x72, 0x00, 0x53, 0x56, 0x5F, 0x49, 0x6E, 0x73, 0x61, 0x63, 0x74, 0x6F, 0x72, 0x00, 0x53, 0x56, 0x5F, 0x49, 0x6E, 0x73,
0x69, 0x64, 0x65, 0x54, 0x65, 0x73, 0x73, 0x46, 0x61, 0x63, 0x74, 0x6F, 0x69, 0x64, 0x65, 0x54, 0x65, 0x73, 0x73, 0x46, 0x61, 0x63, 0x74, 0x6F,
0x72, 0x00, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, 0xDC, 0x01, 0x00, 0x00, 0x72, 0x00, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, 0x00, 0x02, 0x00, 0x00,
0x51, 0x00, 0x03, 0x00, 0x77, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x01, 0x51, 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x01,
0x93, 0x18, 0x00, 0x01, 0x94, 0x18, 0x00, 0x01, 0x95, 0x10, 0x00, 0x01, 0x93, 0x18, 0x00, 0x01, 0x94, 0x18, 0x00, 0x01, 0x95, 0x10, 0x00, 0x01,
0x96, 0x20, 0x00, 0x01, 0x97, 0x18, 0x00, 0x01, 0x6A, 0x08, 0x00, 0x01, 0x96, 0x20, 0x00, 0x01, 0x97, 0x18, 0x00, 0x01, 0x6A, 0x08, 0x00, 0x01,
0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -252,10 +252,13 @@ const uint8_t adaptive_triangle_hs[] = {
0x12, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x12, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
0x68, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x04, 0x68, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x04,
0x12, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x12, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x04, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x70, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x12, 0x00, 0x10, 0x00, 0x0A, 0x70, 0x01, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x4E, 0x00, 0x00, 0x08, 0x00, 0xD0, 0x00, 0x00, 0x12, 0x00, 0x10, 0x00,
0x0A, 0x90, 0xE1, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x3F, 0x0A, 0x90, 0xA1, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x09,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -277,9 +280,9 @@ const uint8_t adaptive_triangle_hs[] = {
0x33, 0x00, 0x00, 0x07, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x07, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0xB0, 0x11, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0xB0, 0x11, 0x00,
0x02, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, 0x02, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

View File

@ -99,8 +99,9 @@ dcl_output_siv o1.x, finalTriVeq0EdgeTessFactor
dcl_output_siv o2.x, finalTriWeq0EdgeTessFactor dcl_output_siv o2.x, finalTriWeq0EdgeTessFactor
dcl_temps 1 dcl_temps 1
dcl_indexrange o0.x 3 dcl_indexrange o0.x 3
ineg r0.x, vForkInstanceID.x iadd r0.x, vForkInstanceID.x, l(1)
add r0.x, l(1.000000), vicp[r0.x + 2][0].x udiv null, r0.x, r0.x, l(3)
add r0.x, l(1.000000), vicp[r0.x + 0][0].x
max r0.x, r0.x, CB0[0][14].x max r0.x, r0.x, CB0[0][14].x
min r0.x, r0.x, CB0[0][14].y min r0.x, r0.x, CB0[0][14].y
mov r0.y, vForkInstanceID.x mov r0.y, vForkInstanceID.x
@ -115,4 +116,4 @@ dcl_temps 1
min r0.x, vpc0.x, vpc1.x min r0.x, vpc0.x, vpc1.x
min o3.x, r0.x, vpc2.x min o3.x, r0.x, vpc2.x
ret ret
// Approximately 11 instruction slots used // Approximately 12 instruction slots used