From a4ffcd51756b5480bd2c9bf46cab0ed1cb2770a8 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 23 Mar 2020 00:13:54 +0300 Subject: [PATCH] [D3D12] Update DXBC contribution notes --- src/xenia/gpu/dxbc_shader_translator.cc | 12 +++--- src/xenia/gpu/dxbc_shader_translator.h | 56 ++++++++++++++++++++----- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 2c0aa1956..b465d2cc3 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -40,17 +40,15 @@ using namespace ucode; // Notes about operands: // // Reading and writing: -// - Writes to 4-component registers must be masked. -// - Reads from 4-component registers can be swizzled, or 1 component can be -// selected. // - r# (temporary registers) are 4-component and can be used anywhere. // - v# (inputs) are 4-component and read-only. // - o# (outputs) are 4-component and write-only. // - oDepth (pixel shader depth output) is 1-component and write-only. -// - x# (indexable temporary registers) are 4-component (though not sure what -// happens if you dcl them as 1-component) and can be accessed either via -// a mov load or a mov store (and those movs are counted as ArrayInstructions -// in STAT, not as MovInstructions). +// - x# (indexable temporary registers) are 4-component and can be accessed +// either via a mov load or a mov store (and those movs are counted as +// ArrayInstructions in STAT, not as MovInstructions). The D3D11.3 functional +// specification says x# can be used wherever r# can be used, but FXC emits +// only mov load/store in simple tests. 
// // Indexing: // - Constant buffers use 3D indices in CBx[y][z] format, where x is the ID of diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index e7b02f674..59d24351a 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -30,17 +30,53 @@ namespace gpu { // // IMPORTANT CONTRIBUTION NOTES: // -// Not all DXBC instructions accept all kinds of operands equally! -// Refer to Shader Model 4 and 5 Assembly on MSDN to see if the needed -// swizzle/selection, absolute/negate modifiers and saturation are supported by -// the instruction. +// While DXBC may look like a flexible and high-level representation with highly +// generalized building blocks, actually it has a lot of restrictions on operand +// usage! +// Check the Direct3D 11.3 Functional Specification before adding anything! +// https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm +// (the "7. Common Shader Internals" chapter and the documentation of the +// specific instruction you want to use). +// For instructions, MSDN also provides some information, but it's not as +// detailed as the functional specification: // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx9-graphics-reference-asm -// Before adding anything that behaves in a way that doesn't follow patterns -// already used in Xenia, try to write the same logic in HLSL, compile it with -// FXC and see the resulting assembly *and preferably binary bytecode* as some -// instructions may, for example, require selection rather than swizzling for -// certain operands. For bytecode structure, see d3d12TokenizedProgramFormat.hpp -// from the Windows Driver Kit. +// Most important limitations: +// - This is very easy to hit, looks weird at first, and also not very important +// for modern drivers using DXILConv, but still needs to be respected for +// safety! 
One instruction can't accept more than one immediate or constant +// buffer source operand combined in total: +// and r0.x, CB0[0][0].x, l(1) +// and r0.x, CB0[0][0].x, CB0[0][0].y +// are illegal, even though pretty useful. Copy one of the operands to r#. +// - Absolute, negate and saturate are only supported by instructions that +// explicitly support them. +// - Component selection in the general case (ALU instructions - things like +// resource access and flow control mostly explicitly need a specific +// component selection mode defined in the specification of the instruction): +// - 0-component - for operand types with no data (samplers, labels). +// - 1-component - for scalar destination operand types, and for scalar source +// operand types when the destination vector has 1 component masked +// (including scalar immediates). +// - Mask - for vector destination operand types. +// - Swizzle - for both vector and scalar (replicated in this case) source +// operand types, when the destination vector has 2 or more components +// masked. Immediates in this case have XYZW swizzle. +// - Select 1 - for vector source operand types, when the destination has 1 +// component masked or is of a scalar type. +// - Input operands (v#) can be used only as sources, output operands (o#) can +// be used only as destinations. +// - The specification says that x#[] can be used wherever r# can be used, +// however, in tests, FXC only emits load/store mov instructions for x#[] +// (they are also counted in ArrayInstructions rather than MovInstructions in +// STAT), so it's better to only use mov for x#[]. The specification also +// permits using x#[] in relative addressing along with r# (as long as +// relative addressing isn't nested), but it's probably not very safe either. +// Don't do anything that FXC wouldn't do. +// TODO(Triang3l): Fix all places violating these rules - currently there are +// lots of them in Xenia! 
+// +// For bytecode structure, see d3d12TokenizedProgramFormat.hpp from the Windows +// Driver Kit. // // Avoid using uninitialized register components - such as registers written to // in "if" and not in "else", but then used outside unconditionally or with a