From 8ced574c6ff5ab128d5741f1abb877dbf33e6c87 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 27 Aug 2018 15:18:30 +0300 Subject: [PATCH] [D3D12] Add DXBC tokens and checksum --- premake5.lua | 1 + src/xenia/app/premake5.lua | 1 + src/xenia/gpu/premake5.lua | 1 + third_party/dxbc.lua | 18 + third_party/dxbc/DXBCChecksum.cpp | 389 +++ third_party/dxbc/DXBCChecksum.h | 79 + .../dxbc/d3d12TokenizedProgramFormat.hpp | 2612 +++++++++++++++++ 7 files changed, 3101 insertions(+) create mode 100644 third_party/dxbc.lua create mode 100644 third_party/dxbc/DXBCChecksum.cpp create mode 100644 third_party/dxbc/DXBCChecksum.h create mode 100644 third_party/dxbc/d3d12TokenizedProgramFormat.hpp diff --git a/premake5.lua b/premake5.lua index 8e80c090d..3464fdd7c 100644 --- a/premake5.lua +++ b/premake5.lua @@ -231,6 +231,7 @@ solution("xenia") -- Include third party files first so they don't have to deal with gflags. include("third_party/capstone.lua") + include("third_party/dxbc.lua") include("third_party/gflags.lua") include("third_party/glew.lua") include("third_party/glslang-spirv.lua") diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index cddb36d41..4e2dc399f 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -9,6 +9,7 @@ project("xenia-app") language("C++") links({ "capstone", + "dxbc", "gflags", "glew", "glslang-spirv", diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index d0ce85820..952b3aa6c 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -7,6 +7,7 @@ project("xenia-gpu") kind("StaticLib") language("C++") links({ + "dxbc", "glslang-spirv", "snappy", "spirv-tools", diff --git a/third_party/dxbc.lua b/third_party/dxbc.lua new file mode 100644 index 000000000..5729dd1dc --- /dev/null +++ b/third_party/dxbc.lua @@ -0,0 +1,18 @@ +group("third_party") +project("dxbc") + uuid("c96688ca-51ca-406e-aeef-068734a67abe") + kind("StaticLib") + language("C++") + links({ + }) + defines({ + "_LIB", + }) + includedirs({ + "dxbc", + }) + files({ + "dxbc/d3d12TokenizedProgramFormat.hpp", + "dxbc/DXBCChecksum.cpp", + "dxbc/DXBCChecksum.h", + }) diff --git a/third_party/dxbc/DXBCChecksum.cpp b/third_party/dxbc/DXBCChecksum.cpp new file mode 100644 index 000000000..635fc9cff --- /dev/null +++ b/third_party/dxbc/DXBCChecksum.cpp @@ -0,0 +1,389 @@ +// Xenia: removed Windows.h dependency. + +//===================================================================== +// Copyright 2008-2016 (c), Advanced Micro Devices, Inc. All rights reserved. +// +/// \author AMD Developer Tools Team +/// \file DXBCChecksum.cpp +/// +//===================================================================== + +//===================================================================== +// $Id: //devtools/main/Common/Src/ShaderUtils/DX10/DXBCChecksum.cpp#4 $ +// +// Last checkin: $DateTime: 2016/04/18 06:01:26 $ +// Last edited by: $Author: AMD Developer Tools Team +//===================================================================== +// Free for all implementation of the MD5 hash algorithm + +/* +********************************************************************** +** DXBCChecksum.cpp ** +** ** +** - Modified by Seth Sowerby, November 2007 ** +** Modified to match DXBC checksum algorithm used by Microsoft ** +** ** +********************************************************************** +*/ + +/* +********************************************************************** +** MD5.cpp ** +** ** +** - Style modified by Tony Ray, January 2001 ** +** Added support for randomizing initialization constants ** +** - Style modified by Dominik Reichl, April 2003 ** +** Optimized code ** +** ** +********************************************************************** +*/ + +/* + ********************************************************************** + ** MD5.c ** + ** RSA Data Security, Inc. MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. MD5 Message Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. ** + ********************************************************************** + */ + +#include +#include +#include +#include "DXBCChecksum.h" + +/* Padding */ +static unsigned char MD5_PADDING[64] = +{ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* MD5_F, MD5_G and MD5_H are basic MD5 functions: selection, majority, parity */ +#define MD5_F(x, y, z) (((x) & (y)) | ((~x) & (z))) +#define MD5_G(x, y, z) (((x) & (z)) | ((y) & (~z))) +#define MD5_H(x, y, z) ((x) ^ (y) ^ (z)) +#define MD5_I(x, y, z) ((y) ^ ((x) | (~z))) + +/* ROTATE_LEFT rotates x left n bits */ +#ifndef ROTATE_LEFT + #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) +#endif + +/* MD5_FF, MD5_GG, MD5_HH, and MD5_II transformations for rounds 1, 2, 3, and 4 */ +/* Rotation is separate from addition to prevent recomputation */ +#define MD5_FF(a, b, c, d, x, s, ac) {(a) += MD5_F ((b), (c), (d)) + (x) + (UINT4)(ac); (a) = ROTATE_LEFT ((a), (s)); (a) += (b); } +#define MD5_GG(a, b, c, d, x, s, ac) {(a) += MD5_G ((b), (c), (d)) + (x) + (UINT4)(ac); (a) = ROTATE_LEFT ((a), (s)); (a) += (b); } +#define MD5_HH(a, b, c, d, x, s, ac) {(a) += MD5_H ((b), (c), (d)) + (x) + (UINT4)(ac); (a) = ROTATE_LEFT ((a), (s)); (a) += (b); } +#define MD5_II(a, b, c, d, x, s, ac) {(a) += MD5_I ((b), (c), (d)) + (x) + (UINT4)(ac); (a) = ROTATE_LEFT ((a), (s)); (a) += (b); } + +/* Constants for transformation */ +#define MD5_S11 7 /* Round 1 */ +#define MD5_S12 12 +#define MD5_S13 17 +#define MD5_S14 22 +#define MD5_S21 5 /* Round 2 */ +#define MD5_S22 9 +#define MD5_S23 14 +#define MD5_S24 20 +#define MD5_S31 4 /* Round 3 */ +#define MD5_S32 11 +#define MD5_S33 16 +#define MD5_S34 23 +#define MD5_S41 6 /* Round 4 */ +#define MD5_S42 10 +#define MD5_S43 15 +#define MD5_S44 21 + +/* Typedef a 32 bit type */ +#ifndef UINT4 + typedef unsigned int UINT4; +#endif + +/* Data structure for MD5 (Message Digest) computation */ +typedef struct +{ + UINT4 i[2]; /* Number of _bits_ handled mod 2^64 */ + UINT4 buf[4]; /* Scratch buffer */ + unsigned char in[64]; /* Input buffer */ + unsigned char digest[16]; /* Actual digest after MD5Final call */ +} MD5_CTX; + +static void MD5_Transform(UINT4* buf, UINT4* in); + +void MD5Init(MD5_CTX* mdContext, unsigned long pseudoRandomNumber = 0); +void MD5Update(MD5_CTX* mdContext, unsigned char* inBuf, unsigned int inLen); +void MD5Final(MD5_CTX* mdContext); + +/* Basic MD5 step. MD5_Transform buf based on in */ +static void MD5_Transform(UINT4* buf, UINT4* in) +{ + UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ + MD5_FF(a, b, c, d, in[ 0], MD5_S11, (UINT4) 3614090360u); /* 1 */ + MD5_FF(d, a, b, c, in[ 1], MD5_S12, (UINT4) 3905402710u); /* 2 */ + MD5_FF(c, d, a, b, in[ 2], MD5_S13, (UINT4) 606105819u); /* 3 */ + MD5_FF(b, c, d, a, in[ 3], MD5_S14, (UINT4) 3250441966u); /* 4 */ + MD5_FF(a, b, c, d, in[ 4], MD5_S11, (UINT4) 4118548399u); /* 5 */ + MD5_FF(d, a, b, c, in[ 5], MD5_S12, (UINT4) 1200080426u); /* 6 */ + MD5_FF(c, d, a, b, in[ 6], MD5_S13, (UINT4) 2821735955u); /* 7 */ + MD5_FF(b, c, d, a, in[ 7], MD5_S14, (UINT4) 4249261313u); /* 8 */ + MD5_FF(a, b, c, d, in[ 8], MD5_S11, (UINT4) 1770035416u); /* 9 */ + MD5_FF(d, a, b, c, in[ 9], MD5_S12, (UINT4) 2336552879u); /* 10 */ + MD5_FF(c, d, a, b, in[10], MD5_S13, (UINT4) 4294925233u); /* 11 */ + MD5_FF(b, c, d, a, in[11], MD5_S14, (UINT4) 2304563134u); /* 12 */ + MD5_FF(a, b, c, d, in[12], MD5_S11, (UINT4) 1804603682u); /* 13 */ + MD5_FF(d, a, b, c, in[13], MD5_S12, (UINT4) 4254626195u); /* 14 */ + MD5_FF(c, d, a, b, in[14], MD5_S13, (UINT4) 2792965006u); /* 15 */ + MD5_FF(b, c, d, a, in[15], MD5_S14, (UINT4) 1236535329u); /* 16 */ + + /* Round 2 */ + MD5_GG(a, b, c, d, in[ 1], MD5_S21, (UINT4) 4129170786u); /* 17 */ + MD5_GG(d, a, b, c, in[ 6], MD5_S22, (UINT4) 3225465664u); /* 18 */ + MD5_GG(c, d, a, b, in[11], MD5_S23, (UINT4) 643717713u); /* 19 */ + MD5_GG(b, c, d, a, in[ 0], MD5_S24, (UINT4) 3921069994u); /* 20 */ + MD5_GG(a, b, c, d, in[ 5], MD5_S21, (UINT4) 3593408605u); /* 21 */ + MD5_GG(d, a, b, c, in[10], MD5_S22, (UINT4) 38016083u); /* 22 */ + MD5_GG(c, d, a, b, in[15], MD5_S23, (UINT4) 3634488961u); /* 23 */ + MD5_GG(b, c, d, a, in[ 4], MD5_S24, (UINT4) 3889429448u); /* 24 */ + MD5_GG(a, b, c, d, in[ 9], MD5_S21, (UINT4) 568446438u); /* 25 */ + MD5_GG(d, a, b, c, in[14], MD5_S22, (UINT4) 3275163606u); /* 26 */ + MD5_GG(c, d, a, b, in[ 3], MD5_S23, (UINT4) 4107603335u); /* 27 */ + MD5_GG(b, c, d, a, in[ 8], MD5_S24, (UINT4) 1163531501u); /* 28 */ + MD5_GG(a, b, c, d, in[13], MD5_S21, (UINT4) 2850285829u); /* 29 */ + MD5_GG(d, a, b, c, in[ 2], MD5_S22, (UINT4) 4243563512u); /* 30 */ + MD5_GG(c, d, a, b, in[ 7], MD5_S23, (UINT4) 1735328473u); /* 31 */ + MD5_GG(b, c, d, a, in[12], MD5_S24, (UINT4) 2368359562u); /* 32 */ + + /* Round 3 */ + MD5_HH(a, b, c, d, in[ 5], MD5_S31, (UINT4) 4294588738u); /* 33 */ + MD5_HH(d, a, b, c, in[ 8], MD5_S32, (UINT4) 2272392833u); /* 34 */ + MD5_HH(c, d, a, b, in[11], MD5_S33, (UINT4) 1839030562u); /* 35 */ + MD5_HH(b, c, d, a, in[14], MD5_S34, (UINT4) 4259657740u); /* 36 */ + MD5_HH(a, b, c, d, in[ 1], MD5_S31, (UINT4) 2763975236u); /* 37 */ + MD5_HH(d, a, b, c, in[ 4], MD5_S32, (UINT4) 1272893353u); /* 38 */ + MD5_HH(c, d, a, b, in[ 7], MD5_S33, (UINT4) 4139469664u); /* 39 */ + MD5_HH(b, c, d, a, in[10], MD5_S34, (UINT4) 3200236656u); /* 40 */ + MD5_HH(a, b, c, d, in[13], MD5_S31, (UINT4) 681279174u); /* 41 */ + MD5_HH(d, a, b, c, in[ 0], MD5_S32, (UINT4) 3936430074u); /* 42 */ + MD5_HH(c, d, a, b, in[ 3], MD5_S33, (UINT4) 3572445317u); /* 43 */ + MD5_HH(b, c, d, a, in[ 6], MD5_S34, (UINT4) 76029189u); /* 44 */ + MD5_HH(a, b, c, d, in[ 9], MD5_S31, (UINT4) 3654602809u); /* 45 */ + MD5_HH(d, a, b, c, in[12], MD5_S32, (UINT4) 3873151461u); /* 46 */ + MD5_HH(c, d, a, b, in[15], MD5_S33, (UINT4) 530742520u); /* 47 */ + MD5_HH(b, c, d, a, in[ 2], MD5_S34, (UINT4) 3299628645u); /* 48 */ + + /* Round 4 */ + MD5_II(a, b, c, d, in[ 0], MD5_S41, (UINT4) 4096336452u); /* 49 */ + MD5_II(d, a, b, c, in[ 7], MD5_S42, (UINT4) 1126891415u); /* 50 */ + MD5_II(c, d, a, b, in[14], MD5_S43, (UINT4) 2878612391u); /* 51 */ + MD5_II(b, c, d, a, in[ 5], MD5_S44, (UINT4) 4237533241u); /* 52 */ + MD5_II(a, b, c, d, in[12], MD5_S41, (UINT4) 1700485571u); /* 53 */ + MD5_II(d, a, b, c, in[ 3], MD5_S42, (UINT4) 2399980690u); /* 54 */ + MD5_II(c, d, a, b, in[10], MD5_S43, (UINT4) 4293915773u); /* 55 */ + MD5_II(b, c, d, a, in[ 1], MD5_S44, (UINT4) 2240044497u); /* 56 */ + MD5_II(a, b, c, d, in[ 8], MD5_S41, (UINT4) 1873313359u); /* 57 */ + MD5_II(d, a, b, c, in[15], MD5_S42, (UINT4) 4264355552u); /* 58 */ + MD5_II(c, d, a, b, in[ 6], MD5_S43, (UINT4) 2734768916u); /* 59 */ + MD5_II(b, c, d, a, in[13], MD5_S44, (UINT4) 1309151649u); /* 60 */ + MD5_II(a, b, c, d, in[ 4], MD5_S41, (UINT4) 4149444226u); /* 61 */ + MD5_II(d, a, b, c, in[11], MD5_S42, (UINT4) 3174756917u); /* 62 */ + MD5_II(c, d, a, b, in[ 2], MD5_S43, (UINT4) 718787259u); /* 63 */ + MD5_II(b, c, d, a, in[ 9], MD5_S44, (UINT4) 3951481745u); /* 64 */ + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +// Set pseudoRandomNumber to zero for RFC MD5 implementation +void MD5Init(MD5_CTX* mdContext, unsigned long pseudoRandomNumber) +{ + mdContext->i[0] = mdContext->i[1] = (UINT4)0; + + /* Load magic initialization constants */ + mdContext->buf[0] = (UINT4)0x67452301 + (pseudoRandomNumber * 11); + mdContext->buf[1] = (UINT4)0xefcdab89 + (pseudoRandomNumber * 71); + mdContext->buf[2] = (UINT4)0x98badcfe + (pseudoRandomNumber * 37); + mdContext->buf[3] = (UINT4)0x10325476 + (pseudoRandomNumber * 97); +} + +void MD5Update(MD5_CTX* mdContext, unsigned char* inBuf, unsigned int inLen) +{ + UINT4 in[16]; + int mdi = 0; + unsigned int i = 0, ii = 0; + + /* Compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* Update number of bits */ + if ((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0]) + { + mdContext->i[1]++; + } + + mdContext->i[0] += ((UINT4)inLen << 3); + mdContext->i[1] += ((UINT4)inLen >> 29); + + while (inLen--) + { + /* Add new character to buffer, increment mdi */ + mdContext->in[mdi++] = *inBuf++; + + /* Transform if necessary */ + if (mdi == 0x40) + { + for (i = 0, ii = 0; i < 16; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii + 3]) << 24) | + (((UINT4)mdContext->in[ii + 2]) << 16) | + (((UINT4)mdContext->in[ii + 1]) << 8) | + ((UINT4)mdContext->in[ii]); + + MD5_Transform(mdContext->buf, in); + mdi = 0; + } + } +} + +void MD5Final(MD5_CTX* mdContext) +{ + UINT4 in[16]; + int mdi = 0; + unsigned int i = 0, ii = 0, padLen = 0; + + /* Save number of bits */ + unsigned int numberOfBits = mdContext->i[0]; + + /* Pad out to 56 mod 64 */ + padLen = (mdi < 56) ? (56 - mdi) : (120 - mdi); + + MD5Update(mdContext, (unsigned char*) &numberOfBits, 4); + + /* Compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* Pad out to 56 mod 64 */ + padLen = (mdi < 56) ? (56 - mdi) : (120 - mdi); + MD5Update(mdContext, MD5_PADDING, padLen); + + ((UINT4*)mdContext->in)[15] = (numberOfBits >> 2) | 1; + // MD5Update (mdContext, (unsigned char*) terminator, 8); + + memcpy(in, mdContext->in, 64); + /* Append length in bits and transform */ + /* + for (i = 0, ii = 0; i < 14; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii+3]) << 24) | + (((UINT4)mdContext->in[ii+2]) << 16) | + (((UINT4)mdContext->in[ii+1]) << 8) | + ((UINT4)mdContext->in[ii]); + */ + MD5_Transform(mdContext->buf, in); + + /* Store buffer in digest */ + for (i = 0, ii = 0; i < 4; i++, ii += 4) + { + mdContext->digest[ii] = (unsigned char)(mdContext->buf[i] & 0xFF); + mdContext->digest[ii + 1] = (unsigned char)((mdContext->buf[i] >> 8) & 0xFF); + mdContext->digest[ii + 2] = (unsigned char)((mdContext->buf[i] >> 16) & 0xFF); + mdContext->digest[ii + 3] = (unsigned char)((mdContext->buf[i] >> 24) & 0xFF); + } +} + +static const unsigned int dwHashOffset = 0x14; + +void CalculateDXBCChecksum(unsigned char* pData, unsigned int dwSize, unsigned int dwHash[4]) +{ + MD5_CTX md5Ctx; + MD5Init(&md5Ctx, 0); + + // Skip the start of the shader header + dwSize -= dwHashOffset; + pData += dwHashOffset; + + unsigned int dwNumberOfBits = dwSize * 8; + + // First we hash all the full chunks available + unsigned int dwFullChunksSize = dwSize & 0xffffffc0; + MD5Update(&md5Ctx, pData, dwFullChunksSize); + + unsigned int dwLastChunkSize = dwSize - dwFullChunksSize; + unsigned int dwPaddingSize = 64 - dwLastChunkSize; + unsigned char* pLastChunkData = pData + dwFullChunksSize; + + if (dwLastChunkSize >= 56) + { + MD5Update(&md5Ctx, pLastChunkData, dwLastChunkSize); + + /* Pad out to 56 mod 64 */ + MD5Update(&md5Ctx, MD5_PADDING, dwPaddingSize); + + // Pass in the number of bits + UINT4 in[16]; + memset(in, 0, sizeof(in)); + in[0] = dwNumberOfBits; + in[15] = (dwNumberOfBits >> 2) | 1; + + MD5_Transform(md5Ctx.buf, in); + } + else + { + // Pass in the number of bits + MD5Update(&md5Ctx, (unsigned char*) &dwNumberOfBits, 4); + + if (dwLastChunkSize) + { + MD5Update(&md5Ctx, pLastChunkData, dwLastChunkSize); + } + + + // Adjust for the space used for dwNumberOfBits + dwLastChunkSize += sizeof(unsigned int); + dwPaddingSize -= sizeof(unsigned int); + + /* Pad out to 56 mod 64 */ + memcpy(&md5Ctx.in[dwLastChunkSize], MD5_PADDING, dwPaddingSize); + + ((UINT4*)md5Ctx.in)[15] = (dwNumberOfBits >> 2) | 1; + + UINT4 in[16]; + memcpy(in, md5Ctx.in, 64); + + MD5_Transform(md5Ctx.buf, in); + } + + memcpy(dwHash, md5Ctx.buf, 4 * sizeof(unsigned int)); +} \ No newline at end of file diff --git a/third_party/dxbc/DXBCChecksum.h b/third_party/dxbc/DXBCChecksum.h new file mode 100644 index 000000000..f9407a0d6 --- /dev/null +++ b/third_party/dxbc/DXBCChecksum.h @@ -0,0 +1,79 @@ +// Xenia: removed Windows.h dependency. + +//===================================================================== +// Copyright 2008-2016 (c), Advanced Micro Devices, Inc. All rights reserved. +// +/// \author AMD Developer Tools Team +/// \file DXBCChecksum.h +/// +//===================================================================== + +//===================================================================== +// $Id: //devtools/main/Common/Src/ShaderUtils/DX10/DXBCChecksum.h#4 $ +// +// Last checkin: $DateTime: 2016/04/18 06:01:26 $ +// Last edited by: $Author: AMD Developer Tools Team +//===================================================================== + +#pragma once +#ifndef DXBCCHECKSUM_H +#define DXBCCHECKSUM_H + +/* + ********************************************************************** + ** MD5.h ** + ** ** + ** - Style modified by Tony Ray, January 2001 ** + ** Added support for randomizing initialization constants ** + ** - Style modified by Dominik Reichl, September 2002 ** + ** Optimized code ** + ** ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** MD5.h -- Header file for implementation of MD5 ** + ** RSA Data Security, Inc. MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version ** + ** Revised (for MD5): RLR 4/27/91 ** + ** -- G modified to have y&~z instead of y&z ** + ** -- FF, GG, HH modified to add in last register done ** + ** -- Access pattern: round 2 works mod 5, round 3 works mod 3 ** + ** -- distinct additive constant for each step ** + ** -- round 4 added, working mod 7 ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. MD5 Message Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. ** + ********************************************************************** + */ + +/// Calculate the DXBC checksum for the provided chunk of memory. +/// \param[in] pData A pointer to the memory to checksum. +/// \param[in] dwSize The size of the memory to checksum. +/// \param[out] dwHash A DWORD array to copy the calculated check sum into. +void CalculateDXBCChecksum(unsigned char* pData, unsigned int dwSize, unsigned int dwHash[4]); + +#endif // DXBCCHECKSUM_H diff --git a/third_party/dxbc/d3d12TokenizedProgramFormat.hpp b/third_party/dxbc/d3d12TokenizedProgramFormat.hpp new file mode 100644 index 000000000..0fb259507 --- /dev/null +++ b/third_party/dxbc/d3d12TokenizedProgramFormat.hpp @@ -0,0 +1,2612 @@ +#pragma once +// Xenia: removed winapifamily.h dependency. + +// ---------------------------------------------------------------------------- +// +// D3D11 Tokenized Program Format +// +// Copyright (c) Microsoft Corporation. +// +// ---------------------------------------------------------------------------- +// +// High Level Goals +// +// - Serve as the runtime/DDI representation for all D3D11 tokenized code, +// for all classes of programs, including pixel program, vertex program, +// geometry program, etc. +// +// - Any information that HLSL needs to give to drivers is encoded in +// this token format in some form. +// +// - Enable common tools and source code for managing all tokenizable +// program formats. +// +// - Support extensible token definitions, allowing full customizations for +// specific program classes, while maintaining general conventions for all +// program models. +// +// - Binary backwards compatible with D3D10. Any token name that was originally +// defined with "D3D10" in it is unchanged; D3D11 only adds new tokens. +// +// ---------------------------------------------------------------------------- +// +// Low Level Feature Summary +// +// - DWORD based tokens always, for simplicity +// - Opcode token is generally a single DWORD, though there is a bit indicating +// if extended information (extra DWORD(s)) are present +// - Operand tokens are a completely self contained, extensible format, +// with scalar and 4-vector data types as first class citizens, but +// allowance for extension to n-component vectors. +// - Initial operand token identifies register type, register file +// structure/dimensionality and mode of indexing for each dimension, +// and choice of component selection mechanism (i.e. mask vs. swizzle etc). +// - Optional additional extended operand tokens can defined things like +// modifiers (which are not needed by default). +// - Operand's immediate index value(s), if needed, appear as subsequent DWORD +// values, and if relative addressing is specified, an additional completely +// self contained operand definition appears nested in the token sequence. +// +// ---------------------------------------------------------------------------- + +// ---------------------------------------------------------------------------- +// Version Token (VerTok) +// +// [07:00] minor version number (0-255) +// [15:08] major version number (0-255) +// [31:16] D3D10_SB_TOKENIZED_PROGRAM_TYPE +// +// ---------------------------------------------------------------------------- + +typedef enum D3D10_SB_TOKENIZED_PROGRAM_TYPE +{ + D3D10_SB_PIXEL_SHADER = 0, + D3D10_SB_VERTEX_SHADER = 1, + D3D10_SB_GEOMETRY_SHADER = 2, + + // D3D11 Shaders + D3D11_SB_HULL_SHADER = 3, + D3D11_SB_DOMAIN_SHADER = 4, + D3D11_SB_COMPUTE_SHADER = 5, + + D3D11_SB_RESERVED0 = 0xFFF0 +} D3D10_SB_TOKENIZED_PROGRAM_TYPE; + +#define D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK 0xffff0000 +#define D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT 16 + +// DECODER MACRO: Retrieve program type from version token +#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE(VerTok) ((D3D10_SB_TOKENIZED_PROGRAM_TYPE)(((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK)>>D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT)) + +#define D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK 0x000000f0 +#define D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT 4 +#define D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK 0x0000000f + +// DECODER MACRO: Retrieve major version # from version token +#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION(VerTok) (((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK)>>D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT) +// DECODER MACRO: Retrieve minor version # from version token +#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION(VerTok) ((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK) + +// ENCODER MACRO: Create complete VerTok +#define ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(ProgType,MajorVer,MinorVer) ((((ProgType)<> D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT) + +// ENCODER MACRO: Store instruction length +// portion of OpcodeToken0, in # of DWORDs +// including the opcode token(s). +// Valid range is 1-127. +#define ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(Length) (((Length)<>D3D10_SB_INSTRUCTION_TEST_BOOLEAN_SHIFT)) +// ENCODER MACRO: Store "zero" or "nonzero" in the opcode +// specific control range of OpcodeToken0 +#define ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Boolean) (((Boolean)<>D3D11_SB_INSTRUCTION_PRECISE_VALUES_SHIFT)) +// ENCODER MACRO: Given a set of +// D3D10_SB_OPERAND_4_COMPONENT_[X|Y|Z|W] values +// or'd together, encode them in OpcodeToken0. +#define ENCODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(ComponentMask) (((ComponentMask)<>D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_SHIFT)) +// ENCODER MACRO: Encode the return type for the resinfo instruction +// in the opcode specific control range of OpcodeToken0 +#define ENCODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(ReturnType) (((ReturnType)<> D3D10_SB_OPCODE_EXTENDED_SHIFT) +// ENCODER MACRO: Store in OpcodeToken0 whether the opcode is extended +// by an additional opcode token. +#define ENCODE_D3D10_SB_OPCODE_EXTENDED(bExtended) (((bExtended)!=0)?D3D10_SB_OPCODE_EXTENDED_MASK:0) + +// ---------------------------------------------------------------------------- +// Extended Opcode Format (OpcodeToken1) +// +// If bit31 of an opcode token is set, the +// opcode has an additional extended opcode token DWORD +// directly following OpcodeToken0. Other tokens +// expected for the opcode, such as the operand +// token(s) always follow +// OpcodeToken0 AND OpcodeToken1..n (extended +// opcode tokens, if present). +// +// [05:00] D3D10_SB_EXTENDED_OPCODE_TYPE +// [30:06] if([05:00] == D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS) +// { +// This custom opcode contains controls for SAMPLE. +// [08:06] Ignored, 0. +// [12:09] U texel immediate offset (4 bit 2's comp) (0 default) +// [16:13] V texel immediate offset (4 bit 2's comp) (0 default) +// [20:17] W texel immediate offset (4 bit 2's comp) (0 default) +// [30:14] Ignored, 0. +// } +// else if( [05:00] == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM ) +// { +// [10:06] D3D10_SB_RESOURCE_DIMENSION +// [22:11] When dimension is D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER this holds the buffer stride, otherwise 0 +// [30:23] Ignored, 0. +// } +// else if( [05:00] == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE ) +// { +// [09:06] D3D10_SB_RESOURCE_RETURN_TYPE for component X +// [13:10] D3D10_SB_RESOURCE_RETURN_TYPE for component Y +// [17:14] D3D10_SB_RESOURCE_RETURN_TYPE for component Z +// [21:18] D3D10_SB_RESOURCE_RETURN_TYPE for component W +// [30:22] Ignored, 0. +// } +// else +// { +// [30:04] Ignored, 0. +// } +// [31] 0 normally. 1 there is another extended opcode. Any number +// of extended opcode tokens can be chained. It is possible that some extended +// opcode tokens could include multiple DWORDS - that is defined +// on a case by case basis. +// +// ---------------------------------------------------------------------------- +typedef enum D3D10_SB_EXTENDED_OPCODE_TYPE +{ + D3D10_SB_EXTENDED_OPCODE_EMPTY = 0, + D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS = 1, + D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM = 2, + D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3, +} D3D10_SB_EXTENDED_OPCODE_TYPE; +#define D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES 3 + +#define D3D10_SB_EXTENDED_OPCODE_TYPE_MASK 0x0000003f + +// DECODER MACRO: Given an extended opcode +// token (OpcodeToken1), figure out what type +// of token it is (from D3D10_SB_EXTENDED_OPCODE_TYPE enum) +// to be able to interpret the rest of the token's contents. +#define DECODE_D3D10_SB_EXTENDED_OPCODE_TYPE(OpcodeToken1) ((D3D10_SB_EXTENDED_OPCODE_TYPE)((OpcodeToken1)&D3D10_SB_EXTENDED_OPCODE_TYPE_MASK)) + +// ENCODER MACRO: Store extended opcode token +// type in OpcodeToken1. +#define ENCODE_D3D10_SB_EXTENDED_OPCODE_TYPE(ExtOpcodeType) ((ExtOpcodeType)&D3D10_SB_EXTENDED_OPCODE_TYPE_MASK) + +typedef enum D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD +{ + D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U = 0, + D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V = 1, + D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W = 2, +} D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD; +#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD_MASK (3) +#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD_MASK)) +#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<>(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord)))) + +// ENCODER MACRO: Store the immediate texel address offset +// for U or V or W Coord (D3D10_SB_ADDRESS_OFFSET_COORD) in an extended +// opcode token (OpcodeToken1) that has extended opcode +// type == D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS (opcode type encoded separately) +// A 2's complement number is expected as input, from which the LSB 4 bits are extracted. +#define ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(Coord,ImmediateOffset) (((ImmediateOffset)<>D3D11_SB_EXTENDED_RESOURCE_DIMENSION_SHIFT)) + +// ENCODER MACRO: Store resource dimension +// (D3D10_SB_RESOURCE_DIMENSION enum) into a +// an extended resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM) +#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(ResourceDim) (((ResourceDim)<>D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_SHIFT) + +// ENCODER MACRO: Store resource dimension structure stride +// (12-bit unsigned integer) into a +// an extended resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM) +#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(Stride) (((Stride)<> \ + (Component * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS + D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT))&D3D10_SB_RESOURCE_RETURN_TYPE_MASK)) + +// ENCODER MACRO: Generate a resource return type for a component in an extended +// resource delcaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE) +#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(ReturnType, Component) \ + (((ReturnType)&D3D10_SB_RESOURCE_RETURN_TYPE_MASK) << (Component * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS + D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT)) + +// ---------------------------------------------------------------------------- +// Custom-Data Block Format +// +// DWORD 0 (CustomDataDescTok): +// [10:00] == D3D10_SB_OPCODE_CUSTOMDATA +// [31:11] == D3D10_SB_CUSTOMDATA_CLASS +// +// DWORD 1: +// 32-bit unsigned integer count of number +// of DWORDs in custom-data block, +// including DWORD 0 and DWORD 1. +// So the minimum value is 0x00000002, +// meaning empty custom-data. +// +// Layout of custom-data contents, for the various meta-data classes, +// not defined in this file. +// +// ---------------------------------------------------------------------------- + +typedef enum D3D10_SB_CUSTOMDATA_CLASS +{ + D3D10_SB_CUSTOMDATA_COMMENT = 0, + D3D10_SB_CUSTOMDATA_DEBUGINFO, + D3D10_SB_CUSTOMDATA_OPAQUE, + D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER, + D3D11_SB_CUSTOMDATA_SHADER_MESSAGE, + D3D11_SB_CUSTOMDATA_SHADER_CLIP_PLANE_CONSTANT_MAPPINGS_FOR_DX9, +} D3D10_SB_CUSTOMDATA_CLASS; + +#define D3D10_SB_CUSTOMDATA_CLASS_MASK 0xfffff800 +#define D3D10_SB_CUSTOMDATA_CLASS_SHIFT 11 +// DECODER MACRO: Find out what class of custom-data is present. +// The contents of the custom-data block are defined +// for each class of custom-data. +#define DECODE_D3D10_SB_CUSTOMDATA_CLASS(CustomDataDescTok) ((D3D10_SB_CUSTOMDATA_CLASS)(((CustomDataDescTok)&D3D10_SB_CUSTOMDATA_CLASS_MASK)>>D3D10_SB_CUSTOMDATA_CLASS_SHIFT)) +// ENCODER MACRO: Create complete CustomDataDescTok +#define ENCODE_D3D10_SB_CUSTOMDATA_CLASS(CustomDataClass) (ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CUSTOMDATA)|(((CustomDataClass)<= D3D10_SB_OPERAND_INDEX_1D ) +// D3D10_SB_OPERAND_INDEX_REPRESENTATION for first operand index +// else +// Ignored, 0 +// [27:25] if( [21:20] >= D3D10_SB_OPERAND_INDEX_2D ) +// D3D10_SB_OPERAND_INDEX_REPRESENTATION for second operand index +// else +// Ignored, 0 +// [30:28] if( [21:20] == D3D10_SB_OPERAND_INDEX_3D ) +// D3D10_SB_OPERAND_INDEX_REPRESENTATION for third operand index +// else +// Ignored, 0 +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. +// +// ---------------------------------------------------------------------------- + +// Number of components in data vector referred to by operand. +typedef enum D3D10_SB_OPERAND_NUM_COMPONENTS +{ + D3D10_SB_OPERAND_0_COMPONENT = 0, + D3D10_SB_OPERAND_1_COMPONENT = 1, + D3D10_SB_OPERAND_4_COMPONENT = 2, + D3D10_SB_OPERAND_N_COMPONENT = 3 // unused for now +} D3D10_SB_OPERAND_NUM_COMPONENTS; +#define D3D10_SB_OPERAND_NUM_COMPONENTS_MASK 0x00000003 + +// DECODER MACRO: Extract from OperandToken0 how many components +// the data vector referred to by the operand contains. +// (D3D10_SB_OPERAND_NUM_COMPONENTS enum) +#define DECODE_D3D10_SB_OPERAND_NUM_COMPONENTS(OperandToken0) ((D3D10_SB_OPERAND_NUM_COMPONENTS)((OperandToken0)&D3D10_SB_OPERAND_NUM_COMPONENTS_MASK)) + +// ENCODER MACRO: Define in OperandToken0 how many components +// the data vector referred to by the operand contains. +// (D3D10_SB_OPERAND_NUM_COMPONENTS enum). +#define ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(NumComp) ((NumComp)&D3D10_SB_OPERAND_NUM_COMPONENTS_MASK) + +typedef enum D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE +{ + D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE = 0, // mask 4 components + D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE = 1, // swizzle 4 components + D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components +} D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE; + +#define D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK 0x0000000c +#define D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT 2 + +// DECODER MACRO: For an operand representing 4component data, +// extract from OperandToken0 the method for selecting data from +// the 4 components (D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE). +#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(OperandToken0) ((D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE)(((OperandToken0)&D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK)>>D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT)) + +// ENCODER MACRO: For an operand representing 4component data, +// encode in OperandToken0 a value from D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE +#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(SelectionMode) (((SelectionMode)<>(D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SHIFT+2*((DestComp)&D3D10_SB_4_COMPONENT_NAME_MASK)))&D3D10_SB_4_COMPONENT_NAME_MASK)) + +// ENCODER MACRO: Generate a 4 component swizzle given +// 4 D3D10_SB_4_COMPONENT_NAME source values for dest +// components x, y, z, w respectively. +#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(XSrc,YSrc,ZSrc,WSrc) ((((XSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)| \ + (((YSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)<<2)| \ + (((ZSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)<<4)| \ + (((WSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)<<6) \ + )<>D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_SHIFT)) + +// ENCODER MACRO: Given a D3D10_SB_4_COMPONENT_NAME selecting +// a single component for D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE, +// encode it into OperandToken0 +#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(SelectedComp) (((SelectedComp)<>D3D10_SB_OPERAND_TYPE_SHIFT)) + +// ENCODER MACRO: Store operand type in OperandToken0. +#define ENCODE_D3D10_SB_OPERAND_TYPE(OperandType) (((OperandType)<>D3D10_SB_OPERAND_INDEX_DIMENSION_SHIFT)) + +// ENCODER MACRO: Store operand index dimension +// (D3D10_SB_OPERAND_INDEX_DIMENSION enum) in OperandToken0. +#define ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(OperandIndexDim) (((OperandIndexDim)<>D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim))) + +// ENCODER MACRO: Store in OperandToken0 what representation +// an operand index is provided as (D3D10_SB_OPERAND_INDEX_REPRESENTATION enum), +// for index dimension [0], [1] or [2], depending on D3D10_SB_OPERAND_INDEX_DIMENSION. +#define ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(Dim,IndexRepresentation) (((IndexRepresentation)<>D3D10_SB_OPERAND_EXTENDED_SHIFT) + +// ENCODER MACRO: Store in OperandToken0 whether the operand is extended +// by an additional operand token. +#define ENCODE_D3D10_SB_OPERAND_EXTENDED(bExtended) (((bExtended)!=0)?D3D10_SB_OPERAND_EXTENDED_MASK:0) + +// ---------------------------------------------------------------------------- +// Extended Instruction Operand Format (OperandToken1) +// +// If bit31 of an operand token is set, the +// operand has additional data in a second DWORD +// directly following OperandToken0. Other tokens +// expected for the operand, such as immmediate +// values or relative address operands (full +// operands in themselves) always follow +// OperandToken0 AND OperandToken1..n (extended +// operand tokens, if present). +// +// [05:00] D3D10_SB_EXTENDED_OPERAND_TYPE +// [16:06] if([05:00] == D3D10_SB_EXTENDED_OPERAND_MODIFIER) +// { +// [13:06] D3D10_SB_OPERAND_MODIFIER +// [16:14] Min Precision: D3D11_SB_OPERAND_MIN_PRECISION +// [17:17] Non-uniform: D3D12_SB_OPERAND_NON_UNIFORM +// } +// else +// { +// [17:06] Ignored, 0. +// } +// [30:18] Ignored, 0. +// [31] 0 normally. 1 if second order extended operand definition, +// meaning next DWORD contains yet ANOTHER extended operand +// description. Currently no second order extensions defined. +// This would be useful if a particular extended operand does +// not have enough space to store the required information in +// a single token and so is extended further. +// +// ---------------------------------------------------------------------------- + +typedef enum D3D10_SB_EXTENDED_OPERAND_TYPE +{ + D3D10_SB_EXTENDED_OPERAND_EMPTY = 0, // Might be used if this + // enum is full and + // further extended opcode + // is needed. + D3D10_SB_EXTENDED_OPERAND_MODIFIER = 1, +} D3D10_SB_EXTENDED_OPERAND_TYPE; +#define D3D10_SB_EXTENDED_OPERAND_TYPE_MASK 0x0000003f + +// DECODER MACRO: Given an extended operand +// token (OperandToken1), figure out what type +// of token it is (from D3D10_SB_EXTENDED_OPERAND_TYPE enum) +// to be able to interpret the rest of the token's contents. +#define DECODE_D3D10_SB_EXTENDED_OPERAND_TYPE(OperandToken1) ((D3D10_SB_EXTENDED_OPERAND_TYPE)((OperandToken1)&D3D10_SB_EXTENDED_OPERAND_TYPE_MASK)) + +// ENCODER MACRO: Store extended operand token +// type in OperandToken1. +#define ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(ExtOperandType) ((ExtOperandType)&D3D10_SB_EXTENDED_OPERAND_TYPE_MASK) + +typedef enum D3D10_SB_OPERAND_MODIFIER +{ + D3D10_SB_OPERAND_MODIFIER_NONE = 0, // Nop. This is the implied + // default if the extended + // operand is not present for + // an operand for which source + // modifiers are meaningful + D3D10_SB_OPERAND_MODIFIER_NEG = 1, // Negate + D3D10_SB_OPERAND_MODIFIER_ABS = 2, // Absolute value, abs() + D3D10_SB_OPERAND_MODIFIER_ABSNEG = 3, // -abs() +} D3D10_SB_OPERAND_MODIFIER; +#define D3D10_SB_OPERAND_MODIFIER_MASK 0x00003fc0 +#define D3D10_SB_OPERAND_MODIFIER_SHIFT 6 + +// DECODER MACRO: Given a D3D10_SB_EXTENDED_OPERAND_MODIFIER +// extended token (OperandToken1), determine the source modifier +// (D3D10_SB_OPERAND_MODIFIER enum) +#define DECODE_D3D10_SB_OPERAND_MODIFIER(OperandToken1) ((D3D10_SB_OPERAND_MODIFIER)(((OperandToken1)&D3D10_SB_OPERAND_MODIFIER_MASK)>>D3D10_SB_OPERAND_MODIFIER_SHIFT)) + +// ENCODER MACRO: Generate a complete source modifier extended token +// (OperandToken1), given D3D10_SB_OPERAND_MODIFIER enum (the +// ext. operand type is also set to D3D10_SB_EXTENDED_OPERAND_MODIFIER). +#define ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(SourceMod) ((((SourceMod)<> D3D11_SB_OPERAND_MIN_PRECISION_SHIFT)) + +// ENCODER MACRO: Encode minimum precision for execution +// into the extended operand token, OperandToken1 +#define ENCODE_D3D11_SB_OPERAND_MIN_PRECISION(MinPrecision) (((MinPrecision)<< D3D11_SB_OPERAND_MIN_PRECISION_SHIFT)& D3D11_SB_OPERAND_MIN_PRECISION_MASK) + + +// Non-uniform extended operand modifier. +#define D3D12_SB_OPERAND_NON_UNIFORM_MASK 0x00020000 +#define D3D12_SB_OPERAND_NON_UNIFORM_SHIFT 17 + +// DECODER MACRO: For an OperandToken1 that can specify a non-uniform operand +#define DECODE_D3D12_SB_OPERAND_NON_UNIFORM(OperandToken1) (((OperandToken1)& D3D12_SB_OPERAND_NON_UNIFORM_MASK)>> D3D12_SB_OPERAND_NON_UNIFORM_SHIFT) + +// ENCODER MACRO: Encode non-uniform state into the extended operand token, OperandToken1 +#define ENCODE_D3D12_SB_OPERAND_NON_UNIFORM(NonUniform) (((NonUniform)<< D3D12_SB_OPERAND_NON_UNIFORM_SHIFT)& D3D12_SB_OPERAND_NON_UNIFORM_MASK) + + +#define D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK 0x80000000 +#define D3D10_SB_OPERAND_DOUBLE_EXTENDED_SHIFT 31 +// DECODER MACRO: Determine if an extended operand token +// (OperandToken1) is further extended by yet another token +// (OperandToken2). Currently there are no secondary +// extended operand tokens. +#define DECODE_IS_D3D10_SB_OPERAND_DOUBLE_EXTENDED(OperandToken1) (((OperandToken1)&D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK)>>D3D10_SB_OPERAND_DOUBLE_EXTENDED_SHIFT) + +// ENCODER MACRO: Store in OperandToken1 whether the operand is extended +// by an additional operand token. Currently there are no secondary +// extended operand tokens. +#define ENCODE_D3D10_SB_OPERAND_DOUBLE_EXTENDED(bExtended) (((bExtended)!=0)?D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK:0) + +// ---------------------------------------------------------------------------- +// Name Token (NameToken) (used in declaration statements) +// +// [15:00] D3D10_SB_NAME enumeration +// [31:16] Reserved, 0 +// +// ---------------------------------------------------------------------------- +#define D3D10_SB_NAME_MASK 0x0000ffff + +// DECODER MACRO: Get the name from NameToken +#define DECODE_D3D10_SB_NAME(NameToken) ((D3D10_SB_NAME)((NameToken)&D3D10_SB_NAME_MASK)) + +// ENCODER MACRO: Generate a complete NameToken given a D3D10_SB_NAME +#define ENCODE_D3D10_SB_NAME(Name) ((Name)&D3D10_SB_NAME_MASK) + +//--------------------------------------------------------------------- +// Declaration Statements +// +// Declarations start with a standard opcode token, +// having opcode type being D3D10_SB_OPCODE_DCL*. +// Each particular declaration type has custom +// operand token(s), described below. +//--------------------------------------------------------------------- + +// ---------------------------------------------------------------------------- +// Global Flags Declaration +// +// OpcodeToken0: +// +// [10:00] D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS +// [11:11] Refactoring allowed if bit set. +// [12:12] Enable double precision float ops. +// [13:13] Force early depth-stencil test. +// [14:14] Enable RAW and structured buffers in non-CS 4.x shaders. +// [15:15] Skip optimizations of shader IL when translating to native code +// [16:16] Enable minimum-precision data types +// [17:17] Enable 11.1 double-precision floating-point instruction extensions +// [18:18] Enable 11.1 non-double instruction extensions +// [23:19] Reserved for future flags. +// [30:24] Instruction length in DWORDs including the opcode token. == 1 +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by no operands. +// +// ---------------------------------------------------------------------------- +#define D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED (1<<11) +#define D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS (1<<12) +#define D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL (1<<13) +#define D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS (1<<14) +#define D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION (1<<15) +#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION (1<<16) +#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS (1<<17) +#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS (1<<18) +#define D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND (1<<19) + +#define D3D10_SB_GLOBAL_FLAGS_MASK 0x00fff800 + +// DECODER MACRO: Get global flags +#define DECODE_D3D10_SB_GLOBAL_FLAGS(OpcodeToken0) ((OpcodeToken0)&D3D10_SB_GLOBAL_FLAGS_MASK) + +// ENCODER MACRO: Encode global flags +#define ENCODE_D3D10_SB_GLOBAL_FLAGS(Flags) ((Flags)&D3D10_SB_GLOBAL_FLAGS_MASK) + +// ---------------------------------------------------------------------------- +// Resource Declaration (non multisampled) +// +// OpcodeToken0: +// +// [10:00] D3D10_SB_OPCODE_DCL_RESOURCE +// [15:11] D3D10_SB_RESOURCE_DIMENSION +// [23:16] Ignored, 0 +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 2 operands on Shader Models 4.0 through 5.0: +// (1) an operand, starting with OperandToken0, defining which +// t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared. +// (2) a Resource Return Type token (ResourceReturnTypeToken) +// +// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later: +// (1) an operand, starting with OperandToken0, defining which +// t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared. +// The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, +// and the meaning of the index dimensions are as follows: (t[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of resources in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the t# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (t[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of resource within space (may be dynamically indexed) +// (2) a Resource Return Type token (ResourceReturnTypeToken) +// (3) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- +#define D3D10_SB_RESOURCE_DIMENSION_MASK 0x0000F800 +#define D3D10_SB_RESOURCE_DIMENSION_SHIFT 11 + +// DECODER MACRO: Given a resource declaration token, +// (OpcodeToken0), determine the resource dimension +// (D3D10_SB_RESOURCE_DIMENSION enum) +#define DECODE_D3D10_SB_RESOURCE_DIMENSION(OpcodeToken0) ((D3D10_SB_RESOURCE_DIMENSION)(((OpcodeToken0)&D3D10_SB_RESOURCE_DIMENSION_MASK)>>D3D10_SB_RESOURCE_DIMENSION_SHIFT)) + +// ENCODER MACRO: Store resource dimension +// (D3D10_SB_RESOURCE_DIMENSION enum) into a +// a resource declaration token (OpcodeToken0) +#define ENCODE_D3D10_SB_RESOURCE_DIMENSION(ResourceDim) (((ResourceDim)<[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of resources in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the t# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (t[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of resource within space (may be dynamically indexed) +// (2) a Resource Return Type token (ResourceReturnTypeToken) +// (3) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- + +// use same macro for encoding/decoding resource dimension aas the non-msaa declaration + +#define D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK 0x07F0000 +#define D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT 16 + +// DECODER MACRO: Given a resource declaration token, +// (OpcodeToken0), determine the resource sample count (1..127) +#define DECODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK)>>D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT)) + +// ENCODER MACRO: Store resource sample count up to 127 into a +// a resource declaration token (OpcodeToken0) +#define ENCODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(SampleCount) (((SampleCount > 127 ? 127 : SampleCount)<> \ + (Component * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS))&D3D10_SB_RESOURCE_RETURN_TYPE_MASK)) + +// ENCODER MACRO: Generate a resource return type for a component +#define ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnType, Component) \ + (((ReturnType)&D3D10_SB_RESOURCE_RETURN_TYPE_MASK) << (Component * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS)) + +// ---------------------------------------------------------------------------- +// Sampler Declaration +// +// OpcodeToken0: +// +// [10:00] D3D10_SB_OPCODE_DCL_SAMPLER +// [14:11] D3D10_SB_SAMPLER_MODE +// [23:15] Ignored, 0 +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 1 operand on Shader Models 4.0 through 5.0: +// (1) Operand starting with OperandToken0, defining which sampler +// (D3D10_SB_OPERAND_TYPE_SAMPLER) register # is being declared. +// +// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later: +// (1) an operand, starting with OperandToken0, defining which +// s# register (D3D10_SB_OPERAND_TYPE_SAMPLER) is being declared. +// The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, +// and the meaning of the index dimensions are as follows: (s[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of samplers in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the s# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (s[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of sampler within space (may be dynamically indexed) +// (2) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- +typedef enum D3D10_SB_SAMPLER_MODE +{ + D3D10_SB_SAMPLER_MODE_DEFAULT = 0, + D3D10_SB_SAMPLER_MODE_COMPARISON = 1, + D3D10_SB_SAMPLER_MODE_MONO = 2, +} D3D10_SB_SAMPLER_MODE; + +#define D3D10_SB_SAMPLER_MODE_MASK 0x00007800 +#define D3D10_SB_SAMPLER_MODE_SHIFT 11 + +// DECODER MACRO: Find out if a Constant Buffer is going to be indexed or not +#define DECODE_D3D10_SB_SAMPLER_MODE(OpcodeToken0) ((D3D10_SB_SAMPLER_MODE)(((OpcodeToken0)&D3D10_SB_SAMPLER_MODE_MASK)>>D3D10_SB_SAMPLER_MODE_SHIFT)) + +// ENCODER MACRO: Generate a resource return type for a component +#define ENCODE_D3D10_SB_SAMPLER_MODE(SamplerMode) (((SamplerMode)<>D3D10_SB_INPUT_INTERPOLATION_MODE_SHIFT)) + +// ENCODER MACRO: Encode interpolation mode for a register. +#define ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(InterpolationMode) (((InterpolationMode)<[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of constant buffers in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the cb#[] is used in shader instructions: (cb[][]) +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of constant buffer within space (may be dynamically indexed) +// 3 : location of vector within constant buffer being referenced, +// which may also be dynamically indexed, with no access pattern flag required. +// (2) a DWORD indicating the size of the constant buffer as a count of 16-byte vectors. +// Each vector is 32-bit*4 elements == 128-bits == 16 bytes. +// If the size is specified as 0, the CB size is not known (any size CB +// can be bound to the slot). +// (3) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- + +typedef enum D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN +{ + D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED = 0, + D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED = 1 +} D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN; + +#define D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK 0x00000800 +#define D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT 11 + +// DECODER MACRO: Find out if a Constant Buffer is going to be indexed or not +#define DECODE_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(OpcodeToken0) ((D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN)(((OpcodeToken0)&D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK)>>D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT)) + +// ENCODER MACRO: Encode the access pattern for the Constant Buffer +#define ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(AccessPattern) (((AccessPattern)<>D3D10_SB_GS_INPUT_PRIMITIVE_SHIFT)) + +// ENCODER MACRO: Store primitive topology +// (D3D10_SB_PRIMITIVE enum) into a +// a primitive topology declaration token (OpcodeToken0) +#define ENCODE_D3D10_SB_GS_INPUT_PRIMITIVE(Prim) (((Prim)<>D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_SHIFT)) + +// ENCODER MACRO: Store primitive topology +// (D3D10_SB_PRIMITIVE_TOPOLOGY enum) into a +// a primitive topology declaration token (OpcodeToken0) +#define ENCODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(PrimTopology) (((PrimTopology)<>D3D11_SB_INPUT_CONTROL_POINT_COUNT_SHIFT)) + +// ENCODER MACRO: Store input control point count into a declaration token +#define ENCODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(Count) (((Count)<>D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_SHIFT)) + +// ENCODER MACRO: Store output control point count into a declaration token +#define ENCODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(Count) (((Count)<>D3D11_SB_TESS_DOMAIN_SHIFT)) + +// ENCODER MACRO: Store tessellator domain +// (D3D11_SB_TESSELLATOR_DOMAIN enum) into a +// a tessellator domain declaration token (OpcodeToken0) +#define ENCODE_D3D11_SB_TESS_DOMAIN(Domain) (((Domain)<>D3D11_SB_TESS_PARTITIONING_SHIFT)) + +// ENCODER MACRO: Store tessellator partitioning +// (D3D11_SB_TESSELLATOR_PARTITIONING enum) into a +// a tessellator partitioning declaration token (OpcodeToken0) +#define ENCODE_D3D11_SB_TESS_PARTITIONING(Partitioning) (((Partitioning)<>D3D11_SB_TESS_OUTPUT_PRIMITIVE_SHIFT)) + +// ENCODER MACRO: Store tessellator output primitive +// (D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE enum) into a +// a tessellator output primitive declaration token (OpcodeToken0) +#define ENCODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(OutputPrimitive) (((OutputPrimitive)<>D3D10_SB_INSTRUCTION_RETURN_TYPE_SHIFT)) +// ENCODER MACRO: Encode the return type for instructions +// in the opcode specific control range of OpcodeToken0 +#define ENCODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(ReturnType) (((ReturnType)<>D3D11_SB_INTERFACE_INDEXED_BIT_SHIFT) ? true : false) +#define ENCODE_D3D11_SB_INTERFACE_INDEXED_BIT(IndexedBit) (((IndexedBit)<>D3D11_SB_INTERFACE_TABLE_LENGTH_SHIFT)) +#define ENCODE_D3D11_SB_INTERFACE_TABLE_LENGTH(TableLength) (((TableLength)<>D3D11_SB_INTERFACE_ARRAY_LENGTH_SHIFT)) +#define ENCODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(ArrayLength) (((ArrayLength)<[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of UAV's in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the u# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (u[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of uav within space (may be dynamically indexed) +// (2) a Resource Return Type token (ResourceReturnTypeToken) +// (3) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- +// UAV access scope flags +#define D3D11_SB_GLOBALLY_COHERENT_ACCESS 0x00010000 +#define D3D11_SB_ACCESS_COHERENCY_MASK 0x00010000 + +// DECODER MACRO: Retrieve flags for sync instruction from OpcodeToken0. +#define DECODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_ACCESS_COHERENCY_MASK) + +// ENCODER MACRO: Given a set of sync instruciton flags, encode them in OpcodeToken0. +#define ENCODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Flags) ((Flags)&D3D11_SB_ACCESS_COHERENCY_MASK) + +// Additional UAV access flags +#define D3D11_SB_RASTERIZER_ORDERED_ACCESS 0x00020000 + +// Resource flags mask. Use to retrieve all resource flags, including the order preserving counter. +#define D3D11_SB_RESOURCE_FLAGS_MASK (D3D11_SB_GLOBALLY_COHERENT_ACCESS|D3D11_SB_RASTERIZER_ORDERED_ACCESS|D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER) + +// DECODER MACRO: Retrieve UAV access flags for from OpcodeToken0. +#define DECODE_D3D11_SB_RESOURCE_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_RESOURCE_FLAGS_MASK) + +// ENCODER MACRO: Given UAV access flags, encode them in OpcodeToken0. +#define ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags) ((Flags)&D3D11_SB_RESOURCE_FLAGS_MASK) + +// ---------------------------------------------------------------------------- +// Raw Unordered Access View Declaration +// +// OpcodeToken0: +// +// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW +// [15:11] Ignored, 0 +// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT) +// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0 +// [23:18] Ignored, 0 +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 1 operand on Shader Models 4.0 through 5.0: +// (1) an operand, starting with OperandToken0, defining which +// u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared. +// +// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later: +// (1) an operand, starting with OperandToken0, defining which +// u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared. +// The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, +// and the meaning of the index dimensions are as follows: (u[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of UAV's in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the u# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (u[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of uav within space (may be dynamically indexed) +// (2) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- + +// ---------------------------------------------------------------------------- +// Structured Unordered Access View Declaration +// +// OpcodeToken0: +// +// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED +// [15:11] Ignored, 0 +// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT) +// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0 +// [22:18] Ignored, 0 +// [23:23] D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER or 0 +// +// The presence of this flag means that if a UAV is bound to the +// corresponding slot, it must have been created with +// D3D11_BUFFER_UAV_FLAG_COUNTER at the API. Also, the shader +// can contain either imm_atomic_alloc or _consume instructions +// operating on the given UAV. +// +// If this flag is not present, the shader can still contain +// either imm_atomic_alloc or imm_atomic_consume instructions for +// this UAV. But if such instructions are present in this case, +// and a UAV is bound corresponding slot, it must have been created +// with the D3D11_BUFFER_UAV_FLAG_APPEND flag at the API. +// Append buffers have a counter as well, but values returned +// to the shader are only valid for the lifetime of the shader +// invocation. +// +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 2 operands: +// (1) an operand, starting with OperandToken0, defining which +// u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is +// being declared. +// (2) a DWORD indicating UINT32 byte stride +// +// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later: +// (1) an operand, starting with OperandToken0, defining which +// u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared. +// The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, +// and the meaning of the index dimensions are as follows: (u[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of UAV's in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the u# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (u[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of uav within space (may be dynamically indexed) +// (2) a DWORD indicating UINT32 byte stride +// (3) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- +// UAV flags +#define D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER 0x00800000 +#define D3D11_SB_UAV_FLAGS_MASK 0x00800000 + +// DECODER MACRO: Retrieve flags about UAV from OpcodeToken0. +#define DECODE_D3D11_SB_UAV_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_UAV_FLAGS_MASK) + +// ENCODER MACRO: Given a set of UAV flags, encode them in OpcodeToken0. +#define ENCODE_D3D11_SB_UAV_FLAGS(Flags) ((Flags)&D3D11_SB_UAV_FLAGS_MASK) + +// ---------------------------------------------------------------------------- +// Raw Thread Group Shared Memory Declaration +// +// OpcodeToken0: +// +// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW +// [23:11] Ignored, 0 +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 2 operands: +// (1) an operand, starting with OperandToken0, defining which +// g# register (D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) is being declared. +// (2) a DWORD indicating the element count, # of 32-bit scalars.. +// +// ---------------------------------------------------------------------------- + +// ---------------------------------------------------------------------------- +// Structured Thread Group Shared Memory Declaration +// +// OpcodeToken0: +// +// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED +// [23:11] Ignored, 0 +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 3 operands: +// (1) an operand, starting with OperandToken0, defining which +// g# register (D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) is +// being declared. +// (2) a DWORD indicating UINT32 struct byte stride +// (3) a DWORD indicating UINT32 struct count +// +// ---------------------------------------------------------------------------- + +// ---------------------------------------------------------------------------- +// Raw Shader Resource View Declaration +// +// OpcodeToken0: +// +// [10:00] D3D11_SB_OPCODE_DCL_RESOURCE_RAW +// [23:11] Ignored, 0 +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 1 operand: +// (1) an operand, starting with OperandToken0, defining which +// t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared. +// +// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later: +// (1) an operand, starting with OperandToken0, defining which +// t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared. +// The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, +// and the meaning of the index dimensions are as follows: (t[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of resources in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the t# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (t[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of resource within space (may be dynamically indexed) +// (2) a DWORD indicating the space index. +// +// ---------------------------------------------------------------------------- + +// ---------------------------------------------------------------------------- +// Structured Shader Resource View Declaration +// +// OpcodeToken0: +// +// [10:00] D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED +// [23:11] Ignored, 0 +// [30:24] Instruction length in DWORDs including the opcode token. +// [31] 0 normally. 1 if extended operand definition, meaning next DWORD +// contains extended operand description. This dcl is currently not +// extended. +// +// OpcodeToken0 is followed by 2 operands: +// (1) an operand, starting with OperandToken0, defining which +// g# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is +// being declared. +// (2) a DWORD indicating UINT32 struct byte stride +// +// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later: +// (1) an operand, starting with OperandToken0, defining which +// t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared. +// The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, +// and the meaning of the index dimensions are as follows: (t[:]) +// 1 : variable ID being declared +// 2 : the lower bound of the range of resources in the space +// 3 : the upper bound (inclusive) of this range +// As opposed to when the t# is used in shader instructions, where the register +// must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index +// dimensions are as follows: (t[]): +// 1 : variable ID being used (matches dcl) +// 2 : absolute index of resource within space (may be dynamically indexed) +// (2) a DWORD indicating UINT32 struct byte stride +// (3) a DWORD indicating the space index. +// +// ----------------------------------------------------------------------------