Merge branch 'master' into vulkan
This commit is contained in:
parent
c14e3770a2
commit
4617dc5569
|
@ -29,6 +29,7 @@ init:
|
|||
- git config --global core.autocrlf input
|
||||
|
||||
install:
|
||||
- cmd: vcpkg integrate remove
|
||||
- cmd: xb setup
|
||||
|
||||
platform: Windows
|
||||
|
|
|
@ -64,6 +64,9 @@
|
|||
[submodule "third_party/date"]
|
||||
path = third_party/date
|
||||
url = https://github.com/HowardHinnant/date.git
|
||||
[submodule "third_party/xxhash"]
|
||||
path = third_party/xxhash
|
||||
url = https://github.com/Cyan4973/xxHash.git
|
||||
[submodule "third_party/glslang"]
|
||||
path = third_party/glslang
|
||||
url = https://github.com/KhronosGroup/glslang.git
|
||||
|
|
|
@ -65,6 +65,14 @@ DEFINE_path(
|
|||
"Root path for guest content storage (saves, etc.), or empty to use the "
|
||||
"content folder under the storage root.",
|
||||
"Storage");
|
||||
DEFINE_path(
|
||||
cache_root, "",
|
||||
"Root path for files used to speed up certain parts of the emulator or the "
|
||||
"game. These files may be persistent, but they can be deleted without "
|
||||
"major side effects such as progress loss. If empty, the cache folder "
|
||||
"under the storage root, or, if available, the cache directory preferred "
|
||||
"for the OS, will be used.",
|
||||
"Storage");
|
||||
|
||||
DEFINE_bool(mount_scratch, false, "Enable scratch mount", "Storage");
|
||||
DEFINE_bool(mount_cache, false, "Enable cache mount", "Storage");
|
||||
|
@ -189,10 +197,12 @@ std::vector<std::unique_ptr<hid::InputDriver>> CreateInputDrivers(
|
|||
Factory<hid::InputDriver, ui::Window*> factory;
|
||||
#if XE_PLATFORM_WIN32
|
||||
factory.Add("xinput", xe::hid::xinput::Create);
|
||||
#endif // XE_PLATFORM_WIN32
|
||||
factory.Add("sdl", xe::hid::sdl::Create);
|
||||
#if XE_PLATFORM_WIN32
|
||||
// WinKey input driver should always be the last input driver added!
|
||||
factory.Add("winkey", xe::hid::winkey::Create);
|
||||
#endif // XE_PLATFORM_WIN32
|
||||
factory.Add("sdl", xe::hid::sdl::Create);
|
||||
for (auto& driver : factory.CreateAll(cvars::hid, window)) {
|
||||
if (XSUCCEEDED(driver->Setup())) {
|
||||
drivers.emplace_back(std::move(driver));
|
||||
|
@ -220,6 +230,8 @@ int xenia_main(const std::vector<std::string>& args) {
|
|||
#if defined(XE_PLATFORM_WIN32) || defined(XE_PLATFORM_GNU_LINUX)
|
||||
storage_root = storage_root / "Xenia";
|
||||
#else
|
||||
// TODO(Triang3l): Point to the app's external storage "files" directory
|
||||
// on Android.
|
||||
#warning Unhandled platform for the data root.
|
||||
storage_root = storage_root / "Xenia";
|
||||
#endif
|
||||
|
@ -243,13 +255,29 @@ int xenia_main(const std::vector<std::string>& args) {
|
|||
content_root = std::filesystem::absolute(content_root);
|
||||
XELOGI("Content root: {}", xe::path_to_utf8(content_root));
|
||||
|
||||
std::filesystem::path cache_root = cvars::cache_root;
|
||||
if (cache_root.empty()) {
|
||||
cache_root = storage_root / "cache";
|
||||
// TODO(Triang3l): Point to the app's external storage "cache" directory on
|
||||
// Android.
|
||||
} else {
|
||||
// If cache root isn't an absolute path, then it should be relative to the
|
||||
// storage root.
|
||||
if (!cache_root.is_absolute()) {
|
||||
cache_root = storage_root / cache_root;
|
||||
}
|
||||
}
|
||||
cache_root = std::filesystem::absolute(cache_root);
|
||||
XELOGI("Cache root: {}", xe::path_to_utf8(cache_root));
|
||||
|
||||
if (cvars::discord) {
|
||||
discord::DiscordPresence::Initialize();
|
||||
discord::DiscordPresence::NotPlaying();
|
||||
}
|
||||
|
||||
// Create the emulator but don't initialize so we can setup the window.
|
||||
auto emulator = std::make_unique<Emulator>("", storage_root, content_root);
|
||||
auto emulator =
|
||||
std::make_unique<Emulator>("", storage_root, content_root, cache_root);
|
||||
|
||||
// Main emulator display window.
|
||||
auto emulator_window = EmulatorWindow::Create(emulator.get());
|
||||
|
|
|
@ -17,7 +17,7 @@ namespace hash {
|
|||
|
||||
// For use in unordered_sets and unordered_maps (primarily multisets and
|
||||
// multimaps, with manual collision resolution), where the hash is calculated
|
||||
// externally (for instance, as XXH64), possibly requiring context data rather
|
||||
// externally (for instance, as XXH3), possibly requiring context data rather
|
||||
// than a pure function to calculate the hash
|
||||
template <typename Key>
|
||||
struct IdentityHasher {
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_BASE_XXHASH_H_
|
||||
#define XENIA_BASE_XXHASH_H_
|
||||
|
||||
#define XXH_INLINE_ALL
|
||||
|
||||
// Can't use XXH_X86DISPATCH because XXH is calculated on multiple threads,
|
||||
// while the dispatch writes the result (multiple pointers without any
|
||||
// synchronization) to XXH_g_dispatch at the first call.
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
|
||||
#endif // XENIA_BASE_XXHASH_H_
|
|
@ -746,6 +746,8 @@ static const vec128_t xmm_consts[] = {
|
|||
/* XMMIntMaxPD */ vec128d(INT_MAX),
|
||||
/* XMMPosIntMinPS */ vec128f((float)0x80000000u),
|
||||
/* XMMQNaN */ vec128i(0x7FC00000u),
|
||||
/* XMMInt127 */ vec128i(0x7Fu),
|
||||
/* XMM2To32 */ vec128f(0x1.0p32f),
|
||||
};
|
||||
|
||||
// First location to try and place constants.
|
||||
|
|
|
@ -114,6 +114,8 @@ enum XmmConst {
|
|||
XMMIntMaxPD,
|
||||
XMMPosIntMinPS,
|
||||
XMMQNaN,
|
||||
XMMInt127,
|
||||
XMM2To32,
|
||||
};
|
||||
|
||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||
|
|
|
@ -33,19 +33,41 @@ struct VECTOR_CONVERT_I2F
|
|||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// flags = ARITHMETIC_UNSIGNED
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
// xmm0 = mask of positive values
|
||||
e.vpcmpgtd(e.xmm0, i.src1, e.GetXmmConstPtr(XMMFFFF));
|
||||
// Round manually to (1.stored mantissa bits * 2^31) or to 2^32 to the
|
||||
// nearest even (the only rounding mode used on AltiVec) if the number is
|
||||
// 0x80000000 or greater, instead of converting src & 0x7FFFFFFF and then
|
||||
// adding 2147483648.0f, which results in double rounding that can give a
|
||||
// result larger than needed - see OPCODE_VECTOR_CONVERT_I2F notes.
|
||||
|
||||
// scale any values >= (unsigned)INT_MIN back to [0, INT_MAX]
|
||||
e.vpsubd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMSignMaskI32));
|
||||
e.vblendvps(e.xmm1, e.xmm1, i.src1, e.xmm0);
|
||||
// [0x80000000, 0xFFFFFFFF] case:
|
||||
|
||||
// xmm1 = [0, INT_MAX]
|
||||
e.vcvtdq2ps(i.dest, e.xmm1);
|
||||
// Round to the nearest even, from (0x80000000 | 31 stored mantissa bits)
|
||||
// to ((-1 << 23) | 23 stored mantissa bits), or to 0 if the result should
|
||||
// be 4294967296.0f.
|
||||
// xmm0 = src + 0b01111111 + ((src >> 8) & 1)
|
||||
// (xmm1 also used to launch reg + mem early and to require it late)
|
||||
e.vpaddd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMInt127));
|
||||
e.vpslld(e.xmm0, i.src1, 31 - 8);
|
||||
e.vpsrld(e.xmm0, e.xmm0, 31);
|
||||
e.vpaddd(e.xmm0, e.xmm0, e.xmm1);
|
||||
// xmm0 = (0xFF800000 | 23 explicit mantissa bits), or 0 if overflowed
|
||||
e.vpsrad(e.xmm0, e.xmm0, 8);
|
||||
// Calculate the result for the [0x80000000, 0xFFFFFFFF] case - take the
|
||||
// rounded mantissa, and add -1 or 0 to the exponent of 32, depending on
|
||||
// whether the number should be (1.stored mantissa bits * 2^31) or 2^32.
|
||||
// xmm0 = [0x80000000, 0xFFFFFFFF] case result
|
||||
e.vpaddd(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMM2To32));
|
||||
|
||||
// scale values back above [INT_MIN, UINT_MAX]
|
||||
e.vpandn(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS));
|
||||
e.vaddps(i.dest, i.dest, e.xmm0);
|
||||
// [0x00000000, 0x7FFFFFFF] case
|
||||
// (during vblendvps reg -> vpaddd reg -> vpaddd mem dependency):
|
||||
|
||||
// Convert from signed integer to float.
|
||||
// xmm1 = [0x00000000, 0x7FFFFFFF] case result
|
||||
e.vcvtdq2ps(e.xmm1, i.src1);
|
||||
|
||||
// Merge the two ways depending on whether the number is >= 0x80000000
|
||||
// (has high bit set).
|
||||
e.vblendvps(i.dest, e.xmm1, e.xmm0, i.src1);
|
||||
} else {
|
||||
e.vcvtdq2ps(i.dest, i.src1);
|
||||
}
|
||||
|
|
|
@ -143,6 +143,55 @@ enum Opcode {
|
|||
OPCODE_TRUNCATE,
|
||||
OPCODE_CONVERT,
|
||||
OPCODE_ROUND,
|
||||
// Note that 2147483648.0 + (src & 0x7FFFFFFF) is not a correct way of
|
||||
// performing the uint -> float conversion for large numbers on backends where
|
||||
// only sint -> float is available.
|
||||
//
|
||||
// Take 0b11000000000000000000000101000001 as an example,
|
||||
// or 1.1000000000000000000000101000001 * 2^31.
|
||||
// This one has 31 mantissa bits (excluding the implicit 1.), and needs to be
|
||||
// rounded to 23 bits - 8 mantissa bits need to be dropped:
|
||||
// 10000000000000000000001_01000001
|
||||
//
|
||||
// Rounding to the nearest even (the only rounding mode that exists on
|
||||
// AltiVec, and the likely rounding mode in the implementations) should be
|
||||
// done downwards - 01000001 of 1_01000001 is in [00000000, 01111111].
|
||||
// The correct mantissa in this case is:
|
||||
// 1.10000000000000000000001 * 2^31.
|
||||
//
|
||||
// With a two-step conversion, rounding is done twice instead, which gives an
|
||||
// incorrect result.
|
||||
//
|
||||
// First, converting the low 31 bits to float:
|
||||
// The number is 0.1000000000000000000000101000001 * 2^31.
|
||||
// Normalizing it, we get 1.000000000000000000000101000001 (30 significand
|
||||
// bits).
|
||||
// We need to round 30 bits to 23 - 7 bits need to be dropped:
|
||||
// 00000000000000000000010_1000001
|
||||
//
|
||||
// Rounding to the nearest even is done upwards in this case - 1000001 of
|
||||
// 0_1000001 is in [1000001, 1111111].
|
||||
// The result of the sint -> float conversion is:
|
||||
// 1.00000000000000000000011 * 2^30.
|
||||
//
|
||||
// Now 2147483648.0 (1 * 2^31) needs to be added. Aligning the exponents, we
|
||||
// get:
|
||||
// 0.|10000000000000000000001|1 * 2^31
|
||||
// + 1.|00000000000000000000000| * 2^31
|
||||
// = 1.|10000000000000000000001|1 * 2^31
|
||||
//
|
||||
// At "infinite precision", the result has 24 significand bits, but only 23
|
||||
// can be stored, thus rounding to the nearest even needs to be done. 1_1 is
|
||||
// (odd + 0.5). 0.5 is ambiguous, thus tie-breaking to the nearest even -
|
||||
// which is above in this case - is done. The result is:
|
||||
// 1.10000000000000000000010 * 2^31.
|
||||
//
|
||||
// This is incorrect - larger than the correctly rounded result, which is:
|
||||
// 1.10000000000000000000001 * 2^31.
|
||||
//
|
||||
// Test cases checked on real hardware via vcfux: 0xFFFDFF7E, 0xFFFCFF7D -
|
||||
// should be 0x4F7FFDFF and 0x4F7FFCFF respectively, not 0x4F7FFE00 and
|
||||
// 0x4F7FFD00.
|
||||
OPCODE_VECTOR_CONVERT_I2F,
|
||||
OPCODE_VECTOR_CONVERT_F2I,
|
||||
OPCODE_LOAD_VECTOR_SHL,
|
||||
|
|
|
@ -519,9 +519,11 @@ int InstrEmit_vavguw(PPCHIRBuilder& f, const InstrData& i) {
|
|||
int InstrEmit_vcfsx_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
|
||||
uint32_t uimm) {
|
||||
// (VD) <- float(VB as signed) / 2^uimm
|
||||
float fuimm = static_cast<float>(std::exp2(uimm));
|
||||
Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb)),
|
||||
f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
||||
Value* v = f.VectorConvertI2F(f.LoadVR(vb));
|
||||
if (uimm) {
|
||||
float fuimm = std::ldexp(1.0f, -int(uimm));
|
||||
v = f.Mul(v, f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
||||
}
|
||||
f.StoreVR(vd, v);
|
||||
return 0;
|
||||
}
|
||||
|
@ -535,9 +537,11 @@ int InstrEmit_vcsxwfp128(PPCHIRBuilder& f, const InstrData& i) {
|
|||
int InstrEmit_vcfux_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
|
||||
uint32_t uimm) {
|
||||
// (VD) <- float(VB as unsigned) / 2^uimm
|
||||
float fuimm = static_cast<float>(std::exp2(uimm));
|
||||
Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED),
|
||||
f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
||||
Value* v = f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED);
|
||||
if (uimm) {
|
||||
float fuimm = std::ldexp(1.0f, -int(uimm));
|
||||
v = f.Mul(v, f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
||||
}
|
||||
f.StoreVR(vd, v);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
# frsqrte tests disabled because accuracy is CPU dependent.
|
||||
|
||||
#test_frsqrte_1:
|
||||
#_ REGISTER_IN f1 1.0
|
||||
test_frsqrte_1:
|
||||
# _ REGISTER_IN f1 1.0
|
||||
# frsqrte f1, f1
|
||||
# blr
|
||||
#_ REGISTER_OUT f1 0.99975585937500000
|
||||
blr
|
||||
# _ REGISTER_OUT f1 0.99975585937500000
|
||||
# want: 0.97
|
||||
|
||||
#test_frsqrte_2:
|
||||
#_ REGISTER_IN f1 64.0
|
||||
test_frsqrte_2:
|
||||
# _ REGISTER_IN f1 64.0
|
||||
# frsqrte f1, f1
|
||||
# blr
|
||||
#_ REGISTER_OUT f1 0.12496948242187500
|
||||
blr
|
||||
# _ REGISTER_OUT f1 0.12496948242187500
|
||||
|
||||
#test_frsqrte_3:
|
||||
#_ REGISTER_IN f1 0.5
|
||||
test_frsqrte_3:
|
||||
# _ REGISTER_IN f1 0.5
|
||||
# frsqrte f1, f1
|
||||
# blr
|
||||
#_ REGISTER_OUT f1 1.41381835937500000
|
||||
blr
|
||||
# _ REGISTER_OUT f1 1.41381835937500000
|
||||
# want: 1.375
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/filesystem.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/main.h"
|
||||
|
@ -28,7 +29,7 @@ DEFINE_path(test_path, "src/xenia/cpu/ppc/testing/",
|
|||
"Directory scanned for test files.", "Other");
|
||||
DEFINE_path(test_bin_path, "src/xenia/cpu/ppc/testing/bin/",
|
||||
"Directory with binary outputs of the test files.", "Other");
|
||||
DEFINE_transient_string(test_name, "", "Specifies test name.", "General");
|
||||
DEFINE_transient_string(test_name, "", "Test suite name.", "General");
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
|
@ -475,13 +476,7 @@ bool RunTests(const std::string_view test_name) {
|
|||
}
|
||||
|
||||
int main(const std::vector<std::string>& args) {
|
||||
// Grab test name, if present.
|
||||
std::string test_name;
|
||||
if (args.size() >= 2) {
|
||||
test_name = args[1];
|
||||
}
|
||||
|
||||
return RunTests(test_name) ? 0 : 1;
|
||||
return RunTests(cvars::test_name) ? 0 : 1;
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
|
|
|
@ -358,7 +358,6 @@ bool Processor::ExecuteRaw(ThreadState* thread_state, uint32_t address) {
|
|||
return false;
|
||||
}
|
||||
|
||||
auto context = thread_state->context();
|
||||
return function->Call(thread_state, 0xBCBCBCBC);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,13 +59,15 @@ namespace xe {
|
|||
|
||||
Emulator::Emulator(const std::filesystem::path& command_line,
|
||||
const std::filesystem::path& storage_root,
|
||||
const std::filesystem::path& content_root)
|
||||
const std::filesystem::path& content_root,
|
||||
const std::filesystem::path& cache_root)
|
||||
: on_launch(),
|
||||
on_terminate(),
|
||||
on_exit(),
|
||||
command_line_(command_line),
|
||||
storage_root_(storage_root),
|
||||
content_root_(content_root),
|
||||
cache_root_(cache_root),
|
||||
game_title_(),
|
||||
display_window_(nullptr),
|
||||
memory_(),
|
||||
|
@ -689,7 +691,7 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
|
|||
// playing before the video can be seen if doing this in parallel with the
|
||||
// main thread.
|
||||
on_shader_storage_initialization(true);
|
||||
graphics_system_->InitializeShaderStorage(storage_root_, title_id_, true);
|
||||
graphics_system_->InitializeShaderStorage(cache_root_, title_id_, true);
|
||||
on_shader_storage_initialization(false);
|
||||
|
||||
auto main_thread = kernel_state_->LaunchModule(module);
|
||||
|
|
|
@ -49,7 +49,8 @@ class Emulator {
|
|||
public:
|
||||
explicit Emulator(const std::filesystem::path& command_line,
|
||||
const std::filesystem::path& storage_root,
|
||||
const std::filesystem::path& content_root);
|
||||
const std::filesystem::path& content_root,
|
||||
const std::filesystem::path& cache_root);
|
||||
~Emulator();
|
||||
|
||||
// Full command line used when launching the process.
|
||||
|
@ -61,6 +62,9 @@ class Emulator {
|
|||
// Folder guest content is stored in.
|
||||
const std::filesystem::path& content_root() const { return content_root_; }
|
||||
|
||||
// Folder files safe to remove without significant side effects are stored in.
|
||||
const std::filesystem::path& cache_root() const { return cache_root_; }
|
||||
|
||||
// Title of the game in the default language.
|
||||
const std::string& game_title() const { return game_title_; }
|
||||
|
||||
|
@ -166,6 +170,7 @@ class Emulator {
|
|||
std::filesystem::path command_line_;
|
||||
std::filesystem::path storage_root_;
|
||||
std::filesystem::path content_root_;
|
||||
std::filesystem::path cache_root_;
|
||||
|
||||
std::string game_title_;
|
||||
|
||||
|
|
|
@ -89,8 +89,8 @@ void CommandProcessor::Shutdown() {
|
|||
}
|
||||
|
||||
void CommandProcessor::InitializeShaderStorage(
|
||||
const std::filesystem::path& storage_root, uint32_t title_id,
|
||||
bool blocking) {}
|
||||
const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
|
||||
}
|
||||
|
||||
void CommandProcessor::RequestFrameTrace(
|
||||
const std::filesystem::path& root_path) {
|
||||
|
|
|
@ -133,9 +133,8 @@ class CommandProcessor {
|
|||
// May be called not only from the command processor thread when the command
|
||||
// processor is paused, and the termination of this function may be explicitly
|
||||
// awaited.
|
||||
virtual void InitializeShaderStorage(
|
||||
const std::filesystem::path& storage_root, uint32_t title_id,
|
||||
bool blocking);
|
||||
virtual void InitializeShaderStorage(const std::filesystem::path& cache_root,
|
||||
uint32_t title_id, bool blocking);
|
||||
|
||||
virtual void RequestFrameTrace(const std::filesystem::path& root_path);
|
||||
virtual void BeginTracing(const std::filesystem::path& root_path);
|
||||
|
|
|
@ -7,8 +7,6 @@
|
|||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <utility>
|
||||
|
@ -73,10 +71,9 @@ void D3D12CommandProcessor::ClearCaches() {
|
|||
}
|
||||
|
||||
void D3D12CommandProcessor::InitializeShaderStorage(
|
||||
const std::filesystem::path& storage_root, uint32_t title_id,
|
||||
bool blocking) {
|
||||
CommandProcessor::InitializeShaderStorage(storage_root, title_id, blocking);
|
||||
pipeline_cache_->InitializeShaderStorage(storage_root, title_id, blocking);
|
||||
const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
|
||||
CommandProcessor::InitializeShaderStorage(cache_root, title_id, blocking);
|
||||
pipeline_cache_->InitializeShaderStorage(cache_root, title_id, blocking);
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::RequestFrameTrace(
|
||||
|
@ -102,7 +99,7 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
|
|||
}
|
||||
|
||||
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
|
||||
const D3D12Shader* pixel_shader) const {
|
||||
const Shader* pixel_shader) const {
|
||||
if (pixel_shader == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -159,25 +156,16 @@ void D3D12CommandProcessor::SubmitBarriers() {
|
|||
}
|
||||
|
||||
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
||||
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
|
||||
assert_true(vertex_shader->is_translated());
|
||||
|
||||
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
|
||||
bool tessellated) {
|
||||
if (bindless_resources_used_) {
|
||||
return vertex_shader->host_vertex_shader_type() !=
|
||||
Shader::HostVertexShaderType::kVertex
|
||||
? root_signature_bindless_ds_
|
||||
: root_signature_bindless_vs_;
|
||||
return tessellated ? root_signature_bindless_ds_
|
||||
: root_signature_bindless_vs_;
|
||||
}
|
||||
|
||||
assert_true(pixel_shader == nullptr || pixel_shader->is_translated());
|
||||
|
||||
D3D12_SHADER_VISIBILITY vertex_visibility;
|
||||
if (vertex_shader->host_vertex_shader_type() !=
|
||||
Shader::HostVertexShaderType::kVertex) {
|
||||
vertex_visibility = D3D12_SHADER_VISIBILITY_DOMAIN;
|
||||
} else {
|
||||
vertex_visibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
}
|
||||
D3D12_SHADER_VISIBILITY vertex_visibility =
|
||||
tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN
|
||||
: D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
|
||||
uint32_t texture_count_vertex, sampler_count_vertex;
|
||||
vertex_shader->GetTextureBindings(texture_count_vertex);
|
||||
|
@ -393,7 +381,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
}
|
||||
|
||||
uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices(
|
||||
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
|
||||
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
|
||||
RootBindfulExtraParameterIndices& indices_out) {
|
||||
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
|
||||
if (pixel_shader != nullptr) {
|
||||
|
@ -1202,6 +1190,7 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
|
||||
pipeline_cache_ = std::make_unique<PipelineCache>(
|
||||
*this, *register_file_, bindless_resources_used_, edram_rov_used_,
|
||||
render_target_cache_->depth_float24_conversion(),
|
||||
texture_cache_->IsResolutionScale2X() ? 2 : 1);
|
||||
if (!pipeline_cache_->Initialize()) {
|
||||
XELOGE("Failed to initialize the graphics pipeline cache");
|
||||
|
@ -1804,8 +1793,7 @@ Shader* D3D12CommandProcessor::LoadShader(xenos::ShaderType shader_type,
|
|||
uint32_t guest_address,
|
||||
const uint32_t* host_address,
|
||||
uint32_t dword_count) {
|
||||
return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
|
||||
dword_count);
|
||||
return pipeline_cache_->LoadShader(shader_type, host_address, dword_count);
|
||||
}
|
||||
|
||||
bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
||||
|
@ -1851,21 +1839,30 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
// Need a pixel shader in normal color mode.
|
||||
return false;
|
||||
}
|
||||
// Get tessellation info for the current draw for vertex shader translation.
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
pipeline_cache_->GetHostVertexShaderTypeIfValid();
|
||||
if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
|
||||
DxbcShaderTranslator::Modification vertex_shader_modification;
|
||||
DxbcShaderTranslator::Modification pixel_shader_modification;
|
||||
if (!pipeline_cache_->GetCurrentShaderModifications(
|
||||
vertex_shader_modification, pixel_shader_modification)) {
|
||||
return false;
|
||||
}
|
||||
D3D12Shader::D3D12Translation* vertex_shader_translation =
|
||||
static_cast<D3D12Shader::D3D12Translation*>(
|
||||
vertex_shader->GetOrCreateTranslation(
|
||||
vertex_shader_modification.value));
|
||||
D3D12Shader::D3D12Translation* pixel_shader_translation =
|
||||
pixel_shader ? static_cast<D3D12Shader::D3D12Translation*>(
|
||||
pixel_shader->GetOrCreateTranslation(
|
||||
pixel_shader_modification.value))
|
||||
: nullptr;
|
||||
// Translate the shaders now to get memexport configuration and color mask,
|
||||
// which is needed by the render target cache, to check the possibility of
|
||||
// doing early depth/stencil, and also to get used textures and samplers.
|
||||
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
|
||||
host_vertex_shader_type)) {
|
||||
// which is needed by the render target cache, and also to get used textures
|
||||
// and samplers.
|
||||
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader_translation,
|
||||
pixel_shader_translation)) {
|
||||
return false;
|
||||
}
|
||||
bool tessellated =
|
||||
host_vertex_shader_type != Shader::HostVertexShaderType::kVertex;
|
||||
bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
|
||||
// Check if memexport is used. If it is, we can't skip draw calls that have no
|
||||
// visual effect.
|
||||
|
@ -1967,26 +1964,14 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||
texture_cache_->RequestTextures(used_texture_mask);
|
||||
|
||||
// Check if early depth/stencil can be enabled.
|
||||
bool early_z;
|
||||
if (pixel_shader) {
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
early_z = pixel_shader->implicit_early_z_allowed() &&
|
||||
(!rb_colorcontrol.alpha_test_enable ||
|
||||
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) &&
|
||||
!rb_colorcontrol.alpha_to_mask_enable;
|
||||
} else {
|
||||
early_z = true;
|
||||
}
|
||||
|
||||
// Create the pipeline if needed and bind it.
|
||||
void* pipeline_handle;
|
||||
ID3D12RootSignature* root_signature;
|
||||
if (!pipeline_cache_->ConfigurePipeline(
|
||||
vertex_shader, pixel_shader, primitive_type_converted,
|
||||
vertex_shader_translation, pixel_shader_translation,
|
||||
primitive_type_converted,
|
||||
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
|
||||
early_z, pipeline_render_targets, &pipeline_handle,
|
||||
&root_signature)) {
|
||||
pipeline_render_targets, &pipeline_handle, &root_signature)) {
|
||||
return false;
|
||||
}
|
||||
if (current_cached_pipeline_ != pipeline_handle) {
|
||||
|
@ -2014,11 +1999,18 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
pixel_size_x *= 2;
|
||||
pixel_size_y *= 2;
|
||||
}
|
||||
flags::DepthFloat24Conversion depth_float24_conversion =
|
||||
render_target_cache_->depth_float24_conversion();
|
||||
draw_util::ViewportInfo viewport_info;
|
||||
draw_util::GetHostViewportInfo(regs, float(pixel_size_x), float(pixel_size_y),
|
||||
true, float(D3D12_VIEWPORT_BOUNDS_MAX),
|
||||
float(D3D12_VIEWPORT_BOUNDS_MAX), false,
|
||||
viewport_info);
|
||||
draw_util::GetHostViewportInfo(
|
||||
regs, float(pixel_size_x), float(pixel_size_y), true,
|
||||
float(D3D12_VIEWPORT_BOUNDS_MAX), float(D3D12_VIEWPORT_BOUNDS_MAX), false,
|
||||
!edram_rov_used_ &&
|
||||
(depth_float24_conversion ==
|
||||
flags::DepthFloat24Conversion::kOnOutputTruncating ||
|
||||
depth_float24_conversion ==
|
||||
flags::DepthFloat24Conversion::kOnOutputRounding),
|
||||
viewport_info);
|
||||
draw_util::Scissor scissor;
|
||||
draw_util::GetScissor(regs, scissor);
|
||||
scissor.left *= pixel_size_x;
|
||||
|
@ -2033,7 +2025,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
UpdateSystemConstantValues(
|
||||
memexport_used, primitive_polygonal, line_loop_closing_index,
|
||||
indexed ? index_buffer_info->endianness : xenos::Endian::kNone,
|
||||
viewport_info, pixel_size_x, pixel_size_y, used_texture_mask, early_z,
|
||||
viewport_info, pixel_size_x, pixel_size_y, used_texture_mask,
|
||||
GetCurrentColorMask(pixel_shader), pipeline_render_targets);
|
||||
|
||||
// Update constant buffers, descriptors and root parameters.
|
||||
|
@ -2659,6 +2651,8 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
|
|||
bool is_closing_frame = is_swap && frame_open_;
|
||||
|
||||
if (is_closing_frame) {
|
||||
render_target_cache_->EndFrame();
|
||||
|
||||
texture_cache_->EndFrame();
|
||||
}
|
||||
|
||||
|
@ -2873,8 +2867,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
bool shared_memory_is_uav, bool primitive_polygonal,
|
||||
uint32_t line_loop_closing_index, xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x,
|
||||
uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z,
|
||||
uint32_t color_mask,
|
||||
uint32_t pixel_size_y, uint32_t used_texture_mask, uint32_t color_mask,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
@ -2992,14 +2985,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
flags |= DxbcShaderTranslator::kSysFlag_KillIfAnyVertexKilled;
|
||||
}
|
||||
// Alpha test.
|
||||
if (rb_colorcontrol.alpha_test_enable) {
|
||||
flags |= uint32_t(rb_colorcontrol.alpha_func)
|
||||
<< DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
|
||||
} else {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess |
|
||||
DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual |
|
||||
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
|
||||
}
|
||||
xenos::CompareFunction alpha_test_function =
|
||||
rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func
|
||||
: xenos::CompareFunction::kAlways;
|
||||
flags |= uint32_t(alpha_test_function)
|
||||
<< DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
|
||||
// Gamma writing.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (color_infos[i].color_format ==
|
||||
|
@ -3028,7 +3018,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
if (rb_depthcontrol.stencil_enable) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest;
|
||||
}
|
||||
if (early_z) {
|
||||
// Hint - if not applicable to the shader, will not have effect.
|
||||
if (alpha_test_function == xenos::CompareFunction::kAlways &&
|
||||
!rb_colorcontrol.alpha_to_mask_enable) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencilEarlyWrite;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/dxbc_shader.h"
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
|
@ -47,7 +48,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
|
||||
void ClearCaches() override;
|
||||
|
||||
void InitializeShaderStorage(const std::filesystem::path& storage_root,
|
||||
void InitializeShaderStorage(const std::filesystem::path& cache_root,
|
||||
uint32_t title_id, bool blocking) override;
|
||||
|
||||
void RequestFrameTrace(const std::filesystem::path& root_path) override;
|
||||
|
@ -88,7 +89,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// there are 4 render targets bound with the same EDRAM base (clearly not
|
||||
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
|
||||
// conflict with each other.
|
||||
uint32_t GetCurrentColorMask(const D3D12Shader* pixel_shader) const;
|
||||
uint32_t GetCurrentColorMask(const Shader* pixel_shader) const;
|
||||
|
||||
void PushTransitionBarrier(
|
||||
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
|
||||
|
@ -100,8 +101,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
void SubmitBarriers();
|
||||
|
||||
// Finds or creates root signature for a pipeline.
|
||||
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
||||
const D3D12Shader* pixel_shader);
|
||||
ID3D12RootSignature* GetRootSignature(const DxbcShader* vertex_shader,
|
||||
const DxbcShader* pixel_shader,
|
||||
bool tessellated);
|
||||
|
||||
ui::d3d12::D3D12UploadBufferPool& GetConstantBufferPool() const {
|
||||
return *constant_buffer_pool_;
|
||||
|
@ -300,7 +302,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// Gets the indices of optional root parameters. Returns the total parameter
|
||||
// count.
|
||||
static uint32_t GetRootBindfulExtraParameterIndices(
|
||||
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
|
||||
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
|
||||
RootBindfulExtraParameterIndices& indices_out);
|
||||
|
||||
// BeginSubmission and EndSubmission may be called at any time. If there's an
|
||||
|
@ -353,8 +355,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
bool shared_memory_is_uav, bool primitive_polygonal,
|
||||
uint32_t line_loop_closing_index, xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x,
|
||||
uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z,
|
||||
uint32_t color_mask,
|
||||
uint32_t pixel_size_y, uint32_t used_texture_mask, uint32_t color_mask,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
|
||||
bool UpdateBindings(const D3D12Shader* vertex_shader,
|
||||
const D3D12Shader* pixel_shader,
|
||||
|
|
|
@ -10,9 +10,11 @@
|
|||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <utility>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/gpu/dxbc_shader.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
|
||||
|
@ -22,51 +24,13 @@ namespace d3d12 {
|
|||
|
||||
D3D12Shader::D3D12Shader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
|
||||
: DxbcShader(shader_type, data_hash, dword_ptr, dword_count) {}
|
||||
|
||||
void D3D12Shader::SetTexturesAndSamplers(
|
||||
const DxbcShaderTranslator::TextureBinding* texture_bindings,
|
||||
uint32_t texture_binding_count,
|
||||
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
|
||||
uint32_t sampler_binding_count) {
|
||||
texture_bindings_.clear();
|
||||
texture_bindings_.reserve(texture_binding_count);
|
||||
used_texture_mask_ = 0;
|
||||
for (uint32_t i = 0; i < texture_binding_count; ++i) {
|
||||
TextureBinding& binding = texture_bindings_.emplace_back();
|
||||
// For a stable hash.
|
||||
std::memset(&binding, 0, sizeof(binding));
|
||||
const DxbcShaderTranslator::TextureBinding& translator_binding =
|
||||
texture_bindings[i];
|
||||
binding.bindless_descriptor_index =
|
||||
translator_binding.bindless_descriptor_index;
|
||||
binding.fetch_constant = translator_binding.fetch_constant;
|
||||
binding.dimension = translator_binding.dimension;
|
||||
binding.is_signed = translator_binding.is_signed;
|
||||
used_texture_mask_ |= 1u << translator_binding.fetch_constant;
|
||||
}
|
||||
sampler_bindings_.clear();
|
||||
sampler_bindings_.reserve(sampler_binding_count);
|
||||
for (uint32_t i = 0; i < sampler_binding_count; ++i) {
|
||||
SamplerBinding binding;
|
||||
const DxbcShaderTranslator::SamplerBinding& translator_binding =
|
||||
sampler_bindings[i];
|
||||
binding.bindless_descriptor_index =
|
||||
translator_binding.bindless_descriptor_index;
|
||||
binding.fetch_constant = translator_binding.fetch_constant;
|
||||
binding.mag_filter = translator_binding.mag_filter;
|
||||
binding.min_filter = translator_binding.min_filter;
|
||||
binding.mip_filter = translator_binding.mip_filter;
|
||||
binding.aniso_filter = translator_binding.aniso_filter;
|
||||
sampler_bindings_.push_back(binding);
|
||||
}
|
||||
}
|
||||
|
||||
void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
|
||||
bool disassemble_dxbc,
|
||||
IDxbcConverter* dxbc_converter,
|
||||
IDxcUtils* dxc_utils,
|
||||
IDxcCompiler* dxc_compiler) {
|
||||
void D3D12Shader::D3D12Translation::DisassembleDxbcAndDxil(
|
||||
const ui::d3d12::D3D12Provider& provider, bool disassemble_dxbc,
|
||||
IDxbcConverter* dxbc_converter, IDxcUtils* dxc_utils,
|
||||
IDxcCompiler* dxc_compiler) {
|
||||
std::string disassembly;
|
||||
bool is_first_disassembly = true;
|
||||
if (disassemble_dxbc) {
|
||||
ID3DBlob* dxbc_disassembly;
|
||||
|
@ -77,11 +41,12 @@ void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
|
|||
nullptr, &dxbc_disassembly))) {
|
||||
assert_true(is_first_disassembly);
|
||||
is_first_disassembly = false;
|
||||
host_disassembly_.append(
|
||||
disassembly.append(
|
||||
reinterpret_cast<const char*>(dxbc_disassembly->GetBufferPointer()));
|
||||
dxbc_disassembly->Release();
|
||||
} else {
|
||||
XELOGE("Failed to disassemble DXBC shader {:016X}", ucode_data_hash());
|
||||
XELOGE("Failed to disassemble DXBC shader {:016X}",
|
||||
shader().ucode_data_hash());
|
||||
}
|
||||
}
|
||||
if (dxbc_converter && dxc_utils && dxc_compiler) {
|
||||
|
@ -106,29 +71,36 @@ void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
|
|||
dxil_disassembly->Release();
|
||||
if (dxil_disassembly_got_utf8) {
|
||||
if (!is_first_disassembly) {
|
||||
host_disassembly_.append("\n\n");
|
||||
disassembly.append("\n\n");
|
||||
}
|
||||
is_first_disassembly = false;
|
||||
host_disassembly_.append(reinterpret_cast<const char*>(
|
||||
disassembly.append(reinterpret_cast<const char*>(
|
||||
dxil_disassembly_utf8->GetStringPointer()));
|
||||
dxil_disassembly_utf8->Release();
|
||||
} else {
|
||||
XELOGE("Failed to get DXIL shader {:016X} disassembly as UTF-8",
|
||||
ucode_data_hash());
|
||||
shader().ucode_data_hash());
|
||||
}
|
||||
} else {
|
||||
XELOGE("Failed to disassemble DXIL shader {:016X}",
|
||||
ucode_data_hash());
|
||||
shader().ucode_data_hash());
|
||||
}
|
||||
} else {
|
||||
XELOGE("Failed to create a blob with DXIL shader {:016X}",
|
||||
ucode_data_hash());
|
||||
shader().ucode_data_hash());
|
||||
CoTaskMemFree(dxil);
|
||||
}
|
||||
} else {
|
||||
XELOGE("Failed to convert shader {:016X} to DXIL", ucode_data_hash());
|
||||
XELOGE("Failed to convert shader {:016X} to DXIL",
|
||||
shader().ucode_data_hash());
|
||||
}
|
||||
}
|
||||
set_host_disassembly(std::move(disassembly));
|
||||
}
|
||||
|
||||
Shader::Translation* D3D12Shader::CreateTranslationInstance(
|
||||
uint32_t modification) {
|
||||
return new D3D12Translation(*this, modification);
|
||||
}
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -10,106 +10,62 @@
|
|||
#ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_
|
||||
#define XENIA_GPU_D3D12_D3D12_SHADER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/gpu/dxbc_shader.h"
|
||||
#include "xenia/ui/d3d12/d3d12_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
||||
class D3D12Shader : public Shader {
|
||||
class D3D12Shader : public DxbcShader {
|
||||
public:
|
||||
class D3D12Translation : public DxbcTranslation {
|
||||
public:
|
||||
D3D12Translation(D3D12Shader& shader, uint32_t modification)
|
||||
: DxbcTranslation(shader, modification) {}
|
||||
|
||||
void DisassembleDxbcAndDxil(const ui::d3d12::D3D12Provider& provider,
|
||||
bool disassemble_dxbc,
|
||||
IDxbcConverter* dxbc_converter = nullptr,
|
||||
IDxcUtils* dxc_utils = nullptr,
|
||||
IDxcCompiler* dxc_compiler = nullptr);
|
||||
};
|
||||
|
||||
D3D12Shader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count);
|
||||
|
||||
void SetTexturesAndSamplers(
|
||||
const DxbcShaderTranslator::TextureBinding* texture_bindings,
|
||||
uint32_t texture_binding_count,
|
||||
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
|
||||
uint32_t sampler_binding_count);
|
||||
|
||||
void SetForcedEarlyZShaderObject(const std::vector<uint8_t>& shader_object) {
|
||||
forced_early_z_shader_ = shader_object;
|
||||
}
|
||||
// Returns the shader with forced early depth/stencil set with
|
||||
// SetForcedEarlyZShader after translation. If there's none (for example,
|
||||
// if the shader discards pixels or writes to the depth buffer), an empty
|
||||
// vector is returned.
|
||||
const std::vector<uint8_t>& GetForcedEarlyZShaderObject() const {
|
||||
return forced_early_z_shader_;
|
||||
}
|
||||
|
||||
void DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
|
||||
bool disassemble_dxbc,
|
||||
IDxbcConverter* dxbc_converter = nullptr,
|
||||
IDxcUtils* dxc_utils = nullptr,
|
||||
IDxcCompiler* dxc_compiler = nullptr);
|
||||
|
||||
static constexpr uint32_t kMaxTextureBindingIndexBits =
|
||||
DxbcShaderTranslator::kMaxTextureBindingIndexBits;
|
||||
static constexpr uint32_t kMaxTextureBindings =
|
||||
DxbcShaderTranslator::kMaxTextureBindings;
|
||||
struct TextureBinding {
|
||||
uint32_t bindless_descriptor_index;
|
||||
uint32_t fetch_constant;
|
||||
// Stacked and 3D are separate TextureBindings, even for bindless for null
|
||||
// descriptor handling simplicity.
|
||||
xenos::FetchOpDimension dimension;
|
||||
bool is_signed;
|
||||
};
|
||||
// Safe to hash and compare with memcmp for layout hashing.
|
||||
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(texture_bindings_.size());
|
||||
return texture_bindings_.data();
|
||||
}
|
||||
const uint32_t GetUsedTextureMask() const { return used_texture_mask_; }
|
||||
|
||||
static constexpr uint32_t kMaxSamplerBindingIndexBits =
|
||||
DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
|
||||
static constexpr uint32_t kMaxSamplerBindings =
|
||||
DxbcShaderTranslator::kMaxSamplerBindings;
|
||||
struct SamplerBinding {
|
||||
uint32_t bindless_descriptor_index;
|
||||
uint32_t fetch_constant;
|
||||
xenos::TextureFilter mag_filter;
|
||||
xenos::TextureFilter min_filter;
|
||||
xenos::TextureFilter mip_filter;
|
||||
xenos::AnisoFilter aniso_filter;
|
||||
};
|
||||
const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(sampler_bindings_.size());
|
||||
return sampler_bindings_.data();
|
||||
}
|
||||
|
||||
// For owning subsystems like the pipeline cache, accessors for unique
|
||||
// For the owning subsystem, like the pipeline cache, accessors for unique
|
||||
// identifiers (used instead of hashes to make sure collisions can't happen)
|
||||
// of binding layouts used by the shader, for invalidation if a shader with an
|
||||
// incompatible layout was bound.
|
||||
size_t GetTextureBindingLayoutUserUID() const {
|
||||
return texture_binding_layout_user_uid_;
|
||||
}
|
||||
void SetTextureBindingLayoutUserUID(size_t uid) {
|
||||
texture_binding_layout_user_uid_ = uid;
|
||||
}
|
||||
size_t GetSamplerBindingLayoutUserUID() const {
|
||||
return sampler_binding_layout_user_uid_;
|
||||
}
|
||||
// Modifications of the same shader can be translated on different threads.
|
||||
// The "set" function must only be called if "enter" returned true - these are
|
||||
// set up only once.
|
||||
bool EnterBindingLayoutUserUIDSetup() {
|
||||
return !binding_layout_user_uids_set_up_.test_and_set();
|
||||
}
|
||||
void SetTextureBindingLayoutUserUID(size_t uid) {
|
||||
texture_binding_layout_user_uid_ = uid;
|
||||
}
|
||||
void SetSamplerBindingLayoutUserUID(size_t uid) {
|
||||
sampler_binding_layout_user_uid_ = uid;
|
||||
}
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint32_t modification) override;
|
||||
|
||||
private:
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
std::vector<SamplerBinding> sampler_bindings_;
|
||||
std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT;
|
||||
size_t texture_binding_layout_user_uid_ = 0;
|
||||
size_t sampler_binding_layout_user_uid_ = 0;
|
||||
uint32_t used_texture_mask_ = 0;
|
||||
|
||||
std::vector<uint8_t> forced_early_z_shader_;
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
|
@ -221,7 +221,9 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
|
|||
*reinterpret_cast<const D3DSetSamplePositionsArguments*>(stream);
|
||||
command_list_1->SetSamplePositions(
|
||||
args.num_samples_per_pixel, args.num_pixels,
|
||||
const_cast<D3D12_SAMPLE_POSITION*>(args.sample_positions));
|
||||
(args.num_samples_per_pixel && args.num_pixels)
|
||||
? const_cast<D3D12_SAMPLE_POSITION*>(args.sample_positions)
|
||||
: nullptr);
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -27,6 +27,7 @@
|
|||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
|
@ -43,36 +44,39 @@ class PipelineCache {
|
|||
|
||||
PipelineCache(D3D12CommandProcessor& command_processor,
|
||||
const RegisterFile& register_file, bool bindless_resources_used,
|
||||
bool edram_rov_used, uint32_t resolution_scale);
|
||||
bool edram_rov_used,
|
||||
flags::DepthFloat24Conversion depth_float24_conversion,
|
||||
uint32_t resolution_scale);
|
||||
~PipelineCache();
|
||||
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
void ClearCache(bool shutting_down = false);
|
||||
|
||||
void InitializeShaderStorage(const std::filesystem::path& storage_root,
|
||||
void InitializeShaderStorage(const std::filesystem::path& cache_root,
|
||||
uint32_t title_id, bool blocking);
|
||||
void ShutdownShaderStorage();
|
||||
|
||||
void EndSubmission();
|
||||
bool IsCreatingPipelines();
|
||||
|
||||
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
|
||||
D3D12Shader* LoadShader(xenos::ShaderType shader_type,
|
||||
const uint32_t* host_address, uint32_t dword_count);
|
||||
|
||||
// Returns the host vertex shader type for the current draw if it's valid and
|
||||
// supported, or Shader::HostVertexShaderType(-1) if not.
|
||||
Shader::HostVertexShaderType GetHostVertexShaderTypeIfValid() const;
|
||||
// Retrieves the shader modifications for the current state, and returns
|
||||
// whether they are valid.
|
||||
bool GetCurrentShaderModifications(
|
||||
DxbcShaderTranslator::Modification& vertex_shader_modification_out,
|
||||
DxbcShaderTranslator::Modification& pixel_shader_modification_out) const;
|
||||
|
||||
// Translates shaders if needed, also making shader info up to date.
|
||||
bool EnsureShadersTranslated(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type);
|
||||
bool EnsureShadersTranslated(D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader);
|
||||
|
||||
bool ConfigurePipeline(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
|
||||
bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
|
||||
|
||||
|
@ -86,13 +90,12 @@ class PipelineCache {
|
|||
XEPACKEDSTRUCT(ShaderStoredHeader, {
|
||||
uint64_t ucode_data_hash;
|
||||
|
||||
uint32_t ucode_dword_count : 16;
|
||||
uint32_t ucode_dword_count : 31;
|
||||
xenos::ShaderType type : 1;
|
||||
Shader::HostVertexShaderType host_vertex_shader_type : 3;
|
||||
|
||||
reg::SQ_PROGRAM_CNTL sq_program_cntl;
|
||||
|
||||
static constexpr uint32_t kVersion = 0x20200405;
|
||||
static constexpr uint32_t kVersion = 0x20201207;
|
||||
});
|
||||
|
||||
// Update PipelineDescription::kVersion if any of the Pipeline* enums are
|
||||
|
@ -170,28 +173,28 @@ class PipelineCache {
|
|||
uint64_t vertex_shader_hash;
|
||||
// 0 if drawing without a pixel shader.
|
||||
uint64_t pixel_shader_hash;
|
||||
uint32_t vertex_shader_modification;
|
||||
uint32_t pixel_shader_modification;
|
||||
|
||||
int32_t depth_bias;
|
||||
float depth_bias_slope_scaled;
|
||||
|
||||
PipelineStripCutIndex strip_cut_index : 2; // 2
|
||||
Shader::HostVertexShaderType host_vertex_shader_type : 3; // 5
|
||||
PipelineStripCutIndex strip_cut_index : 2; // 2
|
||||
// PipelinePrimitiveTopologyType for a vertex shader.
|
||||
// xenos::TessellationMode for a domain shader.
|
||||
uint32_t primitive_topology_type_or_tessellation_mode : 2; // 7
|
||||
uint32_t primitive_topology_type_or_tessellation_mode : 2; // 4
|
||||
// Zero for non-kVertex host_vertex_shader_type.
|
||||
PipelineGeometryShader geometry_shader : 2; // 9
|
||||
uint32_t fill_mode_wireframe : 1; // 10
|
||||
PipelineCullMode cull_mode : 2; // 12
|
||||
uint32_t front_counter_clockwise : 1; // 13
|
||||
uint32_t depth_clip : 1; // 14
|
||||
uint32_t rov_msaa : 1; // 15
|
||||
xenos::DepthRenderTargetFormat depth_format : 1; // 16
|
||||
xenos::CompareFunction depth_func : 3; // 19
|
||||
uint32_t depth_write : 1; // 20
|
||||
uint32_t stencil_enable : 1; // 21
|
||||
uint32_t stencil_read_mask : 8; // 29
|
||||
uint32_t force_early_z : 1; // 30
|
||||
PipelineGeometryShader geometry_shader : 2; // 6
|
||||
uint32_t fill_mode_wireframe : 1; // 7
|
||||
PipelineCullMode cull_mode : 2; // 9
|
||||
uint32_t front_counter_clockwise : 1; // 10
|
||||
uint32_t depth_clip : 1; // 11
|
||||
uint32_t rov_msaa : 1; // 12
|
||||
xenos::DepthRenderTargetFormat depth_format : 1; // 13
|
||||
xenos::CompareFunction depth_func : 3; // 16
|
||||
uint32_t depth_write : 1; // 17
|
||||
uint32_t stencil_enable : 1; // 18
|
||||
uint32_t stencil_read_mask : 8; // 26
|
||||
|
||||
uint32_t stencil_write_mask : 8; // 8
|
||||
xenos::StencilOp stencil_front_fail_op : 3; // 11
|
||||
|
@ -205,7 +208,7 @@ class PipelineCache {
|
|||
|
||||
PipelineRenderTarget render_targets[4];
|
||||
|
||||
static constexpr uint32_t kVersion = 0x20200405;
|
||||
static constexpr uint32_t kVersion = 0x20201207;
|
||||
});
|
||||
|
||||
XEPACKEDSTRUCT(PipelineStoredDescription, {
|
||||
|
@ -215,24 +218,31 @@ class PipelineCache {
|
|||
|
||||
struct PipelineRuntimeDescription {
|
||||
ID3D12RootSignature* root_signature;
|
||||
D3D12Shader* vertex_shader;
|
||||
D3D12Shader* pixel_shader;
|
||||
D3D12Shader::D3D12Translation* vertex_shader;
|
||||
D3D12Shader::D3D12Translation* pixel_shader;
|
||||
PipelineDescription description;
|
||||
};
|
||||
|
||||
// Returns the host vertex shader type for the current draw if it's valid and
|
||||
// supported, or Shader::HostVertexShaderType(-1) if not.
|
||||
Shader::HostVertexShaderType GetCurrentHostVertexShaderTypeIfValid() const;
|
||||
|
||||
D3D12Shader* LoadShader(xenos::ShaderType shader_type,
|
||||
const uint32_t* host_address, uint32_t dword_count,
|
||||
uint64_t data_hash);
|
||||
|
||||
// Can be called from multiple threads.
|
||||
bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader& shader,
|
||||
bool TranslateShader(DxbcShaderTranslator& translator,
|
||||
D3D12Shader::D3D12Translation& translation,
|
||||
reg::SQ_PROGRAM_CNTL cntl,
|
||||
IDxbcConverter* dxbc_converter = nullptr,
|
||||
IDxcUtils* dxc_utils = nullptr,
|
||||
IDxcCompiler* dxc_compiler = nullptr,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex);
|
||||
IDxcCompiler* dxc_compiler = nullptr);
|
||||
|
||||
bool GetCurrentStateDescription(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
|
||||
bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
PipelineRuntimeDescription& runtime_description_out);
|
||||
|
||||
|
@ -243,6 +253,8 @@ class PipelineCache {
|
|||
const RegisterFile& register_file_;
|
||||
bool bindless_resources_used_;
|
||||
bool edram_rov_used_;
|
||||
// 20e4 depth conversion mode to use for non-ROV output.
|
||||
flags::DepthFloat24Conversion depth_float24_conversion_;
|
||||
uint32_t resolution_scale_;
|
||||
|
||||
// Reusable shader translator.
|
||||
|
@ -267,7 +279,7 @@ class PipelineCache {
|
|||
// Texture binding layouts of different shaders, for obtaining layout UIDs.
|
||||
std::vector<D3D12Shader::TextureBinding> texture_binding_layouts_;
|
||||
// Map of texture binding layouts used by shaders, for obtaining UIDs. Keys
|
||||
// are XXH64 hashes of layouts, values need manual collision resolution using
|
||||
// are XXH3 hashes of layouts, values need manual collision resolution using
|
||||
// layout_vector_offset:layout_length of texture_binding_layouts_.
|
||||
std::unordered_multimap<uint64_t, LayoutUID,
|
||||
xe::hash::IdentityHasher<uint64_t>>
|
||||
|
@ -275,7 +287,7 @@ class PipelineCache {
|
|||
// Bindless sampler indices of different shaders, for obtaining layout UIDs.
|
||||
// For bindful, sampler count is used as the UID instead.
|
||||
std::vector<uint32_t> bindless_sampler_layouts_;
|
||||
// Keys are XXH64 hashes of used bindless sampler indices.
|
||||
// Keys are XXH3 hashes of used bindless sampler indices.
|
||||
std::unordered_multimap<uint64_t, LayoutUID,
|
||||
xe::hash::IdentityHasher<uint64_t>>
|
||||
bindless_sampler_layout_map_;
|
||||
|
@ -300,11 +312,14 @@ class PipelineCache {
|
|||
Pipeline* current_pipeline_ = nullptr;
|
||||
|
||||
// Currently open shader storage path.
|
||||
std::filesystem::path shader_storage_root_;
|
||||
std::filesystem::path shader_storage_cache_root_;
|
||||
uint32_t shader_storage_title_id_ = 0;
|
||||
|
||||
// Shader storage output stream, for preload in the next emulator runs.
|
||||
FILE* shader_storage_file_ = nullptr;
|
||||
// For only writing shaders to the currently open storage once, incremented
|
||||
// when switching the storage.
|
||||
uint32_t shader_storage_index_ = 0;
|
||||
bool shader_storage_file_flush_needed_ = false;
|
||||
|
||||
// Pipeline storage output stream, for preload in the next emulator runs.
|
||||
|
|
|
@ -40,11 +40,13 @@ namespace d3d12 {
|
|||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.h"
|
||||
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.h"
|
||||
|
@ -87,6 +89,12 @@ const RenderTargetCache::EdramLoadStoreModeInfo
|
|||
{edram_load_depth_float_cs, sizeof(edram_load_depth_float_cs),
|
||||
L"EDRAM Load Float Depth", edram_store_depth_float_cs,
|
||||
sizeof(edram_store_depth_float_cs), L"EDRAM Store Float Depth"},
|
||||
{edram_load_depth_float24and32_cs,
|
||||
sizeof(edram_load_depth_float24and32_cs),
|
||||
L"EDRAM Load 24-bit & 32-bit Float Depth",
|
||||
edram_store_depth_float24and32_cs,
|
||||
sizeof(edram_store_depth_float24and32_cs),
|
||||
L"EDRAM Store 24-bit & 32-bit Float Depth"},
|
||||
};
|
||||
|
||||
const std::pair<const uint8_t*, size_t>
|
||||
|
@ -126,6 +134,8 @@ RenderTargetCache::RenderTargetCache(D3D12CommandProcessor& command_processor,
|
|||
RenderTargetCache::~RenderTargetCache() { Shutdown(); }
|
||||
|
||||
bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
||||
depth_float24_conversion_ = flags::GetDepthFloat24Conversion();
|
||||
|
||||
// EDRAM buffer size depends on this.
|
||||
resolution_scale_2x_ = texture_cache.IsResolutionScale2X();
|
||||
assert_false(resolution_scale_2x_ && !edram_rov_used_);
|
||||
|
@ -420,7 +430,8 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
return false;
|
||||
}
|
||||
resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
|
||||
if (!edram_rov_used_) {
|
||||
if (!edram_rov_used_ &&
|
||||
depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) {
|
||||
assert_false(resolution_scale_2x_);
|
||||
resolve_clear_depth_24_32_pipeline_ =
|
||||
ui::d3d12::util::CreateComputePipeline(
|
||||
|
@ -434,7 +445,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
resolve_clear_64bpp_pipeline_->SetName(
|
||||
resolve_clear_depth_24_32_pipeline_->SetName(
|
||||
L"Resolve Clear 24-bit & 32-bit Depth");
|
||||
}
|
||||
|
||||
|
@ -1266,10 +1277,12 @@ bool RenderTargetCache::Resolve(const Memory& memory,
|
|||
if (clear_depth) {
|
||||
// Also clear the host 32-bit floating-point depth used for loaing and
|
||||
// storing 24-bit floating-point depth at full precision.
|
||||
bool clear_float32_depth =
|
||||
!edram_rov_used_ && xenos::DepthRenderTargetFormat(
|
||||
resolve_info.depth_edram_info.format) ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8;
|
||||
bool clear_float32_depth = !edram_rov_used_ &&
|
||||
depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnCopy &&
|
||||
xenos::DepthRenderTargetFormat(
|
||||
resolve_info.depth_edram_info.format) ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8;
|
||||
draw_util::ResolveClearShaderConstants depth_clear_constants;
|
||||
resolve_info.GetDepthClearShaderConstants(clear_float32_depth,
|
||||
depth_clear_constants);
|
||||
|
@ -1558,7 +1571,8 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) {
|
|||
|
||||
uint32_t RenderTargetCache::GetEdramBufferSize() const {
|
||||
uint32_t size = xenos::kEdramSizeBytes;
|
||||
if (!edram_rov_used_) {
|
||||
if (!edram_rov_used_ &&
|
||||
depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) {
|
||||
// Two 10 MB pages, one containing color and integer depth data, another
|
||||
// with 32-bit float depth when 20e4 depth is used to allow for multipass
|
||||
// drawing without precision loss in case of EDRAM store/load.
|
||||
|
@ -1831,12 +1845,15 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
|||
}
|
||||
|
||||
RenderTargetCache::EdramLoadStoreMode RenderTargetCache::GetLoadStoreMode(
|
||||
bool is_depth, uint32_t format) {
|
||||
bool is_depth, uint32_t format) const {
|
||||
if (is_depth) {
|
||||
return xenos::DepthRenderTargetFormat(format) ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8
|
||||
? EdramLoadStoreMode::kDepthFloat
|
||||
: EdramLoadStoreMode::kDepthUnorm;
|
||||
if (xenos::DepthRenderTargetFormat(format) ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8) {
|
||||
return depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy
|
||||
? EdramLoadStoreMode::kDepthFloat24And32
|
||||
: EdramLoadStoreMode::kDepthFloat;
|
||||
}
|
||||
return EdramLoadStoreMode::kDepthUnorm;
|
||||
}
|
||||
xenos::ColorRenderTargetFormat color_format =
|
||||
xenos::ColorRenderTargetFormat(format);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "xenia/gpu/d3d12/d3d12_shared_memory.h"
|
||||
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
@ -259,6 +260,10 @@ class RenderTargetCache {
|
|||
void Shutdown();
|
||||
void ClearCache();
|
||||
|
||||
flags::DepthFloat24Conversion depth_float24_conversion() const {
|
||||
return depth_float24_conversion_;
|
||||
}
|
||||
|
||||
void CompletedSubmissionUpdated();
|
||||
void BeginSubmission();
|
||||
void EndFrame();
|
||||
|
@ -318,6 +323,7 @@ class RenderTargetCache {
|
|||
kColor7e3,
|
||||
kDepthUnorm,
|
||||
kDepthFloat,
|
||||
kDepthFloat24And32,
|
||||
|
||||
kCount
|
||||
};
|
||||
|
@ -424,7 +430,7 @@ class RenderTargetCache {
|
|||
uint32_t instance);
|
||||
#endif
|
||||
|
||||
static EdramLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format);
|
||||
EdramLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format) const;
|
||||
|
||||
// Must be in a frame to call. Stores the dirty areas of the currently bound
|
||||
// render targets and marks them as clean.
|
||||
|
@ -442,6 +448,9 @@ class RenderTargetCache {
|
|||
bool bindless_resources_used_;
|
||||
bool edram_rov_used_;
|
||||
|
||||
// 20e4 depth conversion mode to use for non-ROV output.
|
||||
flags::DepthFloat24Conversion depth_float24_conversion_;
|
||||
|
||||
// Whether 1 guest pixel is rendered as 2x2 host pixels (currently only
|
||||
// supported with ROV).
|
||||
bool resolution_scale_2x_ = false;
|
||||
|
|
|
@ -9,8 +9,6 @@
|
|||
|
||||
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cfloat>
|
||||
#include <cstring>
|
||||
|
@ -21,6 +19,7 @@
|
|||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
|
|
|
@ -114,6 +114,7 @@ int32_t FloatToD3D11Fixed16p8(float f32) {
|
|||
void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
|
||||
float pixel_size_y, bool origin_bottom_left,
|
||||
float x_max, float y_max, bool allow_reverse_z,
|
||||
bool convert_z_to_float24,
|
||||
ViewportInfo& viewport_info_out) {
|
||||
assert_true(pixel_size_x >= 1.0f);
|
||||
assert_true(pixel_size_y >= 1.0f);
|
||||
|
@ -227,6 +228,7 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
|
|||
ndc_offset_y = 0.0f;
|
||||
}
|
||||
} else {
|
||||
viewport_top = 0.0f;
|
||||
viewport_height = std::min(
|
||||
float(xenos::kTexture2DCubeMaxWidthHeight) * pixel_size_y, y_max);
|
||||
ndc_scale_y = (2.0f * pixel_size_y) / viewport_height;
|
||||
|
@ -269,6 +271,17 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
|
|||
ndc_scale_z = -ndc_scale_z;
|
||||
ndc_offset_z = 1.0f - ndc_offset_z;
|
||||
}
|
||||
if (convert_z_to_float24 && regs.Get<reg::RB_DEPTHCONTROL>().z_enable &&
|
||||
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8) {
|
||||
// Need to adjust the bounds that the resulting depth values will be clamped
|
||||
// to after the pixel shader. Preferring adding some error to interpolated Z
|
||||
// instead if conversion can't be done exactly, without modifying clipping
|
||||
// bounds by adjusting Z in vertex shaders, as that may cause polygons
|
||||
// placed explicitly at Z = 0 or Z = W to be clipped.
|
||||
viewport_z_min = xenos::Float20e4To32(xenos::Float32To20e4(viewport_z_min));
|
||||
viewport_z_max = xenos::Float20e4To32(xenos::Float32To20e4(viewport_z_max));
|
||||
}
|
||||
|
||||
viewport_info_out.left = viewport_left;
|
||||
viewport_info_out.top = viewport_top;
|
||||
|
|
|
@ -53,6 +53,7 @@ struct ViewportInfo {
|
|||
void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
|
||||
float pixel_size_y, bool origin_bottom_left,
|
||||
float x_max, float y_max, bool allow_reverse_z,
|
||||
bool convert_z_to_float24,
|
||||
ViewportInfo& viewport_info_out);
|
||||
|
||||
struct Scissor {
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/dxbc_shader.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
// Constructs a DXBC shader wrapper. All arguments are forwarded unchanged to
// the Shader base class constructor; the DXBC-specific binding lists are left
// at their default (empty) state here.
DxbcShader::DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
|
||||
|
||||
// Factory hook overriding Shader::CreateTranslationInstance: allocates the
// DXBC-specific translation object for the given modification bits.
// The result is a raw pointer to a heap-allocated object; the signature is
// dictated by the base class.
Shader::Translation* DxbcShader::CreateTranslationInstance(
    uint32_t modification) {
  DxbcTranslation* const dxbc_translation =
      new DxbcTranslation(*this, modification);
  return dxbc_translation;
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,83 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_DXBC_SHADER_H_
|
||||
#define XENIA_GPU_DXBC_SHADER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
class DxbcShader : public Shader {
|
||||
public:
|
||||
class DxbcTranslation : public Translation {
|
||||
public:
|
||||
DxbcTranslation(DxbcShader& shader, uint32_t modification)
|
||||
: Translation(shader, modification) {}
|
||||
};
|
||||
|
||||
DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count);
|
||||
|
||||
static constexpr uint32_t kMaxTextureBindingIndexBits =
|
||||
DxbcShaderTranslator::kMaxTextureBindingIndexBits;
|
||||
static constexpr uint32_t kMaxTextureBindings =
|
||||
DxbcShaderTranslator::kMaxTextureBindings;
|
||||
struct TextureBinding {
|
||||
uint32_t bindless_descriptor_index;
|
||||
uint32_t fetch_constant;
|
||||
// Stacked and 3D are separate TextureBindings, even for bindless for null
|
||||
// descriptor handling simplicity.
|
||||
xenos::FetchOpDimension dimension;
|
||||
bool is_signed;
|
||||
};
|
||||
// Safe to hash and compare with memcmp for layout hashing.
|
||||
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(texture_bindings_.size());
|
||||
return texture_bindings_.data();
|
||||
}
|
||||
const uint32_t GetUsedTextureMask() const { return used_texture_mask_; }
|
||||
|
||||
static constexpr uint32_t kMaxSamplerBindingIndexBits =
|
||||
DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
|
||||
static constexpr uint32_t kMaxSamplerBindings =
|
||||
DxbcShaderTranslator::kMaxSamplerBindings;
|
||||
struct SamplerBinding {
|
||||
uint32_t bindless_descriptor_index;
|
||||
uint32_t fetch_constant;
|
||||
xenos::TextureFilter mag_filter;
|
||||
xenos::TextureFilter min_filter;
|
||||
xenos::TextureFilter mip_filter;
|
||||
xenos::AnisoFilter aniso_filter;
|
||||
};
|
||||
const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(sampler_bindings_.size());
|
||||
return sampler_bindings_.data();
|
||||
}
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint32_t modification) override;
|
||||
|
||||
private:
|
||||
friend class DxbcShaderTranslator;
|
||||
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
std::vector<SamplerBinding> sampler_bindings_;
|
||||
uint32_t used_texture_mask_ = 0;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_DXBC_SHADER_H_
|
|
@ -19,6 +19,7 @@
|
|||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/gpu/dxbc_shader.h"
|
||||
|
||||
DEFINE_bool(dxbc_switch, true,
|
||||
"Use switch rather than if for flow control. Turning this off or "
|
||||
|
@ -76,64 +77,31 @@ DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
|
|||
}
|
||||
DxbcShaderTranslator::~DxbcShaderTranslator() = default;
|
||||
|
||||
std::vector<uint8_t> DxbcShaderTranslator::ForceEarlyDepthStencil(
|
||||
const uint8_t* shader) {
|
||||
const uint32_t* old_shader = reinterpret_cast<const uint32_t*>(shader);
|
||||
|
||||
// To return something anyway even if patching fails.
|
||||
std::vector<uint8_t> new_shader;
|
||||
uint32_t shader_size_bytes = old_shader[6];
|
||||
new_shader.resize(shader_size_bytes);
|
||||
std::memcpy(new_shader.data(), shader, shader_size_bytes);
|
||||
|
||||
// Find the SHEX chunk.
|
||||
uint32_t chunk_count = old_shader[7];
|
||||
for (uint32_t i = 0; i < chunk_count; ++i) {
|
||||
uint32_t chunk_offset_bytes = old_shader[8 + i];
|
||||
const uint32_t* chunk = old_shader + chunk_offset_bytes / sizeof(uint32_t);
|
||||
if (chunk[0] != 'XEHS') {
|
||||
continue;
|
||||
}
|
||||
// Find dcl_globalFlags and patch it.
|
||||
uint32_t code_size_dwords = chunk[3];
|
||||
chunk += 4;
|
||||
for (uint32_t j = 0; j < code_size_dwords;) {
|
||||
uint32_t opcode_token = chunk[j];
|
||||
uint32_t opcode = DECODE_D3D10_SB_OPCODE_TYPE(opcode_token);
|
||||
if (opcode == D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) {
|
||||
opcode_token |= D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL;
|
||||
std::memcpy(new_shader.data() +
|
||||
(chunk_offset_bytes + (4 + j) * sizeof(uint32_t)),
|
||||
&opcode_token, sizeof(uint32_t));
|
||||
// Recalculate the checksum since the shader was modified.
|
||||
CalculateDXBCChecksum(
|
||||
reinterpret_cast<unsigned char*>(new_shader.data()),
|
||||
shader_size_bytes,
|
||||
reinterpret_cast<unsigned int*>(new_shader.data() +
|
||||
sizeof(uint32_t)));
|
||||
break;
|
||||
}
|
||||
if (opcode == D3D10_SB_OPCODE_CUSTOMDATA) {
|
||||
j += chunk[j + 1];
|
||||
} else {
|
||||
j += DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(opcode_token);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return std::move(new_shader);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> DxbcShaderTranslator::CreateDepthOnlyPixelShader() {
|
||||
Reset();
|
||||
Reset(xenos::ShaderType::kPixel);
|
||||
is_depth_only_pixel_shader_ = true;
|
||||
StartTranslation();
|
||||
return std::move(CompleteTranslation());
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::Reset() {
|
||||
ShaderTranslator::Reset();
|
||||
// Produces the packed default Modification value for a shader of the given
// type: vertex shaders record the requested host vertex shader type, pixel
// shaders use the depth / stencil mode without modifiers, and any other type
// keeps the all-zero default.
uint32_t DxbcShaderTranslator::GetDefaultModification(
    xenos::ShaderType shader_type,
    Shader::HostVertexShaderType host_vertex_shader_type) const {
  Modification defaults;
  if (shader_type == xenos::ShaderType::kVertex) {
    defaults.host_vertex_shader_type = host_vertex_shader_type;
  } else if (shader_type == xenos::ShaderType::kPixel) {
    defaults.depth_stencil_mode = Modification::DepthStencilMode::kNoModifiers;
  }
  return defaults.value;
}
|
||||
|
||||
void DxbcShaderTranslator::Reset(xenos::ShaderType shader_type) {
|
||||
ShaderTranslator::Reset(shader_type);
|
||||
|
||||
shader_code_.clear();
|
||||
|
||||
|
@ -152,7 +120,7 @@ void DxbcShaderTranslator::Reset() {
|
|||
in_domain_location_used_ = 0;
|
||||
in_primitive_id_used_ = false;
|
||||
in_control_point_index_used_ = false;
|
||||
in_position_xy_used_ = false;
|
||||
in_position_used_ = 0;
|
||||
in_front_face_used_ = false;
|
||||
|
||||
system_temp_count_current_ = 0;
|
||||
|
@ -457,7 +425,9 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
|
||||
// Remember that x# are only accessible via mov load or store - use a
|
||||
// temporary variable if need to do any computations!
|
||||
switch (host_vertex_shader_type()) {
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
GetDxbcShaderModification().host_vertex_shader_type;
|
||||
switch (host_vertex_shader_type) {
|
||||
case Shader::HostVertexShaderType::kVertex:
|
||||
StartVertexShader_LoadVertexIndex();
|
||||
break;
|
||||
|
@ -618,7 +588,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
|
|||
|
||||
default:
|
||||
// TODO(Triang3l): Support line and non-adaptive quad patches.
|
||||
assert_unhandled_case(host_vertex_shader_type());
|
||||
assert_unhandled_case(host_vertex_shader_type);
|
||||
EmitTranslationError(
|
||||
"Unsupported host vertex shader type in StartVertexOrDomainShader");
|
||||
break;
|
||||
|
@ -720,7 +690,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// faceness as X sign bit. Using Z as scratch register now.
|
||||
if (edram_rov_used_) {
|
||||
// Get XY address of the current host pixel as float.
|
||||
in_position_xy_used_ = true;
|
||||
in_position_used_ |= 0b0011;
|
||||
DxbcOpRoundZ(DxbcDest::R(param_gen_temp, 0b0011),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition)));
|
||||
// Revert resolution scale - after truncating, so if the pixel position
|
||||
|
@ -744,7 +714,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
} else {
|
||||
// Get XY address of the current SSAA sample by converting
|
||||
// SV_Position.xy to an integer.
|
||||
in_position_xy_used_ = true;
|
||||
in_position_used_ |= 0b0011;
|
||||
DxbcOpFToU(DxbcDest::R(param_gen_temp, 0b0011),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition)));
|
||||
// Undo SSAA that is used instead of MSAA - since it's used as a
|
||||
|
@ -870,7 +840,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
void DxbcShaderTranslator::StartTranslation() {
|
||||
// Allocate global system temporary registers that may also be used in the
|
||||
// epilogue.
|
||||
if (IsDxbcVertexOrDomainShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
system_temp_position_ = PushSystemTemp(0b1111);
|
||||
system_temp_point_size_edge_flag_kill_vertex_ = PushSystemTemp(0b0100);
|
||||
// Set the point size to a negative value to tell the geometry shader that
|
||||
|
@ -879,20 +849,21 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
DxbcOpMov(
|
||||
DxbcDest::R(system_temp_point_size_edge_flag_kill_vertex_, 0b0001),
|
||||
DxbcSrc::LF(-1.0f));
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
} else if (is_pixel_shader()) {
|
||||
if (edram_rov_used_) {
|
||||
// Will be initialized unconditionally.
|
||||
system_temp_rov_params_ = PushSystemTemp();
|
||||
if (ROV_IsDepthStencilEarly() || writes_depth()) {
|
||||
// If the shader doesn't write to oDepth, each component will be written
|
||||
// to if depth/stencil is enabled and the respective sample is covered -
|
||||
// so need to initialize now because the first writes will be
|
||||
// conditional. If the shader writes to oDepth, this is oDepth of the
|
||||
// shader, written by the guest code, so initialize because assumptions
|
||||
// can't be made about the integrity of the guest code.
|
||||
system_temp_rov_depth_stencil_ =
|
||||
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
|
||||
}
|
||||
}
|
||||
if (IsDepthStencilSystemTempUsed()) {
|
||||
// If the shader doesn't write to oDepth, and ROV is used, each
|
||||
// component will be written to if depth/stencil is enabled and the
|
||||
// respective sample is covered - so need to initialize now because the
|
||||
// first writes will be conditional.
|
||||
// If the shader writes to oDepth, this is oDepth of the shader, written
|
||||
// by the guest code, so initialize because assumptions can't be made
|
||||
// about the integrity of the guest code.
|
||||
system_temp_depth_stencil_ =
|
||||
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
|
||||
}
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (writes_color_target(i)) {
|
||||
|
@ -942,7 +913,7 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
|
||||
// Zero general-purpose registers to prevent crashes when the game
|
||||
// references them after only initializing them conditionally.
|
||||
for (uint32_t i = IsDxbcPixelShader() ? xenos::kMaxInterpolators : 0;
|
||||
for (uint32_t i = is_pixel_shader() ? xenos::kMaxInterpolators : 0;
|
||||
i < register_count(); ++i) {
|
||||
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
|
||||
: DxbcDest::R(i),
|
||||
|
@ -951,9 +922,9 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
}
|
||||
|
||||
// Write stage-specific prologue.
|
||||
if (IsDxbcVertexOrDomainShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
StartVertexOrDomainShader();
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
} else if (is_pixel_shader()) {
|
||||
StartPixelShader();
|
||||
}
|
||||
|
||||
|
@ -1168,31 +1139,31 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
}
|
||||
|
||||
// Write stage-specific epilogue.
|
||||
if (IsDxbcVertexOrDomainShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
CompleteVertexOrDomainShader();
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
} else if (is_pixel_shader()) {
|
||||
CompletePixelShader();
|
||||
}
|
||||
|
||||
// Return from `main`.
|
||||
DxbcOpRet();
|
||||
|
||||
if (IsDxbcVertexOrDomainShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
// Release system_temp_position_ and
|
||||
// system_temp_point_size_edge_flag_kill_vertex_.
|
||||
PopSystemTemp(2);
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
} else if (is_pixel_shader()) {
|
||||
// Release system_temps_color_.
|
||||
for (int32_t i = 3; i >= 0; --i) {
|
||||
if (writes_color_target(i)) {
|
||||
PopSystemTemp();
|
||||
}
|
||||
}
|
||||
if (IsDepthStencilSystemTempUsed()) {
|
||||
// Release system_temp_depth_stencil_.
|
||||
PopSystemTemp();
|
||||
}
|
||||
if (edram_rov_used_) {
|
||||
if (ROV_IsDepthStencilEarly() || writes_depth()) {
|
||||
// Release system_temp_rov_depth_stencil_.
|
||||
PopSystemTemp();
|
||||
}
|
||||
// Release system_temp_rov_params_.
|
||||
PopSystemTemp();
|
||||
}
|
||||
|
@ -1303,6 +1274,44 @@ std::vector<uint8_t> DxbcShaderTranslator::CompleteTranslation() {
|
|||
return shader_object_bytes;
|
||||
}
|
||||
|
||||
// Copies the texture and sampler binding layouts gathered during translation
// into the DxbcShader owning `translation`, and rebuilds its mask of used
// texture fetch constants. Does nothing unless
// setup_shader_post_translation_info is set and the shader is a DxbcShader.
void DxbcShaderTranslator::PostTranslation(
    Shader::Translation& translation, bool setup_shader_post_translation_info) {
  if (!setup_shader_post_translation_info) {
    return;
  }
  DxbcShader* const target = dynamic_cast<DxbcShader*>(&translation.shader());
  if (target == nullptr) {
    return;
  }

  // Texture bindings, plus the mask with one bit per referenced fetch constant.
  auto& textures_out = target->texture_bindings_;
  textures_out.clear();
  textures_out.reserve(texture_bindings_.size());
  uint32_t fetch_constant_mask = 0;
  for (const TextureBinding& source : texture_bindings_) {
    DxbcShader::TextureBinding& destination = textures_out.emplace_back();
    // For a stable hash.
    std::memset(&destination, 0, sizeof(destination));
    destination.bindless_descriptor_index = source.bindless_descriptor_index;
    destination.fetch_constant = source.fetch_constant;
    destination.dimension = source.dimension;
    destination.is_signed = source.is_signed;
    fetch_constant_mask |= 1u << source.fetch_constant;
  }
  target->used_texture_mask_ = fetch_constant_mask;

  // Sampler bindings.
  auto& samplers_out = target->sampler_bindings_;
  samplers_out.clear();
  samplers_out.reserve(sampler_bindings_.size());
  for (const SamplerBinding& source : sampler_bindings_) {
    DxbcShader::SamplerBinding& destination = samplers_out.emplace_back();
    destination.bindless_descriptor_index = source.bindless_descriptor_index;
    destination.fetch_constant = source.fetch_constant;
    destination.mag_filter = source.mag_filter;
    destination.min_filter = source.min_filter;
    destination.mip_filter = source.mip_filter;
    destination.aniso_filter = source.aniso_filter;
  }
}
|
||||
|
||||
void DxbcShaderTranslator::EmitInstructionDisassembly() {
|
||||
if (!emit_source_map_) {
|
||||
return;
|
||||
|
@ -1527,19 +1536,20 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
}
|
||||
break;
|
||||
case InstructionStorageTarget::kDepth:
|
||||
// Writes X to scalar oDepth or to X of system_temp_rov_depth_stencil_, no
|
||||
// Writes X to scalar oDepth or to X of system_temp_depth_stencil_, no
|
||||
// additional swizzling needed.
|
||||
assert_true(used_write_mask == 0b0001);
|
||||
assert_true(writes_depth());
|
||||
if (edram_rov_used_) {
|
||||
dest = DxbcDest::R(system_temp_rov_depth_stencil_);
|
||||
if (IsDepthStencilSystemTempUsed()) {
|
||||
dest = DxbcDest::R(system_temp_depth_stencil_);
|
||||
} else {
|
||||
dest = DxbcDest::ODepth();
|
||||
}
|
||||
// Depth outside [0, 1] is not safe for use with the ROV code. Though 20e4
|
||||
// float depth can store values below 2, it's a very unusual case.
|
||||
// Direct3D 10+ SV_Depth, however, can accept any values, including
|
||||
// specials, when the depth buffer is floating-point.
|
||||
// Depth outside [0, 1] is not safe for use with the ROV code and with
|
||||
// 20e4-as-32 conversion. Though 20e4 float depth can store values between
|
||||
// 1 and 2, it's a very unusual case. Direct3D 10+ SV_Depth, however, can
|
||||
// accept any values, including specials, when the depth buffer is
|
||||
// floating-point; but depth is clamped to the viewport bounds anyway.
|
||||
is_clamped = true;
|
||||
break;
|
||||
}
|
||||
|
@ -2094,7 +2104,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// ds_5_1
|
||||
shader_object_.push_back(0x44530501u);
|
||||
} else {
|
||||
assert_true(IsDxbcPixelShader());
|
||||
assert_true(is_pixel_shader());
|
||||
// ps_5_1
|
||||
shader_object_.push_back(0xFFFF0501u);
|
||||
}
|
||||
|
@ -2765,7 +2775,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
control_point_index.semantic_name = semantic_offset;
|
||||
}
|
||||
semantic_offset += AppendString(shader_object_, "XEVERTEXID");
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
} else if (is_pixel_shader()) {
|
||||
// Written dynamically, so assume it's always used if it can be written to
|
||||
// any interpolator register.
|
||||
bool param_gen_used = !is_depth_only_pixel_shader_ && register_count() != 0;
|
||||
|
@ -2843,7 +2853,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
|
|||
position.component_type = DxbcSignatureRegisterComponentType::kFloat32;
|
||||
position.register_index = uint32_t(InOutRegister::kPSInPosition);
|
||||
position.mask = 0b1111;
|
||||
position.always_reads_mask = in_position_xy_used_ ? 0b0011 : 0b0000;
|
||||
position.always_reads_mask = in_position_used_;
|
||||
}
|
||||
|
||||
// Is front face (SV_IsFrontFace).
|
||||
|
@ -2927,7 +2937,9 @@ void DxbcShaderTranslator::WritePatchConstantSignature() {
|
|||
DxbcName tess_factor_edge_system_value = DxbcName::kUndefined;
|
||||
uint32_t tess_factor_inside_count = 0;
|
||||
DxbcName tess_factor_inside_system_value = DxbcName::kUndefined;
|
||||
switch (host_vertex_shader_type()) {
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
GetDxbcShaderModification().host_vertex_shader_type;
|
||||
switch (host_vertex_shader_type) {
|
||||
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
|
||||
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
|
||||
tess_factor_edge_count = 3;
|
||||
|
@ -2944,7 +2956,7 @@ void DxbcShaderTranslator::WritePatchConstantSignature() {
|
|||
break;
|
||||
default:
|
||||
// TODO(Triang3l): Support line patches.
|
||||
assert_unhandled_case(host_vertex_shader_type());
|
||||
assert_unhandled_case(host_vertex_shader_type);
|
||||
EmitTranslationError(
|
||||
"Unsupported host vertex shader type in WritePatchConstantSignature");
|
||||
}
|
||||
|
@ -3033,7 +3045,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
constexpr size_t kParameterDwords =
|
||||
sizeof(DxbcSignatureParameter) / sizeof(uint32_t);
|
||||
|
||||
if (IsDxbcVertexOrDomainShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
// Interpolators (TEXCOORD#).
|
||||
size_t interpolator_position = shader_object_.size();
|
||||
shader_object_.resize(shader_object_.size() +
|
||||
|
@ -3195,7 +3207,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
cull_distance.semantic_name = semantic_offset;
|
||||
}
|
||||
semantic_offset += AppendString(shader_object_, "SV_CullDistance");
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
} else if (is_pixel_shader()) {
|
||||
if (!edram_rov_used_) {
|
||||
// Color render targets (SV_Target#).
|
||||
size_t target_position = SIZE_MAX;
|
||||
|
@ -3217,9 +3229,11 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
}
|
||||
}
|
||||
|
||||
// Depth (SV_Depth).
|
||||
// Depth (SV_Depth or SV_DepthLessEqual).
|
||||
Modification::DepthStencilMode depth_stencil_mode =
|
||||
GetDxbcShaderModification().depth_stencil_mode;
|
||||
size_t depth_position = SIZE_MAX;
|
||||
if (writes_depth()) {
|
||||
if (writes_depth() || DSV_IsWritingFloat24Depth()) {
|
||||
depth_position = shader_object_.size();
|
||||
shader_object_.resize(shader_object_.size() + kParameterDwords);
|
||||
++parameter_count;
|
||||
|
@ -3253,7 +3267,15 @@ void DxbcShaderTranslator::WriteOutputSignature() {
|
|||
depth_position);
|
||||
depth.semantic_name = semantic_offset;
|
||||
}
|
||||
semantic_offset += AppendString(shader_object_, "SV_Depth");
|
||||
const char* depth_semantic_name;
|
||||
if (!writes_depth() &&
|
||||
GetDxbcShaderModification().depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kFloat24Truncating) {
|
||||
depth_semantic_name = "SV_DepthLessEqual";
|
||||
} else {
|
||||
depth_semantic_name = "SV_Depth";
|
||||
}
|
||||
semantic_offset += AppendString(shader_object_, depth_semantic_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3276,7 +3298,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
} else if (IsDxbcDomainShader()) {
|
||||
shader_type = D3D11_SB_DOMAIN_SHADER;
|
||||
} else {
|
||||
assert_true(IsDxbcPixelShader());
|
||||
assert_true(is_pixel_shader());
|
||||
shader_type = D3D10_SB_PIXEL_SHADER;
|
||||
}
|
||||
shader_object_.push_back(
|
||||
|
@ -3296,12 +3318,14 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
// Inputs/outputs have 1D-indexed operands with a component mask and a
|
||||
// register index.
|
||||
|
||||
Modification shader_modification = GetDxbcShaderModification();
|
||||
|
||||
if (IsDxbcDomainShader()) {
|
||||
// Not using control point data since Xenos only has a vertex shader acting
|
||||
// as both vertex shader and domain shader.
|
||||
stat_.c_control_points = 3;
|
||||
stat_.tessellator_domain = DxbcTessellatorDomain::kTriangle;
|
||||
switch (host_vertex_shader_type()) {
|
||||
switch (shader_modification.host_vertex_shader_type) {
|
||||
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
|
||||
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
|
||||
stat_.c_control_points = 3;
|
||||
|
@ -3314,7 +3338,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
break;
|
||||
default:
|
||||
// TODO(Triang3l): Support line patches.
|
||||
assert_unhandled_case(host_vertex_shader_type());
|
||||
assert_unhandled_case(shader_modification.host_vertex_shader_type);
|
||||
EmitTranslationError(
|
||||
"Unsupported host vertex shader type in WriteShaderCode");
|
||||
}
|
||||
|
@ -3330,11 +3354,17 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
|
||||
// Don't allow refactoring when converting to native code to maintain position
|
||||
// invariance (needed even in pixel shaders for oDepth invariance). Also this
|
||||
// dcl will be modified by ForceEarlyDepthStencil.
|
||||
shader_object_.push_back(
|
||||
// invariance (needed even in pixel shaders for oDepth invariance).
|
||||
uint32_t global_flags_opcode =
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1);
|
||||
if (is_pixel_shader() &&
|
||||
GetDxbcShaderModification().depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kEarlyHint &&
|
||||
!edram_rov_used_ && CanWriteZEarly()) {
|
||||
global_flags_opcode |= D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL;
|
||||
}
|
||||
shader_object_.push_back(global_flags_opcode);
|
||||
|
||||
// Constant buffers, from most frequently accessed to least frequently accessed
|
||||
// (the order is a hint to the driver according to the DXBC header).
|
||||
|
@ -3560,7 +3590,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
|
||||
// Inputs and outputs.
|
||||
if (IsDxbcVertexOrDomainShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
if (IsDxbcDomainShader()) {
|
||||
if (in_domain_location_used_) {
|
||||
// Domain location input.
|
||||
|
@ -3584,7 +3614,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
if (in_control_point_index_used_) {
|
||||
// Control point indices as float input.
|
||||
uint32_t control_point_array_size;
|
||||
switch (host_vertex_shader_type()) {
|
||||
switch (shader_modification.host_vertex_shader_type) {
|
||||
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
|
||||
control_point_array_size = 3;
|
||||
break;
|
||||
|
@ -3593,7 +3623,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
break;
|
||||
default:
|
||||
// TODO(Triang3l): Support line patches.
|
||||
assert_unhandled_case(host_vertex_shader_type());
|
||||
assert_unhandled_case(shader_modification.host_vertex_shader_type);
|
||||
EmitTranslationError(
|
||||
"Unsupported host vertex shader type in "
|
||||
"StartVertexOrDomainShader");
|
||||
|
@ -3683,7 +3713,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance));
|
||||
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_CULL_DISTANCE));
|
||||
++stat_.dcl_count;
|
||||
} else if (IsDxbcPixelShader()) {
|
||||
} else if (is_pixel_shader()) {
|
||||
bool is_writing_float24_depth = DSV_IsWritingFloat24Depth();
|
||||
// Interpolator input.
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
uint32_t interpolator_count =
|
||||
|
@ -3725,16 +3756,26 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(uint32_t(InOutRegister::kPSInClipSpaceZW));
|
||||
++stat_.dcl_count;
|
||||
}
|
||||
if (in_position_xy_used_) {
|
||||
// Position input (only XY needed for ps_param_gen, and the ROV depth code
|
||||
// calculates the depth from clip space Z and W).
|
||||
if (in_position_used_) {
|
||||
// Position input (XY needed for ps_param_gen, Z needed for non-ROV
|
||||
// float24 conversion; the ROV depth code calculates the depth from
|
||||
// clip space Z and W with pull-mode per-sample interpolation instead).
|
||||
// At the cost of possibility of MSAA with pixel-rate shading, need
|
||||
// per-sample depth - otherwise intersections cannot be antialiased, and
|
||||
// with SV_DepthLessEqual, per-sample (or centroid, but this isn't
|
||||
// applicable here) position is mandatory. However, with depth output, on
|
||||
// the guest, there's only one depth value for the whole pixel.
|
||||
D3D10_SB_INTERPOLATION_MODE position_interpolation_mode =
|
||||
is_writing_float24_depth && !writes_depth()
|
||||
? D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE
|
||||
: D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE;
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV) |
|
||||
ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(
|
||||
D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE) |
|
||||
position_interpolation_mode) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
||||
shader_object_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0011, 1));
|
||||
shader_object_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_INPUT, in_position_used_, 1));
|
||||
shader_object_.push_back(uint32_t(InOutRegister::kPSInPosition));
|
||||
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION));
|
||||
++stat_.dcl_count;
|
||||
|
@ -3778,12 +3819,19 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
}
|
||||
}
|
||||
// Depth output.
|
||||
if (writes_depth()) {
|
||||
if (is_writing_float24_depth || writes_depth()) {
|
||||
D3D10_SB_OPERAND_TYPE depth_operand_type;
|
||||
if (!writes_depth() &&
|
||||
GetDxbcShaderModification().depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kFloat24Truncating) {
|
||||
depth_operand_type = D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL;
|
||||
} else {
|
||||
depth_operand_type = D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH;
|
||||
}
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(2));
|
||||
shader_object_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH, 0));
|
||||
shader_object_.push_back(EncodeScalarOperand(depth_operand_type, 0));
|
||||
++stat_.dcl_count;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,6 +102,51 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
bool edram_rov_used, bool force_emit_source_map = false);
|
||||
~DxbcShaderTranslator() override;
|
||||
|
||||
// Shader modification bits. The anonymous bit-field struct and the 32-bit
// `value` member share storage, so a whole set of modification bits can be
// passed around as a single uint32_t.
union Modification {
|
||||
// If anything in this structure is changed in a way not compatible with
|
||||
// the previous layout, invalidate the pipeline storages by increasing this
|
||||
// version number (0xYYYYMMDD)!
|
||||
static constexpr uint32_t kVersion = 0x20201203;
|
||||
|
||||
enum class DepthStencilMode : uint32_t {
|
||||
kNoModifiers,
|
||||
// [earlydepthstencil] - enable if alpha test and alpha to coverage are
|
||||
// disabled; ignored if anything in the shader blocks early Z writing
|
||||
// (which is not known before translation, so this will be set anyway).
|
||||
kEarlyHint,
|
||||
// Converting the depth to the closest 32-bit float representable exactly
|
||||
// as a 20e4 float, to support invariance in cases when the guest
|
||||
// reuploads a previously resolved depth buffer to the EDRAM, rounding
|
||||
// towards zero (which contradicts the rounding used by the Direct3D 9
|
||||
// reference rasterizer, but allows SV_DepthLessEqual to be used to allow
|
||||
// slightly coarse early Z culling; also truncating regardless of whether
|
||||
// the shader writes depth and thus always uses SV_Depth, for
|
||||
// consistency). MSAA is limited - depth must be per-sample
|
||||
// (SV_DepthLessEqual also explicitly requires sample or centroid position
|
||||
// interpolation), thus the sampler has to run at sample frequency even if
|
||||
// the device supports stencil loading and thus true non-ROV MSAA via
|
||||
// SV_StencilRef.
|
||||
// Fixed-function viewport depth bounds must be snapped to float24 for
|
||||
// clamping purposes.
|
||||
kFloat24Truncating,
|
||||
// Similar to kFloat24Truncating, but rounding to the nearest even,
|
||||
// however, always using SV_Depth rather than SV_DepthLessEqual because
|
||||
// rounding up results in a bigger value. Same viewport usage rules apply.
|
||||
kFloat24Rounding,
|
||||
};
|
||||
|
||||
struct {
|
||||
// VS - pipeline stage and input configuration.
|
||||
Shader::HostVertexShaderType host_vertex_shader_type
|
||||
: Shader::kHostVertexShaderTypeBitCount;
|
||||
// PS, non-ROV - depth / stencil output mode.
|
||||
// Two bits hold the four DepthStencilMode enumerators declared above.
|
||||
DepthStencilMode depth_stencil_mode : 2;
|
||||
};
|
||||
// Packed view of all the bit-fields above; zero by default.
uint32_t value = 0;
|
||||
|
||||
// Wraps an already packed modification value (defaults to zero).
// NOTE(review): the constructor is not explicit, so a uint32_t converts
// implicitly - presumably intentional; confirm before changing.
Modification(uint32_t modification_value = 0) : value(modification_value) {}
|
||||
};
|
||||
|
||||
// Constant buffer bindings in space 0.
|
||||
enum class CbufferRegister {
|
||||
kSystemConstants,
|
||||
|
@ -144,12 +189,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kSysFlag_ROVStencilTest_Shift,
|
||||
// If the depth/stencil test has failed, but resulted in a stencil value
|
||||
// that is different than the one currently in the depth buffer, write it
|
||||
// anyway and don't run the shader (to check if the sample may be discarded
|
||||
// some way). This, however, also results in depth/stencil testing done
|
||||
// entirely early even when it passes to prevent writing in divergent places
|
||||
// in the shader. When the shader can kill, this must be set only for
|
||||
// RB_DEPTHCONTROL EARLY_Z_ENABLE, not for alpha test/alpha to coverage
|
||||
// disabled.
|
||||
// anyway and don't run the rest of the shader (to check if the sample may
|
||||
// be discarded some way) - use when alpha test and alpha to coverage are
|
||||
// disabled. Ignored by the shader if not applicable to it (like if it has
|
||||
// kill instructions or writes the depth output).
|
||||
// TODO(Triang3l): Investigate replacement with an alpha-to-mask flag,
|
||||
// checking `(flags & (alpha test | alpha to mask)) == (always | disabled)`,
|
||||
// taking into account the potential relation with occlusion queries (but
|
||||
// should be safe at least temporarily).
|
||||
kSysFlag_ROVDepthStencilEarlyWrite_Shift,
|
||||
|
||||
kSysFlag_Count,
|
||||
|
@ -238,15 +285,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// EDRAM address calculation.
|
||||
uint32_t sample_count_log2[2];
|
||||
float alpha_test_reference;
|
||||
// If alpha to mask is disabled, the entire alpha_to_mask value must be 0.
|
||||
// If alpha to mask is enabled, bits 0:7 are sample offsets, and bit 8 must
|
||||
// be 1.
|
||||
uint32_t alpha_to_mask;
|
||||
|
||||
float color_exp_bias[4];
|
||||
|
||||
uint32_t color_output_map[4];
|
||||
|
||||
// If alpha to mask is disabled, the entire alpha_to_mask value must be 0.
|
||||
// If alpha to mask is enabled, bits 0:7 are sample offsets, and bit 8 must
|
||||
// be 1.
|
||||
uint32_t edram_resolution_square_scale;
|
||||
uint32_t edram_pitch_tiles;
|
||||
union {
|
||||
|
@ -358,12 +405,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
bool is_signed;
|
||||
std::string name;
|
||||
};
|
||||
// The first binding returned is at t[SRVMainRegister::kBindfulTexturesStart]
|
||||
// of space SRVSpace::kMain.
|
||||
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(texture_bindings_.size());
|
||||
return texture_bindings_.data();
|
||||
}
|
||||
|
||||
// Arbitrary limit - there can't be more than 2048 in a shader-visible
|
||||
// descriptor heap, though some older hardware (tier 1 resource binding -
|
||||
|
@ -385,16 +426,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
xenos::AnisoFilter aniso_filter;
|
||||
std::string name;
|
||||
};
|
||||
const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(sampler_bindings_.size());
|
||||
return sampler_bindings_.data();
|
||||
}
|
||||
|
||||
// Returns the number of texture SRV and sampler offsets that need to be
|
||||
// passed via a constant buffer to the shader.
|
||||
uint32_t GetBindlessResourceCount() const {
|
||||
return uint32_t(texture_bindings_.size() + sampler_bindings_.size());
|
||||
}
|
||||
|
||||
// Unordered access view bindings in space 0.
|
||||
enum class UAVRegister {
|
||||
|
@ -402,10 +433,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kEdram,
|
||||
};
|
||||
|
||||
// Creates a copy of the shader with early depth/stencil testing forced,
|
||||
// overriding that alpha testing is used in the shader.
|
||||
static std::vector<uint8_t> ForceEarlyDepthStencil(const uint8_t* shader);
|
||||
|
||||
// Returns the format with internal flags for passing via the
|
||||
// edram_rt_format_flags system constant.
|
||||
static constexpr uint32_t ROV_AddColorFormatFlags(
|
||||
|
@ -440,16 +467,22 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
float& clamp_alpha_high, uint32_t& keep_mask_low,
|
||||
uint32_t& keep_mask_high);
|
||||
|
||||
uint32_t GetDefaultModification(
|
||||
xenos::ShaderType shader_type,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex) const override;
|
||||
|
||||
// Creates a special pixel shader without color outputs - this resets the
|
||||
// state of the translator.
|
||||
std::vector<uint8_t> CreateDepthOnlyPixelShader();
|
||||
|
||||
protected:
|
||||
void Reset() override;
|
||||
void Reset(xenos::ShaderType shader_type) override;
|
||||
|
||||
void StartTranslation() override;
|
||||
|
||||
std::vector<uint8_t> CompleteTranslation() override;
|
||||
void PostTranslation(Shader::Translation& translation,
|
||||
bool setup_shader_post_translation_info) override;
|
||||
|
||||
void ProcessLabel(uint32_t cf_index) override;
|
||||
|
||||
|
@ -650,6 +683,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kInputDomainPoint = 28,
|
||||
kUnorderedAccessView = 30,
|
||||
kInputCoverageMask = 35,
|
||||
kOutputDepthLessEqual = 39,
|
||||
};
|
||||
|
||||
// D3D10_SB_OPERAND_INDEX_DIMENSION
|
||||
|
@ -689,6 +723,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
return DxbcOperandDimension::kNoData;
|
||||
case DxbcOperandType::kInputPrimitiveID:
|
||||
case DxbcOperandType::kOutputDepth:
|
||||
case DxbcOperandType::kOutputDepthLessEqual:
|
||||
return DxbcOperandDimension::kScalar;
|
||||
case DxbcOperandType::kInputCoverageMask:
|
||||
return dest_in_dcl ? DxbcOperandDimension::kScalar
|
||||
|
@ -860,6 +895,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
return DxbcDest(DxbcOperandType::kUnorderedAccessView, write_mask,
|
||||
index_1d, index_2d);
|
||||
}
|
||||
// Destination operand of the kOutputDepthLessEqual type (the depth output
// counterpart of ODepth used for SV_DepthLessEqual), with only the X
// component enabled in the write mask.
static DxbcDest ODepthLE() {
|
||||
return DxbcDest(DxbcOperandType::kOutputDepthLessEqual, 0b0001);
|
||||
}
|
||||
|
||||
uint32_t GetMask() const {
|
||||
switch (GetDimension()) {
|
||||
|
@ -2145,21 +2183,19 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
(index_representation_1 << 25) | (index_representation_2 << 28);
|
||||
}
|
||||
|
||||
// Use these instead of is_vertex_shader/is_pixel_shader because they don't
|
||||
// take is_depth_only_pixel_shader_ into account.
|
||||
inline bool IsDxbcVertexOrDomainShader() const {
|
||||
return !is_depth_only_pixel_shader_ && is_vertex_shader();
|
||||
Modification GetDxbcShaderModification() const {
|
||||
return Modification(modification());
|
||||
}
|
||||
inline bool IsDxbcVertexShader() const {
|
||||
return IsDxbcVertexOrDomainShader() &&
|
||||
host_vertex_shader_type() == Shader::HostVertexShaderType::kVertex;
|
||||
|
||||
bool IsDxbcVertexShader() const {
|
||||
return is_vertex_shader() &&
|
||||
GetDxbcShaderModification().host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
}
|
||||
inline bool IsDxbcDomainShader() const {
|
||||
return IsDxbcVertexOrDomainShader() &&
|
||||
host_vertex_shader_type() != Shader::HostVertexShaderType::kVertex;
|
||||
}
|
||||
inline bool IsDxbcPixelShader() const {
|
||||
return is_depth_only_pixel_shader_ || is_pixel_shader();
|
||||
bool IsDxbcDomainShader() const {
|
||||
return is_vertex_shader() &&
|
||||
GetDxbcShaderModification().host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
}
|
||||
|
||||
// Whether to use switch-case rather than if (pc >= label) for control flow.
|
||||
|
@ -2181,10 +2217,37 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
uint32_t piece_temp_component, uint32_t accumulator_temp,
|
||||
uint32_t accumulator_temp_component);
|
||||
|
||||
// Converts the depth value externally clamped to the representable [0, 2)
|
||||
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the
|
||||
// nearest even. Source and destination may be the same, temporary must be
|
||||
// different than both.
|
||||
void PreClampedDepthTo20e4(uint32_t d24_temp, uint32_t d24_temp_component,
|
||||
uint32_t d32_temp, uint32_t d32_temp_component,
|
||||
uint32_t temp_temp, uint32_t temp_temp_component);
|
||||
// Whether system_temp_depth_stencil_ is needed by the shader currently being
// translated:
// - With ROV output (edram_rov_used_): when the shader writes depth, or when
//   ROV_IsDepthStencilEarly() is true.
// - Without ROV output: only when the shader writes depth and that depth has
//   to be converted to float24 (DSV_IsWritingFloat24Depth()).
bool IsDepthStencilSystemTempUsed() const {
|
||||
// See system_temp_depth_stencil_ documentation for explanation of cases.
|
||||
if (edram_rov_used_) {
|
||||
return writes_depth() || ROV_IsDepthStencilEarly();
|
||||
}
|
||||
// Non-ROV path: the register only holds the shader-written depth awaiting
// the float24 conversion.
return writes_depth() && DSV_IsWritingFloat24Depth();
|
||||
}
|
||||
// Whether the current non-ROV pixel shader should convert the depth to 20e4.
// True only when edram_rov_used_ is not set and the depth / stencil mode in
// the shader modification is kFloat24Truncating or kFloat24Rounding.
|
||||
bool DSV_IsWritingFloat24Depth() const {
|
||||
// Not applicable when edram_rov_used_ is set.
if (edram_rov_used_) {
|
||||
return false;
|
||||
}
|
||||
// Read the mode from the modification bits of the current translation.
Modification::DepthStencilMode depth_stencil_mode =
|
||||
GetDxbcShaderModification().depth_stencil_mode;
|
||||
return depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kFloat24Truncating ||
|
||||
depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kFloat24Rounding;
|
||||
}
|
||||
// Whether it's possible and worth skipping running the translated shader for
|
||||
// 2x2 quads.
|
||||
bool ROV_IsDepthStencilEarly() const {
|
||||
return !is_depth_only_pixel_shader_ && !writes_depth();
|
||||
return !is_depth_only_pixel_shader_ && !writes_depth() &&
|
||||
memexport_stream_constants().empty();
|
||||
}
|
||||
// Converts the depth value to 24-bit (storing the result in bits 0:23 and
|
||||
// zeros in 24:31, not creating room for stencil - since this may be involved
|
||||
|
@ -2197,8 +2260,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// Does all the depth/stencil-related things, including or not including
|
||||
// writing based on whether it's late, or on whether it's safe to do it early.
|
||||
// Updates system_temp_rov_params_ result and coverage if allowed and safe,
|
||||
// updates system_temp_rov_depth_stencil_, and if early and the coverage is
|
||||
// empty for all pixels in the 2x2 quad and safe to return early (stencil is
|
||||
// updates system_temp_depth_stencil_, and if early and the coverage is empty
|
||||
// for all pixels in the 2x2 quad and safe to return early (stencil is
|
||||
// unchanged or known that it's safe not to await kills/alphatest/AtoC),
|
||||
// returns from the shader.
|
||||
void ROV_DepthStencilTest();
|
||||
|
@ -2248,6 +2311,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// Discards the SSAA sample if it's masked out by alpha to coverage.
|
||||
void CompletePixelShader_WriteToRTVs_AlphaToMask();
|
||||
void CompletePixelShader_WriteToRTVs();
|
||||
void CompletePixelShader_DSV_DepthTo24Bit();
|
||||
// Masks the sample away from system_temp_rov_params_.x if it's not covered.
|
||||
// threshold_offset and temp.temp_component can be the same if needed.
|
||||
void CompletePixelShader_ROV_AlphaToMaskSample(
|
||||
|
@ -2333,6 +2397,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
xenos::TextureFilter min_filter,
|
||||
xenos::TextureFilter mip_filter,
|
||||
xenos::AnisoFilter aniso_filter);
|
||||
// Returns the number of texture SRV and sampler offsets that need to be
|
||||
// passed via a constant buffer to the shader.
|
||||
uint32_t GetBindlessResourceCount() const {
|
||||
return uint32_t(texture_bindings_.size() + sampler_bindings_.size());
|
||||
}
|
||||
// Marks fetch constants as used by the DXBC shader and returns DxbcSrc
|
||||
// for the words 01 (pair 0), 23 (pair 1) or 45 (pair 2) of the texture fetch
|
||||
// constant.
|
||||
|
@ -2364,7 +2433,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
static uint32_t AppendString(std::vector<uint32_t>& dest, const char* source);
|
||||
// Returns the length of a string as if it was appended to a DWORD stream, in
|
||||
// bytes.
|
||||
static inline uint32_t GetStringLength(const char* source) {
|
||||
static uint32_t GetStringLength(const char* source) {
|
||||
return uint32_t(xe::align(std::strlen(source) + 1, sizeof(uint32_t)));
|
||||
}
|
||||
|
||||
|
@ -2479,8 +2548,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
bool in_primitive_id_used_;
|
||||
// Whether InOutRegister::kDSInControlPointIndex has been used in the shader.
|
||||
bool in_control_point_index_used_;
|
||||
// Whether the XY of the pixel position has been used in the pixel shader.
|
||||
bool in_position_xy_used_;
|
||||
// Mask of the pixel/sample position actually used in the pixel shader.
|
||||
uint32_t in_position_used_;
|
||||
// Whether the faceness has been used in the pixel shader.
|
||||
bool in_front_face_used_;
|
||||
|
||||
|
@ -2518,15 +2587,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// W - Base-relative resolution-scaled EDRAM offset for 64bpp color data, in
|
||||
// dwords.
|
||||
uint32_t system_temp_rov_params_;
|
||||
// ROV only - new depth/stencil data. 4 VGPRs when not writing to oDepth, 1
|
||||
// VGPR when writing to oDepth. Not used in the depth-only pixel shader (or,
|
||||
// more formally, if neither early depth-stencil nor oDepth are used) because
|
||||
// it always calculates and writes in the same place.
|
||||
// When not writing to oDepth: New per-sample depth/stencil values, generated
|
||||
// during early depth/stencil test (actual writing checks coverage bits).
|
||||
// When writing to oDepth: X also used to hold the depth written by the
|
||||
// shader, later used as a temporary during depth/stencil testing.
|
||||
uint32_t system_temp_rov_depth_stencil_;
|
||||
// Two purposes:
|
||||
// - When writing to oDepth, and either using ROV or converting the depth to
|
||||
// float24: X also used to hold the depth written by the shader,
|
||||
// later used as a temporary during depth/stencil testing.
|
||||
// - Otherwise, when using ROV output with ROV_IsDepthStencilEarly being true:
|
||||
// New per-sample depth/stencil values, generated during early depth/stencil
|
||||
// test (actual writing checks coverage bits).
|
||||
uint32_t system_temp_depth_stencil_;
|
||||
// Up to 4 color outputs in pixel shaders (because of exponent bias, alpha
|
||||
// test and remapping, and also for ROV writing).
|
||||
uint32_t system_temps_color_[4];
|
||||
|
@ -2587,6 +2655,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
uint32_t srv_index_bindless_textures_3d_;
|
||||
uint32_t srv_index_bindless_textures_cube_;
|
||||
|
||||
// The first binding is at t[SRVMainRegister::kBindfulTexturesStart] of space
|
||||
// SRVSpace::kMain.
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
std::unordered_map<uint32_t, uint32_t>
|
||||
texture_bindings_for_bindful_srv_indices_;
|
||||
|
|
|
@ -677,7 +677,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// Whether to use gradients (implicit or explicit) for LOD calculation.
|
||||
bool use_computed_lod =
|
||||
instr.attributes.use_computed_lod &&
|
||||
(IsDxbcPixelShader() || instr.attributes.use_register_gradients);
|
||||
(is_pixel_shader() || instr.attributes.use_register_gradients);
|
||||
if (instr.opcode == FetchOpcode::kGetTextureComputedLod &&
|
||||
(!use_computed_lod || instr.attributes.use_register_gradients)) {
|
||||
assert_always();
|
||||
|
|
|
@ -106,7 +106,7 @@ void DxbcShaderTranslator::ExportToMemory() {
|
|||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp),
|
||||
DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV));
|
||||
if (IsDxbcPixelShader()) {
|
||||
if (is_pixel_shader()) {
|
||||
// Disable memexport in pixel shaders with supersampling since VPOS is
|
||||
// ambiguous.
|
||||
if (edram_rov_used_) {
|
||||
|
|
|
@ -167,7 +167,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// bigger) to integer to system_temp_rov_params_.zw.
|
||||
// system_temp_rov_params_.z = X host pixel position as uint
|
||||
// system_temp_rov_params_.w = Y host pixel position as uint
|
||||
in_position_xy_used_ = true;
|
||||
in_position_used_ |= 0b0011;
|
||||
DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b1100),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition), 0b01000000));
|
||||
// Revert the resolution scale to convert the position to guest pixels.
|
||||
|
@ -315,7 +315,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// Add host pixel offsets.
|
||||
// system_temp_rov_params_.y = scaled 32bpp depth/stencil address
|
||||
// system_temp_rov_params_.z = scaled 32bpp color offset if needed
|
||||
in_position_xy_used_ = true;
|
||||
in_position_used_ |= 0b0011;
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
// Convert a position component to integer.
|
||||
DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b0001),
|
||||
|
@ -417,23 +417,50 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
// With early depth/stencil, depth/stencil writing may be deferred to the
|
||||
// end of the shader to prevent writing in case something (like alpha test,
|
||||
// which is dynamic GPU state) discards the pixel. So, write directly to the
|
||||
// persistent register, system_temp_rov_depth_stencil_, instead of a local
|
||||
// persistent register, system_temp_depth_stencil_, instead of a local
|
||||
// temporary register.
|
||||
DxbcDest sample_depth_stencil_dest(
|
||||
depth_stencil_early
|
||||
? DxbcDest::R(system_temp_rov_depth_stencil_, 1 << i)
|
||||
: temp_x_dest);
|
||||
depth_stencil_early ? DxbcDest::R(system_temp_depth_stencil_, 1 << i)
|
||||
: temp_x_dest);
|
||||
DxbcSrc sample_depth_stencil_src(
|
||||
depth_stencil_early
|
||||
? DxbcSrc::R(system_temp_rov_depth_stencil_).Select(i)
|
||||
: temp_x_src);
|
||||
depth_stencil_early ? DxbcSrc::R(system_temp_depth_stencil_).Select(i)
|
||||
: temp_x_src);
|
||||
|
||||
if (!i) {
|
||||
if (writes_depth()) {
|
||||
// Clamp oDepth to the lower viewport depth bound (depth clamp happens
|
||||
// after the pixel shader in the pipeline, at least on Direct3D 11 and
|
||||
// Vulkan, thus applies to the shader's depth output too).
|
||||
system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index;
|
||||
DxbcOpMax(DxbcDest::R(system_temp_depth_stencil_, 0b0001),
|
||||
DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX),
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_EdramDepthRange_Vec)
|
||||
.Select(kSysConst_EdramDepthRangeOffset_Comp));
|
||||
// Calculate the upper Z range bound to temp.x for clamping after
|
||||
// biasing.
|
||||
// temp.x = viewport maximum depth
|
||||
system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index;
|
||||
DxbcOpAdd(temp_x_dest,
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_EdramDepthRange_Vec)
|
||||
.Select(kSysConst_EdramDepthRangeOffset_Comp),
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_EdramDepthRange_Vec)
|
||||
.Select(kSysConst_EdramDepthRangeScale_Comp));
|
||||
// Clamp oDepth to the upper viewport depth bound (already not above 1,
|
||||
// but saturate for total safety).
|
||||
// temp.x = free
|
||||
DxbcOpMin(DxbcDest::R(system_temp_depth_stencil_, 0b0001),
|
||||
DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX),
|
||||
temp_x_src, true);
|
||||
// Convert the shader-generated depth to 24-bit, using temp.x as
|
||||
// temporary.
|
||||
ROV_DepthTo24Bit(system_temp_rov_depth_stencil_, 0,
|
||||
system_temp_rov_depth_stencil_, 0, temp, 0);
|
||||
ROV_DepthTo24Bit(system_temp_depth_stencil_, 0,
|
||||
system_temp_depth_stencil_, 0, temp, 0);
|
||||
} else {
|
||||
// Load the first sample's Z*W and W to temp.xy - need this regardless
|
||||
// of coverage for polygon offset.
|
||||
|
@ -529,14 +556,14 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
}
|
||||
|
||||
// Get if the current sample is covered to temp.w.
|
||||
// temp.x = first sample's viewport space Z or 24-bit oDepth
|
||||
// temp.x = first sample's viewport space Z if not writing to oDepth
|
||||
// temp.y = polygon offset if not writing to oDepth
|
||||
// temp.z = viewport maximum depth if not writing to oDepth
|
||||
// temp.w = coverage of the current sample
|
||||
DxbcOpAnd(temp_w_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
|
||||
DxbcSrc::LU(1 << i));
|
||||
// Check if the current sample is covered. Release 1 VGPR.
|
||||
// temp.x = first sample's viewport space Z or 24-bit oDepth
|
||||
// temp.x = first sample's viewport space Z if not writing to oDepth
|
||||
// temp.y = polygon offset if not writing to oDepth
|
||||
// temp.z = viewport maximum depth if not writing to oDepth
|
||||
// temp.w = free
|
||||
|
@ -546,7 +573,7 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
// Copy the 24-bit depth common to all samples to sample_depth_stencil.
|
||||
// temp.x = shader-generated 24-bit depth
|
||||
DxbcOpMov(sample_depth_stencil_dest,
|
||||
DxbcSrc::R(system_temp_rov_depth_stencil_, DxbcSrc::kXXXX));
|
||||
DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX));
|
||||
} else {
|
||||
if (i) {
|
||||
// Sample's depth precalculated for sample 0 (for slope-scaled depth
|
||||
|
@ -997,51 +1024,60 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
// temp.z = viewport maximum depth if not writing to oDepth
|
||||
// temp.w = whether depth/stencil has been modified
|
||||
DxbcOpINE(temp_w_dest, sample_depth_stencil_src, temp_w_src);
|
||||
// Check if need to write.
|
||||
// temp.x? = resulting sample depth/stencil
|
||||
// temp.y = polygon offset if not writing to oDepth
|
||||
// temp.z = viewport maximum depth if not writing to oDepth
|
||||
// temp.w = free
|
||||
DxbcOpIf(true, temp_w_src);
|
||||
{
|
||||
if (depth_stencil_early) {
|
||||
// Get if early depth/stencil write is enabled to temp.w.
|
||||
// temp.w = whether early depth/stencil write is enabled
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
DxbcOpAnd(temp_w_dest,
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp),
|
||||
DxbcSrc::LU(kSysFlag_ROVDepthStencilEarlyWrite));
|
||||
// Check if need to write early.
|
||||
// temp.w = free
|
||||
DxbcOpIf(true, temp_w_src);
|
||||
}
|
||||
// Write the new depth/stencil.
|
||||
if (uav_index_edram_ == kBindingIndexUnallocated) {
|
||||
uav_index_edram_ = uav_count_++;
|
||||
}
|
||||
DxbcOpStoreUAVTyped(
|
||||
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
|
||||
sample_depth_stencil_src);
|
||||
if (depth_stencil_early) {
|
||||
// Need to still run the shader to know whether to write the
|
||||
// depth/stencil value.
|
||||
DxbcOpElse();
|
||||
// Set sample bit out of bits 4:7 of system_temp_rov_params_.x if need
|
||||
// to write later (after checking if the sample is not discarded by a
|
||||
// kill instruction, alphatest or alpha-to-coverage).
|
||||
DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
|
||||
DxbcSrc::LU(1 << (4 + i)));
|
||||
// Close the early depth/stencil check.
|
||||
DxbcOpEndIf();
|
||||
if (depth_stencil_early && !CanWriteZEarly()) {
|
||||
// Set the sample bit in bits 4:7 of system_temp_rov_params_.x - always
|
||||
// need to write late in this shader, as it may do something like
|
||||
// explicitly killing pixels.
|
||||
DxbcOpBFI(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(1),
|
||||
DxbcSrc::LU(4 + i), temp_w_src,
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX));
|
||||
} else {
|
||||
// Check if need to write.
|
||||
// temp.x? = resulting sample depth/stencil
|
||||
// temp.y = polygon offset if not writing to oDepth
|
||||
// temp.z = viewport maximum depth if not writing to oDepth
|
||||
// temp.w = free
|
||||
DxbcOpIf(true, temp_w_src);
|
||||
{
|
||||
if (depth_stencil_early) {
|
||||
// Get if early depth/stencil write is enabled to temp.w.
|
||||
// temp.w = whether early depth/stencil write is enabled
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
DxbcOpAnd(temp_w_dest,
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp),
|
||||
DxbcSrc::LU(kSysFlag_ROVDepthStencilEarlyWrite));
|
||||
// Check if need to write early.
|
||||
// temp.w = free
|
||||
DxbcOpIf(true, temp_w_src);
|
||||
}
|
||||
// Write the new depth/stencil.
|
||||
if (uav_index_edram_ == kBindingIndexUnallocated) {
|
||||
uav_index_edram_ = uav_count_++;
|
||||
}
|
||||
DxbcOpStoreUAVTyped(
|
||||
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
|
||||
sample_depth_stencil_src);
|
||||
if (depth_stencil_early) {
|
||||
// Need to still run the shader to know whether to write the
|
||||
// depth/stencil value.
|
||||
DxbcOpElse();
|
||||
// Set the sample bit in bits 4:7 of system_temp_rov_params_.x if need
|
||||
// to write later (after checking if the sample is not discarded by a
|
||||
// kill instruction, alphatest or alpha-to-coverage).
|
||||
DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
|
||||
DxbcSrc::LU(1 << (4 + i)));
|
||||
// Close the early depth/stencil check.
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
}
|
||||
// Close the write check.
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
// Close the write check.
|
||||
DxbcOpEndIf();
|
||||
|
||||
// Release sample_temp.
|
||||
PopSystemTemp();
|
||||
|
@ -1720,7 +1756,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() {
|
|||
// Convert SSAA sample position to integer to temp.xy (not caring about the
|
||||
// resolution scale because it's not supported anywhere on the RTV output
|
||||
// path).
|
||||
in_position_xy_used_ = true;
|
||||
in_position_used_ |= 0b0011;
|
||||
DxbcOpFToU(DxbcDest::R(temp, 0b0011),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition)));
|
||||
|
||||
|
@ -1913,6 +1949,139 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
|||
PopSystemTemp(2);
|
||||
}
|
||||
|
||||
// Emits the code that snaps the pixel shader's depth output to a value
// exactly representable as a 20e4 float. Does nothing unless
// DSV_IsWritingFloat24Depth() is true.
//
// The source depth is system_temp_depth_stencil_.x if the shader writes
// depth, or a saturated copy of the Z of the pixel position input otherwise
// (a system temporary is pushed for it and popped at the end).
//
// - kFloat24Truncating: zeroes the low mantissa bits (3 for normalized 20e4
//   values, up to 23 at exponent -34, zero result below that), writing to
//   ODepth if the shader writes depth and to ODepthLE otherwise.
// - Otherwise (kFloat24Rounding): converts to 20e4 via PreClampedDepthTo20e4
//   and expands the result back to float32, always writing to ODepth.
void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
|
||||
if (!DSV_IsWritingFloat24Depth()) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t temp;
|
||||
if (writes_depth()) {
|
||||
// The depth is already written to system_temp_depth_stencil_.x and clamped
|
||||
// to 0...1 with NaNs dropped (saturating in StoreResult); yzw are free.
|
||||
temp = system_temp_depth_stencil_;
|
||||
} else {
|
||||
// Need a temporary variable; copy the sample's depth input to it and
|
||||
// saturate it (in Direct3D 11, depth is clamped to the viewport bounds
|
||||
// after the pixel shader, and SV_Position.z contains the unclamped depth,
|
||||
// which may be outside the viewport's depth range if it's biased). It will
|
||||
// be clamped to the viewport bounds afterwards anyway, but saturating here
|
||||
// makes it possible to assume a clamped value while working with the bit
|
||||
// representation.
|
||||
temp = PushSystemTemp();
|
||||
// Mark the Z component of the pixel position input as used.
in_position_used_ |= 0b0100;
|
||||
DxbcOpMov(
|
||||
DxbcDest::R(temp, 0b0001),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition), DxbcSrc::kZZZZ),
|
||||
true);
|
||||
}
|
||||
|
||||
// temp.x holds the depth being converted, temp.y is scratch.
DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001));
|
||||
DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX));
|
||||
DxbcDest temp_y_dest(DxbcDest::R(temp, 0b0010));
|
||||
DxbcSrc temp_y_src(DxbcSrc::R(temp, DxbcSrc::kYYYY));
|
||||
|
||||
if (GetDxbcShaderModification().depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kFloat24Truncating) {
|
||||
// Simplified conversion, always less than or equal to the original value -
|
||||
// just drop the lower bits.
|
||||
// The float32 exponent bias is 127.
|
||||
// After saturating, the exponent range is -127...0.
|
||||
// The smallest normalized 20e4 exponent is -14 - should drop 3 mantissa
|
||||
// bits at -14 or above.
|
||||
// The smallest denormalized 20e4 exponent is -34 - should drop 23 mantissa
|
||||
// bits at -34.
|
||||
// Anything smaller than 2^-34 becomes 0.
|
||||
// The result never exceeds the input, so the less-or-equal depth output can
// be used when the shader doesn't write depth on its own.
DxbcDest truncate_dest(writes_depth() ? DxbcDest::ODepth()
|
||||
: DxbcDest::ODepthLE());
|
||||
// Check if the number is representable as a float24 after truncation - the
|
||||
// exponent is at least -34.
|
||||
// 0x2E800000 is the biased exponent 93 (-34 + 127) shifted into bits 23:30.
DxbcOpUGE(temp_y_dest, temp_x_src, DxbcSrc::LU(0x2E800000));
|
||||
DxbcOpIf(true, temp_y_src);
|
||||
{
|
||||
// Extract the biased float32 exponent to temp.y.
|
||||
// temp.y = 113+ at exponent -14+.
|
||||
// temp.y = 93 at exponent -34.
|
||||
DxbcOpUBFE(temp_y_dest, DxbcSrc::LU(8), DxbcSrc::LU(23), temp_x_src);
|
||||
// Convert exponent to the unclamped number of bits to truncate.
|
||||
// 116 - 113 = 3.
|
||||
// 116 - 93 = 23.
|
||||
// temp.y = 3 or below at exponent -14 or above.
|
||||
// temp.y = 23 at exponent -34.
|
||||
DxbcOpIAdd(temp_y_dest, DxbcSrc::LI(116), -temp_y_src);
|
||||
// Clamp the truncated bit count to drop 3 bits of any normal number.
|
||||
// Exponents below -34 are handled separately.
|
||||
// temp.y = 3 at exponent -14.
|
||||
// temp.y = 23 at exponent -34.
|
||||
DxbcOpIMax(temp_y_dest, temp_y_src, DxbcSrc::LI(3));
|
||||
// Truncate the mantissa - fill the low bits with zeros.
|
||||
DxbcOpBFI(truncate_dest, temp_y_src, DxbcSrc::LU(0), DxbcSrc::LU(0),
|
||||
temp_x_src);
|
||||
}
|
||||
// The number is not representable as float24 after truncation - zero.
|
||||
DxbcOpElse();
|
||||
DxbcOpMov(truncate_dest, DxbcSrc::LF(0.0f));
|
||||
// Close the non-zero result check.
|
||||
DxbcOpEndIf();
|
||||
} else {
|
||||
// Properly convert to 20e4, with rounding to the nearest even.
|
||||
// Source and destination are both temp.x, temp.y is the scratch component.
PreClampedDepthTo20e4(temp, 0, temp, 0, temp, 1);
|
||||
// Convert back to float32.
|
||||
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
||||
// Unpack the exponent to temp.y.
|
||||
DxbcOpUShR(temp_y_dest, temp_x_src, DxbcSrc::LU(20));
|
||||
// Unpack the mantissa to temp.x.
|
||||
DxbcOpAnd(temp_x_dest, temp_x_src, DxbcSrc::LU(0xFFFFF));
|
||||
// Check if the number is denormalized.
|
||||
DxbcOpIf(false, temp_y_src);
|
||||
{
|
||||
// Check if the number is non-zero (if the mantissa isn't zero - the
|
||||
// exponent is known to be zero at this point).
|
||||
DxbcOpIf(true, temp_x_src);
|
||||
{
|
||||
// Normalize the mantissa.
|
||||
// Note that HLSL firstbithigh(x) is compiled to DXBC like:
|
||||
// `x ? 31 - firstbit_hi(x) : -1`
|
||||
// (returns the index from the LSB, not the MSB, but -1 for zero too).
|
||||
// temp.y = firstbit_hi(mantissa)
|
||||
DxbcOpFirstBitHi(temp_y_dest, temp_x_src);
|
||||
// temp.y = 20 - firstbithigh(mantissa)
|
||||
// Or:
|
||||
// temp.y = 20 - (31 - firstbit_hi(mantissa))
|
||||
DxbcOpIAdd(temp_y_dest, temp_y_src, DxbcSrc::LI(20 - 31));
|
||||
// mantissa = mantissa << (20 - firstbithigh(mantissa))
|
||||
// AND 0xFFFFF not needed after this - BFI will do it.
|
||||
DxbcOpIShL(temp_x_dest, temp_x_src, temp_y_src);
|
||||
// Get the normalized exponent.
|
||||
// exponent = 1 - (20 - firstbithigh(mantissa))
|
||||
DxbcOpIAdd(temp_y_dest, DxbcSrc::LI(1), -temp_y_src);
|
||||
}
|
||||
// The number is zero.
|
||||
DxbcOpElse();
|
||||
{
|
||||
// Set the unbiased exponent to -112 for zero - 112 will be added later,
|
||||
// resulting in zero float32.
|
||||
DxbcOpMov(temp_y_dest, DxbcSrc::LI(-112));
|
||||
}
|
||||
// Close the non-zero check.
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
// Close the denormal check.
|
||||
DxbcOpEndIf();
|
||||
// Bias the exponent and move it to the correct location in float32 to
|
||||
// temp.y.
|
||||
DxbcOpIMAd(temp_y_dest, temp_y_src, DxbcSrc::LI(1 << 23),
|
||||
DxbcSrc::LI(112 << 23));
|
||||
// Combine the mantissa and the exponent into the result.
|
||||
// The 20 mantissa bits are inserted at bit 3 of the word holding the biased
// exponent.
DxbcOpBFI(DxbcDest::ODepth(), DxbcSrc::LU(20), DxbcSrc::LU(3), temp_x_src,
|
||||
temp_y_src);
|
||||
}
|
||||
|
||||
if (!writes_depth()) {
|
||||
// Release temp.
|
||||
PopSystemTemp();
|
||||
}
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMaskSample(
|
||||
uint32_t sample_index, float threshold_base, DxbcSrc threshold_offset,
|
||||
float threshold_offset_scale, uint32_t temp, uint32_t temp_component) {
|
||||
|
@ -1957,7 +2126,7 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() {
|
|||
// floating-point. With resolution scaling, still using host pixels, to
|
||||
// preserve the idea of dithering.
|
||||
// temp.x = alpha to coverage offset as float 0.0...3.0.
|
||||
in_position_xy_used_ = true;
|
||||
in_position_used_ |= 0b0011;
|
||||
DxbcOpFToU(DxbcDest::R(temp, 0b0011),
|
||||
DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition)));
|
||||
DxbcOpAnd(DxbcDest::R(temp, 0b0010), DxbcSrc::R(temp, DxbcSrc::kYYYY),
|
||||
|
@ -2067,7 +2236,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
DxbcOpStoreUAVTyped(
|
||||
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
|
||||
DxbcSrc::R(system_temp_rov_depth_stencil_).Select(i));
|
||||
DxbcSrc::R(system_temp_depth_stencil_).Select(i));
|
||||
}
|
||||
// Close the write check.
|
||||
DxbcOpEndIf();
|
||||
|
@ -3059,15 +3228,16 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
|||
CompletePixelShader_WriteToROV();
|
||||
} else {
|
||||
CompletePixelShader_WriteToRTVs();
|
||||
CompletePixelShader_DSV_DepthTo24Bit();
|
||||
}
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp,
|
||||
uint32_t d24_temp_component,
|
||||
uint32_t d32_temp,
|
||||
uint32_t d32_temp_component,
|
||||
uint32_t temp_temp,
|
||||
uint32_t temp_temp_component) {
|
||||
void DxbcShaderTranslator::PreClampedDepthTo20e4(uint32_t d24_temp,
|
||||
uint32_t d24_temp_component,
|
||||
uint32_t d32_temp,
|
||||
uint32_t d32_temp_component,
|
||||
uint32_t temp_temp,
|
||||
uint32_t temp_temp_component) {
|
||||
assert_true(temp_temp != d24_temp ||
|
||||
temp_temp_component != d24_temp_component);
|
||||
assert_true(temp_temp != d32_temp ||
|
||||
|
@ -3079,68 +3249,83 @@ void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp,
|
|||
DxbcDest temp_dest(DxbcDest::R(temp_temp, 1 << temp_temp_component));
|
||||
DxbcSrc temp_src(DxbcSrc::R(temp_temp).Select(temp_temp_component));
|
||||
|
||||
// CFloat24 from d3dref9.dll.
|
||||
// Assuming the depth is already clamped to [0, 2) (in all places, the depth
|
||||
// is written with the saturate flag set).
|
||||
|
||||
// Check if the number is too small to be represented as normalized 20e4.
|
||||
// temp = f32 < 2^-14
|
||||
DxbcOpULT(temp_dest, d32_src, DxbcSrc::LU(0x38800000));
|
||||
// Handle denormalized numbers separately.
|
||||
DxbcOpIf(true, temp_src);
|
||||
{
|
||||
// temp = f32 >> 23
|
||||
DxbcOpUShR(temp_dest, d32_src, DxbcSrc::LU(23));
|
||||
// temp = 113 - (f32 >> 23)
|
||||
DxbcOpIAdd(temp_dest, DxbcSrc::LI(113), -temp_src);
|
||||
// Don't allow the shift to overflow, since in DXBC the lower 5 bits of the
|
||||
// shift amount are used (otherwise 0 becomes 8).
|
||||
// temp = min(113 - (f32 >> 23), 24)
|
||||
DxbcOpUMin(temp_dest, temp_src, DxbcSrc::LU(24));
|
||||
// biased_f32 = (f32 & 0x7FFFFF) | 0x800000
|
||||
DxbcOpBFI(d24_dest, DxbcSrc::LU(9), DxbcSrc::LU(23), DxbcSrc::LU(1),
|
||||
d32_src);
|
||||
// biased_f32 = ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24)
|
||||
DxbcOpUShR(d24_dest, d24_src, temp_src);
|
||||
}
|
||||
// Not denormalized?
|
||||
DxbcOpElse();
|
||||
{
|
||||
// Bias the exponent.
|
||||
// biased_f32 = f32 + (-112 << 23)
|
||||
// (left shift of a negative value is undefined behavior)
|
||||
DxbcOpIAdd(d24_dest, d32_src, DxbcSrc::LU(0xC8000000u));
|
||||
}
|
||||
// Close the denormal check.
|
||||
DxbcOpEndIf();
|
||||
// Build the 20e4 number.
|
||||
// temp = (biased_f32 >> 3) & 1
|
||||
DxbcOpUBFE(temp_dest, DxbcSrc::LU(1), DxbcSrc::LU(3), d24_src);
|
||||
// f24 = biased_f32 + 3
|
||||
DxbcOpIAdd(d24_dest, d24_src, DxbcSrc::LU(3));
|
||||
// f24 = biased_f32 + 3 + ((biased_f32 >> 3) & 1)
|
||||
DxbcOpIAdd(d24_dest, d24_src, temp_src);
|
||||
// f24 = ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF
|
||||
DxbcOpUBFE(d24_dest, DxbcSrc::LU(24), DxbcSrc::LU(3), d24_src);
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp,
|
||||
uint32_t d24_temp_component,
|
||||
uint32_t d32_temp,
|
||||
uint32_t d32_temp_component,
|
||||
uint32_t temp_temp,
|
||||
uint32_t temp_temp_component) {
|
||||
assert_true(temp_temp != d32_temp ||
|
||||
temp_temp_component != d32_temp_component);
|
||||
// Source and destination may be the same.
|
||||
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
DxbcOpAnd(temp_dest,
|
||||
DxbcOpAnd(DxbcDest::R(temp_temp, 1 << temp_temp_component),
|
||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp),
|
||||
DxbcSrc::LU(kSysFlag_ROVDepthFloat24));
|
||||
// Convert according to the format.
|
||||
DxbcOpIf(true, temp_src);
|
||||
DxbcOpIf(true, DxbcSrc::R(temp_temp).Select(temp_temp_component));
|
||||
{
|
||||
// 20e4 conversion, using 1 VGPR.
|
||||
// CFloat24 from d3dref9.dll.
|
||||
// Assuming the depth is already clamped to [0, 2) (in all places, the depth
|
||||
// is written with the saturate flag set).
|
||||
|
||||
// Check if the number is too small to be represented as normalized 20e4.
|
||||
// temp = f32 < 2^-14
|
||||
DxbcOpULT(temp_dest, d32_src, DxbcSrc::LU(0x38800000));
|
||||
// Handle denormalized numbers separately.
|
||||
DxbcOpIf(true, temp_src);
|
||||
{
|
||||
// temp = f32 >> 23
|
||||
DxbcOpUShR(temp_dest, d32_src, DxbcSrc::LU(23));
|
||||
// temp = 113 - (f32 >> 23)
|
||||
DxbcOpIAdd(temp_dest, DxbcSrc::LI(113), -temp_src);
|
||||
// Don't allow the shift to overflow, since in DXBC the lower 5 bits of
|
||||
// the shift amount are used (otherwise 0 becomes 8).
|
||||
// temp = min(113 - (f32 >> 23), 24)
|
||||
DxbcOpUMin(temp_dest, temp_src, DxbcSrc::LU(24));
|
||||
// biased_f32 = (f32 & 0x7FFFFF) | 0x800000
|
||||
DxbcOpBFI(d24_dest, DxbcSrc::LU(9), DxbcSrc::LU(23), DxbcSrc::LU(1),
|
||||
d32_src);
|
||||
// biased_f32 =
|
||||
// ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24)
|
||||
DxbcOpUShR(d24_dest, d24_src, temp_src);
|
||||
}
|
||||
// Not denormalized?
|
||||
DxbcOpElse();
|
||||
{
|
||||
// Bias the exponent.
|
||||
// biased_f32 = f32 + (-112 << 23)
|
||||
// (left shift of a negative value is undefined behavior)
|
||||
DxbcOpIAdd(d24_dest, d32_src, DxbcSrc::LU(0xC8000000u));
|
||||
}
|
||||
// Close the denormal check.
|
||||
DxbcOpEndIf();
|
||||
// Build the 20e4 number.
|
||||
// temp = (biased_f32 >> 3) & 1
|
||||
DxbcOpUBFE(temp_dest, DxbcSrc::LU(1), DxbcSrc::LU(3), d24_src);
|
||||
// f24 = biased_f32 + 3
|
||||
DxbcOpIAdd(d24_dest, d24_src, DxbcSrc::LU(3));
|
||||
// f24 = biased_f32 + 3 + ((biased_f32 >> 3) & 1)
|
||||
DxbcOpIAdd(d24_dest, d24_src, temp_src);
|
||||
// f24 = ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF
|
||||
DxbcOpUBFE(d24_dest, DxbcSrc::LU(24), DxbcSrc::LU(3), d24_src);
|
||||
// 20e4 conversion.
|
||||
PreClampedDepthTo20e4(d24_temp, d24_temp_component, d32_temp,
|
||||
d32_temp_component, temp_temp, temp_temp_component);
|
||||
}
|
||||
DxbcOpElse();
|
||||
{
|
||||
// Unorm24 conversion.
|
||||
|
||||
DxbcDest d24_dest(DxbcDest::R(d24_temp, 1 << d24_temp_component));
|
||||
DxbcSrc d24_src(DxbcSrc::R(d24_temp).Select(d24_temp_component));
|
||||
// Multiply by float(0xFFFFFF).
|
||||
DxbcOpMul(d24_dest, d32_src, DxbcSrc::LF(16777215.0f));
|
||||
DxbcOpMul(d24_dest, DxbcSrc::R(d32_temp).Select(d32_temp_component),
|
||||
DxbcSrc::LF(16777215.0f));
|
||||
// Round to the nearest even integer. This seems to be the correct way:
|
||||
// rounding towards zero gives 0xFF instead of 0x100 in clear shaders in,
|
||||
// for instance, Halo 3, but other clear shaders in it are also broken if
|
||||
|
|
|
@ -40,9 +40,63 @@ DEFINE_bool(
|
|||
"be fully covered when MSAA is used with fullscreen passes.",
|
||||
"GPU");
|
||||
|
||||
DEFINE_string(
|
||||
depth_float24_conversion, "",
|
||||
"Method for converting 32-bit Z values to 20e4 floating point when using "
|
||||
"host depth buffers without native 20e4 support (when not using rasterizer-"
|
||||
"ordered views / fragment shader interlocks to perform depth testing "
|
||||
"manually).\n"
|
||||
"Use: [any, on_copy, truncate, round]\n"
|
||||
" on_copy:\n"
|
||||
" Do depth testing at host precision, converting when copying between "
|
||||
"host depth buffers and the EDRAM buffer to support reinterpretation, "
|
||||
"maintaining two copies, in both host and 20e4 formats, for reloading data "
|
||||
"to host depth buffers when it wasn't overwritten.\n"
|
||||
" + Highest performance, allows early depth test and writing.\n"
|
||||
" + Host MSAA is possible with pixel-rate shading where supported.\n"
|
||||
" - EDRAM > RAM > EDRAM depth buffer round trip done in certain games "
|
||||
"(such as GTA IV) destroys precision irreparably, causing artifacts if "
|
||||
"another rendering pass is done after the EDRAM reupload.\n"
|
||||
" truncate:\n"
|
||||
" Convert to 20e4 directly in pixel shaders, always rounding down.\n"
|
||||
" + Good performance, conservative early depth test is possible.\n"
|
||||
" + No precision loss when anything changes in the storage of the depth "
|
||||
"buffer, EDRAM > RAM > EDRAM copying preserves precision.\n"
|
||||
" - Rounding mode is incorrect, sometimes giving results smaller than "
|
||||
"they should be - may cause inaccuracy especially in edge cases when the "
|
||||
"game wants to write an exact value.\n"
|
||||
" - Host MSAA is only possible at SSAA speed, with per-sample shading.\n"
|
||||
" round:\n"
|
||||
" Convert to 20e4 directly in pixel shaders, correctly rounding to the "
|
||||
"nearest even.\n"
|
||||
" + Highest accuracy.\n"
|
||||
" - Significantly limited performance, early depth test is not possible.\n"
|
||||
" - Host MSAA is only possible at SSAA speed, with per-sample shading.\n"
|
||||
" Any other value:\n"
|
||||
" Choose what is considered the most optimal (currently \"on_copy\").",
|
||||
"GPU");
|
||||
|
||||
DEFINE_int32(query_occlusion_fake_sample_count, 1000,
|
||||
"If set to -1 no sample counts are written, games may hang. Else, "
|
||||
"the sample count of every tile will be incremented on every "
|
||||
"EVENT_WRITE_ZPD by this number. Setting this to 0 means "
|
||||
"everything is reported as occluded.",
|
||||
"GPU");
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace flags {
|
||||
|
||||
// Maps the depth_float24_conversion cvar string to the corresponding
// DepthFloat24Conversion mode. Only "truncate" and "round" are recognized
// explicitly; any other value (including the empty default) selects kOnCopy.
DepthFloat24Conversion GetDepthFloat24Conversion() {
  const auto& requested_mode = cvars::depth_float24_conversion;
  if (requested_mode == "round") {
    return DepthFloat24Conversion::kOnOutputRounding;
  }
  if (requested_mode == "truncate") {
    return DepthFloat24Conversion::kOnOutputTruncating;
  }
  // Fallback for "on_copy", "any", and everything unrecognized.
  return DepthFloat24Conversion::kOnCopy;
}
|
||||
|
||||
} // namespace flags
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -22,6 +22,69 @@ DECLARE_bool(gpu_allow_invalid_fetch_constants);
|
|||
|
||||
DECLARE_bool(half_pixel_offset);
|
||||
|
||||
DECLARE_string(depth_float24_conversion);
|
||||
|
||||
DECLARE_int32(query_occlusion_fake_sample_count);
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace flags {
|
||||
|
||||
enum class DepthFloat24Conversion {
|
||||
// Doing depth test at the host precision, converting to 20e4 to support
|
||||
// reinterpretation, but keeping a separate EDRAM view containing depth values
|
||||
// in the host format. When copying from the EDRAM buffer to host depth
|
||||
// buffers, writing the stored host pixel if stored_f24 == to_f24(stored_host)
|
||||
// (otherwise it was overwritten by something else, like clearing, or a color
|
||||
// buffer; this is inexact though, and will incorrectly load pixels that were
|
||||
// overwritten by something else in the EDRAM, but turned out to have the same
|
||||
// value on the guest as before - an outdated host-precision value will be
|
||||
// loaded in these cases instead).
|
||||
//
|
||||
// EDRAM > RAM, then reusing the EDRAM region for something else > EDRAM round
|
||||
// trip destroys precision beyond repair.
|
||||
//
|
||||
// Full host early Z and MSAA with pixel-rate shading are supported.
|
||||
kOnCopy,
|
||||
// Converting the depth to the closest host value representable exactly as a
|
||||
// 20e4 float in pixel shaders, to support invariance in cases when the guest
|
||||
// reuploads a previously resolved depth buffer to the EDRAM, rounding towards
|
||||
// zero (which contradicts the rounding used by the Direct3D 9 reference
|
||||
// rasterizer, but allows less-than-or-equal pixel shader depth output to be
|
||||
// used to preserve most of early Z culling when the game is using reversed
|
||||
// depth, which is the usual way of doing depth testing on the Xbox 360 and of
|
||||
// utilizing the advantages of a floating-point encoding).
|
||||
//
|
||||
// With MSAA, pixel shaders must run at sample frequency - otherwise, if the
|
||||
// depth is the same for the entire pixel, intersections of polygons cannot be
|
||||
// antialiased.
|
||||
//
|
||||
// Important usage note: When using this mode, bounds of the fixed-function
|
||||
// viewport must be converted to and back from float24 too (preferably using
|
||||
// correct rounding to the nearest even, to reduce the error already caused by
|
||||
// truncation rather than to amplify it). This ensures that clamping to the
|
||||
// viewport bounds, which happens after the pixel shader even if it overwrites
|
||||
// the resulting depth, is never done to a value not representable as float24
|
||||
// (for example, if the minimum Z is a number too small to be represented as
|
||||
// float24, but not zero, it won't be possible to write what should become
|
||||
// 0x000000 to the depth buffer). Note that this may add some error to the
|
||||
// depth values from the rasterizer; however, modifying Z in the vertex shader
|
||||
// to make interpolated depth values would cause clipping to be done to
|
||||
// different bounds, which may be more undesirable, especially in cases when Z
|
||||
// is explicitly set to a value like 0 or W (in such cases, the adjusted
|
||||
// polygon may go outside 0...W in clip space and disappear).
|
||||
kOnOutputTruncating,
|
||||
// Similar to kOnOutputTruncating, but rounding to the nearest even, more
|
||||
// correctly, however, because the resulting depth can be bigger than the
|
||||
// original host value, early depth testing can't be used at all. Same
|
||||
// viewport usage rules apply.
|
||||
kOnOutputRounding,
|
||||
};
|
||||
|
||||
DepthFloat24Conversion GetDepthFloat24Conversion();
|
||||
|
||||
} // namespace flags
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_GPU_FLAGS_H_
|
||||
|
|
|
@ -277,8 +277,7 @@ void GraphicsSystem::ClearCaches() {
|
|||
}
|
||||
|
||||
void GraphicsSystem::InitializeShaderStorage(
|
||||
const std::filesystem::path& storage_root, uint32_t title_id,
|
||||
bool blocking) {
|
||||
const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
|
||||
if (!cvars::store_shaders) {
|
||||
return;
|
||||
}
|
||||
|
@ -286,21 +285,18 @@ void GraphicsSystem::InitializeShaderStorage(
|
|||
if (command_processor_->is_paused()) {
|
||||
// Safe to run on any thread while the command processor is paused, no
|
||||
// race condition.
|
||||
command_processor_->InitializeShaderStorage(storage_root, title_id, true);
|
||||
command_processor_->InitializeShaderStorage(cache_root, title_id, true);
|
||||
} else {
|
||||
xe::threading::Fence fence;
|
||||
command_processor_->CallInThread(
|
||||
[this, storage_root, title_id, &fence]() {
|
||||
command_processor_->InitializeShaderStorage(storage_root, title_id,
|
||||
true);
|
||||
fence.Signal();
|
||||
});
|
||||
command_processor_->CallInThread([this, cache_root, title_id, &fence]() {
|
||||
command_processor_->InitializeShaderStorage(cache_root, title_id, true);
|
||||
fence.Signal();
|
||||
});
|
||||
fence.Wait();
|
||||
}
|
||||
} else {
|
||||
command_processor_->CallInThread([this, storage_root, title_id]() {
|
||||
command_processor_->InitializeShaderStorage(storage_root, title_id,
|
||||
false);
|
||||
command_processor_->CallInThread([this, cache_root, title_id]() {
|
||||
command_processor_->InitializeShaderStorage(cache_root, title_id, false);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -63,7 +63,7 @@ class GraphicsSystem {
|
|||
|
||||
virtual void ClearCaches();
|
||||
|
||||
void InitializeShaderStorage(const std::filesystem::path& storage_root,
|
||||
void InitializeShaderStorage(const std::filesystem::path& cache_root,
|
||||
uint32_t title_id, bool blocking);
|
||||
|
||||
void RequestFrameTrace();
|
||||
|
|
|
@ -254,15 +254,15 @@ union PA_SU_SC_MODE_CNTL {
|
|||
uint32_t msaa_enable : 1; // +15
|
||||
uint32_t vtx_window_offset_enable : 1; // +16
|
||||
// LINE_STIPPLE_ENABLE was added on Adreno.
|
||||
uint32_t : 2; // +17
|
||||
uint32_t provoking_vtx_last : 1; // +19
|
||||
uint32_t persp_corr_dis : 1; // +20
|
||||
uint32_t multi_prim_ib_ena : 1; // +21
|
||||
uint32_t : 1; // +22
|
||||
uint32_t quad_order_enable : 1; // +23
|
||||
uint32_t : 2; // +17
|
||||
uint32_t provoking_vtx_last : 1; // +19
|
||||
uint32_t persp_corr_dis : 1; // +20
|
||||
uint32_t multi_prim_ib_ena : 1; // +21
|
||||
uint32_t : 1; // +22
|
||||
uint32_t quad_order_enable : 1; // +23
|
||||
uint32_t sc_one_quad_per_clock : 1; // +24
|
||||
// WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on
|
||||
// Adreno.
|
||||
// TODO(Triang3l): Find SC_ONE_QUAD_PER_CLOCK offset.
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL;
|
||||
|
@ -298,7 +298,7 @@ union PA_SC_VIZ_QUERY {
|
|||
// discard geometry after test (but use for testing)
|
||||
uint32_t kill_pix_post_hi_z : 1; // +7
|
||||
// not used with d3d
|
||||
uint32_t kill_pix_detail_mask : 1; // +8
|
||||
uint32_t kill_pix_post_detail_mask : 1; // +8
|
||||
};
|
||||
uint32_t value;
|
||||
static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY;
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -51,7 +51,7 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
|||
}
|
||||
|
||||
uint64_t SamplerInfo::hash() const {
|
||||
return XXH64(this, sizeof(SamplerInfo), 0);
|
||||
return XXH3_64bits(this, sizeof(SamplerInfo));
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -31,9 +31,13 @@ Shader::Shader(xenos::ShaderType shader_type, uint64_t ucode_data_hash,
|
|||
xe::copy_and_swap(ucode_data_.data(), ucode_dwords, ucode_dword_count);
|
||||
}
|
||||
|
||||
Shader::~Shader() = default;
|
||||
// The translations map holds raw pointers allocated with `new` (see
// CreateTranslationInstance), so each one is deleted here.
Shader::~Shader() {
  for (const auto& modification_and_translation : translations_) {
    delete modification_and_translation.second;
  }
}
|
||||
|
||||
std::string Shader::GetTranslatedBinaryString() const {
|
||||
std::string Shader::Translation::GetTranslatedBinaryString() const {
|
||||
std::string result;
|
||||
result.resize(translated_binary_.size());
|
||||
std::memcpy(const_cast<char*>(result.data()), translated_binary_.data(),
|
||||
|
@ -41,36 +45,24 @@ std::string Shader::GetTranslatedBinaryString() const {
|
|||
return result;
|
||||
}
|
||||
|
||||
std::pair<std::filesystem::path, std::filesystem::path> Shader::Dump(
|
||||
std::filesystem::path Shader::Translation::Dump(
|
||||
const std::filesystem::path& base_path, const char* path_prefix) {
|
||||
std::filesystem::path path = base_path;
|
||||
// Ensure target path exists.
|
||||
auto target_path = base_path;
|
||||
if (!target_path.empty()) {
|
||||
target_path = std::filesystem::absolute(target_path);
|
||||
std::filesystem::create_directories(target_path);
|
||||
if (!path.empty()) {
|
||||
path = std::filesystem::absolute(path);
|
||||
std::filesystem::create_directories(path);
|
||||
}
|
||||
|
||||
auto base_name =
|
||||
fmt::format("shader_{}_{:016X}", path_prefix, ucode_data_hash_);
|
||||
|
||||
std::string txt_name, bin_name;
|
||||
if (shader_type_ == xenos::ShaderType::kVertex) {
|
||||
txt_name = base_name + ".vert";
|
||||
bin_name = base_name + ".bin.vert";
|
||||
} else {
|
||||
txt_name = base_name + ".frag";
|
||||
bin_name = base_name + ".bin.frag";
|
||||
}
|
||||
|
||||
std::filesystem::path txt_path, bin_path;
|
||||
txt_path = base_path / txt_name;
|
||||
bin_path = base_path / bin_name;
|
||||
|
||||
FILE* f = filesystem::OpenFile(txt_path, "wb");
|
||||
path = path /
|
||||
fmt::format(
|
||||
"shader_{:016X}_{:08X}.{}.{}", shader().ucode_data_hash(),
|
||||
modification(), path_prefix,
|
||||
shader().type() == xenos::ShaderType::kVertex ? "vert" : "frag");
|
||||
FILE* f = filesystem::OpenFile(path, "wb");
|
||||
if (f) {
|
||||
fwrite(translated_binary_.data(), 1, translated_binary_.size(), f);
|
||||
fprintf(f, "\n\n");
|
||||
auto ucode_disasm_ptr = ucode_disassembly().c_str();
|
||||
auto ucode_disasm_ptr = shader().ucode_disassembly().c_str();
|
||||
while (*ucode_disasm_ptr) {
|
||||
auto line_end = std::strchr(ucode_disasm_ptr, '\n');
|
||||
fprintf(f, "// ");
|
||||
|
@ -83,14 +75,58 @@ std::pair<std::filesystem::path, std::filesystem::path> Shader::Dump(
|
|||
}
|
||||
fclose(f);
|
||||
}
|
||||
return std::move(path);
|
||||
}
|
||||
|
||||
f = filesystem::OpenFile(bin_path, "wb");
|
||||
// Returns the translation registered for the given modification bits,
// creating and registering a new one through CreateTranslationInstance if
// none exists yet. If is_new is non-null, it receives whether the returned
// translation was created by this call.
Shader::Translation* Shader::GetOrCreateTranslation(uint32_t modification,
                                                    bool* is_new) {
  const auto existing = translations_.find(modification);
  const bool must_create = existing == translations_.end();
  Translation* result = must_create ? CreateTranslationInstance(modification)
                                    : existing->second;
  if (must_create) {
    translations_.emplace(modification, result);
  }
  if (is_new) {
    *is_new = must_create;
  }
  return result;
}
|
||||
|
||||
// Deletes the translation stored for the given modification bits and removes
// its map entry. Does nothing if no such translation exists.
void Shader::DestroyTranslation(uint32_t modification) {
  auto found = translations_.find(modification);
  if (found != translations_.end()) {
    delete found->second;
    translations_.erase(found);
  }
}
|
||||
|
||||
std::filesystem::path Shader::DumpUcodeBinary(
|
||||
const std::filesystem::path& base_path) {
|
||||
// Ensure target path exists.
|
||||
std::filesystem::path path = base_path;
|
||||
if (!path.empty()) {
|
||||
path = std::filesystem::absolute(path);
|
||||
std::filesystem::create_directories(path);
|
||||
}
|
||||
path = path /
|
||||
fmt::format("shader_{:016X}.ucode.bin.{}", ucode_data_hash(),
|
||||
type() == xenos::ShaderType::kVertex ? "vert" : "frag");
|
||||
|
||||
FILE* f = filesystem::OpenFile(path, "wb");
|
||||
if (f) {
|
||||
fwrite(ucode_data_.data(), 4, ucode_data_.size(), f);
|
||||
fwrite(ucode_data().data(), 4, ucode_data().size(), f);
|
||||
fclose(f);
|
||||
}
|
||||
return std::move(path);
|
||||
}
|
||||
|
||||
return {std::move(txt_path), std::move(bin_path)};
|
||||
// Base implementation of the translation factory: allocates a plain
// Translation bound to this shader and the given modification bits. Suitable
// for simple cases like ucode disassembly.
Shader::Translation* Shader::CreateTranslationInstance(uint32_t modification) {
  Translation* const instance = new Translation(*this, modification);
  return instance;
}
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -11,8 +11,12 @@
|
|||
#define XENIA_GPU_SHADER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/math.h"
|
||||
|
@ -591,6 +595,8 @@ struct ParsedAluInstruction {
|
|||
|
||||
class Shader {
|
||||
public:
|
||||
// Type of the vertex shader in a D3D11-like rendering pipeline - shader
|
||||
// interface depends on it, so it must be known at translation time.
|
||||
// If values are changed, INVALIDATE SHADER STORAGES (increase their version
|
||||
// constexpr) where those are stored! And check bit count where this is
|
||||
// packed. This is : uint32_t for simplicity of packing in bit fields.
|
||||
|
@ -603,6 +609,8 @@ class Shader {
|
|||
kQuadDomainCPIndexed,
|
||||
kQuadDomainPatchIndexed,
|
||||
};
|
||||
// For packing HostVertexShaderType in bit fields.
|
||||
static constexpr uint32_t kHostVertexShaderTypeBitCount = 3;
|
||||
|
||||
struct Error {
|
||||
bool is_fatal = false;
|
||||
|
@ -683,6 +691,70 @@ class Shader {
|
|||
}
|
||||
};
|
||||
|
||||
class Translation {
|
||||
public:
|
||||
virtual ~Translation() {}
|
||||
|
||||
Shader& shader() const { return shader_; }
|
||||
|
||||
// Translator-specific modification bits.
|
||||
uint32_t modification() const { return modification_; }
|
||||
|
||||
// True if the shader was translated and prepared without error.
|
||||
bool is_valid() const { return is_valid_; }
|
||||
|
||||
// True if the shader has already been translated.
|
||||
bool is_translated() const { return is_translated_; }
|
||||
|
||||
// Errors that occurred during translation.
|
||||
const std::vector<Error>& errors() const { return errors_; }
|
||||
|
||||
// Translated shader binary (or text).
|
||||
const std::vector<uint8_t>& translated_binary() const {
|
||||
return translated_binary_;
|
||||
}
|
||||
|
||||
// Gets the translated shader binary as a string.
|
||||
// This is only valid if it is actually text.
|
||||
std::string GetTranslatedBinaryString() const;
|
||||
|
||||
// Disassembly of the translated shader from the host graphics layer.
|
||||
// May be empty if the host does not support disassembly.
|
||||
const std::string& host_disassembly() const { return host_disassembly_; }
|
||||
|
||||
// In case disassembly depends on the GPU backend, for setting it
|
||||
// externally.
|
||||
void set_host_disassembly(std::string disassembly) {
|
||||
host_disassembly_ = std::move(disassembly);
|
||||
}
|
||||
|
||||
// For dumping after translation. Dumps the shader's disassembled microcode,
|
||||
// translated code, and, if available, translated disassembly, to a file in
|
||||
// the given path based on ucode hash. Returns the name of the written file.
|
||||
std::filesystem::path Dump(const std::filesystem::path& base_path,
|
||||
const char* path_prefix);
|
||||
|
||||
protected:
|
||||
Translation(Shader& shader, uint32_t modification)
|
||||
: shader_(shader), modification_(modification) {}
|
||||
|
||||
// If there was some failure during preparation on the implementation side.
|
||||
void MakeInvalid() { is_valid_ = false; }
|
||||
|
||||
private:
|
||||
friend class Shader;
|
||||
friend class ShaderTranslator;
|
||||
|
||||
Shader& shader_;
|
||||
uint32_t modification_;
|
||||
|
||||
bool is_valid_ = false;
|
||||
bool is_translated_ = false;
|
||||
std::vector<Error> errors_;
|
||||
std::vector<uint8_t> translated_binary_;
|
||||
std::string host_disassembly_;
|
||||
};
|
||||
|
||||
Shader(xenos::ShaderType shader_type, uint64_t ucode_data_hash,
|
||||
const uint32_t* ucode_dwords, size_t ucode_dword_count);
|
||||
virtual ~Shader();
|
||||
|
@ -690,19 +762,30 @@ class Shader {
|
|||
// Whether the shader is identified as a vertex or pixel shader.
|
||||
xenos::ShaderType type() const { return shader_type_; }
|
||||
|
||||
// If this is a vertex shader, and it has been translated, type of the shader
|
||||
// in a D3D11-like rendering pipeline - shader interface depends on it, so it
|
||||
// must be known at translation time.
|
||||
HostVertexShaderType host_vertex_shader_type() const {
|
||||
return host_vertex_shader_type_;
|
||||
}
|
||||
|
||||
// Microcode dwords in host endianness.
|
||||
const std::vector<uint32_t>& ucode_data() const { return ucode_data_; }
|
||||
uint64_t ucode_data_hash() const { return ucode_data_hash_; }
|
||||
const uint32_t* ucode_dwords() const { return ucode_data_.data(); }
|
||||
size_t ucode_dword_count() const { return ucode_data_.size(); }
|
||||
|
||||
// Host translations with the specified modification bits. Not thread-safe
|
||||
// with respect to translation creation/destruction.
|
||||
const std::unordered_map<uint32_t, Translation*>& translations() const {
|
||||
return translations_;
|
||||
}
|
||||
Translation* GetTranslation(uint32_t modification) const {
|
||||
auto it = translations_.find(modification);
|
||||
if (it != translations_.cend()) {
|
||||
return it->second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
Translation* GetOrCreateTranslation(uint32_t modification,
|
||||
bool* is_new = nullptr);
|
||||
// For shader storage loading, to remove a modification in case of translation
|
||||
// failure. Not thread-safe.
|
||||
void DestroyTranslation(uint32_t modification);
|
||||
|
||||
// All vertex bindings used in the shader.
|
||||
// Valid for vertex shaders only.
|
||||
const std::vector<VertexBinding>& vertex_bindings() const {
|
||||
|
@ -733,73 +816,55 @@ class Shader {
|
|||
// True if the shader overrides the pixel depth.
|
||||
bool writes_depth() const { return writes_depth_; }
|
||||
|
||||
// True if Xenia can automatically enable early depth/stencil for the pixel
|
||||
// shader when RB_DEPTHCONTROL EARLY_Z_ENABLE is not set, provided alpha
|
||||
// testing and alpha to coverage are disabled.
|
||||
bool implicit_early_z_allowed() const { return implicit_early_z_allowed_; }
|
||||
|
||||
// True if the shader was translated and prepared without error.
|
||||
bool is_valid() const { return is_valid_; }
|
||||
|
||||
// True if the shader has already been translated.
|
||||
bool is_translated() const { return is_translated_; }
|
||||
|
||||
// Errors that occurred during translation.
|
||||
const std::vector<Error>& errors() const { return errors_; }
|
||||
// True if the current shader has any `kill` instructions.
|
||||
bool kills_pixels() const { return kills_pixels_; }
|
||||
|
||||
// Microcode disassembly in D3D format.
|
||||
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
||||
|
||||
// Translated shader binary (or text).
|
||||
const std::vector<uint8_t>& translated_binary() const {
|
||||
return translated_binary_;
|
||||
// An externally managed identifier of the shader storage the microcode of the
|
||||
// shader was last written to, or was loaded from, to only write the shader
|
||||
// microcode to the storage once. UINT32_MAX by default.
|
||||
uint32_t ucode_storage_index() const { return ucode_storage_index_; }
|
||||
void set_ucode_storage_index(uint32_t storage_index) {
|
||||
ucode_storage_index_ = storage_index;
|
||||
}
|
||||
|
||||
// Gets the translated shader binary as a string.
|
||||
// This is only valid if it is actually text.
|
||||
std::string GetTranslatedBinaryString() const;
|
||||
|
||||
// Disassembly of the translated from the host graphics layer.
|
||||
// May be empty if the host does not support disassembly.
|
||||
const std::string& host_disassembly() const { return host_disassembly_; }
|
||||
// A lot of errors that occurred during preparation of the host shader.
|
||||
const std::string& host_error_log() const { return host_error_log_; }
|
||||
// Host binary that can be saved and reused across runs.
|
||||
// May be empty if the host does not support saving binaries.
|
||||
const std::vector<uint8_t>& host_binary() const { return host_binary_; }
|
||||
|
||||
// Dumps the shader to a file in the given path based on ucode hash.
|
||||
// Both the ucode binary and disassembled and translated shader will be
|
||||
// written.
|
||||
// Returns the filename of the shader and the binary.
|
||||
std::pair<std::filesystem::path, std::filesystem::path> Dump(
|
||||
const std::filesystem::path& base_path, const char* path_prefix);
|
||||
// Dumps the shader's microcode binary to a file in the given path based on
|
||||
// ucode hash. Returns the name of the written file. Can be called at any
|
||||
// time, doesn't require the shader to be translated.
|
||||
std::filesystem::path DumpUcodeBinary(const std::filesystem::path& base_path);
|
||||
|
||||
protected:
|
||||
friend class ShaderTranslator;
|
||||
|
||||
virtual Translation* CreateTranslationInstance(uint32_t modification);
|
||||
|
||||
xenos::ShaderType shader_type_;
|
||||
HostVertexShaderType host_vertex_shader_type_ = HostVertexShaderType::kVertex;
|
||||
std::vector<uint32_t> ucode_data_;
|
||||
uint64_t ucode_data_hash_;
|
||||
|
||||
// Modification bits -> translation.
|
||||
std::unordered_map<uint32_t, Translation*> translations_;
|
||||
|
||||
// Whether setup of the post-translation parameters (listed below, plus those
|
||||
// specific to the implementation) has been initiated, by any thread. If
|
||||
// translation is performed on multiple threads, only one thread must be
|
||||
// setting this up (other threads would write the same data anyway).
|
||||
std::atomic_flag post_translation_info_set_up_ = ATOMIC_FLAG_INIT;
|
||||
|
||||
// Initialized after the first successful translation (these don't depend on
|
||||
// the host-side modification bits).
|
||||
std::string ucode_disassembly_;
|
||||
std::vector<VertexBinding> vertex_bindings_;
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
ConstantRegisterMap constant_register_map_ = {0};
|
||||
bool writes_color_targets_[4] = {false, false, false, false};
|
||||
bool writes_depth_ = false;
|
||||
bool implicit_early_z_allowed_ = true;
|
||||
bool kills_pixels_ = false;
|
||||
std::vector<uint32_t> memexport_stream_constants_;
|
||||
|
||||
bool is_valid_ = false;
|
||||
bool is_translated_ = false;
|
||||
std::vector<Error> errors_;
|
||||
|
||||
std::string ucode_disassembly_;
|
||||
std::vector<uint8_t> translated_binary_;
|
||||
std::string host_disassembly_;
|
||||
std::string host_error_log_;
|
||||
std::vector<uint8_t> host_binary_;
|
||||
uint32_t ucode_storage_index_ = UINT32_MAX;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -144,11 +144,15 @@ int shader_compiler_main(const std::vector<std::string>& args) {
|
|||
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
|
||||
}
|
||||
}
|
||||
uint32_t modification =
|
||||
translator->GetDefaultModification(shader_type, host_vertex_shader_type);
|
||||
|
||||
translator->Translate(shader.get(), host_vertex_shader_type);
|
||||
Shader::Translation* translation =
|
||||
shader->GetOrCreateTranslation(modification);
|
||||
translator->Translate(*translation);
|
||||
|
||||
const void* source_data = shader->translated_binary().data();
|
||||
size_t source_data_size = shader->translated_binary().size();
|
||||
const void* source_data = translation->translated_binary().data();
|
||||
size_t source_data_size = translation->translated_binary().size();
|
||||
|
||||
std::string spirv_disasm;
|
||||
if (cvars::shader_output_type == "spirvtext") {
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#include "shader_translator.h"
|
||||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
|
@ -14,6 +13,7 @@
|
|||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
|
@ -46,7 +46,9 @@ ShaderTranslator::ShaderTranslator() = default;
|
|||
|
||||
ShaderTranslator::~ShaderTranslator() = default;
|
||||
|
||||
void ShaderTranslator::Reset() {
|
||||
void ShaderTranslator::Reset(xenos::ShaderType shader_type) {
|
||||
shader_type_ = shader_type;
|
||||
modification_ = GetDefaultModification(shader_type);
|
||||
errors_.clear();
|
||||
ucode_disasm_buffer_.Reset();
|
||||
ucode_disasm_line_number_ = 0;
|
||||
|
@ -64,37 +66,37 @@ void ShaderTranslator::Reset() {
|
|||
writes_color_targets_[i] = false;
|
||||
}
|
||||
writes_depth_ = false;
|
||||
implicit_early_z_allowed_ = true;
|
||||
kills_pixels_ = false;
|
||||
memexport_alloc_count_ = 0;
|
||||
memexport_eA_written_ = 0;
|
||||
std::memset(&memexport_eM_written_, 0, sizeof(memexport_eM_written_));
|
||||
memexport_stream_constants_.clear();
|
||||
}
|
||||
|
||||
bool ShaderTranslator::Translate(
|
||||
Shader* shader, reg::SQ_PROGRAM_CNTL cntl,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type) {
|
||||
Reset();
|
||||
uint32_t cntl_num_reg = shader->type() == xenos::ShaderType::kVertex
|
||||
bool ShaderTranslator::Translate(Shader::Translation& translation,
|
||||
reg::SQ_PROGRAM_CNTL cntl) {
|
||||
xenos::ShaderType shader_type = translation.shader().type();
|
||||
Reset(shader_type);
|
||||
uint32_t cntl_num_reg = shader_type == xenos::ShaderType::kVertex
|
||||
? cntl.vs_num_reg
|
||||
: cntl.ps_num_reg;
|
||||
register_count_ = (cntl_num_reg & 0x80) ? 0 : (cntl_num_reg + 1);
|
||||
|
||||
return TranslateInternal(shader, host_vertex_shader_type);
|
||||
return TranslateInternal(translation);
|
||||
}
|
||||
|
||||
bool ShaderTranslator::Translate(
|
||||
Shader* shader, Shader::HostVertexShaderType host_vertex_shader_type) {
|
||||
Reset();
|
||||
return TranslateInternal(shader, host_vertex_shader_type);
|
||||
bool ShaderTranslator::Translate(Shader::Translation& translation) {
|
||||
Reset(translation.shader().type());
|
||||
return TranslateInternal(translation);
|
||||
}
|
||||
|
||||
bool ShaderTranslator::TranslateInternal(
|
||||
Shader* shader, Shader::HostVertexShaderType host_vertex_shader_type) {
|
||||
shader_type_ = shader->type();
|
||||
host_vertex_shader_type_ = host_vertex_shader_type;
|
||||
ucode_dwords_ = shader->ucode_dwords();
|
||||
ucode_dword_count_ = shader->ucode_dword_count();
|
||||
bool ShaderTranslator::TranslateInternal(Shader::Translation& translation) {
|
||||
Shader& shader = translation.shader();
|
||||
assert_true(shader_type_ == shader.type());
|
||||
shader_type_ = shader.type();
|
||||
ucode_dwords_ = shader.ucode_dwords();
|
||||
ucode_dword_count_ = shader.ucode_dword_count();
|
||||
modification_ = translation.modification();
|
||||
|
||||
// Control flow instructions come paired in blocks of 3 dwords and all are
|
||||
// listed at the top of the ucode.
|
||||
|
@ -147,12 +149,6 @@ bool ShaderTranslator::TranslateInternal(
|
|||
if (memexport_eA_written_ == 0) {
|
||||
memexport_stream_constants_.clear();
|
||||
}
|
||||
if (!memexport_stream_constants_.empty()) {
|
||||
// TODO(Triang3l): Investigate what happens to memexport when the pixel
|
||||
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
|
||||
// depth/stencil.
|
||||
implicit_early_z_allowed_ = false;
|
||||
}
|
||||
|
||||
StartTranslation();
|
||||
|
||||
|
@ -187,35 +183,44 @@ bool ShaderTranslator::TranslateInternal(
|
|||
++cf_index;
|
||||
}
|
||||
|
||||
shader->errors_ = std::move(errors_);
|
||||
shader->translated_binary_ = CompleteTranslation();
|
||||
shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string();
|
||||
shader->host_vertex_shader_type_ = host_vertex_shader_type_;
|
||||
shader->vertex_bindings_ = std::move(vertex_bindings_);
|
||||
shader->texture_bindings_ = std::move(texture_bindings_);
|
||||
shader->constant_register_map_ = std::move(constant_register_map_);
|
||||
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
|
||||
shader->writes_color_targets_[i] = writes_color_targets_[i];
|
||||
}
|
||||
shader->writes_depth_ = writes_depth_;
|
||||
shader->implicit_early_z_allowed_ = implicit_early_z_allowed_;
|
||||
shader->memexport_stream_constants_.clear();
|
||||
for (uint32_t memexport_stream_constant : memexport_stream_constants_) {
|
||||
shader->memexport_stream_constants_.push_back(memexport_stream_constant);
|
||||
}
|
||||
translation.errors_ = std::move(errors_);
|
||||
translation.translated_binary_ = CompleteTranslation();
|
||||
translation.is_translated_ = true;
|
||||
|
||||
shader->is_valid_ = true;
|
||||
shader->is_translated_ = true;
|
||||
for (const auto& error : shader->errors_) {
|
||||
bool is_valid = true;
|
||||
for (const auto& error : translation.errors_) {
|
||||
if (error.is_fatal) {
|
||||
shader->is_valid_ = false;
|
||||
is_valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
translation.is_valid_ = is_valid;
|
||||
|
||||
PostTranslation(shader);
|
||||
// Setup info that doesn't depend on the modification only once.
|
||||
bool setup_shader_post_translation_info =
|
||||
is_valid && !shader.post_translation_info_set_up_.test_and_set();
|
||||
if (setup_shader_post_translation_info) {
|
||||
shader.ucode_disassembly_ = ucode_disasm_buffer_.to_string();
|
||||
shader.vertex_bindings_ = std::move(vertex_bindings_);
|
||||
shader.texture_bindings_ = std::move(texture_bindings_);
|
||||
shader.constant_register_map_ = std::move(constant_register_map_);
|
||||
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
|
||||
shader.writes_color_targets_[i] = writes_color_targets_[i];
|
||||
}
|
||||
shader.writes_depth_ = writes_depth_;
|
||||
shader.kills_pixels_ = kills_pixels_;
|
||||
shader.memexport_stream_constants_.clear();
|
||||
shader.memexport_stream_constants_.reserve(
|
||||
memexport_stream_constants_.size());
|
||||
shader.memexport_stream_constants_.insert(
|
||||
shader.memexport_stream_constants_.cend(),
|
||||
memexport_stream_constants_.cbegin(),
|
||||
memexport_stream_constants_.cend());
|
||||
}
|
||||
PostTranslation(translation, setup_shader_post_translation_info);
|
||||
|
||||
return shader->is_valid_;
|
||||
// In case is_valid_ is modified by PostTranslation, reload.
|
||||
return translation.is_valid_;
|
||||
}
|
||||
|
||||
void ShaderTranslator::MarkUcodeInstruction(uint32_t dword_offset) {
|
||||
|
@ -338,14 +343,9 @@ void ShaderTranslator::GatherInstructionInformation(
|
|||
ParsedAluInstruction instr;
|
||||
ParseAluInstruction(op, instr);
|
||||
|
||||
const auto& vector_opcode_info =
|
||||
alu_vector_opcode_infos_[uint32_t(op.vector_opcode())];
|
||||
implicit_early_z_allowed_ &=
|
||||
!vector_opcode_info.disable_implicit_early_z;
|
||||
const auto& scalar_opcode_info =
|
||||
alu_scalar_opcode_infos_[uint32_t(op.scalar_opcode())];
|
||||
implicit_early_z_allowed_ &=
|
||||
!scalar_opcode_info.disable_implicit_early_z;
|
||||
kills_pixels_ = kills_pixels_ ||
|
||||
ucode::AluVectorOpcodeIsKill(op.vector_opcode()) ||
|
||||
ucode::AluScalarOpcodeIsKill(op.scalar_opcode());
|
||||
|
||||
if (instr.vector_and_constant_result.storage_target !=
|
||||
InstructionStorageTarget::kRegister ||
|
||||
|
@ -403,7 +403,6 @@ void ShaderTranslator::GatherInstructionInformation(
|
|||
break;
|
||||
case InstructionStorageTarget::kDepth:
|
||||
writes_depth_ = true;
|
||||
implicit_early_z_allowed_ = false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -1077,91 +1076,91 @@ uint32_t ParsedTextureFetchInstruction::GetNonZeroResultComponents() const {
|
|||
|
||||
const ShaderTranslator::AluOpcodeInfo
|
||||
ShaderTranslator::alu_vector_opcode_infos_[0x20] = {
|
||||
{"add", 2, 4, false}, // 0
|
||||
{"mul", 2, 4, false}, // 1
|
||||
{"max", 2, 4, false}, // 2
|
||||
{"min", 2, 4, false}, // 3
|
||||
{"seq", 2, 4, false}, // 4
|
||||
{"sgt", 2, 4, false}, // 5
|
||||
{"sge", 2, 4, false}, // 6
|
||||
{"sne", 2, 4, false}, // 7
|
||||
{"frc", 1, 4, false}, // 8
|
||||
{"trunc", 1, 4, false}, // 9
|
||||
{"floor", 1, 4, false}, // 10
|
||||
{"mad", 3, 4, false}, // 11
|
||||
{"cndeq", 3, 4, false}, // 12
|
||||
{"cndge", 3, 4, false}, // 13
|
||||
{"cndgt", 3, 4, false}, // 14
|
||||
{"dp4", 2, 4, false}, // 15
|
||||
{"dp3", 2, 4, false}, // 16
|
||||
{"dp2add", 3, 4, false}, // 17
|
||||
{"cube", 2, 4, false}, // 18
|
||||
{"max4", 1, 4, false}, // 19
|
||||
{"setp_eq_push", 2, 4, false}, // 20
|
||||
{"setp_ne_push", 2, 4, false}, // 21
|
||||
{"setp_gt_push", 2, 4, false}, // 22
|
||||
{"setp_ge_push", 2, 4, false}, // 23
|
||||
{"kill_eq", 2, 4, true}, // 24
|
||||
{"kill_gt", 2, 4, true}, // 25
|
||||
{"kill_ge", 2, 4, true}, // 26
|
||||
{"kill_ne", 2, 4, true}, // 27
|
||||
{"dst", 2, 4, false}, // 28
|
||||
{"maxa", 2, 4, false}, // 29
|
||||
{"add", 2, 4}, // 0
|
||||
{"mul", 2, 4}, // 1
|
||||
{"max", 2, 4}, // 2
|
||||
{"min", 2, 4}, // 3
|
||||
{"seq", 2, 4}, // 4
|
||||
{"sgt", 2, 4}, // 5
|
||||
{"sge", 2, 4}, // 6
|
||||
{"sne", 2, 4}, // 7
|
||||
{"frc", 1, 4}, // 8
|
||||
{"trunc", 1, 4}, // 9
|
||||
{"floor", 1, 4}, // 10
|
||||
{"mad", 3, 4}, // 11
|
||||
{"cndeq", 3, 4}, // 12
|
||||
{"cndge", 3, 4}, // 13
|
||||
{"cndgt", 3, 4}, // 14
|
||||
{"dp4", 2, 4}, // 15
|
||||
{"dp3", 2, 4}, // 16
|
||||
{"dp2add", 3, 4}, // 17
|
||||
{"cube", 2, 4}, // 18
|
||||
{"max4", 1, 4}, // 19
|
||||
{"setp_eq_push", 2, 4}, // 20
|
||||
{"setp_ne_push", 2, 4}, // 21
|
||||
{"setp_gt_push", 2, 4}, // 22
|
||||
{"setp_ge_push", 2, 4}, // 23
|
||||
{"kill_eq", 2, 4}, // 24
|
||||
{"kill_gt", 2, 4}, // 25
|
||||
{"kill_ge", 2, 4}, // 26
|
||||
{"kill_ne", 2, 4}, // 27
|
||||
{"dst", 2, 4}, // 28
|
||||
{"maxa", 2, 4}, // 29
|
||||
};
|
||||
|
||||
const ShaderTranslator::AluOpcodeInfo
|
||||
ShaderTranslator::alu_scalar_opcode_infos_[0x40] = {
|
||||
{"adds", 1, 2, false}, // 0
|
||||
{"adds_prev", 1, 1, false}, // 1
|
||||
{"muls", 1, 2, false}, // 2
|
||||
{"muls_prev", 1, 1, false}, // 3
|
||||
{"muls_prev2", 1, 2, false}, // 4
|
||||
{"maxs", 1, 2, false}, // 5
|
||||
{"mins", 1, 2, false}, // 6
|
||||
{"seqs", 1, 1, false}, // 7
|
||||
{"sgts", 1, 1, false}, // 8
|
||||
{"sges", 1, 1, false}, // 9
|
||||
{"snes", 1, 1, false}, // 10
|
||||
{"frcs", 1, 1, false}, // 11
|
||||
{"truncs", 1, 1, false}, // 12
|
||||
{"floors", 1, 1, false}, // 13
|
||||
{"exp", 1, 1, false}, // 14
|
||||
{"logc", 1, 1, false}, // 15
|
||||
{"log", 1, 1, false}, // 16
|
||||
{"rcpc", 1, 1, false}, // 17
|
||||
{"rcpf", 1, 1, false}, // 18
|
||||
{"rcp", 1, 1, false}, // 19
|
||||
{"rsqc", 1, 1, false}, // 20
|
||||
{"rsqf", 1, 1, false}, // 21
|
||||
{"rsq", 1, 1, false}, // 22
|
||||
{"maxas", 1, 2, false}, // 23
|
||||
{"maxasf", 1, 2, false}, // 24
|
||||
{"subs", 1, 2, false}, // 25
|
||||
{"subs_prev", 1, 1, false}, // 26
|
||||
{"setp_eq", 1, 1, false}, // 27
|
||||
{"setp_ne", 1, 1, false}, // 28
|
||||
{"setp_gt", 1, 1, false}, // 29
|
||||
{"setp_ge", 1, 1, false}, // 30
|
||||
{"setp_inv", 1, 1, false}, // 31
|
||||
{"setp_pop", 1, 1, false}, // 32
|
||||
{"setp_clr", 0, 0, false}, // 33
|
||||
{"setp_rstr", 1, 1, false}, // 34
|
||||
{"kills_eq", 1, 1, true}, // 35
|
||||
{"kills_gt", 1, 1, true}, // 36
|
||||
{"kills_ge", 1, 1, true}, // 37
|
||||
{"kills_ne", 1, 1, true}, // 38
|
||||
{"kills_one", 1, 1, true}, // 39
|
||||
{"sqrt", 1, 1, false}, // 40
|
||||
{"UNKNOWN", 0, 0, false}, // 41
|
||||
{"mulsc", 2, 1, false}, // 42
|
||||
{"mulsc", 2, 1, false}, // 43
|
||||
{"addsc", 2, 1, false}, // 44
|
||||
{"addsc", 2, 1, false}, // 45
|
||||
{"subsc", 2, 1, false}, // 46
|
||||
{"subsc", 2, 1, false}, // 47
|
||||
{"sin", 1, 1, false}, // 48
|
||||
{"cos", 1, 1, false}, // 49
|
||||
{"retain_prev", 0, 0, false}, // 50
|
||||
{"adds", 1, 2}, // 0
|
||||
{"adds_prev", 1, 1}, // 1
|
||||
{"muls", 1, 2}, // 2
|
||||
{"muls_prev", 1, 1}, // 3
|
||||
{"muls_prev2", 1, 2}, // 4
|
||||
{"maxs", 1, 2}, // 5
|
||||
{"mins", 1, 2}, // 6
|
||||
{"seqs", 1, 1}, // 7
|
||||
{"sgts", 1, 1}, // 8
|
||||
{"sges", 1, 1}, // 9
|
||||
{"snes", 1, 1}, // 10
|
||||
{"frcs", 1, 1}, // 11
|
||||
{"truncs", 1, 1}, // 12
|
||||
{"floors", 1, 1}, // 13
|
||||
{"exp", 1, 1}, // 14
|
||||
{"logc", 1, 1}, // 15
|
||||
{"log", 1, 1}, // 16
|
||||
{"rcpc", 1, 1}, // 17
|
||||
{"rcpf", 1, 1}, // 18
|
||||
{"rcp", 1, 1}, // 19
|
||||
{"rsqc", 1, 1}, // 20
|
||||
{"rsqf", 1, 1}, // 21
|
||||
{"rsq", 1, 1}, // 22
|
||||
{"maxas", 1, 2}, // 23
|
||||
{"maxasf", 1, 2}, // 24
|
||||
{"subs", 1, 2}, // 25
|
||||
{"subs_prev", 1, 1}, // 26
|
||||
{"setp_eq", 1, 1}, // 27
|
||||
{"setp_ne", 1, 1}, // 28
|
||||
{"setp_gt", 1, 1}, // 29
|
||||
{"setp_ge", 1, 1}, // 30
|
||||
{"setp_inv", 1, 1}, // 31
|
||||
{"setp_pop", 1, 1}, // 32
|
||||
{"setp_clr", 0, 0}, // 33
|
||||
{"setp_rstr", 1, 1}, // 34
|
||||
{"kills_eq", 1, 1}, // 35
|
||||
{"kills_gt", 1, 1}, // 36
|
||||
{"kills_ge", 1, 1}, // 37
|
||||
{"kills_ne", 1, 1}, // 38
|
||||
{"kills_one", 1, 1}, // 39
|
||||
{"sqrt", 1, 1}, // 40
|
||||
{"UNKNOWN", 0, 0}, // 41
|
||||
{"mulsc", 2, 1}, // 42
|
||||
{"mulsc", 2, 1}, // 43
|
||||
{"addsc", 2, 1}, // 44
|
||||
{"addsc", 2, 1}, // 45
|
||||
{"subsc", 2, 1}, // 46
|
||||
{"subsc", 2, 1}, // 47
|
||||
{"sin", 1, 1}, // 48
|
||||
{"cos", 1, 1}, // 49
|
||||
{"retain_prev", 0, 0}, // 50
|
||||
};
|
||||
|
||||
void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) {
|
||||
|
|
|
@ -29,18 +29,27 @@ class ShaderTranslator {
|
|||
public:
|
||||
virtual ~ShaderTranslator();
|
||||
|
||||
bool Translate(Shader* shader, reg::SQ_PROGRAM_CNTL cntl,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex);
|
||||
bool Translate(Shader* shader,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex);
|
||||
virtual uint32_t GetDefaultModification(
|
||||
xenos::ShaderType shader_type,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool Translate(Shader::Translation& translation, reg::SQ_PROGRAM_CNTL cntl);
|
||||
bool Translate(Shader::Translation& translation);
|
||||
|
||||
protected:
|
||||
ShaderTranslator();
|
||||
|
||||
// Resets translator state before beginning translation.
|
||||
virtual void Reset();
|
||||
// shader_type is passed here so translator implementations can generate
|
||||
// special fixed shaders for internal use, and set up the type for this
|
||||
// purpose.
|
||||
virtual void Reset(xenos::ShaderType shader_type);
|
||||
|
||||
// Current host-side modification being generated.
|
||||
uint32_t modification() const { return modification_; }
|
||||
|
||||
// Register count.
|
||||
uint32_t register_count() const { return register_count_; }
|
||||
|
@ -48,11 +57,6 @@ class ShaderTranslator {
|
|||
bool is_vertex_shader() const {
|
||||
return shader_type_ == xenos::ShaderType::kVertex;
|
||||
}
|
||||
// If translating a vertex shader, type of the shader in a D3D11-like
|
||||
// rendering pipeline.
|
||||
Shader::HostVertexShaderType host_vertex_shader_type() const {
|
||||
return host_vertex_shader_type_;
|
||||
}
|
||||
// True if the current shader is a pixel shader.
|
||||
bool is_pixel_shader() const {
|
||||
return shader_type_ == xenos::ShaderType::kPixel;
|
||||
|
@ -85,10 +89,8 @@ class ShaderTranslator {
|
|||
// True if the current shader overrides the pixel depth, set before
|
||||
// translation. Doesn't include writes with an empty used write mask.
|
||||
bool writes_depth() const { return writes_depth_; }
|
||||
// True if Xenia can automatically enable early depth/stencil for the pixel
|
||||
// shader when RB_DEPTHCONTROL EARLY_Z_ENABLE is not set, provided alpha
|
||||
// testing and alpha to coverage are disabled.
|
||||
bool implicit_early_z_allowed() const { return implicit_early_z_allowed_; }
|
||||
// True if the current shader has any `kill` instructions.
|
||||
bool kills_pixels() const { return kills_pixels_; }
|
||||
// A list of all vertex bindings, populated before translation occurs.
|
||||
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
|
||||
return vertex_bindings_;
|
||||
|
@ -112,6 +114,17 @@ class ShaderTranslator {
|
|||
return memexport_stream_constants_;
|
||||
}
|
||||
|
||||
// Whether the shader can have early depth and stencil writing enabled, unless
|
||||
// alpha test or alpha to coverage is enabled. Data gathered before
|
||||
// translation.
|
||||
bool CanWriteZEarly() const {
|
||||
// TODO(Triang3l): Investigate what happens to memexport when the pixel
|
||||
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
|
||||
// depth/stencil.
|
||||
return !writes_depth_ && !kills_pixels_ &&
|
||||
memexport_stream_constants_.empty();
|
||||
}
|
||||
|
||||
// Current line number in the ucode disassembly.
|
||||
size_t ucode_disasm_line_number() const { return ucode_disasm_line_number_; }
|
||||
// Ucode disassembly buffer accumulated during translation.
|
||||
|
@ -130,10 +143,14 @@ class ShaderTranslator {
|
|||
}
|
||||
|
||||
// Handles post-translation tasks when the shader has been fully translated.
|
||||
virtual void PostTranslation(Shader* shader) {}
|
||||
// setup_shader_post_translation_info if non-modification-specific parameters
|
||||
// of the Shader object behind the Translation can be set by this invocation.
|
||||
virtual void PostTranslation(Shader::Translation& translation,
|
||||
bool setup_shader_post_translation_info) {}
|
||||
// Sets the host disassembly on a shader.
|
||||
void set_host_disassembly(Shader* shader, std::string value) {
|
||||
shader->host_disassembly_ = std::move(value);
|
||||
void set_host_disassembly(Shader::Translation& translation,
|
||||
std::string value) {
|
||||
translation.host_disassembly_ = std::move(value);
|
||||
}
|
||||
|
||||
// Handles translation for control flow label addresses.
|
||||
|
@ -184,11 +201,9 @@ class ShaderTranslator {
|
|||
const char* name;
|
||||
uint32_t argument_count;
|
||||
uint32_t src_swizzle_component_count;
|
||||
bool disable_implicit_early_z;
|
||||
};
|
||||
|
||||
bool TranslateInternal(Shader* shader,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type);
|
||||
bool TranslateInternal(Shader::Translation& translation);
|
||||
|
||||
void MarkUcodeInstruction(uint32_t dword_offset);
|
||||
void AppendUcodeDisasm(char c);
|
||||
|
@ -242,12 +257,13 @@ class ShaderTranslator {
|
|||
|
||||
// Input shader metadata and microcode.
|
||||
xenos::ShaderType shader_type_;
|
||||
Shader::HostVertexShaderType host_vertex_shader_type_;
|
||||
const uint32_t* ucode_dwords_;
|
||||
size_t ucode_dword_count_;
|
||||
reg::SQ_PROGRAM_CNTL program_cntl_;
|
||||
uint32_t register_count_;
|
||||
|
||||
// Current host-side modification being generated.
|
||||
uint32_t modification_ = 0;
|
||||
|
||||
// Accumulated translation errors.
|
||||
std::vector<Shader::Error> errors_;
|
||||
|
||||
|
@ -268,7 +284,8 @@ class ShaderTranslator {
|
|||
// translation.
|
||||
std::set<uint32_t> label_addresses_;
|
||||
|
||||
// Detected binding information gathered before translation.
|
||||
// Detected binding information gathered before translation. Must not be
|
||||
// affected by the modification index.
|
||||
int total_attrib_count_ = 0;
|
||||
std::vector<Shader::VertexBinding> vertex_bindings_;
|
||||
std::vector<Shader::TextureBinding> texture_bindings_;
|
||||
|
@ -278,13 +295,15 @@ class ShaderTranslator {
|
|||
// These all are gathered before translation.
|
||||
// uses_register_dynamic_addressing_ for writes, writes_color_targets_,
|
||||
// writes_depth_ don't include empty used write masks.
|
||||
// Must not be affected by the modification index.
|
||||
Shader::ConstantRegisterMap constant_register_map_ = {0};
|
||||
bool uses_register_dynamic_addressing_ = false;
|
||||
bool writes_color_targets_[4] = {false, false, false, false};
|
||||
bool writes_depth_ = false;
|
||||
bool implicit_early_z_allowed_ = true;
|
||||
bool kills_pixels_ = false;
|
||||
|
||||
// Memexport info is gathered before translation.
|
||||
// Must not be affected by the modification index.
|
||||
uint32_t memexport_alloc_count_ = 0;
|
||||
// For register allocation in implementations - what was used after each
|
||||
// `alloc export`.
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,296 @@
|
|||
// generated from `xb buildhlsl`
|
||||
// source: edram_load_depth_float24and32.cs.hlsl
|
||||
const uint8_t edram_load_depth_float24and32_cs[] = {
|
||||
0x44, 0x58, 0x42, 0x43, 0xF3, 0xA3, 0xA4, 0x14, 0x0A, 0x50, 0x56, 0x49,
|
||||
0x5D, 0x09, 0x6C, 0xBF, 0x33, 0xC9, 0xC1, 0x9A, 0x01, 0x00, 0x00, 0x00,
|
||||
0xAC, 0x0D, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
|
||||
0x10, 0x0D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
|
||||
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xB4, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xCF, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 0x6F, 0x61,
|
||||
0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x73, 0x6F, 0x75, 0x72,
|
||||
0x63, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F,
|
||||
0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x64,
|
||||
0x65, 0x73, 0x74, 0x00, 0x58, 0x65, 0x45, 0x64, 0x72, 0x61, 0x6D, 0x4C,
|
||||
0x6F, 0x61, 0x64, 0x53, 0x74, 0x6F, 0x72, 0x65, 0x43, 0x6F, 0x6E, 0x73,
|
||||
0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0xAB, 0xAB, 0xE8, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x1C, 0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x01, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
|
||||
0x30, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x69, 0x02, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
|
||||
0x83, 0x02, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D,
|
||||
0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x64, 0x65,
|
||||
0x70, 0x74, 0x68, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x00, 0x64,
|
||||
0x77, 0x6F, 0x72, 0x64, 0x00, 0xAB, 0xAB, 0xAB, 0x00, 0x00, 0x13, 0x00,
|
||||
0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x02, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65,
|
||||
0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F,
|
||||
0x72, 0x5F, 0x64, 0x65, 0x70, 0x74, 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63,
|
||||
0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72,
|
||||
0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x6F, 0x66,
|
||||
0x66, 0x73, 0x65, 0x74, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61,
|
||||
0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C,
|
||||
0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64,
|
||||
0x72, 0x61, 0x6D, 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x73, 0x61, 0x6D,
|
||||
0x70, 0x6C, 0x65, 0x73, 0x5F, 0x32, 0x78, 0x5F, 0x64, 0x65, 0x70, 0x74,
|
||||
0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x4D, 0x69, 0x63, 0x72,
|
||||
0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C,
|
||||
0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F,
|
||||
0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2E, 0x31, 0x00,
|
||||
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
|
||||
0xDC, 0x09, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x77, 0x02, 0x00, 0x00,
|
||||
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
|
||||
0x46, 0x7E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x06,
|
||||
0x46, 0xEE, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02,
|
||||
0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00,
|
||||
0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02,
|
||||
0x07, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x06,
|
||||
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C,
|
||||
0x62, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
|
||||
0xFF, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06,
|
||||
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06,
|
||||
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
||||
0xA2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01,
|
||||
0x55, 0x00, 0x00, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x08, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x10, 0x02, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06,
|
||||
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x10, 0x02, 0x00, 0x26, 0x00, 0x00, 0x07,
|
||||
0x00, 0xD0, 0x00, 0x00, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x00, 0x14, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B,
|
||||
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
|
||||
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0xA0, 0x00, 0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x05, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF,
|
||||
0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
|
||||
0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0x37, 0x00, 0x00, 0x09,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38,
|
||||
0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
|
||||
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
|
||||
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
|
||||
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x02, 0x00, 0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x0A, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x56, 0x05, 0x02, 0x00, 0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x86, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08, 0xF2, 0xE0, 0x21, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0xFF, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x14, 0xE2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x56, 0x0E, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xE6, 0x0A, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
|
||||
0x12, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x94, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1F, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
};
|
|
@ -0,0 +1,117 @@
|
|||
//
|
||||
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
|
||||
//
|
||||
//
|
||||
// Buffer Definitions:
|
||||
//
|
||||
// cbuffer XeEdramLoadStoreConstants
|
||||
// {
|
||||
//
|
||||
// uint xe_edram_rt_color_depth_offset;// Offset: 0 Size: 4
|
||||
// uint xe_edram_rt_color_depth_pitch;// Offset: 4 Size: 4
|
||||
// uint xe_edram_rt_stencil_offset; // Offset: 8 Size: 4
|
||||
// uint xe_edram_rt_stencil_pitch; // Offset: 12 Size: 4
|
||||
// uint xe_edram_base_samples_2x_depth_pitch;// Offset: 16 Size: 4
|
||||
//
|
||||
// }
|
||||
//
|
||||
//
|
||||
// Resource Bindings:
|
||||
//
|
||||
// Name Type Format Dim ID HLSL Bind Count
|
||||
// ------------------------------ ---------- ------- ----------- ------- -------------- ------
|
||||
// xe_edram_load_store_source texture byte r/o T0 t0 1
|
||||
// xe_edram_load_store_dest UAV byte r/w U0 u0 1
|
||||
// XeEdramLoadStoreConstants cbuffer NA NA CB0 cb0 1
|
||||
//
|
||||
//
|
||||
//
|
||||
// Input signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// no Input
|
||||
//
|
||||
// Output signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// no Output
|
||||
cs_5_1
|
||||
dcl_globalFlags refactoringAllowed
|
||||
dcl_constantbuffer CB0[0:0][2], immediateIndexed, space=0
|
||||
dcl_resource_raw T0[0:0], space=0
|
||||
dcl_uav_raw U0[0:0], space=0
|
||||
dcl_input vThreadGroupID.xy
|
||||
dcl_input vThreadIDInGroup.xy
|
||||
dcl_input vThreadID.xy
|
||||
dcl_temps 7
|
||||
dcl_thread_group 20, 16, 1
|
||||
ishl r0.x, vThreadIDInGroup.x, l(2)
|
||||
and r0.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
|
||||
if_nz r0.y
|
||||
ult r0.y, vThreadIDInGroup.x, l(10)
|
||||
uge r0.w, vThreadIDInGroup.x, l(10)
|
||||
and r0.yw, r0.yyyw, l(0, 40, 0, -40)
|
||||
iadd r0.y, r0.w, r0.y
|
||||
iadd r0.x, r0.y, r0.x
|
||||
endif
|
||||
ushr r0.y, CB0[0][1].x, l(16)
|
||||
imad r0.y, vThreadGroupID.y, r0.y, r0.z
|
||||
iadd r0.y, r0.y, vThreadGroupID.x
|
||||
imul null, r0.z, vThreadIDInGroup.y, l(320)
|
||||
imad r0.y, r0.y, l(5120), r0.z
|
||||
ishl r0.x, r0.x, l(2)
|
||||
iadd r0.x, r0.x, r0.y
|
||||
ubfe r0.y, l(1), l(13), CB0[0][1].x
|
||||
ishl r0.y, r0.y, l(1)
|
||||
ishl r0.x, r0.x, r0.y
|
||||
ld_raw r1.xyzw, r0.x, T0[0].xyzw
|
||||
ushr r2.xyzw, r1.xyzw, l(8, 8, 8, 8)
|
||||
iadd r0.x, r0.x, l(0x00a00000)
|
||||
ld_raw r0.xyzw, r0.x, T0[0].xyzw
|
||||
ubfe r3.xyzw, l(20, 20, 20, 20), l(8, 8, 8, 8), r1.xyzw
|
||||
ushr r4.xyzw, r2.xyzw, l(20, 20, 20, 20)
|
||||
firstbit_hi r5.xyzw, r3.xyzw
|
||||
iadd r5.xyzw, r5.xyzw, l(-11, -11, -11, -11)
|
||||
movc r5.xyzw, r3.xyzw, r5.xyzw, l(21,21,21,21)
|
||||
iadd r6.xyzw, -r5.xyzw, l(1, 1, 1, 1)
|
||||
movc r6.xyzw, r4.xyzw, r4.xyzw, r6.xyzw
|
||||
ishl r5.xyzw, r3.xyzw, r5.xyzw
|
||||
and r5.xyzw, r5.xyzw, l(0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff)
|
||||
movc r3.xyzw, r4.xyzw, r3.xyzw, r5.xyzw
|
||||
ishl r4.xyzw, r6.xyzw, l(23, 23, 23, 23)
|
||||
iadd r4.xyzw, r4.xyzw, l(0x38000000, 0x38000000, 0x38000000, 0x38000000)
|
||||
ishl r3.xyzw, r3.xyzw, l(3, 3, 3, 3)
|
||||
iadd r3.xyzw, r4.xyzw, r3.xyzw
|
||||
movc r3.xyzw, r2.xyzw, r3.xyzw, l(0,0,0,0)
|
||||
iadd r4.xyzw, r0.xyzw, -r3.xyzw
|
||||
uge r5.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r0.xyzw
|
||||
and r0.xyzw, r0.xyzw, r5.xyzw
|
||||
umin r0.xyzw, r0.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
|
||||
bfi r5.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r0.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
|
||||
ushr r6.xyzw, r0.xyzw, l(23, 23, 23, 23)
|
||||
iadd r6.xyzw, -r6.xyzw, l(113, 113, 113, 113)
|
||||
umin r6.xyzw, r6.xyzw, l(24, 24, 24, 24)
|
||||
ushr r5.xyzw, r5.xyzw, r6.xyzw
|
||||
ult r6.xyzw, r0.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
|
||||
iadd r0.xyzw, r0.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
|
||||
movc r0.xyzw, r6.xyzw, r5.xyzw, r0.xyzw
|
||||
iadd r5.xyzw, r0.xyzw, l(3, 3, 3, 3)
|
||||
ubfe r0.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r0.xyzw
|
||||
iadd r0.xyzw, r0.xyzw, r5.xyzw
|
||||
ubfe r0.xyzw, l(24, 24, 24, 24), l(3, 3, 3, 3), r0.xyzw
|
||||
ieq r0.xyzw, r2.xyzw, r0.xyzw
|
||||
and r0.xyzw, r0.xyzw, l(1, 1, 1, 1)
|
||||
imad r0.xyzw, r4.xyzw, r0.xyzw, r3.xyzw
|
||||
ishl r2.xy, vThreadID.xxxx, l(4, 2, 0, 0)
|
||||
imad r2.xy, vThreadID.yyyy, CB0[0][0].ywyy, r2.xyxx
|
||||
iadd r2.xy, r2.xyxx, CB0[0][0].xzxx
|
||||
store_raw U0[0].xyzw, r2.x, r0.xyzw
|
||||
and r0.x, r1.x, l(255)
|
||||
bfi r0.yzw, l(0, 8, 8, 8), l(0, 8, 16, 24), r1.yyzw, l(0, 0, 0, 0)
|
||||
iadd r0.xy, r0.zwzz, r0.xyxx
|
||||
iadd r0.x, r0.y, r0.x
|
||||
store_raw U0[0].x, r2.y, r0.x
|
||||
ret
|
||||
// Approximately 67 instruction slots used
|
Binary file not shown.
|
@ -1,11 +1,11 @@
|
|||
// generated from `xb buildhlsl`
|
||||
// source: edram_load_depth_float.cs.hlsl
|
||||
const uint8_t edram_load_depth_float_cs[] = {
|
||||
0x44, 0x58, 0x42, 0x43, 0xF3, 0xA3, 0xA4, 0x14, 0x0A, 0x50, 0x56, 0x49,
|
||||
0x5D, 0x09, 0x6C, 0xBF, 0x33, 0xC9, 0xC1, 0x9A, 0x01, 0x00, 0x00, 0x00,
|
||||
0xAC, 0x0D, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0x44, 0x58, 0x42, 0x43, 0x17, 0xEE, 0x03, 0x06, 0xD3, 0x6E, 0x58, 0x75,
|
||||
0x66, 0x3B, 0x5B, 0x87, 0x2F, 0xF9, 0x44, 0x9E, 0x01, 0x00, 0x00, 0x00,
|
||||
0x64, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
|
||||
0x10, 0x0D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
|
||||
0xC8, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
|
||||
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
|
||||
|
@ -69,7 +69,7 @@ const uint8_t edram_load_depth_float_cs[] = {
|
|||
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
|
||||
0xDC, 0x09, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x77, 0x02, 0x00, 0x00,
|
||||
0x94, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0xA5, 0x01, 0x00, 0x00,
|
||||
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
|
||||
|
@ -126,168 +126,98 @@ const uint8_t edram_load_depth_float_cs[] = {
|
|||
0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
|
||||
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x09,
|
||||
0x32, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x0A,
|
||||
0x32, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x56, 0x05, 0x02, 0x00,
|
||||
0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x46, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x86, 0x80, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0xA0, 0x00, 0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x05, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF,
|
||||
0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x05,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
|
||||
0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0x37, 0x00, 0x00, 0x09,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38,
|
||||
0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
|
||||
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
|
||||
0x02, 0x40, 0x00, 0x00, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF,
|
||||
0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x00, 0x0C,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
|
||||
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
|
||||
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x41, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x02, 0x00, 0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x0A, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x56, 0x05, 0x02, 0x00, 0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x86, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
|
||||
0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
|
||||
0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38,
|
||||
0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08, 0xF2, 0xE0, 0x21, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0xFF, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x14, 0xE2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x56, 0x0E, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xE6, 0x0A, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0xE6, 0x0A, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
|
||||
0x12, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x94, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x94, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1F, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
|
@ -66,11 +66,12 @@ iadd r0.x, r0.x, r0.y
|
|||
ubfe r0.y, l(1), l(13), CB0[0][1].x
|
||||
ishl r0.y, r0.y, l(1)
|
||||
ishl r0.x, r0.x, r0.y
|
||||
ld_raw r1.xyzw, r0.x, T0[0].xyzw
|
||||
ushr r2.xyzw, r1.xyzw, l(8, 8, 8, 8)
|
||||
iadd r0.x, r0.x, l(0x00a00000)
|
||||
ld_raw r0.xyzw, r0.x, T0[0].xyzw
|
||||
ubfe r3.xyzw, l(20, 20, 20, 20), l(8, 8, 8, 8), r1.xyzw
|
||||
ishl r1.xy, vThreadID.xxxx, l(4, 2, 0, 0)
|
||||
imad r1.xy, vThreadID.yyyy, CB0[0][0].ywyy, r1.xyxx
|
||||
iadd r1.xy, r1.xyxx, CB0[0][0].xzxx
|
||||
ushr r2.xyzw, r0.xyzw, l(8, 8, 8, 8)
|
||||
ubfe r3.xyzw, l(20, 20, 20, 20), l(8, 8, 8, 8), r0.xyzw
|
||||
ushr r4.xyzw, r2.xyzw, l(20, 20, 20, 20)
|
||||
firstbit_hi r5.xyzw, r3.xyzw
|
||||
iadd r5.xyzw, r5.xyzw, l(-11, -11, -11, -11)
|
||||
|
@ -84,34 +85,12 @@ ishl r4.xyzw, r6.xyzw, l(23, 23, 23, 23)
|
|||
iadd r4.xyzw, r4.xyzw, l(0x38000000, 0x38000000, 0x38000000, 0x38000000)
|
||||
ishl r3.xyzw, r3.xyzw, l(3, 3, 3, 3)
|
||||
iadd r3.xyzw, r4.xyzw, r3.xyzw
|
||||
movc r3.xyzw, r2.xyzw, r3.xyzw, l(0,0,0,0)
|
||||
iadd r4.xyzw, r0.xyzw, -r3.xyzw
|
||||
uge r5.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r0.xyzw
|
||||
and r0.xyzw, r0.xyzw, r5.xyzw
|
||||
umin r0.xyzw, r0.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
|
||||
bfi r5.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r0.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
|
||||
ushr r6.xyzw, r0.xyzw, l(23, 23, 23, 23)
|
||||
iadd r6.xyzw, -r6.xyzw, l(113, 113, 113, 113)
|
||||
umin r6.xyzw, r6.xyzw, l(24, 24, 24, 24)
|
||||
ushr r5.xyzw, r5.xyzw, r6.xyzw
|
||||
ult r6.xyzw, r0.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
|
||||
iadd r0.xyzw, r0.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
|
||||
movc r0.xyzw, r6.xyzw, r5.xyzw, r0.xyzw
|
||||
iadd r5.xyzw, r0.xyzw, l(3, 3, 3, 3)
|
||||
ubfe r0.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r0.xyzw
|
||||
iadd r0.xyzw, r0.xyzw, r5.xyzw
|
||||
ubfe r0.xyzw, l(24, 24, 24, 24), l(3, 3, 3, 3), r0.xyzw
|
||||
ieq r0.xyzw, r2.xyzw, r0.xyzw
|
||||
and r0.xyzw, r0.xyzw, l(1, 1, 1, 1)
|
||||
imad r0.xyzw, r4.xyzw, r0.xyzw, r3.xyzw
|
||||
ishl r2.xy, vThreadID.xxxx, l(4, 2, 0, 0)
|
||||
imad r2.xy, vThreadID.yyyy, CB0[0][0].ywyy, r2.xyxx
|
||||
iadd r2.xy, r2.xyxx, CB0[0][0].xzxx
|
||||
store_raw U0[0].xyzw, r2.x, r0.xyzw
|
||||
and r0.x, r1.x, l(255)
|
||||
bfi r0.yzw, l(0, 8, 8, 8), l(0, 8, 16, 24), r1.yyzw, l(0, 0, 0, 0)
|
||||
iadd r0.xy, r0.zwzz, r0.xyxx
|
||||
movc r2.xyzw, r2.xyzw, r3.xyzw, l(0,0,0,0)
|
||||
store_raw U0[0].xyzw, r1.x, r2.xyzw
|
||||
and r2.x, r0.x, l(255)
|
||||
bfi r2.yzw, l(0, 8, 8, 8), l(0, 8, 16, 24), r0.yyzw, l(0, 0, 0, 0)
|
||||
iadd r0.xy, r2.zwzz, r2.xyxx
|
||||
iadd r0.x, r0.y, r0.x
|
||||
store_raw U0[0].x, r2.y, r0.x
|
||||
store_raw U0[0].x, r1.y, r0.x
|
||||
ret
|
||||
// Approximately 67 instruction slots used
|
||||
// Approximately 46 instruction slots used
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,226 @@
|
|||
// generated from `xb buildhlsl`
|
||||
// source: edram_store_depth_float24and32.cs.hlsl
|
||||
const uint8_t edram_store_depth_float24and32_cs[] = {
|
||||
0x44, 0x58, 0x42, 0x43, 0xC6, 0x10, 0x80, 0x14, 0x97, 0x01, 0xE4, 0x46,
|
||||
0x76, 0xF1, 0x67, 0xD3, 0xDF, 0x50, 0x25, 0xF7, 0x01, 0x00, 0x00, 0x00,
|
||||
0x64, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
|
||||
0xC8, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
|
||||
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xB4, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xCF, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 0x6F, 0x61,
|
||||
0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x73, 0x6F, 0x75, 0x72,
|
||||
0x63, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F,
|
||||
0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x64,
|
||||
0x65, 0x73, 0x74, 0x00, 0x58, 0x65, 0x45, 0x64, 0x72, 0x61, 0x6D, 0x4C,
|
||||
0x6F, 0x61, 0x64, 0x53, 0x74, 0x6F, 0x72, 0x65, 0x43, 0x6F, 0x6E, 0x73,
|
||||
0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0xAB, 0xAB, 0xE8, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x1C, 0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x01, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
|
||||
0x30, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x69, 0x02, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
|
||||
0x83, 0x02, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D,
|
||||
0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x64, 0x65,
|
||||
0x70, 0x74, 0x68, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x00, 0x64,
|
||||
0x77, 0x6F, 0x72, 0x64, 0x00, 0xAB, 0xAB, 0xAB, 0x00, 0x00, 0x13, 0x00,
|
||||
0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x02, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65,
|
||||
0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F,
|
||||
0x72, 0x5F, 0x64, 0x65, 0x70, 0x74, 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63,
|
||||
0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72,
|
||||
0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x6F, 0x66,
|
||||
0x66, 0x73, 0x65, 0x74, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61,
|
||||
0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C,
|
||||
0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64,
|
||||
0x72, 0x61, 0x6D, 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x73, 0x61, 0x6D,
|
||||
0x70, 0x6C, 0x65, 0x73, 0x5F, 0x32, 0x78, 0x5F, 0x64, 0x65, 0x70, 0x74,
|
||||
0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x4D, 0x69, 0x63, 0x72,
|
||||
0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C,
|
||||
0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F,
|
||||
0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2E, 0x31, 0x00,
|
||||
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
|
||||
0x94, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0xA5, 0x01, 0x00, 0x00,
|
||||
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
|
||||
0x46, 0x7E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x06,
|
||||
0x46, 0xEE, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02,
|
||||
0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00,
|
||||
0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02,
|
||||
0x05, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x09,
|
||||
0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x0A,
|
||||
0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x05, 0x02, 0x00,
|
||||
0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x80, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
|
||||
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
|
||||
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
|
||||
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x08,
|
||||
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x06, 0x70, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xE2, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x11,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x06, 0x12, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C, 0x62, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0xFF, 0x07, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06, 0x82, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xA2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x1E, 0x00, 0x00, 0x07,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, 0x55, 0x00, 0x00, 0x09,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x08,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x10, 0x02, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x10, 0x02, 0x00, 0x26, 0x00, 0x00, 0x07, 0x00, 0xD0, 0x00, 0x00,
|
||||
0x42, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x20, 0x02, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x23, 0x00, 0x00, 0x09,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
|
||||
0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, 0x22, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
|
||||
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x00, 0xA6, 0x00, 0x00, 0x08,
|
||||
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x94, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
};
|
|
@ -0,0 +1,95 @@
|
|||
//
|
||||
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
|
||||
//
|
||||
//
|
||||
// Buffer Definitions:
|
||||
//
|
||||
// cbuffer XeEdramLoadStoreConstants
|
||||
// {
|
||||
//
|
||||
// uint xe_edram_rt_color_depth_offset;// Offset: 0 Size: 4
|
||||
// uint xe_edram_rt_color_depth_pitch;// Offset: 4 Size: 4
|
||||
// uint xe_edram_rt_stencil_offset; // Offset: 8 Size: 4
|
||||
// uint xe_edram_rt_stencil_pitch; // Offset: 12 Size: 4
|
||||
// uint xe_edram_base_samples_2x_depth_pitch;// Offset: 16 Size: 4
|
||||
//
|
||||
// }
|
||||
//
|
||||
//
|
||||
// Resource Bindings:
|
||||
//
|
||||
// Name Type Format Dim ID HLSL Bind Count
|
||||
// ------------------------------ ---------- ------- ----------- ------- -------------- ------
|
||||
// xe_edram_load_store_source texture byte r/o T0 t0 1
|
||||
// xe_edram_load_store_dest UAV byte r/w U0 u0 1
|
||||
// XeEdramLoadStoreConstants cbuffer NA NA CB0 cb0 1
|
||||
//
|
||||
//
|
||||
//
|
||||
// Input signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// no Input
|
||||
//
|
||||
// Output signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// no Output
|
||||
cs_5_1
|
||||
dcl_globalFlags refactoringAllowed
|
||||
dcl_constantbuffer CB0[0:0][2], immediateIndexed, space=0
|
||||
dcl_resource_raw T0[0:0], space=0
|
||||
dcl_uav_raw U0[0:0], space=0
|
||||
dcl_input vThreadGroupID.xy
|
||||
dcl_input vThreadIDInGroup.xy
|
||||
dcl_input vThreadID.xy
|
||||
dcl_temps 5
|
||||
dcl_thread_group 20, 16, 1
|
||||
ishl r0.xy, vThreadID.xxxx, l(4, 2, 0, 0)
|
||||
imad r0.xy, vThreadID.yyyy, CB0[0][0].ywyy, r0.xyxx
|
||||
iadd r0.xy, r0.xyxx, CB0[0][0].xzxx
|
||||
ld_raw r1.xyzw, r0.x, T0[0].xyzw
|
||||
uge r2.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r1.xyzw
|
||||
and r2.xyzw, r1.xyzw, r2.xyzw
|
||||
umin r2.xyzw, r2.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
|
||||
bfi r3.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r2.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
|
||||
ushr r4.xyzw, r2.xyzw, l(23, 23, 23, 23)
|
||||
iadd r4.xyzw, -r4.xyzw, l(113, 113, 113, 113)
|
||||
umin r4.xyzw, r4.xyzw, l(24, 24, 24, 24)
|
||||
ushr r3.xyzw, r3.xyzw, r4.xyzw
|
||||
ult r4.xyzw, r2.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
|
||||
iadd r2.xyzw, r2.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
|
||||
movc r2.xyzw, r4.xyzw, r3.xyzw, r2.xyzw
|
||||
iadd r3.xyzw, r2.xyzw, l(3, 3, 3, 3)
|
||||
ubfe r2.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r2.xyzw
|
||||
iadd r2.xyzw, r2.xyzw, r3.xyzw
|
||||
ushr r2.xyzw, r2.xyzw, l(3, 3, 3, 3)
|
||||
ld_raw r0.x, r0.y, T0[0].xxxx
|
||||
ushr r0.yzw, r0.xxxx, l(0, 8, 16, 24)
|
||||
bfi r0.xyzw, l(24, 24, 24, 24), l(8, 8, 8, 8), r2.xyzw, r0.xyzw
|
||||
ishl r2.x, vThreadIDInGroup.x, l(2)
|
||||
and r2.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
|
||||
if_nz r2.y
|
||||
ult r2.y, vThreadIDInGroup.x, l(10)
|
||||
uge r2.w, vThreadIDInGroup.x, l(10)
|
||||
and r2.yw, r2.yyyw, l(0, 40, 0, -40)
|
||||
iadd r2.y, r2.w, r2.y
|
||||
iadd r2.x, r2.y, r2.x
|
||||
endif
|
||||
ushr r2.y, CB0[0][1].x, l(16)
|
||||
imad r2.y, vThreadGroupID.y, r2.y, r2.z
|
||||
iadd r2.y, r2.y, vThreadGroupID.x
|
||||
imul null, r2.z, vThreadIDInGroup.y, l(320)
|
||||
imad r2.y, r2.y, l(5120), r2.z
|
||||
ishl r2.x, r2.x, l(2)
|
||||
iadd r2.x, r2.x, r2.y
|
||||
ubfe r2.y, l(1), l(13), CB0[0][1].x
|
||||
ishl r2.y, r2.y, l(1)
|
||||
ishl r2.x, r2.x, r2.y
|
||||
store_raw U0[0].xyzw, r2.x, r0.xyzw
|
||||
iadd r0.x, r2.x, l(0x00a00000)
|
||||
store_raw U0[0].xyzw, r0.x, r1.xyzw
|
||||
ret
|
||||
// Approximately 45 instruction slots used
|
Binary file not shown.
|
@ -1,11 +1,11 @@
|
|||
// generated from `xb buildhlsl`
|
||||
// source: edram_store_depth_float.cs.hlsl
|
||||
const uint8_t edram_store_depth_float_cs[] = {
|
||||
0x44, 0x58, 0x42, 0x43, 0xC6, 0x10, 0x80, 0x14, 0x97, 0x01, 0xE4, 0x46,
|
||||
0x76, 0xF1, 0x67, 0xD3, 0xDF, 0x50, 0x25, 0xF7, 0x01, 0x00, 0x00, 0x00,
|
||||
0x64, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0x44, 0x58, 0x42, 0x43, 0xF1, 0x72, 0x64, 0x54, 0x9D, 0xF6, 0x79, 0x48,
|
||||
0x2F, 0x8C, 0xD1, 0x59, 0x56, 0x1C, 0x90, 0x9A, 0x01, 0x00, 0x00, 0x00,
|
||||
0x28, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
|
||||
0xC8, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
|
||||
0x8C, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
|
||||
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
|
||||
|
@ -69,7 +69,7 @@ const uint8_t edram_store_depth_float_cs[] = {
|
|||
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
|
||||
0x94, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0xA5, 0x01, 0x00, 0x00,
|
||||
0x58, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x96, 0x01, 0x00, 0x00,
|
||||
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
|
||||
|
@ -79,7 +79,7 @@ const uint8_t edram_store_depth_float_cs[] = {
|
|||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02,
|
||||
0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00,
|
||||
0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02,
|
||||
0x05, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x09,
|
||||
0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
|
@ -96,53 +96,53 @@ const uint8_t edram_store_depth_float_cs[] = {
|
|||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
|
||||
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
|
||||
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
|
||||
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
|
||||
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
|
||||
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
|
||||
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
|
||||
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x08,
|
||||
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
|
@ -155,64 +155,59 @@ const uint8_t edram_store_depth_float_cs[] = {
|
|||
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x06, 0x12, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C, 0x62, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0xFF, 0x07, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06, 0x82, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xA2, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x1E, 0x00, 0x00, 0x07,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, 0x55, 0x00, 0x00, 0x09,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
|
||||
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, 0x55, 0x00, 0x00, 0x09,
|
||||
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x08,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x10, 0x02, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x10, 0x02, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x10, 0x02, 0x00, 0x26, 0x00, 0x00, 0x07, 0x00, 0xD0, 0x00, 0x00,
|
||||
0x42, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x20, 0x02, 0x00,
|
||||
0x42, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x20, 0x02, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x23, 0x00, 0x00, 0x09,
|
||||
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
|
||||
0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, 0x22, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
|
||||
0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, 0x22, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
|
||||
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x00, 0xA6, 0x00, 0x00, 0x08,
|
||||
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x94, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x94, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
@ -222,5 +217,5 @@ const uint8_t edram_store_depth_float_cs[] = {
|
|||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
|
|
@ -45,51 +45,49 @@ dcl_uav_raw U0[0:0], space=0
|
|||
dcl_input vThreadGroupID.xy
|
||||
dcl_input vThreadIDInGroup.xy
|
||||
dcl_input vThreadID.xy
|
||||
dcl_temps 5
|
||||
dcl_temps 4
|
||||
dcl_thread_group 20, 16, 1
|
||||
ishl r0.xy, vThreadID.xxxx, l(4, 2, 0, 0)
|
||||
imad r0.xy, vThreadID.yyyy, CB0[0][0].ywyy, r0.xyxx
|
||||
iadd r0.xy, r0.xyxx, CB0[0][0].xzxx
|
||||
ld_raw r1.xyzw, r0.x, T0[0].xyzw
|
||||
uge r2.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r1.xyzw
|
||||
and r2.xyzw, r1.xyzw, r2.xyzw
|
||||
umin r2.xyzw, r2.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
|
||||
bfi r3.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r2.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
|
||||
ushr r4.xyzw, r2.xyzw, l(23, 23, 23, 23)
|
||||
iadd r4.xyzw, -r4.xyzw, l(113, 113, 113, 113)
|
||||
umin r4.xyzw, r4.xyzw, l(24, 24, 24, 24)
|
||||
ushr r3.xyzw, r3.xyzw, r4.xyzw
|
||||
ult r4.xyzw, r2.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
|
||||
iadd r2.xyzw, r2.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
|
||||
movc r2.xyzw, r4.xyzw, r3.xyzw, r2.xyzw
|
||||
iadd r3.xyzw, r2.xyzw, l(3, 3, 3, 3)
|
||||
ubfe r2.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r2.xyzw
|
||||
iadd r2.xyzw, r2.xyzw, r3.xyzw
|
||||
ushr r2.xyzw, r2.xyzw, l(3, 3, 3, 3)
|
||||
and r1.xyzw, r1.xyzw, r2.xyzw
|
||||
umin r1.xyzw, r1.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
|
||||
bfi r2.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r1.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
|
||||
ushr r3.xyzw, r1.xyzw, l(23, 23, 23, 23)
|
||||
iadd r3.xyzw, -r3.xyzw, l(113, 113, 113, 113)
|
||||
umin r3.xyzw, r3.xyzw, l(24, 24, 24, 24)
|
||||
ushr r2.xyzw, r2.xyzw, r3.xyzw
|
||||
ult r3.xyzw, r1.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
|
||||
iadd r1.xyzw, r1.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
|
||||
movc r1.xyzw, r3.xyzw, r2.xyzw, r1.xyzw
|
||||
iadd r2.xyzw, r1.xyzw, l(3, 3, 3, 3)
|
||||
ubfe r1.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r1.xyzw
|
||||
iadd r1.xyzw, r1.xyzw, r2.xyzw
|
||||
ushr r1.xyzw, r1.xyzw, l(3, 3, 3, 3)
|
||||
ld_raw r0.x, r0.y, T0[0].xxxx
|
||||
ushr r0.yzw, r0.xxxx, l(0, 8, 16, 24)
|
||||
bfi r0.xyzw, l(24, 24, 24, 24), l(8, 8, 8, 8), r2.xyzw, r0.xyzw
|
||||
ishl r2.x, vThreadIDInGroup.x, l(2)
|
||||
and r2.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
|
||||
if_nz r2.y
|
||||
ult r2.y, vThreadIDInGroup.x, l(10)
|
||||
uge r2.w, vThreadIDInGroup.x, l(10)
|
||||
and r2.yw, r2.yyyw, l(0, 40, 0, -40)
|
||||
iadd r2.y, r2.w, r2.y
|
||||
iadd r2.x, r2.y, r2.x
|
||||
bfi r0.xyzw, l(24, 24, 24, 24), l(8, 8, 8, 8), r1.xyzw, r0.xyzw
|
||||
ishl r1.x, vThreadIDInGroup.x, l(2)
|
||||
and r1.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
|
||||
if_nz r1.y
|
||||
ult r1.y, vThreadIDInGroup.x, l(10)
|
||||
uge r1.w, vThreadIDInGroup.x, l(10)
|
||||
and r1.yw, r1.yyyw, l(0, 40, 0, -40)
|
||||
iadd r1.y, r1.w, r1.y
|
||||
iadd r1.x, r1.y, r1.x
|
||||
endif
|
||||
ushr r2.y, CB0[0][1].x, l(16)
|
||||
imad r2.y, vThreadGroupID.y, r2.y, r2.z
|
||||
iadd r2.y, r2.y, vThreadGroupID.x
|
||||
imul null, r2.z, vThreadIDInGroup.y, l(320)
|
||||
imad r2.y, r2.y, l(5120), r2.z
|
||||
ishl r2.x, r2.x, l(2)
|
||||
iadd r2.x, r2.x, r2.y
|
||||
ubfe r2.y, l(1), l(13), CB0[0][1].x
|
||||
ishl r2.y, r2.y, l(1)
|
||||
ishl r2.x, r2.x, r2.y
|
||||
store_raw U0[0].xyzw, r2.x, r0.xyzw
|
||||
iadd r0.x, r2.x, l(0x00a00000)
|
||||
store_raw U0[0].xyzw, r0.x, r1.xyzw
|
||||
ushr r1.y, CB0[0][1].x, l(16)
|
||||
imad r1.y, vThreadGroupID.y, r1.y, r1.z
|
||||
iadd r1.y, r1.y, vThreadGroupID.x
|
||||
imul null, r1.z, vThreadIDInGroup.y, l(320)
|
||||
imad r1.y, r1.y, l(5120), r1.z
|
||||
ishl r1.x, r1.x, l(2)
|
||||
iadd r1.x, r1.x, r1.y
|
||||
ubfe r1.y, l(1), l(13), CB0[0][1].x
|
||||
ishl r1.y, r1.y, l(1)
|
||||
ishl r1.x, r1.x, r1.y
|
||||
store_raw U0[0].xyzw, r1.x, r0.xyzw
|
||||
ret
|
||||
// Approximately 45 instruction slots used
|
||||
// Approximately 43 instruction slots used
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,156 @@
|
|||
// generated from `xb buildhlsl`
|
||||
// source: float24_round.ps.hlsl
|
||||
const uint8_t float24_round_ps[] = {
|
||||
0x44, 0x58, 0x42, 0x43, 0xDF, 0x71, 0xF3, 0x0A, 0x4A, 0xDB, 0xC3, 0x80,
|
||||
0x1E, 0xE4, 0x39, 0x21, 0x59, 0x07, 0x78, 0x97, 0x01, 0x00, 0x00, 0x00,
|
||||
0x18, 0x07, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0xA0, 0x00, 0x00, 0x00, 0x90, 0x02, 0x00, 0x00, 0xC4, 0x02, 0x00, 0x00,
|
||||
0x7C, 0x06, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x4D, 0x69, 0x63, 0x72, 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52,
|
||||
0x29, 0x20, 0x48, 0x4C, 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65,
|
||||
0x72, 0x20, 0x43, 0x6F, 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31,
|
||||
0x30, 0x2E, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4E, 0xE8, 0x01, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xD9, 0x01, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x0F, 0x04, 0x00, 0x00, 0x54, 0x45, 0x58, 0x43,
|
||||
0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69,
|
||||
0x74, 0x69, 0x6F, 0x6E, 0x00, 0xAB, 0xAB, 0xAB, 0x4F, 0x53, 0x47, 0x4E,
|
||||
0x2C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x0E, 0x00, 0x00,
|
||||
0x53, 0x56, 0x5F, 0x44, 0x65, 0x70, 0x74, 0x68, 0x00, 0xAB, 0xAB, 0xAB,
|
||||
0x53, 0x48, 0x45, 0x58, 0xB0, 0x03, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
|
||||
0xEC, 0x00, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x64, 0x38, 0x00, 0x04,
|
||||
0x42, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x65, 0x00, 0x00, 0x02, 0x01, 0xC0, 0x00, 0x00, 0x68, 0x00, 0x00, 0x02,
|
||||
0x02, 0x00, 0x00, 0x00, 0x36, 0x20, 0x08, 0x05, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00,
|
||||
0x50, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x7F, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x08, 0x07,
|
||||
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F,
|
||||
0x8C, 0x00, 0x10, 0x0B, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x20, 0x07,
|
||||
0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x20, 0x08, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x20, 0x07,
|
||||
0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x55, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x08, 0x07,
|
||||
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8,
|
||||
0x37, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x08, 0x07, 0x12, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x38, 0x0F,
|
||||
0x72, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, 0x00, 0x40, 0x05,
|
||||
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x40, 0x07, 0x82, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x40, 0x09,
|
||||
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x08, 0x08,
|
||||
0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x80,
|
||||
0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x37, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x40, 0x07, 0x82, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x40, 0x07,
|
||||
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
|
||||
0x37, 0x00, 0x10, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x29, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x10, 0x07,
|
||||
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x37, 0x00, 0x08, 0x08, 0x01, 0xC0, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
};
|
|
@ -0,0 +1,74 @@
|
|||
//
|
||||
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
|
||||
//
|
||||
//
|
||||
//
|
||||
// Input signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// TEXCOORD 0 xyzw 0 NONE float
|
||||
// TEXCOORD 1 xyzw 1 NONE float
|
||||
// TEXCOORD 2 xyzw 2 NONE float
|
||||
// TEXCOORD 3 xyzw 3 NONE float
|
||||
// TEXCOORD 4 xyzw 4 NONE float
|
||||
// TEXCOORD 5 xyzw 5 NONE float
|
||||
// TEXCOORD 6 xyzw 6 NONE float
|
||||
// TEXCOORD 7 xyzw 7 NONE float
|
||||
// TEXCOORD 8 xyzw 8 NONE float
|
||||
// TEXCOORD 9 xyzw 9 NONE float
|
||||
// TEXCOORD 10 xyzw 10 NONE float
|
||||
// TEXCOORD 11 xyzw 11 NONE float
|
||||
// TEXCOORD 12 xyzw 12 NONE float
|
||||
// TEXCOORD 13 xyzw 13 NONE float
|
||||
// TEXCOORD 14 xyzw 14 NONE float
|
||||
// TEXCOORD 15 xyzw 15 NONE float
|
||||
// TEXCOORD 16 xyz 16 NONE float
|
||||
// TEXCOORD 17 xy 17 NONE float
|
||||
// SV_Position 0 xyzw 18 POS float z
|
||||
//
|
||||
//
|
||||
// Output signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// SV_Depth 0 N/A oDepth DEPTH float YES
|
||||
//
|
||||
// Pixel Shader runs at sample frequency
|
||||
//
|
||||
ps_5_1
|
||||
dcl_globalFlags refactoringAllowed
|
||||
dcl_input_ps_siv linear noperspective sample v18.z, position
|
||||
dcl_output oDepth
|
||||
dcl_temps 2
|
||||
mov_sat [precise(x)] r0.x, v18.z
|
||||
uge [precise(y)] r0.y, l(0x7fffffff), r0.x
|
||||
and [precise(x)] r0.x, r0.x, r0.y
|
||||
umin [precise(x)] r0.x, r0.x, l(0x3ffffff8)
|
||||
bfi [precise(y)] r0.y, l(23), l(0), r0.x, l(0x00800000)
|
||||
ushr [precise(z)] r0.z, r0.x, l(23)
|
||||
iadd [precise(z)] r0.z, -r0.z, l(113)
|
||||
umin [precise(z)] r0.z, r0.z, l(24)
|
||||
ushr [precise(y)] r0.y, r0.y, r0.z
|
||||
ult [precise(z)] r0.z, r0.x, l(0x38800000)
|
||||
iadd [precise(x)] r0.x, r0.x, l(0xc8000000)
|
||||
movc [precise(x)] r0.x, r0.z, r0.y, r0.x
|
||||
iadd [precise(y)] r0.y, r0.x, l(3)
|
||||
ubfe [precise(x)] r0.x, l(1), l(3), r0.x
|
||||
iadd [precise(x)] r0.x, r0.x, r0.y
|
||||
ubfe [precise(xyz)] r0.xyz, l(24, 20, 4, 0), l(3, 3, 23, 0), r0.xxxx
|
||||
firstbit_hi [precise(w)] r0.w, r0.y
|
||||
iadd [precise(w)] r0.w, r0.w, l(-11)
|
||||
movc [precise(w)] r0.w, r0.y, r0.w, l(21)
|
||||
iadd [precise(x)] r1.x, -r0.w, l(1)
|
||||
movc [precise(x)] r1.x, r0.z, r0.z, r1.x
|
||||
ishl [precise(w)] r0.w, r0.y, r0.w
|
||||
and [precise(w)] r0.w, r0.w, l(0x000fffff)
|
||||
movc [precise(y)] r0.y, r0.z, r0.y, r0.w
|
||||
ishl [precise(z)] r0.z, r1.x, l(23)
|
||||
iadd [precise(z)] r0.z, r0.z, l(0x38000000)
|
||||
ishl [precise(y)] r0.y, r0.y, l(3)
|
||||
iadd [precise(y)] r0.y, r0.z, r0.y
|
||||
movc [precise(x)] oDepth, r0.x, r0.y, l(0)
|
||||
ret
|
||||
// Approximately 30 instruction slots used
|
Binary file not shown.
|
@ -0,0 +1,100 @@
|
|||
// generated from `xb buildhlsl`
|
||||
// source: float24_truncate.ps.hlsl
|
||||
const uint8_t float24_truncate_ps[] = {
|
||||
0x44, 0x58, 0x42, 0x43, 0xB8, 0x51, 0x55, 0x1D, 0xF4, 0xF1, 0xC9, 0xC0,
|
||||
0x0C, 0x22, 0xD3, 0x43, 0x94, 0xDF, 0x83, 0x9D, 0x01, 0x00, 0x00, 0x00,
|
||||
0x7C, 0x04, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
|
||||
0xA0, 0x00, 0x00, 0x00, 0x90, 0x02, 0x00, 0x00, 0xCC, 0x02, 0x00, 0x00,
|
||||
0xE0, 0x03, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00,
|
||||
0x3C, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x4D, 0x69, 0x63, 0x72, 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52,
|
||||
0x29, 0x20, 0x48, 0x4C, 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65,
|
||||
0x72, 0x20, 0x43, 0x6F, 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31,
|
||||
0x30, 0x2E, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4E, 0xE8, 0x01, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xD9, 0x01, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x0F, 0x04, 0x00, 0x00, 0x54, 0x45, 0x58, 0x43,
|
||||
0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69,
|
||||
0x74, 0x69, 0x6F, 0x6E, 0x00, 0xAB, 0xAB, 0xAB, 0x4F, 0x53, 0x47, 0x4E,
|
||||
0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x0E, 0x00, 0x00,
|
||||
0x53, 0x56, 0x5F, 0x44, 0x65, 0x70, 0x74, 0x68, 0x4C, 0x65, 0x73, 0x73,
|
||||
0x45, 0x71, 0x75, 0x61, 0x6C, 0x00, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58,
|
||||
0x0C, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00,
|
||||
0x6A, 0x08, 0x00, 0x01, 0x64, 0x38, 0x00, 0x04, 0x42, 0x10, 0x10, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x02,
|
||||
0x01, 0x70, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00,
|
||||
0x36, 0x20, 0x08, 0x05, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2A, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x50, 0x00, 0x10, 0x07,
|
||||
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x2E,
|
||||
0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x8A, 0x00, 0x10, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x10, 0x08, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1A, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x24, 0x00, 0x10, 0x07,
|
||||
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x8C, 0x00, 0x08, 0x0A, 0x01, 0x70, 0x02, 0x00, 0x1A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x01, 0x36, 0x00, 0x08, 0x04,
|
||||
0x01, 0x70, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x01, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x94, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
|
@ -0,0 +1,55 @@
|
|||
//
|
||||
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
|
||||
//
|
||||
//
|
||||
//
|
||||
// Input signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// TEXCOORD 0 xyzw 0 NONE float
|
||||
// TEXCOORD 1 xyzw 1 NONE float
|
||||
// TEXCOORD 2 xyzw 2 NONE float
|
||||
// TEXCOORD 3 xyzw 3 NONE float
|
||||
// TEXCOORD 4 xyzw 4 NONE float
|
||||
// TEXCOORD 5 xyzw 5 NONE float
|
||||
// TEXCOORD 6 xyzw 6 NONE float
|
||||
// TEXCOORD 7 xyzw 7 NONE float
|
||||
// TEXCOORD 8 xyzw 8 NONE float
|
||||
// TEXCOORD 9 xyzw 9 NONE float
|
||||
// TEXCOORD 10 xyzw 10 NONE float
|
||||
// TEXCOORD 11 xyzw 11 NONE float
|
||||
// TEXCOORD 12 xyzw 12 NONE float
|
||||
// TEXCOORD 13 xyzw 13 NONE float
|
||||
// TEXCOORD 14 xyzw 14 NONE float
|
||||
// TEXCOORD 15 xyzw 15 NONE float
|
||||
// TEXCOORD 16 xyz 16 NONE float
|
||||
// TEXCOORD 17 xy 17 NONE float
|
||||
// SV_Position 0 xyzw 18 POS float z
|
||||
//
|
||||
//
|
||||
// Output signature:
|
||||
//
|
||||
// Name Index Mask Register SysValue Format Used
|
||||
// -------------------- ----- ------ -------- -------- ------- ------
|
||||
// SV_DepthLessEqual 0 N/A oDepthLE DEPTHLE float YES
|
||||
//
|
||||
// Pixel Shader runs at sample frequency
|
||||
//
|
||||
ps_5_1
|
||||
dcl_globalFlags refactoringAllowed
|
||||
dcl_input_ps_siv linear noperspective sample v18.z, position
|
||||
dcl_output oDepthLE
|
||||
dcl_temps 1
|
||||
mov_sat [precise(x)] r0.x, v18.z
|
||||
uge [precise(y)] r0.y, r0.x, l(0x2e800000)
|
||||
if_nz r0.y
|
||||
ubfe [precise(y)] r0.y, l(8), l(23), r0.x
|
||||
iadd [precise(y)] r0.y, -r0.y, l(116)
|
||||
imax [precise(y)] r0.y, r0.y, l(3)
|
||||
bfi [precise(x)] oDepthLE, r0.y, l(0), l(0), r0.x
|
||||
else
|
||||
mov [precise(x)] oDepthLE, l(0)
|
||||
endif
|
||||
ret
|
||||
// Approximately 11 instruction slots used
|
|
@ -7,22 +7,14 @@ void main(uint3 xe_group_id : SV_GroupID,
|
|||
uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||
uint2 tile_sample_index = xe_group_thread_id.xy;
|
||||
tile_sample_index.x *= 4u;
|
||||
uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index);
|
||||
uint4 depth24_stencil = xe_edram_load_store_source.Load4(edram_offset);
|
||||
uint4 depth24 = depth24_stencil >> 8u;
|
||||
uint4 depth32 = xe_edram_load_store_source.Load4(10485760u + edram_offset);
|
||||
// Depth. If the stored 32-bit depth converted to 24-bit is the same as the
|
||||
// stored 24-bit depth, load the 32-bit value because it has more precision
|
||||
// (and multipass rendering is possible), if it's not, convert the 24-bit
|
||||
// depth because it was overwritten by aliasing.
|
||||
uint4 depth24to32 = XeFloat20e4To32(depth24);
|
||||
uint4 depth = depth24to32 + (depth32 - depth24to32) *
|
||||
uint4(XeFloat32To20e4(depth32) == depth24);
|
||||
uint4 samples = xe_edram_load_store_source.Load4(
|
||||
XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index));
|
||||
// Depth (exact conversion ensured during drawing).
|
||||
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
|
||||
xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
|
||||
xe_edram_load_store_dest.Store4(rt_offset, depth);
|
||||
xe_edram_load_store_dest.Store4(rt_offset, XeFloat20e4To32(samples >> 8u));
|
||||
// Stencil.
|
||||
uint4 stencil = (depth24_stencil & 0xFFu) << uint4(0u, 8u, 16u, 24u);
|
||||
uint4 stencil = (samples & 0xFFu) << uint4(0u, 8u, 16u, 24u);
|
||||
stencil.xy |= stencil.zw;
|
||||
stencil.x |= stencil.y;
|
||||
rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u +
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
#include "edram_load_store.hlsli"
|
||||
#include "pixel_formats.hlsli"
|
||||
|
||||
// Loads packed 24-bit depth + 8-bit stencil samples and their 32-bit depth
// copies from the EDRAM buffer, and writes them out as a 32-bit depth buffer
// and a separate packed stencil buffer.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Each thread processes four 32-bit samples: the X sample index within the
  // tile is the thread's X index in the group times 4.
  uint2 first_sample = uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  uint packed_offset = XeEdramOffset32bpp(xe_group_id.xy, first_sample);

  // Packed samples: 24-bit depth in the upper bits, stencil in the low 8 bits.
  uint4 packed_d24s8 = xe_edram_load_store_source.Load4(packed_offset);
  uint4 stored_depth24 = packed_d24s8 >> 8u;
  // The 32-bit depth copy of each sample is kept 10485760 bytes further into
  // the same buffer.
  uint4 stored_depth32 =
      xe_edram_load_store_source.Load4(packed_offset + 10485760u);

  // Depth. Prefer the stored 32-bit value (it has more precision, which makes
  // multipass rendering possible) as long as it still converts to the stored
  // 24-bit value. If it doesn't, the 24-bit value was overwritten by aliasing,
  // so the 24-bit value is expanded instead.
  uint4 widened_depth24 = XeFloat20e4To32(stored_depth24);
  // 1 where the 32-bit copy is up to date, 0 where it is stale.
  uint4 depth32_is_current =
      uint4(XeFloat32To20e4(stored_depth32) == stored_depth24);
  uint4 depth_out =
      widened_depth24 + (stored_depth32 - widened_depth24) * depth32_is_current;
  uint depth_rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
                         xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
  xe_edram_load_store_dest.Store4(depth_rt_offset, depth_out);

  // Stencil. Move the stencil byte of each of the four samples into its own
  // byte of a single dword, then merge them.
  uint4 shifted_stencil = (packed_d24s8 & 0xFFu) << uint4(0u, 8u, 16u, 24u);
  uint packed_stencil = shifted_stencil.x | shifted_stencil.y |
                        shifted_stencil.z | shifted_stencil.w;
  uint stencil_rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch +
                           xe_thread_id.x * 4u + xe_edram_rt_stencil_offset;
  xe_edram_load_store_dest.Store(stencil_rt_offset, packed_stencil);
}
|
|
@ -5,21 +5,18 @@
|
|||
void main(uint3 xe_group_id : SV_GroupID,
|
||||
uint3 xe_group_thread_id : SV_GroupThreadID,
|
||||
uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||
// Depth.
|
||||
// Depth (exact conversion ensured during drawing).
|
||||
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
|
||||
xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
|
||||
uint4 depth32 = xe_edram_load_store_source.Load4(rt_offset);
|
||||
uint4 depth24_stencil = XeFloat32To20e4(depth32) << 8u;
|
||||
uint4 samples =
|
||||
XeFloat32To20e4(xe_edram_load_store_source.Load4(rt_offset)) << 8u;
|
||||
// Stencil.
|
||||
rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u +
|
||||
xe_edram_rt_stencil_offset;
|
||||
depth24_stencil |= (xe_edram_load_store_source.Load(rt_offset).xxxx >>
|
||||
uint4(0u, 8u, 16u, 24u)) & 0xFFu;
|
||||
samples |= (xe_edram_load_store_source.Load(rt_offset).xxxx >>
|
||||
uint4(0u, 8u, 16u, 24u)) & 0xFFu;
|
||||
uint2 tile_sample_index = xe_group_thread_id.xy;
|
||||
tile_sample_index.x *= 4u;
|
||||
uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index);
|
||||
// Store 24-bit depth for aliasing and checking if 32-bit depth is up to date.
|
||||
xe_edram_load_store_dest.Store4(edram_offset, depth24_stencil);
|
||||
// Store 32-bit depth so precision isn't lost when doing multipass rendering.
|
||||
xe_edram_load_store_dest.Store4(10485760u + edram_offset, depth32);
|
||||
xe_edram_load_store_dest.Store4(
|
||||
XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index), samples);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#include "edram_load_store.hlsli"
|
||||
#include "pixel_formats.hlsli"
|
||||
|
||||
// Reads a 32-bit depth buffer and a packed stencil buffer, and writes both the
// packed 24-bit depth + 8-bit stencil samples and the original 32-bit depth
// values into the EDRAM buffer.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Depth: four 32-bit values per thread, converted to 24 bits and moved to
  // the upper bits of each packed sample.
  uint depth_rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
                         xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
  uint4 source_depth32 = xe_edram_load_store_source.Load4(depth_rt_offset);
  uint4 packed_d24s8 = XeFloat32To20e4(source_depth32) << 8u;

  // Stencil: one dword holds the stencil bytes of all four samples; each byte
  // goes to the low 8 bits of the corresponding packed sample.
  uint stencil_rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch +
                           xe_thread_id.x * 4u + xe_edram_rt_stencil_offset;
  uint packed_stencil = xe_edram_load_store_source.Load(stencil_rt_offset);
  packed_d24s8 |= (packed_stencil >> uint4(0u, 8u, 16u, 24u)) & 0xFFu;

  // The X sample index within the tile is the thread's X index in the group
  // times 4.
  uint2 first_sample = uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  uint packed_offset = XeEdramOffset32bpp(xe_group_id.xy, first_sample);
  // The 24-bit depth is what aliasing sees, and is also used to check whether
  // the 32-bit depth is still up to date.
  xe_edram_load_store_dest.Store4(packed_offset, packed_d24s8);
  // The 32-bit depth is kept 10485760 bytes further so precision isn't lost
  // when doing multipass rendering.
  xe_edram_load_store_dest.Store4(packed_offset + 10485760u, source_depth32);
}
|
|
@ -7,8 +7,7 @@ void main(uint3 xe_group_id : SV_GroupID,
|
|||
// Depth.
|
||||
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
|
||||
xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
|
||||
uint4 samples =
|
||||
(xe_edram_load_store_source.Load4(rt_offset) & 0xFFFFFFu) << 8u;
|
||||
uint4 samples = xe_edram_load_store_source.Load4(rt_offset) << 8u;
|
||||
// Stencil.
|
||||
rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u +
|
||||
xe_edram_rt_stencil_offset;
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
#include "pixel_formats.hlsli"
|
||||
#include "xenos_draw.hlsli"
|
||||
|
||||
// Input of the float24 rounding pixel shader.
struct XePSInput {
|
||||
// Interpolants preceding the position; XeVertexPrePS is declared outside this
// file (presumably in xenos_draw.hlsli - confirm).
XeVertexPrePS pre_ps;
|
||||
// Position interpolated with the `sample` modifier; only Z is read by main.
sample float4 position : SV_Position;
|
||||
};
|
||||
|
||||
// Replaces the depth with the value it has after a round trip through the
// 20e4 (float24) format, so the written depth is exactly representable in it.
precise float main(XePSInput xe_input) : SV_Depth {
  // Input Z may be outside the viewport range (it's clamped after the shader),
  // so it is saturated before the conversion.
  precise uint depth_f32_bits = asuint(saturate(xe_input.position.z));
  precise uint depth_20e4 = XeFloat32To20e4(depth_f32_bits);
  return asfloat(XeFloat20e4To32(depth_20e4));
}
|
|
@ -0,0 +1,38 @@
|
|||
#include "pixel_formats.hlsli"
|
||||
#include "xenos_draw.hlsli"
|
||||
|
||||
// Input of the float24-truncating depth pixel shader below.
struct XePSInput {
|
||||
// Data passed from the previous stage (declared in the included headers);
// not read by this shader's main.
XeVertexPrePS pre_ps;
|
||||
// Position with the `sample` interpolation modifier; main reads only .z.
sample float4 position : SV_Position;
|
||||
};
|
||||
|
||||
// Outputs the depth truncated (not rounded) to a value representable in the
// 20e4 floating-point format. Truncation only drops low mantissa bits of a
// non-negative number, so the result is always less than or equal to the
// input depth.
precise float main(XePSInput xe_input) : SV_DepthLessEqual {
  // Background for the constants below:
  // - float32 uses an exponent bias of 127, and after saturating, the
  //   unbiased exponent is within -127...0.
  // - The smallest normalized 20e4 exponent is -14: at -14 and above, the 3
  //   lowest float32 mantissa bits must be dropped.
  // - The smallest denormalized 20e4 number is 2^-34: at exponent -34, all
  //   23 mantissa bits must be dropped.
  // - Anything below 2^-34 becomes 0.
  // The viewport range clamp happens after the shader, so Z may still be
  // outside 0...1 here - saturate it first.
  precise uint depth_bits = asuint(saturate(xe_input.position.z));

  // 0x2E800000 is 2^-34 (biased exponent 93) - smaller values can't be
  // represented as float24 after truncation and are flushed to zero.
  if (depth_bits < 0x2E800000u) {
    return asfloat(0u);
  }

  // Biased float32 exponent: 113 or more for unbiased -14 and above, 93 for
  // unbiased -34.
  uint biased_exponent = (depth_bits >> 23u) & 0xFFu;
  // Number of low bits to clear: 116 - 113 = 3 in the normalized range,
  // growing up to 116 - 93 = 23 at the smallest denormal.
  uint dropped_bits = asuint(max(116 - asint(biased_exponent), 3));
  return asfloat((depth_bits >> dropped_bits) << dropped_bits);
}
|
|
@ -495,6 +495,16 @@ void XeR11G11B10SNormToRGBA16(uint4 packed_texels, out uint4 out_01,
|
|||
// 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows [0,2).
|
||||
// We also can't clamp the stored value to 1 as load->store->load must be exact.
|
||||
|
||||
// Converts float32 bits to the 24-bit 20e4 floating-point depth format
// (20-bit mantissa, 4-bit exponent), rounding to the nearest even. The result
// is in the lower 24 bits of the returned value.
uint XeFloat32To20e4(uint f32u32) {
  // Negative numbers have the high bit set both as floats and as integers -
  // flush them to zero. Then saturate to the largest value below 2 that is
  // representable in 20e4, which also gets rid of NaNs and infinities.
  uint clamped = (f32u32 <= 0x7FFFFFFFu) ? f32u32 : 0u;
  clamped = min(clamped, 0x3FFFFFF8u);

  // Below 2^-14 (float32 biased exponent 113) the result is a 20e4 denormal:
  // make the implicit leading 1 explicit and shift it right according to the
  // exponent (the shift amount is limited to 24, the width of the
  // significand).
  uint significand = (clamped & 0x7FFFFFu) | 0x800000u;
  uint denormal_shift = min(113u - (clamped >> 23u), 24u);
  uint denormal_bits = significand >> denormal_shift;

  // For normalized results, adding 0xC8000000 subtracts 112 from the exponent
  // field (modulo 2^32), rebiasing it for 20e4.
  uint unrounded =
      (clamped < 0x38800000u) ? denormal_bits : (clamped + 0xC8000000u);

  // Drop the 3 extra mantissa bits, rounding half to even.
  uint to_even = (unrounded >> 3u) & 1u;
  return ((unrounded + 3u + to_even) >> 3u) & 0xFFFFFFu;
}
|
||||
|
||||
uint4 XeFloat32To20e4(uint4 f32u32) {
|
||||
// Keep only positive (high bit set means negative for both float and int) and
|
||||
// saturate to the maximum representable value near 2 (also dropping NaNs).
|
||||
|
@ -505,6 +515,21 @@ uint4 XeFloat32To20e4(uint4 f32u32) {
|
|||
return ((f24u32 + 3u + ((f24u32 >> 3u) & 1u)) >> 3u) & 0xFFFFFFu;
|
||||
}
|
||||
|
||||
// Converts a 24-bit 20e4 floating-point depth value (in the lower 24 bits of
// the argument) to float32 bits.
uint XeFloat20e4To32(uint f24u32) {
  uint fraction = f24u32 & 0xFFFFFu;
  uint biased_exponent = f24u32 >> 20u;
  if (biased_exponent == 0u) {
    // Denormalized input - normalize it, which is equivalent to:
    // Exponent = 1;
    // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x100000) == 0);
    uint normalize_shift = 20u - firstbithigh(fraction);
    biased_exponent = 1u - normalize_shift;
    fraction = (fraction << normalize_shift) & 0xFFFFFu;
  }
  // Rebias the exponent (+112), widen the mantissa from 20 to 23 bits, and
  // make sure zero stays zero.
  return (f24u32 != 0u)
             ? (((biased_exponent + 112u) << 23u) | (fraction << 3u))
             : 0u;
}
|
||||
|
||||
uint4 XeFloat20e4To32(uint4 f24u32) {
|
||||
uint4 mantissa = f24u32 & 0xFFFFFu;
|
||||
uint4 exponent = f24u32 >> 20u;
|
||||
|
|
|
@ -10,9 +10,9 @@ void main(point XeVertexPreGS xe_in[1],
|
|||
}
|
||||
|
||||
XeVertexPostGS xe_out;
|
||||
xe_out.interpolators = xe_in[0].post_gs.interpolators;
|
||||
xe_out.point_params.z = xe_in[0].post_gs.point_params.z;
|
||||
xe_out.clip_space_zw = xe_in[0].post_gs.clip_space_zw;
|
||||
xe_out.pre_ps.interpolators = xe_in[0].post_gs.pre_ps.interpolators;
|
||||
xe_out.pre_ps.point_params.z = xe_in[0].post_gs.pre_ps.point_params.z;
|
||||
xe_out.pre_ps.clip_space_zw = xe_in[0].post_gs.pre_ps.clip_space_zw;
|
||||
xe_out.position.zw = xe_in[0].post_gs.position.zw;
|
||||
xe_out.clip_distance_0123 = xe_in[0].post_gs.clip_distance_0123;
|
||||
xe_out.clip_distance_45 = xe_in[0].post_gs.clip_distance_45;
|
||||
|
@ -20,26 +20,27 @@ void main(point XeVertexPreGS xe_in[1],
|
|||
// Shader header writes -1.0f to point_size by default, so any positive value
|
||||
// means that it was overwritten by the translated vertex shader.
|
||||
float2 point_size =
|
||||
(xe_in[0].post_gs.point_params.z > 0.0f ? xe_in[0].post_gs.point_params.zz
|
||||
: xe_point_size);
|
||||
xe_in[0].post_gs.pre_ps.point_params.z > 0.0f
|
||||
? xe_in[0].post_gs.pre_ps.point_params.zz
|
||||
: xe_point_size;
|
||||
point_size =
|
||||
clamp(point_size, xe_point_size_min_max.xx, xe_point_size_min_max.yy) *
|
||||
xe_point_screen_to_ndc * xe_in[0].post_gs.position.w;
|
||||
|
||||
xe_out.point_params.xy = float2(0.0, 0.0);
|
||||
xe_out.pre_ps.point_params.xy = float2(0.0, 0.0);
|
||||
// TODO(Triang3l): On Vulkan, sign of Y needs to be inverted because of
|
||||
// upper-left origin.
|
||||
// TODO(Triang3l): Investigate the true signs of point sprites.
|
||||
xe_out.position.xy =
|
||||
xe_in[0].post_gs.position.xy + float2(-point_size.x, point_size.y);
|
||||
xe_stream.Append(xe_out);
|
||||
xe_out.point_params.xy = float2(0.0, 1.0);
|
||||
xe_out.pre_ps.point_params.xy = float2(0.0, 1.0);
|
||||
xe_out.position.xy = xe_in[0].post_gs.position.xy - point_size;
|
||||
xe_stream.Append(xe_out);
|
||||
xe_out.point_params.xy = float2(1.0, 0.0);
|
||||
xe_out.pre_ps.point_params.xy = float2(1.0, 0.0);
|
||||
xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size;
|
||||
xe_stream.Append(xe_out);
|
||||
xe_out.point_params.xy = float2(1.0, 1.0);
|
||||
xe_out.pre_ps.point_params.xy = float2(1.0, 1.0);
|
||||
xe_out.position.xy =
|
||||
xe_in[0].post_gs.position.xy + float2(point_size.x, -point_size.y);
|
||||
xe_stream.Append(xe_out);
|
||||
|
|
|
@ -80,16 +80,19 @@ void main(triangle XeVertexPreGS xe_in[3],
|
|||
v3_signs = float3(1.0f, 1.0f, -1.0f);
|
||||
}
|
||||
[unroll] for (int i = 0; i < 16; ++i) {
|
||||
xe_out.interpolators[i] = v3_signs.x * xe_in[0].post_gs.interpolators[i] +
|
||||
v3_signs.y * xe_in[1].post_gs.interpolators[i] +
|
||||
v3_signs.z * xe_in[2].post_gs.interpolators[i];
|
||||
xe_out.pre_ps.interpolators[i] =
|
||||
v3_signs.x * xe_in[0].post_gs.pre_ps.interpolators[i] +
|
||||
v3_signs.y * xe_in[1].post_gs.pre_ps.interpolators[i] +
|
||||
v3_signs.z * xe_in[2].post_gs.pre_ps.interpolators[i];
|
||||
}
|
||||
xe_out.point_params = v3_signs.x * xe_in[0].post_gs.point_params +
|
||||
v3_signs.y * xe_in[1].post_gs.point_params +
|
||||
v3_signs.z * xe_in[2].post_gs.point_params;
|
||||
xe_out.clip_space_zw = v3_signs.x * xe_in[0].post_gs.clip_space_zw +
|
||||
v3_signs.y * xe_in[1].post_gs.clip_space_zw +
|
||||
v3_signs.z * xe_in[2].post_gs.clip_space_zw;
|
||||
xe_out.pre_ps.point_params =
|
||||
v3_signs.x * xe_in[0].post_gs.pre_ps.point_params +
|
||||
v3_signs.y * xe_in[1].post_gs.pre_ps.point_params +
|
||||
v3_signs.z * xe_in[2].post_gs.pre_ps.point_params;
|
||||
xe_out.pre_ps.clip_space_zw =
|
||||
v3_signs.x * xe_in[0].post_gs.pre_ps.clip_space_zw +
|
||||
v3_signs.y * xe_in[1].post_gs.pre_ps.clip_space_zw +
|
||||
v3_signs.z * xe_in[2].post_gs.pre_ps.clip_space_zw;
|
||||
xe_out.position = v3_signs.x * xe_in[0].post_gs.position +
|
||||
v3_signs.y * xe_in[1].post_gs.position +
|
||||
v3_signs.z * xe_in[2].post_gs.position;
|
||||
|
|
|
@ -63,10 +63,14 @@ struct XeHSControlPointOutput {
|
|||
float index : XEVERTEXID;
|
||||
};
|
||||
|
||||
struct XeVertexPostGS {
|
||||
// Per-vertex data carried in the TEXCOORD semantics; embedded as the pre_ps
// member of XeVertexPostGS and of the pixel shader input structures.
struct XeVertexPrePS {
|
||||
// 16 four-component interpolators, starting at TEXCOORD0.
float4 interpolators[16] : TEXCOORD0;
|
||||
// Point sprite parameters. The point list geometry shader writes the corner
// coordinates (0 or 1) to .xy and reads the point size from .z.
float3 point_params : TEXCOORD16;
|
||||
// Presumably the clip-space Z and W of the vertex - TODO confirm against the
// code that writes this field.
float2 clip_space_zw : TEXCOORD17;
|
||||
};
|
||||
|
||||
struct XeVertexPostGS {
|
||||
XeVertexPrePS pre_ps;
|
||||
// Precise needed to preserve NaN - guest primitives may be converted to more
|
||||
// than 1 triangle, so need to kill them entirely manually in GS if any vertex
|
||||
// is NaN.
|
||||
|
|
|
@ -66,8 +66,22 @@ SpirvShaderTranslator::Features::Features(
|
|||
SpirvShaderTranslator::SpirvShaderTranslator(const Features& features)
|
||||
: features_(features) {}
|
||||
|
||||
void SpirvShaderTranslator::Reset() {
|
||||
ShaderTranslator::Reset();
|
||||
// Builds the packed modification bits for a shader of the given type with no
// state-dependent options applied. Only vertex shaders currently have a
// modification field (the host vertex shader type); for pixel shaders, the
// default-constructed (zero) modification is returned.
uint32_t SpirvShaderTranslator::GetDefaultModification(
    xenos::ShaderType shader_type,
    Shader::HostVertexShaderType host_vertex_shader_type) const {
  Modification default_modification;
  if (shader_type == xenos::ShaderType::kVertex) {
    default_modification.host_vertex_shader_type = host_vertex_shader_type;
  }
  return default_modification.value;
}
|
||||
|
||||
void SpirvShaderTranslator::Reset(xenos::ShaderType shader_type) {
|
||||
ShaderTranslator::Reset(shader_type);
|
||||
|
||||
builder_.reset();
|
||||
|
||||
|
@ -226,8 +240,8 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
"xe_uniform_float_constants");
|
||||
builder_->addDecoration(
|
||||
uniform_float_constants_, spv::DecorationDescriptorSet,
|
||||
int(IsSpirvFragmentShader() ? kDescriptorSetFloatConstantsPixel
|
||||
: kDescriptorSetFloatConstantsVertex));
|
||||
int(is_pixel_shader() ? kDescriptorSetFloatConstantsPixel
|
||||
: kDescriptorSetFloatConstantsVertex));
|
||||
builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding,
|
||||
0);
|
||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||
|
@ -335,7 +349,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
main_interface_.push_back(buffers_shared_memory_);
|
||||
}
|
||||
|
||||
if (IsSpirvVertexOrTessEvalShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
StartVertexOrTessEvalShaderBeforeMain();
|
||||
}
|
||||
|
||||
|
@ -383,7 +397,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
|
||||
// Write the execution model-specific prologue with access to variables in the
|
||||
// main function.
|
||||
if (IsSpirvVertexOrTessEvalShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
StartVertexOrTessEvalShaderInMain();
|
||||
}
|
||||
|
||||
|
@ -507,7 +521,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
|||
function_main_->addBlock(main_loop_merge_);
|
||||
builder_->setBuildPoint(main_loop_merge_);
|
||||
|
||||
if (IsSpirvVertexOrTessEvalShader()) {
|
||||
if (is_vertex_shader()) {
|
||||
CompleteVertexOrTessEvalShaderInMain();
|
||||
}
|
||||
|
||||
|
@ -516,12 +530,12 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
|||
|
||||
// Make the main function the entry point.
|
||||
spv::ExecutionModel execution_model;
|
||||
if (IsSpirvFragmentShader()) {
|
||||
if (is_pixel_shader()) {
|
||||
execution_model = spv::ExecutionModelFragment;
|
||||
builder_->addExecutionMode(function_main_,
|
||||
spv::ExecutionModeOriginUpperLeft);
|
||||
} else {
|
||||
assert_true(IsSpirvVertexOrTessEvalShader());
|
||||
assert_true(is_vertex_shader());
|
||||
execution_model = IsSpirvTessEvalShader()
|
||||
? spv::ExecutionModelTessellationEvaluation
|
||||
: spv::ExecutionModelVertex;
|
||||
|
@ -1479,7 +1493,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_);
|
||||
} break;
|
||||
case InstructionStorageTarget::kPosition:
|
||||
assert_true(IsSpirvVertexOrTessEvalShader());
|
||||
assert_true(is_vertex_shader());
|
||||
id_vector_temp_util_.clear();
|
||||
id_vector_temp_util_.push_back(
|
||||
builder_->makeIntConstant(kOutputPerVertexMemberPosition));
|
||||
|
|
|
@ -25,6 +25,25 @@ namespace gpu {
|
|||
|
||||
class SpirvShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
// Translation options of a shader, viewed either as individual bit fields or
// as a single 32-bit value.
union Modification {
|
||||
// If anything in this structure is changed in a way not compatible with
|
||||
// the previous layout, invalidate the pipeline storages by increasing this
|
||||
// version number (0xYYYYMMDD)!
|
||||
// TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
|
||||
// prototyping stage (easier to do small granular updates with an
|
||||
// incremental counter).
|
||||
static constexpr uint32_t kVersion = 1;
|
||||
|
||||
// Bit field view of the modification.
struct {
|
||||
// VS - pipeline stage and input configuration.
|
||||
Shader::HostVertexShaderType host_vertex_shader_type
|
||||
: Shader::kHostVertexShaderTypeBitCount;
|
||||
};
|
||||
// The whole modification as one integer; zero by default.
uint32_t value = 0;
|
||||
|
||||
// Wraps an already packed modification value (0 if not specified).
Modification(uint32_t modification_value = 0) : value(modification_value) {}
|
||||
};
|
||||
|
||||
enum : uint32_t {
|
||||
kSysFlag_XYDividedByW_Shift,
|
||||
kSysFlag_ZDividedByW_Shift,
|
||||
|
@ -118,6 +137,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
};
|
||||
SpirvShaderTranslator(const Features& features);
|
||||
|
||||
uint32_t GetDefaultModification(
|
||||
xenos::ShaderType shader_type,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex) const override;
|
||||
|
||||
static constexpr uint32_t GetSharedMemoryStorageBufferCountLog2(
|
||||
uint32_t max_storage_buffer_range) {
|
||||
if (max_storage_buffer_range >= 512 * 1024 * 1024) {
|
||||
|
@ -134,7 +158,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
}
|
||||
|
||||
protected:
|
||||
void Reset() override;
|
||||
void Reset(xenos::ShaderType shader_type) override;
|
||||
|
||||
void StartTranslation() override;
|
||||
|
||||
|
@ -166,17 +190,21 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op));
|
||||
}
|
||||
|
||||
// Returns the value of modification() wrapped in the SPIR-V translator's
// Modification union, so its individual fields can be read.
Modification GetSpirvShaderModification() const {
|
||||
return Modification(modification());
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Depth-only pixel shader.
|
||||
bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); }
|
||||
bool IsSpirvVertexShader() const {
|
||||
return IsSpirvVertexOrTessEvalShader() &&
|
||||
host_vertex_shader_type() == Shader::HostVertexShaderType::kVertex;
|
||||
return is_vertex_shader() &&
|
||||
GetSpirvShaderModification().host_vertex_shader_type ==
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
}
|
||||
bool IsSpirvTessEvalShader() const {
|
||||
return IsSpirvVertexOrTessEvalShader() &&
|
||||
host_vertex_shader_type() != Shader::HostVertexShaderType::kVertex;
|
||||
return is_vertex_shader() &&
|
||||
GetSpirvShaderModification().host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
}
|
||||
bool IsSpirvFragmentShader() const { return is_pixel_shader(); }
|
||||
|
||||
// Must be called before emitting any SPIR-V operations that must be in a
|
||||
// block in translator callbacks to ensure that if the last instruction added
|
||||
|
|
|
@ -18,8 +18,7 @@
|
|||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
|
|
@ -16,8 +16,7 @@
|
|||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -319,7 +318,7 @@ bool TextureInfo::GetPackedTileOffset(int packed_tile, uint32_t* offset_x,
|
|||
}
|
||||
|
||||
uint64_t TextureInfo::hash() const {
|
||||
return XXH64(this, sizeof(TextureInfo), 0);
|
||||
return XXH3_64bits(this, sizeof(TextureInfo));
|
||||
}
|
||||
|
||||
void TextureInfo::SetupMemoryInfo(uint32_t base_address, uint32_t mip_address) {
|
||||
|
|
|
@ -92,7 +92,7 @@ int TraceDump::Main(const std::vector<std::string>& args) {
|
|||
|
||||
bool TraceDump::Setup() {
|
||||
// Create the emulator but don't initialize so we can setup the window.
|
||||
emulator_ = std::make_unique<Emulator>("", "", "");
|
||||
emulator_ = std::make_unique<Emulator>("", "", "", "");
|
||||
X_STATUS result = emulator_->Setup(
|
||||
nullptr, nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr);
|
||||
if (XFAILED(result)) {
|
||||
|
|
|
@ -121,7 +121,7 @@ bool TraceViewer::Setup() {
|
|||
window_->Resize(1920, 1200);
|
||||
|
||||
// Create the emulator but don't initialize so we can setup the window.
|
||||
emulator_ = std::make_unique<Emulator>("", "", "");
|
||||
emulator_ = std::make_unique<Emulator>("", "", "", "");
|
||||
X_STATUS result = emulator_->Setup(
|
||||
window_.get(), nullptr, [this]() { return CreateGraphicsSystem(); },
|
||||
nullptr);
|
||||
|
@ -566,8 +566,21 @@ TraceViewer::ShaderDisplayType TraceViewer::DrawShaderTypeUI() {
|
|||
|
||||
void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) {
|
||||
// Must be prepared for advanced display modes.
|
||||
// FIXME(Triang3l): This should display the actual translation used in the
|
||||
// draw, but it may depend on multiple backend-related factors, including
|
||||
// drawing multiple times with multiple modifications, even depending on
|
||||
// values obtained during translation of other modifications (for instance,
|
||||
// a memexporting shader can be executed both as a vertex shader (to draw the
|
||||
// points) and as a compute shader (to actually export) if the host doesn't
|
||||
// support writes from vertex shaders).
|
||||
const Shader::Translation* translation = nullptr;
|
||||
if (display_type != ShaderDisplayType::kUcode) {
|
||||
if (!shader->is_valid()) {
|
||||
for (const auto& translation_pair : shader->translations()) {
|
||||
if (translation_pair.second->is_valid()) {
|
||||
translation = translation_pair.second;
|
||||
}
|
||||
}
|
||||
if (!translation) {
|
||||
ImGui::TextColored(kColorError,
|
||||
"ERROR: shader error during parsing/translation");
|
||||
return;
|
||||
|
@ -580,7 +593,7 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) {
|
|||
break;
|
||||
}
|
||||
case ShaderDisplayType::kTranslated: {
|
||||
const auto& str = shader->GetTranslatedBinaryString();
|
||||
const auto& str = translation->GetTranslatedBinaryString();
|
||||
size_t i = 0;
|
||||
bool done = false;
|
||||
while (!done && i < str.size()) {
|
||||
|
@ -600,7 +613,7 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) {
|
|||
break;
|
||||
}
|
||||
case ShaderDisplayType::kHostDisasm: {
|
||||
DrawMultilineString(shader->host_disassembly());
|
||||
DrawMultilineString(translation->host_disassembly());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -816,10 +816,11 @@ static_assert_size(TextureFetchInstruction, 12);
|
|||
// move of the third operand in case of zero multiplicands, because the term
|
||||
// may be -0, while the result should be +0 in this case.
|
||||
// http://developer.amd.com/wordpress/media/2013/10/R5xx_Acceleration_v1.5.pdf
|
||||
// Multiply-add also appears to be not fused (the SM3 behavior instruction on
|
||||
// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32) - shader translators
|
||||
// should not use instructions that may be interpreted by the host GPU as
|
||||
// fused multiply-add.
|
||||
// Multiply-add also appears to be not fused; the SM3 behavior instruction on
|
||||
// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32 (in 2012-2020, before
|
||||
// RDNA 2, which removed v_mad_f32 as well) - shader translators should not
|
||||
// use instructions that may be interpreted by the host GPU as fused
|
||||
// multiply-add.
|
||||
|
||||
enum class AluScalarOpcode : uint32_t {
|
||||
// Floating-Point Add
|
||||
|
@ -1147,6 +1148,19 @@ enum class AluScalarOpcode : uint32_t {
|
|||
kRetainPrev = 50,
|
||||
};
|
||||
|
||||
// Whether the scalar ALU opcode is one of the kills_* opcodes.
constexpr bool AluScalarOpcodeIsKill(AluScalarOpcode scalar_opcode) {
  return scalar_opcode == AluScalarOpcode::kKillsEq ||
         scalar_opcode == AluScalarOpcode::kKillsGt ||
         scalar_opcode == AluScalarOpcode::kKillsGe ||
         scalar_opcode == AluScalarOpcode::kKillsNe ||
         scalar_opcode == AluScalarOpcode::kKillsOne;
}
|
||||
|
||||
enum class AluVectorOpcode : uint32_t {
|
||||
// Per-Component Floating-Point Add
|
||||
// add/ADDv dest, src0, src1
|
||||
|
@ -1471,27 +1485,37 @@ enum class AluVectorOpcode : uint32_t {
|
|||
kMaxA = 29,
|
||||
};
|
||||
|
||||
// Whether the vector ALU opcode is one of the kill_* opcodes.
constexpr bool AluVectorOpcodeIsKill(AluVectorOpcode vector_opcode) {
  return vector_opcode == AluVectorOpcode::kKillEq ||
         vector_opcode == AluVectorOpcode::kKillGt ||
         vector_opcode == AluVectorOpcode::kKillGe ||
         vector_opcode == AluVectorOpcode::kKillNe;
}
|
||||
|
||||
// Whether the vector instruction has side effects such as discarding a pixel or
|
||||
// setting the predicate and can't be ignored even if it doesn't write to
|
||||
// anywhere. Note that all scalar operations except for retain_prev have a side
|
||||
// effect of modifying the previous scalar result register, so they must always
|
||||
// be executed even if not writing.
|
||||
constexpr bool AluVectorOpHasSideEffects(AluVectorOpcode vector_opcode) {
|
||||
if (AluVectorOpcodeIsKill(vector_opcode)) {
|
||||
return true;
|
||||
}
|
||||
switch (vector_opcode) {
|
||||
case AluVectorOpcode::kSetpEqPush:
|
||||
case AluVectorOpcode::kSetpNePush:
|
||||
case AluVectorOpcode::kSetpGtPush:
|
||||
case AluVectorOpcode::kSetpGePush:
|
||||
case AluVectorOpcode::kKillEq:
|
||||
case AluVectorOpcode::kKillGt:
|
||||
case AluVectorOpcode::kKillGe:
|
||||
case AluVectorOpcode::kKillNe:
|
||||
case AluVectorOpcode::kMaxA:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Whether each component of a source operand is used at all in the instruction
|
||||
|
|
|
@ -627,6 +627,17 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
}
|
||||
// TODO(Triang3l): Get a pixel shader.
|
||||
VulkanShader* pixel_shader = nullptr;
|
||||
SpirvShaderTranslator::Modification vertex_shader_modification;
|
||||
SpirvShaderTranslator::Modification pixel_shader_modification;
|
||||
if (!pipeline_cache_->GetCurrentShaderModifications(
|
||||
vertex_shader_modification, pixel_shader_modification)) {
|
||||
return false;
|
||||
}
|
||||
VulkanShader::VulkanTranslation* vertex_shader_translation =
|
||||
static_cast<VulkanShader::VulkanTranslation*>(
|
||||
vertex_shader->GetOrCreateTranslation(
|
||||
vertex_shader_modification.value));
|
||||
VulkanShader::VulkanTranslation* pixel_shader_translation = nullptr;
|
||||
|
||||
VulkanRenderTargetCache::FramebufferKey framebuffer_key;
|
||||
if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) {
|
||||
|
@ -648,7 +659,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
// current_graphics_pipeline_layout_.
|
||||
VkPipeline pipeline;
|
||||
const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider;
|
||||
if (!pipeline_cache_->ConfigurePipeline(vertex_shader, pixel_shader,
|
||||
if (!pipeline_cache_->ConfigurePipeline(vertex_shader_translation,
|
||||
pixel_shader_translation,
|
||||
framebuffer_key.render_pass_key,
|
||||
pipeline, pipeline_layout_provider)) {
|
||||
return false;
|
||||
|
@ -713,7 +725,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
draw_util::GetHostViewportInfo(
|
||||
regs, 1.0f, 1.0f, false,
|
||||
float(device_properties.limits.maxViewportDimensions[0]),
|
||||
float(device_properties.limits.maxViewportDimensions[1]), true,
|
||||
float(device_properties.limits.maxViewportDimensions[1]), true, false,
|
||||
viewport_info);
|
||||
|
||||
// Update fixed-function dynamic state.
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
|
@ -84,7 +86,8 @@ VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
const uint32_t* host_address,
|
||||
uint32_t dword_count) {
|
||||
// Hash the input memory and lookup the shader.
|
||||
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
|
||||
uint64_t data_hash =
|
||||
XXH3_64bits(host_address, dword_count * sizeof(uint32_t));
|
||||
auto it = shaders_.find(data_hash);
|
||||
if (it != shaders_.end()) {
|
||||
// Shader has been previously loaded.
|
||||
|
@ -94,16 +97,31 @@ VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
// Always create the shader and stash it away.
|
||||
// We need to track it even if it fails translation so we know not to try
|
||||
// again.
|
||||
VulkanShader* shader =
|
||||
new VulkanShader(shader_type, data_hash, host_address, dword_count);
|
||||
VulkanShader* shader = new VulkanShader(
|
||||
shader_type, data_hash, host_address, dword_count,
|
||||
command_processor_.GetVulkanContext().GetVulkanProvider());
|
||||
shaders_.emplace(data_hash, shader);
|
||||
if (!cvars::dump_shaders.empty()) {
|
||||
shader->DumpUcodeBinary(cvars::dump_shaders);
|
||||
}
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
||||
// Writes the shader modifications to use for the current draw to both output
// parameters. At the moment, these are always the translator's defaults, and
// the function always succeeds.
bool VulkanPipelineCache::GetCurrentShaderModifications(
    SpirvShaderTranslator::Modification& vertex_shader_modification_out,
    SpirvShaderTranslator::Modification& pixel_shader_modification_out) const {
  using Modification = SpirvShaderTranslator::Modification;
  // TODO(Triang3l): Tessellation, depth output.
  const uint32_t default_vertex_bits =
      shader_translator_->GetDefaultModification(xenos::ShaderType::kVertex);
  vertex_shader_modification_out = Modification(default_vertex_bits);
  const uint32_t default_pixel_bits =
      shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel);
  pixel_shader_modification_out = Modification(default_pixel_bits);
  return true;
}
|
||||
|
||||
bool VulkanPipelineCache::EnsureShadersTranslated(
|
||||
VulkanShader* vertex_shader, VulkanShader* pixel_shader,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type) {
|
||||
VulkanShader::VulkanTranslation* vertex_shader,
|
||||
VulkanShader::VulkanTranslation* pixel_shader) {
|
||||
const RegisterFile& regs = register_file_;
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
|
||||
|
@ -133,7 +151,8 @@ bool VulkanPipelineCache::EnsureShadersTranslated(
|
|||
}
|
||||
|
||||
bool VulkanPipelineCache::ConfigurePipeline(
|
||||
VulkanShader* vertex_shader, VulkanShader* pixel_shader,
|
||||
VulkanShader::VulkanTranslation* vertex_shader,
|
||||
VulkanShader::VulkanTranslation* pixel_shader,
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key,
|
||||
VkPipeline& pipeline_out,
|
||||
const PipelineLayoutProvider*& pipeline_layout_out) {
|
||||
|
@ -160,8 +179,7 @@ bool VulkanPipelineCache::ConfigurePipeline(
|
|||
}
|
||||
|
||||
// Create the pipeline if not the latest and not already existing.
|
||||
if (!EnsureShadersTranslated(vertex_shader, pixel_shader,
|
||||
Shader::HostVertexShaderType::kVertex)) {
|
||||
if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) {
|
||||
return false;
|
||||
}
|
||||
const PipelineLayoutProvider* pipeline_layout =
|
||||
|
@ -189,24 +207,22 @@ bool VulkanPipelineCache::ConfigurePipeline(
|
|||
return true;
|
||||
}
|
||||
|
||||
bool VulkanPipelineCache::TranslateShader(SpirvShaderTranslator& translator,
|
||||
VulkanShader& shader,
|
||||
reg::SQ_PROGRAM_CNTL cntl) {
|
||||
bool VulkanPipelineCache::TranslateShader(
|
||||
SpirvShaderTranslator& translator,
|
||||
VulkanShader::VulkanTranslation& translation, reg::SQ_PROGRAM_CNTL cntl) {
|
||||
// Perform translation.
|
||||
// If this fails the shader will be marked as invalid and ignored later.
|
||||
// TODO(Triang3l): Host vertex shader type.
|
||||
if (!translator.Translate(&shader, cntl,
|
||||
Shader::HostVertexShaderType::kVertex)) {
|
||||
if (!translator.Translate(translation, cntl)) {
|
||||
XELOGE("Shader {:016X} translation failed; marking as ignored",
|
||||
shader.ucode_data_hash());
|
||||
translation.shader().ucode_data_hash());
|
||||
return false;
|
||||
}
|
||||
return shader.InitializeShaderModule(
|
||||
command_processor_.GetVulkanContext().GetVulkanProvider());
|
||||
return translation.GetOrCreateShaderModule() != VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
bool VulkanPipelineCache::GetCurrentStateDescription(
|
||||
const VulkanShader* vertex_shader, const VulkanShader* pixel_shader,
|
||||
const VulkanShader::VulkanTranslation* vertex_shader,
|
||||
const VulkanShader::VulkanTranslation* pixel_shader,
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key,
|
||||
PipelineDescription& description_out) const {
|
||||
description_out.Reset();
|
||||
|
@ -215,9 +231,14 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
|||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
||||
|
||||
description_out.vertex_shader_hash = vertex_shader->ucode_data_hash();
|
||||
description_out.pixel_shader_hash =
|
||||
pixel_shader ? pixel_shader->ucode_data_hash() : 0;
|
||||
description_out.vertex_shader_hash =
|
||||
vertex_shader->shader().ucode_data_hash();
|
||||
description_out.vertex_shader_modification = vertex_shader->modification();
|
||||
if (pixel_shader) {
|
||||
description_out.pixel_shader_hash =
|
||||
pixel_shader->shader().ucode_data_hash();
|
||||
description_out.pixel_shader_modification = pixel_shader->modification();
|
||||
}
|
||||
description_out.render_pass_key = render_pass_key;
|
||||
|
||||
xenos::PrimitiveType primitive_type = vgt_draw_initiator.prim_type;
|
||||
|
@ -321,11 +342,11 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
|||
|
||||
if (creation_arguments.pixel_shader) {
|
||||
XELOGGPU("Creating graphics pipeline state with VS {:016X}, PS {:016X}",
|
||||
creation_arguments.vertex_shader->ucode_data_hash(),
|
||||
creation_arguments.pixel_shader->ucode_data_hash());
|
||||
creation_arguments.vertex_shader->shader().ucode_data_hash(),
|
||||
creation_arguments.pixel_shader->shader().ucode_data_hash());
|
||||
} else {
|
||||
XELOGGPU("Creating graphics pipeline state with VS {:016X}",
|
||||
creation_arguments.vertex_shader->ucode_data_hash());
|
||||
creation_arguments.vertex_shader->shader().ucode_data_hash());
|
||||
}
|
||||
|
||||
const PipelineDescription& description = creation_arguments.pipeline->first;
|
||||
|
@ -514,11 +535,11 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
|||
/* if (creation_arguments.pixel_shader) {
|
||||
XELOGE(
|
||||
"Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
|
||||
creation_arguments.vertex_shader->ucode_data_hash(),
|
||||
creation_arguments.pixel_shader->ucode_data_hash());
|
||||
creation_arguments.vertex_shader->shader().ucode_data_hash(),
|
||||
creation_arguments.pixel_shader->shader().ucode_data_hash());
|
||||
} else {
|
||||
XELOGE("Failed to create graphics pipeline with VS {:016X}",
|
||||
creation_arguments.vertex_shader->ucode_data_hash());
|
||||
creation_arguments.vertex_shader->shader().ucode_data_hash());
|
||||
} */
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -16,9 +16,9 @@
|
|||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
|
||||
|
@ -55,14 +55,19 @@ class VulkanPipelineCache {
|
|||
uint32_t guest_address, const uint32_t* host_address,
|
||||
uint32_t dword_count);
|
||||
|
||||
// Retrieves the shader modifications for the current state, and returns
|
||||
// whether they are valid.
|
||||
bool GetCurrentShaderModifications(
|
||||
SpirvShaderTranslator::Modification& vertex_shader_modification_out,
|
||||
SpirvShaderTranslator::Modification& pixel_shader_modification_out) const;
|
||||
|
||||
// Translates shaders if needed, also making shader info up to date.
|
||||
bool EnsureShadersTranslated(
|
||||
VulkanShader* vertex_shader, VulkanShader* pixel_shader,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type);
|
||||
bool EnsureShadersTranslated(VulkanShader::VulkanTranslation* vertex_shader,
|
||||
VulkanShader::VulkanTranslation* pixel_shader);
|
||||
|
||||
// TODO(Triang3l): Return a deferred creation handle.
|
||||
bool ConfigurePipeline(VulkanShader* vertex_shader,
|
||||
VulkanShader* pixel_shader,
|
||||
bool ConfigurePipeline(VulkanShader::VulkanTranslation* vertex_shader,
|
||||
VulkanShader::VulkanTranslation* pixel_shader,
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key,
|
||||
VkPipeline& pipeline_out,
|
||||
const PipelineLayoutProvider*& pipeline_layout_out);
|
||||
|
@ -102,6 +107,8 @@ class VulkanPipelineCache {
|
|||
uint64_t vertex_shader_hash;
|
||||
// 0 if no pixel shader.
|
||||
uint64_t pixel_shader_hash;
|
||||
uint32_t vertex_shader_modification;
|
||||
uint32_t pixel_shader_modification;
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key;
|
||||
|
||||
// Input assembly.
|
||||
|
@ -126,7 +133,7 @@ class VulkanPipelineCache {
|
|||
return std::memcmp(this, &description, sizeof(*this)) == 0;
|
||||
}
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); }
|
||||
uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); }
|
||||
struct Hasher {
|
||||
size_t operator()(const PipelineDescription& description) const {
|
||||
return size_t(description.GetHash());
|
||||
|
@ -146,17 +153,19 @@ class VulkanPipelineCache {
|
|||
// creation threads, with everything needed from caches pre-looked-up.
|
||||
struct PipelineCreationArguments {
|
||||
std::pair<const PipelineDescription, Pipeline>* pipeline;
|
||||
const VulkanShader* vertex_shader;
|
||||
const VulkanShader* pixel_shader;
|
||||
const VulkanShader::VulkanTranslation* vertex_shader;
|
||||
const VulkanShader::VulkanTranslation* pixel_shader;
|
||||
VkRenderPass render_pass;
|
||||
};
|
||||
|
||||
// Can be called from multiple threads.
|
||||
bool TranslateShader(SpirvShaderTranslator& translator, VulkanShader& shader,
|
||||
bool TranslateShader(SpirvShaderTranslator& translator,
|
||||
VulkanShader::VulkanTranslation& translation,
|
||||
reg::SQ_PROGRAM_CNTL cntl);
|
||||
|
||||
bool GetCurrentStateDescription(
|
||||
const VulkanShader* vertex_shader, const VulkanShader* pixel_shader,
|
||||
const VulkanShader::VulkanTranslation* vertex_shader,
|
||||
const VulkanShader::VulkanTranslation* pixel_shader,
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key,
|
||||
PipelineDescription& description_out) const;
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include <cstring>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
|
@ -49,7 +49,7 @@ class VulkanRenderTargetCache {
|
|||
return std::memcmp(this, &key, sizeof(*this)) == 0;
|
||||
}
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); }
|
||||
uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); }
|
||||
struct Hasher {
|
||||
size_t operator()(const FramebufferKey& description) const {
|
||||
return size_t(description.GetHash());
|
||||
|
|
|
@ -11,22 +11,30 @@
|
|||
|
||||
#include <cstdint>
|
||||
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
|
||||
// Destroys the Vulkan shader module held by this translation, if one is
// stored in shader_module_.
VulkanShader::VulkanTranslation::~VulkanTranslation() {
|
||||
// A null handle means there is nothing to destroy.
if (shader_module_) {
|
||||
// The device and the device function table are taken from the provider
// reference stored on the owning VulkanShader.
const ui::vulkan::VulkanProvider& provider =
|
||||
static_cast<const VulkanShader&>(shader()).provider_;
|
||||
// The last argument (allocation callbacks) is nullptr.
provider.dfn().vkDestroyShaderModule(provider.device(), shader_module_,
|
||||
nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
bool VulkanShader::InitializeShaderModule(
|
||||
const ui::vulkan::VulkanProvider& provider) {
|
||||
VkShaderModule VulkanShader::VulkanTranslation::GetOrCreateShaderModule() {
|
||||
if (!is_valid()) {
|
||||
return false;
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
if (shader_module_ != VK_NULL_HANDLE) {
|
||||
return true;
|
||||
return shader_module_;
|
||||
}
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
static_cast<const VulkanShader&>(shader()).provider_;
|
||||
VkShaderModuleCreateInfo shader_module_create_info;
|
||||
shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
shader_module_create_info.pNext = nullptr;
|
||||
|
@ -37,10 +45,21 @@ bool VulkanShader::InitializeShaderModule(
|
|||
if (provider.dfn().vkCreateShaderModule(provider.device(),
|
||||
&shader_module_create_info, nullptr,
|
||||
&shader_module_) != VK_SUCCESS) {
|
||||
is_valid_ = false;
|
||||
return false;
|
||||
MakeInvalid();
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
return true;
|
||||
return shader_module_;
|
||||
}
|
||||
|
||||
// Constructs a Vulkan shader: forwards the shader type, data hash and ucode
// dwords to the Shader base class and binds provider_ to `provider`.
// provider_ is a reference member, so `provider` has to remain valid for as
// long as this shader (and the translations that read provider_) exist.
VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count,
|
||||
const ui::vulkan::VulkanProvider& provider)
|
||||
: Shader(shader_type, data_hash, dword_ptr, dword_count),
|
||||
provider_(provider) {}
|
||||
|
||||
// Allocates a new VulkanTranslation bound to this shader for the given
// modification value and returns it as a raw pointer.
// NOTE(review): the result comes from a plain `new`; which code deletes it is
// not visible here - presumably the Shader base class owns it (TODO confirm).
Shader::Translation* VulkanShader::CreateTranslationInstance(
|
||||
uint32_t modification) {
|
||||
return new VulkanTranslation(*this, modification);
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
|
@ -22,14 +22,28 @@ namespace vulkan {
|
|||
|
||||
class VulkanShader : public Shader {
|
||||
public:
|
||||
VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count);
|
||||
class VulkanTranslation : public Translation {
|
||||
public:
|
||||
VulkanTranslation(VulkanShader& shader, uint32_t modification)
|
||||
: Translation(shader, modification) {}
|
||||
~VulkanTranslation() override;
|
||||
|
||||
bool InitializeShaderModule(const ui::vulkan::VulkanProvider& provider);
|
||||
VkShaderModule shader_module() const { return shader_module_; }
|
||||
VkShaderModule GetOrCreateShaderModule();
|
||||
VkShaderModule shader_module() const { return shader_module_; }
|
||||
|
||||
private:
|
||||
VkShaderModule shader_module_ = VK_NULL_HANDLE;
|
||||
};
|
||||
|
||||
VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
|
||||
const uint32_t* dword_ptr, uint32_t dword_count,
|
||||
const ui::vulkan::VulkanProvider& provider);
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint32_t modification) override;
|
||||
|
||||
private:
|
||||
VkShaderModule shader_module_ = VK_NULL_HANDLE;
|
||||
const ui::vulkan::VulkanProvider& provider_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
|
@ -9,17 +9,41 @@
|
|||
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
#include <algorithm>
#include <cmath>
#include <cstring>
|
||||
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace xenos {
|
||||
|
||||
// Based on CFloat24 from d3dref9.dll and the 6e4 code from:
|
||||
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
||||
// 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows [0,2).
|
||||
|
||||
// Converts an IEEE-754 32-bit float to the 24-bit 20e4 floating-point format
// (4-bit exponent in bits 20:23, 20-bit mantissa in bits 0:19), rounding to
// the nearest even.
//
// f32: value to convert. Zero, negative values and NaN all map to 0.
// Returns the 20e4 bit pattern in bits 0:23. Inputs at or above the largest
// encodable value saturate to 0xFFFFFF; inputs below 2^-14 are encoded as
// denormals.
uint32_t Float32To20e4(float f32) {
  if (!(f32 > 0.0f)) {
    // Positive only, and not -0 or NaN.
    return 0;
  }
  // Read the bit pattern through memcpy: dereferencing a reinterpret_cast'ed
  // uint32_t pointer to a float object violates strict aliasing (undefined
  // behavior), while memcpy is well-defined and compiles to the same move.
  static_assert(sizeof(uint32_t) == sizeof(float),
                "float is expected to be a 32-bit type");
  uint32_t f32u32;
  std::memcpy(&f32u32, &f32, sizeof(f32u32));
  if (f32u32 >= 0x3FFFFFF8) {
    // Saturate: 0x3FFFFFF8 is the float whose top 20 mantissa bits are all
    // set with exponent 0, i.e. exactly the largest 20e4 value.
    return 0xFFFFFF;
  }
  if (f32u32 < 0x38800000) {
    // The number is too small to be represented as a normalized 20e4.
    // Convert it to a denormalized value: make the implicit leading 1
    // explicit, then shift right by the exponent deficit. The shift is
    // clamped to 24 so that very small inputs flush to 0 instead of shifting
    // by the full width of the type or more.
    uint32_t shift = std::min(uint32_t(113 - (f32u32 >> 23)), uint32_t(24));
    f32u32 = (0x800000 | (f32u32 & 0x7FFFFF)) >> shift;
  } else {
    // Rebias the exponent to represent the value as a normalized 20e4
    // (relies on unsigned wraparound, equivalent to subtracting 112 << 23).
    f32u32 += 0xC8000000u;
  }
  // Drop the 3 extra mantissa bits, rounding to nearest with ties going to
  // the even result (the +((x >> 3) & 1) term breaks ties upwards only when
  // the kept part is odd).
  return ((f32u32 + 3 + ((f32u32 >> 3) & 1)) >> 3) & 0xFFFFFF;
}
|
||||
|
||||
float Float20e4To32(uint32_t f24) {
|
||||
// Based on CFloat24 from d3dref9.dll and the 6e4 code from:
|
||||
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
||||
// 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows
|
||||
// [0,2).
|
||||
f24 &= 0xFFFFFF;
|
||||
if (!f24) {
|
||||
return 0.0f;
|
||||
|
|
|
@ -305,6 +305,9 @@ enum class DepthRenderTargetFormat : uint32_t {
|
|||
|
||||
const char* GetDepthRenderTargetFormatName(DepthRenderTargetFormat format);
|
||||
|
||||
// Converts an IEEE-754 32-bit floating-point number to Xenos floating-point
|
||||
// depth, rounding to the nearest even.
|
||||
uint32_t Float32To20e4(float f32);
|
||||
// Converts Xenos floating-point depth in bits 0:23 (not clamping) to an
|
||||
// IEEE-754 32-bit floating-point number.
|
||||
float Float20e4To32(uint32_t f24);
|
||||
|
@ -1036,10 +1039,9 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
|
|||
ClampMode clamp_y : 3; // +13
|
||||
ClampMode clamp_z : 3; // +16
|
||||
SignedRepeatingFractionMode signed_rf_mode_all : 1; // +19
|
||||
// TODO(Triang3l): 1 or 2 dim_tbd bits?
|
||||
uint32_t unk_0 : 2; // +20
|
||||
uint32_t pitch : 9; // +22 byte_pitch >> 5
|
||||
uint32_t tiled : 1; // +31
|
||||
uint32_t dim_tbd : 2; // +20
|
||||
uint32_t pitch : 9; // +22 byte_pitch >> 5
|
||||
uint32_t tiled : 1; // +31
|
||||
|
||||
TextureFormat format : 6; // +0 dword_1
|
||||
Endian endianness : 2; // +6
|
||||
|
|
|
@ -38,6 +38,7 @@ DEFINE_string(hid, "any", "Input system. Use: [any, nop, sdl, winkey, xinput]",
|
|||
"General");
|
||||
|
||||
#define MAX_USERS 4
|
||||
#define ROW_HEIGHT_GENERAL 60
|
||||
#define COL_WIDTH_STATE 320
|
||||
#define COL_WIDTH_STROKE 416
|
||||
|
||||
|
@ -45,6 +46,7 @@ namespace xe {
|
|||
namespace hid {
|
||||
|
||||
std::unique_ptr<xe::hid::InputSystem> input_system_;
|
||||
bool is_active = true;
|
||||
|
||||
std::vector<std::unique_ptr<hid::InputDriver>> CreateInputDrivers(
|
||||
ui::Window* window) {
|
||||
|
@ -118,7 +120,7 @@ int hid_demo_main(const std::vector<std::string>& args) {
|
|||
loop->on_quit.AddListener([&window](xe::ui::UIEvent* e) { window.reset(); });
|
||||
|
||||
// Initial size setting, done here so that it knows the menu exists.
|
||||
window->Resize(COL_WIDTH_STATE + COL_WIDTH_STROKE, 500);
|
||||
window->Resize(COL_WIDTH_STATE + COL_WIDTH_STROKE, ROW_HEIGHT_GENERAL + 500);
|
||||
|
||||
// Create the graphics context used for drawing and setup the window.
|
||||
std::unique_ptr<xe::ui::GraphicsProvider> graphics_provider;
|
||||
|
@ -133,7 +135,9 @@ int hid_demo_main(const std::vector<std::string>& args) {
|
|||
input_system_ = std::make_unique<xe::hid::InputSystem>(window.get());
|
||||
auto drivers = CreateInputDrivers(window.get());
|
||||
for (size_t i = 0; i < drivers.size(); ++i) {
|
||||
input_system_->AddDriver(std::move(drivers[i]));
|
||||
auto& driver = drivers[i];
|
||||
driver->set_is_active_callback([]() -> bool { return is_active; });
|
||||
input_system_->AddDriver(std::move(driver));
|
||||
}
|
||||
|
||||
window->Invalidate();
|
||||
|
@ -149,10 +153,22 @@ int hid_demo_main(const std::vector<std::string>& args) {
|
|||
ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoSavedSettings |
|
||||
ImGuiWindowFlags_NoScrollbar;
|
||||
|
||||
ImGui::Begin("GetState()", nullptr, wflags);
|
||||
ImGui::Begin("General", nullptr, wflags);
|
||||
{
|
||||
ImGui::SetWindowPos(ImVec2(0, 0));
|
||||
ImGui::SetWindowSize(ImVec2(COL_WIDTH_STATE, io.DisplaySize.y));
|
||||
ImGui::SetWindowSize(
|
||||
ImVec2(COL_WIDTH_STATE + COL_WIDTH_STROKE, ROW_HEIGHT_GENERAL));
|
||||
|
||||
ImGui::Text("Input System (hid) = \"%s\"", cvars::hid.c_str());
|
||||
ImGui::Checkbox("is_active", &is_active);
|
||||
}
|
||||
ImGui::End();
|
||||
|
||||
ImGui::Begin("GetState()", nullptr, wflags);
|
||||
{
|
||||
ImGui::SetWindowPos(ImVec2(0, ROW_HEIGHT_GENERAL));
|
||||
ImGui::SetWindowSize(
|
||||
ImVec2(COL_WIDTH_STATE, io.DisplaySize.y - ROW_HEIGHT_GENERAL));
|
||||
|
||||
static bool enable_GetState = false;
|
||||
ImGui::Checkbox("Active", &enable_GetState);
|
||||
|
@ -167,8 +183,9 @@ int hid_demo_main(const std::vector<std::string>& args) {
|
|||
|
||||
ImGui::Begin("GetKeystroke()", nullptr, wflags);
|
||||
{
|
||||
ImGui::SetWindowPos(ImVec2(COL_WIDTH_STATE, 0));
|
||||
ImGui::SetWindowSize(ImVec2(COL_WIDTH_STROKE, io.DisplaySize.y));
|
||||
ImGui::SetWindowPos(ImVec2(COL_WIDTH_STATE, ROW_HEIGHT_GENERAL));
|
||||
ImGui::SetWindowSize(
|
||||
ImVec2(COL_WIDTH_STROKE, io.DisplaySize.y - ROW_HEIGHT_GENERAL));
|
||||
|
||||
static bool enable_GetKeystroke = false;
|
||||
static bool hide_repeats = false;
|
||||
|
|
|
@ -77,7 +77,7 @@ X_STATUS SDLInputDriver::Setup() {
|
|||
sdl_events_initialized_ = true;
|
||||
|
||||
SDL_EventFilter event_filter{[](void* userdata, SDL_Event* event) -> int {
|
||||
if (!userdata) {
|
||||
if (!userdata || !event) {
|
||||
assert_always();
|
||||
return 0;
|
||||
}
|
||||
|
@ -102,17 +102,17 @@ X_STATUS SDLInputDriver::Setup() {
|
|||
}
|
||||
switch (type) {
|
||||
case SDL_CONTROLLERDEVICEADDED:
|
||||
driver->OnControllerDeviceAdded(event);
|
||||
driver->OnControllerDeviceAdded(*event);
|
||||
break;
|
||||
case SDL_CONTROLLERDEVICEREMOVED:
|
||||
driver->OnControllerDeviceRemoved(event);
|
||||
driver->OnControllerDeviceRemoved(*event);
|
||||
break;
|
||||
case SDL_CONTROLLERAXISMOTION:
|
||||
driver->OnControllerDeviceAxisMotion(event);
|
||||
driver->OnControllerDeviceAxisMotion(*event);
|
||||
break;
|
||||
case SDL_CONTROLLERBUTTONDOWN:
|
||||
case SDL_CONTROLLERBUTTONUP:
|
||||
driver->OnControllerDeviceButtonChanged(event);
|
||||
driver->OnControllerDeviceButtonChanged(*event);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -193,7 +193,11 @@ X_RESULT SDLInputDriver::GetState(uint32_t user_index,
|
|||
return X_ERROR_BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
QueueControllerUpdate();
|
||||
auto is_active = this->is_active();
|
||||
|
||||
if (is_active) {
|
||||
QueueControllerUpdate();
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> guard(controllers_mutex_);
|
||||
|
||||
|
@ -203,12 +207,20 @@ X_RESULT SDLInputDriver::GetState(uint32_t user_index,
|
|||
}
|
||||
|
||||
// Make sure packet_number is only incremented by 1, even if there have been
|
||||
// multiple updates between GetState calls.
|
||||
if (controller->state_changed) {
|
||||
// multiple updates between GetState calls. Also track `is_active` to
|
||||
// increment the packet number if it changed.
|
||||
if ((is_active != controller->is_active) ||
|
||||
(is_active && controller->state_changed)) {
|
||||
controller->state.packet_number++;
|
||||
controller->is_active = is_active;
|
||||
controller->state_changed = false;
|
||||
}
|
||||
*out_state = controller->state;
|
||||
std::memcpy(out_state, &controller->state, sizeof(*out_state));
|
||||
if (!is_active) {
|
||||
// Simulate an "untouched" controller. When we become active again the
|
||||
// pressed buttons aren't lost and will be visible again.
|
||||
std::memset(&out_state->gamepad, 0, sizeof(out_state->gamepad));
|
||||
}
|
||||
return X_ERROR_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -242,6 +254,8 @@ X_RESULT SDLInputDriver::SetState(uint32_t user_index,
|
|||
|
||||
X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags,
|
||||
X_INPUT_KEYSTROKE* out_keystroke) {
|
||||
// TODO(JoelLinn): Figure out the flags
|
||||
// https://github.com/evilC/UCR/blob/0489929e2a8e39caa3484c67f3993d3fba39e46f/Libraries/XInput.ahk#L85-L98
|
||||
assert(sdl_events_initialized_ && sdl_gamecontroller_initialized_);
|
||||
bool user_any = users == 0xFF;
|
||||
if (users >= HID_SDL_USER_COUNT && !user_any) {
|
||||
|
@ -296,7 +310,11 @@ X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags,
|
|||
X_INPUT_GAMEPAD_VK_RTHUMB_DOWNLEFT,
|
||||
};
|
||||
|
||||
QueueControllerUpdate();
|
||||
auto is_active = this->is_active();
|
||||
|
||||
if (is_active) {
|
||||
QueueControllerUpdate();
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> guard(controllers_mutex_);
|
||||
|
||||
|
@ -311,8 +329,13 @@ X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags,
|
|||
}
|
||||
}
|
||||
|
||||
const uint64_t curr_butts = controller->state.gamepad.buttons |
|
||||
AnalogToKeyfield(controller->state.gamepad);
|
||||
// If input is not active (e.g. due to a dialog overlay), force buttons to
|
||||
// "unpressed". The algorithm will automatically send UP events when
|
||||
// `is_active()` goes low and DOWN events when it goes high again.
|
||||
const uint64_t curr_butts =
|
||||
is_active ? (controller->state.gamepad.buttons |
|
||||
AnalogToKeyfield(controller->state.gamepad))
|
||||
: uint64_t(0);
|
||||
KeystrokeState& last = keystroke_states_.at(user_index);
|
||||
|
||||
// Handle repeating
|
||||
|
@ -384,12 +407,12 @@ X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags,
|
|||
return X_ERROR_EMPTY;
|
||||
}
|
||||
|
||||
void SDLInputDriver::OnControllerDeviceAdded(SDL_Event* event) {
|
||||
void SDLInputDriver::OnControllerDeviceAdded(const SDL_Event& event) {
|
||||
assert(window()->loop()->is_on_loop_thread());
|
||||
std::unique_lock<std::mutex> guard(controllers_mutex_);
|
||||
|
||||
// Open the controller.
|
||||
const auto controller = SDL_GameControllerOpen(event->cdevice.which);
|
||||
const auto controller = SDL_GameControllerOpen(event.cdevice.which);
|
||||
if (!controller) {
|
||||
assert_always();
|
||||
return;
|
||||
|
@ -423,52 +446,52 @@ void SDLInputDriver::OnControllerDeviceAdded(SDL_Event* event) {
|
|||
}
|
||||
}
|
||||
|
||||
void SDLInputDriver::OnControllerDeviceRemoved(SDL_Event* event) {
|
||||
void SDLInputDriver::OnControllerDeviceRemoved(const SDL_Event& event) {
|
||||
assert(window()->loop()->is_on_loop_thread());
|
||||
std::unique_lock<std::mutex> guard(controllers_mutex_);
|
||||
|
||||
// Find the disconnected gamecontroller and close it.
|
||||
auto [found, i] = GetControllerIndexFromInstanceID(event->cdevice.which);
|
||||
assert(found);
|
||||
SDL_GameControllerClose(controllers_.at(i).sdl);
|
||||
controllers_.at(i) = {};
|
||||
keystroke_states_.at(i) = {};
|
||||
auto idx = GetControllerIndexFromInstanceID(event.cdevice.which);
|
||||
assert(idx);
|
||||
SDL_GameControllerClose(controllers_.at(*idx).sdl);
|
||||
controllers_.at(*idx) = {};
|
||||
keystroke_states_.at(*idx) = {};
|
||||
}
|
||||
|
||||
void SDLInputDriver::OnControllerDeviceAxisMotion(SDL_Event* event) {
|
||||
void SDLInputDriver::OnControllerDeviceAxisMotion(const SDL_Event& event) {
|
||||
assert(window()->loop()->is_on_loop_thread());
|
||||
std::unique_lock<std::mutex> guard(controllers_mutex_);
|
||||
|
||||
auto [found, i] = GetControllerIndexFromInstanceID(event->caxis.which);
|
||||
assert(found);
|
||||
auto& pad = controllers_.at(i).state.gamepad;
|
||||
switch (event->caxis.axis) {
|
||||
auto idx = GetControllerIndexFromInstanceID(event.caxis.which);
|
||||
assert(idx);
|
||||
auto& pad = controllers_.at(*idx).state.gamepad;
|
||||
switch (event.caxis.axis) {
|
||||
case SDL_CONTROLLER_AXIS_LEFTX:
|
||||
pad.thumb_lx = event->caxis.value;
|
||||
pad.thumb_lx = event.caxis.value;
|
||||
break;
|
||||
case SDL_CONTROLLER_AXIS_LEFTY:
|
||||
pad.thumb_ly = ~event->caxis.value;
|
||||
pad.thumb_ly = ~event.caxis.value;
|
||||
break;
|
||||
case SDL_CONTROLLER_AXIS_RIGHTX:
|
||||
pad.thumb_rx = event->caxis.value;
|
||||
pad.thumb_rx = event.caxis.value;
|
||||
break;
|
||||
case SDL_CONTROLLER_AXIS_RIGHTY:
|
||||
pad.thumb_ry = ~event->caxis.value;
|
||||
pad.thumb_ry = ~event.caxis.value;
|
||||
break;
|
||||
case SDL_CONTROLLER_AXIS_TRIGGERLEFT:
|
||||
pad.left_trigger = static_cast<uint8_t>(event->caxis.value >> 7);
|
||||
pad.left_trigger = static_cast<uint8_t>(event.caxis.value >> 7);
|
||||
break;
|
||||
case SDL_CONTROLLER_AXIS_TRIGGERRIGHT:
|
||||
pad.right_trigger = static_cast<uint8_t>(event->caxis.value >> 7);
|
||||
pad.right_trigger = static_cast<uint8_t>(event.caxis.value >> 7);
|
||||
break;
|
||||
default:
|
||||
assert_always();
|
||||
break;
|
||||
}
|
||||
controllers_.at(i).state_changed = true;
|
||||
controllers_.at(*idx).state_changed = true;
|
||||
}
|
||||
|
||||
void SDLInputDriver::OnControllerDeviceButtonChanged(SDL_Event* event) {
|
||||
void SDLInputDriver::OnControllerDeviceButtonChanged(const SDL_Event& event) {
|
||||
assert(window()->loop()->is_on_loop_thread());
|
||||
std::unique_lock<std::mutex> guard(controllers_mutex_);
|
||||
|
||||
|
@ -492,15 +515,15 @@ void SDLInputDriver::OnControllerDeviceButtonChanged(SDL_Event* event) {
|
|||
X_INPUT_GAMEPAD_DPAD_LEFT,
|
||||
X_INPUT_GAMEPAD_DPAD_RIGHT};
|
||||
|
||||
auto [found, i] = GetControllerIndexFromInstanceID(event->cbutton.which);
|
||||
assert(found);
|
||||
auto& controller = controllers_.at(i);
|
||||
auto idx = GetControllerIndexFromInstanceID(event.cbutton.which);
|
||||
assert(idx);
|
||||
auto& controller = controllers_.at(*idx);
|
||||
|
||||
uint16_t xbuttons = controller.state.gamepad.buttons;
|
||||
// Lookup the XInput button code.
|
||||
auto xbutton = xbutton_lookup.at(event->cbutton.button);
|
||||
auto xbutton = xbutton_lookup.at(event.cbutton.button);
|
||||
// Pressed or released?
|
||||
if (event->cbutton.state == SDL_PRESSED) {
|
||||
if (event.cbutton.state == SDL_PRESSED) {
|
||||
if (xbutton == X_INPUT_GAMEPAD_GUIDE && !cvars::guide_button) {
|
||||
return;
|
||||
}
|
||||
|
@ -512,7 +535,7 @@ void SDLInputDriver::OnControllerDeviceButtonChanged(SDL_Event* event) {
|
|||
controller.state_changed = true;
|
||||
}
|
||||
|
||||
std::pair<bool, size_t> SDLInputDriver::GetControllerIndexFromInstanceID(
|
||||
std::optional<size_t> SDLInputDriver::GetControllerIndexFromInstanceID(
|
||||
SDL_JoystickID instance_id) {
|
||||
// Loop through our controllers and try to match the given ID.
|
||||
for (size_t i = 0; i < controllers_.size(); i++) {
|
||||
|
@ -525,10 +548,10 @@ std::pair<bool, size_t> SDLInputDriver::GetControllerIndexFromInstanceID(
|
|||
auto joy_instance_id = SDL_JoystickInstanceID(joystick);
|
||||
assert(joy_instance_id >= 0);
|
||||
if (joy_instance_id == instance_id) {
|
||||
return {true, i};
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return {false, 0};
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
SDLInputDriver::ControllerState* SDLInputDriver::GetControllerState(
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <array>
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
|
||||
#include "SDL.h"
|
||||
#include "xenia/hid/input_driver.h"
|
||||
|
@ -44,8 +45,9 @@ class SDLInputDriver : public InputDriver {
|
|||
protected:
|
||||
struct ControllerState {
|
||||
SDL_GameController* sdl;
|
||||
bool state_changed;
|
||||
X_INPUT_STATE state;
|
||||
bool state_changed;
|
||||
bool is_active;
|
||||
};
|
||||
|
||||
enum class RepeatState {
|
||||
|
@ -63,11 +65,11 @@ class SDLInputDriver : public InputDriver {
|
|||
};
|
||||
|
||||
protected:
|
||||
void OnControllerDeviceAdded(SDL_Event* event);
|
||||
void OnControllerDeviceRemoved(SDL_Event* event);
|
||||
void OnControllerDeviceAxisMotion(SDL_Event* event);
|
||||
void OnControllerDeviceButtonChanged(SDL_Event* event);
|
||||
std::pair<bool, size_t> GetControllerIndexFromInstanceID(
|
||||
void OnControllerDeviceAdded(const SDL_Event& event);
|
||||
void OnControllerDeviceRemoved(const SDL_Event& event);
|
||||
void OnControllerDeviceAxisMotion(const SDL_Event& event);
|
||||
void OnControllerDeviceButtonChanged(const SDL_Event& event);
|
||||
std::optional<size_t> GetControllerIndexFromInstanceID(
|
||||
SDL_JoystickID instance_id);
|
||||
ControllerState* GetControllerState(uint32_t user_index);
|
||||
bool TestSDLVersion() const;
|
||||
|
|
|
@ -202,7 +202,7 @@ class UserProfile {
|
|||
uint64_t xuid() const { return xuid_; }
|
||||
std::string name() const { return name_; }
|
||||
uint32_t signin_state() const { return 1; }
|
||||
uint32_t type() const { return 2; /* online profile? */ }
|
||||
uint32_t type() const { return 1 | 2; /* local | online profile? */ }
|
||||
|
||||
void AddSetting(std::unique_ptr<Setting> setting);
|
||||
Setting* GetSetting(uint32_t setting_id);
|
||||
|
|
|
@ -32,50 +32,44 @@ uint32_t xeXamEnumerate(uint32_t handle, uint32_t flags, void* buffer,
|
|||
uint32_t overlapped_ptr) {
|
||||
assert_true(flags == 0);
|
||||
|
||||
auto e = kernel_state()->object_table()->LookupObject<XEnumerator>(handle);
|
||||
if (!e) {
|
||||
if (overlapped_ptr) {
|
||||
kernel_state()->CompleteOverlappedImmediateEx(
|
||||
overlapped_ptr, X_ERROR_INVALID_HANDLE, X_ERROR_INVALID_HANDLE, 0);
|
||||
return X_ERROR_IO_PENDING;
|
||||
} else {
|
||||
return X_ERROR_INVALID_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
size_t actual_buffer_length = buffer_length;
|
||||
if (buffer_length == e->items_per_enumerate()) {
|
||||
actual_buffer_length = e->item_size() * e->items_per_enumerate();
|
||||
// Known culprits:
|
||||
// Final Fight: Double Impact (saves)
|
||||
XELOGW(
|
||||
"Broken usage of XamEnumerate! buffer length={:X} vs actual "
|
||||
"length={:X} "
|
||||
"(item size={:X}, items per enumerate={})",
|
||||
(uint32_t)buffer_length, actual_buffer_length, e->item_size(),
|
||||
e->items_per_enumerate());
|
||||
}
|
||||
|
||||
std::memset(buffer, 0, actual_buffer_length);
|
||||
|
||||
X_RESULT result;
|
||||
uint32_t item_count = 0;
|
||||
|
||||
if (actual_buffer_length < e->item_size()) {
|
||||
result = X_ERROR_INSUFFICIENT_BUFFER;
|
||||
} else if (e->current_item() >= e->item_count()) {
|
||||
result = X_ERROR_NO_MORE_FILES;
|
||||
auto e = kernel_state()->object_table()->LookupObject<XEnumerator>(handle);
|
||||
if (!e) {
|
||||
result = X_ERROR_INVALID_HANDLE;
|
||||
} else {
|
||||
auto item_buffer = static_cast<uint8_t*>(buffer);
|
||||
auto max_items = actual_buffer_length / e->item_size();
|
||||
while (max_items--) {
|
||||
if (!e->WriteItem(item_buffer)) {
|
||||
break;
|
||||
}
|
||||
item_buffer += e->item_size();
|
||||
item_count++;
|
||||
size_t actual_buffer_length = buffer_length;
|
||||
if (buffer_length == e->items_per_enumerate()) {
|
||||
actual_buffer_length = e->item_size() * e->items_per_enumerate();
|
||||
// Known culprits:
|
||||
// Final Fight: Double Impact (saves)
|
||||
XELOGW(
|
||||
"Broken usage of XamEnumerate! buffer length={:X} vs actual "
|
||||
"length={:X} "
|
||||
"(item size={:X}, items per enumerate={})",
|
||||
(uint32_t)buffer_length, actual_buffer_length, e->item_size(),
|
||||
e->items_per_enumerate());
|
||||
}
|
||||
|
||||
std::memset(buffer, 0, actual_buffer_length);
|
||||
|
||||
if (actual_buffer_length < e->item_size()) {
|
||||
result = X_ERROR_INSUFFICIENT_BUFFER;
|
||||
} else if (e->current_item() >= e->item_count()) {
|
||||
result = X_ERROR_NO_MORE_FILES;
|
||||
} else {
|
||||
auto item_buffer = static_cast<uint8_t*>(buffer);
|
||||
auto max_items = actual_buffer_length / e->item_size();
|
||||
while (max_items--) {
|
||||
if (!e->WriteItem(item_buffer)) {
|
||||
break;
|
||||
}
|
||||
item_buffer += e->item_size();
|
||||
item_count++;
|
||||
}
|
||||
result = X_ERROR_SUCCESS;
|
||||
}
|
||||
result = X_ERROR_SUCCESS;
|
||||
}
|
||||
|
||||
if (items_returned) {
|
||||
|
|
|
@ -958,6 +958,11 @@ dword_result_t NetDll___WSAFDIsSet(dword_t socket_handle,
|
|||
}
|
||||
DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented);
|
||||
|
||||
// Export shim for WSASetLastError: passes the given error code to
// XThread::SetLastError and returns nothing.
void NetDll_WSASetLastError(dword_t error_code) {
|
||||
XThread::SetLastError(error_code);
|
||||
}
|
||||
// Declares the function above as a XAM export tagged kNetworking and
// kImplemented.
DECLARE_XAM_EXPORT1(NetDll_WSASetLastError, kNetworking, kImplemented);
|
||||
|
||||
// Registration hook for the networking exports. The body is empty, so neither
// argument is used here.
// NOTE(review): presumably the DECLARE_XAM_EXPORT* macros in this file perform
// the actual registration - TODO confirm.
void RegisterNetExports(xe::cpu::ExportResolver* export_resolver,
|
||||
KernelState* kernel_state) {}
|
||||
|
||||
|
|
|
@ -142,7 +142,8 @@ dword_result_t NtCreateFile(lpdword_t handle_out, dword_t desired_access,
|
|||
X_STATUS result = kernel_state()->file_system()->OpenFile(
|
||||
root_entry, target_path,
|
||||
vfs::FileDisposition((uint32_t)creation_disposition), desired_access,
|
||||
(create_options & CreateOptions::FILE_DIRECTORY_FILE) != 0, &vfs_file,
|
||||
(create_options & CreateOptions::FILE_DIRECTORY_FILE) != 0,
|
||||
(create_options & CreateOptions::FILE_NON_DIRECTORY_FILE) != 0, &vfs_file,
|
||||
&file_action);
|
||||
object_ref<XFile> file = nullptr;
|
||||
|
||||
|
|
|
@ -135,8 +135,10 @@ dword_result_t NtAllocateVirtualMemory(lpdword_t base_addr_ptr,
|
|||
}
|
||||
uint32_t protect = FromXdkProtectFlags(protect_bits);
|
||||
uint32_t address = 0;
|
||||
BaseHeap* heap;
|
||||
|
||||
if (adjusted_base != 0) {
|
||||
auto heap = kernel_memory()->LookupHeap(adjusted_base);
|
||||
heap = kernel_memory()->LookupHeap(adjusted_base);
|
||||
if (heap->page_size() != page_size) {
|
||||
// Specified the wrong page size for the wrong heap.
|
||||
return X_STATUS_ACCESS_DENIED;
|
||||
|
@ -148,7 +150,7 @@ dword_result_t NtAllocateVirtualMemory(lpdword_t base_addr_ptr,
|
|||
}
|
||||
} else {
|
||||
bool top_down = !!(alloc_type & X_MEM_TOP_DOWN);
|
||||
auto heap = kernel_memory()->LookupHeapByType(false, page_size);
|
||||
heap = kernel_memory()->LookupHeapByType(false, page_size);
|
||||
heap->Alloc(adjusted_size, page_size, allocation_type, protect, top_down,
|
||||
&address);
|
||||
}
|
||||
|
@ -160,7 +162,14 @@ dword_result_t NtAllocateVirtualMemory(lpdword_t base_addr_ptr,
|
|||
// Zero memory, if needed.
|
||||
if (address && !(alloc_type & X_MEM_NOZERO)) {
|
||||
if (alloc_type & X_MEM_COMMIT) {
|
||||
if (!(protect & kMemoryProtectWrite)) {
|
||||
heap->Protect(address, adjusted_size,
|
||||
kMemoryProtectRead | kMemoryProtectWrite);
|
||||
}
|
||||
kernel_memory()->Zero(address, adjusted_size);
|
||||
if (!(protect & kMemoryProtectWrite)) {
|
||||
heap->Protect(address, adjusted_size, protect);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -400,7 +409,7 @@ dword_result_t MmQueryAddressProtect(dword_t base_address) {
|
|||
if (!heap->QueryProtect(base_address, &access)) {
|
||||
access = 0;
|
||||
}
|
||||
access = ToXdkProtectFlags(access);
|
||||
access = !access ? 0 : ToXdkProtectFlags(access);
|
||||
|
||||
return access;
|
||||
}
|
||||
|
|
|
@ -205,22 +205,30 @@ dword_result_t NtSuspendThread(dword_t handle, lpdword_t suspend_count_ptr) {
|
|||
}
|
||||
DECLARE_XBOXKRNL_EXPORT1(NtSuspendThread, kThreading, kImplemented);
|
||||
|
||||
void KeSetCurrentStackPointers(lpvoid_t stack_ptr,
|
||||
pointer_t<X_KTHREAD> cur_thread,
|
||||
void KeSetCurrentStackPointers(lpvoid_t stack_ptr, pointer_t<X_KTHREAD> thread,
|
||||
lpvoid_t stack_alloc_base, lpvoid_t stack_base,
|
||||
lpvoid_t stack_limit) {
|
||||
auto thread = XThread::GetCurrentThread();
|
||||
auto context = thread->thread_state()->context();
|
||||
context->r[1] = stack_ptr.guest_address();
|
||||
auto current_thread = XThread::GetCurrentThread();
|
||||
auto context = current_thread->thread_state()->context();
|
||||
auto pcr = kernel_memory()->TranslateVirtual<X_KPCR*>(
|
||||
static_cast<uint32_t>(context->r[13]));
|
||||
|
||||
auto pcr =
|
||||
kernel_memory()->TranslateVirtual<X_KPCR*>((uint32_t)context->r[13]);
|
||||
thread->stack_alloc_base = stack_alloc_base.value();
|
||||
thread->stack_base = stack_base.value();
|
||||
thread->stack_limit = stack_limit.value();
|
||||
pcr->stack_base_ptr = stack_base.guest_address();
|
||||
pcr->stack_end_ptr = stack_limit.guest_address();
|
||||
context->r[1] = stack_ptr.guest_address();
|
||||
|
||||
// TODO: Do we need to set the stack info on cur_thread?
|
||||
// If a fiber is set, and the thread matches, reenter to avoid issues with
|
||||
// host stack overflowing.
|
||||
if (thread->fiber_ptr &&
|
||||
current_thread->guest_object() == thread.guest_address()) {
|
||||
current_thread->Reenter(static_cast<uint32_t>(context->lr));
|
||||
}
|
||||
}
|
||||
DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented);
|
||||
DECLARE_XBOXKRNL_EXPORT2(KeSetCurrentStackPointers, kThreading, kImplemented,
|
||||
kHighFrequency);
|
||||
|
||||
dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity,
|
||||
lpdword_t previous_affinity_ptr) {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue