Merge branch 'master' of https://github.com/xenia-project/xenia into canary_new

This commit is contained in:
Gliniak 2021-01-30 22:10:21 +01:00
commit d1c4a514b7
175 changed files with 12919 additions and 10637 deletions

5
.gitmodules vendored
View File

@ -51,7 +51,7 @@
url = https://github.com/jarro2783/cxxopts.git
[submodule "third_party/SDL2"]
path = third_party/SDL2
url = https://github.com/spurious/SDL-mirror.git
url = https://github.com/JoelLinn/SDL.git
[submodule "third_party/utfcpp"]
path = third_party/utfcpp
url = https://github.com/xenia-project/utfcpp.git
@ -73,3 +73,6 @@
[submodule "third_party/date"]
path = third_party/date
url = https://github.com/HowardHinnant/date.git
[submodule "third_party/xxhash"]
path = third_party/xxhash
url = https://github.com/Cyan4973/xxHash.git

View File

@ -196,6 +196,7 @@ filter("platforms:Windows")
"shcore",
"shlwapi",
"dxguid",
"bcrypt",
})
-- Create scratch/ path

View File

@ -66,6 +66,14 @@ DEFINE_path(
"Root path for guest content storage (saves, etc.), or empty to use the "
"content folder under the storage root.",
"Storage");
DEFINE_path(
cache_root, "",
"Root path for files used to speed up certain parts of the emulator or the "
"game. These files may be persistent, but they can be deleted without "
"major side effects such as progress loss. If empty, the cache folder "
"under the storage root, or, if available, the cache directory preferred "
"for the OS, will be used.",
"Storage");
DEFINE_bool(mount_scratch, false, "Enable scratch mount", "Storage");
@ -189,6 +197,9 @@ std::vector<std::unique_ptr<hid::InputDriver>> CreateInputDrivers(
factory.Add("sdl", xe::hid::sdl::Create);
#if XE_PLATFORM_WIN32
factory.Add("xinput", xe::hid::xinput::Create);
#endif // XE_PLATFORM_WIN32
factory.Add("sdl", xe::hid::sdl::Create);
#if XE_PLATFORM_WIN32
// WinKey input driver should always be the last input driver added!
factory.Add("winkey", xe::hid::winkey::Create);
#endif // XE_PLATFORM_WIN32
@ -219,6 +230,8 @@ int xenia_main(const std::vector<std::string>& args) {
#if defined(XE_PLATFORM_WIN32) || defined(XE_PLATFORM_GNU_LINUX)
storage_root = storage_root / "Xenia";
#else
// TODO(Triang3l): Point to the app's external storage "files" directory
// on Android.
#warning Unhandled platform for the data root.
storage_root = storage_root / "Xenia";
#endif
@ -242,13 +255,29 @@ int xenia_main(const std::vector<std::string>& args) {
content_root = std::filesystem::absolute(content_root);
XELOGI("Content root: {}", xe::path_to_utf8(content_root));
std::filesystem::path cache_root = cvars::cache_root;
if (cache_root.empty()) {
cache_root = storage_root / "cache";
// TODO(Triang3l): Point to the app's external storage "cache" directory on
// Android.
} else {
// If cache root isn't an absolute path, then it should be relative to the
// storage root.
if (!cache_root.is_absolute()) {
cache_root = storage_root / cache_root;
}
}
cache_root = std::filesystem::absolute(cache_root);
XELOGI("Cache root: {}", xe::path_to_utf8(cache_root));
if (cvars::discord) {
discord::DiscordPresence::Initialize();
discord::DiscordPresence::NotPlaying();
}
// Create the emulator but don't initialize so we can setup the window.
auto emulator = std::make_unique<Emulator>("", storage_root, content_root);
auto emulator =
std::make_unique<Emulator>("", storage_root, content_root, cache_root);
// Main emulator display window.
auto emulator_window = EmulatorWindow::Create(emulator.get());

View File

@ -23,6 +23,7 @@ namespace cvar {
cxxopts::Options options("xenia", "Xbox 360 Emulator");
std::map<std::string, ICommandVar*>* CmdVars;
std::map<std::string, IConfigVar*>* ConfigVars;
std::multimap<uint32_t, const IConfigVarUpdate*>* IConfigVarUpdate::updates_;
void PrintHelpAndExit() {
std::cout << options.help({""}) << std::endl;

View File

@ -17,6 +17,7 @@
#include "cpptoml/include/cpptoml.h"
#include "cxxopts/include/cxxopts.hpp"
#include "xenia/base/assert.h"
#include "xenia/base/filesystem.h"
#include "xenia/base/string_util.h"
@ -43,6 +44,7 @@ class IConfigVar : virtual public ICommandVar {
virtual std::string config_value() const = 0;
virtual void LoadConfigValue(std::shared_ptr<cpptoml::base> result) = 0;
virtual void LoadGameConfigValue(std::shared_ptr<cpptoml::base> result) = 0;
virtual void ResetConfigValueToDefault() = 0;
};
template <class T>
@ -75,6 +77,7 @@ class ConfigVar : public CommandVar<T>, virtual public IConfigVar {
ConfigVar<T>(const char* name, T* default_value, const char* description,
const char* category, bool is_transient);
std::string config_value() const override;
const T& GetTypedConfigValue() const;
const std::string& category() const override;
bool is_transient() const override;
void AddToLaunchOptions(cxxopts::Options* options) override;
@ -89,6 +92,7 @@ class ConfigVar : public CommandVar<T>, virtual public IConfigVar {
std::unique_ptr<T> config_value_ = nullptr;
std::unique_ptr<T> game_config_value_ = nullptr;
void UpdateValue() override;
void ResetConfigValueToDefault() override;
};
#pragma warning(pop)
@ -233,6 +237,10 @@ std::string ConfigVar<T>::config_value() const {
return this->ToString(this->default_value_);
}
// Returns the typed value held in config_value_ when one is set, and the
// variable's default value otherwise.
template <class T>
const T& ConfigVar<T>::GetTypedConfigValue() const {
  if (config_value_) {
    return *config_value_;
  }
  return this->default_value_;
}
template <class T>
void CommandVar<T>::SetCommandLineValue(const T val) {
commandline_value_ = std::make_unique<T>(val);
UpdateValue();
@ -247,36 +255,47 @@ void ConfigVar<T>::SetGameConfigValue(T val) {
game_config_value_ = std::make_unique<T>(val);
UpdateValue();
}
// Replaces the config value with the variable's default value by passing the
// default through SetConfigValue.
template <class T>
void ConfigVar<T>::ResetConfigValueToDefault() {
  this->SetConfigValue(this->default_value_);
}
// CVars can be initialized before these, thus initialized on-demand using new.
extern std::map<std::string, ICommandVar*>* CmdVars;
extern std::map<std::string, IConfigVar*>* ConfigVars;
inline void AddConfigVar(IConfigVar* cv) {
if (!ConfigVars) ConfigVars = new std::map<std::string, IConfigVar*>();
ConfigVars->insert(std::pair<std::string, IConfigVar*>(cv->name(), cv));
if (!ConfigVars) {
ConfigVars = new std::map<std::string, IConfigVar*>;
}
ConfigVars->emplace(cv->name(), cv);
}
inline void AddCommandVar(ICommandVar* cv) {
if (!CmdVars) CmdVars = new std::map<std::string, ICommandVar*>();
CmdVars->insert(std::pair<std::string, ICommandVar*>(cv->name(), cv));
if (!CmdVars) {
CmdVars = new std::map<std::string, ICommandVar*>;
}
CmdVars->emplace(cv->name(), cv);
}
void ParseLaunchArguments(int& argc, char**& argv,
const std::string_view positional_help,
const std::vector<std::string>& positional_options);
template <typename T>
T* define_configvar(const char* name, T* default_value, const char* description,
const char* category, bool is_transient) {
IConfigVar* cfgVar = new ConfigVar<T>(name, default_value, description,
IConfigVar* define_configvar(const char* name, T* default_value,
const char* description, const char* category,
bool is_transient) {
IConfigVar* cfgvar = new ConfigVar<T>(name, default_value, description,
category, is_transient);
AddConfigVar(cfgVar);
return default_value;
AddConfigVar(cfgvar);
return cfgvar;
}
template <typename T>
T* define_cmdvar(const char* name, T* default_value, const char* description) {
ICommandVar* cmdVar = new CommandVar<T>(name, default_value, description);
AddCommandVar(cmdVar);
return default_value;
ICommandVar* define_cmdvar(const char* name, T* default_value,
const char* description) {
ICommandVar* cmdvar = new CommandVar<T>(name, default_value, description);
AddCommandVar(cmdvar);
return cmdvar;
}
#define DEFINE_bool(name, default_value, description, category) \
@ -285,6 +304,9 @@ T* define_cmdvar(const char* name, T* default_value, const char* description) {
#define DEFINE_int32(name, default_value, description, category) \
DEFINE_CVar(name, default_value, description, category, false, int32_t)
#define DEFINE_uint32(name, default_value, description, category) \
DEFINE_CVar(name, default_value, description, category, false, uint32_t)
#define DEFINE_uint64(name, default_value, description, category) \
DEFINE_CVar(name, default_value, description, category, false, uint64_t)
@ -314,7 +336,7 @@ T* define_cmdvar(const char* name, T* default_value, const char* description) {
type name = default_value; \
} \
namespace cv { \
static auto cv_##name = cvar::define_configvar( \
static cvar::IConfigVar* const cv_##name = cvar::define_configvar( \
#name, &cvars::name, description, category, is_transient); \
}
@ -324,7 +346,7 @@ T* define_cmdvar(const char* name, T* default_value, const char* description) {
std::string name = default_value; \
} \
namespace cv { \
static auto cv_##name = \
static cvar::ICommandVar* const cv_##name = \
cvar::define_cmdvar(#name, &cvars::name, description); \
}
@ -332,6 +354,8 @@ T* define_cmdvar(const char* name, T* default_value, const char* description) {
#define DECLARE_int32(name) DECLARE_CVar(name, int32_t)
#define DECLARE_uint32(name) DECLARE_CVar(name, uint32_t)
#define DECLARE_uint64(name) DECLARE_CVar(name, uint64_t)
#define DECLARE_double(name) DECLARE_CVar(name, double)
@ -345,6 +369,212 @@ T* define_cmdvar(const char* name, T* default_value, const char* description) {
extern type name; \
}
// Interface for changing the default value of a variable with auto-upgrading of
// users' configs (to distinguish between a leftover old default and an explicit
// override), without having to rename the variable.
//
// Two types of updates are supported:
// - Changing the value of the variable (UPDATE_from_type) from an explicitly
// specified previous default value to a new one, but keeping the
// user-specified value if it was not the default, and thus explicitly
// overridden.
// - Changing the meaning / domain of the variable (UPDATE_from_any), when
// previous user-specified overrides also stop making sense. Config variable
// type changes are also considered this type of updates (though
// UPDATE_from_type, if the new type doesn't match the previous one, is also
// safe to use - it behaves like UPDATE_from_any in this case).
//
// Rules of using UPDATE_:
// - Do not remove previous UPDATE_ entries (both typed and from-any) if you're
// adding a new UPDATE_from_type.
// This ensures that if the default was changed from 1 to 2 and then to 3,
// both users who last launched Xenia when it was 1 and when it was 2 receive
// the update (however, those who have explicitly changed it from 2 to 1 when
// 2 was the default will have it kept at 1).
// It's safe to remove the history before a new UPDATE_from_any, however.
// - The date should preferably be in UTC+0 timezone.
// - No other recent pull requests should have the same date (since builds
// are made after every commit).
// - IConfigVarUpdate::kLastCommittedUpdateDate must be updated - see the
// comment near its declaration.
// Packs a date and UTC hour into a single number whose decimal digits read as
// <year><month:2><day:2><hour:2>, so a later date gives a larger number.
constexpr uint32_t MakeConfigVarUpdateDate(uint32_t year, uint32_t month,
                                           uint32_t day, uint32_t utc_hour) {
  // The value is written to the config as a decimal number, so the fields are
  // packed as groups of two decimal digits for user readability.
  // 31 bits are already used in the 3rd millennium - don't add more digits.
  return ((year * 100 + month) * 100 + day) * 100 + utc_hour;
}
// Base class of a single dated change of a config variable's default value.
// Every instance registers itself, keyed by its date, in a process-wide
// registry on construction; ApplyUpdates then runs the registered updates
// that are newer than the version stored in the user's config.
class IConfigVarUpdate {
 public:
  // This global highest version constant is used to ensure that version (which
  // is stored as one value for the whole config file) is monotonically
  // increased when commits - primarily pull requests - are pushed to the main
  // branch.
  //
  // This is to prevent the following situation:
  // - Pull request #1 created on day 1.
  // - Pull request #2 created on day 2.
  // - Pull request #2 from day 2 merged on day 3.
  // - User launches the latest version on day 4.
  //   CVar default changes from PR #2 (day 2) applied because the user's
  //   config version is day 0, which is < 2.
  //   User's config has day 2 version now.
  // - Pull request #1 from day 1 merged on day 5.
  // - User launches the latest version on day 5.
  //   CVar default changes from PR #1 (day 1) IGNORED because the user's
  //   config version is day 2, which is >= 1.
  //
  // If this constant is not updated, static_assert will be triggered for a new
  // DEFINE_, requiring this constant to be raised. But changing this will
  // result in merge conflicts in all other pull requests also changing cvar
  // defaults - before they're merged, they will need to be updated, which will
  // ensure monotonic growth of the versions of all cvars on the main branch. In
  // the example above, PR #1 will need to be updated before it's merged.
  //
  // If you've encountered a merge conflict here in your pull request:
  // 1) Update any UPDATE_s you've added in the pull request to the current
  //    date.
  // 2) Change this value to the same date.
  // If you're reviewing a pull request with a change here, check if 1) has been
  // done by the submitter before merging.
  static constexpr uint32_t kLastCommittedUpdateDate =
      MakeConfigVarUpdateDate(2020, 12, 31, 13);

  virtual ~IConfigVarUpdate() = default;

  // Performs this update on the config variable it was created for.
  virtual void Apply() const = 0;

  // Runs Apply() on every registered update whose date is strictly greater
  // than config_date, in ascending date order. Does nothing if no update has
  // been registered.
  static void ApplyUpdates(uint32_t config_date) {
    if (updates_ == nullptr) {
      return;
    }
    const auto last = updates_->end();
    auto current = updates_->upper_bound(config_date);
    while (current != last) {
      current->second->Apply();
      ++current;
    }
  }

  // More reliable than kLastCommittedUpdateDate for actual usage
  // (kLastCommittedUpdateDate is just a pull request merge order guard), though
  // usually should be the same, but kLastCommittedUpdateDate may not include
  // removal of cvars.
  // Returns the greatest registered update date, or 0 if there are no updates.
  static uint32_t GetLastUpdateDate() {
    if (updates_ == nullptr || updates_->empty()) {
      return 0;
    }
    return updates_->rbegin()->first;
  }

 protected:
  // Stores the reference to the config variable pointer and registers this
  // update under the date packed by MakeConfigVarUpdateDate.
  IConfigVarUpdate(IConfigVar* const& config_var, uint32_t year, uint32_t month,
                   uint32_t day, uint32_t utc_hour)
      : config_var_(config_var) {
    if (updates_ == nullptr) {
      updates_ = new std::multimap<uint32_t, const IConfigVarUpdate*>;
    }
    const uint32_t update_date =
        MakeConfigVarUpdateDate(year, month, day, utc_hour);
    updates_->emplace(update_date, this);
  }

  // The config variable this update targets; asserts that the referenced
  // pointer is not null at the time of the call.
  IConfigVar& config_var() const {
    assert_not_null(config_var_);
    return *config_var_;
  }

 private:
  // Reference to pointer to loosen initialization order requirements.
  IConfigVar* const& config_var_;

  // Updates can be initialized before these, thus initialized on demand using
  // `new`.
  static std::multimap<uint32_t, const IConfigVarUpdate*>* updates_;
};
class ConfigVarUpdateFromAny : public IConfigVarUpdate {
public:
ConfigVarUpdateFromAny(IConfigVar* const& config_var, uint32_t year,
uint32_t month, uint32_t day, uint32_t utc_hour)
: IConfigVarUpdate(config_var, year, month, day, utc_hour) {}
void Apply() const override { config_var().ResetConfigValueToDefault(); }
};
template <typename T>
class ConfigVarUpdate : public IConfigVarUpdate {
public:
ConfigVarUpdate(IConfigVar* const& config_var, uint32_t year, uint32_t month,
uint32_t day, uint32_t utc_hour, const T& old_default_value)
: IConfigVarUpdate(config_var, year, month, day, utc_hour),
old_default_value_(old_default_value) {}
void Apply() const override {
IConfigVar& config_var_untyped = config_var();
ConfigVar<T>* config_var_typed =
dynamic_cast<ConfigVar<T>*>(&config_var_untyped);
// Update only from the previous default value if the same type,
// unconditionally reset if the type has been changed.
if (!config_var_typed ||
config_var_typed->GetTypedConfigValue() == old_default_value_) {
config_var_untyped.ResetConfigValueToDefault();
}
}
private:
T old_default_value_;
};
// Declares an update that unconditionally resets config variable `name` to its
// default value for configs older than the given date.
//
// The date must not exceed cvar::IConfigVarUpdate::kLastCommittedUpdateDate
// (checked with static_assert).
//
// The update object is named update_<name>_<year>_<month>_<day>_<utc_hour>.
// Each macro parameter is pasted with its own `##`; writing `name_##year_`
// would paste the literal tokens `name_` and `year_` rather than the
// arguments, making all updates with the same hour share one identifier.
// Because of the pasting, year, month, day and utc_hour must each be a single
// integer literal token, not an expression.
#define UPDATE_from_any(name, year, month, day, utc_hour)                     \
  static_assert(                                                              \
      cvar::MakeConfigVarUpdateDate(year, month, day, utc_hour) <=            \
          cvar::IConfigVarUpdate::kLastCommittedUpdateDate,                   \
      "A new config variable default value update was added - raise "         \
      "cvar::IConfigVarUpdate::kLastCommittedUpdateDate to the same date in " \
      "base/cvar.h to ensure coherence between different pull requests "      \
      "updating config variable defaults.");                                  \
  namespace cv {                                                              \
  static const cvar::ConfigVarUpdateFromAny                                   \
      update_##name##_##year##_##month##_##day##_##utc_hour(                  \
          cv_##name, year, month, day, utc_hour);                             \
  }
// Declares an update that resets config variable `name` to its default value
// for configs older than the given date, but only if the stored value still
// equals old_default_value (of C++ type `type`).
//
// The date must not exceed cvar::IConfigVarUpdate::kLastCommittedUpdateDate
// (checked with static_assert).
//
// The update object is named update_<name>_<year>_<month>_<day>_<utc_hour>.
// Each macro parameter is pasted with its own `##`; writing `name_##year_`
// would paste the literal tokens `name_` and `year_` rather than the
// arguments, making all updates with the same hour share one identifier.
// Because of the pasting, year, month, day and utc_hour must each be a single
// integer literal token, not an expression.
#define UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, type) \
  static_assert(                                                               \
      cvar::MakeConfigVarUpdateDate(year, month, day, utc_hour) <=             \
          cvar::IConfigVarUpdate::kLastCommittedUpdateDate,                    \
      "A new config variable default value update was added - raise "          \
      "cvar::IConfigVarUpdate::kLastCommittedUpdateDate to the same date in "  \
      "base/cvar.h to ensure coherence between different pull requests "       \
      "updating config variable defaults.");                                   \
  namespace cv {                                                               \
  static const cvar::ConfigVarUpdate<type>                                     \
      update_##name##_##year##_##month##_##day##_##utc_hour(                   \
          cv_##name, year, month, day, utc_hour, old_default_value);           \
  }
// Typed shorthands for UPDATE_CVar, one per config variable value type. Each
// forwards its arguments unchanged and supplies the C++ type of
// old_default_value (bool, int32_t, uint32_t, uint64_t, double, std::string or
// std::filesystem::path).
#define UPDATE_from_bool(name, year, month, day, utc_hour, old_default_value) \
UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, bool)
#define UPDATE_from_int32(name, year, month, day, utc_hour, old_default_value) \
UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, int32_t)
#define UPDATE_from_uint32(name, year, month, day, utc_hour, \
old_default_value) \
UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, uint32_t)
#define UPDATE_from_uint64(name, year, month, day, utc_hour, \
old_default_value) \
UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, uint64_t)
#define UPDATE_from_double(name, year, month, day, utc_hour, \
old_default_value) \
UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, double)
#define UPDATE_from_string(name, year, month, day, utc_hour, \
old_default_value) \
UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, std::string)
#define UPDATE_from_path(name, year, month, day, utc_hour, old_default_value) \
UPDATE_CVar(name, year, month, day, utc_hour, old_default_value, \
std::filesystem::path)
} // namespace cvar
#endif // XENIA_CVAR_H_

View File

@ -17,7 +17,7 @@ namespace hash {
// For use in unordered_sets and unordered_maps (primarily multisets and
// multimaps, with manual collision resolution), where the hash is calculated
// externally (for instance, as XXH64), possibly requiring context data rather
// externally (for instance, as XXH3), possibly requiring context data rather
// than a pure function to calculate the hash
template <typename Key>
struct IdentityHasher {

View File

@ -69,6 +69,11 @@ class global_critical_region {
return std::unique_lock<std::recursive_mutex>(mutex());
}
// Creates a lock object for the global critical section without locking it
// (std::defer_lock). The returned lock does not own the mutex until the caller
// locks it.
inline std::unique_lock<std::recursive_mutex> AcquireDeferred() {
  std::unique_lock<std::recursive_mutex> deferred_lock(mutex(),
                                                       std::defer_lock);
  return deferred_lock;
}
// Tries to acquire a lock on the global critical section.
// Check owns_lock() to see if the lock was successfully acquired.
inline std::unique_lock<std::recursive_mutex> TryAcquire() {

View File

@ -22,6 +22,7 @@
#define NOMINMAX
#include <ObjBase.h>
#include <SDKDDKVer.h>
#include <bcrypt.h>
#include <dwmapi.h>
#include <shellapi.h>
#include <shlwapi.h>

View File

@ -34,6 +34,11 @@
namespace xe {
namespace string_util {
// Opt-in tag used as the template argument of the *_maybe_truncating copy
// helpers in this namespace. They default to IDontKnowWhatIAmDoing and
// static_assert on IKnowWhatIAmDoing, so a call only compiles when the caller
// passes IKnowWhatIAmDoing explicitly.
enum class Safety {
IDontKnowWhatIAmDoing,
IKnowWhatIAmDoing,
};
inline size_t copy_truncating(char* dest, const std::string_view source,
size_t dest_buffer_count) {
if (!dest_buffer_count) {
@ -68,6 +73,44 @@ inline size_t copy_and_swap_truncating(char16_t* dest,
return chars_copied;
}
// Copies up to dest_buffer_count chars from source to dest and returns the
// number of chars copied. Unlike copy_truncating, this function itself writes
// nothing past the copied chars, so no null terminator is appended.
template <Safety safety = Safety::IDontKnowWhatIAmDoing>
inline size_t copy_maybe_truncating(char* dest, const std::string_view source,
                                    size_t dest_buffer_count) {
  // Compiles only when the caller explicitly opts in.
  static_assert(safety == Safety::IKnowWhatIAmDoing);
  if (dest_buffer_count == 0) {
    return 0;
  }
  const size_t copy_count = source.size() < dest_buffer_count
                                ? source.size()
                                : dest_buffer_count;
  std::memcpy(dest, source.data(), copy_count);
  return copy_count;
}
// Copies up to dest_buffer_count char16_t units from source to dest and
// returns the number of units copied. No null terminator is appended by this
// function.
template <Safety safety = Safety::IDontKnowWhatIAmDoing>
inline size_t copy_maybe_truncating(char16_t* dest,
                                    const std::u16string_view source,
                                    size_t dest_buffer_count) {
  // Compiles only when the caller explicitly opts in.
  static_assert(safety == Safety::IKnowWhatIAmDoing);
  if (dest_buffer_count == 0) {
    return 0;
  }
  const size_t copy_count = source.size() < dest_buffer_count
                                ? source.size()
                                : dest_buffer_count;
  // memcpy takes a byte count, so scale by the unit size.
  std::memcpy(dest, source.data(), sizeof(char16_t) * copy_count);
  return copy_count;
}
// Passes up to dest_buffer_count char16_t units of source to
// xe::copy_and_swap with dest as the destination, and returns the number of
// units processed. No null terminator is appended by this function.
template <Safety safety = Safety::IDontKnowWhatIAmDoing>
inline size_t copy_and_swap_maybe_truncating(char16_t* dest,
                                             const std::u16string_view source,
                                             size_t dest_buffer_count) {
  // Compiles only when the caller explicitly opts in.
  static_assert(safety == Safety::IKnowWhatIAmDoing);
  if (dest_buffer_count == 0) {
    return 0;
  }
  const size_t copy_count = source.size() < dest_buffer_count
                                ? source.size()
                                : dest_buffer_count;
  xe::copy_and_swap(dest, source.data(), copy_count);
  return copy_count;
}
inline std::string to_hex_string(uint32_t value) {
return fmt::format("{:08X}", value);
}

21
src/xenia/base/xxhash.h Normal file
View File

@ -0,0 +1,21 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_BASE_XXHASH_H_
#define XENIA_BASE_XXHASH_H_
#define XXH_INLINE_ALL
// Can't use XXH_X86DISPATCH because XXH is calculated on multiple threads,
// while the dispatch writes the result (multiple pointers without any
// synchronization) to XXH_g_dispatch at the first call.
#include "third_party/xxhash/xxhash.h"
#endif // XENIA_BASE_XXHASH_H_

View File

@ -11,6 +11,7 @@
#include "third_party/cpptoml/include/cpptoml.h"
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
#include "xenia/base/filesystem.h"
#include "xenia/base/logging.h"
@ -29,6 +30,13 @@ std::shared_ptr<cpptoml::table> ParseFile(
}
CmdVar(config, "", "Specifies the target config to load.");
DEFINE_uint32(
defaults_date, 0,
"Do not modify - internal version of the default values in the config, for "
"seamless updates if default value of any option is changed.",
"Config");
namespace config {
std::string config_name = "xenia-canary.config.toml";
std::filesystem::path config_folder;
@ -53,8 +61,19 @@ std::shared_ptr<cpptoml::table> ParseConfig(
}
}
void ReadConfig(const std::filesystem::path& file_path) {
void ReadConfig(const std::filesystem::path& file_path,
bool update_if_no_version_stored) {
if (!cvar::ConfigVars) {
return;
}
const auto config = ParseConfig(file_path);
// Loading an actual global config file that exists - if there's no
// defaults_date in it, it's very old (before updating was added at all, thus
// all defaults need to be updated).
auto defaults_date_cvar =
dynamic_cast<cvar::ConfigVar<uint32_t>*>(cv::cv_defaults_date);
assert_not_null(defaults_date_cvar);
defaults_date_cvar->SetConfigValue(0);
for (auto& it : *cvar::ConfigVars) {
auto config_var = static_cast<cvar::IConfigVar*>(it.second);
auto config_key = config_var->category() + "." + config_var->name();
@ -62,10 +81,17 @@ void ReadConfig(const std::filesystem::path& file_path) {
config_var->LoadConfigValue(config->get_qualified(config_key));
}
}
uint32_t config_defaults_date = defaults_date_cvar->GetTypedConfigValue();
if (update_if_no_version_stored || config_defaults_date) {
cvar::IConfigVarUpdate::ApplyUpdates(config_defaults_date);
}
XELOGI("Loaded config: {}", xe::path_to_utf8(file_path));
}
void ReadGameConfig(const std::filesystem::path& file_path) {
if (!cvar::ConfigVars) {
return;
}
const auto config = ParseConfig(file_path);
for (auto& it : *cvar::ConfigVars) {
auto config_var = static_cast<cvar::IConfigVar*>(it.second);
@ -78,10 +104,19 @@ void ReadGameConfig(const std::filesystem::path& file_path) {
}
void SaveConfig() {
// All cvar defaults have been updated on loading - store the current date.
auto defaults_date_cvar =
dynamic_cast<cvar::ConfigVar<uint32_t>*>(cv::cv_defaults_date);
assert_not_null(defaults_date_cvar);
defaults_date_cvar->SetConfigValue(
cvar::IConfigVarUpdate::GetLastUpdateDate());
std::vector<cvar::IConfigVar*> vars;
if (cvar::ConfigVars) {
for (const auto& s : *cvar::ConfigVars) {
vars.push_back(s.second);
}
}
std::sort(vars.begin(), vars.end(), [](auto a, auto b) {
if (a->category() < b->category()) return true;
if (a->category() > b->category()) return false;
@ -174,7 +209,12 @@ void SetupConfig(const std::filesystem::path& config_folder) {
if (!cvars::config.empty()) {
config_path = xe::to_path(cvars::config);
if (std::filesystem::exists(config_path)) {
ReadConfig(config_path);
// An external config file may contain only explicit overrides - in this
// case, it will likely not contain the defaults version; don't update
// from the version 0 in this case. Or, it may be a full config - in this
// case, if it's recent enough (created at least in 2021), it will contain
// the version number, and the defaults in it will be updated.
ReadConfig(config_path, false);
return;
}
}
@ -183,10 +223,11 @@ void SetupConfig(const std::filesystem::path& config_folder) {
if (!config_folder.empty()) {
config_path = config_folder / config_name;
if (std::filesystem::exists(config_path)) {
ReadConfig(config_path);
ReadConfig(config_path, true);
}
// we only want to save the config if the user is using the default
// config, we don't want to override a user created specific config
// Re-save the loaded config to present the most up-to-date list of
// parameters to the user, if new options were added, descriptions were
// updated, or default values were changed.
SaveConfig();
}
}

View File

@ -745,7 +745,9 @@ static const vec128_t xmm_consts[] = {
/* XMMIntMaxPD */ vec128d(INT_MAX),
/* XMMPosIntMinPS */ vec128f((float)0x80000000u),
/* XMMQNaN */ vec128i(0x7FC00000u),
/* XMMOneDouble */ vec128d(1.0)
/* XMMOneDouble */ vec128d(1.0),
/* XMMInt127 */ vec128i(0x7Fu),
/* XMM2To32 */ vec128f(0x1.0p32f),
};
// First location to try and place constants.

View File

@ -114,7 +114,9 @@ enum XmmConst {
XMMIntMaxPD,
XMMPosIntMinPS,
XMMQNaN,
XMMOneDouble
XMMOneDouble,
XMMInt127,
XMM2To32
};
// Unfortunately due to the design of xbyak we have to pass this to the ctor.

View File

@ -33,19 +33,41 @@ struct VECTOR_CONVERT_I2F
static void Emit(X64Emitter& e, const EmitArgType& i) {
// flags = ARITHMETIC_UNSIGNED
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
// xmm0 = mask of positive values
e.vpcmpgtd(e.xmm0, i.src1, e.GetXmmConstPtr(XMMFFFF));
// Round manually to (1.stored mantissa bits * 2^31) or to 2^32 to the
// nearest even (the only rounding mode used on AltiVec) if the number is
// 0x80000000 or greater, instead of converting src & 0x7FFFFFFF and then
// adding 2147483648.0f, which results in double rounding that can give a
// result larger than needed - see OPCODE_VECTOR_CONVERT_I2F notes.
// scale any values >= (unsigned)INT_MIN back to [0, INT_MAX]
e.vpsubd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMSignMaskI32));
e.vblendvps(e.xmm1, e.xmm1, i.src1, e.xmm0);
// [0x80000000, 0xFFFFFFFF] case:
// xmm1 = [0, INT_MAX]
e.vcvtdq2ps(i.dest, e.xmm1);
// Round to the nearest even, from (0x80000000 | 31 stored mantissa bits)
// to ((-1 << 23) | 23 stored mantissa bits), or to 0 if the result should
// be 4294967296.0f.
// xmm0 = src + 0b01111111 + ((src >> 8) & 1)
// (xmm1 also used to launch reg + mem early and to require it late)
e.vpaddd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMInt127));
e.vpslld(e.xmm0, i.src1, 31 - 8);
e.vpsrld(e.xmm0, e.xmm0, 31);
e.vpaddd(e.xmm0, e.xmm0, e.xmm1);
// xmm0 = (0xFF800000 | 23 explicit mantissa bits), or 0 if overflowed
e.vpsrad(e.xmm0, e.xmm0, 8);
// Calculate the result for the [0x80000000, 0xFFFFFFFF] case - take the
// rounded mantissa, and add -1 or 0 to the exponent of 32, depending on
// whether the number should be (1.stored mantissa bits * 2^31) or 2^32.
// xmm0 = [0x80000000, 0xFFFFFFFF] case result
e.vpaddd(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMM2To32));
// scale values back above [INT_MIN, UINT_MAX]
e.vpandn(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS));
e.vaddps(i.dest, i.dest, e.xmm0);
// [0x00000000, 0x7FFFFFFF] case
// (during vblendvps reg -> vpaddd reg -> vpaddd mem dependency):
// Convert from signed integer to float.
// xmm1 = [0x00000000, 0x7FFFFFFF] case result
e.vcvtdq2ps(e.xmm1, i.src1);
// Merge the two ways depending on whether the number is >= 0x80000000
// (has high bit set).
e.vblendvps(i.dest, e.xmm1, e.xmm0, i.src1);
} else {
e.vcvtdq2ps(i.dest, i.src1);
}

View File

@ -2631,7 +2631,11 @@ struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_always();
if (i.src1.is_constant) {
e.lea(e.GetNativeParam(0), e.StashConstantXmm(0, i.src1.constant()));
} else {
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
}
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2645,7 +2649,11 @@ struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_always();
if (i.src1.is_constant) {
e.lea(e.GetNativeParam(0), e.StashConstantXmm(0, i.src1.constant()));
} else {
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
}
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2660,7 +2668,11 @@ struct LOG2_V128 : Sequence<LOG2_V128, I<OPCODE_LOG2, V128Op, V128Op>> {
return _mm_load_ps(values);
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
e.lea(e.GetNativeParam(0), e.StashConstantXmm(0, i.src1.constant()));
} else {
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
}
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
e.vmovaps(i.dest, e.xmm0);
}

View File

@ -143,6 +143,55 @@ enum Opcode {
OPCODE_TRUNCATE,
OPCODE_CONVERT,
OPCODE_ROUND,
// Note that 2147483648.0 + (src & 0x7FFFFFFF) is not a correct way of
// performing the uint -> float conversion for large numbers on backends where
// only sint -> float is available.
//
// Take 0b11000000000000000000000101000001 as an example,
// or 1.1000000000000000000000101000001 * 2^31.
// This one has 31 mantissa bits (excluding the implicit 1.), and needs to be
// rounded to 23 bits - 8 mantissa bits need to be dropped:
// 10000000000000000000001_01000001
//
// Rounding to the nearest even (the only rounding mode that exists on
// AltiVec, and the likely rounding mode in the implementations) should be
// done downwards - 01000001 of 1_01000001 is in [00000000, 01111111].
// The correct mantissa in this case is:
// 1.10000000000000000000001 * 2^31.
//
// With a two-step conversion, rounding is done twice instead, which gives an
// incorrect result.
//
// First, converting the low 31 bits to float:
// The number is 0.1000000000000000000000101000001 * 2^31.
// Normalizing it, we get 1.000000000000000000000101000001 (30 significand
// bits).
// We need to round 30 bits to 23 - 7 bits need to be dropped:
// 00000000000000000000010_1000001
//
// Rounding to the nearest even is done upwards in this case - 1000001 of
// 0_1000001 is in [1000001, 1111111].
// The result of the sint -> float conversion is:
// 1.00000000000000000000011 * 2^30.
//
// Now 2147483648.0 (1 * 2^31) needs to be added. Aligning the exponents, we
// get:
// 0.|10000000000000000000001|1 * 2^31
// + 1.|00000000000000000000000| * 2^31
// = 1.|10000000000000000000001|1 * 2^31
//
// At "infinite precision", the result has 24 significand bits, but only 23
// can be stored, thus rounding to the nearest even needs to be done. 1_1 is
// (odd + 0.5). 0.5 is ambiguous, thus tie-breaking to the nearest even -
// which is above in this case - is done. The result is:
// 1.10000000000000000000010 * 2^31.
//
// This is incorrect - larger than the correctly rounded result, which is:
// 1.10000000000000000000001 * 2^31.
//
// Test cases checked on real hardware via vcfux: 0xFFFDFF7E, 0xFFFCFF7D -
// should be 0x4F7FFDFF and 0x4F7FFCFF respectively, not 0x4F7FFE00 and
// 0x4F7FFD00.
OPCODE_VECTOR_CONVERT_I2F,
OPCODE_VECTOR_CONVERT_F2I,
OPCODE_LOAD_VECTOR_SHL,

View File

@ -519,9 +519,11 @@ int InstrEmit_vavguw(PPCHIRBuilder& f, const InstrData& i) {
int InstrEmit_vcfsx_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
uint32_t uimm) {
// (VD) <- float(VB as signed) / 2^uimm
float fuimm = static_cast<float>(std::exp2(uimm));
Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb)),
f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
Value* v = f.VectorConvertI2F(f.LoadVR(vb));
if (uimm) {
float fuimm = std::ldexp(1.0f, -int(uimm));
v = f.Mul(v, f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
}
f.StoreVR(vd, v);
return 0;
}
@ -535,9 +537,11 @@ int InstrEmit_vcsxwfp128(PPCHIRBuilder& f, const InstrData& i) {
int InstrEmit_vcfux_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
uint32_t uimm) {
// (VD) <- float(VB as unsigned) / 2^uimm
float fuimm = static_cast<float>(std::exp2(uimm));
Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED),
f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
Value* v = f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED);
if (uimm) {
float fuimm = std::ldexp(1.0f, -int(uimm));
v = f.Mul(v, f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
}
f.StoreVR(vd, v);
return 0;
}

View File

@ -1,21 +1,21 @@
# frsqrte tests disabled because accuracy is CPU dependent.
#test_frsqrte_1:
test_frsqrte_1:
# _ REGISTER_IN f1 1.0
# frsqrte f1, f1
# blr
blr
# _ REGISTER_OUT f1 0.99975585937500000
# want: 0.97
#test_frsqrte_2:
test_frsqrte_2:
# _ REGISTER_IN f1 64.0
# frsqrte f1, f1
# blr
blr
# _ REGISTER_OUT f1 0.12496948242187500
#test_frsqrte_3:
test_frsqrte_3:
# _ REGISTER_IN f1 0.5
# frsqrte f1, f1
# blr
blr
# _ REGISTER_OUT f1 1.41381835937500000
# want: 1.375

View File

@ -7,6 +7,7 @@
******************************************************************************
*/
#include "xenia/base/cvar.h"
#include "xenia/base/filesystem.h"
#include "xenia/base/logging.h"
#include "xenia/base/main.h"
@ -28,7 +29,7 @@ DEFINE_path(test_path, "src/xenia/cpu/ppc/testing/",
"Directory scanned for test files.", "Other");
DEFINE_path(test_bin_path, "src/xenia/cpu/ppc/testing/bin/",
"Directory with binary outputs of the test files.", "Other");
DEFINE_transient_string(test_name, "", "Specifies test name.", "General");
DEFINE_transient_string(test_name, "", "Test suite name.", "General");
namespace xe {
namespace cpu {
@ -475,13 +476,7 @@ bool RunTests(const std::string_view test_name) {
}
int main(const std::vector<std::string>& args) {
// Grab test name, if present.
std::string test_name;
if (args.size() >= 2) {
test_name = args[1];
}
return RunTests(test_name) ? 0 : 1;
return RunTests(cvars::test_name) ? 0 : 1;
}
} // namespace test

View File

@ -358,7 +358,6 @@ bool Processor::ExecuteRaw(ThreadState* thread_state, uint32_t address) {
return false;
}
auto context = thread_state->context();
return function->Call(thread_state, 0xBCBCBCBC);
}

View File

@ -62,13 +62,15 @@ namespace xe {
Emulator::Emulator(const std::filesystem::path& command_line,
const std::filesystem::path& storage_root,
const std::filesystem::path& content_root)
const std::filesystem::path& content_root,
const std::filesystem::path& cache_root)
: on_launch(),
on_terminate(),
on_exit(),
command_line_(command_line),
storage_root_(storage_root),
content_root_(content_root),
cache_root_(cache_root),
game_title_(),
display_window_(nullptr),
memory_(),
@ -187,7 +189,10 @@ X_STATUS Emulator::Setup(
if (input_driver_factory) {
auto input_drivers = input_driver_factory(display_window_);
for (size_t i = 0; i < input_drivers.size(); ++i) {
input_system_->AddDriver(std::move(input_drivers[i]));
auto& input_driver = input_drivers[i];
input_driver->set_is_active_callback(
[]() -> bool { return !xe::kernel::xam::xeXamIsUIActive(); });
input_system_->AddDriver(std::move(input_driver));
}
}
@ -771,7 +776,7 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
// playing before the video can be seen if doing this in parallel with the
// main thread.
on_shader_storage_initialization(true);
graphics_system_->InitializeShaderStorage(storage_root_, title_id_, true);
graphics_system_->InitializeShaderStorage(cache_root_, title_id_, true);
on_shader_storage_initialization(false);
auto main_thread = kernel_state_->LaunchModule(module);

View File

@ -50,7 +50,8 @@ class Emulator {
public:
explicit Emulator(const std::filesystem::path& command_line,
const std::filesystem::path& storage_root,
const std::filesystem::path& content_root);
const std::filesystem::path& content_root,
const std::filesystem::path& cache_root);
~Emulator();
// Full command line used when launching the process.
@ -62,6 +63,9 @@ class Emulator {
// Folder guest content is stored in.
const std::filesystem::path& content_root() const { return content_root_; }
// Folder where files that are safe to remove without significant side effects
// are stored.
const std::filesystem::path& cache_root() const { return cache_root_; }
// Title of the game in the default language.
const std::string& game_title() const { return game_title_; }
@ -175,6 +179,7 @@ class Emulator {
std::filesystem::path command_line_;
std::filesystem::path storage_root_;
std::filesystem::path content_root_;
std::filesystem::path cache_root_;
std::string game_title_;

View File

@ -89,8 +89,8 @@ void CommandProcessor::Shutdown() {
}
void CommandProcessor::InitializeShaderStorage(
const std::filesystem::path& storage_root, uint32_t title_id,
bool blocking) {}
const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
}
void CommandProcessor::RequestFrameTrace(
const std::filesystem::path& root_path) {

View File

@ -133,9 +133,8 @@ class CommandProcessor {
// May be called not only from the command processor thread when the command
// processor is paused, and the termination of this function may be explicitly
// awaited.
virtual void InitializeShaderStorage(
const std::filesystem::path& storage_root, uint32_t title_id,
bool blocking);
virtual void InitializeShaderStorage(const std::filesystem::path& cache_root,
uint32_t title_id, bool blocking);
virtual void RequestFrameTrace(const std::filesystem::path& root_path);
virtual void BeginTracing(const std::filesystem::path& root_path);

View File

@ -7,8 +7,6 @@
******************************************************************************
*/
#include "third_party/xxhash/xxhash.h"
#include <algorithm>
#include <cstring>
#include <utility>
@ -73,10 +71,9 @@ void D3D12CommandProcessor::ClearCaches() {
}
void D3D12CommandProcessor::InitializeShaderStorage(
const std::filesystem::path& storage_root, uint32_t title_id,
bool blocking) {
CommandProcessor::InitializeShaderStorage(storage_root, title_id, blocking);
pipeline_cache_->InitializeShaderStorage(storage_root, title_id, blocking);
const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
CommandProcessor::InitializeShaderStorage(cache_root, title_id, blocking);
pipeline_cache_->InitializeShaderStorage(cache_root, title_id, blocking);
}
void D3D12CommandProcessor::RequestFrameTrace(
@ -102,14 +99,15 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
}
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
const D3D12Shader* pixel_shader) const {
if (pixel_shader == nullptr) {
uint32_t shader_writes_color_targets) const {
auto& regs = *register_file_;
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
xenos::ModeControl::kColorDepth) {
return 0;
}
auto& regs = *register_file_;
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
for (uint32_t i = 0; i < 4; ++i) {
if (!pixel_shader->writes_color_target(i)) {
if (!(shader_writes_color_targets & (1 << i))) {
color_mask &= ~(0xF << (i * 4));
}
}
@ -159,34 +157,29 @@ void D3D12CommandProcessor::SubmitBarriers() {
}
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
assert_true(vertex_shader->is_translated());
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
bool tessellated) {
if (bindless_resources_used_) {
return vertex_shader->host_vertex_shader_type() !=
Shader::HostVertexShaderType::kVertex
? root_signature_bindless_ds_
return tessellated ? root_signature_bindless_ds_
: root_signature_bindless_vs_;
}
assert_true(pixel_shader == nullptr || pixel_shader->is_translated());
D3D12_SHADER_VISIBILITY vertex_visibility =
tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN
: D3D12_SHADER_VISIBILITY_VERTEX;
D3D12_SHADER_VISIBILITY vertex_visibility;
if (vertex_shader->host_vertex_shader_type() !=
Shader::HostVertexShaderType::kVertex) {
vertex_visibility = D3D12_SHADER_VISIBILITY_DOMAIN;
} else {
vertex_visibility = D3D12_SHADER_VISIBILITY_VERTEX;
}
uint32_t texture_count_vertex, sampler_count_vertex;
vertex_shader->GetTextureBindings(texture_count_vertex);
vertex_shader->GetSamplerBindings(sampler_count_vertex);
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
if (pixel_shader != nullptr) {
pixel_shader->GetTextureBindings(texture_count_pixel);
pixel_shader->GetSamplerBindings(sampler_count_pixel);
}
uint32_t texture_count_vertex =
uint32_t(vertex_shader->GetTextureBindingsAfterTranslation().size());
uint32_t sampler_count_vertex =
uint32_t(vertex_shader->GetSamplerBindingsAfterTranslation().size());
uint32_t texture_count_pixel =
pixel_shader
? uint32_t(pixel_shader->GetTextureBindingsAfterTranslation().size())
: 0;
uint32_t sampler_count_pixel =
pixel_shader
? uint32_t(pixel_shader->GetSamplerBindingsAfterTranslation().size())
: 0;
// It is probably better to put the pixel texture/sampler in the lower bits
// because it changes often.
@ -393,35 +386,28 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
}
uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
RootBindfulExtraParameterIndices& indices_out) {
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
if (pixel_shader != nullptr) {
pixel_shader->GetTextureBindings(texture_count_pixel);
pixel_shader->GetSamplerBindings(sampler_count_pixel);
}
uint32_t texture_count_vertex, sampler_count_vertex;
vertex_shader->GetTextureBindings(texture_count_vertex);
vertex_shader->GetSamplerBindings(sampler_count_vertex);
uint32_t index = kRootParameter_Bindful_Count_Base;
if (texture_count_pixel != 0) {
if (pixel_shader &&
!pixel_shader->GetTextureBindingsAfterTranslation().empty()) {
indices_out.textures_pixel = index++;
} else {
indices_out.textures_pixel = RootBindfulExtraParameterIndices::kUnavailable;
}
if (sampler_count_pixel != 0) {
if (pixel_shader &&
!pixel_shader->GetSamplerBindingsAfterTranslation().empty()) {
indices_out.samplers_pixel = index++;
} else {
indices_out.samplers_pixel = RootBindfulExtraParameterIndices::kUnavailable;
}
if (texture_count_vertex != 0) {
if (!vertex_shader->GetTextureBindingsAfterTranslation().empty()) {
indices_out.textures_vertex = index++;
} else {
indices_out.textures_vertex =
RootBindfulExtraParameterIndices::kUnavailable;
}
if (sampler_count_vertex != 0) {
if (!vertex_shader->GetSamplerBindingsAfterTranslation().empty()) {
indices_out.samplers_vertex = index++;
} else {
indices_out.samplers_vertex =
@ -1202,6 +1188,7 @@ bool D3D12CommandProcessor::SetupContext() {
pipeline_cache_ = std::make_unique<PipelineCache>(
*this, *register_file_, bindless_resources_used_, edram_rov_used_,
render_target_cache_->depth_float24_conversion(),
texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline cache");
@ -1804,8 +1791,7 @@ Shader* D3D12CommandProcessor::LoadShader(xenos::ShaderType shader_type,
uint32_t guest_address,
const uint32_t* host_address,
uint32_t dword_count) {
return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
dword_count);
return pipeline_cache_->LoadShader(shader_type, host_address, dword_count);
}
bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
@ -1819,12 +1805,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
xenos::ModeControl enable_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (enable_mode == xenos::ModeControl::kIgnore) {
// Ignored.
return true;
}
if (enable_mode == xenos::ModeControl::kCopy) {
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode == xenos::ModeControl::kCopy) {
// Special copy handling.
return IssueCopy();
}
@ -1836,63 +1818,68 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
return true;
}
// Shaders will have already been defined by previous loads.
// We need them to do just about anything so validate here.
// Vertex shader.
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
auto pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
if (!vertex_shader) {
// Always need a vertex shader.
return false;
}
// Depth-only mode doesn't need a pixel shader.
if (enable_mode == xenos::ModeControl::kDepth) {
pixel_shader = nullptr;
} else if (!pixel_shader) {
// Need a pixel shader in normal color mode.
return false;
}
// Get tessellation info for the current draw for vertex shader translation.
Shader::HostVertexShaderType host_vertex_shader_type =
pipeline_cache_->GetHostVertexShaderTypeIfValid();
if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
return false;
}
// Translate the shaders now to get memexport configuration and color mask,
// which is needed by the render target cache, to check the possibility of
// doing early depth/stencil, and also to get used textures and samplers.
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
host_vertex_shader_type)) {
return false;
}
bool tessellated =
host_vertex_shader_type != Shader::HostVertexShaderType::kVertex;
// Check if memexport is used. If it is, we can't skip draw calls that have no
// visual effect.
pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
bool memexport_used_vertex =
!vertex_shader->memexport_stream_constants().empty();
bool memexport_used_pixel =
pixel_shader != nullptr &&
!pixel_shader->memexport_stream_constants().empty();
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
DxbcShaderTranslator::Modification vertex_shader_modification;
pipeline_cache_->GetCurrentShaderModification(*vertex_shader,
vertex_shader_modification);
bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
Shader::HostVertexShaderType::kVertex;
bool primitive_polygonal =
xenos::IsPrimitivePolygonal(tessellated, primitive_type);
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
if (!memexport_used_vertex &&
(sq_program_cntl.vs_export_mode ==
xenos::VertexShaderExportMode::kMultipass ||
(primitive_polygonal && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back))) {
// All faces are culled - can't be expressed in the pipeline.
// Pixel shader.
D3D12Shader* pixel_shader = nullptr;
if (draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal)) {
// See xenos::ModeControl for an explanation of why the pixel shader is only
// used when the mode is kColorDepth here.
if (edram_mode == xenos::ModeControl::kColorDepth) {
pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
if (pixel_shader) {
pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);
if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader,
regs)) {
pixel_shader = nullptr;
}
}
}
} else {
// Disabling the pixel shader for this case is also required by the pipeline
// cache.
if (!memexport_used_vertex) {
// This draw has no effect.
return true;
}
}
bool memexport_used_pixel;
DxbcShaderTranslator::Modification pixel_shader_modification;
if (pixel_shader) {
memexport_used_pixel = !pixel_shader->memexport_stream_constants().empty();
if (!pipeline_cache_->GetCurrentShaderModification(
*pixel_shader, pixel_shader_modification)) {
return false;
}
} else {
memexport_used_pixel = false;
pixel_shader_modification = DxbcShaderTranslator::Modification(0);
}
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
BeginSubmission(true);
// Set up the render targets - this may bind pipelines.
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
uint32_t pixel_shader_writes_color_targets =
pixel_shader ? pixel_shader->writes_color_targets() : 0;
if (!render_target_cache_->UpdateRenderTargets(
pixel_shader_writes_color_targets)) {
return false;
}
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
@ -1961,34 +1948,36 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
line_loop_closing_index = 0;
}
// Update the textures - this may bind pipelines.
uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMask() |
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
texture_cache_->RequestTextures(used_texture_mask);
// Check if early depth/stencil can be enabled.
bool early_z;
if (pixel_shader) {
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
early_z = pixel_shader->implicit_early_z_allowed() &&
(!rb_colorcontrol.alpha_test_enable ||
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) &&
!rb_colorcontrol.alpha_to_mask_enable;
} else {
early_z = true;
}
// Create the pipeline if needed and bind it.
// Translate the shaders and create the pipeline if needed.
D3D12Shader::D3D12Translation* vertex_shader_translation =
static_cast<D3D12Shader::D3D12Translation*>(
vertex_shader->GetOrCreateTranslation(
vertex_shader_modification.value));
D3D12Shader::D3D12Translation* pixel_shader_translation =
pixel_shader ? static_cast<D3D12Shader::D3D12Translation*>(
pixel_shader->GetOrCreateTranslation(
pixel_shader_modification.value))
: nullptr;
void* pipeline_handle;
ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type_converted,
vertex_shader_translation, pixel_shader_translation,
primitive_type_converted,
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
early_z, pipeline_render_targets, &pipeline_handle,
&root_signature)) {
pipeline_render_targets, &pipeline_handle, &root_signature)) {
return false;
}
// Update the textures - this may bind pipelines.
uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMaskAfterTranslation() |
(pixel_shader != nullptr
? pixel_shader->GetUsedTextureMaskAfterTranslation()
: 0);
texture_cache_->RequestTextures(used_texture_mask);
// Bind the pipeline after configuring it and doing everything that may bind
// other pipelines.
if (current_cached_pipeline_ != pipeline_handle) {
deferred_command_list_.SetPipelineStateHandle(
reinterpret_cast<void*>(pipeline_handle));
@ -2014,10 +2003,17 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
pixel_size_x *= 2;
pixel_size_y *= 2;
}
flags::DepthFloat24Conversion depth_float24_conversion =
render_target_cache_->depth_float24_conversion();
draw_util::ViewportInfo viewport_info;
draw_util::GetHostViewportInfo(regs, float(pixel_size_x), float(pixel_size_y),
true, float(D3D12_VIEWPORT_BOUNDS_MAX),
float(D3D12_VIEWPORT_BOUNDS_MAX), false,
draw_util::GetHostViewportInfo(
regs, float(pixel_size_x), float(pixel_size_y), true,
float(D3D12_VIEWPORT_BOUNDS_MAX), float(D3D12_VIEWPORT_BOUNDS_MAX), false,
!edram_rov_used_ &&
(depth_float24_conversion ==
flags::DepthFloat24Conversion::kOnOutputTruncating ||
depth_float24_conversion ==
flags::DepthFloat24Conversion::kOnOutputRounding),
viewport_info);
draw_util::Scissor scissor;
draw_util::GetScissor(regs, scissor);
@ -2033,8 +2029,10 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
UpdateSystemConstantValues(
memexport_used, primitive_polygonal, line_loop_closing_index,
indexed ? index_buffer_info->endianness : xenos::Endian::kNone,
viewport_info, pixel_size_x, pixel_size_y, used_texture_mask, early_z,
GetCurrentColorMask(pixel_shader), pipeline_render_targets);
viewport_info, pixel_size_x, pixel_size_y, used_texture_mask,
pixel_shader ? GetCurrentColorMask(pixel_shader->writes_color_targets())
: 0,
pipeline_render_targets);
// Update constant buffers, descriptors and root parameters.
if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
@ -2097,9 +2095,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
MemExportRange memexport_ranges[512];
uint32_t memexport_range_count = 0;
if (memexport_used_vertex) {
const std::vector<uint32_t>& memexport_stream_constants_vertex =
vertex_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_vertex) {
for (uint32_t constant_index :
vertex_shader->memexport_stream_constants()) {
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4);
if (memexport_stream.index_count == 0) {
@ -2140,9 +2137,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
}
}
if (memexport_used_pixel) {
const std::vector<uint32_t>& memexport_stream_constants_pixel =
pixel_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_pixel) {
for (uint32_t constant_index : pixel_shader->memexport_stream_constants()) {
const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4);
if (memexport_stream.index_count == 0) {
@ -2659,6 +2654,8 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
bool is_closing_frame = is_swap && frame_open_;
if (is_closing_frame) {
render_target_cache_->EndFrame();
texture_cache_->EndFrame();
}
@ -2852,7 +2849,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
Register stencil_ref_mask_reg;
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
if (primitive_polygonal &&
regs.Get<reg::RB_DEPTHCONTROL>().backface_enable &&
draw_util::GetDepthControlForCurrentEdramMode(regs).backface_enable &&
pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
} else {
@ -2873,8 +2870,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
bool shared_memory_is_uav, bool primitive_polygonal,
uint32_t line_loop_closing_index, xenos::Endian index_endian,
const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x,
uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z,
uint32_t color_mask,
uint32_t pixel_size_y, uint32_t used_texture_mask, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
@ -2889,7 +2885,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
auto rb_stencilrefmask_bf =
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
@ -2992,14 +2988,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
flags |= DxbcShaderTranslator::kSysFlag_KillIfAnyVertexKilled;
}
// Alpha test.
if (rb_colorcontrol.alpha_test_enable) {
flags |= uint32_t(rb_colorcontrol.alpha_func)
xenos::CompareFunction alpha_test_function =
rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func
: xenos::CompareFunction::kAlways;
flags |= uint32_t(alpha_test_function)
<< DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
} else {
flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess |
DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual |
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
}
// Gamma writing.
for (uint32_t i = 0; i < 4; ++i) {
if (color_infos[i].color_format ==
@ -3028,7 +3021,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
if (rb_depthcontrol.stencil_enable) {
flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest;
}
if (early_z) {
// Hint - if it is not applicable to the shader, it will have no effect.
if (alpha_test_function == xenos::CompareFunction::kAlways &&
!rb_colorcontrol.alpha_to_mask_enable) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencilEarlyWrite;
}
}
@ -3078,25 +3073,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
}
// Conversion to Direct3D 12 normalized device coordinates.
// Kill all primitives if multipass or both faces are culled, but still need
// to do memexport.
if (sq_program_cntl.vs_export_mode ==
xenos::VertexShaderExportMode::kMultipass ||
(primitive_polygonal && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back)) {
float nan_value = std::nanf("");
for (uint32_t i = 0; i < 3; ++i) {
dirty |= !std::isnan(system_constants_.ndc_scale[i]);
system_constants_.ndc_scale[i] = nan_value;
}
} else {
for (uint32_t i = 0; i < 3; ++i) {
dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
}
}
// Point size.
float point_size_x = float(pa_su_point_size.width) * 0.125f;
@ -3596,20 +3578,21 @@ bool D3D12CommandProcessor::UpdateBindings(
vertex_shader->GetTextureBindingLayoutUserUID();
size_t sampler_layout_uid_vertex =
vertex_shader->GetSamplerBindingLayoutUserUID();
uint32_t texture_count_vertex, sampler_count_vertex;
const D3D12Shader::TextureBinding* textures_vertex =
vertex_shader->GetTextureBindings(texture_count_vertex);
const D3D12Shader::SamplerBinding* samplers_vertex =
vertex_shader->GetSamplerBindings(sampler_count_vertex);
const std::vector<D3D12Shader::TextureBinding>& textures_vertex =
vertex_shader->GetTextureBindingsAfterTranslation();
const std::vector<D3D12Shader::SamplerBinding>& samplers_vertex =
vertex_shader->GetSamplerBindingsAfterTranslation();
size_t texture_count_vertex = textures_vertex.size();
size_t sampler_count_vertex = samplers_vertex.size();
if (sampler_count_vertex) {
if (current_sampler_layout_uid_vertex_ != sampler_layout_uid_vertex) {
current_sampler_layout_uid_vertex_ = sampler_layout_uid_vertex;
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
bindful_samplers_written_vertex_ = false;
}
current_samplers_vertex_.resize(std::max(current_samplers_vertex_.size(),
size_t(sampler_count_vertex)));
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
current_samplers_vertex_.resize(
std::max(current_samplers_vertex_.size(), sampler_count_vertex));
for (size_t i = 0; i < sampler_count_vertex; ++i) {
TextureCache::SamplerParameters parameters =
texture_cache_->GetSamplerParameters(samplers_vertex[i]);
if (current_samplers_vertex_[i] != parameters) {
@ -3623,14 +3606,16 @@ bool D3D12CommandProcessor::UpdateBindings(
// Get textures and samplers used by the pixel shader, check if the last used
// samplers are compatible and update them.
size_t texture_layout_uid_pixel, sampler_layout_uid_pixel;
uint32_t texture_count_pixel, sampler_count_pixel;
const D3D12Shader::TextureBinding* textures_pixel;
const D3D12Shader::SamplerBinding* samplers_pixel;
const std::vector<D3D12Shader::TextureBinding>* textures_pixel;
const std::vector<D3D12Shader::SamplerBinding>* samplers_pixel;
size_t texture_count_pixel, sampler_count_pixel;
if (pixel_shader != nullptr) {
texture_layout_uid_pixel = pixel_shader->GetTextureBindingLayoutUserUID();
sampler_layout_uid_pixel = pixel_shader->GetSamplerBindingLayoutUserUID();
textures_pixel = pixel_shader->GetTextureBindings(texture_count_pixel);
samplers_pixel = pixel_shader->GetSamplerBindings(sampler_count_pixel);
textures_pixel = &pixel_shader->GetTextureBindingsAfterTranslation();
texture_count_pixel = textures_pixel->size();
samplers_pixel = &pixel_shader->GetSamplerBindingsAfterTranslation();
sampler_count_pixel = samplers_pixel->size();
if (sampler_count_pixel) {
if (current_sampler_layout_uid_pixel_ != sampler_layout_uid_pixel) {
current_sampler_layout_uid_pixel_ = sampler_layout_uid_pixel;
@ -3641,7 +3626,7 @@ bool D3D12CommandProcessor::UpdateBindings(
size_t(sampler_count_pixel)));
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
TextureCache::SamplerParameters parameters =
texture_cache_->GetSamplerParameters(samplers_pixel[i]);
texture_cache_->GetSamplerParameters((*samplers_pixel)[i]);
if (current_samplers_pixel_[i] != parameters) {
current_samplers_pixel_[i] = parameters;
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
@ -3671,7 +3656,7 @@ bool D3D12CommandProcessor::UpdateBindings(
cbuffer_binding_descriptor_indices_vertex_.up_to_date &&
(current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex))) {
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
}
@ -3679,7 +3664,7 @@ bool D3D12CommandProcessor::UpdateBindings(
cbuffer_binding_descriptor_indices_pixel_.up_to_date &&
(current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel))) {
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
}
@ -3812,15 +3797,14 @@ bool D3D12CommandProcessor::UpdateBindings(
uint32_t* descriptor_indices =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
frame_current_,
std::max(texture_count_vertex + sampler_count_vertex,
uint32_t(1)) *
std::max(texture_count_vertex + sampler_count_vertex, size_t(1)) *
sizeof(uint32_t),
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
&cbuffer_binding_descriptor_indices_vertex_.address));
if (!descriptor_indices) {
return false;
}
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
for (size_t i = 0; i < texture_count_vertex; ++i) {
const D3D12Shader::TextureBinding& texture = textures_vertex[i];
descriptor_indices[texture.bindless_descriptor_index] =
texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
@ -3832,11 +3816,11 @@ bool D3D12CommandProcessor::UpdateBindings(
std::max(current_texture_srv_keys_vertex_.size(),
size_t(texture_count_vertex)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex);
}
// Current samplers have already been updated.
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
for (size_t i = 0; i < sampler_count_vertex; ++i) {
descriptor_indices[samplers_vertex[i].bindless_descriptor_index] =
current_sampler_bindless_indices_vertex_[i];
}
@ -3849,15 +3833,15 @@ bool D3D12CommandProcessor::UpdateBindings(
uint32_t* descriptor_indices =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
frame_current_,
std::max(texture_count_pixel + sampler_count_pixel, uint32_t(1)) *
std::max(texture_count_pixel + sampler_count_pixel, size_t(1)) *
sizeof(uint32_t),
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
&cbuffer_binding_descriptor_indices_pixel_.address));
if (!descriptor_indices) {
return false;
}
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
const D3D12Shader::TextureBinding& texture = textures_pixel[i];
for (size_t i = 0; i < texture_count_pixel; ++i) {
const D3D12Shader::TextureBinding& texture = (*textures_pixel)[i];
descriptor_indices[texture.bindless_descriptor_index] =
texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
uint32_t(SystemBindlessView::kUnboundedSRVsStart);
@ -3868,12 +3852,12 @@ bool D3D12CommandProcessor::UpdateBindings(
std::max(current_texture_srv_keys_pixel_.size(),
size_t(texture_count_pixel)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel);
}
// Current samplers have already been updated.
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
descriptor_indices[samplers_pixel[i].bindless_descriptor_index] =
for (size_t i = 0; i < sampler_count_pixel; ++i) {
descriptor_indices[(*samplers_pixel)[i].bindless_descriptor_index] =
current_sampler_bindless_indices_pixel_[i];
}
cbuffer_binding_descriptor_indices_pixel_.up_to_date = true;
@ -3892,14 +3876,14 @@ bool D3D12CommandProcessor::UpdateBindings(
(!bindful_textures_written_vertex_ ||
current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex));
bool write_textures_pixel =
texture_count_pixel &&
(!bindful_textures_written_pixel_ ||
current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel));
bool write_samplers_vertex =
sampler_count_vertex && !bindful_samplers_written_vertex_;
@ -3907,7 +3891,7 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_count_pixel && !bindful_samplers_written_pixel_;
// Allocate the descriptors.
uint32_t view_count_partial_update = 0;
size_t view_count_partial_update = 0;
if (write_textures_vertex) {
view_count_partial_update += texture_count_vertex;
}
@ -3915,7 +3899,7 @@ bool D3D12CommandProcessor::UpdateBindings(
view_count_partial_update += texture_count_pixel;
}
// All the constants + shared memory SRV and UAV + textures.
uint32_t view_count_full_update =
size_t view_count_full_update =
2 + texture_count_vertex + texture_count_pixel;
if (edram_rov_used_) {
// + EDRAM UAV.
@ -3925,14 +3909,14 @@ bool D3D12CommandProcessor::UpdateBindings(
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle;
uint32_t descriptor_size_view = provider.GetViewDescriptorSize();
uint64_t view_heap_index = RequestViewBindfulDescriptors(
draw_view_bindful_heap_index_, view_count_partial_update,
view_count_full_update, view_cpu_handle, view_gpu_handle);
draw_view_bindful_heap_index_, uint32_t(view_count_partial_update),
uint32_t(view_count_full_update), view_cpu_handle, view_gpu_handle);
if (view_heap_index ==
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
XELOGE("Failed to allocate view descriptors");
return false;
}
uint32_t sampler_count_partial_update = 0;
size_t sampler_count_partial_update = 0;
if (write_samplers_vertex) {
sampler_count_partial_update += sampler_count_vertex;
}
@ -3946,9 +3930,10 @@ bool D3D12CommandProcessor::UpdateBindings(
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
if (sampler_count_vertex != 0 || sampler_count_pixel != 0) {
sampler_heap_index = RequestSamplerBindfulDescriptors(
draw_sampler_bindful_heap_index_, sampler_count_partial_update,
sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle,
sampler_gpu_handle);
draw_sampler_bindful_heap_index_,
uint32_t(sampler_count_partial_update),
uint32_t(sampler_count_vertex + sampler_count_pixel),
sampler_cpu_handle, sampler_gpu_handle);
if (sampler_heap_index ==
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
XELOGE("Failed to allocate sampler descriptors");
@ -3993,7 +3978,7 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.textures_vertex !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_textures_vertex_ = view_gpu_handle;
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
for (size_t i = 0; i < texture_count_vertex; ++i) {
texture_cache_->WriteActiveTextureBindfulSRV(textures_vertex[i],
view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
@ -4004,7 +3989,7 @@ bool D3D12CommandProcessor::UpdateBindings(
std::max(current_texture_srv_keys_vertex_.size(),
size_t(texture_count_vertex)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_vertex_.data(), textures_vertex,
current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
texture_count_vertex);
bindful_textures_written_vertex_ = true;
current_graphics_root_up_to_date_ &=
@ -4014,8 +3999,8 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.textures_pixel !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_textures_pixel_ = view_gpu_handle;
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
texture_cache_->WriteActiveTextureBindfulSRV(textures_pixel[i],
for (size_t i = 0; i < texture_count_pixel; ++i) {
texture_cache_->WriteActiveTextureBindfulSRV((*textures_pixel)[i],
view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
@ -4024,7 +4009,7 @@ bool D3D12CommandProcessor::UpdateBindings(
current_texture_srv_keys_pixel_.resize(std::max(
current_texture_srv_keys_pixel_.size(), size_t(texture_count_pixel)));
texture_cache_->WriteActiveTextureSRVKeys(
current_texture_srv_keys_pixel_.data(), textures_pixel,
current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
texture_count_pixel);
bindful_textures_written_pixel_ = true;
current_graphics_root_up_to_date_ &=
@ -4034,7 +4019,7 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.samplers_vertex !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_samplers_vertex_ = sampler_gpu_handle;
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
for (size_t i = 0; i < sampler_count_vertex; ++i) {
texture_cache_->WriteSampler(current_samplers_vertex_[i],
sampler_cpu_handle);
sampler_cpu_handle.ptr += descriptor_size_sampler;
@ -4049,7 +4034,7 @@ bool D3D12CommandProcessor::UpdateBindings(
assert_true(current_graphics_root_bindful_extras_.samplers_pixel !=
RootBindfulExtraParameterIndices::kUnavailable);
gpu_handle_samplers_pixel_ = sampler_gpu_handle;
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
for (size_t i = 0; i < sampler_count_pixel; ++i) {
texture_cache_->WriteSampler(current_samplers_pixel_[i],
sampler_cpu_handle);
sampler_cpu_handle.ptr += descriptor_size_sampler;

View File

@ -28,6 +28,7 @@
#include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/d3d12/texture_cache.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/dxbc_shader.h"
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
@ -51,7 +52,7 @@ class D3D12CommandProcessor : public CommandProcessor {
void ClearCaches() override;
void InitializeShaderStorage(const std::filesystem::path& storage_root,
void InitializeShaderStorage(const std::filesystem::path& cache_root,
uint32_t title_id, bool blocking) override;
void RequestFrameTrace(const std::filesystem::path& root_path) override;
@ -92,7 +93,7 @@ class D3D12CommandProcessor : public CommandProcessor {
// there are 4 render targets bound with the same EDRAM base (clearly not
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
// conflict with each other.
uint32_t GetCurrentColorMask(const D3D12Shader* pixel_shader) const;
uint32_t GetCurrentColorMask(uint32_t shader_writes_color_targets) const;
void PushTransitionBarrier(
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
@ -104,8 +105,9 @@ class D3D12CommandProcessor : public CommandProcessor {
void SubmitBarriers();
// Finds or creates root signature for a pipeline.
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader);
ID3D12RootSignature* GetRootSignature(const DxbcShader* vertex_shader,
const DxbcShader* pixel_shader,
bool tessellated);
ui::d3d12::D3D12UploadBufferPool& GetConstantBufferPool() const {
return *constant_buffer_pool_;
@ -304,7 +306,7 @@ class D3D12CommandProcessor : public CommandProcessor {
// Gets the indices of optional root parameters. Returns the total parameter
// count.
static uint32_t GetRootBindfulExtraParameterIndices(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
RootBindfulExtraParameterIndices& indices_out);
// BeginSubmission and EndSubmission may be called at any time. If there's an
@ -357,8 +359,7 @@ class D3D12CommandProcessor : public CommandProcessor {
bool shared_memory_is_uav, bool primitive_polygonal,
uint32_t line_loop_closing_index, xenos::Endian index_endian,
const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x,
uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z,
uint32_t color_mask,
uint32_t pixel_size_y, uint32_t used_texture_mask, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
bool UpdateBindings(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader,

View File

@ -10,9 +10,11 @@
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include <cstring>
#include <utility>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/gpu/dxbc_shader.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/ui/d3d12/d3d12_api.h"
@ -22,51 +24,13 @@ namespace d3d12 {
D3D12Shader::D3D12Shader(xenos::ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count)
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
: DxbcShader(shader_type, data_hash, dword_ptr, dword_count) {}
// Replaces this shader's texture and sampler binding lists with copies of the
// bindings produced by the DXBC translator, and rebuilds used_texture_mask_.
//
// texture_bindings / texture_binding_count: translator texture bindings to
// copy (array pointer and element count).
// sampler_bindings / sampler_binding_count: translator sampler bindings to
// copy (array pointer and element count).
//
// Any previously stored bindings are discarded.
void D3D12Shader::SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureBinding* texture_bindings,
uint32_t texture_binding_count,
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
uint32_t sampler_binding_count) {
texture_bindings_.clear();
texture_bindings_.reserve(texture_binding_count);
// Rebuilt from scratch below, one bit per referenced fetch constant.
used_texture_mask_ = 0;
for (uint32_t i = 0; i < texture_binding_count; ++i) {
TextureBinding& binding = texture_bindings_.emplace_back();
// For a stable hash: zero the whole struct before filling the fields, so
// bytes not covered by a field assignment (presumably padding) are
// deterministic when the layout is hashed or compared bytewise.
std::memset(&binding, 0, sizeof(binding));
const DxbcShaderTranslator::TextureBinding& translator_binding =
texture_bindings[i];
binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
binding.fetch_constant = translator_binding.fetch_constant;
binding.dimension = translator_binding.dimension;
binding.is_signed = translator_binding.is_signed;
// NOTE(review): assumes fetch_constant < 32 so the shift stays within the
// 32-bit mask - confirm against the translator's range.
used_texture_mask_ |= 1u << translator_binding.fetch_constant;
}
sampler_bindings_.clear();
sampler_bindings_.reserve(sampler_binding_count);
for (uint32_t i = 0; i < sampler_binding_count; ++i) {
// Unlike the texture bindings, this struct is not zeroed first; every
// field is assigned explicitly below before it is stored.
SamplerBinding binding;
const DxbcShaderTranslator::SamplerBinding& translator_binding =
sampler_bindings[i];
binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
binding.fetch_constant = translator_binding.fetch_constant;
binding.mag_filter = translator_binding.mag_filter;
binding.min_filter = translator_binding.min_filter;
binding.mip_filter = translator_binding.mip_filter;
binding.aniso_filter = translator_binding.aniso_filter;
sampler_bindings_.push_back(binding);
}
}
void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
bool disassemble_dxbc,
IDxbcConverter* dxbc_converter,
IDxcUtils* dxc_utils,
void D3D12Shader::D3D12Translation::DisassembleDxbcAndDxil(
const ui::d3d12::D3D12Provider& provider, bool disassemble_dxbc,
IDxbcConverter* dxbc_converter, IDxcUtils* dxc_utils,
IDxcCompiler* dxc_compiler) {
std::string disassembly;
bool is_first_disassembly = true;
if (disassemble_dxbc) {
ID3DBlob* dxbc_disassembly;
@ -77,11 +41,12 @@ void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
nullptr, &dxbc_disassembly))) {
assert_true(is_first_disassembly);
is_first_disassembly = false;
host_disassembly_.append(
disassembly.append(
reinterpret_cast<const char*>(dxbc_disassembly->GetBufferPointer()));
dxbc_disassembly->Release();
} else {
XELOGE("Failed to disassemble DXBC shader {:016X}", ucode_data_hash());
XELOGE("Failed to disassemble DXBC shader {:016X}",
shader().ucode_data_hash());
}
}
if (dxbc_converter && dxc_utils && dxc_compiler) {
@ -106,29 +71,36 @@ void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
dxil_disassembly->Release();
if (dxil_disassembly_got_utf8) {
if (!is_first_disassembly) {
host_disassembly_.append("\n\n");
disassembly.append("\n\n");
}
is_first_disassembly = false;
host_disassembly_.append(reinterpret_cast<const char*>(
disassembly.append(reinterpret_cast<const char*>(
dxil_disassembly_utf8->GetStringPointer()));
dxil_disassembly_utf8->Release();
} else {
XELOGE("Failed to get DXIL shader {:016X} disassembly as UTF-8",
ucode_data_hash());
shader().ucode_data_hash());
}
} else {
XELOGE("Failed to disassemble DXIL shader {:016X}",
ucode_data_hash());
shader().ucode_data_hash());
}
} else {
XELOGE("Failed to create a blob with DXIL shader {:016X}",
ucode_data_hash());
shader().ucode_data_hash());
CoTaskMemFree(dxil);
}
} else {
XELOGE("Failed to convert shader {:016X} to DXIL", ucode_data_hash());
XELOGE("Failed to convert shader {:016X} to DXIL",
shader().ucode_data_hash());
}
}
set_host_disassembly(std::move(disassembly));
}
// Factory for translations of this shader: allocates a D3D12Translation bound
// to this shader for the given modification value.
// The object is created with `new` and returned as a raw pointer; ownership
// presumably passes to the caller (the base Shader) - confirm against the
// base class before changing the allocation.
Shader::Translation* D3D12Shader::CreateTranslationInstance(
uint64_t modification) {
return new D3D12Translation(*this, modification);
}
} // namespace d3d12

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -10,106 +10,62 @@
#ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_
#define XENIA_GPU_D3D12_D3D12_SHADER_H_
#include <vector>
#include <atomic>
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/gpu/dxbc_shader.h"
#include "xenia/ui/d3d12/d3d12_provider.h"
namespace xe {
namespace gpu {
namespace d3d12 {
class D3D12Shader : public Shader {
class D3D12Shader : public DxbcShader {
public:
D3D12Shader(xenos::ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count);
class D3D12Translation : public DxbcTranslation {
public:
D3D12Translation(D3D12Shader& shader, uint64_t modification)
: DxbcTranslation(shader, modification) {}
void SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureBinding* texture_bindings,
uint32_t texture_binding_count,
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
uint32_t sampler_binding_count);
void SetForcedEarlyZShaderObject(const std::vector<uint8_t>& shader_object) {
forced_early_z_shader_ = shader_object;
}
// Returns the shader with forced early depth/stencil set with
// SetForcedEarlyZShader after translation. If there's none (for example,
// if the shader discards pixels or writes to the depth buffer), an empty
// vector is returned.
const std::vector<uint8_t>& GetForcedEarlyZShaderObject() const {
return forced_early_z_shader_;
}
void DisassembleDxbc(const ui::d3d12::D3D12Provider& provider,
void DisassembleDxbcAndDxil(const ui::d3d12::D3D12Provider& provider,
bool disassemble_dxbc,
IDxbcConverter* dxbc_converter = nullptr,
IDxcUtils* dxc_utils = nullptr,
IDxcCompiler* dxc_compiler = nullptr);
static constexpr uint32_t kMaxTextureBindingIndexBits =
DxbcShaderTranslator::kMaxTextureBindingIndexBits;
static constexpr uint32_t kMaxTextureBindings =
DxbcShaderTranslator::kMaxTextureBindings;
struct TextureBinding {
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
// Stacked and 3D are separate TextureBindings, even for bindless for null
// descriptor handling simplicity.
xenos::FetchOpDimension dimension;
bool is_signed;
};
// Safe to hash and compare with memcmp for layout hashing.
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
count_out = uint32_t(texture_bindings_.size());
return texture_bindings_.data();
}
const uint32_t GetUsedTextureMask() const { return used_texture_mask_; }
static constexpr uint32_t kMaxSamplerBindingIndexBits =
DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
static constexpr uint32_t kMaxSamplerBindings =
DxbcShaderTranslator::kMaxSamplerBindings;
struct SamplerBinding {
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
xenos::TextureFilter mag_filter;
xenos::TextureFilter min_filter;
xenos::TextureFilter mip_filter;
xenos::AnisoFilter aniso_filter;
};
const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const {
count_out = uint32_t(sampler_bindings_.size());
return sampler_bindings_.data();
}
D3D12Shader(xenos::ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count);
// For owning subsystems like the pipeline cache, accessors for unique
// For the owning subsystem, like the pipeline cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound.
size_t GetTextureBindingLayoutUserUID() const {
return texture_binding_layout_user_uid_;
}
void SetTextureBindingLayoutUserUID(size_t uid) {
texture_binding_layout_user_uid_ = uid;
}
size_t GetSamplerBindingLayoutUserUID() const {
return sampler_binding_layout_user_uid_;
}
// Modifications of the same shader can be translated on different threads.
// The "set" function must only be called if "enter" returned true - these are
// set up only once.
// Atomically sets the "set up" flag and returns true only if it was clear
// before this call, so exactly one caller gets to perform the setup; every
// later call returns false.
bool EnterBindingLayoutUserUIDSetup() {
return !binding_layout_user_uids_set_up_.test_and_set();
}
void SetTextureBindingLayoutUserUID(size_t uid) {
texture_binding_layout_user_uid_ = uid;
}
void SetSamplerBindingLayoutUserUID(size_t uid) {
sampler_binding_layout_user_uid_ = uid;
}
protected:
Translation* CreateTranslationInstance(uint64_t modification) override;
private:
std::vector<TextureBinding> texture_bindings_;
std::vector<SamplerBinding> sampler_bindings_;
std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT;
size_t texture_binding_layout_user_uid_ = 0;
size_t sampler_binding_layout_user_uid_ = 0;
uint32_t used_texture_mask_ = 0;
std::vector<uint8_t> forced_early_z_shader_;
};
} // namespace d3d12

View File

@ -221,7 +221,9 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
*reinterpret_cast<const D3DSetSamplePositionsArguments*>(stream);
command_list_1->SetSamplePositions(
args.num_samples_per_pixel, args.num_pixels,
const_cast<D3D12_SAMPLE_POSITION*>(args.sample_positions));
(args.num_samples_per_pixel && args.num_pixels)
? const_cast<D3D12_SAMPLE_POSITION*>(args.sample_positions)
: nullptr);
}
} break;
default:

File diff suppressed because it is too large Load Diff

View File

@ -23,10 +23,12 @@
#include "xenia/base/hash.h"
#include "xenia/base/platform.h"
#include "xenia/base/string_buffer.h"
#include "xenia/base/threading.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_api.h"
@ -43,36 +45,41 @@ class PipelineCache {
PipelineCache(D3D12CommandProcessor& command_processor,
const RegisterFile& register_file, bool bindless_resources_used,
bool edram_rov_used, uint32_t resolution_scale);
bool edram_rov_used,
flags::DepthFloat24Conversion depth_float24_conversion,
uint32_t resolution_scale);
~PipelineCache();
bool Initialize();
void Shutdown();
void ClearCache(bool shutting_down = false);
void InitializeShaderStorage(const std::filesystem::path& storage_root,
void InitializeShaderStorage(const std::filesystem::path& cache_root,
uint32_t title_id, bool blocking);
void ShutdownShaderStorage();
void EndSubmission();
bool IsCreatingPipelines();
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
D3D12Shader* LoadShader(xenos::ShaderType shader_type,
const uint32_t* host_address, uint32_t dword_count);
// Analyze shader microcode on the translator thread.
// Forwards to Shader::AnalyzeUcode, passing this cache's ucode_disasm_buffer_
// member as the string buffer.
// NOTE(review): the buffer member is described as being for calls on the
// processor thread, while this comment says translator thread; the buffer is a
// single member, so presumably this must only be called from one thread -
// confirm which.
void AnalyzeShaderUcode(Shader& shader) {
shader.AnalyzeUcode(ucode_disasm_buffer_);
}
// Returns the host vertex shader type for the current draw if it's valid and
// supported, or Shader::HostVertexShaderType(-1) if not.
Shader::HostVertexShaderType GetHostVertexShaderTypeIfValid() const;
// Translates shaders if needed, also making shader info up to date.
bool EnsureShadersTranslated(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
Shader::HostVertexShaderType host_vertex_shader_type);
// Retrieves the shader modification for the current state, and returns
// whether it is valid. The shader must have microcode analyzed.
// shader: the shader to get the modification for.
// modification_out: receives the modification (presumably only meaningful
// when true is returned - confirm against the definition).
// Declared unqualified: a class-name qualifier on an in-class member
// declaration is only accepted as a compiler extension and is rejected by
// conforming compilers.
bool GetCurrentShaderModification(
    const Shader& shader,
    DxbcShaderTranslator::Modification& modification_out) const;
// If draw_util::IsRasterizationPotentiallyDone is false, the pixel shader
// MUST be made nullptr BEFORE calling this!
bool ConfigurePipeline(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader,
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
@ -86,13 +93,10 @@ class PipelineCache {
XEPACKEDSTRUCT(ShaderStoredHeader, {
uint64_t ucode_data_hash;
uint32_t ucode_dword_count : 16;
uint32_t ucode_dword_count : 31;
xenos::ShaderType type : 1;
Shader::HostVertexShaderType host_vertex_shader_type : 3;
reg::SQ_PROGRAM_CNTL sq_program_cntl;
static constexpr uint32_t kVersion = 0x20200405;
static constexpr uint32_t kVersion = 0x20201219;
});
// Update PipelineDescription::kVersion if any of the Pipeline* enums are
@ -135,6 +139,8 @@ class PipelineCache {
kNone,
kFront,
kBack,
// Special case, handled via disabling the pixel shader and depth / stencil.
kDisableRasterization,
};
enum class PipelineBlendFactor : uint32_t {
@ -168,30 +174,30 @@ class PipelineCache {
XEPACKEDSTRUCT(PipelineDescription, {
uint64_t vertex_shader_hash;
uint64_t vertex_shader_modification;
// 0 if drawing without a pixel shader.
uint64_t pixel_shader_hash;
uint64_t pixel_shader_modification;
int32_t depth_bias;
float depth_bias_slope_scaled;
PipelineStripCutIndex strip_cut_index : 2; // 2
Shader::HostVertexShaderType host_vertex_shader_type : 3; // 5
// PipelinePrimitiveTopologyType for a vertex shader.
// xenos::TessellationMode for a domain shader.
uint32_t primitive_topology_type_or_tessellation_mode : 2; // 7
uint32_t primitive_topology_type_or_tessellation_mode : 2; // 4
// Zero for non-kVertex host_vertex_shader_type.
PipelineGeometryShader geometry_shader : 2; // 9
uint32_t fill_mode_wireframe : 1; // 10
PipelineCullMode cull_mode : 2; // 12
uint32_t front_counter_clockwise : 1; // 13
uint32_t depth_clip : 1; // 14
uint32_t rov_msaa : 1; // 15
xenos::DepthRenderTargetFormat depth_format : 1; // 16
xenos::CompareFunction depth_func : 3; // 19
uint32_t depth_write : 1; // 20
uint32_t stencil_enable : 1; // 21
uint32_t stencil_read_mask : 8; // 29
uint32_t force_early_z : 1; // 30
PipelineGeometryShader geometry_shader : 2; // 6
uint32_t fill_mode_wireframe : 1; // 7
PipelineCullMode cull_mode : 2; // 9
uint32_t front_counter_clockwise : 1; // 10
uint32_t depth_clip : 1; // 11
uint32_t rov_msaa : 1; // 12
xenos::DepthRenderTargetFormat depth_format : 1; // 13
xenos::CompareFunction depth_func : 3; // 16
uint32_t depth_write : 1; // 17
uint32_t stencil_enable : 1; // 18
uint32_t stencil_read_mask : 8; // 26
uint32_t stencil_write_mask : 8; // 8
xenos::StencilOp stencil_front_fail_op : 3; // 11
@ -205,7 +211,7 @@ class PipelineCache {
PipelineRenderTarget render_targets[4];
static constexpr uint32_t kVersion = 0x20200405;
static constexpr uint32_t kVersion = 0x20201219;
});
XEPACKEDSTRUCT(PipelineStoredDescription, {
@ -215,24 +221,32 @@ class PipelineCache {
struct PipelineRuntimeDescription {
ID3D12RootSignature* root_signature;
D3D12Shader* vertex_shader;
D3D12Shader* pixel_shader;
D3D12Shader::D3D12Translation* vertex_shader;
D3D12Shader::D3D12Translation* pixel_shader;
PipelineDescription description;
};
// Returns the host vertex shader type for the current draw if it's valid and
// supported, or Shader::HostVertexShaderType(-1) if not.
Shader::HostVertexShaderType GetCurrentHostVertexShaderTypeIfValid() const;
D3D12Shader* LoadShader(xenos::ShaderType shader_type,
const uint32_t* host_address, uint32_t dword_count,
uint64_t data_hash);
// Can be called from multiple threads.
bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader& shader,
reg::SQ_PROGRAM_CNTL cntl,
bool TranslateAnalyzedShader(DxbcShaderTranslator& translator,
D3D12Shader::D3D12Translation& translation,
IDxbcConverter* dxbc_converter = nullptr,
IDxcUtils* dxc_utils = nullptr,
IDxcCompiler* dxc_compiler = nullptr,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex);
IDxcCompiler* dxc_compiler = nullptr);
// If draw_util::IsRasterizationPotentiallyDone is false, the pixel shader
// MUST be made nullptr BEFORE calling this!
bool GetCurrentStateDescription(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader,
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineRuntimeDescription& runtime_description_out);
@ -243,9 +257,13 @@ class PipelineCache {
const RegisterFile& register_file_;
bool bindless_resources_used_;
bool edram_rov_used_;
// 20e4 depth conversion mode to use for non-ROV output.
flags::DepthFloat24Conversion depth_float24_conversion_;
uint32_t resolution_scale_;
// Reusable shader translator.
// Temporary storage for AnalyzeUcode calls on the processor thread.
StringBuffer ucode_disasm_buffer_;
// Reusable shader translator for the processor thread.
std::unique_ptr<DxbcShaderTranslator> shader_translator_;
// Command processor thread DXIL conversion/disassembly interfaces, if DXIL
@ -267,7 +285,7 @@ class PipelineCache {
// Texture binding layouts of different shaders, for obtaining layout UIDs.
std::vector<D3D12Shader::TextureBinding> texture_binding_layouts_;
// Map of texture binding layouts used by shaders, for obtaining UIDs. Keys
// are XXH64 hashes of layouts, values need manual collision resolution using
// are XXH3 hashes of layouts, values need manual collision resolution using
// layout_vector_offset:layout_length of texture_binding_layouts_.
std::unordered_multimap<uint64_t, LayoutUID,
xe::hash::IdentityHasher<uint64_t>>
@ -275,7 +293,7 @@ class PipelineCache {
// Bindless sampler indices of different shaders, for obtaining layout UIDs.
// For bindful, sampler count is used as the UID instead.
std::vector<uint32_t> bindless_sampler_layouts_;
// Keys are XXH64 hashes of used bindless sampler indices.
// Keys are XXH3 hashes of used bindless sampler indices.
std::unordered_multimap<uint64_t, LayoutUID,
xe::hash::IdentityHasher<uint64_t>>
bindless_sampler_layout_map_;
@ -300,11 +318,14 @@ class PipelineCache {
Pipeline* current_pipeline_ = nullptr;
// Currently open shader storage path.
std::filesystem::path shader_storage_root_;
std::filesystem::path shader_storage_cache_root_;
uint32_t shader_storage_title_id_ = 0;
// Shader storage output stream, for preload in the next emulator runs.
FILE* shader_storage_file_ = nullptr;
// For only writing shaders to the currently open storage once, incremented
// when switching the storage.
uint32_t shader_storage_index_ = 0;
bool shader_storage_file_flush_needed_ = false;
// Pipeline storage output stream, for preload in the next emulator runs.
@ -317,8 +338,7 @@ class PipelineCache {
std::condition_variable storage_write_request_cond_;
// Storage thread input is protected with storage_write_request_lock_, and the
// thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_;
std::deque<const Shader*> storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipelines_ = false;

View File

@ -40,11 +40,13 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float24and32_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float24and32_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.h"
@ -87,6 +89,12 @@ const RenderTargetCache::EdramLoadStoreModeInfo
{edram_load_depth_float_cs, sizeof(edram_load_depth_float_cs),
L"EDRAM Load Float Depth", edram_store_depth_float_cs,
sizeof(edram_store_depth_float_cs), L"EDRAM Store Float Depth"},
{edram_load_depth_float24and32_cs,
sizeof(edram_load_depth_float24and32_cs),
L"EDRAM Load 24-bit & 32-bit Float Depth",
edram_store_depth_float24and32_cs,
sizeof(edram_store_depth_float24and32_cs),
L"EDRAM Store 24-bit & 32-bit Float Depth"},
};
const std::pair<const uint8_t*, size_t>
@ -126,6 +134,8 @@ RenderTargetCache::RenderTargetCache(D3D12CommandProcessor& command_processor,
RenderTargetCache::~RenderTargetCache() { Shutdown(); }
bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
depth_float24_conversion_ = flags::GetDepthFloat24Conversion();
// EDRAM buffer size depends on this.
resolution_scale_2x_ = texture_cache.IsResolutionScale2X();
assert_false(resolution_scale_2x_ && !edram_rov_used_);
@ -420,7 +430,8 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false;
}
resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
if (!edram_rov_used_) {
if (!edram_rov_used_ &&
depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) {
assert_false(resolution_scale_2x_);
resolve_clear_depth_24_32_pipeline_ =
ui::d3d12::util::CreateComputePipeline(
@ -434,7 +445,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
Shutdown();
return false;
}
resolve_clear_64bpp_pipeline_->SetName(
resolve_clear_depth_24_32_pipeline_->SetName(
L"Resolve Clear 24-bit & 32-bit Depth");
}
@ -524,7 +535,8 @@ void RenderTargetCache::EndFrame() {
FlushAndUnbindRenderTargets();
}
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
bool RenderTargetCache::UpdateRenderTargets(
uint32_t shader_writes_color_targets) {
// There are two kinds of render target binding updates in this implementation
// in case something has been changed - full and partial.
//
@ -624,7 +636,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
uint32_t edram_bases[5];
uint32_t formats[5];
bool formats_are_64bpp[5];
uint32_t color_mask = command_processor_.GetCurrentColorMask(pixel_shader);
uint32_t color_mask =
command_processor_.GetCurrentColorMask(shader_writes_color_targets);
for (uint32_t i = 0; i < 4; ++i) {
enabled[i] = (color_mask & (0xF << (i * 4))) != 0;
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
@ -634,7 +647,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
formats_are_64bpp[i] = xenos::IsColorRenderTargetFormat64bpp(
xenos::ColorRenderTargetFormat(formats[i]));
}
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
// 0x1 = stencil test, 0x2 = depth test.
enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
@ -1266,8 +1279,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
if (clear_depth) {
// Also clear the host 32-bit floating-point depth used for loading and
// storing 24-bit floating-point depth at full precision.
bool clear_float32_depth =
!edram_rov_used_ && xenos::DepthRenderTargetFormat(
bool clear_float32_depth = !edram_rov_used_ &&
depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnCopy &&
xenos::DepthRenderTargetFormat(
resolve_info.depth_edram_info.format) ==
xenos::DepthRenderTargetFormat::kD24FS8;
draw_util::ResolveClearShaderConstants depth_clear_constants;
@ -1558,7 +1573,8 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) {
uint32_t RenderTargetCache::GetEdramBufferSize() const {
uint32_t size = xenos::kEdramSizeBytes;
if (!edram_rov_used_) {
if (!edram_rov_used_ &&
depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) {
// Two 10 MB pages, one containing color and integer depth data, another
// with 32-bit float depth when 20e4 depth is used to allow for multipass
// drawing without precision loss in case of EDRAM store/load.
@ -1831,12 +1847,15 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
}
RenderTargetCache::EdramLoadStoreMode RenderTargetCache::GetLoadStoreMode(
bool is_depth, uint32_t format) {
bool is_depth, uint32_t format) const {
if (is_depth) {
return xenos::DepthRenderTargetFormat(format) ==
xenos::DepthRenderTargetFormat::kD24FS8
? EdramLoadStoreMode::kDepthFloat
: EdramLoadStoreMode::kDepthUnorm;
if (xenos::DepthRenderTargetFormat(format) ==
xenos::DepthRenderTargetFormat::kD24FS8) {
return depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy
? EdramLoadStoreMode::kDepthFloat24And32
: EdramLoadStoreMode::kDepthFloat;
}
return EdramLoadStoreMode::kDepthUnorm;
}
xenos::ColorRenderTargetFormat color_format =
xenos::ColorRenderTargetFormat(format);

View File

@ -18,6 +18,7 @@
#include "xenia/gpu/d3d12/d3d12_shared_memory.h"
#include "xenia/gpu/d3d12/texture_cache.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/trace_writer.h"
#include "xenia/gpu/xenos.h"
@ -259,12 +260,16 @@ class RenderTargetCache {
void Shutdown();
void ClearCache();
// Returns the 20e4 depth conversion mode this cache uses for non-ROV output
// (the current value of depth_float24_conversion_).
flags::DepthFloat24Conversion depth_float24_conversion() const {
return depth_float24_conversion_;
}
void CompletedSubmissionUpdated();
void BeginSubmission();
void EndFrame();
// Called in the beginning of a draw call - may bind pipelines and change the
// view descriptor heap.
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);
bool UpdateRenderTargets(uint32_t shader_writes_color_targets);
// Returns the host-to-guest mappings and host formats of currently bound
// render targets for pipeline creation and remapping in shaders. They are
// consecutive, and format DXGI_FORMAT_UNKNOWN terminates the list. Depth
@ -272,17 +277,14 @@ class RenderTargetCache {
const PipelineRenderTarget* GetCurrentPipelineRenderTargets() const {
return current_pipeline_render_targets_;
}
// Performs the resolve to a shared memory area according to the current
// register values, and also clears the EDRAM buffer if needed. Must be in a
// frame for calling.
bool Resolve(const Memory& memory, D3D12SharedMemory& shared_memory,
TextureCache& texture_cache, uint32_t& written_address_out,
uint32_t& written_length_out);
bool Resolve(D3D12SharedMemory* shared_memory, TextureCache* texture_cache,
Memory* memory, uint32_t& written_address_out,
uint32_t& written_length_out);
// Flushes the render targets to EDRAM and unbinds them, for instance, when
// the command processor takes over framebuffer bindings to draw something
// special. May change the CBV/SRV/UAV descriptor heap.
@ -321,6 +323,7 @@ class RenderTargetCache {
kColor7e3,
kDepthUnorm,
kDepthFloat,
kDepthFloat24And32,
kCount
};
@ -399,37 +402,6 @@ class RenderTargetCache {
RenderTarget* render_target;
};
// Converting resolve pipeline.
struct ResolvePipeline {
ID3D12PipelineState* pipeline;
DXGI_FORMAT dest_format;
};
union ResolveTargetKey {
struct {
// 2560 / 32 = 80 (7 bits), * 2 for 2x resolution scale = 160 (8 bits).
uint32_t width_div_32 : 8;
uint32_t height_div_32 : 8;
DXGI_FORMAT format : 16;
};
uint32_t value;
};
// Target for converting resolves.
struct ResolveTarget {
ID3D12Resource* resource;
D3D12_RESOURCE_STATES state;
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
ResolveTargetKey key;
#if 0
// The first 4 MB page in the heaps.
uint32_t heap_page_first;
#endif
D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
// Buffer size needed to copy the resolve target to a linear buffer.
uint32_t copy_buffer_size;
};
uint32_t GetEdramBufferSize() const;
void TransitionEdramBuffer(D3D12_RESOURCE_STATES new_state);
@ -458,7 +430,7 @@ class RenderTargetCache {
uint32_t instance);
#endif
static EdramLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format);
EdramLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format) const;
// Must be in a frame to call. Stores the dirty areas of the currently bound
// render targets and marks them as clean.
@ -470,24 +442,15 @@ class RenderTargetCache {
RenderTarget* const* render_targets,
const uint32_t* edram_bases);
// Returns any available resolve target placed at least at
// min_heap_first_page, or tries to place it at the specified position (if not
// possible, will place it in the next heap).
#if 0
ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height,
DXGI_FORMAT format,
uint32_t min_heap_first_page);
#else
ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height,
DXGI_FORMAT format);
#endif
D3D12CommandProcessor& command_processor_;
const RegisterFile& register_file_;
TraceWriter& trace_writer_;
bool bindless_resources_used_;
bool edram_rov_used_;
// 20e4 depth conversion mode to use for non-ROV output.
flags::DepthFloat24Conversion depth_float24_conversion_;
// Whether 1 guest pixel is rendered as 2x2 host pixels (currently only
// supported with ROV).
bool resolution_scale_2x_ = false;

View File

@ -0,0 +1,296 @@
// generated from `xb buildhlsl`
// source: edram_load_depth_float24and32.cs.hlsl
const uint8_t edram_load_depth_float24and32_cs[] = {
0x44, 0x58, 0x42, 0x43, 0xF3, 0xA3, 0xA4, 0x14, 0x0A, 0x50, 0x56, 0x49,
0x5D, 0x09, 0x6C, 0xBF, 0x33, 0xC9, 0xC1, 0x9A, 0x01, 0x00, 0x00, 0x00,
0xAC, 0x0D, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
0x10, 0x0D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xB4, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xCF, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 0x6F, 0x61,
0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x73, 0x6F, 0x75, 0x72,
0x63, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F,
0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x64,
0x65, 0x73, 0x74, 0x00, 0x58, 0x65, 0x45, 0x64, 0x72, 0x61, 0x6D, 0x4C,
0x6F, 0x61, 0x64, 0x53, 0x74, 0x6F, 0x72, 0x65, 0x43, 0x6F, 0x6E, 0x73,
0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0xAB, 0xAB, 0xE8, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x1C, 0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
0x30, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x69, 0x02, 0x00, 0x00,
0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
0x83, 0x02, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D,
0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x64, 0x65,
0x70, 0x74, 0x68, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x00, 0x64,
0x77, 0x6F, 0x72, 0x64, 0x00, 0xAB, 0xAB, 0xAB, 0x00, 0x00, 0x13, 0x00,
0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x02, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65,
0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F,
0x72, 0x5F, 0x64, 0x65, 0x70, 0x74, 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63,
0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72,
0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x6F, 0x66,
0x66, 0x73, 0x65, 0x74, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61,
0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C,
0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64,
0x72, 0x61, 0x6D, 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x73, 0x61, 0x6D,
0x70, 0x6C, 0x65, 0x73, 0x5F, 0x32, 0x78, 0x5F, 0x64, 0x65, 0x70, 0x74,
0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x4D, 0x69, 0x63, 0x72,
0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C,
0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F,
0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2E, 0x31, 0x00,
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
0xDC, 0x09, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x77, 0x02, 0x00, 0x00,
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
0x46, 0x7E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x06,
0x46, 0xEE, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02,
0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00,
0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02,
0x07, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x06,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00,
0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C,
0x62, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
0xFF, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03,
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06,
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00,
0x01, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06,
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00,
0x01, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
0xA2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF,
0x1E, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01,
0x55, 0x00, 0x00, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x08, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x10, 0x02, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06,
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x10, 0x02, 0x00, 0x26, 0x00, 0x00, 0x07,
0x00, 0xD0, 0x00, 0x00, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00,
0x23, 0x00, 0x00, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x00, 0x14, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B,
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00,
0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x00, 0x00, 0xA0, 0x00, 0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x05, 0xF2, 0x00, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF,
0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B, 0xF2, 0x00, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0x37, 0x00, 0x00, 0x09,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38,
0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x06, 0x00, 0x02, 0x00, 0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x0A, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x56, 0x05, 0x02, 0x00, 0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x86, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08, 0xF2, 0xE0, 0x21, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0xFF, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x14, 0xE2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x56, 0x0E, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0xE6, 0x0A, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
0x12, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1F, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
};

View File

@ -0,0 +1,117 @@
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
//
// Buffer Definitions:
//
// cbuffer XeEdramLoadStoreConstants
// {
//
// uint xe_edram_rt_color_depth_offset;// Offset: 0 Size: 4
// uint xe_edram_rt_color_depth_pitch;// Offset: 4 Size: 4
// uint xe_edram_rt_stencil_offset; // Offset: 8 Size: 4
// uint xe_edram_rt_stencil_pitch; // Offset: 12 Size: 4
// uint xe_edram_base_samples_2x_depth_pitch;// Offset: 16 Size: 4
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim ID HLSL Bind Count
// ------------------------------ ---------- ------- ----------- ------- -------------- ------
// xe_edram_load_store_source texture byte r/o T0 t0 1
// xe_edram_load_store_dest UAV byte r/w U0 u0 1
// XeEdramLoadStoreConstants cbuffer NA NA CB0 cb0 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Input
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Output
cs_5_1
dcl_globalFlags refactoringAllowed
dcl_constantbuffer CB0[0:0][2], immediateIndexed, space=0
dcl_resource_raw T0[0:0], space=0
dcl_uav_raw U0[0:0], space=0
dcl_input vThreadGroupID.xy
dcl_input vThreadIDInGroup.xy
dcl_input vThreadID.xy
dcl_temps 7
dcl_thread_group 20, 16, 1
ishl r0.x, vThreadIDInGroup.x, l(2)
and r0.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
if_nz r0.y
ult r0.y, vThreadIDInGroup.x, l(10)
uge r0.w, vThreadIDInGroup.x, l(10)
and r0.yw, r0.yyyw, l(0, 40, 0, -40)
iadd r0.y, r0.w, r0.y
iadd r0.x, r0.y, r0.x
endif
ushr r0.y, CB0[0][1].x, l(16)
imad r0.y, vThreadGroupID.y, r0.y, r0.z
iadd r0.y, r0.y, vThreadGroupID.x
imul null, r0.z, vThreadIDInGroup.y, l(320)
imad r0.y, r0.y, l(5120), r0.z
ishl r0.x, r0.x, l(2)
iadd r0.x, r0.x, r0.y
ubfe r0.y, l(1), l(13), CB0[0][1].x
ishl r0.y, r0.y, l(1)
ishl r0.x, r0.x, r0.y
ld_raw r1.xyzw, r0.x, T0[0].xyzw
ushr r2.xyzw, r1.xyzw, l(8, 8, 8, 8)
iadd r0.x, r0.x, l(0x00a00000)
ld_raw r0.xyzw, r0.x, T0[0].xyzw
ubfe r3.xyzw, l(20, 20, 20, 20), l(8, 8, 8, 8), r1.xyzw
ushr r4.xyzw, r2.xyzw, l(20, 20, 20, 20)
firstbit_hi r5.xyzw, r3.xyzw
iadd r5.xyzw, r5.xyzw, l(-11, -11, -11, -11)
movc r5.xyzw, r3.xyzw, r5.xyzw, l(21,21,21,21)
iadd r6.xyzw, -r5.xyzw, l(1, 1, 1, 1)
movc r6.xyzw, r4.xyzw, r4.xyzw, r6.xyzw
ishl r5.xyzw, r3.xyzw, r5.xyzw
and r5.xyzw, r5.xyzw, l(0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff)
movc r3.xyzw, r4.xyzw, r3.xyzw, r5.xyzw
ishl r4.xyzw, r6.xyzw, l(23, 23, 23, 23)
iadd r4.xyzw, r4.xyzw, l(0x38000000, 0x38000000, 0x38000000, 0x38000000)
ishl r3.xyzw, r3.xyzw, l(3, 3, 3, 3)
iadd r3.xyzw, r4.xyzw, r3.xyzw
movc r3.xyzw, r2.xyzw, r3.xyzw, l(0,0,0,0)
iadd r4.xyzw, r0.xyzw, -r3.xyzw
uge r5.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r0.xyzw
and r0.xyzw, r0.xyzw, r5.xyzw
umin r0.xyzw, r0.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
bfi r5.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r0.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
ushr r6.xyzw, r0.xyzw, l(23, 23, 23, 23)
iadd r6.xyzw, -r6.xyzw, l(113, 113, 113, 113)
umin r6.xyzw, r6.xyzw, l(24, 24, 24, 24)
ushr r5.xyzw, r5.xyzw, r6.xyzw
ult r6.xyzw, r0.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
iadd r0.xyzw, r0.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
movc r0.xyzw, r6.xyzw, r5.xyzw, r0.xyzw
iadd r5.xyzw, r0.xyzw, l(3, 3, 3, 3)
ubfe r0.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r0.xyzw
iadd r0.xyzw, r0.xyzw, r5.xyzw
ubfe r0.xyzw, l(24, 24, 24, 24), l(3, 3, 3, 3), r0.xyzw
ieq r0.xyzw, r2.xyzw, r0.xyzw
and r0.xyzw, r0.xyzw, l(1, 1, 1, 1)
imad r0.xyzw, r4.xyzw, r0.xyzw, r3.xyzw
ishl r2.xy, vThreadID.xxxx, l(4, 2, 0, 0)
imad r2.xy, vThreadID.yyyy, CB0[0][0].ywyy, r2.xyxx
iadd r2.xy, r2.xyxx, CB0[0][0].xzxx
store_raw U0[0].xyzw, r2.x, r0.xyzw
and r0.x, r1.x, l(255)
bfi r0.yzw, l(0, 8, 8, 8), l(0, 8, 16, 24), r1.yyzw, l(0, 0, 0, 0)
iadd r0.xy, r0.zwzz, r0.xyxx
iadd r0.x, r0.y, r0.x
store_raw U0[0].x, r2.y, r0.x
ret
// Approximately 67 instruction slots used

View File

@ -1,11 +1,11 @@
// generated from `xb buildhlsl`
// source: edram_load_depth_float.cs.hlsl
const uint8_t edram_load_depth_float_cs[] = {
0x44, 0x58, 0x42, 0x43, 0xF3, 0xA3, 0xA4, 0x14, 0x0A, 0x50, 0x56, 0x49,
0x5D, 0x09, 0x6C, 0xBF, 0x33, 0xC9, 0xC1, 0x9A, 0x01, 0x00, 0x00, 0x00,
0xAC, 0x0D, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0x44, 0x58, 0x42, 0x43, 0x17, 0xEE, 0x03, 0x06, 0xD3, 0x6E, 0x58, 0x75,
0x66, 0x3B, 0x5B, 0x87, 0x2F, 0xF9, 0x44, 0x9E, 0x01, 0x00, 0x00, 0x00,
0x64, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
0x10, 0x0D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
0xC8, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
@ -69,7 +69,7 @@ const uint8_t edram_load_depth_float_cs[] = {
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
0xDC, 0x09, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x77, 0x02, 0x00, 0x00,
0x94, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0xA5, 0x01, 0x00, 0x00,
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
@ -126,168 +126,98 @@ const uint8_t edram_load_depth_float_cs[] = {
0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x09,
0x32, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00,
0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x0A,
0x32, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x56, 0x05, 0x02, 0x00,
0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x46, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x86, 0x80, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x00, 0x00, 0xA0, 0x00, 0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x05, 0xF2, 0x00, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF,
0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B, 0xF2, 0x00, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x05,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0x37, 0x00, 0x00, 0x09,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38,
0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
0x02, 0x40, 0x00, 0x00, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF,
0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x00, 0x0C,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x41, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00,
0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x06, 0x00, 0x02, 0x00, 0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x0A, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x56, 0x05, 0x02, 0x00, 0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x86, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38,
0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08, 0xF2, 0xE0, 0x21, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0xFF, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x14, 0xE2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x56, 0x0E, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0xE6, 0x0A, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0xE6, 0x0A, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
0x12, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x94, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1F, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

View File

@ -66,11 +66,12 @@ iadd r0.x, r0.x, r0.y
ubfe r0.y, l(1), l(13), CB0[0][1].x
ishl r0.y, r0.y, l(1)
ishl r0.x, r0.x, r0.y
ld_raw r1.xyzw, r0.x, T0[0].xyzw
ushr r2.xyzw, r1.xyzw, l(8, 8, 8, 8)
iadd r0.x, r0.x, l(0x00a00000)
ld_raw r0.xyzw, r0.x, T0[0].xyzw
ubfe r3.xyzw, l(20, 20, 20, 20), l(8, 8, 8, 8), r1.xyzw
ishl r1.xy, vThreadID.xxxx, l(4, 2, 0, 0)
imad r1.xy, vThreadID.yyyy, CB0[0][0].ywyy, r1.xyxx
iadd r1.xy, r1.xyxx, CB0[0][0].xzxx
ushr r2.xyzw, r0.xyzw, l(8, 8, 8, 8)
ubfe r3.xyzw, l(20, 20, 20, 20), l(8, 8, 8, 8), r0.xyzw
ushr r4.xyzw, r2.xyzw, l(20, 20, 20, 20)
firstbit_hi r5.xyzw, r3.xyzw
iadd r5.xyzw, r5.xyzw, l(-11, -11, -11, -11)
@ -84,34 +85,12 @@ ishl r4.xyzw, r6.xyzw, l(23, 23, 23, 23)
iadd r4.xyzw, r4.xyzw, l(0x38000000, 0x38000000, 0x38000000, 0x38000000)
ishl r3.xyzw, r3.xyzw, l(3, 3, 3, 3)
iadd r3.xyzw, r4.xyzw, r3.xyzw
movc r3.xyzw, r2.xyzw, r3.xyzw, l(0,0,0,0)
iadd r4.xyzw, r0.xyzw, -r3.xyzw
uge r5.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r0.xyzw
and r0.xyzw, r0.xyzw, r5.xyzw
umin r0.xyzw, r0.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
bfi r5.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r0.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
ushr r6.xyzw, r0.xyzw, l(23, 23, 23, 23)
iadd r6.xyzw, -r6.xyzw, l(113, 113, 113, 113)
umin r6.xyzw, r6.xyzw, l(24, 24, 24, 24)
ushr r5.xyzw, r5.xyzw, r6.xyzw
ult r6.xyzw, r0.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
iadd r0.xyzw, r0.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
movc r0.xyzw, r6.xyzw, r5.xyzw, r0.xyzw
iadd r5.xyzw, r0.xyzw, l(3, 3, 3, 3)
ubfe r0.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r0.xyzw
iadd r0.xyzw, r0.xyzw, r5.xyzw
ubfe r0.xyzw, l(24, 24, 24, 24), l(3, 3, 3, 3), r0.xyzw
ieq r0.xyzw, r2.xyzw, r0.xyzw
and r0.xyzw, r0.xyzw, l(1, 1, 1, 1)
imad r0.xyzw, r4.xyzw, r0.xyzw, r3.xyzw
ishl r2.xy, vThreadID.xxxx, l(4, 2, 0, 0)
imad r2.xy, vThreadID.yyyy, CB0[0][0].ywyy, r2.xyxx
iadd r2.xy, r2.xyxx, CB0[0][0].xzxx
store_raw U0[0].xyzw, r2.x, r0.xyzw
and r0.x, r1.x, l(255)
bfi r0.yzw, l(0, 8, 8, 8), l(0, 8, 16, 24), r1.yyzw, l(0, 0, 0, 0)
iadd r0.xy, r0.zwzz, r0.xyxx
movc r2.xyzw, r2.xyzw, r3.xyzw, l(0,0,0,0)
store_raw U0[0].xyzw, r1.x, r2.xyzw
and r2.x, r0.x, l(255)
bfi r2.yzw, l(0, 8, 8, 8), l(0, 8, 16, 24), r0.yyzw, l(0, 0, 0, 0)
iadd r0.xy, r2.zwzz, r2.xyxx
iadd r0.x, r0.y, r0.x
store_raw U0[0].x, r2.y, r0.x
store_raw U0[0].x, r1.y, r0.x
ret
// Approximately 67 instruction slots used
// Approximately 46 instruction slots used

View File

@ -0,0 +1,226 @@
// generated from `xb buildhlsl`
// source: edram_store_depth_float24and32.cs.hlsl
const uint8_t edram_store_depth_float24and32_cs[] = {
0x44, 0x58, 0x42, 0x43, 0xC6, 0x10, 0x80, 0x14, 0x97, 0x01, 0xE4, 0x46,
0x76, 0xF1, 0x67, 0xD3, 0xDF, 0x50, 0x25, 0xF7, 0x01, 0x00, 0x00, 0x00,
0x64, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
0xC8, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xB4, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xCF, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 0x6F, 0x61,
0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x73, 0x6F, 0x75, 0x72,
0x63, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F,
0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x64,
0x65, 0x73, 0x74, 0x00, 0x58, 0x65, 0x45, 0x64, 0x72, 0x61, 0x6D, 0x4C,
0x6F, 0x61, 0x64, 0x53, 0x74, 0x6F, 0x72, 0x65, 0x43, 0x6F, 0x6E, 0x73,
0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0xAB, 0xAB, 0xE8, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x1C, 0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
0x30, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x69, 0x02, 0x00, 0x00,
0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
0x83, 0x02, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D,
0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x64, 0x65,
0x70, 0x74, 0x68, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x00, 0x64,
0x77, 0x6F, 0x72, 0x64, 0x00, 0xAB, 0xAB, 0xAB, 0x00, 0x00, 0x13, 0x00,
0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x02, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65,
0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F,
0x72, 0x5F, 0x64, 0x65, 0x70, 0x74, 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63,
0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72,
0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x6F, 0x66,
0x66, 0x73, 0x65, 0x74, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61,
0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C,
0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64,
0x72, 0x61, 0x6D, 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x73, 0x61, 0x6D,
0x70, 0x6C, 0x65, 0x73, 0x5F, 0x32, 0x78, 0x5F, 0x64, 0x65, 0x70, 0x74,
0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x4D, 0x69, 0x63, 0x72,
0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C,
0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F,
0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2E, 0x31, 0x00,
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
0x94, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0xA5, 0x01, 0x00, 0x00,
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
0x46, 0x7E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x06,
0x46, 0xEE, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02,
0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00,
0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02,
0x05, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x09,
0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00,
0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x0A,
0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x05, 0x02, 0x00,
0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x80, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x08,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x06, 0x70, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xE2, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x11,
0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x06, 0x12, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C, 0x62, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0xFF, 0x07, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06, 0x82, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xA2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x1E, 0x00, 0x00, 0x07,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, 0x55, 0x00, 0x00, 0x09,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x08,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x10, 0x02, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0A, 0x10, 0x02, 0x00, 0x26, 0x00, 0x00, 0x07, 0x00, 0xD0, 0x00, 0x00,
0x42, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x20, 0x02, 0x00,
0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x23, 0x00, 0x00, 0x09,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, 0x22, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x00, 0xA6, 0x00, 0x00, 0x08,
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
};

View File

@ -0,0 +1,95 @@
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
//
// Buffer Definitions:
//
// cbuffer XeEdramLoadStoreConstants
// {
//
// uint xe_edram_rt_color_depth_offset;// Offset: 0 Size: 4
// uint xe_edram_rt_color_depth_pitch;// Offset: 4 Size: 4
// uint xe_edram_rt_stencil_offset; // Offset: 8 Size: 4
// uint xe_edram_rt_stencil_pitch; // Offset: 12 Size: 4
// uint xe_edram_base_samples_2x_depth_pitch;// Offset: 16 Size: 4
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim ID HLSL Bind Count
// ------------------------------ ---------- ------- ----------- ------- -------------- ------
// xe_edram_load_store_source texture byte r/o T0 t0 1
// xe_edram_load_store_dest UAV byte r/w U0 u0 1
// XeEdramLoadStoreConstants cbuffer NA NA CB0 cb0 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Input
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Output
cs_5_1
dcl_globalFlags refactoringAllowed
dcl_constantbuffer CB0[0:0][2], immediateIndexed, space=0
dcl_resource_raw T0[0:0], space=0
dcl_uav_raw U0[0:0], space=0
dcl_input vThreadGroupID.xy
dcl_input vThreadIDInGroup.xy
dcl_input vThreadID.xy
dcl_temps 5
dcl_thread_group 20, 16, 1
ishl r0.xy, vThreadID.xxxx, l(4, 2, 0, 0)
imad r0.xy, vThreadID.yyyy, CB0[0][0].ywyy, r0.xyxx
iadd r0.xy, r0.xyxx, CB0[0][0].xzxx
ld_raw r1.xyzw, r0.x, T0[0].xyzw
uge r2.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r1.xyzw
and r2.xyzw, r1.xyzw, r2.xyzw
umin r2.xyzw, r2.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
bfi r3.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r2.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
ushr r4.xyzw, r2.xyzw, l(23, 23, 23, 23)
iadd r4.xyzw, -r4.xyzw, l(113, 113, 113, 113)
umin r4.xyzw, r4.xyzw, l(24, 24, 24, 24)
ushr r3.xyzw, r3.xyzw, r4.xyzw
ult r4.xyzw, r2.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
iadd r2.xyzw, r2.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
movc r2.xyzw, r4.xyzw, r3.xyzw, r2.xyzw
iadd r3.xyzw, r2.xyzw, l(3, 3, 3, 3)
ubfe r2.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r2.xyzw
iadd r2.xyzw, r2.xyzw, r3.xyzw
ushr r2.xyzw, r2.xyzw, l(3, 3, 3, 3)
ld_raw r0.x, r0.y, T0[0].xxxx
ushr r0.yzw, r0.xxxx, l(0, 8, 16, 24)
bfi r0.xyzw, l(24, 24, 24, 24), l(8, 8, 8, 8), r2.xyzw, r0.xyzw
ishl r2.x, vThreadIDInGroup.x, l(2)
and r2.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
if_nz r2.y
ult r2.y, vThreadIDInGroup.x, l(10)
uge r2.w, vThreadIDInGroup.x, l(10)
and r2.yw, r2.yyyw, l(0, 40, 0, -40)
iadd r2.y, r2.w, r2.y
iadd r2.x, r2.y, r2.x
endif
ushr r2.y, CB0[0][1].x, l(16)
imad r2.y, vThreadGroupID.y, r2.y, r2.z
iadd r2.y, r2.y, vThreadGroupID.x
imul null, r2.z, vThreadIDInGroup.y, l(320)
imad r2.y, r2.y, l(5120), r2.z
ishl r2.x, r2.x, l(2)
iadd r2.x, r2.x, r2.y
ubfe r2.y, l(1), l(13), CB0[0][1].x
ishl r2.y, r2.y, l(1)
ishl r2.x, r2.x, r2.y
store_raw U0[0].xyzw, r2.x, r0.xyzw
iadd r0.x, r2.x, l(0x00a00000)
store_raw U0[0].xyzw, r0.x, r1.xyzw
ret
// Approximately 45 instruction slots used

View File

@ -1,11 +1,11 @@
// generated from `xb buildhlsl`
// source: edram_store_depth_float.cs.hlsl
const uint8_t edram_store_depth_float_cs[] = {
0x44, 0x58, 0x42, 0x43, 0xC6, 0x10, 0x80, 0x14, 0x97, 0x01, 0xE4, 0x46,
0x76, 0xF1, 0x67, 0xD3, 0xDF, 0x50, 0x25, 0xF7, 0x01, 0x00, 0x00, 0x00,
0x64, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0x44, 0x58, 0x42, 0x43, 0xF1, 0x72, 0x64, 0x54, 0x9D, 0xF6, 0x79, 0x48,
0x2F, 0x8C, 0xD1, 0x59, 0x56, 0x1C, 0x90, 0x9A, 0x01, 0x00, 0x00, 0x00,
0x28, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00,
0xC8, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
0x8C, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00,
0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
@ -69,7 +69,7 @@ const uint8_t edram_store_depth_float_cs[] = {
0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58,
0x94, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0xA5, 0x01, 0x00, 0x00,
0x58, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x96, 0x01, 0x00, 0x00,
0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06,
@ -79,7 +79,7 @@ const uint8_t edram_store_depth_float_cs[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02,
0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00,
0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02,
0x05, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x09,
0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00,
0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
@ -96,53 +96,53 @@ const uint8_t edram_store_depth_float_cs[] = {
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F,
0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F,
0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00,
0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B,
0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38,
0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8,
0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09,
0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A,
0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x03, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x08,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
@ -155,64 +155,59 @@ const uint8_t edram_store_depth_float_cs[] = {
0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x06, 0x12, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C, 0x62, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0xFF, 0x07, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06, 0x82, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xA2, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x1E, 0x00, 0x00, 0x07,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, 0x55, 0x00, 0x00, 0x09,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, 0x55, 0x00, 0x00, 0x09,
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x08,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x10, 0x02, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x10, 0x02, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0A, 0x10, 0x02, 0x00, 0x26, 0x00, 0x00, 0x07, 0x00, 0xD0, 0x00, 0x00,
0x42, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x20, 0x02, 0x00,
0x42, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x20, 0x02, 0x00,
0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x23, 0x00, 0x00, 0x09,
0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, 0x22, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07,
0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, 0x22, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08,
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x00, 0xA6, 0x00, 0x00, 0x08,
0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -222,5 +217,5 @@ const uint8_t edram_store_depth_float_cs[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
};

View File

@ -45,51 +45,49 @@ dcl_uav_raw U0[0:0], space=0
dcl_input vThreadGroupID.xy
dcl_input vThreadIDInGroup.xy
dcl_input vThreadID.xy
dcl_temps 5
dcl_temps 4
dcl_thread_group 20, 16, 1
ishl r0.xy, vThreadID.xxxx, l(4, 2, 0, 0)
imad r0.xy, vThreadID.yyyy, CB0[0][0].ywyy, r0.xyxx
iadd r0.xy, r0.xyxx, CB0[0][0].xzxx
ld_raw r1.xyzw, r0.x, T0[0].xyzw
uge r2.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r1.xyzw
and r2.xyzw, r1.xyzw, r2.xyzw
umin r2.xyzw, r2.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
bfi r3.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r2.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
ushr r4.xyzw, r2.xyzw, l(23, 23, 23, 23)
iadd r4.xyzw, -r4.xyzw, l(113, 113, 113, 113)
umin r4.xyzw, r4.xyzw, l(24, 24, 24, 24)
ushr r3.xyzw, r3.xyzw, r4.xyzw
ult r4.xyzw, r2.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
iadd r2.xyzw, r2.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
movc r2.xyzw, r4.xyzw, r3.xyzw, r2.xyzw
iadd r3.xyzw, r2.xyzw, l(3, 3, 3, 3)
ubfe r2.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r2.xyzw
iadd r2.xyzw, r2.xyzw, r3.xyzw
ushr r2.xyzw, r2.xyzw, l(3, 3, 3, 3)
and r1.xyzw, r1.xyzw, r2.xyzw
umin r1.xyzw, r1.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8)
bfi r2.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r1.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000)
ushr r3.xyzw, r1.xyzw, l(23, 23, 23, 23)
iadd r3.xyzw, -r3.xyzw, l(113, 113, 113, 113)
umin r3.xyzw, r3.xyzw, l(24, 24, 24, 24)
ushr r2.xyzw, r2.xyzw, r3.xyzw
ult r3.xyzw, r1.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000)
iadd r1.xyzw, r1.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000)
movc r1.xyzw, r3.xyzw, r2.xyzw, r1.xyzw
iadd r2.xyzw, r1.xyzw, l(3, 3, 3, 3)
ubfe r1.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r1.xyzw
iadd r1.xyzw, r1.xyzw, r2.xyzw
ushr r1.xyzw, r1.xyzw, l(3, 3, 3, 3)
ld_raw r0.x, r0.y, T0[0].xxxx
ushr r0.yzw, r0.xxxx, l(0, 8, 16, 24)
bfi r0.xyzw, l(24, 24, 24, 24), l(8, 8, 8, 8), r2.xyzw, r0.xyzw
ishl r2.x, vThreadIDInGroup.x, l(2)
and r2.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
if_nz r2.y
ult r2.y, vThreadIDInGroup.x, l(10)
uge r2.w, vThreadIDInGroup.x, l(10)
and r2.yw, r2.yyyw, l(0, 40, 0, -40)
iadd r2.y, r2.w, r2.y
iadd r2.x, r2.y, r2.x
bfi r0.xyzw, l(24, 24, 24, 24), l(8, 8, 8, 8), r1.xyzw, r0.xyzw
ishl r1.x, vThreadIDInGroup.x, l(2)
and r1.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0)
if_nz r1.y
ult r1.y, vThreadIDInGroup.x, l(10)
uge r1.w, vThreadIDInGroup.x, l(10)
and r1.yw, r1.yyyw, l(0, 40, 0, -40)
iadd r1.y, r1.w, r1.y
iadd r1.x, r1.y, r1.x
endif
ushr r2.y, CB0[0][1].x, l(16)
imad r2.y, vThreadGroupID.y, r2.y, r2.z
iadd r2.y, r2.y, vThreadGroupID.x
imul null, r2.z, vThreadIDInGroup.y, l(320)
imad r2.y, r2.y, l(5120), r2.z
ishl r2.x, r2.x, l(2)
iadd r2.x, r2.x, r2.y
ubfe r2.y, l(1), l(13), CB0[0][1].x
ishl r2.y, r2.y, l(1)
ishl r2.x, r2.x, r2.y
store_raw U0[0].xyzw, r2.x, r0.xyzw
iadd r0.x, r2.x, l(0x00a00000)
store_raw U0[0].xyzw, r0.x, r1.xyzw
ushr r1.y, CB0[0][1].x, l(16)
imad r1.y, vThreadGroupID.y, r1.y, r1.z
iadd r1.y, r1.y, vThreadGroupID.x
imul null, r1.z, vThreadIDInGroup.y, l(320)
imad r1.y, r1.y, l(5120), r1.z
ishl r1.x, r1.x, l(2)
iadd r1.x, r1.x, r1.y
ubfe r1.y, l(1), l(13), CB0[0][1].x
ishl r1.y, r1.y, l(1)
ishl r1.x, r1.x, r1.y
store_raw U0[0].xyzw, r1.x, r0.xyzw
ret
// Approximately 45 instruction slots used
// Approximately 43 instruction slots used

Binary file not shown.

View File

@ -0,0 +1,156 @@
// generated from `xb buildhlsl`
// source: float24_round.ps.hlsl
const uint8_t float24_round_ps[] = {
0x44, 0x58, 0x42, 0x43, 0xDF, 0x71, 0xF3, 0x0A, 0x4A, 0xDB, 0xC3, 0x80,
0x1E, 0xE4, 0x39, 0x21, 0x59, 0x07, 0x78, 0x97, 0x01, 0x00, 0x00, 0x00,
0x18, 0x07, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0xA0, 0x00, 0x00, 0x00, 0x90, 0x02, 0x00, 0x00, 0xC4, 0x02, 0x00, 0x00,
0x7C, 0x06, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x4D, 0x69, 0x63, 0x72, 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52,
0x29, 0x20, 0x48, 0x4C, 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65,
0x72, 0x20, 0x43, 0x6F, 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31,
0x30, 0x2E, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4E, 0xE8, 0x01, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0C, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xD9, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x0F, 0x04, 0x00, 0x00, 0x54, 0x45, 0x58, 0x43,
0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69,
0x74, 0x69, 0x6F, 0x6E, 0x00, 0xAB, 0xAB, 0xAB, 0x4F, 0x53, 0x47, 0x4E,
0x2C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x0E, 0x00, 0x00,
0x53, 0x56, 0x5F, 0x44, 0x65, 0x70, 0x74, 0x68, 0x00, 0xAB, 0xAB, 0xAB,
0x53, 0x48, 0x45, 0x58, 0xB0, 0x03, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
0xEC, 0x00, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x64, 0x38, 0x00, 0x04,
0x42, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x65, 0x00, 0x00, 0x02, 0x01, 0xC0, 0x00, 0x00, 0x68, 0x00, 0x00, 0x02,
0x02, 0x00, 0x00, 0x00, 0x36, 0x20, 0x08, 0x05, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00,
0x50, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x7F, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x08, 0x07,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F,
0x8C, 0x00, 0x10, 0x0B, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x20, 0x07,
0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x20, 0x08, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x20, 0x07,
0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x55, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x08, 0x07,
0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8,
0x37, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x08, 0x07, 0x12, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x38, 0x0F,
0x72, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, 0x00, 0x40, 0x05,
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x40, 0x07, 0x82, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x40, 0x09,
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x08, 0x08,
0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x80,
0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x37, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x40, 0x07, 0x82, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x40, 0x07,
0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00,
0x37, 0x00, 0x10, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x29, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x10, 0x07,
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x37, 0x00, 0x08, 0x08, 0x01, 0xC0, 0x00, 0x00,
0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
};

View File

@ -0,0 +1,74 @@
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// TEXCOORD 0 xyzw 0 NONE float
// TEXCOORD 1 xyzw 1 NONE float
// TEXCOORD 2 xyzw 2 NONE float
// TEXCOORD 3 xyzw 3 NONE float
// TEXCOORD 4 xyzw 4 NONE float
// TEXCOORD 5 xyzw 5 NONE float
// TEXCOORD 6 xyzw 6 NONE float
// TEXCOORD 7 xyzw 7 NONE float
// TEXCOORD 8 xyzw 8 NONE float
// TEXCOORD 9 xyzw 9 NONE float
// TEXCOORD 10 xyzw 10 NONE float
// TEXCOORD 11 xyzw 11 NONE float
// TEXCOORD 12 xyzw 12 NONE float
// TEXCOORD 13 xyzw 13 NONE float
// TEXCOORD 14 xyzw 14 NONE float
// TEXCOORD 15 xyzw 15 NONE float
// TEXCOORD 16 xyz 16 NONE float
// TEXCOORD 17 xy 17 NONE float
// SV_Position 0 xyzw 18 POS float z
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_Depth 0 N/A oDepth DEPTH float YES
//
// Pixel Shader runs at sample frequency
//
ps_5_1
dcl_globalFlags refactoringAllowed
dcl_input_ps_siv linear noperspective sample v18.z, position
dcl_output oDepth
dcl_temps 2
mov_sat [precise(x)] r0.x, v18.z
uge [precise(y)] r0.y, l(0x7fffffff), r0.x
and [precise(x)] r0.x, r0.x, r0.y
umin [precise(x)] r0.x, r0.x, l(0x3ffffff8)
bfi [precise(y)] r0.y, l(23), l(0), r0.x, l(0x00800000)
ushr [precise(z)] r0.z, r0.x, l(23)
iadd [precise(z)] r0.z, -r0.z, l(113)
umin [precise(z)] r0.z, r0.z, l(24)
ushr [precise(y)] r0.y, r0.y, r0.z
ult [precise(z)] r0.z, r0.x, l(0x38800000)
iadd [precise(x)] r0.x, r0.x, l(0xc8000000)
movc [precise(x)] r0.x, r0.z, r0.y, r0.x
iadd [precise(y)] r0.y, r0.x, l(3)
ubfe [precise(x)] r0.x, l(1), l(3), r0.x
iadd [precise(x)] r0.x, r0.x, r0.y
ubfe [precise(xyz)] r0.xyz, l(24, 20, 4, 0), l(3, 3, 23, 0), r0.xxxx
firstbit_hi [precise(w)] r0.w, r0.y
iadd [precise(w)] r0.w, r0.w, l(-11)
movc [precise(w)] r0.w, r0.y, r0.w, l(21)
iadd [precise(x)] r1.x, -r0.w, l(1)
movc [precise(x)] r1.x, r0.z, r0.z, r1.x
ishl [precise(w)] r0.w, r0.y, r0.w
and [precise(w)] r0.w, r0.w, l(0x000fffff)
movc [precise(y)] r0.y, r0.z, r0.y, r0.w
ishl [precise(z)] r0.z, r1.x, l(23)
iadd [precise(z)] r0.z, r0.z, l(0x38000000)
ishl [precise(y)] r0.y, r0.y, l(3)
iadd [precise(y)] r0.y, r0.z, r0.y
movc [precise(x)] oDepth, r0.x, r0.y, l(0)
ret
// Approximately 30 instruction slots used

View File

@ -0,0 +1,100 @@
// generated from `xb buildhlsl`
// source: float24_truncate.ps.hlsl
const uint8_t float24_truncate_ps[] = {
0x44, 0x58, 0x42, 0x43, 0xB8, 0x51, 0x55, 0x1D, 0xF4, 0xF1, 0xC9, 0xC0,
0x0C, 0x22, 0xD3, 0x43, 0x94, 0xDF, 0x83, 0x9D, 0x01, 0x00, 0x00, 0x00,
0x7C, 0x04, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
0xA0, 0x00, 0x00, 0x00, 0x90, 0x02, 0x00, 0x00, 0xCC, 0x02, 0x00, 0x00,
0xE0, 0x03, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x4D, 0x69, 0x63, 0x72, 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52,
0x29, 0x20, 0x48, 0x4C, 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65,
0x72, 0x20, 0x43, 0x6F, 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31,
0x30, 0x2E, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4E, 0xE8, 0x01, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0C, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xD9, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x0F, 0x04, 0x00, 0x00, 0x54, 0x45, 0x58, 0x43,
0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69,
0x74, 0x69, 0x6F, 0x6E, 0x00, 0xAB, 0xAB, 0xAB, 0x4F, 0x53, 0x47, 0x4E,
0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x0E, 0x00, 0x00,
0x53, 0x56, 0x5F, 0x44, 0x65, 0x70, 0x74, 0x68, 0x4C, 0x65, 0x73, 0x73,
0x45, 0x71, 0x75, 0x61, 0x6C, 0x00, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58,
0x0C, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00,
0x6A, 0x08, 0x00, 0x01, 0x64, 0x38, 0x00, 0x04, 0x42, 0x10, 0x10, 0x00,
0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x02,
0x01, 0x70, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00,
0x36, 0x20, 0x08, 0x05, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x2A, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x50, 0x00, 0x10, 0x07,
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x2E,
0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x10, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x10, 0x08, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1A, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x24, 0x00, 0x10, 0x07,
0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x8C, 0x00, 0x08, 0x0A, 0x01, 0x70, 0x02, 0x00, 0x1A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x01, 0x36, 0x00, 0x08, 0x04,
0x01, 0x70, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x15, 0x00, 0x00, 0x01, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};

View File

@ -0,0 +1,55 @@
//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// TEXCOORD 0 xyzw 0 NONE float
// TEXCOORD 1 xyzw 1 NONE float
// TEXCOORD 2 xyzw 2 NONE float
// TEXCOORD 3 xyzw 3 NONE float
// TEXCOORD 4 xyzw 4 NONE float
// TEXCOORD 5 xyzw 5 NONE float
// TEXCOORD 6 xyzw 6 NONE float
// TEXCOORD 7 xyzw 7 NONE float
// TEXCOORD 8 xyzw 8 NONE float
// TEXCOORD 9 xyzw 9 NONE float
// TEXCOORD 10 xyzw 10 NONE float
// TEXCOORD 11 xyzw 11 NONE float
// TEXCOORD 12 xyzw 12 NONE float
// TEXCOORD 13 xyzw 13 NONE float
// TEXCOORD 14 xyzw 14 NONE float
// TEXCOORD 15 xyzw 15 NONE float
// TEXCOORD 16 xyz 16 NONE float
// TEXCOORD 17 xy 17 NONE float
// SV_Position 0 xyzw 18 POS float z
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_DepthLessEqual 0 N/A oDepthLE DEPTHLE float YES
//
// Pixel Shader runs at sample frequency
//
ps_5_1
dcl_globalFlags refactoringAllowed
dcl_input_ps_siv linear noperspective sample v18.z, position
dcl_output oDepthLE
dcl_temps 1
mov_sat [precise(x)] r0.x, v18.z
uge [precise(y)] r0.y, r0.x, l(0x2e800000)
if_nz r0.y
ubfe [precise(y)] r0.y, l(8), l(23), r0.x
iadd [precise(y)] r0.y, -r0.y, l(116)
imax [precise(y)] r0.y, r0.y, l(3)
bfi [precise(x)] oDepthLE, r0.y, l(0), l(0), r0.x
else
mov [precise(x)] oDepthLE, l(0)
endif
ret
// Approximately 11 instruction slots used

View File

@ -7,22 +7,14 @@ void main(uint3 xe_group_id : SV_GroupID,
uint3 xe_thread_id : SV_DispatchThreadID) {
uint2 tile_sample_index = xe_group_thread_id.xy;
tile_sample_index.x *= 4u;
uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index);
uint4 depth24_stencil = xe_edram_load_store_source.Load4(edram_offset);
uint4 depth24 = depth24_stencil >> 8u;
uint4 depth32 = xe_edram_load_store_source.Load4(10485760u + edram_offset);
// Depth. If the stored 32-bit depth converted to 24-bit is the same as the
// stored 24-bit depth, load the 32-bit value because it has more precision
// (and multipass rendering is possible), if it's not, convert the 24-bit
// depth because it was overwritten by aliasing.
uint4 depth24to32 = XeFloat20e4To32(depth24);
uint4 depth = depth24to32 + (depth32 - depth24to32) *
uint4(XeFloat32To20e4(depth32) == depth24);
uint4 samples = xe_edram_load_store_source.Load4(
XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index));
// Depth (exact conversion ensured during drawing).
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
xe_edram_load_store_dest.Store4(rt_offset, depth);
xe_edram_load_store_dest.Store4(rt_offset, XeFloat20e4To32(samples >> 8u));
// Stencil.
uint4 stencil = (depth24_stencil & 0xFFu) << uint4(0u, 8u, 16u, 24u);
uint4 stencil = (samples & 0xFFu) << uint4(0u, 8u, 16u, 24u);
stencil.xy |= stencil.zw;
stencil.x |= stencil.y;
rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u +

View File

@ -0,0 +1,31 @@
#include "edram_load_store.hlsli"
#include "pixel_formats.hlsli"
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Every thread processes 4 horizontally adjacent samples of the tile.
  uint2 first_sample_in_tile =
      uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  uint packed_address =
      XeEdramOffset32bpp(xe_group_id.xy, first_sample_in_tile);

  // Two copies of the depth are read: the packed 24-bit depth + 8-bit stencil
  // samples, and the 32-bit depth located 10485760 bytes (10 MiB) after them.
  uint4 packed_samples = xe_edram_load_store_source.Load4(packed_address);
  uint4 full_depth =
      xe_edram_load_store_source.Load4(packed_address + 10485760u);

  // Depth. The 32-bit copy is more precise (and keeps multipass rendering
  // possible), but it is only valid while it still converts to exactly the
  // stored 24-bit value. If it doesn't, the 24-bit data was overwritten via
  // aliasing, and the 24-bit value is expanded instead.
  uint4 packed_depth = packed_samples >> 8u;
  uint4 expanded_depth = XeFloat20e4To32(packed_depth);
  // 1 for the samples where the 32-bit copy is up to date, 0 otherwise.
  uint4 full_depth_valid = uint4(XeFloat32To20e4(full_depth) == packed_depth);
  uint4 result_depth =
      expanded_depth + full_depth_valid * (full_depth - expanded_depth);
  uint depth_address = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
                       xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
  xe_edram_load_store_dest.Store4(depth_address, result_depth);

  // Stencil. The low byte of each of the 4 samples is gathered into one dword,
  // with the first sample in the lowest byte.
  uint4 stencil_bytes = (packed_samples & 0xFFu) << uint4(0u, 8u, 16u, 24u);
  uint packed_stencil =
      stencil_bytes.x | stencil_bytes.y | stencil_bytes.z | stencil_bytes.w;
  uint stencil_address = xe_thread_id.y * xe_edram_rt_stencil_pitch +
                         xe_thread_id.x * 4u + xe_edram_rt_stencil_offset;
  xe_edram_load_store_dest.Store(stencil_address, packed_stencil);
}

View File

@ -5,21 +5,18 @@
void main(uint3 xe_group_id : SV_GroupID,
uint3 xe_group_thread_id : SV_GroupThreadID,
uint3 xe_thread_id : SV_DispatchThreadID) {
// Depth.
// Depth (exact conversion ensured during drawing).
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
uint4 depth32 = xe_edram_load_store_source.Load4(rt_offset);
uint4 depth24_stencil = XeFloat32To20e4(depth32) << 8u;
uint4 samples =
XeFloat32To20e4(xe_edram_load_store_source.Load4(rt_offset)) << 8u;
// Stencil.
rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u +
xe_edram_rt_stencil_offset;
depth24_stencil |= (xe_edram_load_store_source.Load(rt_offset).xxxx >>
samples |= (xe_edram_load_store_source.Load(rt_offset).xxxx >>
uint4(0u, 8u, 16u, 24u)) & 0xFFu;
uint2 tile_sample_index = xe_group_thread_id.xy;
tile_sample_index.x *= 4u;
uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index);
// Store 24-bit depth for aliasing and checking if 32-bit depth is up to date.
xe_edram_load_store_dest.Store4(edram_offset, depth24_stencil);
// Store 32-bit depth so precision isn't lost when doing multipass rendering.
xe_edram_load_store_dest.Store4(10485760u + edram_offset, depth32);
xe_edram_load_store_dest.Store4(
XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index), samples);
}

View File

@ -0,0 +1,25 @@
#include "edram_load_store.hlsli"
#include "pixel_formats.hlsli"
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Depth. 4 consecutive 32-bit depth values are read per thread, and a 24-bit
  // version of each is placed in the upper 24 bits of the packed sample.
  uint depth_address = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
                       xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
  uint4 full_depth = xe_edram_load_store_source.Load4(depth_address);
  uint4 packed_samples = XeFloat32To20e4(full_depth) << 8u;

  // Stencil. One dword holds the stencil bytes of all 4 samples (the first
  // sample in the lowest byte); each goes to the low 8 bits of its sample.
  uint stencil_address = xe_thread_id.y * xe_edram_rt_stencil_pitch +
                         xe_thread_id.x * 4u + xe_edram_rt_stencil_offset;
  uint4 stencil_bytes =
      (xe_edram_load_store_source.Load(stencil_address).xxxx >>
       uint4(0u, 8u, 16u, 24u)) & 0xFFu;
  packed_samples |= stencil_bytes;

  // Every thread writes 4 horizontally adjacent samples of the tile.
  uint2 first_sample_in_tile =
      uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  uint packed_address =
      XeEdramOffset32bpp(xe_group_id.xy, first_sample_in_tile);
  // The 24-bit depth is what aliasing sees, and it's also used later to check
  // whether the 32-bit depth is still up to date.
  xe_edram_load_store_dest.Store4(packed_address, packed_samples);
  // The 32-bit depth is kept as well (10485760 bytes, or 10 MiB, after the
  // packed samples) so multipass rendering doesn't lose precision.
  xe_edram_load_store_dest.Store4(packed_address + 10485760u, full_depth);
}

View File

@ -7,8 +7,7 @@ void main(uint3 xe_group_id : SV_GroupID,
// Depth.
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset;
uint4 samples =
(xe_edram_load_store_source.Load4(rt_offset) & 0xFFFFFFu) << 8u;
uint4 samples = xe_edram_load_store_source.Load4(rt_offset) << 8u;
// Stencil.
rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u +
xe_edram_rt_stencil_offset;

View File

@ -0,0 +1,13 @@
#include "pixel_formats.hlsli"
#include "xenos_draw.hlsli"
// Input of the float24 rounding depth pixel shader: the interpolants preceding
// the pixel shader stage, followed by the pixel position.
struct XePSInput {
// Not read by this shader's main. NOTE(review): presumably declared in
// xenos_draw.hlsli and kept here so the input signature matches the previous
// stage's output - confirm.
XeVertexPrePS pre_ps;
// The `sample` interpolation modifier requests per-sample interpolation, so
// the depth is converted separately for every sample. Only Z is read by main.
sample float4 position : SV_Position;
};
precise float main(XePSInput xe_input) : SV_Depth {
  // The viewport depth clamp is applied only after the shader, so the incoming
  // Z may still be outside the viewport range - saturate it here.
  precise uint depth32 = asuint(saturate(xe_input.position.z));
  // Round-trip through the 24-bit 20e4 encoding so the value written to the
  // depth buffer is one that float24 represents exactly.
  precise uint depth24 = XeFloat32To20e4(depth32);
  return asfloat(XeFloat20e4To32(depth24));
}

View File

@ -0,0 +1,38 @@
#include "pixel_formats.hlsli"
#include "xenos_draw.hlsli"
// Input of the float24 truncating depth pixel shader: the interpolants
// preceding the pixel shader stage, followed by the pixel position.
struct XePSInput {
// Not read by this shader's main. NOTE(review): presumably declared in
// xenos_draw.hlsli and kept here so the input signature matches the previous
// stage's output - confirm.
XeVertexPrePS pre_ps;
// The `sample` interpolation modifier requests per-sample interpolation, so
// the depth is converted separately for every sample. Only Z is read by main.
sample float4 position : SV_Position;
};
precise float main(XePSInput xe_input) : SV_DepthLessEqual {
  // Cheap float32 -> float24 (20e4) conversion that only clears low bits, so
  // the result never exceeds the original value, as SV_DepthLessEqual needs.
  //
  // float32 exponents are biased by 127, and a saturated value has an
  // unbiased exponent of at most 0.
  // - 2^-14 is the smallest normalized 20e4 exponent: from there up, the 3
  //   lowest mantissa bits are cleared.
  // - 2^-34 is the smallest denormalized 20e4 value: there, all 23 mantissa
  //   bits are cleared.
  // - Anything below 2^-34 turns into 0.
  //
  // The viewport depth clamp is applied only after the shader, so the incoming
  // Z may still be outside the viewport range - saturate it here.
  precise uint depth = asuint(saturate(xe_input.position.z));
  if (depth < 0x2E800000u) {
    // Less than 2^-34 - truncation to float24 leaves nothing but zero.
    depth = 0u;
  } else {
    // Biased float32 exponent: 93 for 2^-34, 113 or more for 2^-14 and above.
    int biased_exponent = asint((depth >> 23u) & 0xFFu);
    // Number of low bits to clear: 116 - 93 = 23 at 2^-34, decreasing to
    // 116 - 113 = 3 at 2^-14, and staying at 3 for larger exponents.
    uint cleared_bit_count = asuint(max(116 - biased_exponent, 3));
    depth &= 0xFFFFFFFFu << cleared_bit_count;
  }
  return asfloat(depth);
}

View File

@ -495,6 +495,16 @@ void XeR11G11B10SNormToRGBA16(uint4 packed_texels, out uint4 out_01,
// 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows [0,2).
// We also can't clamp the stored value to 1 as load->store->load must be exact.
uint XeFloat32To20e4(uint f32u32) {
  // Bit patterns with the sign bit set (above 0x7FFFFFFF when viewed as an
  // unsigned integer) are negative and become zero. The rest are saturated to
  // the largest representable value near 2, which gets rid of NaNs as well.
  if (f32u32 > 0x7FFFFFFFu) {
    f32u32 = 0u;
  }
  f32u32 = min(f32u32, 0x3FFFFFF8u);
  uint f24u32;
  if (f32u32 < 0x38800000u) {
    // Too small to be a normalized 20e4 number - make the implicit leading 1
    // explicit and shift the mantissa right according to the exponent (by at
    // most 24 bits, which leaves nothing).
    uint significand = (f32u32 & 0x7FFFFFu) | 0x800000u;
    uint denormal_shift = min(113u - (f32u32 >> 23u), 24u);
    f24u32 = significand >> denormal_shift;
  } else {
    // Normalized - rebias the exponent (the same as subtracting 112 << 23).
    f24u32 = f32u32 + 0xC8000000u;
  }
  // Drop the 3 extra mantissa bits, rounding to the nearest even.
  uint rounding_bias = 3u + ((f24u32 >> 3u) & 1u);
  return ((f24u32 + rounding_bias) >> 3u) & 0xFFFFFFu;
}
uint4 XeFloat32To20e4(uint4 f32u32) {
// Keep only positive (high bit set means negative for both float and int) and
// saturate to the maximum representable value near 2 (also dropping NaNs).
@ -505,6 +515,21 @@ uint4 XeFloat32To20e4(uint4 f32u32) {
return ((f24u32 + 3u + ((f24u32 >> 3u) & 1u)) >> 3u) & 0xFFFFFFu;
}
uint XeFloat20e4To32(uint f24u32) {
  // Zero stays zero rather than going through the exponent rebiasing.
  if (f24u32 == 0u) {
    return 0u;
  }
  uint exponent = f24u32 >> 20u;
  uint mantissa = f24u32 & 0xFFFFFu;
  if (exponent == 0u) {
    // Denormalized - normalize by moving the highest set mantissa bit into the
    // implicit leading 1 position (bit 20, masked away afterwards), lowering
    // the exponent by one for every bit shifted, starting from 1:
    // Exponent = 1;
    // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x100000) == 0);
    // The exponent may wrap around below zero here, the addition of the bias
    // below wraps it back.
    uint normalize_shift = 20u - firstbithigh(mantissa);
    exponent = 1u - normalize_shift;
    mantissa = (mantissa << normalize_shift) & 0xFFFFFu;
  }
  // Rebias the exponent by 112 and widen the mantissa from 20 to 23 bits.
  return ((exponent + 112u) << 23u) | (mantissa << 3u);
}
uint4 XeFloat20e4To32(uint4 f24u32) {
uint4 mantissa = f24u32 & 0xFFFFFu;
uint4 exponent = f24u32 >> 20u;

View File

@ -10,9 +10,9 @@ void main(point XeVertexPreGS xe_in[1],
}
XeVertexPostGS xe_out;
xe_out.interpolators = xe_in[0].post_gs.interpolators;
xe_out.point_params.z = xe_in[0].post_gs.point_params.z;
xe_out.clip_space_zw = xe_in[0].post_gs.clip_space_zw;
xe_out.pre_ps.interpolators = xe_in[0].post_gs.pre_ps.interpolators;
xe_out.pre_ps.point_params.z = xe_in[0].post_gs.pre_ps.point_params.z;
xe_out.pre_ps.clip_space_zw = xe_in[0].post_gs.pre_ps.clip_space_zw;
xe_out.position.zw = xe_in[0].post_gs.position.zw;
xe_out.clip_distance_0123 = xe_in[0].post_gs.clip_distance_0123;
xe_out.clip_distance_45 = xe_in[0].post_gs.clip_distance_45;
@ -20,26 +20,27 @@ void main(point XeVertexPreGS xe_in[1],
// Shader header writes -1.0f to point_size by default, so any positive value
// means that it was overwritten by the translated vertex shader.
float2 point_size =
(xe_in[0].post_gs.point_params.z > 0.0f ? xe_in[0].post_gs.point_params.zz
: xe_point_size);
xe_in[0].post_gs.pre_ps.point_params.z > 0.0f
? xe_in[0].post_gs.pre_ps.point_params.zz
: xe_point_size;
point_size =
clamp(point_size, xe_point_size_min_max.xx, xe_point_size_min_max.yy) *
xe_point_screen_to_ndc * xe_in[0].post_gs.position.w;
xe_out.point_params.xy = float2(0.0, 0.0);
xe_out.pre_ps.point_params.xy = float2(0.0, 0.0);
// TODO(Triang3l): On Vulkan, sign of Y needs to be inverted because of
// upper-left origin.
// TODO(Triang3l): Investigate the true signs of point sprites.
xe_out.position.xy =
xe_in[0].post_gs.position.xy + float2(-point_size.x, point_size.y);
xe_stream.Append(xe_out);
xe_out.point_params.xy = float2(0.0, 1.0);
xe_out.pre_ps.point_params.xy = float2(0.0, 1.0);
xe_out.position.xy = xe_in[0].post_gs.position.xy - point_size;
xe_stream.Append(xe_out);
xe_out.point_params.xy = float2(1.0, 0.0);
xe_out.pre_ps.point_params.xy = float2(1.0, 0.0);
xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size;
xe_stream.Append(xe_out);
xe_out.point_params.xy = float2(1.0, 1.0);
xe_out.pre_ps.point_params.xy = float2(1.0, 1.0);
xe_out.position.xy =
xe_in[0].post_gs.position.xy + float2(point_size.x, -point_size.y);
xe_stream.Append(xe_out);

View File

@ -80,16 +80,19 @@ void main(triangle XeVertexPreGS xe_in[3],
v3_signs = float3(1.0f, 1.0f, -1.0f);
}
[unroll] for (int i = 0; i < 16; ++i) {
xe_out.interpolators[i] = v3_signs.x * xe_in[0].post_gs.interpolators[i] +
v3_signs.y * xe_in[1].post_gs.interpolators[i] +
v3_signs.z * xe_in[2].post_gs.interpolators[i];
xe_out.pre_ps.interpolators[i] =
v3_signs.x * xe_in[0].post_gs.pre_ps.interpolators[i] +
v3_signs.y * xe_in[1].post_gs.pre_ps.interpolators[i] +
v3_signs.z * xe_in[2].post_gs.pre_ps.interpolators[i];
}
xe_out.point_params = v3_signs.x * xe_in[0].post_gs.point_params +
v3_signs.y * xe_in[1].post_gs.point_params +
v3_signs.z * xe_in[2].post_gs.point_params;
xe_out.clip_space_zw = v3_signs.x * xe_in[0].post_gs.clip_space_zw +
v3_signs.y * xe_in[1].post_gs.clip_space_zw +
v3_signs.z * xe_in[2].post_gs.clip_space_zw;
xe_out.pre_ps.point_params =
v3_signs.x * xe_in[0].post_gs.pre_ps.point_params +
v3_signs.y * xe_in[1].post_gs.pre_ps.point_params +
v3_signs.z * xe_in[2].post_gs.pre_ps.point_params;
xe_out.pre_ps.clip_space_zw =
v3_signs.x * xe_in[0].post_gs.pre_ps.clip_space_zw +
v3_signs.y * xe_in[1].post_gs.pre_ps.clip_space_zw +
v3_signs.z * xe_in[2].post_gs.pre_ps.clip_space_zw;
xe_out.position = v3_signs.x * xe_in[0].post_gs.position +
v3_signs.y * xe_in[1].post_gs.position +
v3_signs.z * xe_in[2].post_gs.position;

View File

@ -63,10 +63,14 @@ struct XeHSControlPointOutput {
float index : XEVERTEXID;
};
struct XeVertexPostGS {
struct XeVertexPrePS {
float4 interpolators[16] : TEXCOORD0;
float3 point_params : TEXCOORD16;
float2 clip_space_zw : TEXCOORD17;
};
struct XeVertexPostGS {
XeVertexPrePS pre_ps;
// Precise needed to preserve NaN - guest primitives may be converted to more
// than 1 triangle, so need to kill them entirely manually in GS if any vertex
// is NaN.

View File

@ -9,8 +9,6 @@
#include "xenia/gpu/d3d12/texture_cache.h"
#include "third_party/xxhash/xxhash.h"
#include <algorithm>
#include <cfloat>
#include <cstring>
@ -21,6 +19,7 @@
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/base/xxhash.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/texture_info.h"
@ -1335,8 +1334,8 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
bool TextureCache::AreActiveTextureSRVKeysUpToDate(
const TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const {
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
size_t host_shader_binding_count) const {
for (size_t i = 0; i < host_shader_binding_count; ++i) {
const TextureSRVKey& key = keys[i];
const TextureBinding& binding =
texture_bindings_[host_shader_bindings[i].fetch_constant];
@ -1351,8 +1350,8 @@ bool TextureCache::AreActiveTextureSRVKeysUpToDate(
void TextureCache::WriteActiveTextureSRVKeys(
TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const {
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
size_t host_shader_binding_count) const {
for (size_t i = 0; i < host_shader_binding_count; ++i) {
TextureSRVKey& key = keys[i];
const TextureBinding& binding =
texture_bindings_[host_shader_bindings[i].fetch_constant];

View File

@ -196,14 +196,14 @@ class TextureCache {
bool AreActiveTextureSRVKeysUpToDate(
const TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const;
size_t host_shader_binding_count) const;
// Exports the current binding data to texture SRV keys so they can be stored
// for checking whether subsequent draw calls can keep using the same
// bindings. Write host_shader_binding_count keys.
void WriteActiveTextureSRVKeys(
TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const;
size_t host_shader_binding_count) const;
// Returns the post-swizzle signedness of a currently bound texture (must be
// called after RequestTextures).
uint8_t GetActiveTextureSwizzledSigns(uint32_t index) const {

View File

@ -111,9 +111,82 @@ int32_t FloatToD3D11Fixed16p8(float f32) {
return result.s;
}
bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
bool primitive_polygonal) {
// TODO(Triang3l): Investigate ModeControl::kIgnore better, with respect to
// sample counting. Let's assume sample counting is a part of depth / stencil,
// thus disabled too.
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode != xenos::ModeControl::kColorDepth &&
edram_mode != xenos::ModeControl::kDepth) {
return false;
}
if (regs.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode ==
xenos::VertexShaderExportMode::kMultipass ||
!regs.Get<reg::RB_SURFACE_INFO>().surface_pitch) {
return false;
}
if (primitive_polygonal) {
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
if (pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) {
// Both faces are culled.
return false;
}
}
return true;
}
bool IsPixelShaderNeededWithRasterization(const Shader& shader,
                                          const RegisterFile& regs) {
  assert_true(shader.type() == xenos::ShaderType::kPixel);
  assert_true(shader.is_ucode_analyzed());

  // The pixel shader is only used in the kColorDepth mode - see
  // xenos::ModeControl for the explanation.
  const bool is_color_depth_mode =
      regs.Get<reg::RB_MODECONTROL>().edram_mode ==
      xenos::ModeControl::kColorDepth;
  if (!is_color_depth_mode) {
    return false;
  }

  // Side effects that make the shader needed regardless of color writes:
  // - Discarding (explicitly here, or through alphatest or alpha to coverage
  //   further below) affects pixel counting.
  if (shader.kills_pixels()) {
    return true;
  }
  // - Depth output only really matters if depth test is active, but it's used
  //   extremely rarely, and pretty much always intentionally - for simplicity,
  //   it's treated as always mattering.
  if (shader.writes_depth()) {
    return true;
  }
  // - Memory export is an obvious intentional side effect.
  if (!shader.memexport_stream_constants().empty()) {
    return true;
  }
  // - Alpha written to the first color target may change the coverage.
  if (shader.writes_color_target(0) &&
      DoesCoverageDependOnAlpha(regs.Get<reg::RB_COLORCONTROL>())) {
    return true;
  }

  // Check whether any color target written by the shader has at least one
  // component of its format enabled in the color write mask (4 mask bits per
  // render target).
  const uint32_t color_write_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
  uint32_t rt_index;
  for (uint32_t pending_rts = shader.writes_color_targets();
       xe::bit_scan_forward(pending_rts, &rt_index);
       pending_rts &= ~(uint32_t(1) << rt_index)) {
    const uint32_t component_count = GetColorRenderTargetFormatComponentCount(
        regs.Get<reg::RB_COLOR_INFO>(
                reg::RB_COLOR_INFO::rt_register_indices[rt_index])
            .color_format);
    const uint32_t component_bits = (uint32_t(1) << component_count) - 1;
    if ((color_write_mask >> (rt_index * 4)) & component_bits) {
      return true;
    }
  }

  // Only depth / stencil passthrough potentially.
  return false;
}
void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
float pixel_size_y, bool origin_bottom_left,
float x_max, float y_max, bool allow_reverse_z,
bool convert_z_to_float24,
ViewportInfo& viewport_info_out) {
assert_true(pixel_size_x >= 1.0f);
assert_true(pixel_size_y >= 1.0f);
@ -227,6 +300,7 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
ndc_offset_y = 0.0f;
}
} else {
viewport_top = 0.0f;
viewport_height = std::min(
float(xenos::kTexture2DCubeMaxWidthHeight) * pixel_size_y, y_max);
ndc_scale_y = (2.0f * pixel_size_y) / viewport_height;
@ -269,6 +343,18 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
ndc_scale_z = -ndc_scale_z;
ndc_offset_z = 1.0f - ndc_offset_z;
}
if (convert_z_to_float24 &&
GetDepthControlForCurrentEdramMode(regs).z_enable &&
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
xenos::DepthRenderTargetFormat::kD24FS8) {
// Need to adjust the bounds that the resulting depth values will be clamped
// to after the pixel shader. Preferring adding some error to interpolated Z
// instead if conversion can't be done exactly, without modifying clipping
// bounds by adjusting Z in vertex shaders, as that may cause polygons
// placed explicitly at Z = 0 or Z = W to be clipped.
viewport_z_min = xenos::Float20e4To32(xenos::Float32To20e4(viewport_z_min));
viewport_z_max = xenos::Float20e4To32(xenos::Float32To20e4(viewport_z_max));
}
viewport_info_out.left = viewport_left;
viewport_info_out.top = viewport_top;
@ -304,6 +390,20 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out) {
br_y = uint32_t(std::max(
int32_t(br_y) + pa_sc_window_offset.window_y_offset, int32_t(0)));
}
// Clamp the horizontal scissor to surface_pitch for safety, in case that's
// not done by the guest for some reason (it's not when doing draws completely
// without a viewport, for instance), to prevent overflow - this is important
// for host implementations, both based on target-independent rasterization
// without render target width at all (pixel shader interlocks-based custom RB
// implementations) and using conventional render targets, but padded to EDRAM
// tiles.
uint32_t surface_pitch = regs.Get<reg::RB_SURFACE_INFO>().surface_pitch;
tl_x = std::min(tl_x, surface_pitch);
br_x = std::min(br_x, surface_pitch);
// Ensure the rectangle is non-negative, by collapsing it into a 0-sized one
// (not by reordering the bounds preserving the width / height, which would
// reveal samples not meant to be covered, unless TL > BR does that on a real
// console, but no evidence of such has ever been seen).
br_x = std::max(br_x, tl_x);
br_y = std::max(br_y, tl_y);
scissor_out.left = tl_x;

View File

@ -16,6 +16,7 @@
#include "xenia/base/assert.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/trace_writer.h"
#include "xenia/gpu/xenos.h"
#include "xenia/memory.h"
@ -33,6 +34,48 @@ namespace draw_util {
// for use with the top-left rasterization rule later.
int32_t FloatToD3D11Fixed16p8(float f32);
// Whether with the current state, any samples to rasterize (for any reason, not
// only to write something to a render target, but also to do sample counting or
// pixel shader memexport) can be generated. Finally dropping draw calls can
// only be done if the vertex shader doesn't memexport. Checks mostly special
// cases (for both the guest and usual host implementations), not everything
// like whether viewport / scissor are empty (until this truly matters in any
// game, of course).
bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
bool primitive_polygonal);
// Returns RB_DEPTHCONTROL as it applies in the current EDRAM mode: the actual
// register value in the kColorDepth and kDepth modes, and an all-zero value
// (both depth and stencil disabled) in any other mode.
inline reg::RB_DEPTHCONTROL GetDepthControlForCurrentEdramMode(
    const RegisterFile& regs) {
  const xenos::ModeControl mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
  if (mode == xenos::ModeControl::kColorDepth ||
      mode == xenos::ModeControl::kDepth) {
    return regs.Get<reg::RB_DEPTHCONTROL>();
  }
  // EDRAM depth and stencil are ignored in this mode - report both as disabled.
  reg::RB_DEPTHCONTROL all_disabled;
  all_disabled.value = 0;
  return all_disabled;
}
// Whether the alpha value can affect which samples end up covered: true if
// alpha to mask is enabled, or if the alpha test is enabled with a comparison
// function other than kAlways.
inline bool DoesCoverageDependOnAlpha(reg::RB_COLORCONTROL rb_colorcontrol) {
  if (rb_colorcontrol.alpha_to_mask_enable) {
    return true;
  }
  if (!rb_colorcontrol.alpha_test_enable) {
    return false;
  }
  return rb_colorcontrol.alpha_func != xenos::CompareFunction::kAlways;
}
// Whether the pixel shader is needed for the draw - if not, it can be disabled
// on the host to speed up depth pre-passes and shadowmaps. The shader must have
// its ucode analyzed. If IsRasterizationPotentiallyDone returns false, this
// shouldn't be called, and the result must be assumed to be false instead.
// Helps reject the pixel shader in some cases - memexport draws in
// Halo 3, and also most of some 1-point draws not covering anything done for
// some reason in different games with a leftover pixel shader from the previous
// draw, but with SQ_PROGRAM_CNTL destroyed, reducing the number of
// unpredictable unneeded translations of random shaders with different host
// modification bits, such as register count and depth format-related (though
// shaders with side effects on depth or memory export will still be preserved).
bool IsPixelShaderNeededWithRasterization(const Shader& shader,
const RegisterFile& regs);
struct ViewportInfo {
// The returned viewport will always be in the positive quarter-plane for
// simplicity of clamping to the maximum size supported by the host, negative
@ -53,6 +96,7 @@ struct ViewportInfo {
void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
float pixel_size_y, bool origin_bottom_left,
float x_max, float y_max, bool allow_reverse_z,
bool convert_z_to_float24,
ViewportInfo& viewport_info_out);
struct Scissor {

1611
src/xenia/gpu/dxbc.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,27 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/dxbc_shader.h"
#include <cstring>
namespace xe {
namespace gpu {
// Constructs the shader by forwarding all arguments unchanged to the base
// Shader constructor. The constructor body is empty - the binding-related
// members are not touched here.
DxbcShader::DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count)
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
// Allocates a new DxbcTranslation for this shader and the given modification
// value, and returns it as a pointer to the base Shader::Translation.
// NOTE(review): the object is created with a raw `new` and is not freed here -
// presumably the caller takes ownership of it; confirm against Shader.
Shader::Translation* DxbcShader::CreateTranslationInstance(
uint64_t modification) {
return new DxbcTranslation(*this, modification);
}
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,91 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_DXBC_SHADER_H_
#define XENIA_GPU_DXBC_SHADER_H_
#include <atomic>
#include <vector>
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
// A Shader whose translations are DxbcTranslation instances, and which stores
// the texture and sampler bindings and the used texture mask. These members are
// private, with DxbcShaderTranslator declared as a friend, and are exposed
// through the read-only *AfterTranslation accessors.
class DxbcShader : public Shader {
 public:
  // Translation type created by CreateTranslationInstance. Adds no state of its
  // own on top of the base Translation.
  class DxbcTranslation : public Translation {
   public:
    DxbcTranslation(DxbcShader& shader, uint64_t modification)
        : Translation(shader, modification) {}
  };

  // Forwards all arguments to the base Shader constructor.
  DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash,
             const uint32_t* dword_ptr, uint32_t dword_count);

  // Resource bindings are gathered after the successful translation of any
  // modification for simplicity of translation (and they don't depend on
  // modification bits).

  static constexpr uint32_t kMaxTextureBindingIndexBits =
      DxbcShaderTranslator::kMaxTextureBindingIndexBits;
  static constexpr uint32_t kMaxTextureBindings =
      DxbcShaderTranslator::kMaxTextureBindings;
  struct TextureBinding {
    uint32_t bindless_descriptor_index;
    uint32_t fetch_constant;
    // Stacked and 3D are separate TextureBindings, even for bindless for null
    // descriptor handling simplicity.
    xenos::FetchOpDimension dimension;
    bool is_signed;
  };
  // Safe to hash and compare with memcmp for layout hashing.
  // NOTE(review): TextureBinding ends with a bool, so it likely has trailing
  // padding bytes; hashing or memcmp over the raw bytes is only reliable if the
  // code filling the vector zero-initializes whole entries - confirm in
  // DxbcShaderTranslator.
  const std::vector<TextureBinding>& GetTextureBindingsAfterTranslation()
      const {
    return texture_bindings_;
  }
  // Returns the used texture mask by value. The return type intentionally has
  // no top-level const, as it has no effect on a value of a non-class type and
  // only produces ignored-qualifier warnings.
  uint32_t GetUsedTextureMaskAfterTranslation() const {
    return used_texture_mask_;
  }

  static constexpr uint32_t kMaxSamplerBindingIndexBits =
      DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
  static constexpr uint32_t kMaxSamplerBindings =
      DxbcShaderTranslator::kMaxSamplerBindings;
  struct SamplerBinding {
    uint32_t bindless_descriptor_index;
    uint32_t fetch_constant;
    xenos::TextureFilter mag_filter;
    xenos::TextureFilter min_filter;
    xenos::TextureFilter mip_filter;
    xenos::AnisoFilter aniso_filter;
  };
  const std::vector<SamplerBinding>& GetSamplerBindingsAfterTranslation()
      const {
    return sampler_bindings_;
  }

 protected:
  // Creates a DxbcTranslation for the given modification value.
  Translation* CreateTranslationInstance(uint64_t modification) override;

 private:
  // The translator accesses the private binding storage below directly.
  friend class DxbcShaderTranslator;

  // NOTE(review): presumably set by the first translation that gathers the
  // bindings so that the setup is done only once - confirm in
  // DxbcShaderTranslator.
  std::atomic_flag bindings_setup_entered_ = ATOMIC_FLAG_INIT;
  std::vector<TextureBinding> texture_bindings_;
  std::vector<SamplerBinding> sampler_bindings_;
  uint32_t used_texture_mask_ = 0;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_DXBC_SHADER_H_

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,7 @@ using namespace ucode;
void DxbcShaderTranslator::ExportToMemory_PackFixed32(
const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4],
const DxbcSrc& is_integer, const DxbcSrc& is_signed) {
const dxbc::Src& is_integer, const dxbc::Src& is_signed) {
// Will insert with BFI - sign extension of red will be overwritten, not
// truncated.
assert_not_zero(bits[0]);
@ -26,64 +26,64 @@ void DxbcShaderTranslator::ExportToMemory_PackFixed32(
mask |= 1 << i;
}
}
DxbcOpIf(true, is_signed);
a_.OpIf(true, is_signed);
{
float range[4];
for (uint32_t i = 0; i < 4; ++i) {
range[i] = bits[i] ? float((uint32_t(1) << (bits[i] - 1)) - 1) : 0.0f;
}
DxbcSrc range_src(DxbcSrc::LP(range));
DxbcOpIf(false, is_integer);
dxbc::Src range_src(dxbc::Src::LP(range));
a_.OpIf(false, is_integer);
for (uint32_t i = 0; i < eM_count; ++i) {
uint32_t eM_temp = eM_temps[i];
DxbcOpMul(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp), range_src);
a_.OpMul(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp), range_src);
}
DxbcOpEndIf();
a_.OpEndIf();
for (uint32_t i = 0; i < eM_count; ++i) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[i], mask));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[i]));
dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[i], mask));
dxbc::Src eM_src(dxbc::Src::R(eM_temps[i]));
// TODO(Triang3l): NaN should become zero, not -range.
DxbcOpMax(eM_dest, eM_src, -range_src);
DxbcOpMin(eM_dest, eM_src, range_src);
a_.OpMax(eM_dest, eM_src, -range_src);
a_.OpMin(eM_dest, eM_src, range_src);
}
}
DxbcOpElse();
a_.OpElse();
{
float range[4];
for (uint32_t i = 0; i < 4; ++i) {
range[i] = float((uint32_t(1) << bits[i]) - 1);
}
DxbcSrc range_src(DxbcSrc::LP(range));
DxbcOpIf(false, is_integer);
dxbc::Src range_src(dxbc::Src::LP(range));
a_.OpIf(false, is_integer);
for (uint32_t i = 0; i < eM_count; ++i) {
uint32_t eM_temp = eM_temps[i];
DxbcOpMul(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp), range_src);
a_.OpMul(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp), range_src);
}
DxbcOpEndIf();
a_.OpEndIf();
for (uint32_t i = 0; i < eM_count; ++i) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[i], mask));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[i]));
DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(0.0f));
DxbcOpMin(eM_dest, eM_src, range_src);
dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[i], mask));
dxbc::Src eM_src(dxbc::Src::R(eM_temps[i]));
a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(0.0f));
a_.OpMin(eM_dest, eM_src, range_src);
}
}
DxbcOpEndIf();
a_.OpEndIf();
for (uint32_t i = 0; i < eM_count; ++i) {
uint32_t eM_temp = eM_temps[i];
// Round to the nearest integer, according to the rules of handling integer
// formats in Direct3D.
// TODO(Triang3l): Round by adding +-0.5, not with round_ne.
DxbcOpRoundNE(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp));
DxbcOpFToI(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp));
DxbcDest eM_packed_dest(DxbcDest::R(eM_temp, 0b0001));
DxbcSrc eM_packed_src(DxbcSrc::R(eM_temp, DxbcSrc::kXXXX));
a_.OpRoundNE(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp));
a_.OpFToI(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp));
dxbc::Dest eM_packed_dest(dxbc::Dest::R(eM_temp, 0b0001));
dxbc::Src eM_packed_src(dxbc::Src::R(eM_temp, dxbc::Src::kXXXX));
uint32_t offset = bits[0];
for (uint32_t j = 1; j < 4; ++j) {
if (!bits[j]) {
continue;
}
DxbcOpBFI(eM_packed_dest, DxbcSrc::LU(bits[j]), DxbcSrc::LU(offset),
DxbcSrc::R(eM_temp).Select(j), eM_packed_src);
a_.OpBFI(eM_packed_dest, dxbc::Src::LU(bits[j]), dxbc::Src::LU(offset),
dxbc::Src::R(eM_temp).Select(j), eM_packed_src);
offset += bits[j];
}
}
@ -100,43 +100,43 @@ void DxbcShaderTranslator::ExportToMemory() {
// Safety check if the shared memory is bound as UAV.
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
DxbcOpAnd(DxbcDest::R(control_temp, 0b0001),
DxbcSrc::CB(cbuffer_index_system_constants_,
a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp),
DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV));
if (IsDxbcPixelShader()) {
dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV));
if (is_pixel_shader()) {
// Disable memexport in pixel shaders with supersampling since VPOS is
// ambiguous.
if (edram_rov_used_) {
system_constants_used_ |= 1ull
<< kSysConst_EdramResolutionSquareScale_Index;
DxbcOpULT(DxbcDest::R(control_temp, 0b0010),
DxbcSrc::CB(cbuffer_index_system_constants_,
a_.OpULT(dxbc::Dest::R(control_temp, 0b0010),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_EdramResolutionSquareScale_Vec)
.Select(kSysConst_EdramResolutionSquareScale_Comp),
DxbcSrc::LU(2));
DxbcOpAnd(DxbcDest::R(control_temp, 0b0001),
DxbcSrc::R(control_temp, DxbcSrc::kXXXX),
DxbcSrc::R(control_temp, DxbcSrc::kYYYY));
dxbc::Src::LU(2));
a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001),
dxbc::Src::R(control_temp, dxbc::Src::kXXXX),
dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
} else {
// Enough to check just Y because it's scaled for both 2x and 4x.
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
DxbcOpMovC(DxbcDest::R(control_temp, 0b0001),
DxbcSrc::CB(cbuffer_index_system_constants_,
a_.OpMovC(dxbc::Dest::R(control_temp, 0b0001),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_SampleCountLog2_Vec)
.Select(kSysConst_SampleCountLog2_Comp + 1),
DxbcSrc::LU(0), DxbcSrc::R(control_temp, DxbcSrc::kXXXX));
dxbc::Src::LU(0), dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
}
}
// Check if memexport can be done.
DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kXXXX));
a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
// control_temp.x is now free.
for (uint32_t i = 0; i < kMaxMemExports; ++i) {
for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) {
uint32_t eA_temp = system_temps_memexport_address_[i];
if (eA_temp == UINT32_MAX) {
// Export not used.
@ -160,21 +160,21 @@ void DxbcShaderTranslator::ExportToMemory() {
}
// Swap red and blue if needed.
DxbcOpAnd(DxbcDest::R(control_temp, 0b0001),
DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ),
DxbcSrc::LU(uint32_t(1) << 19));
a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001),
dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ),
dxbc::Src::LU(uint32_t(1) << 19));
for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j];
DxbcOpMovC(DxbcDest::R(eM_temp, 0b0101),
DxbcSrc::R(control_temp, DxbcSrc::kXXXX),
DxbcSrc::R(eM_temp, 0b000010), DxbcSrc::R(eM_temp));
a_.OpMovC(dxbc::Dest::R(eM_temp, 0b0101),
dxbc::Src::R(control_temp, dxbc::Src::kXXXX),
dxbc::Src::R(eM_temp, 0b000010), dxbc::Src::R(eM_temp));
}
// Initialize element size in control_temp.x to 4 bytes as this is the most
// common size.
DxbcDest element_size_dest(DxbcDest::R(control_temp, 0b0001));
DxbcSrc element_size_src(DxbcSrc::R(control_temp, DxbcSrc::kXXXX));
DxbcOpMov(element_size_dest, DxbcSrc::LU(4));
dxbc::Dest element_size_dest(dxbc::Dest::R(control_temp, 0b0001));
dxbc::Src element_size_src(dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
a_.OpMov(element_size_dest, dxbc::Src::LU(4));
// Each eM should get a packed value in the destination format now.
@ -182,285 +182,288 @@ void DxbcShaderTranslator::ExportToMemory() {
// Y - signedness if fixed-point.
// Z - fractional/integer if fixed-point.
// W - color format.
DxbcOpUBFE(DxbcDest::R(control_temp, 0b1110), DxbcSrc::LU(0, 1, 1, 6),
DxbcSrc::LU(0, 16, 17, 8), DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ));
DxbcSrc is_signed(DxbcSrc::R(control_temp, DxbcSrc::kYYYY));
DxbcSrc is_integer(DxbcSrc::R(control_temp, DxbcSrc::kZZZZ));
a_.OpUBFE(dxbc::Dest::R(control_temp, 0b1110), dxbc::Src::LU(0, 1, 1, 6),
dxbc::Src::LU(0, 16, 17, 8),
dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ));
dxbc::Src is_signed(dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
dxbc::Src is_integer(dxbc::Src::R(control_temp, dxbc::Src::kZZZZ));
// Convert and pack the format.
DxbcOpSwitch(DxbcSrc::R(control_temp, DxbcSrc::kWWWW));
a_.OpSwitch(dxbc::Src::R(control_temp, dxbc::Src::kWWWW));
// control_temp.w is now free.
{
// k_8_8_8_8
// k_8_8_8_8_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8)));
DxbcOpCase(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16)));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8)));
a_.OpCase(dxbc::Src::LU(
uint32_t(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16)));
{
uint32_t bits[4] = {8, 8, 8, 8};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed);
}
DxbcOpBreak();
a_.OpBreak();
// k_2_10_10_10
// k_2_10_10_10_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_2_10_10_10)));
DxbcOpCase(DxbcSrc::LU(
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_2_10_10_10)));
a_.OpCase(dxbc::Src::LU(
uint32_t(xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16)));
{
uint32_t bits[4] = {10, 10, 10, 2};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed);
}
DxbcOpBreak();
a_.OpBreak();
// k_10_11_11
// k_10_11_11_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_10_11_11)));
DxbcOpCase(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16)));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_10_11_11)));
a_.OpCase(dxbc::Src::LU(
uint32_t(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16)));
{
uint32_t bits[4] = {11, 11, 10};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed);
}
DxbcOpBreak();
a_.OpBreak();
// k_11_11_10
// k_11_11_10_AS_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_11_11_10)));
DxbcOpCase(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16)));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_11_11_10)));
a_.OpCase(dxbc::Src::LU(
uint32_t(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16)));
{
uint32_t bits[4] = {10, 11, 11};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed);
}
DxbcOpBreak();
a_.OpBreak();
// k_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16)));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16)));
{
uint32_t bits[4] = {16, 16};
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer,
is_signed);
}
DxbcOpBreak();
a_.OpBreak();
// k_16_16_16_16
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(8));
DxbcOpIf(true, is_signed);
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16)));
a_.OpMov(element_size_dest, dxbc::Src::LU(8));
a_.OpIf(true, is_signed);
{
DxbcOpIf(false, is_integer);
a_.OpIf(false, is_integer);
for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j];
DxbcOpMul(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp),
DxbcSrc::LF(32767.0f));
a_.OpMul(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp),
dxbc::Src::LF(32767.0f));
}
DxbcOpEndIf();
a_.OpEndIf();
for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j]));
dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
// TODO(Triang3l): NaN should become zero, not -range.
DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(-32767.0f));
DxbcOpMin(eM_dest, eM_src, DxbcSrc::LF(32767.0f));
a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(-32767.0f));
a_.OpMin(eM_dest, eM_src, dxbc::Src::LF(32767.0f));
}
}
DxbcOpElse();
a_.OpElse();
{
DxbcOpIf(false, is_integer);
a_.OpIf(false, is_integer);
for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j];
DxbcOpMul(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp),
DxbcSrc::LF(65535.0f));
a_.OpMul(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp),
dxbc::Src::LF(65535.0f));
}
DxbcOpEndIf();
a_.OpEndIf();
for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j]));
DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(0.0f));
DxbcOpMin(eM_dest, eM_src, DxbcSrc::LF(65535.0f));
dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(0.0f));
a_.OpMin(eM_dest, eM_src, dxbc::Src::LF(65535.0f));
}
}
DxbcOpEndIf();
a_.OpEndIf();
for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j];
// Round to the nearest integer, according to the rules of handling
// integer formats in Direct3D.
// TODO(Triang3l): Round by adding +-0.5, not with round_ne.
DxbcOpRoundNE(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp));
DxbcOpFToI(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp));
DxbcOpBFI(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::LU(16),
DxbcSrc::LU(16), DxbcSrc::R(eM_temp, 0b1101),
DxbcSrc::R(eM_temp, 0b1000));
a_.OpRoundNE(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp));
a_.OpFToI(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp));
a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::LU(16),
dxbc::Src::LU(16), dxbc::Src::R(eM_temp, 0b1101),
dxbc::Src::R(eM_temp, 0b1000));
}
DxbcOpBreak();
a_.OpBreak();
// k_16_16_FLOAT
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_FLOAT)));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_FLOAT)));
for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j];
DxbcOpF32ToF16(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::R(eM_temp));
DxbcOpBFI(DxbcDest::R(eM_temp, 0b0001), DxbcSrc::LU(16),
DxbcSrc::LU(16), DxbcSrc::R(eM_temp, DxbcSrc::kYYYY),
DxbcSrc::R(eM_temp, DxbcSrc::kXXXX));
a_.OpF32ToF16(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::R(eM_temp));
a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0001), dxbc::Src::LU(16),
dxbc::Src::LU(16), dxbc::Src::R(eM_temp, dxbc::Src::kYYYY),
dxbc::Src::R(eM_temp, dxbc::Src::kXXXX));
}
DxbcOpBreak();
a_.OpBreak();
// k_16_16_16_16_FLOAT
DxbcOpCase(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16_FLOAT)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(8));
a_.OpCase(
dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16_FLOAT)));
a_.OpMov(element_size_dest, dxbc::Src::LU(8));
for (uint32_t j = 0; j < eM_count; ++j) {
uint32_t eM_temp = eM_temps[j];
DxbcOpF32ToF16(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp));
DxbcOpBFI(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::LU(16),
DxbcSrc::LU(16), DxbcSrc::R(eM_temp, 0b1101),
DxbcSrc::R(eM_temp, 0b1000));
a_.OpF32ToF16(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp));
a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::LU(16),
dxbc::Src::LU(16), dxbc::Src::R(eM_temp, 0b1101),
dxbc::Src::R(eM_temp, 0b1000));
}
DxbcOpBreak();
a_.OpBreak();
// k_32_FLOAT
// Already in the destination format, 4 bytes per element already
// selected.
// k_32_32_FLOAT
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_32_32_FLOAT)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(8));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_32_32_FLOAT)));
a_.OpMov(element_size_dest, dxbc::Src::LU(8));
// Already in the destination format.
DxbcOpBreak();
a_.OpBreak();
// k_32_32_32_32_FLOAT
DxbcOpCase(
DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_32_32_32_32_FLOAT)));
DxbcOpMov(element_size_dest, DxbcSrc::LU(16));
a_.OpCase(
dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_32_32_32_32_FLOAT)));
a_.OpMov(element_size_dest, dxbc::Src::LU(16));
// Already in the destination format.
DxbcOpBreak();
a_.OpBreak();
}
DxbcOpEndSwitch();
a_.OpEndSwitch();
// control_temp.yz are now free.
// Do endian swap.
{
DxbcDest endian_dest(DxbcDest::R(control_temp, 0b0010));
DxbcSrc endian_src(DxbcSrc::R(control_temp, DxbcSrc::kYYYY));
dxbc::Dest endian_dest(dxbc::Dest::R(control_temp, 0b0010));
dxbc::Src endian_src(dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
// Extract endianness into control_temp.y.
DxbcOpAnd(endian_dest, DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ),
DxbcSrc::LU(0b111));
a_.OpAnd(endian_dest, dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ),
dxbc::Src::LU(0b111));
// Change 8-in-64 and 8-in-128 to 8-in-32.
for (uint32_t j = 0; j < 2; ++j) {
DxbcOpIEq(DxbcDest::R(control_temp, 0b0100), endian_src,
DxbcSrc::LU(uint32_t(j ? xenos::Endian128::k8in128
a_.OpIEq(dxbc::Dest::R(control_temp, 0b0100), endian_src,
dxbc::Src::LU(uint32_t(j ? xenos::Endian128::k8in128
: xenos::Endian128::k8in64)));
for (uint32_t k = 0; k < eM_count; ++k) {
uint32_t eM_temp = eM_temps[k];
DxbcOpMovC(DxbcDest::R(eM_temp),
DxbcSrc::R(control_temp, DxbcSrc::kZZZZ),
DxbcSrc::R(eM_temp, j ? 0b00011011 : 0b10110001),
DxbcSrc::R(eM_temp));
a_.OpMovC(dxbc::Dest::R(eM_temp),
dxbc::Src::R(control_temp, dxbc::Src::kZZZZ),
dxbc::Src::R(eM_temp, j ? 0b00011011 : 0b10110001),
dxbc::Src::R(eM_temp));
}
DxbcOpMovC(endian_dest, DxbcSrc::R(control_temp, DxbcSrc::kZZZZ),
DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32)), endian_src);
a_.OpMovC(endian_dest, dxbc::Src::R(control_temp, dxbc::Src::kZZZZ),
dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32)),
endian_src);
}
uint32_t swap_temp = PushSystemTemp();
DxbcDest swap_temp_dest(DxbcDest::R(swap_temp));
DxbcSrc swap_temp_src(DxbcSrc::R(swap_temp));
dxbc::Dest swap_temp_dest(dxbc::Dest::R(swap_temp));
dxbc::Src swap_temp_src(dxbc::Src::R(swap_temp));
// 8-in-16 or one half of 8-in-32.
DxbcOpSwitch(endian_src);
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in16)));
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32)));
a_.OpSwitch(endian_src);
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in16)));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32)));
for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j]));
dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
// Temp = X0Z0.
DxbcOpAnd(swap_temp_dest, eM_src, DxbcSrc::LU(0x00FF00FF));
a_.OpAnd(swap_temp_dest, eM_src, dxbc::Src::LU(0x00FF00FF));
// eM = YZW0.
DxbcOpUShR(eM_dest, eM_src, DxbcSrc::LU(8));
a_.OpUShR(eM_dest, eM_src, dxbc::Src::LU(8));
// eM = Y0W0.
DxbcOpAnd(eM_dest, eM_src, DxbcSrc::LU(0x00FF00FF));
a_.OpAnd(eM_dest, eM_src, dxbc::Src::LU(0x00FF00FF));
// eM = YXWZ.
DxbcOpUMAd(eM_dest, swap_temp_src, DxbcSrc::LU(256), eM_src);
a_.OpUMAd(eM_dest, swap_temp_src, dxbc::Src::LU(256), eM_src);
}
DxbcOpBreak();
DxbcOpEndSwitch();
a_.OpBreak();
a_.OpEndSwitch();
// 16-in-32 or another half of 8-in-32.
DxbcOpSwitch(endian_src);
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32)));
DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k16in32)));
a_.OpSwitch(endian_src);
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32)));
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k16in32)));
for (uint32_t j = 0; j < eM_count; ++j) {
DxbcDest eM_dest(DxbcDest::R(eM_temps[j]));
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j]));
dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j]));
dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
// Temp = ZW00.
DxbcOpUShR(swap_temp_dest, eM_src, DxbcSrc::LU(16));
a_.OpUShR(swap_temp_dest, eM_src, dxbc::Src::LU(16));
// eM = ZWXY.
DxbcOpBFI(eM_dest, DxbcSrc::LU(16), DxbcSrc::LU(16), eM_src,
a_.OpBFI(eM_dest, dxbc::Src::LU(16), dxbc::Src::LU(16), eM_src,
swap_temp_src);
}
DxbcOpBreak();
DxbcOpEndSwitch();
a_.OpBreak();
a_.OpEndSwitch();
// Release swap_temp.
PopSystemTemp();
}
// control_temp.yz are now free.
DxbcDest address_dest(DxbcDest::R(eA_temp, 0b0001));
DxbcSrc address_src(DxbcSrc::R(eA_temp, DxbcSrc::kXXXX));
dxbc::Dest address_dest(dxbc::Dest::R(eA_temp, 0b0001));
dxbc::Src address_src(dxbc::Src::R(eA_temp, dxbc::Src::kXXXX));
// Multiply the base address by dword size, also dropping the 0x40000000
// bit.
DxbcOpIShL(address_dest, address_src, DxbcSrc::LU(2));
a_.OpIShL(address_dest, address_src, dxbc::Src::LU(2));
// Drop the exponent in the element index.
DxbcOpAnd(DxbcDest::R(eA_temp, 0b0010), DxbcSrc::R(eA_temp, DxbcSrc::kYYYY),
DxbcSrc::LU((1 << 23) - 1));
a_.OpAnd(dxbc::Dest::R(eA_temp, 0b0010),
dxbc::Src::R(eA_temp, dxbc::Src::kYYYY),
dxbc::Src::LU((1 << 23) - 1));
// Add the offset of the first written element to the base address.
DxbcOpUMAd(address_dest, DxbcSrc::R(eA_temp, DxbcSrc::kYYYY),
a_.OpUMAd(address_dest, dxbc::Src::R(eA_temp, dxbc::Src::kYYYY),
element_size_src, address_src);
// Do the writes.
DxbcSrc eM_written_src(
DxbcSrc::R(system_temp_memexport_written_).Select(i >> 2));
dxbc::Src eM_written_src(
dxbc::Src::R(system_temp_memexport_written_).Select(i >> 2));
uint32_t eM_written_base = 1u << ((i & 3) << 3);
for (uint32_t j = 0; j < eM_count; ++j) {
// Go to the next eM#.
uint32_t eM_relative_offset = eM_offsets[j] - (j ? eM_offsets[j - 1] : 0);
if (eM_relative_offset) {
if (eM_relative_offset == 1) {
DxbcOpIAdd(address_dest, element_size_src, address_src);
a_.OpIAdd(address_dest, element_size_src, address_src);
} else {
DxbcOpUMAd(address_dest, DxbcSrc::LU(eM_relative_offset),
a_.OpUMAd(address_dest, dxbc::Src::LU(eM_relative_offset),
element_size_src, address_src);
}
}
// Check if the eM# was actually written to on the execution path.
DxbcOpAnd(DxbcDest::R(control_temp, 0b0010), eM_written_src,
DxbcSrc::LU(eM_written_base << eM_offsets[j]));
DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kYYYY));
a_.OpAnd(dxbc::Dest::R(control_temp, 0b0010), eM_written_src,
dxbc::Src::LU(eM_written_base << eM_offsets[j]));
a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kYYYY));
// Write the element of the needed size.
DxbcSrc eM_src(DxbcSrc::R(eM_temps[j]));
DxbcOpSwitch(element_size_src);
dxbc::Src eM_src(dxbc::Src::R(eM_temps[j]));
a_.OpSwitch(element_size_src);
for (uint32_t k = 1; k <= 4; k <<= 1) {
DxbcOpCase(DxbcSrc::LU(k * 4));
a_.OpCase(dxbc::Src::LU(k * 4));
if (uav_index_shared_memory_ == kBindingIndexUnallocated) {
uav_index_shared_memory_ = uav_count_++;
}
DxbcOpStoreRaw(
DxbcDest::U(uav_index_shared_memory_,
a_.OpStoreRaw(
dxbc::Dest::U(uav_index_shared_memory_,
uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1),
address_src, eM_src);
DxbcOpBreak();
a_.OpBreak();
}
DxbcOpEndSwitch();
DxbcOpEndIf();
a_.OpEndSwitch();
a_.OpEndIf();
}
// control_temp.y is now free.
}
// Close the memexport possibility check.
DxbcOpEndIf();
a_.OpEndIf();
// Release control_temp.
PopSystemTemp();

File diff suppressed because it is too large Load Diff

View File

@ -40,9 +40,71 @@ DEFINE_bool(
"be fully covered when MSAA is used with fullscreen passes.",
"GPU");
DEFINE_bool(
ssaa_scale_gradients, true,
"When using SSAA instead of native MSAA, adjust texture coordinate "
"derivatives used for mipmap selection, and getGradients results, to guest "
"pixels as if true MSAA rather than SSAA was used.\n"
"Reduces bandwidth usage of texture fetching.",
"GPU");
// Selects how host depth values are converted to the guest 20e4 (float24)
// format. The string is interpreted by flags::GetDepthFloat24Conversion in
// this file: only "truncate" and "round" are matched explicitly, every other
// value (including the empty default, "any" and "on_copy") selects on_copy.
DEFINE_string(
depth_float24_conversion, "",
"Method for converting 32-bit Z values to 20e4 floating point when using "
"host depth buffers without native 20e4 support (when not using rasterizer-"
"ordered views / fragment shader interlocks to perform depth testing "
"manually).\n"
"Use: [any, on_copy, truncate, round]\n"
" on_copy:\n"
" Do depth testing at host precision, converting when copying between "
"host depth buffers and the EDRAM buffer to support reinterpretation, "
"maintaining two copies, in both host and 20e4 formats, for reloading data "
"to host depth buffers when it wasn't overwritten.\n"
" + Highest performance, allows early depth test and writing.\n"
" + Host MSAA is possible with pixel-rate shading where supported.\n"
" - EDRAM > RAM > EDRAM depth buffer round trip done in certain games "
"(such as GTA IV) destroys precision irreparably, causing artifacts if "
"another rendering pass is done after the EDRAM reupload.\n"
" truncate:\n"
" Convert to 20e4 directly in pixel shaders, always rounding down.\n"
" + Good performance, conservative early depth test is possible.\n"
" + No precision loss when anything changes in the storage of the depth "
"buffer, EDRAM > RAM > EDRAM copying preserves precision.\n"
" - Rounding mode is incorrect, sometimes giving results smaller than "
"they should be - may cause inaccuracy especially in edge cases when the "
"game wants to write an exact value.\n"
" - Host MSAA is only possible at SSAA speed, with per-sample shading.\n"
" round:\n"
" Convert to 20e4 directly in pixel shaders, correctly rounding to the "
"nearest even.\n"
" + Highest accuracy.\n"
" - Significantly limited performance, early depth test is not possible.\n"
" - Host MSAA is only possible at SSAA speed, with per-sample shading.\n"
" Any other value:\n"
" Choose what is considered the most optimal (currently \"on_copy\").",
"GPU");
DEFINE_int32(query_occlusion_fake_sample_count, 1000,
"If set to -1 no sample counts are written, games may hang. Else, "
"the sample count of every tile will be incremented on every "
"EVENT_WRITE_ZPD by this number. Setting this to 0 means "
"everything is reported as occluded.",
"GPU");
namespace xe {
namespace gpu {
namespace flags {
// Translates the depth_float24_conversion cvar string into the corresponding
// DepthFloat24Conversion value.
DepthFloat24Conversion GetDepthFloat24Conversion() {
  const auto& requested = cvars::depth_float24_conversion;
  // Everything except the two explicitly recognized in-shader conversion
  // modes falls back to conversion on copy.
  DepthFloat24Conversion conversion = DepthFloat24Conversion::kOnCopy;
  if (requested == "truncate") {
    conversion = DepthFloat24Conversion::kOnOutputTruncating;
  } else if (requested == "round") {
    conversion = DepthFloat24Conversion::kOnOutputRounding;
  }
  return conversion;
}
} // namespace flags
} // namespace gpu
} // namespace xe

View File

@ -22,6 +22,71 @@ DECLARE_bool(gpu_allow_invalid_fetch_constants);
DECLARE_bool(half_pixel_offset);
DECLARE_bool(ssaa_scale_gradients);
DECLARE_string(depth_float24_conversion);
DECLARE_int32(query_occlusion_fake_sample_count);
namespace xe {
namespace gpu {
namespace flags {
// Strategy for converting host depth values to the guest 20e4 (float24) depth
// format. The active value is returned by GetDepthFloat24Conversion(), which
// derives it from the depth_float24_conversion cvar declared above.
enum class DepthFloat24Conversion {
// Doing depth test at the host precision, converting to 20e4 to support
// reinterpretation, but keeping a separate EDRAM view containing depth values
// in the host format. When copying from the EDRAM buffer to host depth
// buffers, writing the stored host pixel if stored_f24 == to_f24(stored_host)
// (otherwise it was overwritten by something else, like clearing, or a color
// buffer; this is inexact though, and will incorrectly load pixels that were
// overwritten by something else in the EDRAM, but turned out to have the same
// value on the guest as before - an outdated host-precision value will be
// loaded in these cases instead).
//
// EDRAM > RAM, then reusing the EDRAM region for something else > EDRAM round
// trip destroys precision beyond repair.
//
// Full host early Z and MSAA with pixel-rate shading are supported.
kOnCopy,
// Converting the depth to the closest host value representable exactly as a
// 20e4 float in pixel shaders, to support invariance in cases when the guest
// reuploads a previously resolved depth buffer to the EDRAM, rounding towards
// zero (which contradicts the rounding used by the Direct3D 9 reference
// rasterizer, but allows less-than-or-equal pixel shader depth output to be
// used to preserve most of early Z culling when the game is using reversed
// depth, which is the usual way of doing depth testing on the Xbox 360 and of
// utilizing the advantages of a floating-point encoding).
//
// With MSAA, pixel shaders must run at sample frequency - otherwise, if the
// depth is the same for the entire pixel, intersections of polygons cannot be
// antialiased.
//
// Important usage note: When using this mode, bounds of the fixed-function
// viewport must be converted to and back from float24 too (preferably using
// correct rounding to the nearest even, to reduce the error already caused by
// truncation rather than to amplify it). This ensures that clamping to the
// viewport bounds, which happens after the pixel shader even if it overwrites
// the resulting depth, is never done to a value not representable as float24
// (for example, if the minimum Z is a number too small to be represented as
// float24, but not zero, it won't be possible to write what should become
// 0x000000 to the depth buffer). Note that this may add some error to the
// depth values from the rasterizer; however, modifying Z in the vertex shader
// to make interpolated depth values would cause clipping to be done to
// different bounds, which may be more undesirable, especially in cases when Z
// is explicitly set to a value like 0 or W (in such cases, the adjusted
// polygon may go outside 0...W in clip space and disappear).
kOnOutputTruncating,
// Similar to kOnOutputTruncating, but rounding to the nearest even, more
// correctly, however, because the resulting depth can be bigger than the
// original host value, early depth testing can't be used at all. Same
// viewport usage rules apply.
kOnOutputRounding,
};
DepthFloat24Conversion GetDepthFloat24Conversion();
} // namespace flags
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GPU_FLAGS_H_

View File

@ -276,8 +276,7 @@ void GraphicsSystem::ClearCaches() {
}
void GraphicsSystem::InitializeShaderStorage(
const std::filesystem::path& storage_root, uint32_t title_id,
bool blocking) {
const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
if (!cvars::store_shaders) {
return;
}
@ -285,21 +284,18 @@ void GraphicsSystem::InitializeShaderStorage(
if (command_processor_->is_paused()) {
// Safe to run on any thread while the command processor is paused, no
// race condition.
command_processor_->InitializeShaderStorage(storage_root, title_id, true);
command_processor_->InitializeShaderStorage(cache_root, title_id, true);
} else {
xe::threading::Fence fence;
command_processor_->CallInThread(
[this, storage_root, title_id, &fence]() {
command_processor_->InitializeShaderStorage(storage_root, title_id,
true);
command_processor_->CallInThread([this, cache_root, title_id, &fence]() {
command_processor_->InitializeShaderStorage(cache_root, title_id, true);
fence.Signal();
});
fence.Wait();
}
} else {
command_processor_->CallInThread([this, storage_root, title_id]() {
command_processor_->InitializeShaderStorage(storage_root, title_id,
false);
command_processor_->CallInThread([this, cache_root, title_id]() {
command_processor_->InitializeShaderStorage(cache_root, title_id, false);
});
}
}

View File

@ -63,7 +63,7 @@ class GraphicsSystem {
virtual void ClearCaches();
// Requests initialization of the persistent shader storage for the given
// title, with cache_root forwarded to the command processor. The definition
// returns early when the store_shaders cvar is disabled.
// NOTE(review): `blocking` appears to select between waiting for the command
// processor to finish and queueing the request asynchronously - confirm
// against the definition.
void InitializeShaderStorage(const std::filesystem::path& cache_root,
                             uint32_t title_id, bool blocking);
void RequestFrameTrace();

View File

@ -97,6 +97,7 @@ union SQ_PROGRAM_CNTL {
// Note from a2xx.xml:
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG,
// but high bit is set to indicate "0 registers used".
// (Register count = (num_reg & 0x80) ? 0 : (num_reg + 1))
uint32_t vs_num_reg : 8; // +0
uint32_t ps_num_reg : 8; // +8
uint32_t vs_resource : 1; // +16
@ -260,9 +261,9 @@ union PA_SU_SC_MODE_CNTL {
uint32_t multi_prim_ib_ena : 1; // +21
uint32_t : 1; // +22
uint32_t quad_order_enable : 1; // +23
uint32_t sc_one_quad_per_clock : 1; // +24
// WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on
// Adreno.
// TODO(Triang3l): Find SC_ONE_QUAD_PER_CLOCK offset.
};
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL;
@ -298,7 +299,7 @@ union PA_SC_VIZ_QUERY {
// discard geometry after test (but use for testing)
uint32_t kill_pix_post_hi_z : 1; // +7
// not used with d3d
uint32_t kill_pix_detail_mask : 1; // +8
uint32_t kill_pix_post_detail_mask : 1; // +8
};
uint32_t value;
static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY;

View File

@ -12,7 +12,7 @@
#include <cstring>
#include <memory>
#include "third_party/xxhash/xxhash.h"
#include "xenia/base/xxhash.h"
namespace xe {
namespace gpu {
@ -51,7 +51,7 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
}
// Returns a 64-bit hash of this SamplerInfo computed over the raw bytes of
// the object with XXH3 (xxHash, provided via xenia/base/xxhash.h).
// NOTE(review): hashing the object representation assumes the structure has
// no uninitialized padding bytes - confirm against the SamplerInfo layout.
uint64_t SamplerInfo::hash() const {
  return XXH3_64bits(this, sizeof(SamplerInfo));
}
} // namespace gpu

View File

@ -11,6 +11,7 @@
#include <cinttypes>
#include <cstring>
#include <utility>
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/filesystem.h"
@ -31,9 +32,13 @@ Shader::Shader(xenos::ShaderType shader_type, uint64_t ucode_data_hash,
xe::copy_and_swap(ucode_data_.data(), ucode_dwords, ucode_dword_count);
}
// The shader owns its translations through the raw pointers stored in
// translations_ (created in GetOrCreateTranslation), so they must be released
// here.
Shader::~Shader() {
  for (const auto& translation_pair : translations_) {
    delete translation_pair.second;
  }
}
std::string Shader::GetTranslatedBinaryString() const {
std::string Shader::Translation::GetTranslatedBinaryString() const {
std::string result;
result.resize(translated_binary_.size());
std::memcpy(const_cast<char*>(result.data()), translated_binary_.data(),
@ -41,56 +46,118 @@ std::string Shader::GetTranslatedBinaryString() const {
return result;
}
std::pair<std::filesystem::path, std::filesystem::path> Shader::Dump(
const std::filesystem::path& base_path, const char* path_prefix) {
std::pair<std::filesystem::path, std::filesystem::path>
Shader::Translation::Dump(const std::filesystem::path& base_path,
const char* path_prefix) const {
if (!is_valid()) {
return std::make_pair(std::filesystem::path(), std::filesystem::path());
}
std::filesystem::path path = base_path;
// Ensure target path exists.
auto target_path = base_path;
std::filesystem::path target_path = base_path;
if (!target_path.empty()) {
target_path = std::filesystem::absolute(target_path);
std::filesystem::create_directories(target_path);
}
auto base_name =
fmt::format("shader_{}_{:016X}", path_prefix, ucode_data_hash_);
const char* type_extension =
shader().type() == xenos::ShaderType::kVertex ? "vert" : "frag";
std::string txt_name, bin_name;
if (shader_type_ == xenos::ShaderType::kVertex) {
txt_name = base_name + ".vert";
bin_name = base_name + ".bin.vert";
} else {
txt_name = base_name + ".frag";
bin_name = base_name + ".bin.frag";
std::filesystem::path binary_path =
target_path / fmt::format("shader_{:016X}_{:016X}.{}.bin.{}",
shader().ucode_data_hash(), modification(),
path_prefix, type_extension);
FILE* binary_file = filesystem::OpenFile(binary_path, "wb");
if (binary_file) {
fwrite(translated_binary_.data(), sizeof(*translated_binary_.data()),
translated_binary_.size(), binary_file);
fclose(binary_file);
}
std::filesystem::path txt_path, bin_path;
txt_path = base_path / txt_name;
bin_path = base_path / bin_name;
FILE* f = filesystem::OpenFile(txt_path, "wb");
if (f) {
fwrite(translated_binary_.data(), 1, translated_binary_.size(), f);
fprintf(f, "\n\n");
auto ucode_disasm_ptr = ucode_disassembly().c_str();
while (*ucode_disasm_ptr) {
auto line_end = std::strchr(ucode_disasm_ptr, '\n');
fprintf(f, "// ");
fwrite(ucode_disasm_ptr, 1, line_end - ucode_disasm_ptr + 1, f);
ucode_disasm_ptr = line_end + 1;
}
fprintf(f, "\n\n");
std::filesystem::path disasm_path;
if (!host_disassembly_.empty()) {
fprintf(f, "\n\n/*\n%s\n*/\n", host_disassembly_.c_str());
disasm_path =
target_path / fmt::format("shader_{:016X}_{:016X}.{}.{}",
shader().ucode_data_hash(), modification(),
path_prefix, type_extension);
FILE* disasm_file = filesystem::OpenFile(disasm_path, "w");
if (disasm_file) {
fwrite(host_disassembly_.data(), sizeof(*host_disassembly_.data()),
host_disassembly_.size(), disasm_file);
fclose(disasm_file);
}
fclose(f);
}
f = filesystem::OpenFile(bin_path, "wb");
if (f) {
fwrite(ucode_data_.data(), 4, ucode_data_.size(), f);
fclose(f);
return std::make_pair(std::move(binary_path), std::move(disasm_path));
}
return {std::move(txt_path), std::move(bin_path)};
// Returns the translation registered for the given modification bits,
// creating it via CreateTranslationInstance and storing it in translations_
// if there is none yet. If is_new is not null, it receives whether the
// translation was created by this call.
Shader::Translation* Shader::GetOrCreateTranslation(uint64_t modification,
                                                    bool* is_new) {
  Translation* translation;
  const auto found = translations_.find(modification);
  const bool created = found == translations_.end();
  if (created) {
    translation = CreateTranslationInstance(modification);
    translations_.emplace(modification, translation);
  } else {
    translation = found->second;
  }
  if (is_new) {
    *is_new = created;
  }
  return translation;
}
// Deletes the translation stored for the given modification bits and removes
// its entry from translations_. Does nothing if no such translation exists.
void Shader::DestroyTranslation(uint64_t modification) {
  auto entry = translations_.find(modification);
  if (entry != translations_.end()) {
    delete entry->second;
    translations_.erase(entry);
  }
}
std::pair<std::filesystem::path, std::filesystem::path> Shader::DumpUcode(
const std::filesystem::path& base_path) const {
// Ensure target path exists.
std::filesystem::path target_path = base_path;
if (!target_path.empty()) {
target_path = std::filesystem::absolute(target_path);
std::filesystem::create_directories(target_path);
}
const char* type_extension =
type() == xenos::ShaderType::kVertex ? "vert" : "frag";
std::filesystem::path binary_path =
target_path / fmt::format("shader_{:016X}.ucode.bin.{}",
ucode_data_hash(), type_extension);
FILE* binary_file = filesystem::OpenFile(binary_path, "wb");
if (binary_file) {
fwrite(ucode_data().data(), sizeof(*ucode_data().data()),
ucode_data().size(), binary_file);
fclose(binary_file);
}
std::filesystem::path disasm_path;
if (is_ucode_analyzed()) {
disasm_path = target_path / fmt::format("shader_{:016X}.ucode.{}",
ucode_data_hash(), type_extension);
FILE* disasm_file = filesystem::OpenFile(disasm_path, "w");
if (disasm_file) {
fwrite(ucode_disassembly().data(), sizeof(*ucode_disassembly().data()),
ucode_disassembly().size(), disasm_file);
fclose(disasm_file);
}
}
return std::make_pair(std::move(binary_path), std::move(disasm_path));
}
// Allocates the Translation object for this shader with the given
// modification bits. Called by GetOrCreateTranslation, which stores the
// returned pointer in translations_; the object is later deleted by
// DestroyTranslation or by the Shader destructor.
// NOTE(review): presumably overridden by backend-specific Shader subclasses
// that need their own Translation type - confirm in the header.
Shader::Translation* Shader::CreateTranslationInstance(uint64_t modification) {
// Default implementation for simple cases like ucode disassembly.
return new Translation(*this, modification);
}
} // namespace gpu

View File

@ -11,8 +11,12 @@
#define XENIA_GPU_SHADER_H_
#include <algorithm>
#include <cstdint>
#include <filesystem>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "xenia/base/math.h"
@ -589,8 +593,45 @@ struct ParsedAluInstruction {
void Disassemble(StringBuffer* out) const;
};
// Parsing entry points for individual ucode instructions. Each function takes
// the raw encoded instruction by const reference and a Parsed*Instruction by
// non-const reference (`instr`), which is presumably where the decoded result
// is written - the definitions are not part of this header.
// The control flow variants additionally take the index of the control flow
// instruction (`cf_index`).
void ParseControlFlowExec(const ucode::ControlFlowExecInstruction& cf,
uint32_t cf_index, ParsedExecInstruction& instr);
void ParseControlFlowCondExec(const ucode::ControlFlowCondExecInstruction& cf,
uint32_t cf_index, ParsedExecInstruction& instr);
void ParseControlFlowCondExecPred(
const ucode::ControlFlowCondExecPredInstruction& cf, uint32_t cf_index,
ParsedExecInstruction& instr);
void ParseControlFlowLoopStart(const ucode::ControlFlowLoopStartInstruction& cf,
uint32_t cf_index,
ParsedLoopStartInstruction& instr);
void ParseControlFlowLoopEnd(const ucode::ControlFlowLoopEndInstruction& cf,
uint32_t cf_index,
ParsedLoopEndInstruction& instr);
void ParseControlFlowCondCall(const ucode::ControlFlowCondCallInstruction& cf,
uint32_t cf_index, ParsedCallInstruction& instr);
void ParseControlFlowReturn(const ucode::ControlFlowReturnInstruction& cf,
uint32_t cf_index, ParsedReturnInstruction& instr);
void ParseControlFlowCondJmp(const ucode::ControlFlowCondJmpInstruction& cf,
uint32_t cf_index, ParsedJumpInstruction& instr);
void ParseControlFlowAlloc(const ucode::ControlFlowAllocInstruction& cf,
uint32_t cf_index, bool is_vertex_shader,
ParsedAllocInstruction& instr);
// Returns whether the fetch is a full one, and the next parsed mini vertex
// fetch should inherit most of its parameters.
bool ParseVertexFetchInstruction(
const ucode::VertexFetchInstruction& op,
const ucode::VertexFetchInstruction& previous_full_op,
ParsedVertexFetchInstruction& instr);
void ParseTextureFetchInstruction(const ucode::TextureFetchInstruction& op,
ParsedTextureFetchInstruction& instr);
// The ALU variant also needs to know the type of the shader being parsed.
void ParseAluInstruction(const ucode::AluInstruction& op,
xenos::ShaderType shader_type,
ParsedAluInstruction& instr);
class Shader {
public:
// Type of the vertex shader in a D3D11-like rendering pipeline - shader
// interface depends on it, so it must be known at translation time.
// If values are changed, INVALIDATE SHADER STORAGES (increase their version
// constexpr) where those are stored! And check bit count where this is
// packed. This is : uint32_t for simplicity of packing in bit fields.
@ -603,6 +644,8 @@ class Shader {
kQuadDomainCPIndexed,
kQuadDomainPatchIndexed,
};
// For packing HostVertexShaderType in bit fields.
static constexpr uint32_t kHostVertexShaderTypeBitCount = 3;
struct Error {
bool is_fatal = false;
@ -611,12 +654,8 @@ class Shader {
struct VertexBinding {
struct Attribute {
// Attribute index, 0-based in the entire shader.
int attrib_index;
// Fetch instruction with all parameters.
ParsedVertexFetchInstruction fetch_instr;
// Size of the attribute, in words.
uint32_t size_words;
};
// Index within the vertex binding listing.
@ -683,60 +722,18 @@ class Shader {
}
};
Shader(xenos::ShaderType shader_type, uint64_t ucode_data_hash,
const uint32_t* ucode_dwords, size_t ucode_dword_count);
virtual ~Shader();
// Based on the number of AS_VS/PS_EXPORT_STREAM_* enum sets found in a game
// .pdb.
static constexpr uint32_t kMaxMemExports = 16;
// Whether the shader is identified as a vertex or pixel shader.
xenos::ShaderType type() const { return shader_type_; }
class Translation {
public:
virtual ~Translation() {}
// If this is a vertex shader, and it has been translated, type of the shader
// in a D3D11-like rendering pipeline - shader interface depends on in, so it
// must be known at translation time.
HostVertexShaderType host_vertex_shader_type() const {
return host_vertex_shader_type_;
}
Shader& shader() const { return shader_; }
// Microcode dwords in host endianness.
const std::vector<uint32_t>& ucode_data() const { return ucode_data_; }
uint64_t ucode_data_hash() const { return ucode_data_hash_; }
const uint32_t* ucode_dwords() const { return ucode_data_.data(); }
size_t ucode_dword_count() const { return ucode_data_.size(); }
// All vertex bindings used in the shader.
// Valid for vertex shaders only.
const std::vector<VertexBinding>& vertex_bindings() const {
return vertex_bindings_;
}
// All texture bindings used in the shader.
// Valid for both vertex and pixel shaders.
const std::vector<TextureBinding>& texture_bindings() const {
return texture_bindings_;
}
// Bitmaps of all constant registers accessed by the shader.
const ConstantRegisterMap& constant_register_map() const {
return constant_register_map_;
}
// All c# registers used as the addend in MAD operations to eA.
const std::vector<uint32_t>& memexport_stream_constants() const {
return memexport_stream_constants_;
}
// Returns true if the given color target index [0-3].
bool writes_color_target(uint32_t i) const {
return writes_color_targets_[i];
}
// True if the shader overrides the pixel depth.
bool writes_depth() const { return writes_depth_; }
// True if Xenia can automatically enable early depth/stencil for the pixel
// shader when RB_DEPTHCONTROL EARLY_Z_ENABLE is not set, provided alpha
// testing and alpha to coverage are disabled.
bool implicit_early_z_allowed() const { return implicit_early_z_allowed_; }
// Translator-specific modification bits.
uint64_t modification() const { return modification_; }
// True if the shader was translated and prepared without error.
bool is_valid() const { return is_valid_; }
@ -747,9 +744,6 @@ class Shader {
// Errors that occurred during translation.
const std::vector<Error>& errors() const { return errors_; }
// Microcode disassembly in D3D format.
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
// Translated shader binary (or text).
const std::vector<uint8_t>& translated_binary() const {
return translated_binary_;
@ -762,44 +756,251 @@ class Shader {
// Disassembly of the translated from the host graphics layer.
// May be empty if the host does not support disassembly.
const std::string& host_disassembly() const { return host_disassembly_; }
// A log of errors that occurred during preparation of the host shader.
const std::string& host_error_log() const { return host_error_log_; }
// Host binary that can be saved and reused across runs.
// May be empty if the host does not support saving binaries.
const std::vector<uint8_t>& host_binary() const { return host_binary_; }
// Dumps the shader to a file in the given path based on ucode hash.
// Both the ucode binary and disassembled and translated shader will be
// written.
// Returns the filename of the shader and the binary.
// In case disassembly depends on the GPU backend, for setting it
// externally.
void set_host_disassembly(std::string disassembly) {
host_disassembly_ = std::move(disassembly);
}
// For dumping after translation. Dumps the shader's translated code, and,
// if available, translated disassembly, to files in the given directory
// based on ucode hash. Returns {binary path, disassembly path if written}.
std::pair<std::filesystem::path, std::filesystem::path> Dump(
const std::filesystem::path& base_path, const char* path_prefix);
const std::filesystem::path& base_path, const char* path_prefix) const;
protected:
Translation(Shader& shader, uint64_t modification)
: shader_(shader), modification_(modification) {}
private:
friend class Shader;
friend class ShaderTranslator;
xenos::ShaderType shader_type_;
HostVertexShaderType host_vertex_shader_type_ = HostVertexShaderType::kVertex;
std::vector<uint32_t> ucode_data_;
uint64_t ucode_data_hash_;
std::vector<VertexBinding> vertex_bindings_;
std::vector<TextureBinding> texture_bindings_;
ConstantRegisterMap constant_register_map_ = {0};
bool writes_color_targets_[4] = {false, false, false, false};
bool writes_depth_ = false;
bool implicit_early_z_allowed_ = true;
std::vector<uint32_t> memexport_stream_constants_;
Shader& shader_;
uint64_t modification_;
bool is_valid_ = false;
bool is_translated_ = false;
std::vector<Error> errors_;
std::string ucode_disassembly_;
std::vector<uint8_t> translated_binary_;
std::string host_disassembly_;
std::string host_error_log_;
std::vector<uint8_t> host_binary_;
};
Shader(xenos::ShaderType shader_type, uint64_t ucode_data_hash,
const uint32_t* ucode_dwords, size_t ucode_dword_count);
virtual ~Shader();
// Whether the shader is identified as a vertex or pixel shader.
xenos::ShaderType type() const { return shader_type_; }
// Microcode dwords in host endianness.
const std::vector<uint32_t>& ucode_data() const { return ucode_data_; }
uint64_t ucode_data_hash() const { return ucode_data_hash_; }
const uint32_t* ucode_dwords() const { return ucode_data_.data(); }
size_t ucode_dword_count() const { return ucode_data_.size(); }
bool is_ucode_analyzed() const { return is_ucode_analyzed_; }
// ucode_disasm_buffer is temporary storage for disassembly (provided
// externally so it won't need to be reallocated for every shader).
void AnalyzeUcode(StringBuffer& ucode_disasm_buffer);
// The following parameters, until the translation, are valid if ucode
// information has been gathered.
// Microcode disassembly in D3D format.
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
// All vertex bindings used in the shader.
const std::vector<VertexBinding>& vertex_bindings() const {
return vertex_bindings_;
}
// All texture bindings used in the shader.
const std::vector<TextureBinding>& texture_bindings() const {
return texture_bindings_;
}
// Bitmaps of all constant registers accessed by the shader.
const ConstantRegisterMap& constant_register_map() const {
return constant_register_map_;
}
// uint5[Shader::kMaxMemExports] - bits indicating which eM# registers have
// been written to after each `alloc export`, for up to Shader::kMaxMemExports
// exports. This will contain zero for certain corrupt exports - for those to
// which a valid eA was not written via a MAD with a stream constant.
const uint8_t* memexport_eM_written() const { return memexport_eM_written_; }
// All c# registers used as the addend in MAD operations to eA.
const std::set<uint32_t>& memexport_stream_constants() const {
return memexport_stream_constants_;
}
// Labels that jumps (explicit or from loops) can be done to.
const std::set<uint32_t>& label_addresses() const { return label_addresses_; }
// Exclusive upper bound of the indexes of paired control flow instructions
// (each corresponds to 3 dwords).
uint32_t cf_pair_index_bound() const { return cf_pair_index_bound_; }
// Upper bound of temporary registers addressed statically by the shader -
// highest static register address + 1, or 0 if no registers referenced this
// way. SQ_PROGRAM_CNTL is not always reliable - some draws (like single point
// draws with oPos = 0001 that are done by Xbox 360's Direct3D 9 sometimes;
// can be reproduced by launching Arrival in Halo 3 from the campaign lobby)
// that aren't supposed to cover any pixels use an invalid (zero)
// SQ_PROGRAM_CNTL, but with an outdated pixel shader loaded, in this case
// SQ_PROGRAM_CNTL may contain a number smaller than actually needed by the
// pixel shader - SQ_PROGRAM_CNTL should be used to go above this count if
// uses_register_dynamic_addressing is true.
uint32_t register_static_address_bound() const {
return register_static_address_bound_;
}
// Whether the shader addresses temporary registers dynamically, thus
// SQ_PROGRAM_CNTL should determine the number of registers to use, not only
// register_static_address_bound.
bool uses_register_dynamic_addressing() const {
return uses_register_dynamic_addressing_;
}
// For building shader modification bits (and also for normalization of them),
// returns the amount of temporary registers that need to be allocated
// explicitly - if not using register dynamic addressing, the shader
// translator will use register_static_address_bound directly.
uint32_t GetDynamicAddressableRegisterCount(
    uint32_t program_cntl_num_reg) const {
  // Nothing needs to be allocated explicitly when the shader only addresses
  // registers statically.
  if (!uses_register_dynamic_addressing()) {
    return 0;
  }
  // When bit 7 of the register count field is set, the count contributed by
  // SQ_PROGRAM_CNTL is zero; otherwise it is the field value plus one.
  uint32_t program_cntl_register_count = 0;
  if (!(program_cntl_num_reg & 0x80)) {
    program_cntl_register_count = program_cntl_num_reg + uint32_t(1);
  }
  // Never go below what the shader statically references.
  return std::max(program_cntl_register_count,
                  register_static_address_bound());
}
// True if the current shader has any `kill` instructions.
bool kills_pixels() const { return kills_pixels_; }
// True if the shader overrides the pixel depth.
bool writes_depth() const { return writes_depth_; }
// Whether the shader can have early depth and stencil writing enabled, unless
// alpha test or alpha to coverage is enabled.
bool implicit_early_z_write_allowed() const {
  // Discarding pixels or overriding the depth in the shader rules out early
  // depth / stencil writing.
  if (kills_pixels() || writes_depth()) {
    return false;
  }
  // TODO(Triang3l): Investigate what happens to memexport when the pixel
  // fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
  // depth/stencil.
  return memexport_stream_constants().empty();
}
// Whether each color render target is written to on any execution path.
uint32_t writes_color_targets() const { return writes_color_targets_; }
bool writes_color_target(uint32_t i) const {
  // Test the bit of the writes_color_targets() mask that corresponds to
  // render target i.
  const uint32_t target_bit = uint32_t(1) << i;
  return (writes_color_targets() & target_bit) != 0;
}
// Host translations with the specified modification bits. Not thread-safe
// with respect to translation creation/destruction.
const std::unordered_map<uint64_t, Translation*>& translations() const {
return translations_;
}
Translation* GetTranslation(uint64_t modification) const {
  // Look up an already created translation for these modification bits;
  // nullptr means none has been created.
  const auto found = translations_.find(modification);
  return found != translations_.cend() ? found->second : nullptr;
}
Translation* GetOrCreateTranslation(uint64_t modification,
bool* is_new = nullptr);
// For shader storage loading, to remove a modification in case of translation
// failure. Not thread-safe.
void DestroyTranslation(uint64_t modification);
// An externally managed identifier of the shader storage the microcode of the
// shader was last written to, or was loaded from, to only write the shader
// microcode to the storage once. UINT32_MAX by default.
uint32_t ucode_storage_index() const { return ucode_storage_index_; }
void set_ucode_storage_index(uint32_t storage_index) {
ucode_storage_index_ = storage_index;
}
// Dumps the shader's microcode binary and, if analyzed, disassembly, to files
// in the given directory based on ucode hash. Can be called at any time,
// doesn't require the shader to be translated. Returns {binary path,
// disassembly path if written}.
std::pair<std::filesystem::path, std::filesystem::path> DumpUcode(
const std::filesystem::path& base_path) const;
protected:
friend class ShaderTranslator;
virtual Translation* CreateTranslationInstance(uint64_t modification);
xenos::ShaderType shader_type_;
std::vector<uint32_t> ucode_data_;
uint64_t ucode_data_hash_;
// Whether info needed before translating has been gathered already - may be
// needed to determine which modifications are actually needed and make sense
// (for instance, there may be draws not covering anything and not allocating
// any pixel shader registers in SQ_PROGRAM_CNTL, but still using the pixel
// shader from the previous draw - in this case, every shader that happens to
// be before such draw will need to be translated again with a different
// dynamically addressed register count, which may cause compilation of
// different random pipelines across many random frames, thus causing
// stuttering - normally host pipeline states are deterministically only
// compiled when a new material appears in the game, and having the order of
// draws also matter in such unpredictable way would break this rule; limit
// the effect to shaders with dynamic register addressing only, which are
// extremely rare; however care should be taken regarding depth format-related
// translation modifications in this case), also some info needed for drawing
// is collected during the ucode analysis.
bool is_ucode_analyzed_ = false;
std::string ucode_disassembly_;
std::vector<VertexBinding> vertex_bindings_;
std::vector<TextureBinding> texture_bindings_;
ConstantRegisterMap constant_register_map_ = {0};
uint8_t memexport_eM_written_[kMaxMemExports] = {};
std::set<uint32_t> memexport_stream_constants_;
std::set<uint32_t> label_addresses_;
uint32_t cf_pair_index_bound_ = 0;
uint32_t register_static_address_bound_ = 0;
bool uses_register_dynamic_addressing_ = false;
bool kills_pixels_ = false;
bool writes_depth_ = false;
uint32_t writes_color_targets_ = 0b0000;
// Modification bits -> translation.
std::unordered_map<uint64_t, Translation*> translations_;
uint32_t ucode_storage_index_ = UINT32_MAX;
private:
void GatherExecInformation(
const ParsedExecInstruction& instr,
ucode::VertexFetchInstruction& previous_vfetch_full,
uint32_t& unique_texture_bindings, uint32_t memexport_alloc_current_count,
uint32_t& memexport_eA_written, StringBuffer& ucode_disasm_buffer);
void GatherVertexFetchInformation(
const ucode::VertexFetchInstruction& op,
ucode::VertexFetchInstruction& previous_vfetch_full,
StringBuffer& ucode_disasm_buffer);
void GatherTextureFetchInformation(const ucode::TextureFetchInstruction& op,
uint32_t& unique_texture_bindings,
StringBuffer& ucode_disasm_buffer);
void GatherAluInstructionInformation(const ucode::AluInstruction& op,
uint32_t memexport_alloc_current_count,
uint32_t& memexport_eA_written,
StringBuffer& ucode_disasm_buffer);
void GatherOperandInformation(const InstructionOperand& operand);
void GatherFetchResultInformation(const InstructionResult& result);
void GatherAluResultInformation(const InstructionResult& result,
uint32_t memexport_alloc_current_count);
};
} // namespace gpu

View File

@ -17,6 +17,7 @@
#include "xenia/base/main.h"
#include "xenia/base/platform.h"
#include "xenia/base/string.h"
#include "xenia/base/string_buffer.h"
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/shader_translator.h"
#include "xenia/gpu/spirv_shader_translator.h"
@ -104,6 +105,9 @@ int shader_compiler_main(const std::vector<std::string>& args) {
auto shader = std::make_unique<Shader>(
shader_type, ucode_data_hash, ucode_dwords.data(), ucode_dwords.size());
StringBuffer ucode_disasm_buffer;
shader->AnalyzeUcode(ucode_disasm_buffer);
std::unique_ptr<ShaderTranslator> translator;
if (cvars::shader_output_type == "spirv" ||
cvars::shader_output_type == "spirvtext") {
@ -114,7 +118,15 @@ int shader_compiler_main(const std::vector<std::string>& args) {
0, cvars::shader_output_bindless_resources,
cvars::shader_output_dxbc_rov);
} else {
translator = std::make_unique<UcodeShaderTranslator>();
// Just output microcode disassembly generated during microcode information
// gathering.
if (!cvars::shader_output.empty()) {
auto output_file = filesystem::OpenFile(cvars::shader_output, "wb");
fwrite(shader->ucode_disassembly().c_str(), 1,
shader->ucode_disassembly().length(), output_file);
fclose(output_file);
}
return 0;
}
Shader::HostVertexShaderType host_vertex_shader_type =
@ -140,11 +152,15 @@ int shader_compiler_main(const std::vector<std::string>& args) {
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
}
}
uint64_t modification = translator->GetDefaultModification(
shader_type, 64, host_vertex_shader_type);
translator->Translate(shader.get(), host_vertex_shader_type);
Shader::Translation* translation =
shader->GetOrCreateTranslation(modification);
translator->TranslateAnalyzedShader(*translation);
const void* source_data = shader->translated_binary().data();
size_t source_data_size = shader->translated_binary().size();
const void* source_data = translation->translated_binary().data();
size_t source_data_size = translation->translated_binary().size();
std::unique_ptr<xe::ui::spirv::SpirvDisassembler::Result> spirv_disasm_result;
if (cvars::shader_output_type == "spirvtext") {

File diff suppressed because it is too large Load Diff

View File

@ -29,12 +29,16 @@ class ShaderTranslator {
public:
virtual ~ShaderTranslator();
bool Translate(Shader* shader, reg::SQ_PROGRAM_CNTL cntl,
virtual uint64_t GetDefaultModification(
xenos::ShaderType shader_type,
uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex);
bool Translate(Shader* shader,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex);
Shader::HostVertexShaderType::kVertex) const {
return 0;
}
// AnalyzeUcode must be done on the shader before translating!
bool TranslateAnalyzedShader(Shader::Translation& translation);
protected:
ShaderTranslator();
@ -42,80 +46,26 @@ class ShaderTranslator {
// Resets translator state before beginning translation.
virtual void Reset();
// Register count.
uint32_t register_count() const { return register_count_; }
// Shader and modification currently being translated.
Shader::Translation& current_translation() const { return *translation_; }
Shader& current_shader() const { return current_translation().shader(); }
// Register count from SQ_PROGRAM_CNTL, stored by the implementation in its
// modification bits.
virtual uint32_t GetModificationRegisterCount() const { return 64; }
// True if the current shader is a vertex shader.
bool is_vertex_shader() const {
return shader_type_ == xenos::ShaderType::kVertex;
}
// If translating a vertex shader, type of the shader in a D3D11-like
// rendering pipeline.
Shader::HostVertexShaderType host_vertex_shader_type() const {
return host_vertex_shader_type_;
return current_shader().type() == xenos::ShaderType::kVertex;
}
// True if the current shader is a pixel shader.
bool is_pixel_shader() const {
return shader_type_ == xenos::ShaderType::kPixel;
}
// Labels that jumps (explicit or from loops) can be done to, gathered before
// translation.
const std::set<uint32_t>& label_addresses() const { return label_addresses_; }
// Used constant register info, populated before translation.
const Shader::ConstantRegisterMap& constant_register_map() const {
return constant_register_map_;
}
// True if the current shader addresses general-purpose registers with dynamic
// indices, set before translation. Doesn't include writes to r[#+a#] with an
// empty used write mask.
bool uses_register_dynamic_addressing() const {
return uses_register_dynamic_addressing_;
}
// True if the current shader writes to a color target on any execution path,
// set before translation. Doesn't include writes with an empty used write
// mask.
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
bool writes_any_color_target() const {
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
if (writes_color_targets_[i]) {
return true;
}
}
return false;
}
// True if the current shader overrides the pixel depth, set before
// translation. Doesn't include writes with an empty used write mask.
bool writes_depth() const { return writes_depth_; }
// True if Xenia can automatically enable early depth/stencil for the pixel
// shader when RB_DEPTHCONTROL EARLY_Z_ENABLE is not set, provided alpha
// testing and alpha to coverage are disabled.
bool implicit_early_z_allowed() const { return implicit_early_z_allowed_; }
// A list of all vertex bindings, populated before translation occurs.
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
return vertex_bindings_;
}
// A list of all texture bindings, populated before translation occurs.
const std::vector<Shader::TextureBinding>& texture_bindings() const {
return texture_bindings_;
return current_shader().type() == xenos::ShaderType::kPixel;
}
// Based on the number of AS_VS/PS_EXPORT_STREAM_* enum sets found in a game
// .pdb.
static constexpr uint32_t kMaxMemExports = 16;
// Bits indicating which eM# registers have been written to after each
// `alloc export`, for up to kMaxMemExports exports. This will contain zero
// for certain corrupt exports - that don't write to eA before writing to eM#,
// or if the write was done any way other than MAD with a stream constant.
const uint8_t* memexport_eM_written() const { return memexport_eM_written_; }
// All c# registers used as the addend in MAD operations to eA, populated
// before translation occurs.
const std::set<uint32_t>& memexport_stream_constants() const {
return memexport_stream_constants_;
}
// Temporary register count, accessible via static and dynamic addressing.
uint32_t register_count() const { return register_count_; }
// Current line number in the ucode disassembly.
size_t ucode_disasm_line_number() const { return ucode_disasm_line_number_; }
// Ucode disassembly buffer accumulated during translation.
StringBuffer& ucode_disasm_buffer() { return ucode_disasm_buffer_; }
// Emits a translation error that will be passed back in the result.
virtual void EmitTranslationError(const char* message, bool is_fatal = true);
@ -130,10 +80,11 @@ class ShaderTranslator {
}
// Handles post-translation tasks when the shader has been fully translated.
virtual void PostTranslation(Shader* shader) {}
virtual void PostTranslation() {}
// Sets the host disassembly on a shader.
void set_host_disassembly(Shader* shader, std::string value) {
shader->host_disassembly_ = std::move(value);
void set_host_disassembly(Shader::Translation& translation,
std::string value) {
translation.host_disassembly_ = std::move(value);
}
// Pre-process a control-flow instruction before anything else.
@ -184,128 +135,23 @@ class ShaderTranslator {
virtual void ProcessAluInstruction(const ParsedAluInstruction& instr) {}
private:
struct AluOpcodeInfo {
const char* name;
uint32_t argument_count;
uint32_t src_swizzle_component_count;
bool disable_implicit_early_z;
};
bool TranslateInternal(Shader* shader,
Shader::HostVertexShaderType host_vertex_shader_type);
void MarkUcodeInstruction(uint32_t dword_offset);
void AppendUcodeDisasm(char c);
void AppendUcodeDisasm(const char* value);
void AppendUcodeDisasmFormat(const char* format, ...);
void GatherInstructionInformation(const ucode::ControlFlowInstruction& cf);
void GatherVertexFetchInformation(const ucode::VertexFetchInstruction& op);
void GatherTextureFetchInformation(const ucode::TextureFetchInstruction& op);
void TranslateControlFlowInstruction(const ucode::ControlFlowInstruction& cf);
void TranslateControlFlowNop(const ucode::ControlFlowInstruction& cf);
void TranslateControlFlowExec(const ucode::ControlFlowExecInstruction& cf);
void TranslateControlFlowCondExec(
const ucode::ControlFlowCondExecInstruction& cf);
void TranslateControlFlowCondExecPred(
const ucode::ControlFlowCondExecPredInstruction& cf);
void TranslateControlFlowLoopStart(
const ucode::ControlFlowLoopStartInstruction& cf);
void TranslateControlFlowLoopEnd(
const ucode::ControlFlowLoopEndInstruction& cf);
void TranslateControlFlowCondCall(
const ucode::ControlFlowCondCallInstruction& cf);
void TranslateControlFlowReturn(
const ucode::ControlFlowReturnInstruction& cf);
void TranslateControlFlowCondJmp(
const ucode::ControlFlowCondJmpInstruction& cf);
void TranslateControlFlowAlloc(const ucode::ControlFlowAllocInstruction& cf);
void TranslateExecInstructions(const ParsedExecInstruction& instr);
void TranslateVertexFetchInstruction(const ucode::VertexFetchInstruction& op);
void ParseVertexFetchInstruction(const ucode::VertexFetchInstruction& op,
ParsedVertexFetchInstruction* out_instr);
void TranslateTextureFetchInstruction(
const ucode::TextureFetchInstruction& op);
void ParseTextureFetchInstruction(const ucode::TextureFetchInstruction& op,
ParsedTextureFetchInstruction* out_instr);
void TranslateAluInstruction(const ucode::AluInstruction& op);
void ParseAluInstruction(const ucode::AluInstruction& op,
ParsedAluInstruction& out_instr) const;
static void ParseAluInstructionOperand(const ucode::AluInstruction& op,
uint32_t i,
uint32_t swizzle_component_count,
InstructionOperand& out_op);
static void ParseAluInstructionOperandSpecial(
const ucode::AluInstruction& op, InstructionStorageSource storage_source,
uint32_t reg, bool negate, int const_slot, uint32_t component_index,
InstructionOperand& out_op);
// Input shader metadata and microcode.
xenos::ShaderType shader_type_;
Shader::HostVertexShaderType host_vertex_shader_type_;
const uint32_t* ucode_dwords_;
size_t ucode_dword_count_;
reg::SQ_PROGRAM_CNTL program_cntl_;
uint32_t register_count_;
// Current shader and modification being translated.
Shader::Translation* translation_ = nullptr;
// Accumulated translation errors.
std::vector<Shader::Error> errors_;
// Temporary register count, accessible via static and dynamic addressing.
uint32_t register_count_ = 0;
// Current control flow dword index.
uint32_t cf_index_ = 0;
// Microcode disassembly buffer, accumulated throughout the translation.
StringBuffer ucode_disasm_buffer_;
// Current line number in the disasm, which can be used for source annotation.
size_t ucode_disasm_line_number_ = 0;
// Last offset used when scanning for line numbers.
size_t previous_ucode_disasm_scan_offset_ = 0;
// Kept for supporting vfetch_mini.
ucode::VertexFetchInstruction previous_vfetch_full_;
// Labels that jumps (explicit or from loops) can be done to, gathered before
// translation.
std::set<uint32_t> label_addresses_;
// Detected binding information gathered before translation.
int total_attrib_count_ = 0;
std::vector<Shader::VertexBinding> vertex_bindings_;
std::vector<Shader::TextureBinding> texture_bindings_;
uint32_t unique_vertex_bindings_ = 0;
uint32_t unique_texture_bindings_ = 0;
// These all are gathered before translation.
// uses_register_dynamic_addressing_ for writes, writes_color_targets_,
// writes_depth_ don't include empty used write masks.
Shader::ConstantRegisterMap constant_register_map_ = {0};
bool uses_register_dynamic_addressing_ = false;
bool writes_color_targets_[4] = {false, false, false, false};
bool writes_depth_ = false;
bool implicit_early_z_allowed_ = true;
// Memexport info is gathered before translation.
uint32_t memexport_alloc_count_ = 0;
// For register allocation in implementations - what was used after each
// `alloc export`.
uint32_t memexport_eA_written_ = 0;
uint8_t memexport_eM_written_[kMaxMemExports] = {0};
std::set<uint32_t> memexport_stream_constants_;
static const AluOpcodeInfo alu_vector_opcode_infos_[0x20];
static const AluOpcodeInfo alu_scalar_opcode_infos_[0x40];
};
class UcodeShaderTranslator : public ShaderTranslator {
public:
UcodeShaderTranslator() = default;
protected:
std::vector<uint8_t> CompleteTranslation() override;
};
} // namespace gpu

View File

@ -203,7 +203,9 @@ void SpirvShaderTranslator::StartTranslation() {
push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant,
push_constants_type, "push_consts");
if (!texture_bindings().empty()) {
const std::vector<Shader::TextureBinding>& texture_bindings =
current_shader().texture_bindings();
if (!texture_bindings.empty()) {
image_2d_type_ =
b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown);
@ -220,7 +222,7 @@ void SpirvShaderTranslator::StartTranslation() {
b.makeSampledImageType(image_cube_type_)};
uint32_t num_tex_bindings = 0;
for (const auto& binding : texture_bindings()) {
for (const auto& binding : texture_bindings) {
// Calculate the highest binding index.
num_tex_bindings =
std::max(num_tex_bindings, uint32_t(binding.binding_index + 1));
@ -241,7 +243,7 @@ void SpirvShaderTranslator::StartTranslation() {
}
// Set up the map from binding -> ssbo index
for (const auto& binding : texture_bindings()) {
for (const auto& binding : texture_bindings) {
tex_binding_map_[binding.fetch_constant] =
uint32_t(binding.binding_index);
}
@ -254,7 +256,9 @@ void SpirvShaderTranslator::StartTranslation() {
// Vertex inputs/outputs
// Inputs: 32 SSBOs on DS 2 binding 0
if (!vertex_bindings().empty()) {
const std::vector<Shader::VertexBinding>& vertex_bindings =
current_shader().vertex_bindings();
if (!vertex_bindings.empty()) {
// Runtime array for vertex data
Id vtx_t = b.makeRuntimeArray(uint_type_);
b.addDecoration(vtx_t, spv::Decoration::DecorationArrayStride,
@ -269,7 +273,7 @@ void SpirvShaderTranslator::StartTranslation() {
// Create the vertex bindings variable.
Id vtx_a_t = b.makeArrayType(
vtx_s, b.makeUintConstant(uint32_t(vertex_bindings().size())), 0);
vtx_s, b.makeUintConstant(uint32_t(vertex_bindings.size())), 0);
vtx_ = b.createVariable(spv::StorageClass::StorageClassUniform, vtx_a_t,
"vertex_bindings");
@ -279,7 +283,7 @@ void SpirvShaderTranslator::StartTranslation() {
b.addDecoration(vtx_, spv::Decoration::DecorationNonWritable);
// Set up the map from binding -> ssbo index
for (const auto& binding : vertex_bindings()) {
for (const auto& binding : vertex_bindings) {
vtx_binding_map_[binding.fetch_constant] = binding.binding_index;
}
}
@ -494,7 +498,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft);
// If we write a new depth value, we must declare this mode!
if (writes_depth()) {
if (current_shader().writes_depth()) {
b.addExecutionMode(mainFn, spv::ExecutionModeDepthReplacing);
}
@ -667,12 +671,18 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
return spirv_bytes;
}
void SpirvShaderTranslator::PostTranslation(Shader* shader) {
void SpirvShaderTranslator::PostTranslation() {
Shader::Translation& translation = current_translation();
if (!translation.is_valid()) {
return;
}
// Validation.
if (cvars::spv_validate) {
auto validation = validator_.Validate(
reinterpret_cast<const uint32_t*>(shader->translated_binary().data()),
shader->translated_binary().size() / sizeof(uint32_t));
reinterpret_cast<const uint32_t*>(
translation.translated_binary().data()),
translation.translated_binary().size() / sizeof(uint32_t));
if (validation->has_error()) {
XELOGE("SPIR-V Shader Validation failed! Error: {}",
validation->error_string());
@ -682,12 +692,13 @@ void SpirvShaderTranslator::PostTranslation(Shader* shader) {
if (cvars::spv_disasm) {
// TODO(benvanik): only if needed? could be slowish.
auto disasm = disassembler_.Disassemble(
reinterpret_cast<const uint32_t*>(shader->translated_binary().data()),
shader->translated_binary().size() / 4);
reinterpret_cast<const uint32_t*>(
translation.translated_binary().data()),
translation.translated_binary().size() / sizeof(uint32_t));
if (disasm->has_error()) {
XELOGE("Failed to disassemble SPIRV - invalid?");
} else {
set_host_disassembly(shader, disasm->to_string());
set_host_disassembly(translation, disasm->to_string());
}
}
}

View File

@ -58,10 +58,23 @@ class SpirvShaderTranslator : public ShaderTranslator {
SpirvShaderTranslator();
~SpirvShaderTranslator() override;
// Not storing anything else in modifications (as this shader translator is
// being replaced anyway).
uint64_t GetDefaultModification(
xenos::ShaderType shader_type,
uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type =
Shader::HostVertexShaderType::kVertex) const override {
return dynamic_addressable_register_count;
}
protected:
uint32_t GetModificationRegisterCount() const override {
return uint32_t(current_translation().modification());
}
void StartTranslation() override;
std::vector<uint8_t> CompleteTranslation() override;
void PostTranslation(Shader* shader) override;
void PostTranslation() override;
void PreProcessControlFlowInstructions(
std::vector<ucode::ControlFlowInstruction> instrs) override;

View File

@ -18,8 +18,7 @@
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "third_party/xxhash/xxhash.h"
#include "xenia/base/xxhash.h"
namespace xe {
namespace gpu {

View File

@ -16,8 +16,7 @@
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "third_party/xxhash/xxhash.h"
#include "xenia/base/xxhash.h"
namespace xe {
namespace gpu {
@ -319,7 +318,7 @@ bool TextureInfo::GetPackedTileOffset(int packed_tile, uint32_t* offset_x,
}
uint64_t TextureInfo::hash() const {
  // Hashes the raw bytes of the whole TextureInfo structure with the 64-bit
  // XXH3 function. The previous XXH64 call is dropped: it preceded this
  // return and made it unreachable.
  return XXH3_64bits(this, sizeof(TextureInfo));
}
void TextureInfo::SetupMemoryInfo(uint32_t base_address, uint32_t mip_address) {

View File

@ -92,7 +92,7 @@ int TraceDump::Main(const std::vector<std::string>& args) {
bool TraceDump::Setup() {
// Create the emulator but don't initialize so we can setup the window.
emulator_ = std::make_unique<Emulator>("", "", "");
emulator_ = std::make_unique<Emulator>("", "", "", "");
X_STATUS result = emulator_->Setup(
nullptr, nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr);
if (XFAILED(result)) {

View File

@ -121,7 +121,7 @@ bool TraceViewer::Setup() {
window_->Resize(1920, 1200);
// Create the emulator but don't initialize so we can setup the window.
emulator_ = std::make_unique<Emulator>("", "", "");
emulator_ = std::make_unique<Emulator>("", "", "", "");
X_STATUS result = emulator_->Setup(
window_.get(), nullptr, [this]() { return CreateGraphicsSystem(); },
nullptr);
@ -566,8 +566,21 @@ TraceViewer::ShaderDisplayType TraceViewer::DrawShaderTypeUI() {
void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) {
// Must be prepared for advanced display modes.
// FIXME(Triang3l): This should display the actual translation used in the
// draw, but it may depend on multiple backend-related factors, including
// drawing multiple times with multiple modifications, even depending on
// values obtained during translation of other modifications (for instance,
// a memexporting shader can be executed both as a vertex shader (to draw the
// points) and as a compute shader (to actually export) if the host doesn't
// support writes from vertex shaders).
const Shader::Translation* translation = nullptr;
if (display_type != ShaderDisplayType::kUcode) {
if (!shader->is_valid()) {
for (const auto& translation_pair : shader->translations()) {
if (translation_pair.second->is_valid()) {
translation = translation_pair.second;
}
}
if (!translation) {
ImGui::TextColored(kColorError,
"ERROR: shader error during parsing/translation");
return;
@ -580,7 +593,7 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) {
break;
}
case ShaderDisplayType::kTranslated: {
const auto& str = shader->GetTranslatedBinaryString();
const auto& str = translation->GetTranslatedBinaryString();
size_t i = 0;
bool done = false;
while (!done && i < str.size()) {
@ -600,7 +613,7 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) {
break;
}
case ShaderDisplayType::kHostDisasm: {
DrawMultilineString(shader->host_disassembly());
DrawMultilineString(translation->host_disassembly());
break;
}
}

View File

@ -431,15 +431,14 @@ XEPACKEDUNION(ControlFlowInstruction, {
static_assert_size(ControlFlowInstruction, 8);
inline void UnpackControlFlowInstructions(const uint32_t* dwords,
ControlFlowInstruction* out_a,
ControlFlowInstruction* out_b) {
ControlFlowInstruction* out_ab) {
uint32_t dword_0 = dwords[0];
uint32_t dword_1 = dwords[1];
uint32_t dword_2 = dwords[2];
out_a->dword_0 = dword_0;
out_a->dword_1 = dword_1 & 0xFFFF;
out_b->dword_0 = (dword_1 >> 16) | (dword_2 << 16);
out_b->dword_1 = dword_2 >> 16;
out_ab[0].dword_0 = dword_0;
out_ab[0].dword_1 = dword_1 & 0xFFFF;
out_ab[1].dword_0 = (dword_1 >> 16) | (dword_2 << 16);
out_ab[1].dword_1 = dword_2 >> 16;
}
enum class FetchOpcode : uint32_t {
@ -552,6 +551,12 @@ enum class FetchOpcode : uint32_t {
kGetTextureComputedLod = 17,
// Source is 2-component. XZ = ddx(source.xy), YW = ddy(source.xy).
// TODO(Triang3l): Verify whether it's coarse or fine (on Adreno 200, for
// instance). This is using the texture unit, where the LOD is computed for
// the whole quad (according to the Direct3D 11.3 specification), so likely
// coarse; ddx / ddy from the Shader Model 4 era is also compiled by FXC to
// deriv_rtx/rty_coarse when targeting Shader Model 5, and on TeraScale,
// coarse / fine selection only appeared on Direct3D 11 GPUs.
kGetTextureGradients = 18,
// Gets the weights used in a bilinear fetch.
@ -816,10 +821,11 @@ static_assert_size(TextureFetchInstruction, 12);
// move of the third operand in case of zero multiplicands, because the term
// may be -0, while the result should be +0 in this case.
// http://developer.amd.com/wordpress/media/2013/10/R5xx_Acceleration_v1.5.pdf
// Multiply-add also appears to be not fused (the SM3 behavior instruction on
// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32) - shader translators
// should not use instructions that may be interpreted by the host GPU as
// fused multiply-add.
// Multiply-add also appears to be not fused; the SM3 behavior instruction on
// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32 (in 2012-2020, before
// RDNA 2, which removed v_mad_f32 as well) - shader translators should not
// use instructions that may be interpreted by the host GPU as fused
// multiply-add.
enum class AluScalarOpcode : uint32_t {
// Floating-Point Add
@ -1147,6 +1153,19 @@ enum class AluScalarOpcode : uint32_t {
kRetainPrev = 50,
};
// Whether the scalar opcode is one of the kills_* (pixel kill) operations.
constexpr bool AluScalarOpcodeIsKill(AluScalarOpcode scalar_opcode) {
  return scalar_opcode == AluScalarOpcode::kKillsEq ||
         scalar_opcode == AluScalarOpcode::kKillsGt ||
         scalar_opcode == AluScalarOpcode::kKillsGe ||
         scalar_opcode == AluScalarOpcode::kKillsNe ||
         scalar_opcode == AluScalarOpcode::kKillsOne;
}
enum class AluVectorOpcode : uint32_t {
// Per-Component Floating-Point Add
// add/ADDv dest, src0, src1
@ -1471,28 +1490,38 @@ enum class AluVectorOpcode : uint32_t {
kMaxA = 29,
};
// Returns true if the vector ALU opcode is one of the kill_* operations
// (kKillEq, kKillGt, kKillGe, kKillNe), and false for every other opcode.
constexpr bool AluVectorOpcodeIsKill(AluVectorOpcode vector_opcode) {
  return vector_opcode == AluVectorOpcode::kKillEq ||
         vector_opcode == AluVectorOpcode::kKillGt ||
         vector_opcode == AluVectorOpcode::kKillGe ||
         vector_opcode == AluVectorOpcode::kKillNe;
}
// Whether the vector instruction has side effects, such as discarding a pixel
// or setting the predicate, and therefore can't be skipped even if it doesn't
// write to any destination. Note that all scalar operations except for
// retain_prev have a side effect of modifying the previous scalar result
// register, so they must always be executed even if not writing.
constexpr bool AluVectorOpHasSideEffects(AluVectorOpcode vector_opcode) {
  // Kill opcodes are classified in a single place (AluVectorOpcodeIsKill), so
  // they are intentionally not repeated in the switch below - listing them
  // there again would only add unreachable case labels.
  if (AluVectorOpcodeIsKill(vector_opcode)) {
    return true;
  }
  switch (vector_opcode) {
    case AluVectorOpcode::kSetpEqPush:
    case AluVectorOpcode::kSetpNePush:
    case AluVectorOpcode::kSetpGtPush:
    case AluVectorOpcode::kSetpGePush:
    case AluVectorOpcode::kMaxA:
      return true;
    default:
      return false;
  }
}
}
// Whether each component of a source operand is used at all in the instruction
// (doesn't check the operand count though).

View File

@ -552,14 +552,14 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
}
void BufferCache::HashVertexBindings(
XXH64_state_t* hash_state,
XXH3_state_t* hash_state,
const std::vector<Shader::VertexBinding>& vertex_bindings) {
auto& regs = *register_file_;
for (const auto& vertex_binding : vertex_bindings) {
#if 0
XXH64_update(hash_state, &vertex_binding.binding_index, sizeof(vertex_binding.binding_index));
XXH64_update(hash_state, &vertex_binding.fetch_constant, sizeof(vertex_binding.fetch_constant));
XXH64_update(hash_state, &vertex_binding.stride_words, sizeof(vertex_binding.stride_words));
XXH3_64bits_update(hash_state, &vertex_binding.binding_index, sizeof(vertex_binding.binding_index));
XXH3_64bits_update(hash_state, &vertex_binding.fetch_constant, sizeof(vertex_binding.fetch_constant));
XXH3_64bits_update(hash_state, &vertex_binding.stride_words, sizeof(vertex_binding.stride_words));
#endif
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(vertex_binding.fetch_constant / 3) * 6;
@ -567,15 +567,15 @@ void BufferCache::HashVertexBindings(
switch (vertex_binding.fetch_constant % 3) {
case 0: {
auto& fetch = group->vertex_fetch_0;
XXH64_update(hash_state, &fetch, sizeof(fetch));
XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
} break;
case 1: {
auto& fetch = group->vertex_fetch_1;
XXH64_update(hash_state, &fetch, sizeof(fetch));
XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
} break;
case 2: {
auto& fetch = group->vertex_fetch_2;
XXH64_update(hash_state, &fetch, sizeof(fetch));
XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
} break;
}
}
@ -585,12 +585,12 @@ VkDescriptorSet BufferCache::PrepareVertexSet(
VkCommandBuffer command_buffer, VkFence fence,
const std::vector<Shader::VertexBinding>& vertex_bindings) {
// (quickly) Generate a hash.
XXH64_state_t hash_state;
XXH64_reset(&hash_state, 0);
XXH3_state_t hash_state;
XXH3_64bits_reset(&hash_state);
// (quickly) Generate a hash.
HashVertexBindings(&hash_state, vertex_bindings);
uint64_t hash = XXH64_digest(&hash_state);
uint64_t hash = XXH3_64bits_digest(&hash_state);
for (auto it = vertex_sets_.find(hash); it != vertex_sets_.end(); ++it) {
// TODO(DrChat): We need to compare the bindings and ensure they're equal.
return it->second;

View File

@ -10,6 +10,7 @@
#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#include "xenia/base/xxhash.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
@ -20,7 +21,6 @@
#include "xenia/ui/vulkan/vulkan_device.h"
#include "third_party/vulkan/vk_mem_alloc.h"
#include "third_party/xxhash/xxhash.h"
#include <map>
#include <unordered_map>
@ -127,7 +127,7 @@ class BufferCache {
void FreeConstantDescriptorSet();
void HashVertexBindings(
XXH64_state_t* hash_state,
XXH3_state_t* hash_state,
const std::vector<Shader::VertexBinding>& vertex_bindings);
// Allocates a block of memory in the transient buffer.

View File

@ -9,11 +9,11 @@
#include "xenia/gpu/vulkan/pipeline_cache.h"
#include "third_party/xxhash/xxhash.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/base/xxhash.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
@ -208,7 +208,8 @@ VulkanShader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
const uint32_t* host_address,
uint32_t dword_count) {
// Hash the input memory and lookup the shader.
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
uint64_t data_hash =
XXH3_64bits(host_address, dword_count * sizeof(uint32_t));
auto it = shader_map_.find(data_hash);
if (it != shader_map_.end()) {
// Shader has been previously loaded.
@ -259,7 +260,7 @@ PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
}
if (!pipeline) {
// Should have a hash key produced by the UpdateState pass.
uint64_t hash_key = XXH64_digest(&hash_state_);
uint64_t hash_key = XXH3_64bits_digest(&hash_state_);
pipeline = GetPipeline(render_state, hash_key);
current_pipeline_ = pipeline;
if (!pipeline) {
@ -362,35 +363,39 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state,
return pipeline;
}
// Translates a guest shader for use with Vulkan.
// Steps, in order: analyze the microcode (ucode_disasm_buffer_ is passed as
// scratch storage for AnalyzeUcode), run the shader translator, then call
// Prepare() to create the VkShaderModule. If translation or preparation fails,
// an error is logged and false is returned immediately. Otherwise the
// disassembly is logged when the result is valid, the result is dumped when
// the dump_shaders cvar is non-empty, and the validity of the result is
// returned.
bool PipelineCache::TranslateShader(VulkanShader* shader,
reg::SQ_PROGRAM_CNTL cntl) {
bool PipelineCache::TranslateShader(
VulkanShader::VulkanTranslation& translation) {
translation.shader().AnalyzeUcode(ucode_disasm_buffer_);
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
if (!shader_translator_->Translate(shader, cntl)) {
if (!shader_translator_->TranslateAnalyzedShader(translation)) {
XELOGE("Shader translation failed; marking shader as ignored");
return false;
}
// Prepare the shader for use (creates our VkShaderModule).
// It could still fail at this point.
if (!shader->Prepare()) {
if (!translation.Prepare()) {
XELOGE("Shader preparation failed; marking shader as ignored");
return false;
}
// Log the shader type, microcode size in bytes (dword count * 4), microcode
// hash and disassembly, but only for results that are valid.
if (shader->is_valid()) {
if (translation.is_valid()) {
XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n",
shader->type() == xenos::ShaderType::kVertex ? "vertex" : "pixel",
shader->ucode_dword_count() * 4, shader->ucode_data_hash(),
shader->ucode_disassembly());
translation.shader().type() == xenos::ShaderType::kVertex
? "vertex"
: "pixel",
translation.shader().ucode_dword_count() * 4,
translation.shader().ucode_data_hash(),
translation.shader().ucode_disassembly());
}
// Dump shader files if desired.
// Note that the dump is not conditional on validity, unlike the log above.
if (!cvars::dump_shaders.empty()) {
shader->Dump(cvars::dump_shaders, "vk");
translation.Dump(cvars::dump_shaders, "vk");
}
return shader->is_valid();
return translation.is_valid();
}
static void DumpShaderStatisticsAMD(const VkShaderStatisticsInfoAMD& stats) {
@ -958,7 +963,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateState(
bool mismatch = false;
// Reset hash so we can build it up.
XXH64_reset(&hash_state_, 0);
XXH3_64bits_reset(&hash_state_);
#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
{ \
@ -1025,7 +1030,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargetState() {
regs.rb_color1_info.color_format = cur_regs->rb_color1_info.color_format;
regs.rb_color2_info.color_format = cur_regs->rb_color2_info.color_format;
regs.rb_color3_info.color_format = cur_regs->rb_color3_info.color_format;
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
@ -1058,22 +1063,38 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
regs.vertex_shader = vertex_shader;
regs.pixel_shader = pixel_shader;
regs.primitive_type = primitive_type;
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
if (!vertex_shader->is_translated() &&
!TranslateShader(vertex_shader, regs.sq_program_cntl)) {
VulkanShader::VulkanTranslation* vertex_shader_translation =
static_cast<VulkanShader::VulkanTranslation*>(
vertex_shader->GetOrCreateTranslation(
shader_translator_->GetDefaultModification(
xenos::ShaderType::kVertex,
vertex_shader->GetDynamicAddressableRegisterCount(
regs.sq_program_cntl.vs_num_reg))));
if (!vertex_shader_translation->is_translated() &&
!TranslateShader(*vertex_shader_translation)) {
XELOGE("Failed to translate the vertex shader!");
return UpdateStatus::kError;
}
if (pixel_shader && !pixel_shader->is_translated() &&
!TranslateShader(pixel_shader, regs.sq_program_cntl)) {
VulkanShader::VulkanTranslation* pixel_shader_translation = nullptr;
if (pixel_shader) {
pixel_shader_translation = static_cast<VulkanShader::VulkanTranslation*>(
pixel_shader->GetOrCreateTranslation(
shader_translator_->GetDefaultModification(
xenos::ShaderType::kPixel,
pixel_shader->GetDynamicAddressableRegisterCount(
regs.sq_program_cntl.ps_num_reg))));
if (!pixel_shader_translation->is_translated() &&
!TranslateShader(*pixel_shader_translation)) {
XELOGE("Failed to translate the pixel shader!");
return UpdateStatus::kError;
}
}
update_shader_stages_stage_count_ = 0;
@ -1084,7 +1105,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
vertex_pipeline_stage.pNext = nullptr;
vertex_pipeline_stage.flags = 0;
vertex_pipeline_stage.stage = VK_SHADER_STAGE_VERTEX_BIT;
vertex_pipeline_stage.module = vertex_shader->shader_module();
vertex_pipeline_stage.module = vertex_shader_translation->shader_module();
vertex_pipeline_stage.pName = "main";
vertex_pipeline_stage.pSpecializationInfo = nullptr;
@ -1116,8 +1137,9 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
pixel_pipeline_stage.pNext = nullptr;
pixel_pipeline_stage.flags = 0;
pixel_pipeline_stage.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
pixel_pipeline_stage.module =
pixel_shader ? pixel_shader->shader_module() : dummy_pixel_shader_;
pixel_pipeline_stage.module = pixel_shader_translation
? pixel_shader_translation->shader_module()
: dummy_pixel_shader_;
pixel_pipeline_stage.pName = "main";
pixel_pipeline_stage.pSpecializationInfo = nullptr;
@ -1132,7 +1154,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
bool dirty = false;
dirty |= vertex_shader != regs.vertex_shader;
regs.vertex_shader = vertex_shader;
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
@ -1161,7 +1183,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState(
dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
regs.primitive_type = primitive_type;
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
@ -1287,7 +1309,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
dirty = true;
}
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
@ -1369,7 +1391,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() {
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
@ -1421,7 +1443,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |=
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
@ -1510,7 +1532,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL3);
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
XXH64_update(&hash_state_, &regs, sizeof(regs));
XXH3_64bits_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}

View File

@ -12,8 +12,8 @@
#include <unordered_map>
#include "third_party/xxhash/xxhash.h"
#include "xenia/base/string_buffer.h"
#include "xenia/base/xxhash.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/render_cache.h"
@ -79,7 +79,7 @@ class PipelineCache {
// state.
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
bool TranslateShader(VulkanShader* shader, reg::SQ_PROGRAM_CNTL cntl);
bool TranslateShader(VulkanShader::VulkanTranslation& translation);
void DumpShaderDisasmAMD(VkPipeline pipeline);
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
@ -92,6 +92,8 @@ class PipelineCache {
RegisterFile* register_file_ = nullptr;
ui::vulkan::VulkanDevice* device_ = nullptr;
// Temporary storage for AnalyzeUcode calls.
StringBuffer ucode_disasm_buffer_;
// Reusable shader translator.
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
// Disassembler used to get the SPIRV disasm. Only used in debug.
@ -120,7 +122,7 @@ class PipelineCache {
// Hash state used to incrementally produce pipeline hashes during update.
// By the time the full update pass has run the hash will represent the
// current state in a way that can uniquely identify the produced VkPipeline.
XXH64_state_t hash_state_;
XXH3_state_t hash_state_;
// All previously generated pipelines mapped by hash.
std::unordered_map<uint64_t, VkPipeline> cached_pipelines_;

View File

@ -1377,7 +1377,7 @@ void TextureCache::WritebackTexture(Texture* texture) {
}
void TextureCache::HashTextureBindings(
XXH64_state_t* hash_state, uint32_t& fetch_mask,
XXH3_state_t* hash_state, uint32_t& fetch_mask,
const std::vector<Shader::TextureBinding>& bindings) {
for (auto& binding : bindings) {
uint32_t fetch_bit = 1 << binding.fetch_constant;
@ -1393,7 +1393,7 @@ void TextureCache::HashTextureBindings(
reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(&regs.values[r]);
auto& fetch = group->texture_fetch;
XXH64_update(hash_state, &fetch, sizeof(fetch));
XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
}
}
@ -1401,14 +1401,14 @@ VkDescriptorSet TextureCache::PrepareTextureSet(
VkCommandBuffer command_buffer, VkFence completion_fence,
const std::vector<Shader::TextureBinding>& vertex_bindings,
const std::vector<Shader::TextureBinding>& pixel_bindings) {
XXH64_state_t hash_state;
XXH64_reset(&hash_state, 0);
XXH3_state_t hash_state;
XXH3_64bits_reset(&hash_state);
// (quickly) Generate a hash.
uint32_t fetch_mask = 0;
HashTextureBindings(&hash_state, fetch_mask, vertex_bindings);
HashTextureBindings(&hash_state, fetch_mask, pixel_bindings);
uint64_t hash = XXH64_digest(&hash_state);
uint64_t hash = XXH3_64bits_digest(&hash_state);
for (auto it = texture_sets_.find(hash); it != texture_sets_.end(); ++it) {
// TODO(DrChat): We need to compare the bindings and ensure they're equal.
return it->second;

View File

@ -186,7 +186,7 @@ class TextureCache {
bool UploadTexture(VkCommandBuffer command_buffer, VkFence completion_fence,
Texture* dest, const TextureInfo& src);
void HashTextureBindings(XXH64_state_t* hash_state, uint32_t& fetch_mask,
void HashTextureBindings(XXH3_state_t* hash_state, uint32_t& fetch_mask,
const std::vector<Shader::TextureBinding>& bindings);
bool SetupTextureBindings(
VkCommandBuffer command_buffer, VkFence completion_fence,

View File

@ -27,38 +27,56 @@ VulkanShader::VulkanShader(ui::vulkan::VulkanDevice* device,
const uint32_t* dword_ptr, uint32_t dword_count)
: Shader(shader_type, data_hash, dword_ptr, dword_count), device_(device) {}
// Destroys the VkShaderModule, if one was created, using the Vulkan device of
// the owning VulkanShader, and then clears the handle.
VulkanShader::~VulkanShader() {
VulkanShader::VulkanTranslation::~VulkanTranslation() {
if (shader_module_) {
vkDestroyShaderModule(*device_, shader_module_, nullptr);
// The device is stored in the VulkanShader, so it is reached through
// shader().
const VulkanShader& vulkan_shader = static_cast<VulkanShader&>(shader());
vkDestroyShaderModule(*vulkan_shader.device_, shader_module_, nullptr);
shader_module_ = nullptr;
}
}
// Creates the VkShaderModule from the translated binary and gives it a debug
// name of the form "S(<type char>): <microcode hash>".
// Asserts that no module has been created yet and that the translation is
// valid. Returns true if vkCreateShaderModule returned VK_SUCCESS.
bool VulkanShader::Prepare() {
bool VulkanShader::VulkanTranslation::Prepare() {
assert_null(shader_module_);
assert_true(is_valid());
// The Vulkan device is stored in the owning VulkanShader.
const VulkanShader& vulkan_shader = static_cast<VulkanShader&>(shader());
ui::vulkan::VulkanDevice* device = vulkan_shader.device_;
// Create the shader module.
VkShaderModuleCreateInfo shader_info;
shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
shader_info.pNext = nullptr;
shader_info.flags = 0;
shader_info.codeSize = translated_binary_.size();
shader_info.codeSize = translated_binary().size();
shader_info.pCode =
reinterpret_cast<const uint32_t*>(translated_binary_.data());
reinterpret_cast<const uint32_t*>(translated_binary().data());
auto status =
vkCreateShaderModule(*device_, &shader_info, nullptr, &shader_module_);
vkCreateShaderModule(*device, &shader_info, nullptr, &shader_module_);
CheckResult(status, "vkCreateShaderModule");
// Single-character shader type tag for the debug name: 'v' for vertex, 'p'
// for pixel, 'u' for anything else.
// NOTE(review): the debug name is set even if module creation failed - confirm
// that this is harmless for DbgSetObjectName.
char typeChar = shader_type_ == xenos::ShaderType::kPixel
? 'p'
: shader_type_ == xenos::ShaderType::kVertex ? 'v' : 'u';
device_->DbgSetObjectName(
uint64_t(shader_module_), VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
fmt::format("S({}): {:016X}", typeChar, ucode_data_hash()));
char type_char;
switch (vulkan_shader.type()) {
case xenos::ShaderType::kVertex:
type_char = 'v';
break;
case xenos::ShaderType::kPixel:
type_char = 'p';
break;
default:
type_char = 'u';
}
device->DbgSetObjectName(uint64_t(shader_module_),
VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
fmt::format("S({}): {:016X}", type_char,
vulkan_shader.ucode_data_hash()));
return status == VK_SUCCESS;
}
// Allocates the Vulkan-specific translation object for this shader and the
// given modification value.
// NOTE(review): the result is a raw pointer obtained from `new`; presumably
// the caller (the base Shader) takes ownership - confirm against
// Shader::GetOrCreateTranslation.
Shader::Translation* VulkanShader::CreateTranslationInstance(
    uint64_t modification) {
  VulkanTranslation* const instance =
      new VulkanTranslation(*this, modification);
  return instance;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

Some files were not shown because too many files have changed in this diff Show More