Merge master

Commit 53be87f6ba by Dr. Chat, 2018-05-05 17:11:18 -05:00
154 changed files with 10053 additions and 74023 deletions

.gitmodules (vendored, 3 changes)
View File

@@ -37,3 +37,6 @@
[submodule "third_party/spirv-headers"]
path = third_party/spirv-headers
url = https://github.com/KhronosGroup/SPIRV-Headers
[submodule "third_party/volk"]
path = third_party/volk
url = https://github.com/zeux/volk.git

View File

@@ -59,7 +59,7 @@ script:
# Run linter.
- if [[ $LINT == true ]]; then ./xenia-build lint --all; fi
# Build and run our simple hello world test.
# Build and run base tests
- if [[ $BUILD == true ]]; then ./xenia-build build --config=$CONFIG --target=xenia-base-tests; fi
- if [[ $BUILD == true ]]; then ./build/bin/Linux/$CONFIG/xenia-base-tests; fi
# Build and run ppc tests

View File

@@ -4,11 +4,11 @@ Xenia - Xbox 360 Emulator Research Project
Xenia is an experimental emulator for the Xbox 360. For more information see the
[main xenia website](http://xenia.jp/).
Come chat with us about **emulator-related topics** in
[#xenia @ irc.freenode.net](http://webchat.freenode.net?channels=%23xenia&uio=MTE9NzIaa).
For developer chat join `#xenia-dev` but stay on topic. Lurking is fine.
Come chat with us about **emulator-related topics** on [Discord](https://discord.gg/Q9mxZf9).
For developer chat join `#dev` but stay on topic. Lurking is fine.
Please check the [frequently asked questions](http://xenia.jp/faq/) page before
asking questions. We've got jobs/lives/etc, so don't expect instant answers.
Discussing illegal activities will get you banned. No warnings.
## Status
@@ -16,7 +16,7 @@ Discussing illegal activities will get you banned. No warnings.
Buildbot | Status
-------- | ------
[Windows](https://ci.appveyor.com/project/benvanik/xenia/branch/master) | [![Build status](https://ci.appveyor.com/api/projects/status/ftqiy86kdfawyx3a/branch/master?svg=true)](https://ci.appveyor.com/project/benvanik/xenia/branch/master)
[Linux](https://travis-ci.org/benvanik/xenia) | [![Build status](https://travis-ci.org/benvanik/xenia.svg)](https://travis-ci.org/benvanik/xenia)
[Linux](https://travis-ci.org/xenia-project/xenia) | [![Build status](https://travis-ci.org/xenia-project/xenia.svg)](https://travis-ci.org/xenia-project/xenia)
Some real games run. Most don't.
See the [Game compatibility list](https://github.com/xenia-project/game-compatibility/issues)

Binary files not shown (9 images changed; previews omitted).

BIN assets/icon/Icon.blend (normal file; binary content not shown)

View File

@@ -0,0 +1,26 @@
/// -------------- *** -------------- ///
/// ///
/// Rendering Instructions ///
/// ///
/// -------------- *** -------------- ///
For those not well-versed in Blender, this document explains
how to render the icon in any desired resolution and color.
To Change Resolution:
1. Open Icon.blend in Blender.
2. In the right tool pane, look for "Resolution" under the
"Dimensions" settings. It should be defaulted to 512x512.
This is where you'll change the icon output dimensions.
3. After you've set the value to what you want, hit F12 or
select 'Render' > 'Render Image' from the top bar. This
will render the icon.
4. To save, hit F3 or select 'Image' > 'Save As Image' on
the left toward the middle of the screen.
To Change Colors:
1. Open Icon.blend in Blender.
2. The icon's colors will appear in the bottom window as
labeled nodes. Change them by clicking on each node.
3. Render and save as described above.

Binary file not shown (image changed, 267 KiB to 222 KiB; preview omitted).

Binary files not shown (10 images added; previews omitted).

View File

@@ -28,9 +28,11 @@ defines({
})
-- TODO(DrChat): Find a way to disable this on other architectures.
filter("architecture:x86_64")
vectorextensions("AVX")
filter({})
if ARCH ~= "ppc64" then
filter("architecture:x86_64")
vectorextensions("AVX")
filter({})
end
characterset("Unicode")
flags({
@@ -95,13 +97,6 @@ filter("platforms:Linux")
"dl",
"lz4",
"rt",
"X11",
"xcb",
"X11-xcb",
"GL",
"vulkan",
"c++",
"c++abi"
})
linkoptions({
"`pkg-config --libs gtk+-3.0`",
@@ -112,17 +107,31 @@ filter({"platforms:Linux", "kind:*App"})
filter({"platforms:Linux", "language:C++", "toolset:gcc"})
buildoptions({
"--std=c++11",
"-std=c++14",
})
links({
})
filter({"platforms:Linux", "toolset:gcc"})
if ARCH == "ppc64" then
buildoptions({
"-m32",
"-mpowerpc64"
})
linkoptions({
"-m32",
"-mpowerpc64"
})
end
filter({"platforms:Linux", "language:C++", "toolset:clang"})
buildoptions({
"-std=c++14",
"-stdlib=libstdc++",
})
links({
"c++",
"c++abi"
})
disablewarnings({
"deprecated-register"
@@ -219,7 +228,7 @@ solution("xenia")
include("third_party/libav.lua")
include("third_party/snappy.lua")
include("third_party/spirv-tools.lua")
include("third_party/vulkan/loader")
include("third_party/volk.lua")
include("third_party/xxhash.lua")
include("third_party/yaml-cpp.lua")
@@ -233,13 +242,11 @@ solution("xenia")
include("src/xenia/debug/ui")
include("src/xenia/gpu")
include("src/xenia/gpu/null")
include("src/xenia/gpu/gl4")
include("src/xenia/gpu/vulkan")
include("src/xenia/hid")
include("src/xenia/hid/nop")
include("src/xenia/kernel")
include("src/xenia/ui")
include("src/xenia/ui/gl")
include("src/xenia/ui/spirv")
include("src/xenia/ui/vulkan")
include("src/xenia/vfs")

View File

@@ -17,7 +17,7 @@ project("xenia-app")
"libavutil",
"snappy",
"spirv-tools",
"vulkan-loader",
"volk",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@@ -26,14 +26,12 @@ project("xenia-app")
"xenia-cpu-backend-x64",
"xenia-debug-ui",
"xenia-gpu",
"xenia-gpu-gl4",
"xenia-gpu-null",
"xenia-gpu-vulkan",
"xenia-hid",
"xenia-hid-nop",
"xenia-kernel",
"xenia-ui",
"xenia-ui-gl",
"xenia-ui-spirv",
"xenia-ui-vulkan",
"xenia-vfs",
@@ -60,6 +58,15 @@ project("xenia-app")
project_root,
})
filter("platforms:Linux")
links({
"X11",
"xcb",
"X11-xcb",
"GL",
"vulkan",
})
filter("platforms:Windows")
links({
"xenia-apu-xaudio2",

View File

@@ -26,7 +26,6 @@
#endif // XE_PLATFORM_WIN32
// Available graphics systems:
#include "xenia/gpu/gl4/gl4_graphics_system.h"
#include "xenia/gpu/null/null_graphics_system.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
@@ -38,7 +37,7 @@
#endif // XE_PLATFORM_WIN32
DEFINE_string(apu, "any", "Audio system. Use: [any, nop, xaudio2]");
DEFINE_string(gpu, "any", "Graphics system. Use: [any, gl4, vulkan, null]");
DEFINE_string(gpu, "any", "Graphics system. Use: [any, vulkan, null]");
DEFINE_string(hid, "any", "Input system. Use: [any, nop, winkey, xinput]");
DEFINE_string(target, "", "Specifies the target .xex or .iso to execute.");
@@ -71,10 +70,7 @@ std::unique_ptr<apu::AudioSystem> CreateAudioSystem(cpu::Processor* processor) {
}
std::unique_ptr<gpu::GraphicsSystem> CreateGraphicsSystem() {
if (FLAGS_gpu.compare("gl4") == 0) {
return std::unique_ptr<gpu::GraphicsSystem>(
new xe::gpu::gl4::GL4GraphicsSystem());
} else if (FLAGS_gpu.compare("vulkan") == 0) {
if (FLAGS_gpu.compare("vulkan") == 0) {
return std::unique_ptr<gpu::GraphicsSystem>(
new xe::gpu::vulkan::VulkanGraphicsSystem());
} else if (FLAGS_gpu.compare("null") == 0) {

View File

@@ -19,20 +19,28 @@ namespace xe {
// Bitfield, where position starts at the LSB.
template <typename T, size_t position, size_t n_bits>
struct bf {
bf() = default;
inline operator T() const { return value(); }
inline T value() const {
return static_cast<T>((storage & mask()) >> position);
}
// For enum values, we strip them down to an underlying type.
typedef
typename std::conditional<std::is_enum<T>::value, std::underlying_type<T>,
std::remove_reference<T>>::type::type
value_type;
bf() = default;
inline operator T() const { return value(); }
inline T value() const {
auto value = (storage & mask()) >> position;
if (std::is_signed<value_type>::value) {
// If the value is signed, sign-extend it.
value_type sign_mask = value_type(1) << (n_bits - 1);
value = (sign_mask ^ value) - sign_mask;
}
return static_cast<T>(value);
}
inline value_type mask() const {
return (((value_type)~0) >> (8 * sizeof(value_type) - n_bits)) << position;
return ((value_type(1) << n_bits) - 1) << position;
}
value_type storage;
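
A minimal sketch, not part of this commit, of what the reworked bf<> above now does: value() sign-extends fields whose underlying type is signed, and mask() builds the field mask from (1 << n_bits) - 1 instead of right-shifting an all-ones value (which sign-extends for signed value_types). The include path below is an assumption; only the template itself appears in this diff.

#include <cassert>
#include <cstdint>
#include "xenia/base/bit_field.h"  // assumed path for the bf<> template shown above

int main() {
  xe::bf<int32_t, 4, 4> field;   // signed 4-bit field occupying bits [7:4]
  field.storage = 0x000000F0;    // raw field bits: 0b1111
  assert(field.mask() == 0xF0);  // ((1 << 4) - 1) << 4
  assert(field.value() == -1);   // sign-extended instead of the old 15
  return 0;
}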

View File

@@ -26,7 +26,9 @@ void copy_128_aligned(void* dest, const void* src, size_t count) {
#if XE_ARCH_AMD64
void copy_and_swap_16_aligned(void* dest_ptr, const void* src_ptr,
size_t count) {
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0x1);
assert_zero(reinterpret_cast<uintptr_t>(dest_ptr) & 0xF);
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0xF);
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
__m128i shufmask =
@@ -34,12 +36,7 @@ void copy_and_swap_16_aligned(void* dest_ptr, const void* src_ptr,
0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
size_t i = 0;
size_t unaligned_words = (reinterpret_cast<uintptr_t>(src_ptr) & 0xF) / 2;
for (; unaligned_words > 0 && i < count; unaligned_words--, i++) {
// Copy up to 16 byte alignment.
dest[i] = byte_swap(src[i]);
}
for (; i + 8 <= count; i += 8) {
for (i = 0; i + 8 <= count; i += 8) {
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output = _mm_shuffle_epi8(input, shufmask);
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
@@ -70,20 +67,17 @@ void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
void copy_and_swap_32_aligned(void* dest_ptr, const void* src_ptr,
size_t count) {
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0x3);
assert_zero(reinterpret_cast<uintptr_t>(dest_ptr) & 0xF);
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0xF);
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
__m128i shufmask =
_mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
size_t i = 0;
size_t unaligned_dwords = (reinterpret_cast<uintptr_t>(src_ptr) & 0xF) / 4;
for (; unaligned_dwords > 0 && i < count; unaligned_dwords--, i++) {
// Copy up to 16 byte alignment.
dest[i] = byte_swap(src[i]);
}
for (; i + 4 <= count; i += 4) {
size_t i;
for (i = 0; i + 4 <= count; i += 4) {
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output = _mm_shuffle_epi8(input, shufmask);
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
@@ -114,20 +108,17 @@ void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
void copy_and_swap_64_aligned(void* dest_ptr, const void* src_ptr,
size_t count) {
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0x7);
assert_zero(reinterpret_cast<uintptr_t>(dest_ptr) & 0xF);
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0xF);
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
__m128i shufmask =
_mm_set_epi8(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01,
0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
size_t i = 0;
size_t unaligned_qwords = (reinterpret_cast<uintptr_t>(src_ptr) & 0xF) / 8;
for (; unaligned_qwords > 0 && i < count; unaligned_qwords--, i++) {
// Copy up to 16 byte alignment.
dest[i] = byte_swap(src[i]);
}
for (; i + 2 <= count; i += 2) {
size_t i;
for (i = 0; i + 2 <= count; i += 2) {
__m128i input = _mm_load_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output = _mm_shuffle_epi8(input, shufmask);
_mm_store_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
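
A small caller sketch, not part of this commit: with the head-alignment loops removed, the *_aligned variants above now assert that both pointers are 16-byte aligned, and callers that cannot guarantee that are expected to use the *_unaligned variants (the EVENT_WRITE_EXT change later in this commit does exactly that).

#include <cstdint>
#include "xenia/base/memory.h"

int main() {
  alignas(16) uint16_t src[8] = {0x0123, 0x4567, 0x89AB, 0xCDEF,
                                 0x0011, 0x2233, 0x4455, 0x6677};
  alignas(16) uint16_t dst[8] = {};
  xe::copy_and_swap_16_aligned(dst, src, 8);         // both 16-byte aligned: OK
  // xe::copy_and_swap_16_aligned(dst, src + 1, 7);  // would now trip the asserts
  xe::copy_and_swap_16_unaligned(dst, src + 1, 7);   // handles any alignment
  return 0;
}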

View File

@@ -1,17 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/memory.h"
#include "third_party/catch/include/catch.hpp"
TEST_CASE("copy_and_swap_16_aligned", "Copy and Swap") {
// TODO(benvanik): tests.
REQUIRE(true == true);
}

View File

@@ -13,6 +13,7 @@
// NOTE: if you're including this file it means you are explicitly depending
// on Linux headers. Including this file outside of linux platform specific
// source code will break portability
#include <cstddef>
#include "xenia/base/platform.h"

View File

@@ -16,11 +16,4 @@ project("xenia-base")
"debug_visualizers.natvis",
})
test_suite("xenia-base-tests", project_root, ".", {
includedirs = {
project_root.."/third_party/gflags/src",
},
links = {
"xenia-base",
},
})
include("testing")

View File

@@ -0,0 +1,419 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/memory.h"
#include "third_party/catch/include/catch.hpp"
namespace xe {
namespace base {
namespace test {
TEST_CASE("copy_128_aligned", "Copy and Swap") {
alignas(128) uint8_t src[256], dest[256];
for (uint8_t i = 0; i < 255; ++i) {
src[i] = 255 - i;
}
std::memset(dest, 0, sizeof(dest));
copy_128_aligned(dest, src, 1);
REQUIRE(std::memcmp(dest, src, 128));
REQUIRE(dest[128] == 0);
std::memset(dest, 0, sizeof(dest));
copy_128_aligned(dest, src, 2);
REQUIRE(std::memcmp(dest, src, 256));
std::memset(dest, 0, sizeof(dest));
copy_128_aligned(dest, src + 1, 1);
REQUIRE(std::memcmp(dest, src + 1, 128));
}
TEST_CASE("copy_and_swap_16_aligned", "Copy and Swap") {
alignas(16) uint16_t a = 0x1111, b = 0xABCD;
copy_and_swap_16_aligned(&a, &b, 1);
REQUIRE(a == 0xCDAB);
REQUIRE(b == 0xABCD);
alignas(16) uint16_t c[] = {0x0000, 0x0000, 0x0000, 0x0000};
alignas(16) uint16_t d[] = {0x0123, 0x4567, 0x89AB, 0xCDEF};
copy_and_swap_16_aligned(c, d, 1);
REQUIRE(c[0] == 0x2301);
REQUIRE(c[1] == 0x0000);
REQUIRE(c[2] == 0x0000);
REQUIRE(c[3] == 0x0000);
copy_and_swap_16_aligned(c, d, 3);
REQUIRE(c[0] == 0x2301);
REQUIRE(c[1] == 0x6745);
REQUIRE(c[2] == 0xAB89);
REQUIRE(c[3] == 0x0000);
copy_and_swap_16_aligned(c, d, 4);
REQUIRE(c[0] == 0x2301);
REQUIRE(c[1] == 0x6745);
REQUIRE(c[2] == 0xAB89);
REQUIRE(c[3] == 0xEFCD);
alignas(16) uint64_t e;
copy_and_swap_16_aligned(&e, d, 4);
REQUIRE(e == 0xEFCDAB8967452301);
alignas(16) char f[85] = {0x00};
alignas(16) char g[] =
"This is a 85 byte long string... "
"It's supposed to be longer than standard alignment.";
copy_and_swap_16_aligned(f, g, 42);
REQUIRE(std::strcmp(f,
"hTsii s a58b ty eolgns rtni.g..I 't susppsodet oebl "
"noeg rhtnas atdnra dlagimnne.t") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_16_aligned(f, g + 16, 34);
REQUIRE(std::strcmp(f,
" eolgns rtni.g..I 't susppsodet oebl "
"noeg rhtnas atdnra dlagimnne.t") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_16_aligned(f, g + 32, 26);
REQUIRE(std::strcmp(f,
"I 't susppsodet oebl "
"noeg rhtnas atdnra dlagimnne.t") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_16_aligned(f, g + 64, 10);
REQUIRE(std::strcmp(f, "s atdnra dlagimnne.t") == 0);
}
TEST_CASE("copy_and_swap_16_unaligned", "Copy and Swap") {
uint16_t a = 0x1111, b = 0xABCD;
copy_and_swap_16_unaligned(&a, &b, 1);
REQUIRE(a == 0xCDAB);
REQUIRE(b == 0xABCD);
uint16_t c[] = {0x0000, 0x0000, 0x0000, 0x0000};
uint16_t d[] = {0x0123, 0x4567, 0x89AB, 0xCDEF};
copy_and_swap_16_unaligned(c, d, 1);
REQUIRE(c[0] == 0x2301);
REQUIRE(c[1] == 0x0000);
REQUIRE(c[2] == 0x0000);
REQUIRE(c[3] == 0x0000);
copy_and_swap_16_unaligned(c, d, 4);
REQUIRE(c[0] == 0x2301);
REQUIRE(c[1] == 0x6745);
REQUIRE(c[2] == 0xAB89);
REQUIRE(c[3] == 0xEFCD);
uint64_t e;
copy_and_swap_16_unaligned(&e, d, 4);
REQUIRE(e == 0xEFCDAB8967452301);
char f[85] = {0x00};
char g[] =
"This is a 85 byte long string... "
"It's supposed to be longer than standard alignment.";
copy_and_swap_16_unaligned(f, g, 42);
REQUIRE(std::strcmp(f,
"hTsii s a58b ty eolgns rtni.g..I 't susppsodet oebl "
"noeg rhtnas atdnra dlagimnne.t") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_16_unaligned(f, g + 1, 41);
REQUIRE(std::strcmp(f,
"ih ssia 8 5ybetl no gtsirgn.. .tIs's puopes dotb "
"eolgnret ah ntsnaaddra ilngemtn") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_16_unaligned(f, g + 2, 41);
REQUIRE(std::strcmp(f,
"sii s a58b ty eolgns rtni.g..I 't susppsodet oebl "
"noeg rhtnas atdnra dlagimnne.t") == 0);
}
TEST_CASE("copy_and_swap_32_aligned", "Copy and Swap") {
alignas(32) uint32_t a = 0x11111111, b = 0x89ABCDEF;
copy_and_swap_32_aligned(&a, &b, 1);
REQUIRE(a == 0xEFCDAB89);
REQUIRE(b == 0x89ABCDEF);
alignas(32) uint32_t c[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
alignas(32) uint32_t d[] = {0x01234567, 0x89ABCDEF, 0xE887EEED, 0xD8514199};
copy_and_swap_32_aligned(c, d, 1);
REQUIRE(c[0] == 0x67452301);
REQUIRE(c[1] == 0x00000000);
REQUIRE(c[2] == 0x00000000);
REQUIRE(c[3] == 0x00000000);
copy_and_swap_32_aligned(c, d, 3);
REQUIRE(c[0] == 0x67452301);
REQUIRE(c[1] == 0xEFCDAB89);
REQUIRE(c[2] == 0xEDEE87E8);
REQUIRE(c[3] == 0x00000000);
copy_and_swap_32_aligned(c, d, 4);
REQUIRE(c[0] == 0x67452301);
REQUIRE(c[1] == 0xEFCDAB89);
REQUIRE(c[2] == 0xEDEE87E8);
REQUIRE(c[3] == 0x994151D8);
alignas(32) uint64_t e;
copy_and_swap_32_aligned(&e, d, 2);
REQUIRE(e == 0xEFCDAB8967452301);
alignas(32) char f[85] = {0x00};
alignas(32) char g[] =
"This is a 85 byte long string... "
"It's supposed to be longer than standard alignment.";
copy_and_swap_32_aligned(f, g, 21);
REQUIRE(std::strcmp(f,
"sihT si 58 atyb ol es gnnirt...g'tI us ssoppt deeb "
"onol regnahtats radnla dmngi.tne") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_32_aligned(f, g + 16, 17);
REQUIRE(std::strcmp(f,
"ol es gnnirt...g'tI us ssoppt deeb "
"onol regnahtats radnla dmngi.tne") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_32_aligned(f, g + 32, 13);
REQUIRE(std::strcmp(f,
"'tI us ssoppt deeb "
"onol regnahtats radnla dmngi.tne") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_32_aligned(f, g + 64, 5);
REQUIRE(std::strcmp(f, "ats radnla dmngi.tne") == 0);
}
TEST_CASE("copy_and_swap_32_unaligned", "Copy and Swap") {
uint32_t a = 0x11111111, b = 0x89ABCDEF;
copy_and_swap_32_unaligned(&a, &b, 1);
REQUIRE(a == 0xEFCDAB89);
REQUIRE(b == 0x89ABCDEF);
uint32_t c[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
uint32_t d[] = {0x01234567, 0x89ABCDEF, 0xE887EEED, 0xD8514199};
copy_and_swap_32_unaligned(c, d, 1);
REQUIRE(c[0] == 0x67452301);
REQUIRE(c[1] == 0x00000000);
REQUIRE(c[2] == 0x00000000);
REQUIRE(c[3] == 0x00000000);
copy_and_swap_32_unaligned(c, d, 3);
REQUIRE(c[0] == 0x67452301);
REQUIRE(c[1] == 0xEFCDAB89);
REQUIRE(c[2] == 0xEDEE87E8);
REQUIRE(c[3] == 0x00000000);
copy_and_swap_32_unaligned(c, d, 4);
REQUIRE(c[0] == 0x67452301);
REQUIRE(c[1] == 0xEFCDAB89);
REQUIRE(c[2] == 0xEDEE87E8);
REQUIRE(c[3] == 0x994151D8);
uint64_t e;
copy_and_swap_32_unaligned(&e, d, 2);
REQUIRE(e == 0xEFCDAB8967452301);
char f[85] = {0x00};
char g[] =
"This is a 85 byte long string... "
"It's supposed to be longer than standard alignment.";
copy_and_swap_32_unaligned(f, g, 21);
REQUIRE(std::strcmp(f,
"sihT si 58 atyb ol es gnnirt...g'tI us ssoppt deeb "
"onol regnahtats radnla dmngi.tne") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_32_unaligned(f, g + 1, 20);
REQUIRE(std::strcmp(f,
" siha si 58 etybnol ts ggnir ...s'tIpus esopot d eb "
"gnolt re nahnatsdradila emng") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_32_unaligned(f, g + 2, 20);
REQUIRE(std::strcmp(f,
"i si a sb 58 etygnolrts .gniI .. s'tppusdeso ot l "
"ebegnoht rs nadnat dragilanemn") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_32_unaligned(f, g + 3, 20);
REQUIRE(std::strcmp(f,
"si s8 a yb 5l et gnoirts..gntI .s s'oppu desb otol "
"eregnaht ts nadnaa drngiltnem") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_32_unaligned(f, g + 4, 20);
REQUIRE(std::strcmp(f,
" si 58 atyb ol es gnnirt...g'tI us ssoppt deeb onol "
"regnahtats radnla dmngi.tne") == 0);
}
TEST_CASE("copy_and_swap_64_aligned", "Copy and Swap") {
alignas(64) uint64_t a = 0x1111111111111111, b = 0x0123456789ABCDEF;
copy_and_swap_64_aligned(&a, &b, 1);
REQUIRE(a == 0xEFCDAB8967452301);
REQUIRE(b == 0x0123456789ABCDEF);
alignas(64) uint64_t c[] = {0x0000000000000000, 0x0000000000000000,
0x0000000000000000, 0x0000000000000000};
alignas(64) uint64_t d[] = {0x0123456789ABCDEF, 0xE887EEEDD8514199,
0x21D4745A1D4A7706, 0xA4174FED675766E3};
copy_and_swap_64_aligned(c, d, 1);
REQUIRE(c[0] == 0xEFCDAB8967452301);
REQUIRE(c[1] == 0x0000000000000000);
REQUIRE(c[2] == 0x0000000000000000);
REQUIRE(c[3] == 0x0000000000000000);
copy_and_swap_64_aligned(c, d, 3);
REQUIRE(c[0] == 0xEFCDAB8967452301);
REQUIRE(c[1] == 0x994151D8EDEE87E8);
REQUIRE(c[2] == 0x06774A1D5A74D421);
REQUIRE(c[3] == 0x0000000000000000);
copy_and_swap_64_aligned(c, d, 4);
REQUIRE(c[0] == 0xEFCDAB8967452301);
REQUIRE(c[1] == 0x994151D8EDEE87E8);
REQUIRE(c[2] == 0x06774A1D5A74D421);
REQUIRE(c[3] == 0xE3665767ED4F17A4);
alignas(64) uint64_t e;
copy_and_swap_64_aligned(&e, d, 1);
REQUIRE(e == 0xEFCDAB8967452301);
alignas(64) char f[85] = {0x00};
alignas(64) char g[] =
"This is a 85 byte long string... "
"It's supposed to be longer than standard alignment.";
copy_and_swap_64_aligned(f, g, 10);
REQUIRE(std::strcmp(f,
" si sihTtyb 58 as gnol e...gnirtus s'tI t desoppnol eb "
"onaht regradnats mngila d") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_aligned(f, g + 16, 8);
REQUIRE(std::strcmp(f,
"s gnol e...gnirtus s'tI t desoppnol eb "
"onaht regradnats mngila d") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_aligned(f, g + 32, 6);
REQUIRE(std::strcmp(f,
"us s'tI t desoppnol eb "
"onaht regradnats mngila d") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_aligned(f, g + 64, 2);
REQUIRE(std::strcmp(f, "radnats mngila d") == 0);
}
TEST_CASE("copy_and_swap_64_unaligned", "Copy and Swap") {
uint64_t a = 0x1111111111111111, b = 0x0123456789ABCDEF;
copy_and_swap_64_unaligned(&a, &b, 1);
REQUIRE(a == 0xEFCDAB8967452301);
REQUIRE(b == 0x0123456789ABCDEF);
uint64_t c[] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
0x0000000000000000};
uint64_t d[] = {0x0123456789ABCDEF, 0xE887EEEDD8514199, 0x21D4745A1D4A7706,
0xA4174FED675766E3};
copy_and_swap_64_unaligned(c, d, 1);
REQUIRE(c[0] == 0xEFCDAB8967452301);
REQUIRE(c[1] == 0x0000000000000000);
REQUIRE(c[2] == 0x0000000000000000);
REQUIRE(c[3] == 0x0000000000000000);
copy_and_swap_64_unaligned(c, d, 3);
REQUIRE(c[0] == 0xEFCDAB8967452301);
REQUIRE(c[1] == 0x994151D8EDEE87E8);
REQUIRE(c[2] == 0x06774A1D5A74D421);
REQUIRE(c[3] == 0x0000000000000000);
copy_and_swap_64_unaligned(c, d, 4);
REQUIRE(c[0] == 0xEFCDAB8967452301);
REQUIRE(c[1] == 0x994151D8EDEE87E8);
REQUIRE(c[2] == 0x06774A1D5A74D421);
REQUIRE(c[3] == 0xE3665767ED4F17A4);
uint64_t e;
copy_and_swap_64_unaligned(&e, d, 1);
REQUIRE(e == 0xEFCDAB8967452301);
char f[85] = {0x00};
char g[] =
"This is a 85 byte long string... "
"It's supposed to be longer than standard alignment.";
copy_and_swap_64_unaligned(f, g, 10);
REQUIRE(std::strcmp(f,
" si sihTtyb 58 as gnol e...gnirtus s'tI t desoppnol eb "
"onaht regradnats mngila d") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 1, 10);
REQUIRE(std::strcmp(f,
"a si sihetyb 58 ts gnol ...gnirpus s'tIot desopgnol "
"eb naht redradnatsemngila ") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 2, 10);
REQUIRE(std::strcmp(f,
" a si si etyb 58rts gnolI ...gnippus s't ot desoegnol "
"ebs naht r dradnatnemngila") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 3, 10);
REQUIRE(std::strcmp(f,
"8 a si sl etyb 5irts gnotI ...gnoppus s'b ot desregnol "
"ets naht a dradnatnemngil") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 4, 10);
REQUIRE(std::strcmp(f,
"58 a si ol etyb nirts gn'tI ...gsoppus seb ot de "
"regnol ats nahtla dradn.tnemngi") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 5, 9);
REQUIRE(std::strcmp(f,
" 58 a sinol etybgnirts gs'tI ...esoppus eb ot dt "
"regnolnats nahila drad") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 6, 9);
REQUIRE(std::strcmp(f,
"b 58 a sgnol ety.gnirts s'tI ..desoppusl eb ot ht "
"regnodnats nagila dra") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 7, 9);
REQUIRE(std::strcmp(f,
"yb 58 a gnol et..gnirtss s'tI . desoppuol eb otaht "
"regnadnats nngila dr") == 0);
std::memset(f, 0, sizeof(f));
copy_and_swap_64_unaligned(f, g + 8, 9);
REQUIRE(std::strcmp(f,
"tyb 58 as gnol e...gnirtus s'tI t desoppnol eb onaht "
"regradnats mngila d") == 0);
}
TEST_CASE("copy_and_swap_16_in_32_aligned", "Copy and Swap") {
// TODO(bwrsandman): test once properly understood.
REQUIRE(true == true);
}
TEST_CASE("copy_and_swap_16_in_32_unaligned", "Copy and Swap") {
// TODO(bwrsandman): test once properly understood.
REQUIRE(true == true);
}
} // namespace test
} // namespace base
} // namespace xe

View File

@@ -0,0 +1,11 @@
project_root = "../../../.."
include(project_root.."/tools/build")
test_suite("xenia-base-tests", project_root, ".", {
includedirs = {
project_root.."/third_party/gflags/src",
},
links = {
"xenia-base",
},
})

View File

@@ -13,7 +13,9 @@
#include "third_party/capstone/include/capstone.h"
#include "third_party/capstone/include/x86.h"
#include "xenia/base/exception_handler.h"
#include "xenia/base/logging.h"
#include "xenia/cpu/backend/x64/x64_assembler.h"
#include "xenia/cpu/backend/x64/x64_code_cache.h"
#include "xenia/cpu/backend/x64/x64_emitter.h"
@@ -65,11 +67,16 @@ bool X64Backend::Initialize(Processor* processor) {
return false;
}
Xbyak::util::Cpu cpu;
if (!cpu.has(Xbyak::util::Cpu::tAVX)) {
XELOGE("This CPU does not support AVX. The emulator will now crash.");
return false;
}
RegisterSequences();
// Need movbe to do advanced LOAD/STORE tricks.
if (FLAGS_enable_haswell_instructions) {
Xbyak::util::Cpu cpu;
machine_info_.supports_extended_load_store =
cpu.has(Xbyak::util::Cpu::tMOVBE);
} else {

View File

@@ -189,7 +189,7 @@ class TestRunner {
~TestRunner() {
memory::DeallocFixed(memory_, memory_size_,
memory::DeallocationType::kDecommitRelease);
memory::DeallocationType::kRelease);
memory::AlignedFree(context_);
}

View File

@@ -236,7 +236,7 @@ void CommandProcessor::ShutdownContext() { context_.reset(); }
void CommandProcessor::InitializeRingBuffer(uint32_t ptr, uint32_t log2_size) {
read_ptr_index_ = 0;
primary_buffer_ptr_ = ptr;
primary_buffer_size_ = uint32_t(std::pow(2u, log2_size));
primary_buffer_size_ = 1 << log2_size;
}
void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
@@ -695,7 +695,11 @@ bool CommandProcessor::ExecutePacketType3_ME_INIT(RingBuffer* reader,
uint32_t packet,
uint32_t count) {
// initialize CP's micro-engine
reader->AdvanceRead(count * sizeof(uint32_t));
me_bin_.clear();
for (uint32_t i = 0; i < count; i++) {
me_bin_.push_back(reader->ReadAndSwap<uint32_t>());
}
return true;
}
@@ -1047,8 +1051,8 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(RingBuffer* reader,
1, // max z
};
assert_true(endianness == Endian::k8in16);
xe::copy_and_swap_16_aligned(memory_->TranslatePhysical(address), extents,
xe::countof(extents));
xe::copy_and_swap_16_unaligned(memory_->TranslatePhysical(address), extents,
xe::countof(extents));
trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents));
return true;
}
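
A tiny sketch, not part of this commit, of the ring-buffer size change above: the register holds a log2 value, so an integer shift gives exactly the power of two that the old std::pow call computed through double precision.

#include <cmath>
#include <cstdint>

int main() {
  uint32_t log2_size = 12;  // example value only
  uint32_t by_shift = 1u << log2_size;                  // 4096
  uint32_t by_pow = uint32_t(std::pow(2u, log2_size));  // same result, via double
  return by_shift == by_pow ? 0 : 1;
}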

View File

@@ -215,6 +215,9 @@ class CommandProcessor {
std::function<void()> swap_request_handler_;
std::queue<std::function<void()>> pending_fns_;
// MicroEngine binary from PM4_ME_INIT
std::vector<uint32_t> me_bin_;
uint32_t counter_ = 0;
uint32_t primary_buffer_ptr_ = 0;

View File

@@ -1,535 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/gl4/draw_batcher.h"
#include <cstring>
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/gpu/gl4/gl4_gpu_flags.h"
#include "xenia/gpu/gpu_flags.h"
namespace xe {
namespace gpu {
namespace gl4 {
using namespace xe::gpu::xenos;
const size_t kCommandBufferCapacity = 16 * (1024 * 1024);
const size_t kCommandBufferAlignment = 4;
const size_t kStateBufferCapacity = 64 * (1024 * 1024);
const size_t kStateBufferAlignment = 256;
DrawBatcher::DrawBatcher(RegisterFile* register_file)
: register_file_(register_file),
command_buffer_(kCommandBufferCapacity, kCommandBufferAlignment),
state_buffer_(kStateBufferCapacity, kStateBufferAlignment),
array_data_buffer_(nullptr),
draw_open_(false) {
std::memset(&batch_state_, 0, sizeof(batch_state_));
batch_state_.needs_reconfigure = true;
batch_state_.command_range_start = batch_state_.state_range_start =
UINTPTR_MAX;
std::memset(&active_draw_, 0, sizeof(active_draw_));
}
bool DrawBatcher::Initialize(CircularBuffer* array_data_buffer) {
array_data_buffer_ = array_data_buffer;
if (!command_buffer_.Initialize()) {
return false;
}
if (!state_buffer_.Initialize()) {
return false;
}
if (!InitializeTFB()) {
return false;
}
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, command_buffer_.handle());
return true;
}
// Initializes a transform feedback object
// We use this to capture vertex data straight from the vertex/geometry shader.
bool DrawBatcher::InitializeTFB() {
glCreateBuffers(1, &tfvbo_);
if (!tfvbo_) {
return false;
}
glCreateTransformFeedbacks(1, &tfbo_);
if (!tfbo_) {
return false;
}
glCreateQueries(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, 1, &tfqo_);
if (!tfqo_) {
return false;
}
// TODO(DrChat): Calculate this based on the number of primitives drawn.
glNamedBufferData(tfvbo_, 16384 * 4, nullptr, GL_STATIC_READ);
return true;
}
void DrawBatcher::ShutdownTFB() {
glDeleteBuffers(1, &tfvbo_);
glDeleteTransformFeedbacks(1, &tfbo_);
glDeleteQueries(1, &tfqo_);
tfvbo_ = 0;
tfbo_ = 0;
tfqo_ = 0;
}
size_t DrawBatcher::QueryTFBSize() {
if (!tfb_enabled_) {
return 0;
}
size_t size = 0;
switch (tfb_prim_type_gl_) {
case GL_POINTS:
size = tfb_prim_count_ * 1 * 4 * 4;
break;
case GL_LINES:
size = tfb_prim_count_ * 2 * 4 * 4;
break;
case GL_TRIANGLES:
size = tfb_prim_count_ * 3 * 4 * 4;
break;
}
return size;
}
bool DrawBatcher::ReadbackTFB(void* buffer, size_t size) {
if (!tfb_enabled_) {
XELOGW("DrawBatcher::ReadbackTFB called when TFB was disabled!");
return false;
}
void* data = glMapNamedBufferRange(tfvbo_, 0, size, GL_MAP_READ_BIT);
std::memcpy(buffer, data, size);
glUnmapNamedBuffer(tfvbo_);
return true;
}
void DrawBatcher::Shutdown() {
command_buffer_.Shutdown();
state_buffer_.Shutdown();
ShutdownTFB();
}
bool DrawBatcher::ReconfigurePipeline(GL4Shader* vertex_shader,
GL4Shader* pixel_shader,
GLuint pipeline) {
if (batch_state_.pipeline == pipeline) {
// No-op.
return true;
}
if (!Flush(FlushMode::kReconfigure)) {
return false;
}
batch_state_.vertex_shader = vertex_shader;
batch_state_.pixel_shader = pixel_shader;
batch_state_.pipeline = pipeline;
return true;
}
bool DrawBatcher::BeginDrawArrays(PrimitiveType prim_type,
uint32_t index_count) {
assert_false(draw_open_);
if (batch_state_.prim_type != prim_type || batch_state_.indexed) {
if (!Flush(FlushMode::kReconfigure)) {
return false;
}
}
batch_state_.prim_type = prim_type;
batch_state_.indexed = false;
if (!BeginDraw()) {
return false;
}
auto cmd = active_draw_.draw_arrays_cmd;
cmd->base_instance = 0;
cmd->instance_count = 1;
cmd->count = index_count;
cmd->first_index = 0;
return true;
}
bool DrawBatcher::BeginDrawElements(PrimitiveType prim_type,
uint32_t index_count,
IndexFormat index_format) {
assert_false(draw_open_);
GLenum index_type =
index_format == IndexFormat::kInt32 ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT;
if (batch_state_.prim_type != prim_type || !batch_state_.indexed ||
batch_state_.index_type != index_type) {
if (!Flush(FlushMode::kReconfigure)) {
return false;
}
}
batch_state_.prim_type = prim_type;
batch_state_.indexed = true;
batch_state_.index_type = index_type;
if (!BeginDraw()) {
return false;
}
uint32_t start_index = register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
assert_zero(start_index);
auto cmd = active_draw_.draw_elements_cmd;
cmd->base_instance = 0;
cmd->instance_count = 1;
cmd->count = index_count;
cmd->first_index = start_index;
cmd->base_vertex = 0;
return true;
}
bool DrawBatcher::BeginDraw() {
draw_open_ = true;
if (batch_state_.needs_reconfigure) {
batch_state_.needs_reconfigure = false;
// Have been reconfigured since last draw - need to compute state size.
// Layout:
// [draw command]
// [common header]
// [consts]
// Padded to max.
GLsizei command_size = 0;
if (batch_state_.indexed) {
command_size = sizeof(DrawElementsIndirectCommand);
} else {
command_size = sizeof(DrawArraysIndirectCommand);
}
batch_state_.command_stride =
xe::round_up(command_size, GLsizei(kCommandBufferAlignment));
GLsizei header_size = sizeof(CommonHeader);
// TODO(benvanik): consts sizing.
// GLsizei float_consts_size = sizeof(float4) * 512;
// GLsizei bool_consts_size = sizeof(uint32_t) * 8;
// GLsizei loop_consts_size = sizeof(uint32_t) * 32;
// GLsizei consts_size =
// float_consts_size + bool_consts_size + loop_consts_size;
// batch_state_.float_consts_offset = batch_state_.header_offset +
// header_size;
// batch_state_.bool_consts_offset =
// batch_state_.float_consts_offset + float_consts_size;
// batch_state_.loop_consts_offset =
// batch_state_.bool_consts_offset + bool_consts_size;
GLsizei consts_size = 0;
batch_state_.state_stride = header_size + consts_size;
}
// Allocate a command data block.
// We should treat it as write-only.
if (!command_buffer_.CanAcquire(batch_state_.command_stride)) {
Flush(FlushMode::kMakeCoherent);
}
active_draw_.command_allocation =
command_buffer_.Acquire(batch_state_.command_stride);
assert_not_null(active_draw_.command_allocation.host_ptr);
// Allocate a state data block.
// We should treat it as write-only.
if (!state_buffer_.CanAcquire(batch_state_.state_stride)) {
Flush(FlushMode::kMakeCoherent);
}
active_draw_.state_allocation =
state_buffer_.Acquire(batch_state_.state_stride);
assert_not_null(active_draw_.state_allocation.host_ptr);
active_draw_.command_address =
reinterpret_cast<uintptr_t>(active_draw_.command_allocation.host_ptr);
auto state_host_ptr =
reinterpret_cast<uintptr_t>(active_draw_.state_allocation.host_ptr);
active_draw_.header = reinterpret_cast<CommonHeader*>(state_host_ptr);
active_draw_.header->ps_param_gen = -1;
// active_draw_.float_consts =
// reinterpret_cast<float4*>(state_host_ptr +
// batch_state_.float_consts_offset);
// active_draw_.bool_consts =
// reinterpret_cast<uint32_t*>(state_host_ptr +
// batch_state_.bool_consts_offset);
// active_draw_.loop_consts =
// reinterpret_cast<uint32_t*>(state_host_ptr +
// batch_state_.loop_consts_offset);
return true;
}
void DrawBatcher::DiscardDraw() {
if (!draw_open_) {
// No-op.
return;
}
draw_open_ = false;
command_buffer_.Discard(std::move(active_draw_.command_allocation));
state_buffer_.Discard(std::move(active_draw_.state_allocation));
}
bool DrawBatcher::CommitDraw() {
assert_true(draw_open_);
draw_open_ = false;
// Copy over required constants.
CopyConstants();
if (batch_state_.state_range_start == UINTPTR_MAX) {
batch_state_.command_range_start = active_draw_.command_allocation.offset;
batch_state_.state_range_start = active_draw_.state_allocation.offset;
}
batch_state_.command_range_length +=
active_draw_.command_allocation.aligned_length;
batch_state_.state_range_length +=
active_draw_.state_allocation.aligned_length;
command_buffer_.Commit(std::move(active_draw_.command_allocation));
state_buffer_.Commit(std::move(active_draw_.state_allocation));
++batch_state_.draw_count;
return true;
}
void DrawBatcher::TFBBegin(PrimitiveType prim_type) {
if (!tfb_enabled_) {
return;
}
// Translate the primitive typename to something compatible with TFB.
GLenum gl_prim_type = 0;
switch (prim_type) {
case PrimitiveType::kLineList:
gl_prim_type = GL_LINES;
break;
case PrimitiveType::kLineStrip:
gl_prim_type = GL_LINES;
break;
case PrimitiveType::kLineLoop:
gl_prim_type = GL_LINES;
break;
case PrimitiveType::kPointList:
// The geometry shader associated with this writes out triangles.
gl_prim_type = GL_TRIANGLES;
break;
case PrimitiveType::kTriangleList:
gl_prim_type = GL_TRIANGLES;
break;
case PrimitiveType::kTriangleStrip:
gl_prim_type = GL_TRIANGLES;
break;
case PrimitiveType::kRectangleList:
gl_prim_type = GL_TRIANGLES;
break;
case PrimitiveType::kTriangleFan:
gl_prim_type = GL_TRIANGLES;
break;
case PrimitiveType::kQuadList:
// FIXME: In some cases the geometry shader will output lines.
// See: GL4CommandProcessor::UpdateShaders
gl_prim_type = GL_TRIANGLES;
break;
default:
assert_unhandled_case(prim_type);
break;
}
// TODO(DrChat): Resize the TFVBO here.
// Could draw a 2nd time with the rasterizer disabled once we have a primitive
// count.
tfb_prim_type_ = prim_type;
tfb_prim_type_gl_ = gl_prim_type;
glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, tfbo_);
// Bind the buffer to the TFB object.
glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, tfvbo_);
// Begin a query for # prims written
glBeginQueryIndexed(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, 0, tfqo_);
// Begin capturing.
glBeginTransformFeedback(gl_prim_type);
}
void DrawBatcher::TFBEnd() {
if (!tfb_enabled_) {
return;
}
glEndTransformFeedback();
glEndQueryIndexed(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, 0);
glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0);
glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);
// Cache the query size as query objects aren't shared.
GLint prim_count = 0;
glGetQueryObjectiv(tfqo_, GL_QUERY_RESULT, &prim_count);
tfb_prim_count_ = prim_count;
}
bool DrawBatcher::Flush(FlushMode mode) {
GLboolean cull_enabled = 0;
if (batch_state_.draw_count) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
assert_not_zero(batch_state_.command_stride);
assert_not_zero(batch_state_.state_stride);
// Flush pending buffer changes.
command_buffer_.Flush();
state_buffer_.Flush();
array_data_buffer_->Flush();
// State data is indexed by draw ID.
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, state_buffer_.handle(),
batch_state_.state_range_start,
batch_state_.state_range_length);
GLenum prim_type = 0;
bool valid_prim = true;
switch (batch_state_.prim_type) {
case PrimitiveType::kPointList:
prim_type = GL_POINTS;
break;
case PrimitiveType::kLineList:
prim_type = GL_LINES;
break;
case PrimitiveType::kLineStrip:
prim_type = GL_LINE_STRIP;
break;
case PrimitiveType::kLineLoop:
prim_type = GL_LINE_LOOP;
break;
case PrimitiveType::kTriangleList:
prim_type = GL_TRIANGLES;
break;
case PrimitiveType::kTriangleStrip:
prim_type = GL_TRIANGLE_STRIP;
break;
case PrimitiveType::kTriangleFan:
prim_type = GL_TRIANGLE_FAN;
break;
case PrimitiveType::kRectangleList:
prim_type = GL_TRIANGLES;
// Rect lists aren't culled. There may be other things they skip too.
// assert_true(
// (register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32
// & 0x3) == 0);
break;
case PrimitiveType::kQuadList:
prim_type = GL_LINES_ADJACENCY;
break;
default:
case PrimitiveType::kTriangleWithWFlags:
prim_type = GL_TRIANGLES;
valid_prim = false;
XELOGE("unsupported primitive type %d", batch_state_.prim_type);
assert_unhandled_case(batch_state_.prim_type);
break;
}
// Fast path for single draws.
void* indirect_offset =
reinterpret_cast<void*>(batch_state_.command_range_start);
if (tfb_enabled_) {
TFBBegin(batch_state_.prim_type);
}
if (valid_prim && batch_state_.draw_count == 1) {
// Fast path for one draw. Removes MDI overhead when not required.
if (batch_state_.indexed) {
auto& cmd = active_draw_.draw_elements_cmd;
glDrawElementsInstancedBaseVertexBaseInstance(
prim_type, cmd->count, batch_state_.index_type,
reinterpret_cast<void*>(
uintptr_t(cmd->first_index) *
(batch_state_.index_type == GL_UNSIGNED_SHORT ? 2 : 4)),
cmd->instance_count, cmd->base_vertex, cmd->base_instance);
} else {
auto& cmd = active_draw_.draw_arrays_cmd;
glDrawArraysInstancedBaseInstance(prim_type, cmd->first_index,
cmd->count, cmd->instance_count,
cmd->base_instance);
}
} else if (valid_prim) {
// Full multi-draw.
if (batch_state_.indexed) {
glMultiDrawElementsIndirect(prim_type, batch_state_.index_type,
indirect_offset, batch_state_.draw_count,
batch_state_.command_stride);
} else {
glMultiDrawArraysIndirect(prim_type, indirect_offset,
batch_state_.draw_count,
batch_state_.command_stride);
}
}
if (tfb_enabled_) {
TFBEnd();
}
batch_state_.command_range_start = UINTPTR_MAX;
batch_state_.command_range_length = 0;
batch_state_.state_range_start = UINTPTR_MAX;
batch_state_.state_range_length = 0;
batch_state_.draw_count = 0;
}
if (mode == FlushMode::kReconfigure) {
// Reset - we'll update it as soon as we have all the information.
batch_state_.needs_reconfigure = true;
}
return true;
}
void DrawBatcher::CopyConstants() {
// TODO(benvanik): partial updates, etc. We could use shader constant access
// knowledge that we get at compile time to only upload those constants
// required. If we did this as a variable length then we could really cut
// down on state block sizes.
std::memcpy(active_draw_.header->float_consts,
&register_file_->values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
sizeof(active_draw_.header->float_consts));
std::memcpy(
active_draw_.header->bool_consts,
&register_file_->values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].f32,
sizeof(active_draw_.header->bool_consts));
std::memcpy(active_draw_.header->loop_consts,
&register_file_->values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].f32,
sizeof(active_draw_.header->loop_consts));
}
} // namespace gl4
} // namespace gpu
} // namespace xe

View File

@@ -1,191 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_DRAW_BATCHER_H_
#define XENIA_GPU_GL4_DRAW_BATCHER_H_
#include "xenia/gpu/gl4/gl4_shader.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/gl/circular_buffer.h"
#include "xenia/ui/gl/gl_context.h"
namespace xe {
namespace gpu {
namespace gl4 {
using xe::ui::gl::CircularBuffer;
union float4 {
float v[4];
struct {
float x, y, z, w;
};
};
#pragma pack(push, 4)
struct DrawArraysIndirectCommand {
GLuint count;
GLuint instance_count;
GLuint first_index;
GLuint base_instance;
};
struct DrawElementsIndirectCommand {
GLuint count;
GLuint instance_count;
GLuint first_index;
GLint base_vertex;
GLuint base_instance;
};
#pragma pack(pop)
class DrawBatcher {
public:
enum class FlushMode {
kMakeCoherent,
kStateChange,
kReconfigure,
};
explicit DrawBatcher(RegisterFile* register_file);
bool Initialize(CircularBuffer* array_data_buffer);
void Shutdown();
PrimitiveType prim_type() const { return batch_state_.prim_type; }
void set_window_scalar(float width_scalar, float height_scalar) {
active_draw_.header->window_scale.x = width_scalar;
active_draw_.header->window_scale.y = height_scalar;
}
void set_vtx_fmt(float xy, float z, float w) {
active_draw_.header->vtx_fmt.x = xy;
active_draw_.header->vtx_fmt.y = xy;
active_draw_.header->vtx_fmt.z = z;
active_draw_.header->vtx_fmt.w = w;
}
void set_alpha_test(bool enabled, uint32_t func, float ref) {
active_draw_.header->alpha_test.x = enabled ? 1.0f : 0.0f;
active_draw_.header->alpha_test.y = static_cast<float>(func);
active_draw_.header->alpha_test.z = ref;
}
void set_ps_param_gen(int register_index) {
active_draw_.header->ps_param_gen = register_index;
}
void set_texture_sampler(int index, GLuint64 handle, uint32_t swizzle) {
active_draw_.header->texture_samplers[index] = handle;
active_draw_.header->texture_swizzles[index] = swizzle;
}
void set_index_buffer(const CircularBuffer::Allocation& allocation) {
// Offset is used in glDrawElements.
auto& cmd = active_draw_.draw_elements_cmd;
size_t index_size = batch_state_.index_type == GL_UNSIGNED_SHORT ? 2 : 4;
cmd->first_index = GLuint(allocation.offset / index_size);
}
bool ReconfigurePipeline(GL4Shader* vertex_shader, GL4Shader* pixel_shader,
GLuint pipeline);
bool BeginDrawArrays(PrimitiveType prim_type, uint32_t index_count);
bool BeginDrawElements(PrimitiveType prim_type, uint32_t index_count,
IndexFormat index_format);
void DiscardDraw();
bool CommitDraw();
bool Flush(FlushMode mode);
// TFB - Filled with vertex shader output from the last flush.
size_t QueryTFBSize();
bool ReadbackTFB(void* buffer, size_t size);
GLuint tfvbo() { return tfvbo_; }
bool is_tfb_enabled() const { return tfb_enabled_; }
void set_tfb_enabled(bool enabled) { tfb_enabled_ = enabled; }
private:
bool InitializeTFB();
void ShutdownTFB();
void TFBBegin(PrimitiveType prim_type);
void TFBEnd();
bool BeginDraw();
void CopyConstants();
RegisterFile* register_file_;
CircularBuffer command_buffer_;
CircularBuffer state_buffer_;
CircularBuffer* array_data_buffer_;
GLuint tfbo_ = 0;
GLuint tfvbo_ = 0;
GLuint tfqo_ = 0;
PrimitiveType tfb_prim_type_ = PrimitiveType::kNone;
GLenum tfb_prim_type_gl_ = 0;
GLint tfb_prim_count_ = 0;
bool tfb_enabled_ = false;
struct BatchState {
bool needs_reconfigure;
PrimitiveType prim_type;
bool indexed;
GLenum index_type;
GL4Shader* vertex_shader;
GL4Shader* pixel_shader;
GLuint pipeline;
GLsizei command_stride;
GLsizei state_stride;
GLsizei float_consts_offset;
GLsizei bool_consts_offset;
GLsizei loop_consts_offset;
uintptr_t command_range_start;
uintptr_t command_range_length;
uintptr_t state_range_start;
uintptr_t state_range_length;
GLsizei draw_count;
} batch_state_;
// This must match GL4Shader's header.
struct CommonHeader {
float4 window_scale; // sx,sy, ?, ?
float4 vtx_fmt; //
float4 alpha_test; // alpha test enable, func, ref, ?
int ps_param_gen;
int padding[3];
// TODO(benvanik): pack tightly
GLuint64 texture_samplers[32];
GLuint texture_swizzles[32];
float4 float_consts[512];
uint32_t bool_consts[8];
uint32_t loop_consts[32];
};
struct {
CircularBuffer::Allocation command_allocation;
CircularBuffer::Allocation state_allocation;
union {
DrawArraysIndirectCommand* draw_arrays_cmd;
DrawElementsIndirectCommand* draw_elements_cmd;
uintptr_t command_address;
};
CommonHeader* header;
} active_draw_;
bool draw_open_;
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_DRAW_BATCHER_H_

File diff suppressed because it is too large.

View File

@@ -1,237 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_GL4_COMMAND_PROCESSOR_H_
#define XENIA_GPU_GL4_GL4_COMMAND_PROCESSOR_H_
#include <atomic>
#include <cstring>
#include <functional>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>
#include "xenia/base/threading.h"
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/gl4/draw_batcher.h"
#include "xenia/gpu/gl4/gl4_shader.h"
#include "xenia/gpu/gl4/gl4_shader_cache.h"
#include "xenia/gpu/gl4/texture_cache.h"
#include "xenia/gpu/glsl_shader_translator.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/xthread.h"
#include "xenia/memory.h"
#include "xenia/ui/gl/circular_buffer.h"
#include "xenia/ui/gl/gl_context.h"
namespace xe {
namespace gpu {
namespace gl4 {
class GL4GraphicsSystem;
class GL4CommandProcessor : public CommandProcessor {
public:
GL4CommandProcessor(GL4GraphicsSystem* graphics_system,
kernel::KernelState* kernel_state);
~GL4CommandProcessor() override;
void ClearCaches() override;
// HACK: for debugging; would be good to have this in a base type.
TextureCache* texture_cache() { return &texture_cache_; }
DrawBatcher* draw_batcher() { return &draw_batcher_; }
GLuint GetColorRenderTarget(uint32_t pitch, MsaaSamples samples,
uint32_t base, ColorRenderTargetFormat format);
GLuint GetDepthRenderTarget(uint32_t pitch, MsaaSamples samples,
uint32_t base, DepthRenderTargetFormat format);
private:
enum class UpdateStatus {
kCompatible,
kMismatch,
kError,
};
struct CachedFramebuffer {
GLuint color_targets[4];
GLuint depth_target;
GLuint framebuffer;
};
struct CachedColorRenderTarget {
uint32_t base;
uint32_t width;
uint32_t height;
ColorRenderTargetFormat format;
GLenum internal_format;
GLuint texture;
};
struct CachedDepthRenderTarget {
uint32_t base;
uint32_t width;
uint32_t height;
DepthRenderTargetFormat format;
GLenum internal_format;
GLuint texture;
};
struct CachedPipeline {
CachedPipeline();
~CachedPipeline();
GLuint vertex_program;
GLuint fragment_program;
struct {
GLuint default_pipeline;
GLuint point_list_pipeline;
GLuint rect_list_pipeline;
GLuint quad_list_pipeline;
GLuint line_quad_list_pipeline;
// TODO(benvanik): others with geometry shaders.
} handles;
};
bool SetupContext() override;
void ShutdownContext() override;
GLuint CreateGeometryProgram(const std::string& source);
void MakeCoherent() override;
void PrepareForWait() override;
void ReturnFromWait() override;
void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
uint32_t frontbuffer_height) override;
Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address,
uint32_t dword_count) override;
bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
IndexBufferInfo* index_buffer_info) override;
UpdateStatus UpdateShaders(PrimitiveType prim_type);
UpdateStatus UpdateRenderTargets();
UpdateStatus UpdateState(PrimitiveType prim_type);
UpdateStatus UpdateViewportState();
UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
UpdateStatus UpdateBlendState();
UpdateStatus UpdateDepthStencilState();
UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info);
UpdateStatus PopulateVertexBuffers();
UpdateStatus PopulateSamplers();
UpdateStatus PopulateSampler(const Shader::TextureBinding& texture_binding);
bool IssueCopy() override;
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
GLuint depth_target);
GlslShaderTranslator shader_translator_;
GL4ShaderCache shader_cache_;
CachedFramebuffer* active_framebuffer_ = nullptr;
GLuint last_framebuffer_texture_ = 0;
std::vector<CachedFramebuffer> cached_framebuffers_;
std::vector<CachedColorRenderTarget> cached_color_render_targets_;
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
std::vector<std::unique_ptr<CachedPipeline>> all_pipelines_;
std::unordered_map<uint64_t, CachedPipeline*> cached_pipelines_;
GLuint point_list_geometry_program_ = 0;
GLuint rect_list_geometry_program_ = 0;
GLuint quad_list_geometry_program_ = 0;
GLuint line_quad_list_geometry_program_ = 0;
TextureCache texture_cache_;
DrawBatcher draw_batcher_;
xe::ui::gl::CircularBuffer scratch_buffer_;
private:
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
bool SetShadowRegister(float* dest, uint32_t register_name);
struct UpdateRenderTargetsRegisters {
uint32_t rb_modecontrol;
uint32_t rb_surface_info;
uint32_t rb_color_info;
uint32_t rb_color1_info;
uint32_t rb_color2_info;
uint32_t rb_color3_info;
uint32_t rb_color_mask;
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
uint32_t rb_depth_info;
UpdateRenderTargetsRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_render_targets_regs_;
struct UpdateViewportStateRegisters {
// uint32_t pa_cl_clip_cntl;
uint32_t rb_surface_info;
uint32_t pa_cl_vte_cntl;
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_window_offset;
uint32_t pa_sc_window_scissor_tl;
uint32_t pa_sc_window_scissor_br;
float pa_cl_vport_xoffset;
float pa_cl_vport_yoffset;
float pa_cl_vport_zoffset;
float pa_cl_vport_xscale;
float pa_cl_vport_yscale;
float pa_cl_vport_zscale;
UpdateViewportStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_viewport_state_regs_;
struct UpdateRasterizerStateRegisters {
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_screen_scissor_tl;
uint32_t pa_sc_screen_scissor_br;
uint32_t multi_prim_ib_reset_index;
uint32_t pa_sc_viz_query;
PrimitiveType prim_type;
UpdateRasterizerStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_rasterizer_state_regs_;
struct UpdateBlendStateRegisters {
uint32_t rb_blendcontrol[4];
float rb_blend_rgba[4];
UpdateBlendStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_blend_state_regs_;
struct UpdateDepthStencilStateRegisters {
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
UpdateDepthStencilStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_depth_stencil_state_regs_;
struct UpdateShadersRegisters {
PrimitiveType prim_type;
uint32_t pa_su_sc_mode_cntl;
uint32_t sq_program_cntl;
uint32_t sq_context_misc;
GL4Shader* vertex_shader;
GL4Shader* pixel_shader;
UpdateShadersRegisters() { Reset(); }
void Reset() {
sq_program_cntl = 0;
vertex_shader = pixel_shader = nullptr;
}
} update_shaders_regs_;
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_GL4_COMMAND_PROCESSOR_H_

View File

@@ -1,17 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/gl4/gl4_gpu_flags.h"
DEFINE_bool(disable_framebuffer_readback, false,
"Disable framebuffer readback.");
DEFINE_bool(disable_textures, false, "Disable textures and use colors only.");
DEFINE_string(shader_cache_dir, "",
"GL4 Shader cache directory (relative to Xenia). Specify an "
"empty string to disable the cache.");

View File

@@ -1,21 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_GL4_GPU_FLAGS_H_
#define XENIA_GPU_GL4_GL4_GPU_FLAGS_H_
#include <gflags/gflags.h>
DECLARE_bool(disable_framebuffer_readback);
DECLARE_bool(disable_textures);
DECLARE_string(shader_cache_dir);
#define FINE_GRAINED_DRAW_SCOPES 0
#endif // XENIA_GPU_GL4_GL4_GPU_FLAGS_H_

View File

@ -1,86 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/gl4/gl4_graphics_system.h"
#include <algorithm>
#include <cstring>
#include "xenia/base/logging.h"
#include "xenia/base/profiling.h"
#include "xenia/cpu/processor.h"
#include "xenia/gpu/gl4/gl4_command_processor.h"
#include "xenia/gpu/gl4/gl4_gpu_flags.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/ui/gl/gl_provider.h"
#include "xenia/ui/window.h"
namespace xe {
namespace gpu {
namespace gl4 {
GL4GraphicsSystem::GL4GraphicsSystem() = default;
GL4GraphicsSystem::~GL4GraphicsSystem() = default;
X_STATUS GL4GraphicsSystem::Setup(cpu::Processor* processor,
kernel::KernelState* kernel_state,
ui::Window* target_window) {
// Must create the provider so we can create contexts.
provider_ = xe::ui::gl::GLProvider::Create(target_window);
auto result = GraphicsSystem::Setup(processor, kernel_state, target_window);
if (result) {
return result;
}
display_context_ =
reinterpret_cast<xe::ui::gl::GLContext*>(target_window->context());
return X_STATUS_SUCCESS;
}
void GL4GraphicsSystem::Shutdown() { GraphicsSystem::Shutdown(); }
std::unique_ptr<CommandProcessor> GL4GraphicsSystem::CreateCommandProcessor() {
return std::unique_ptr<CommandProcessor>(
new GL4CommandProcessor(this, kernel_state_));
}
void GL4GraphicsSystem::Swap(xe::ui::UIEvent* e) {
if (!command_processor_) {
return;
}
// Check for pending swap.
auto& swap_state = command_processor_->swap_state();
{
std::lock_guard<std::mutex> lock(swap_state.mutex);
if (swap_state.pending) {
swap_state.pending = false;
std::swap(swap_state.front_buffer_texture,
swap_state.back_buffer_texture);
}
}
if (!swap_state.front_buffer_texture) {
// Not yet ready.
return;
}
// Blit the frontbuffer.
display_context_->blitter()->BlitTexture2D(
static_cast<GLuint>(swap_state.front_buffer_texture),
Rect2D(0, 0, swap_state.width, swap_state.height),
Rect2D(0, 0, target_window_->width(), target_window_->height()),
GL_LINEAR, false);
}
} // namespace gl4
} // namespace gpu
} // namespace xe

View File

@ -1,45 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_GL4_GRAPHICS_SYSTEM_H_
#define XENIA_GPU_GL4_GL4_GRAPHICS_SYSTEM_H_
#include <memory>
#include "xenia/gpu/graphics_system.h"
#include "xenia/ui/gl/gl_context.h"
namespace xe {
namespace gpu {
namespace gl4 {
class GL4GraphicsSystem : public GraphicsSystem {
public:
GL4GraphicsSystem();
~GL4GraphicsSystem() override;
std::wstring name() const override { return L"GL4"; }
X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state,
ui::Window* target_window) override;
void Shutdown() override;
private:
std::unique_ptr<CommandProcessor> CreateCommandProcessor() override;
void Swap(xe::ui::UIEvent* e) override;
xe::ui::gl::GLContext* display_context_ = nullptr;
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_GL4_GRAPHICS_SYSTEM_H_

View File

@ -1,298 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/gl4/gl4_shader.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
namespace xe {
namespace gpu {
namespace gl4 {
GL4Shader::GL4Shader(ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count)
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
GL4Shader::~GL4Shader() {
glDeleteProgram(program_);
glDeleteVertexArrays(1, &vao_);
}
bool GL4Shader::Prepare() {
// Build static vertex array descriptor.
if (!PrepareVertexArrayObject()) {
XELOGE("Unable to prepare vertex shader array object");
return false;
}
bool success = true;
if (!CompileShader()) {
host_error_log_ = GetShaderInfoLog();
success = false;
}
if (success && !LinkProgram()) {
host_error_log_ = GetProgramInfoLog();
success = false;
}
if (success) {
host_binary_ = GetBinary();
host_disassembly_ = GetHostDisasmNV(host_binary_);
}
is_valid_ = success;
return success;
}
bool GL4Shader::LoadFromBinary(const uint8_t* blob, GLenum binary_format,
size_t length) {
program_ = glCreateProgram();
glProgramBinary(program_, binary_format, blob, GLsizei(length));
GLint link_status = 0;
glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
if (!link_status) {
// Failed to link. Not fatal - just clean up so we can get generated later.
XELOGD("GL4Shader::LoadFromBinary failed. Log:\n%s",
GetProgramInfoLog().c_str());
glDeleteProgram(program_);
program_ = 0;
return false;
}
// Build static vertex array descriptor.
if (!PrepareVertexArrayObject()) {
XELOGE("Unable to prepare vertex shader array object");
return false;
}
// Success!
host_binary_ = GetBinary();
host_disassembly_ = GetHostDisasmNV(host_binary_);
is_valid_ = true;
return true;
}
bool GL4Shader::PrepareVertexArrayObject() {
glCreateVertexArrays(1, &vao_);
for (const auto& vertex_binding : vertex_bindings()) {
for (const auto& attrib : vertex_binding.attributes) {
auto comp_count = GetVertexFormatComponentCount(
attrib.fetch_instr.attributes.data_format);
GLenum comp_type = 0;
bool is_signed = attrib.fetch_instr.attributes.is_signed;
switch (attrib.fetch_instr.attributes.data_format) {
case VertexFormat::k_8_8_8_8:
comp_type = is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
break;
case VertexFormat::k_2_10_10_10:
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
comp_count = 1;
break;
case VertexFormat::k_10_11_11:
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
comp_count = 1;
break;
case VertexFormat::k_11_11_10:
assert_true(is_signed);
comp_type = is_signed ? GL_R11F_G11F_B10F : 0;
break;
case VertexFormat::k_16_16:
comp_type = is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
break;
case VertexFormat::k_16_16_FLOAT:
comp_type = GL_HALF_FLOAT;
break;
case VertexFormat::k_16_16_16_16:
comp_type = is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
break;
case VertexFormat::k_16_16_16_16_FLOAT:
comp_type = GL_HALF_FLOAT;
break;
case VertexFormat::k_32:
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
break;
case VertexFormat::k_32_32:
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
break;
case VertexFormat::k_32_32_32_32:
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
break;
case VertexFormat::k_32_FLOAT:
comp_type = GL_FLOAT;
break;
case VertexFormat::k_32_32_FLOAT:
comp_type = GL_FLOAT;
break;
case VertexFormat::k_32_32_32_FLOAT:
comp_type = GL_FLOAT;
break;
case VertexFormat::k_32_32_32_32_FLOAT:
comp_type = GL_FLOAT;
break;
default:
assert_unhandled_case(attrib.fetch_instr.attributes.data_format);
return false;
}
glEnableVertexArrayAttrib(vao_, attrib.attrib_index);
glVertexArrayAttribBinding(vao_, attrib.attrib_index,
vertex_binding.binding_index);
glVertexArrayAttribFormat(vao_, attrib.attrib_index, comp_count,
comp_type,
!attrib.fetch_instr.attributes.is_integer,
attrib.fetch_instr.attributes.offset * 4);
}
}
return true;
}
bool GL4Shader::CompileShader() {
assert_zero(program_);
shader_ =
glCreateShader(shader_type_ == ShaderType::kVertex ? GL_VERTEX_SHADER
: GL_FRAGMENT_SHADER);
if (!shader_) {
XELOGE("OpenGL could not create a shader object!");
return false;
}
auto source_str = GetTranslatedBinaryString();
auto source_str_ptr = source_str.c_str();
GLint source_length = GLint(source_str.length());
glShaderSource(shader_, 1, &source_str_ptr, &source_length);
glCompileShader(shader_);
GLint status = 0;
glGetShaderiv(shader_, GL_COMPILE_STATUS, &status);
return status == GL_TRUE;
}
bool GL4Shader::LinkProgram() {
program_ = glCreateProgram();
if (!program_) {
XELOGE("OpenGL could not create a shader program!");
return false;
}
glAttachShader(program_, shader_);
// Enable TFB
if (shader_type_ == ShaderType::kVertex) {
const GLchar* feedbackVaryings = "gl_Position";
glTransformFeedbackVaryings(program_, 1, &feedbackVaryings,
GL_SEPARATE_ATTRIBS);
}
glProgramParameteri(program_, GL_PROGRAM_SEPARABLE, GL_TRUE);
glLinkProgram(program_);
GLint link_status = 0;
glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
if (!link_status) {
assert_always("Unable to link generated shader");
return false;
}
return true;
}
std::string GL4Shader::GetShaderInfoLog() {
if (!shader_) {
return "GL4Shader::GetShaderInfoLog(): Program is NULL";
}
std::string log;
GLint log_length = 0;
glGetShaderiv(shader_, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
log.resize(log_length - 1);
glGetShaderInfoLog(shader_, log_length, &log_length, &log[0]);
}
return log;
}
std::string GL4Shader::GetProgramInfoLog() {
if (!program_) {
return "GL4Shader::GetProgramInfoLog(): Program is NULL";
}
std::string log;
GLint log_length = 0;
glGetProgramiv(program_, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
log.resize(log_length - 1);
glGetProgramInfoLog(program_, log_length, &log_length, &log[0]);
}
return log;
}
std::vector<uint8_t> GL4Shader::GetBinary(GLenum* binary_format) {
std::vector<uint8_t> binary;
// Get program binary, if it's available.
GLint binary_length = 0;
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &binary_length);
if (binary_length) {
binary.resize(binary_length);
GLenum binary_format_tmp = 0;
glGetProgramBinary(program_, binary_length, &binary_length,
&binary_format_tmp, binary.data());
if (binary_format) {
*binary_format = binary_format_tmp;
}
}
return binary;
}
std::string GL4Shader::GetHostDisasmNV(const std::vector<uint8_t>& binary) {
// If we are on nvidia, we can find the disassembly string.
// I haven't been able to figure out from the format how to do this
// without a search like this.
std::string disasm;
const char* disasm_start = nullptr;
size_t search_offset = 0;
const char* search_start = reinterpret_cast<const char*>(binary.data());
while (true) {
auto p = reinterpret_cast<const char*>(memchr(
binary.data() + search_offset, '!', binary.size() - search_offset));
if (!p) {
break;
}
if (p[0] == '!' && p[1] == '!' && p[2] == 'N' && p[3] == 'V') {
disasm_start = p;
break;
}
search_offset = p - search_start;
++search_offset;
}
if (disasm_start) {
disasm = std::string(disasm_start);
} else {
disasm = std::string("Shader disassembly not available.");
}
return disasm;
}
} // namespace gl4
} // namespace gpu
} // namespace xe

View File

@ -1,54 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_GL4_SHADER_H_
#define XENIA_GPU_GL4_GL4_SHADER_H_
#include <string>
#include "xenia/gpu/shader.h"
#include "xenia/ui/gl/gl_context.h"
namespace xe {
namespace gpu {
namespace gl4 {
class GL4Shader : public Shader {
public:
GL4Shader(ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count);
~GL4Shader() override;
GLuint program() const { return program_; }
GLuint shader() const { return shader_; }
GLuint vao() const { return vao_; }
bool Prepare();
bool LoadFromBinary(const uint8_t* blob, GLenum binary_format, size_t length);
std::vector<uint8_t> GetBinary(GLenum* binary_format = nullptr);
protected:
bool PrepareVertexArrayObject();
bool CompileShader();
bool LinkProgram();
std::string GetShaderInfoLog();
std::string GetProgramInfoLog();
static std::string GetHostDisasmNV(const std::vector<uint8_t>& binary);
GLuint program_ = 0;
GLuint shader_ = 0;
GLuint vao_ = 0;
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_GL4_SHADER_H_

View File

@ -1,187 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/gl4/gl4_shader_cache.h"
#include <cinttypes>
#include "xenia/base/filesystem.h"
#include "xenia/base/logging.h"
#include "xenia/base/mapped_memory.h"
#include "xenia/gpu/gl4/gl4_gpu_flags.h"
#include "xenia/gpu/gl4/gl4_shader.h"
#include "xenia/gpu/glsl_shader_translator.h"
#include "xenia/gpu/gpu_flags.h"
#include "third_party/xxhash/xxhash.h"
namespace xe {
namespace gpu {
namespace gl4 {
GL4ShaderCache::GL4ShaderCache(GlslShaderTranslator* shader_translator)
: shader_translator_(shader_translator) {}
GL4ShaderCache::~GL4ShaderCache() {}
void GL4ShaderCache::Reset() {
shader_map_.clear();
all_shaders_.clear();
}
GL4Shader* GL4ShaderCache::LookupOrInsertShader(ShaderType shader_type,
const uint32_t* dwords,
uint32_t dword_count) {
// Hash the input memory and lookup the shader.
GL4Shader* shader_ptr = nullptr;
uint64_t hash = XXH64(dwords, dword_count * sizeof(uint32_t), 0);
auto it = shader_map_.find(hash);
if (it != shader_map_.end()) {
// Shader has been previously loaded.
// TODO(benvanik): compare bytes? Likelihood of collision is low.
shader_ptr = it->second;
} else {
// Check filesystem cache.
shader_ptr = FindCachedShader(shader_type, hash, dwords, dword_count);
if (shader_ptr) {
// Found!
XELOGGPU("Loaded %s shader from cache (hash: %.16" PRIX64 ")",
shader_type == ShaderType::kVertex ? "vertex" : "pixel", hash);
return shader_ptr;
}
// Not found in cache - load from scratch.
auto shader =
std::make_unique<GL4Shader>(shader_type, hash, dwords, dword_count);
shader_ptr = shader.get();
shader_map_.insert({hash, shader_ptr});
all_shaders_.emplace_back(std::move(shader));
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
if (shader_translator_->Translate(shader_ptr)) {
shader_ptr->Prepare();
if (shader_ptr->is_valid()) {
CacheShader(shader_ptr);
XELOGGPU("Generated %s shader at 0x%.16" PRIX64 " (%db):\n%s",
shader_type == ShaderType::kVertex ? "vertex" : "pixel",
dwords, dword_count * 4,
shader_ptr->ucode_disassembly().c_str());
}
// Dump shader files if desired.
if (!FLAGS_dump_shaders.empty()) {
shader_ptr->Dump(FLAGS_dump_shaders, "gl4");
}
} else {
XELOGE("Shader failed translation");
}
}
return shader_ptr;
}
void GL4ShaderCache::CacheShader(GL4Shader* shader) {
if (FLAGS_shader_cache_dir.empty()) {
// Cache disabled.
return;
}
GLenum binary_format = 0;
auto binary = shader->GetBinary(&binary_format);
if (binary.size() == 0) {
// No binary returned.
return;
}
auto cache_dir = xe::to_absolute_path(xe::to_wstring(FLAGS_shader_cache_dir));
xe::filesystem::CreateFolder(cache_dir);
auto filename =
cache_dir + xe::format_string(
L"%.16" PRIX64 ".%s", shader->ucode_data_hash(),
shader->type() == ShaderType::kPixel ? L"frag" : L"vert");
auto file = xe::filesystem::OpenFile(filename, "wb");
if (!file) {
// Not fatal, but not too good.
return;
}
std::vector<uint8_t> cached_shader_mem;
// Resize this vector to the final filesize (- 1 to account for dummy array
// in CachedShader)
cached_shader_mem.resize(sizeof(CachedShader) + binary.size() - 1);
auto cached_shader =
reinterpret_cast<CachedShader*>(cached_shader_mem.data());
cached_shader->magic = xe::byte_swap('XSHD');
cached_shader->version = 0; // TODO
cached_shader->shader_type = uint8_t(shader->type());
cached_shader->binary_len = uint32_t(binary.size());
cached_shader->binary_format = binary_format;
std::memcpy(cached_shader->binary, binary.data(), binary.size());
fwrite(cached_shader_mem.data(), cached_shader_mem.size(), 1, file);
fclose(file);
}
GL4Shader* GL4ShaderCache::FindCachedShader(ShaderType shader_type,
uint64_t hash,
const uint32_t* dwords,
uint32_t dword_count) {
if (FLAGS_shader_cache_dir.empty()) {
// Cache disabled.
return nullptr;
}
auto cache_dir = xe::to_absolute_path(xe::to_wstring(FLAGS_shader_cache_dir));
auto filename =
cache_dir +
xe::format_string(L"%.16" PRIX64 ".%s", hash,
shader_type == ShaderType::kPixel ? L"frag" : L"vert");
if (!xe::filesystem::PathExists(filename)) {
return nullptr;
}
// Shader is cached. Open it up.
auto map = xe::MappedMemory::Open(filename, MappedMemory::Mode::kRead);
if (!map) {
// Should not fail
assert_always();
return nullptr;
}
auto cached_shader = reinterpret_cast<CachedShader*>(map->data());
// TODO: Compare versions
if (cached_shader->magic != xe::byte_swap('XSHD')) {
return nullptr;
}
auto shader =
std::make_unique<GL4Shader>(shader_type, hash, dwords, dword_count);
// Gather the binding points.
// TODO: Make Shader do this on construction.
// TODO: Regenerate microcode disasm/etc on load.
shader_translator_->GatherAllBindingInformation(shader.get());
if (!shader->LoadFromBinary(cached_shader->binary,
cached_shader->binary_format,
cached_shader->binary_len)) {
// Failed to load from binary.
return nullptr;
}
auto shader_ptr = shader.get();
shader_map_.insert({hash, shader_ptr});
all_shaders_.emplace_back(std::move(shader));
return shader_ptr;
}
} // namespace gl4
} // namespace gpu
} // namespace xe

View File

@ -1,62 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_SHADER_CACHE_H_
#define XENIA_GPU_GL4_SHADER_CACHE_H_
#include <cstdint>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <vector>
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
class GlslShaderTranslator;
namespace gl4 {
class GL4Shader;
class GL4ShaderCache {
public:
GL4ShaderCache(GlslShaderTranslator* shader_translator);
~GL4ShaderCache();
void Reset();
GL4Shader* LookupOrInsertShader(ShaderType shader_type,
const uint32_t* dwords, uint32_t dword_count);
private:
// Cached shader file format.
struct CachedShader {
uint32_t magic;
uint32_t version; // Version of the shader translator used.
uint8_t shader_type; // ShaderType enum
uint32_t binary_len; // Code length
uint32_t binary_format; // Binary format (from OpenGL)
uint8_t binary[1]; // Code
};
void CacheShader(GL4Shader* shader);
GL4Shader* FindCachedShader(ShaderType shader_type, uint64_t hash,
const uint32_t* dwords, uint32_t dword_count);
GlslShaderTranslator* shader_translator_ = nullptr;
std::vector<std::unique_ptr<GL4Shader>> all_shaders_;
std::unordered_map<uint64_t, GL4Shader*> shader_map_;
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_SHADER_CACHE_H_

View File

@ -1,109 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/logging.h"
#include "xenia/base/main.h"
#include "xenia/gpu/gl4/gl4_command_processor.h"
#include "xenia/gpu/gl4/gl4_graphics_system.h"
#include "xenia/gpu/trace_viewer.h"
namespace xe {
namespace gpu {
namespace gl4 {
using namespace xe::gpu::xenos;
class GL4TraceViewer : public TraceViewer {
public:
std::unique_ptr<gpu::GraphicsSystem> CreateGraphicsSystem() override {
return std::unique_ptr<gpu::GraphicsSystem>(new GL4GraphicsSystem());
}
uintptr_t GetColorRenderTarget(uint32_t pitch, MsaaSamples samples,
uint32_t base,
ColorRenderTargetFormat format) override {
auto command_processor = static_cast<GL4CommandProcessor*>(
graphics_system_->command_processor());
return command_processor->GetColorRenderTarget(pitch, samples, base,
format);
}
uintptr_t GetDepthRenderTarget(uint32_t pitch, MsaaSamples samples,
uint32_t base,
DepthRenderTargetFormat format) override {
auto command_processor = static_cast<GL4CommandProcessor*>(
graphics_system_->command_processor());
return command_processor->GetDepthRenderTarget(pitch, samples, base,
format);
}
uintptr_t GetTextureEntry(const TextureInfo& texture_info,
const SamplerInfo& sampler_info) override {
auto command_processor = static_cast<GL4CommandProcessor*>(
graphics_system_->command_processor());
auto entry_view =
command_processor->texture_cache()->Demand(texture_info, sampler_info);
if (!entry_view) {
return 0;
}
auto texture = entry_view->texture;
return static_cast<uintptr_t>(texture->handle);
}
size_t QueryVSOutputSize() override {
auto command_processor = static_cast<GL4CommandProcessor*>(
graphics_system_->command_processor());
auto draw_batcher = command_processor->draw_batcher();
return draw_batcher->QueryTFBSize();
}
size_t QueryVSOutputElementSize() override {
// vec4 always has 4 elements.
return 4;
}
bool QueryVSOutput(void* buffer, size_t size) override {
auto command_processor = static_cast<GL4CommandProcessor*>(
graphics_system_->command_processor());
auto draw_batcher = command_processor->draw_batcher();
return draw_batcher->ReadbackTFB(buffer, size);
}
bool Setup() override {
if (!TraceViewer::Setup()) {
return false;
}
// Enable TFB
auto command_processor = static_cast<GL4CommandProcessor*>(
graphics_system_->command_processor());
auto draw_batcher = command_processor->draw_batcher();
draw_batcher->set_tfb_enabled(true);
return true;
}
private:
};
int trace_viewer_main(const std::vector<std::wstring>& args) {
GL4TraceViewer trace_viewer;
return trace_viewer.Main(args);
}
} // namespace gl4
} // namespace gpu
} // namespace xe
DEFINE_ENTRY_POINT(L"xenia-gpu-gl4-trace-viewer",
L"xenia-gpu-gl4-trace-viewer some.trace",
xe::gpu::gl4::trace_viewer_main);

View File

@ -1,88 +0,0 @@
project_root = "../../../.."
include(project_root.."/tools/build")
group("src")
project("xenia-gpu-gl4")
uuid("da10149d-efb0-44aa-924c-a76a46e1f04d")
kind("StaticLib")
language("C++")
links({
"glew",
"xenia-base",
"xenia-gpu",
"xenia-ui",
"xenia-ui-gl",
"xxhash",
})
defines({
"GLEW_STATIC=1",
"GLEW_MX=1",
})
includedirs({
project_root.."/third_party/gflags/src",
})
local_platform_files()
-- TODO(benvanik): kill this and move to the debugger UI.
group("src")
project("xenia-gpu-gl4-trace-viewer")
uuid("450f965b-a019-4ba5-bc6f-99901e5a4c8d")
kind("WindowedApp")
language("C++")
links({
"capstone",
"gflags",
"glew",
"imgui",
"libavcodec",
"libavutil",
"snappy",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
"xenia-core",
"xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-gpu",
"xenia-gpu-gl4",
"xenia-hid",
"xenia-hid-nop",
"xenia-kernel",
"xenia-ui",
"xenia-ui-gl",
"xenia-vfs",
"xxhash",
})
flags({
"WinMain", -- Use WinMain instead of main.
})
defines({
"GLEW_STATIC=1",
"GLEW_MX=1",
})
includedirs({
project_root.."/third_party/gflags/src",
})
files({
"gl4_trace_viewer_main.cc",
"../../base/main_"..platform_suffix..".cc",
})
filter("platforms:Windows")
links({
"xenia-apu-xaudio2",
"xenia-hid-winkey",
"xenia-hid-xinput",
})
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-gl4-trace-viewer.vcxproj.user"
if not os.isfile(user_file) then
debugdir(project_root)
debugargs({
"--flagfile=scratch/flags.txt",
"2>&1",
"1>scratch/stdout-trace-viewer.txt",
})
end

File diff suppressed because it is too large

View File

@ -1,119 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_TEXTURE_CACHE_H_
#define XENIA_GPU_GL4_TEXTURE_CACHE_H_
#include <mutex>
#include <unordered_map>
#include <vector>
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/memory.h"
#include "xenia/ui/gl/blitter.h"
#include "xenia/ui/gl/circular_buffer.h"
#include "xenia/ui/gl/gl_context.h"
namespace xe {
namespace gpu {
namespace gl4 {
using xe::ui::gl::Blitter;
using xe::ui::gl::CircularBuffer;
using xe::ui::gl::Rect2D;
class TextureCache {
public:
struct TextureEntry;
struct SamplerEntry {
SamplerInfo sampler_info;
GLuint handle;
};
struct TextureEntryView {
TextureEntry* texture;
SamplerEntry* sampler;
uint64_t sampler_hash;
GLuint64 texture_sampler_handle;
};
struct TextureEntry {
TextureInfo texture_info;
uintptr_t access_watch_handle;
GLuint handle;
bool pending_invalidation;
std::vector<std::unique_ptr<TextureEntryView>> views;
};
TextureCache();
~TextureCache();
bool Initialize(Memory* memory, CircularBuffer* scratch_buffer);
void Shutdown();
void Scavenge();
void Clear();
void EvictAllTextures();
TextureEntryView* Demand(const TextureInfo& texture_info,
const SamplerInfo& sampler_info);
GLuint CopyTexture(Blitter* blitter, uint32_t guest_address,
uint32_t logical_width, uint32_t logical_height,
uint32_t block_width, uint32_t block_height,
TextureFormat format, bool swap_channels,
GLuint src_texture, Rect2D src_rect, Rect2D dest_rect);
GLuint ConvertTexture(Blitter* blitter, uint32_t guest_address,
uint32_t logical_width, uint32_t logical_height,
uint32_t block_width, uint32_t block_height,
TextureFormat format, bool swap_channels,
GLuint src_texture, Rect2D src_rect, Rect2D dest_rect);
TextureEntry* LookupAddress(uint32_t guest_address, uint32_t width,
uint32_t height, TextureFormat format);
private:
struct ReadBufferTexture {
uintptr_t access_watch_handle;
uint32_t guest_address;
uint32_t logical_width;
uint32_t logical_height;
uint32_t block_width;
uint32_t block_height;
TextureFormat format;
GLuint handle;
};
SamplerEntry* LookupOrInsertSampler(const SamplerInfo& sampler_info,
uint64_t opt_hash = 0);
void EvictSampler(SamplerEntry* entry);
TextureEntry* LookupOrInsertTexture(const TextureInfo& texture_info,
uint64_t opt_hash = 0);
void EvictTexture(TextureEntry* entry);
bool UploadTexture1D(GLuint texture, const TextureInfo& texture_info);
bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
bool UploadTextureCube(GLuint texture, const TextureInfo& texture_info);
Memory* memory_;
CircularBuffer* scratch_buffer_;
std::unordered_map<uint64_t, SamplerEntry*> sampler_entries_;
std::unordered_map<uint64_t, TextureEntry*> texture_entries_;
std::vector<ReadBufferTexture*> read_buffer_textures_;
std::mutex invalidated_textures_mutex_;
std::vector<TextureEntry*>* invalidated_textures_;
std::vector<TextureEntry*> invalidated_textures_sets_[2];
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_TEXTURE_CACHE_H_

View File

@ -959,6 +959,7 @@ void GlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
case InstructionStorageTarget::kDepth:
EmitSourceDepth("gl_FragDepth");
break;
default:
case InstructionStorageTarget::kNone:
return;
}

View File

@ -170,7 +170,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) {
uint32_t r = (addr & 0xFFFF) / 4;
switch (r) {
case 0x0F00: // ?
case 0x0F00: // RB_EDRAM_TIMING
return 0x08100748;
case 0x0F01: // RB_BC_CONTROL
return 0x0000200E;
@ -211,7 +211,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
}
void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t log2_size) {
command_processor_->InitializeRingBuffer(ptr, (log2_size | 0x2) + 1);
command_processor_->InitializeRingBuffer(ptr, log2_size + 0x3);
}
void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,

View File

@ -44,6 +44,8 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
out_info->border_color = static_cast<BorderColor>(fetch.border_color);
out_info->lod_bias = (fetch.lod_bias) / 32.f;
out_info->mip_min_level = fetch.mip_min_level;
out_info->mip_max_level = fetch.mip_max_level;
return true;
}
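
A small worked note on the bias conversion above (an inference from the divide by 32, not something stated in the commit): the raw fetch field appears to be a fixed-point value with five fractional bits, so

$$\text{lod\_bias} = \frac{\text{fetch.lod\_bias}}{32}, \qquad \text{e.g. } 16 \mapsto +0.5,\quad 48 \mapsto +1.5$$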

View File

@ -26,6 +26,8 @@ struct SamplerInfo {
AnisoFilter aniso_filter;
BorderColor border_color;
float lod_bias;
uint32_t mip_min_level;
uint32_t mip_max_level;
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
const ParsedTextureFetchInstruction& fetch_instr,
@ -36,7 +38,9 @@ struct SamplerInfo {
return min_filter == other.min_filter && mag_filter == other.mag_filter &&
mip_filter == other.mip_filter && clamp_u == other.clamp_u &&
clamp_v == other.clamp_v && clamp_w == other.clamp_w &&
aniso_filter == other.aniso_filter;
aniso_filter == other.aniso_filter && lod_bias == other.lod_bias &&
mip_min_level == other.mip_min_level &&
mip_max_level == other.mip_max_level;
}
};

View File

@ -31,6 +31,11 @@ enum class InstructionStorageTarget {
kPosition,
// Result is stored to the point size export (gl_PointSize).
kPointSize,
// Result is stored as memexport destination address.
// [physical >> 2, ??, ??, ??]
kExportAddress,
// Result is stored to memexport destination data.
kExportData,
// Result is stored to a color target export indexed by storage_index [0-3].
kColorTarget,
// Result is stored to the depth export (gl_FragDepth).

View File

@ -1147,14 +1147,15 @@ void ShaderTranslator::ParseAluVectorInstruction(
} else if (is_vertex_shader()) {
switch (dest_num) {
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
// TODO: Memexport registers
i.result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0;
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 62:
i.result.storage_target = InstructionStorageTarget::kPosition;
@ -1198,14 +1199,15 @@ void ShaderTranslator::ParseAluVectorInstruction(
i.result.storage_index = 3;
break;
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
// TODO: Memexport registers
i.result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0;
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 61:
i.result.storage_target = InstructionStorageTarget::kDepth;
@ -1303,6 +1305,17 @@ void ShaderTranslator::ParseAluScalarInstruction(
: InstructionStorageAddressingMode::kStatic;
} else if (is_vertex_shader()) {
switch (dest_num) {
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 62:
i.result.storage_target = InstructionStorageTarget::kPosition;
break;
@ -1344,6 +1357,17 @@ void ShaderTranslator::ParseAluScalarInstruction(
i.result.storage_target = InstructionStorageTarget::kColorTarget;
i.result.storage_index = 3;
break;
case 32:
i.result.storage_target = InstructionStorageTarget::kExportAddress;
break;
case 33:
case 34:
case 35:
case 36:
case 37:
i.result.storage_index = dest_num - 33;
i.result.storage_target = InstructionStorageTarget::kExportData;
break;
case 61:
i.result.storage_target = InstructionStorageTarget::kDepth;
break;

View File

@ -38,6 +38,13 @@ void DisassembleResultOperand(const InstructionResult& result,
case InstructionStorageTarget::kPointSize:
out->Append("oPts");
break;
case InstructionStorageTarget::kExportAddress:
out->Append("eA");
break;
case InstructionStorageTarget::kExportData:
out->Append("eM");
uses_storage_index = true;
break;
case InstructionStorageTarget::kColorTarget:
out->AppendFormat("oC");
uses_storage_index = true;
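
Taken together, the ParseAluVectorInstruction/ParseAluScalarInstruction changes above and the disassembler cases here imply the following export-register mapping (a summary for the reader, not text from the commit):

// dest 32     -> InstructionStorageTarget::kExportAddress   (disassembles as "eA")
// dest 33..37 -> InstructionStorageTarget::kExportData,
//                storage_index = dest - 33                  (disassembles as "eM0".."eM4")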

View File

@ -67,6 +67,7 @@ void SpirvShaderTranslator::StartTranslation() {
vec2_int_type_ = b.makeVectorType(int_type_, 2);
vec2_uint_type_ = b.makeVectorType(uint_type_, 2);
vec2_float_type_ = b.makeVectorType(float_type_, 2);
vec3_int_type_ = b.makeVectorType(int_type_, 3);
vec3_float_type_ = b.makeVectorType(float_type_, 3);
vec4_float_type_ = b.makeVectorType(float_type_, 4);
vec4_int_type_ = b.makeVectorType(int_type_, 4);
@ -482,8 +483,10 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
mainFn, "main");
b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft);
// FIXME(DrChat): We need to declare the DepthReplacing execution mode if
// we write depth, and we must unconditionally write depth if declared!
// If we write a new depth value, we must declare this mode!
if (writes_depth_) {
b.addExecutionMode(mainFn, spv::ExecutionModeDepthReplacing);
}
for (auto id : interface_ids_) {
entry->addIdOperand(id);
@ -527,10 +530,18 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
// Reinsert w
p = b.createCompositeInsert(p_w, p, vec4_float_type_, 3);
// Apply window offset
// pos.xy += window_scale.zw
auto window_offset = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_,
{window_scale, window_scale, 2, 3, 0, 1});
auto p_offset =
b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, p, window_offset);
// Apply window scaling
// pos.xy *= window_scale.xy
auto p_scaled =
b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p, window_scale);
auto p_scaled = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p_offset,
window_scale);
p = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_,
{p, p_scaled, 4, 5, 2, 3});
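
Reading the shuffle/add/multiply sequence above together with the window_scale push-constant comment later in this commit (scale in xy, offset in zw), the intended transform on the clip-space position appears to be the following, with z and w left untouched:

$$x' = (x + o_x)\,s_x, \qquad y' = (y + o_y)\,s_y, \qquad \text{window\_scale} = (s_x,\, s_y,\, o_x,\, o_y)$$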
@ -608,6 +619,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
// Cleanup builder.
cf_blocks_.clear();
writes_depth_ = false;
loop_head_block_ = nullptr;
loop_body_block_ = nullptr;
loop_cont_block_ = nullptr;
@ -1786,60 +1798,10 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
tex_[dim_idx], std::vector<Id>({texture_index}));
auto texture = b.createLoad(texture_ptr);
spv::Id size = 0;
if (instr.attributes.offset_x || instr.attributes.offset_y) {
auto image =
b.createUnaryOp(spv::OpImage, b.getImageType(texture), texture);
spv::Builder::TextureParameters params;
std::memset(&params, 0, sizeof(params));
params.sampler = image;
params.lod = b.makeIntConstant(0);
size = b.createTextureQueryCall(spv::Op::OpImageQuerySizeLod, params,
false);
if (instr.dimension == TextureDimension::k1D) {
size = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, size);
} else if (instr.dimension == TextureDimension::k2D) {
size =
b.createUnaryOp(spv::Op::OpConvertSToF, vec2_float_type_, size);
} else if (instr.dimension == TextureDimension::k3D) {
size =
b.createUnaryOp(spv::Op::OpConvertSToF, vec3_float_type_, size);
} else if (instr.dimension == TextureDimension::kCube) {
size =
b.createUnaryOp(spv::Op::OpConvertSToF, vec4_float_type_, size);
}
}
if (instr.dimension == TextureDimension::k1D) {
src = b.createCompositeExtract(src, float_type_, 0);
if (instr.attributes.offset_x) {
auto offset = b.makeFloatConstant(instr.attributes.offset_x + 0.5f);
offset = b.createBinOp(spv::Op::OpFDiv, float_type_, offset, size);
src = b.createBinOp(spv::Op::OpFAdd, float_type_, src, offset);
}
// https://msdn.microsoft.com/en-us/library/windows/desktop/bb944006.aspx
// "Because the runtime does not support 1D textures, the compiler will
// use a 2D texture with the knowledge that the y-coordinate is
// unimportant."
src = b.createCompositeConstruct(
vec2_float_type_,
std::vector<Id>({src, b.makeFloatConstant(0.0f)}));
} else if (instr.dimension == TextureDimension::k2D) {
src = b.createRvalueSwizzle(spv::NoPrecision, vec2_float_type_, src,
std::vector<uint32_t>({0, 1}));
if (instr.attributes.offset_x || instr.attributes.offset_y) {
auto offset = b.makeCompositeConstant(
vec2_float_type_,
std::vector<Id>(
{b.makeFloatConstant(instr.attributes.offset_x + 0.5f),
b.makeFloatConstant(instr.attributes.offset_y + 0.5f)}));
offset =
b.createBinOp(spv::Op::OpFDiv, vec2_float_type_, offset, size);
src = b.createBinOp(spv::Op::OpFAdd, vec2_float_type_, src, offset);
}
// Upgrade 1D src coordinate into 2D
src = b.createCompositeConstruct(vec2_float_type_,
{src, b.makeFloatConstant(0.f)});
}
spv::Builder::TextureParameters params = {0};
@ -1848,6 +1810,50 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
if (instr.attributes.use_register_lod) {
params.lod = b.createLoad(lod_);
}
if (instr.attributes.offset_x || instr.attributes.offset_y ||
instr.attributes.offset_z) {
float offset_x = instr.attributes.offset_x;
float offset_y = instr.attributes.offset_y;
float offset_z = instr.attributes.offset_z;
// Round numbers away from zero. No effect if offset is 0.
offset_x += instr.attributes.offset_x < 0 ? -0.5f : 0.5f;
offset_y += instr.attributes.offset_y < 0 ? -0.5f : 0.5f;
offset_z += instr.attributes.offset_z < 0 ? -0.5f : 0.5f;
Id offset = 0;
switch (instr.dimension) {
case TextureDimension::k1D: {
// https://msdn.microsoft.com/en-us/library/windows/desktop/bb944006.aspx
// "Because the runtime does not support 1D textures, the compiler
// will use a 2D texture with the knowledge that the y-coordinate is
// unimportant."
offset = b.makeCompositeConstant(
vec2_int_type_,
{b.makeIntConstant(int(offset_x)), b.makeIntConstant(0)});
} break;
case TextureDimension::k2D: {
offset = b.makeCompositeConstant(
vec2_int_type_, {b.makeIntConstant(int(offset_x)),
b.makeIntConstant(int(offset_y))});
} break;
case TextureDimension::k3D: {
offset = b.makeCompositeConstant(
vec3_int_type_, {b.makeIntConstant(int(offset_x)),
b.makeIntConstant(int(offset_y)),
b.makeIntConstant(int(offset_z))});
} break;
case TextureDimension::kCube: {
// FIXME(DrChat): Is this the correct dimension? I forget
offset = b.makeCompositeConstant(
vec3_int_type_, {b.makeIntConstant(int(offset_x)),
b.makeIntConstant(int(offset_y)),
b.makeIntConstant(int(offset_z))});
} break;
}
params.offset = offset;
}
dest =
b.createTextureCall(spv::NoPrecision, vec4_float_type_, false, false,
@ -1908,11 +1914,39 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
}
} break;
case FetchOpcode::kGetTextureComputedLod: {
// TODO(DrChat): Verify if this implementation is correct.
// This is only valid in pixel shaders.
assert_true(is_pixel_shader());
auto texture_index =
b.makeUintConstant(tex_binding_map_[instr.operands[1].storage_index]);
auto texture_ptr =
b.createAccessChain(spv::StorageClass::StorageClassUniformConstant,
tex_[dim_idx], std::vector<Id>({texture_index}));
auto texture = b.createLoad(texture_ptr);
if (instr.dimension == TextureDimension::k1D) {
// Upgrade 1D src coordinate into 2D
src = b.createCompositeConstruct(vec2_float_type_,
{src, b.makeFloatConstant(0.f)});
}
spv::Builder::TextureParameters params = {};
params.sampler = texture;
params.coords = src;
auto lod =
b.createTextureQueryCall(spv::Op::OpImageQueryLod, params, false);
dest = b.createCompositeExtract(lod, float_type_, 1);
dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_);
} break;
case FetchOpcode::kSetTextureLod: {
// <lod register> = src1.x (MIP level)
// ... immediately after
// tfetch UseRegisterLOD=true
b.createStore(b.createCompositeExtract(src, float_type_, 0), lod_);
b.createStore(src, lod_);
} break;
default:
@ -2210,8 +2244,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
auto src1_y = b.createCompositeExtract(sources[1], float_type_, 1);
auto dst_y = b.createBinOp(spv::Op::OpFMul, float_type_, src0_y, src1_y);
auto src0_z = b.createCompositeExtract(sources[0], float_type_, 3);
auto src1_w = b.createCompositeExtract(sources[1], float_type_, 4);
auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2);
auto src1_w = b.createCompositeExtract(sources[1], float_type_, 3);
dest = b.createCompositeConstruct(
vec4_float_type_,
std::vector<Id>({b.makeFloatConstant(1.f), dst_y, src0_z, src1_w}));
@ -3267,7 +3301,9 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
storage_type = float_type_;
storage_offsets.push_back(0);
storage_array = false;
writes_depth_ = true;
break;
default:
case InstructionStorageTarget::kNone:
assert_unhandled_case(result.storage_target);
break;

View File

@ -29,7 +29,7 @@ namespace gpu {
// supported size).
struct SpirvPushConstants {
// Accessible to vertex shader only:
float window_scale[4]; // scale x/y, viewport width/height (pixels)
float window_scale[4]; // scale x/y, offset x/y (pixels)
float vtx_fmt[4];
// Accessible to geometry shader only:
@ -132,7 +132,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
// Types.
spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0;
spv::Id vec2_int_type_ = 0, vec2_uint_type_ = 0;
spv::Id vec2_int_type_ = 0, vec2_uint_type_ = 0, vec3_int_type_ = 0;
spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0;
spv::Id vec4_int_type_ = 0, vec4_uint_type_ = 0;
spv::Id vec2_bool_type_ = 0, vec3_bool_type_ = 0, vec4_bool_type_ = 0;
@ -163,6 +163,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id vtx_ = 0; // Vertex buffer array (32 runtime arrays)
std::unordered_map<uint32_t, uint32_t> vtx_binding_map_;
bool writes_depth_ = false;
// SPIR-V IDs that are part of the in/out interface.
std::vector<spv::Id> interface_ids_;

View File

@ -15,6 +15,7 @@
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "third_party/xxhash/xxhash.h"
@ -59,6 +60,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
info.endianness = static_cast<Endian>(fetch.endianness);
info.is_tiled = fetch.tiled;
info.has_packed_mips = fetch.packed_mips;
info.mip_address = fetch.mip_address << 12;
info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1;
info.input_length = 0; // Populated below.
if (info.format_info()->format == TextureFormat::kUnknown) {
@ -70,15 +73,16 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
// Must be called here when we know the format.
switch (info.dimension) {
case Dimension::k1D: {
assert_always();
info.CalculateTextureSizes1D(fetch.size_1d.width + 1);
} break;
case Dimension::k2D: {
info.CalculateTextureSizes2D(fetch.size_2d.width + 1,
fetch.size_2d.height + 1);
} break;
case Dimension::k3D: {
// TODO(benvanik): calculate size.
return false;
info.CalculateTextureSizes3D(fetch.size_3d.width + 1,
fetch.size_3d.height + 1,
fetch.size_3d.depth + 1);
}
case Dimension::kCube: {
info.CalculateTextureSizesCube(fetch.size_stack.width + 1,
@ -106,6 +110,8 @@ bool TextureInfo::PrepareResolve(uint32_t physical_address,
info.endianness = endian;
info.is_tiled = true;
info.has_packed_mips = false;
info.mip_address = 0;
info.mip_levels = 1;
info.input_length = 0;
if (info.format_info()->format == TextureFormat::kUnknown) {
@ -117,14 +123,46 @@ bool TextureInfo::PrepareResolve(uint32_t physical_address,
return true;
}
void TextureInfo::CalculateTextureSizes1D(uint32_t width) {
size_1d.logical_width = width;
auto format = format_info();
// width in blocks.
uint32_t block_width =
xe::round_up(size_1d.logical_width, format->block_width) /
format->block_width;
if (is_tiled) {
// If the texture is tiled, its dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size_1d.block_width = xe::round_up(block_width, 32);
} else {
size_1d.block_width = block_width;
}
uint32_t bytes_per_block = format->block_width * format->bits_per_pixel / 8;
uint32_t byte_pitch = size_1d.block_width * bytes_per_block;
uint32_t texel_width;
if (!is_tiled) {
// Each row must be a multiple of 256 in linear textures.
byte_pitch = xe::round_up(byte_pitch, 256);
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
} else {
texel_width = size_1d.block_width * format->block_width;
}
size_1d.input_width = texel_width;
size_1d.input_pitch = byte_pitch;
input_length = size_1d.input_pitch;
}
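
A short worked example of the sizing rules above (hypothetical numbers, not from the commit): a linear, untiled 1D texture of width 100 in k_8_8_8_8, which uses one 32-bit texel per 1x1 block.

// block_width     = round_up(100, 1) / 1              = 100 blocks
// bytes_per_block = 1 * 32 / 8                        = 4
// byte_pitch      = 100 * 4 = 400  -> round_up(, 256) = 512
// input_width     = (512 / 4) * 1                     = 128 texels
// input_pitch     = 512 bytes,  input_length          = 512 bytes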
void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) {
size_2d.logical_width = width;
size_2d.logical_height = height;
// Here be dragons. The values here are used in texture_cache.cc to copy
// images and create GL textures. Changes here will impact that code.
// TODO(benvanik): generic texture copying utility.
auto format = format_info();
// w/h in blocks.
@ -135,11 +173,15 @@ void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) {
xe::round_up(size_2d.logical_height, format->block_height) /
format->block_height;
// Tiles are 32x32 blocks. The pitch of all textures must be a multiple of tile
// dimensions.
uint32_t tile_width = xe::round_up(block_width, 32) / 32;
size_2d.block_width = tile_width * 32;
size_2d.block_height = block_height;
if (is_tiled) {
// If the texture is tiled, its dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size_2d.block_width = xe::round_up(block_width, 32);
size_2d.block_height = xe::round_up(block_height, 32);
} else {
size_2d.block_width = block_width;
size_2d.block_height = block_height;
}
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
@ -161,6 +203,52 @@ void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) {
input_length = size_2d.input_pitch * size_2d.block_height;
}
void TextureInfo::CalculateTextureSizes3D(uint32_t width, uint32_t height,
uint32_t depth) {
size_3d.logical_width = width;
size_3d.logical_height = height;
auto format = format_info();
// w/h in blocks must be a multiple of block size.
uint32_t block_width =
xe::round_up(size_3d.logical_width, format->block_width) /
format->block_width;
uint32_t block_height =
xe::round_up(size_3d.logical_height, format->block_height) /
format->block_height;
if (is_tiled) {
// If the texture is tiled, its dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size_3d.block_width = xe::round_up(block_width, 32);
size_3d.block_height = xe::round_up(block_height, 32);
} else {
size_3d.block_width = block_width;
size_3d.block_height = block_height;
}
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
uint32_t byte_pitch = size_3d.block_width * bytes_per_block;
uint32_t texel_width;
if (!is_tiled) {
// Each row must be a multiple of 256 in linear textures.
byte_pitch = xe::round_up(byte_pitch, 256);
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
} else {
texel_width = size_3d.block_width * format->block_width;
}
size_3d.input_width = texel_width;
size_3d.input_height = size_3d.block_height * format->block_height;
size_3d.input_pitch = byte_pitch;
size_3d.input_face_length = size_3d.input_pitch * size_3d.block_height;
input_length = size_3d.input_face_length * depth;
}
void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height,
uint32_t depth) {
assert_true(depth == 6);
@ -177,11 +265,15 @@ void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height,
xe::round_up(size_cube.logical_height, format->block_height) /
format->block_height;
// Tiles are 32x32 blocks. All textures must be multiples of tile dimensions.
uint32_t tile_width = xe::round_up(block_width, 32) / 32;
uint32_t tile_height = xe::round_up(block_height, 32) / 32;
size_cube.block_width = tile_width * 32;
size_cube.block_height = tile_height * 32;
if (is_tiled) {
// If the texture is tiled, its dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size_cube.block_width = xe::round_up(block_width, 32);
size_cube.block_height = xe::round_up(block_height, 32);
} else {
size_cube.block_width = block_width;
size_cube.block_height = block_height;
}
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
@ -204,7 +296,194 @@ void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height,
input_length = size_cube.input_face_length * 6;
}
bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
static void TextureSwap(Endian endianness, void* dest, const void* src,
size_t length) {
switch (endianness) {
case Endian::k8in16:
xe::copy_and_swap_16_unaligned(dest, src, length / 2);
break;
case Endian::k8in32:
xe::copy_and_swap_32_unaligned(dest, src, length / 4);
break;
case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word
xe::copy_and_swap_16_in_32_unaligned(dest, src, length);
break;
default:
case Endian::kUnspecified:
std::memcpy(dest, src, length);
break;
}
}
static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
const uint8_t* src, Endian src_endianness) {
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
union {
uint8_t data[8];
struct {
uint8_t r0, g0, r1, g1;
uint32_t xx;
};
} block;
static_assert(sizeof(block) == 8, "CTX1 block mismatch");
const uint32_t bytes_per_block = 8;
TextureSwap(src_endianness, block.data, src, bytes_per_block);
uint8_t cr[4] = {
block.r0, block.r1,
static_cast<uint8_t>(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1),
static_cast<uint8_t>(1.f / 3.f * block.r0 + 2.f / 3.f * block.r1)};
uint8_t cg[4] = {
block.g0, block.g1,
static_cast<uint8_t>(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1),
static_cast<uint8_t>(1.f / 3.f * block.g0 + 2.f / 3.f * block.g1)};
for (uint32_t oy = 0; oy < 4; ++oy) {
for (uint32_t ox = 0; ox < 4; ++ox) {
uint8_t xx = (block.xx >> (((ox + (oy * 4)) * 2))) & 3;
dest[(oy * dest_pitch) + (ox * 2) + 0] = cr[xx];
dest[(oy * dest_pitch) + (ox * 2) + 1] = cg[xx];
}
}
}
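
In equation form, the CTX1 decode above builds a four-entry R8G8 palette from the two endpoints and picks one entry per texel with a 2-bit index (this restates the code, it is not additional documentation from the commit):

$$c_0 = (r_0, g_0), \quad c_1 = (r_1, g_1), \quad c_2 = \tfrac{2}{3}c_0 + \tfrac{1}{3}c_1, \quad c_3 = \tfrac{1}{3}c_0 + \tfrac{2}{3}c_1$$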
void TextureInfo::ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
const FormatInfo* format_info, uint32_t offset_x,
uint32_t offset_y, uint32_t block_pitch,
uint32_t width, uint32_t height,
uint32_t output_width) {
// TODO(benvanik): optimize this inner loop (or work by tiles).
uint32_t bytes_per_block = format_info->block_width *
format_info->block_height *
format_info->bits_per_pixel / 8;
uint32_t output_pitch =
output_width * format_info->block_width * format_info->bits_per_pixel / 8;
uint32_t output_row_height = 1;
if (format_info->format == TextureFormat::k_CTX1) {
// TODO: Can we calculate this?
output_row_height = 4;
}
// logical w/h in blocks.
uint32_t block_width =
xe::round_up(width, format_info->block_width) / format_info->block_width;
uint32_t block_height = xe::round_up(height, format_info->block_height) /
format_info->block_height;
// log2 of the bytes per block (valid for the power-of-two block sizes used here).
auto log2_bpp =
(bytes_per_block / 4) + ((bytes_per_block / 2) >> (bytes_per_block / 4));
// Offset to the current row, in bytes.
uint32_t output_row_offset = 0;
for (uint32_t y = 0; y < block_height; y++) {
auto input_row_offset =
TextureInfo::TiledOffset2DOuter(offset_y + y, block_pitch, log2_bpp);
// Go block-by-block on this row.
uint32_t output_offset = output_row_offset;
for (uint32_t x = 0; x < block_width; x++) {
auto input_offset = TextureInfo::TiledOffset2DInner(
offset_x + x, offset_y + y, log2_bpp, input_row_offset);
input_offset >>= log2_bpp;
if (format_info->format == TextureFormat::k_CTX1) {
// Convert to R8G8.
ConvertTexelCTX1(&dest[output_offset], output_pitch, src, endian);
} else {
// Generic swap to destination.
TextureSwap(endian, dest + output_offset,
src + input_offset * bytes_per_block, bytes_per_block);
}
output_offset += bytes_per_block;
}
output_row_offset += output_pitch * output_row_height;
}
}
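
The log2_bpp expression near the top of ConvertTiled is terse; here is a minimal standalone sketch (not part of the commit) showing that it reproduces log2(bytes_per_block) for the power-of-two block sizes that occur in practice:

#include <cstdint>

// Same expression as in ConvertTiled, lifted into a constexpr helper.
constexpr uint32_t Log2Bpp(uint32_t bytes_per_block) {
  return (bytes_per_block / 4) +
         ((bytes_per_block / 2) >> (bytes_per_block / 4));
}
static_assert(Log2Bpp(1) == 0, "1-byte blocks");
static_assert(Log2Bpp(2) == 1, "2-byte blocks");
static_assert(Log2Bpp(4) == 2, "4-byte blocks");
static_assert(Log2Bpp(8) == 3, "8-byte blocks");
static_assert(Log2Bpp(16) == 4, "16-byte blocks");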
uint32_t TextureInfo::GetMaxMipLevels(uint32_t width, uint32_t height,
uint32_t depth) {
return 1 + xe::log2_floor(std::max({width, height, depth}));
}
uint32_t TextureInfo::GetMipLocation(const TextureInfo& src, uint32_t mip,
uint32_t* offset_x, uint32_t* offset_y) {
if (mip == 0) {
// Short-circuit. Mip 0 is always stored in guest_address.
GetPackedTileOffset(src, offset_x, offset_y);
return src.guest_address;
}
// If the texture is <= 16 pixels w/h, the mips are packed with the base
// texture. Otherwise, they're stored beginning from mip_address.
uint32_t address_base = std::min(src.width, src.height) < 16
? src.guest_address
: src.mip_address;
uint32_t address_offset = 0;
// Walk forward to find the address of the mip.
for (uint32_t i = 1; i < mip; i++) {
uint32_t logical_width = std::max(xe::next_pow2(src.width + 1) >> i, 1u);
uint32_t logical_height = std::max(xe::next_pow2(src.height + 1) >> i, 1u);
if (std::min(logical_width, logical_height) <= 16) {
// We've reached the point where the mips are packed into a single tile.
break;
}
address_offset += GetMipSize(src, i);
}
// Now, check if the mip is packed at an offset.
GetPackedTileOffset(xe::next_pow2(src.width + 1) >> mip,
xe::next_pow2(src.height + 1) >> mip, src.format_info(),
offset_x, offset_y);
return address_base + address_offset;
}
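
A worked trace of GetMipLocation above (hypothetical 256x256 texture with packed mips; assumes width/height store size minus one, as the next_pow2(x + 1) calls suggest):

// mip 5: address_base = mip_address (min dimension of the base level is >= 16).
// Walk i = 1..4: logical sizes are 128, 64, 32, 16; the walk stops at 16x16, so
//   address_offset = GetMipSize(mip 1) + GetMipSize(mip 2) + GetMipSize(mip 3).
// GetPackedTileOffset(8, 8, ...) then places the 8x8 level at a block offset
//   inside that final packed tile, which it shares with the 16x16 level.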
uint32_t TextureInfo::GetMipSize(const TextureInfo& src, uint32_t mip) {
if (mip == 0) {
return src.input_length;
}
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t logical_width = xe::next_pow2(src.width + 1) >> mip;
uint32_t logical_height = xe::next_pow2(src.height + 1) >> mip;
// w/h in blocks
uint32_t block_width =
xe::round_up(logical_width, src.format_info()->block_width) /
src.format_info()->block_width;
uint32_t block_height =
xe::round_up(logical_height, src.format_info()->block_height) /
src.format_info()->block_height;
uint32_t size = block_width * block_height * bytes_per_block;
// Minimum of one tile, which is 32x32 blocks.
uint32_t tile_size = 32 * 32 * bytes_per_block;
return std::max(size, tile_size) * (src.depth + 1);
}
uint32_t TextureInfo::GetMipLinearSize(const TextureInfo& src, uint32_t mip) {
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t size = src.input_length >> (mip * 2);
// The size is a multiple of the block size.
return xe::round_up(size, bytes_per_block);
}
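
Equivalently (restating the shift above, not adding new behavior): each mip level holds a quarter of the texels of the previous one, so

$$\text{GetMipLinearSize}(src, m) = \operatorname{round\_up}\!\left(\left\lfloor \frac{\text{input\_length}}{4^{m}} \right\rfloor,\ \text{bytes\_per\_block}\right)$$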
bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
const FormatInfo* format_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y) {
// Tile size is 32x32, and once textures go <=16 they are packed into a
@ -226,6 +505,13 @@ bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
// This only works for square textures, or textures that are some non-pot
// <= square. As soon as the aspect ratio goes weird, the textures start to
// stretch across tiles.
//
// The 2x2 and 1x1 squares are packed in their specific positions because
// each square is the size of at least one block (which is 4x4 pixels max)
// 4x4: x = width & ~0x3
// 2x2: y = (width & 0x3) << 2
// 1x1: y = (width & 0x3) << 2
//
// if (tile_aligned(w) > tile_aligned(h)) {
// // wider than tall, so packed horizontally
// } else if (tile_aligned(w) < tile_aligned(h)) {
@ -238,44 +524,57 @@ bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
// The minimum dimension is what matters most: if either width or height
// is <= 16 this mode kicks in.
if (std::min(texture_info.size_2d.logical_width,
texture_info.size_2d.logical_height) > 16) {
uint32_t log2_width = xe::log2_ceil(width);
uint32_t log2_height = xe::log2_ceil(height);
if (std::min(log2_width, log2_height) > 4) {
// Too big, not packed.
*out_offset_x = 0;
*out_offset_y = 0;
return false;
}
if (xe::log2_ceil(texture_info.size_2d.logical_width) >
xe::log2_ceil(texture_info.size_2d.logical_height)) {
// Find the block offset of the mip.
if (log2_width > log2_height) {
// Wider than tall. Laid out vertically.
*out_offset_x = 0;
*out_offset_y = 16;
*out_offset_y = log2_height > 0x1 ? 1 << log2_height : 0;
*out_offset_x = log2_height <= 0x1 ? 1 << (log2_width + 2) : 0;
} else {
// Taller than wide. Laid out horizontally.
*out_offset_x = 16;
*out_offset_y = 0;
*out_offset_x = log2_width > 0x1 ? 1 << log2_width : 0;
*out_offset_y = log2_width <= 0x1 ? 1 << (log2_height + 2) : 0;
}
*out_offset_x /= texture_info.format_info()->block_width;
*out_offset_y /= texture_info.format_info()->block_height;
*out_offset_x /= format_info->block_width;
*out_offset_y /= format_info->block_height;
return true;
}
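
A worked trace of the branches above (hypothetical DXT1 mips with 4x4 blocks; not text from the commit):

// 16x16: log2 w/h = 4/4 -> packed; equal dimensions take the "taller than wide"
//        branch: x = 1 << 4 = 16 texels, y = 0 -> block offset (16/4, 0) = (4, 0).
// 2x2:   log2 w/h = 1/1 -> packed; x = 0, y = 1 << (1 + 2) = 8 texels
//        -> block offset (0, 8/4) = (0, 2).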
bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y) {
return GetPackedTileOffset(xe::next_pow2(texture_info.size_2d.logical_width),
xe::next_pow2(texture_info.size_2d.logical_height),
texture_info.format_info(), out_offset_x,
out_offset_y);
}
// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp) {
uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
uint32_t micro = ((y & 6) << 2) << log_bpp;
return macro + ((micro & ~15) << 1) + (micro & 15) +
((y & 8) << (3 + log_bpp)) + ((y & 1) << 4);
uint32_t log2_bpp) {
uint32_t macro = ((y / 32) * (width / 32)) << (log2_bpp + 7);
uint32_t micro = ((y & 6) << 2) << log2_bpp;
return macro + ((micro & ~0xF) << 1) + (micro & 0xF) +
((y & 8) << (3 + log2_bpp)) + ((y & 1) << 4);
}
uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y,
uint32_t log2_bpp,
uint32_t base_offset) {
uint32_t macro = (x >> 5) << (bpp + 7);
uint32_t micro = (x & 7) << bpp;
uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +
uint32_t macro = (x / 32) << (log2_bpp + 7);
uint32_t micro = (x & 7) << log2_bpp;
uint32_t offset =
base_offset + (macro + ((micro & ~0xF) << 1) + (micro & 0xF));
return ((offset & ~0x1FF) << 3) + ((offset & 0x1C0) << 2) + (offset & 0x3F) +
((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
}
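A hedged usage sketch of the two helpers above, mirroring the CPU untiling loop elsewhere in this commit (block_x, block_y, pitch_in_blocks and bytes_per_block are hypothetical inputs; log2_bpp is log2 of bytes_per_block):

// Byte offset of the block at (block_x, block_y) within a tiled surface.
uint32_t row_base =
    TextureInfo::TiledOffset2DOuter(block_y, pitch_in_blocks, log2_bpp);
uint32_t element =
    TextureInfo::TiledOffset2DInner(block_x, block_y, log2_bpp, row_base);
uint32_t byte_offset = (element >> log2_bpp) * bytes_per_block;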

View File

@ -256,6 +256,8 @@ struct TextureInfo {
Endian endianness;
bool is_tiled;
bool has_packed_mips;
uint32_t mip_address;
uint32_t mip_levels;
uint32_t input_length;
const FormatInfo* format_info() const {
@ -282,8 +284,6 @@ struct TextureInfo {
uint32_t input_height; // texel height
uint32_t input_pitch; // byte pitch
} size_2d;
struct {
} size_3d;
struct {
uint32_t logical_width;
uint32_t logical_height;
@ -293,7 +293,7 @@ struct TextureInfo {
uint32_t input_height; // texel height
uint32_t input_pitch; // byte pitch
uint32_t input_face_length; // byte pitch of face
} size_cube;
} size_3d, size_cube;
};
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
@ -304,12 +304,33 @@ struct TextureInfo {
uint32_t width, uint32_t height,
TextureInfo* out_info);
static void ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
const FormatInfo* format_info, uint32_t offset_x,
uint32_t offset_y, uint32_t block_pitch,
uint32_t width, uint32_t height,
uint32_t output_width);
static uint32_t GetMaxMipLevels(uint32_t width, uint32_t height,
uint32_t depth);
// Get the memory location of a mip. offset_x and offset_y are in blocks.
static uint32_t GetMipLocation(const TextureInfo& src, uint32_t mip,
uint32_t* offset_x, uint32_t* offset_y);
static uint32_t GetMipSize(const TextureInfo& src, uint32_t mip);
// Get the byte size of a MIP when stored linearly.
static uint32_t GetMipLinearSize(const TextureInfo& src, uint32_t mip);
static bool GetPackedTileOffset(uint32_t width, uint32_t height,
const FormatInfo* format_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y);
static bool GetPackedTileOffset(const TextureInfo& texture_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y);
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp);
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
uint32_t log2_bpp);
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp,
uint32_t base_offset);
uint64_t hash() const;
@ -318,7 +339,9 @@ struct TextureInfo {
}
private:
void CalculateTextureSizes1D(uint32_t width);
void CalculateTextureSizes2D(uint32_t width, uint32_t height);
void CalculateTextureSizes3D(uint32_t width, uint32_t height, uint32_t depth);
void CalculateTextureSizesCube(uint32_t width, uint32_t height,
uint32_t depth);
};

View File

@ -15,8 +15,7 @@
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "third_party/vulkan/vk_mem_alloc.h"
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
using namespace xe::gpu::xenos;
@ -104,7 +103,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
device_,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
capacity, 4096);
capacity, 256);
}
BufferCache::~BufferCache() { Shutdown(); }
@ -120,9 +119,13 @@ VkResult BufferCache::Initialize() {
}
// Create a memory allocator for textures.
VmaVulkanFunctions vulkan_funcs = {};
ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs);
VmaAllocatorCreateInfo alloc_info = {
0, *device_, *device_, 0, 0, nullptr, nullptr,
0, *device_, *device_, 0, 0, nullptr, nullptr, 0, nullptr, &vulkan_funcs,
};
status = vmaCreateAllocator(&alloc_info, &mem_allocator_);
if (status != VK_SUCCESS) {
return status;
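FillVMAVulkanFunctions itself is not shown in this diff; under volk the Vulkan entry points are global function pointers rather than exported symbols, so VMA has to be handed a VmaVulkanFunctions table explicitly. A minimal sketch of what such a helper presumably looks like (an assumption, not the actual xenia implementation):

// Assumed sketch: copy the volk-loaded global entry points into VMA's table.
inline void FillVMAVulkanFunctions(VmaVulkanFunctions* funcs) {
  funcs->vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties;
  funcs->vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties;
  funcs->vkAllocateMemory = vkAllocateMemory;
  funcs->vkFreeMemory = vkFreeMemory;
  funcs->vkMapMemory = vkMapMemory;
  funcs->vkUnmapMemory = vkUnmapMemory;
  funcs->vkBindBufferMemory = vkBindBufferMemory;
  funcs->vkBindImageMemory = vkBindImageMemory;
  funcs->vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements;
  funcs->vkGetImageMemoryRequirements = vkGetImageMemoryRequirements;
  funcs->vkCreateBuffer = vkCreateBuffer;
  funcs->vkDestroyBuffer = vkDestroyBuffer;
  funcs->vkCreateImage = vkCreateImage;
  funcs->vkDestroyImage = vkDestroyImage;
}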
@ -147,10 +150,10 @@ VkResult xe::gpu::vulkan::BufferCache::CreateVertexDescriptorPool() {
std::vector<VkDescriptorPoolSize> pool_sizes;
pool_sizes.push_back({
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
65536,
32 * 16384,
});
vertex_descriptor_pool_ =
std::make_unique<ui::vulkan::DescriptorPool>(*device_, 65536, pool_sizes);
vertex_descriptor_pool_ = std::make_unique<ui::vulkan::DescriptorPool>(
*device_, 32 * 16384, pool_sizes);
// 32 storage buffers available to vertex shader.
// TODO(DrChat): In the future, this could hold memexport staging data.
@ -287,7 +290,8 @@ VkResult BufferCache::CreateConstantDescriptorSet() {
return VK_SUCCESS;
}
void xe::gpu::vulkan::BufferCache::FreeConstantDescriptorSet() {
void BufferCache::FreeConstantDescriptorSet() {
if (constant_descriptor_set_) {
vkFreeDescriptorSets(*device_, constant_descriptor_pool_, 1,
&constant_descriptor_set_);

View File

@ -569,17 +569,13 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
int16_t window_offset_x = 0;
int16_t window_offset_y = 0;
if ((regs.pa_su_sc_mode_cntl >> 16) & 1) {
window_offset_x = regs.pa_sc_window_offset & 0x7FFF;
window_offset_y = (regs.pa_sc_window_offset >> 16) & 0x7FFF;
if (window_offset_x & 0x4000) {
window_offset_x |= 0x8000;
}
if (window_offset_y & 0x4000) {
window_offset_y |= 0x8000;
}
int16_t window_offset_x = regs.pa_sc_window_offset & 0x7FFF;
int16_t window_offset_y = (regs.pa_sc_window_offset >> 16) & 0x7FFF;
if (window_offset_x & 0x4000) {
window_offset_x |= 0x8000;
}
if (window_offset_y & 0x4000) {
window_offset_y |= 0x8000;
}
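The two ifs above sign-extend the 15-bit window offset fields (bit 14 is the sign bit). An equivalent hedged helper, assuming two's-complement int16_t, could look like:

// Hypothetical helper, not part of this commit.
inline int16_t SignExtend15(uint32_t v) {
  uint16_t shifted = static_cast<uint16_t>((v & 0x7FFF) << 1);  // bit 14 -> 15
  return static_cast<int16_t>(shifted) >> 1;  // arithmetic shift extends sign
}
// window_offset_x = SignExtend15(regs.pa_sc_window_offset);
// window_offset_y = SignExtend15(regs.pa_sc_window_offset >> 16);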
// VK_DYNAMIC_STATE_SCISSOR
@ -593,8 +589,11 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
int32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF;
int32_t ws_w = (regs.pa_sc_window_scissor_br & 0x7FFF) - ws_x;
int32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y;
ws_x += window_offset_x;
ws_y += window_offset_y;
if (!(regs.pa_sc_window_scissor_tl & 0x80000000)) {
// WINDOW_OFFSET_DISABLE is not set, so apply the window offset.
ws_x += window_offset_x;
ws_y += window_offset_y;
}
int32_t adj_x = ws_x - std::max(ws_x, 0);
int32_t adj_y = ws_y - std::max(ws_y, 0);
@ -657,6 +656,11 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
vport_zscale_enable == vport_xoffset_enable ==
vport_yoffset_enable == vport_zoffset_enable);
int16_t vtx_window_offset_x =
(regs.pa_su_sc_mode_cntl >> 16) & 1 ? window_offset_x : 0;
int16_t vtx_window_offset_y =
(regs.pa_su_sc_mode_cntl >> 16) & 1 ? window_offset_y : 0;
float vpw, vph, vpx, vpy;
if (vport_xscale_enable) {
float vox = vport_xoffset_enable ? regs.pa_cl_vport_xoffset : 0;
@ -667,25 +671,21 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
window_width_scalar = window_height_scalar = 1;
vpw = 2 * window_width_scalar * vsx;
vph = -2 * window_height_scalar * vsy;
vpx = window_width_scalar * vox - vpw / 2 + window_offset_x;
vpy = window_height_scalar * voy - vph / 2 + window_offset_y;
vpx = window_width_scalar * vox - vpw / 2 + vtx_window_offset_x;
vpy = window_height_scalar * voy - vph / 2 + vtx_window_offset_y;
} else {
vpw = 2 * 2560.0f * window_width_scalar;
vph = 2 * 2560.0f * window_height_scalar;
vpx = -2560.0f * window_width_scalar + window_offset_x;
vpy = -2560.0f * window_height_scalar + window_offset_y;
// TODO(DrChat): This should be the width/height of the target picture
vpw = 2560.0f;
vph = 2560.0f;
vpx = vtx_window_offset_x;
vpy = vtx_window_offset_y;
}
if (viewport_state_dirty) {
// float texel_offset_x = regs.pa_su_sc_vtx_cntl & 0x01 ? 0.5f : 0.f;
// float texel_offset_y = regs.pa_su_sc_vtx_cntl & 0x01 ? 0.5f : 0.f;
float texel_offset_x = 0.f;
float texel_offset_y = 0.f;
VkViewport viewport_rect;
std::memset(&viewport_rect, 0, sizeof(VkViewport));
viewport_rect.x = vpx + texel_offset_x;
viewport_rect.y = vpy + texel_offset_y;
viewport_rect.x = vpx;
viewport_rect.y = vpy;
viewport_rect.width = vpw;
viewport_rect.height = vph;
@ -766,18 +766,21 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
program_cntl.vs_export_mode == 7);
assert_false(program_cntl.gen_index_vtx);
SpirvPushConstants push_constants;
SpirvPushConstants push_constants = {};
// Done in VS, no need to flush state.
if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
if (vport_xscale_enable) {
push_constants.window_scale[0] = 1.0f;
push_constants.window_scale[1] = -1.0f;
push_constants.window_scale[2] = 0.f;
push_constants.window_scale[3] = 0.f;
} else {
push_constants.window_scale[0] = 1.0f / 2560.0f;
push_constants.window_scale[1] = 1.0f / 2560.0f;
// 1 / unscaled viewport w/h
push_constants.window_scale[0] = window_width_scalar / 1280.f;
push_constants.window_scale[1] = window_height_scalar / 1280.f;
push_constants.window_scale[2] = (-1280.f / window_width_scalar) + 0.5f;
push_constants.window_scale[3] = (-1280.f / window_height_scalar) + 0.5f;
}
push_constants.window_scale[2] = vpw;
push_constants.window_scale[3] = vph;
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.

View File

@ -7,7 +7,7 @@ project("xenia-gpu-vulkan")
kind("StaticLib")
language("C++")
links({
"vulkan-loader",
"volk",
"xenia-base",
"xenia-gpu",
"xenia-ui",
@ -40,7 +40,7 @@ project("xenia-gpu-vulkan-trace-viewer")
"libavutil",
"snappy",
"spirv-tools",
"vulkan-loader",
"volk",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -71,6 +71,15 @@ project("xenia-gpu-vulkan-trace-viewer")
"../../base/main_"..platform_suffix..".cc",
})
filter("platforms:Linux")
links({
"X11",
"xcb",
"X11-xcb",
"GL",
"vulkan",
})
filter("platforms:Windows")
links({
"xenia-apu-xaudio2",
@ -103,7 +112,7 @@ project("xenia-gpu-vulkan-trace-dump")
"libavutil",
"snappy",
"spirv-tools",
"vulkan-loader",
"volk",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -131,6 +140,15 @@ project("xenia-gpu-vulkan-trace-dump")
"../../base/main_"..platform_suffix..".cc",
})
filter("platforms:Linux")
links({
"X11",
"xcb",
"X11-xcb",
"GL",
"vulkan",
})
filter("platforms:Windows")
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-vulkan-trace-dump.vcxproj.user"

View File

@ -87,6 +87,8 @@ class CachedFramebuffer {
CachedTileView* color_attachments[4] = {nullptr};
// Reference to depth/stencil attachment, if used.
CachedTileView* depth_stencil_attachment = nullptr;
// Associated render pass
VkRenderPass render_pass = nullptr;
CachedFramebuffer(VkDevice device, VkRenderPass render_pass,
uint32_t surface_width, uint32_t surface_height,
@ -94,6 +96,8 @@ class CachedFramebuffer {
CachedTileView* target_depth_stencil_attachment);
~CachedFramebuffer();
VkResult Initialize();
bool IsCompatible(const RenderConfiguration& desired_config) const;
private:
@ -117,6 +121,8 @@ class CachedRenderPass {
CachedRenderPass(VkDevice device, const RenderConfiguration& desired_config);
~CachedRenderPass();
VkResult Initialize();
bool IsCompatible(const RenderConfiguration& desired_config) const;
private:
@ -124,10 +130,21 @@ class CachedRenderPass {
};
CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device,
VkCommandBuffer command_buffer,
VkDeviceMemory edram_memory,
TileViewKey view_key)
: device_(*device), key(std::move(view_key)) {
: device_(device), key(std::move(view_key)) {}
CachedTileView::~CachedTileView() {
VK_SAFE_DESTROY(vkDestroyImageView, *device_, image_view, nullptr);
VK_SAFE_DESTROY(vkDestroyImageView, *device_, image_view_depth, nullptr);
VK_SAFE_DESTROY(vkDestroyImageView, *device_, image_view_stencil, nullptr);
VK_SAFE_DESTROY(vkDestroyImage, *device_, image, nullptr);
VK_SAFE_DESTROY(vkFreeMemory, *device_, memory, nullptr);
}
VkResult CachedTileView::Initialize(VkCommandBuffer command_buffer) {
VkResult status = VK_SUCCESS;
// Map format to Vulkan.
VkFormat vulkan_format = VK_FORMAT_UNDEFINED;
uint32_t bpp = 4;
@ -196,22 +213,26 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device,
image_info.queueFamilyIndexCount = 0;
image_info.pQueueFamilyIndices = nullptr;
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
auto err = vkCreateImage(device_, &image_info, nullptr, &image);
CheckResult(err, "vkCreateImage");
status = vkCreateImage(*device_, &image_info, nullptr, &image);
if (status != VK_SUCCESS) {
return status;
}
device->DbgSetObjectName(
device_->DbgSetObjectName(
reinterpret_cast<uint64_t>(image), VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,
xe::format_string("%.8X pitch %.8X(%d)", key.tile_offset, key.tile_width,
xe::format_string("RT %.8X %.8X(%d)", key.tile_offset, key.tile_width,
key.tile_width));
VkMemoryRequirements memory_requirements;
vkGetImageMemoryRequirements(*device, image, &memory_requirements);
vkGetImageMemoryRequirements(*device_, image, &memory_requirements);
// Bind to a newly allocated chunk.
// TODO: Alias from a really big buffer?
memory = device->AllocateMemory(memory_requirements, 0);
err = vkBindImageMemory(device_, image, memory, 0);
CheckResult(err, "vkBindImageMemory");
memory = device_->AllocateMemory(memory_requirements, 0);
status = vkBindImageMemory(*device_, image, memory, 0);
if (status != VK_SUCCESS) {
return status;
}
// Create the image view we'll use to attach it to a framebuffer.
VkImageViewCreateInfo image_view_info;
@ -235,20 +256,26 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device,
image_view_info.subresourceRange.aspectMask =
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
err = vkCreateImageView(device_, &image_view_info, nullptr, &image_view);
CheckResult(err, "vkCreateImageView");
status = vkCreateImageView(*device_, &image_view_info, nullptr, &image_view);
if (status != VK_SUCCESS) {
return status;
}
// Create separate depth/stencil views.
if (key.color_or_depth == 0) {
image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
err = vkCreateImageView(device_, &image_view_info, nullptr,
&image_view_depth);
CheckResult(err, "vkCreateImageView");
status = vkCreateImageView(*device_, &image_view_info, nullptr,
&image_view_depth);
if (status != VK_SUCCESS) {
return status;
}
image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
err = vkCreateImageView(device_, &image_view_info, nullptr,
&image_view_stencil);
CheckResult(err, "vkCreateImageView");
status = vkCreateImageView(*device_, &image_view_info, nullptr,
&image_view_stencil);
if (status != VK_SUCCESS) {
return status;
}
}
// TODO(benvanik): transition to general layout?
@ -279,12 +306,7 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device,
0, 0, nullptr, 0, nullptr, 1, &image_barrier);
image_layout = image_barrier.newLayout;
}
CachedTileView::~CachedTileView() {
vkDestroyImageView(device_, image_view, nullptr);
vkDestroyImage(device_, image, nullptr);
vkFreeMemory(device_, memory, nullptr);
return VK_SUCCESS;
}
CachedFramebuffer::CachedFramebuffer(
@ -294,11 +316,18 @@ CachedFramebuffer::CachedFramebuffer(
: device_(device),
width(surface_width),
height(surface_height),
depth_stencil_attachment(target_depth_stencil_attachment) {
depth_stencil_attachment(target_depth_stencil_attachment),
render_pass(render_pass) {
for (int i = 0; i < 4; ++i) {
color_attachments[i] = target_color_attachments[i];
}
}
CachedFramebuffer::~CachedFramebuffer() {
VK_SAFE_DESTROY(vkDestroyFramebuffer, device_, handle, nullptr);
}
VkResult CachedFramebuffer::Initialize() {
// Create framebuffer.
VkImageView image_views[5] = {nullptr};
int image_view_count = 0;
@ -320,12 +349,7 @@ CachedFramebuffer::CachedFramebuffer(
framebuffer_info.width = width;
framebuffer_info.height = height;
framebuffer_info.layers = 1;
auto err = vkCreateFramebuffer(device_, &framebuffer_info, nullptr, &handle);
CheckResult(err, "vkCreateFramebuffer");
}
CachedFramebuffer::~CachedFramebuffer() {
vkDestroyFramebuffer(device_, handle, nullptr);
return vkCreateFramebuffer(device_, &framebuffer_info, nullptr, &handle);
}
bool CachedFramebuffer::IsCompatible(
@ -372,10 +396,21 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
const RenderConfiguration& desired_config)
: device_(device) {
std::memcpy(&config, &desired_config, sizeof(config));
}
CachedRenderPass::~CachedRenderPass() {
for (auto framebuffer : cached_framebuffers) {
delete framebuffer;
}
cached_framebuffers.clear();
VK_SAFE_DESTROY(vkDestroyRenderPass, device_, handle, nullptr);
}
VkResult CachedRenderPass::Initialize() {
VkSampleCountFlagBits sample_count;
if (FLAGS_vulkan_native_msaa) {
switch (desired_config.surface_msaa) {
switch (config.surface_msaa) {
case MsaaSamples::k1X:
sample_count = VK_SAMPLE_COUNT_1_BIT;
break;
@ -386,7 +421,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
sample_count = VK_SAMPLE_COUNT_4_BIT;
break;
default:
assert_unhandled_case(desired_config.surface_msaa);
assert_unhandled_case(config.surface_msaa);
break;
}
} else {
@ -480,17 +515,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
render_pass_info.dependencyCount = 1;
render_pass_info.pDependencies = dependencies;
auto err = vkCreateRenderPass(device_, &render_pass_info, nullptr, &handle);
CheckResult(err, "vkCreateRenderPass");
}
CachedRenderPass::~CachedRenderPass() {
for (auto framebuffer : cached_framebuffers) {
delete framebuffer;
}
cached_framebuffers.clear();
vkDestroyRenderPass(device_, handle, nullptr);
return vkCreateRenderPass(device_, &render_pass_info, nullptr, &handle);
}
bool CachedRenderPass::IsCompatible(
@ -842,6 +867,14 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
// If no render pass was found in the cache create a new one.
if (!render_pass) {
render_pass = new CachedRenderPass(*device_, *config);
VkResult status = render_pass->Initialize();
if (status != VK_SUCCESS) {
XELOGE("%s: Failed to create render pass, status %s", __func__,
ui::vulkan::to_string(status));
delete render_pass;
return false;
}
cached_render_passes_.push_back(render_pass);
}
@ -913,6 +946,14 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
framebuffer = new CachedFramebuffer(
*device_, render_pass->handle, surface_pitch_px, surface_height_px,
target_color_attachments, target_depth_stencil_attachment);
VkResult status = framebuffer->Initialize();
if (status != VK_SUCCESS) {
XELOGE("%s: Failed to create framebuffer, status %s", __func__,
ui::vulkan::to_string(status));
delete framebuffer;
return false;
}
render_pass->cached_framebuffers.push_back(framebuffer);
}
@ -969,10 +1010,17 @@ CachedTileView* RenderCache::FindOrCreateTileView(
}
// Create a new tile and add to the cache.
tile_view =
new CachedTileView(device_, command_buffer, edram_memory_, view_key);
cached_tile_views_.push_back(tile_view);
tile_view = new CachedTileView(device_, edram_memory_, view_key);
VkResult status = tile_view->Initialize(command_buffer);
if (status != VK_SUCCESS) {
XELOGE("%s: Failed to create tile view, status %s", __func__,
ui::vulkan::to_string(status));
delete tile_view;
return nullptr;
}
cached_tile_views_.push_back(tile_view);
return tile_view;
}

View File

@ -68,11 +68,12 @@ class CachedTileView {
// (if a depth view) Image view of stencil aspect
VkImageView image_view_stencil = nullptr;
CachedTileView(ui::vulkan::VulkanDevice* device,
VkCommandBuffer command_buffer, VkDeviceMemory edram_memory,
CachedTileView(ui::vulkan::VulkanDevice* device, VkDeviceMemory edram_memory,
TileViewKey view_key);
~CachedTileView();
VkResult Initialize(VkCommandBuffer command_buffer);
bool IsEqual(const TileViewKey& other_key) const {
auto a = reinterpret_cast<const uint64_t*>(&key);
auto b = reinterpret_cast<const uint64_t*>(&other_key);
@ -88,7 +89,7 @@ class CachedTileView {
}
private:
VkDevice device_ = nullptr;
ui::vulkan::VulkanDevice* device_ = nullptr;
};
// Parsed render configuration from the current render state.

View File

@ -17,8 +17,7 @@
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "third_party/vulkan/vk_mem_alloc.h"
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
namespace xe {
namespace gpu {
@ -62,7 +61,7 @@ static const TextureConfig texture_configs[64] = {
// TODO: D24 unsupported on AMD.
/* k_24_8 */ {VK_FORMAT_D24_UNORM_S8_UINT},
/* k_24_8_FLOAT */ {VK_FORMAT_D24_UNORM_S8_UINT},
/* k_24_8_FLOAT */ {VK_FORMAT_D32_SFLOAT_S8_UINT},
/* k_16 */ {VK_FORMAT_R16_UNORM},
/* k_16_16 */ {VK_FORMAT_R16G16_UNORM},
/* k_16_16_16_16 */ {VK_FORMAT_R16G16B16A16_UNORM},
@ -198,8 +197,11 @@ VkResult TextureCache::Initialize() {
}
// Create a memory allocator for textures.
VmaVulkanFunctions vulkan_funcs = {};
ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs);
VmaAllocatorCreateInfo alloc_info = {
0, *device_, *device_, 0, 0, nullptr, nullptr,
0, *device_, *device_, 0, 0, nullptr, nullptr, 0, nullptr, &vulkan_funcs,
};
status = vmaCreateAllocator(&alloc_info, &mem_allocator_);
if (status != VK_SUCCESS) {
@ -256,6 +258,18 @@ TextureCache::TextureRegion* TextureCache::AllocateTextureRegion(
return nullptr;
}
assert_not_null(texture->texture_info.format_info());
auto& config =
texture_configs[int(texture->texture_info.format_info()->format)];
VkFormat format = config.host_format;
if (format == VK_FORMAT_UNDEFINED) {
XELOGE(
"Texture Cache: Attempted to allocate texture format %s, which is "
"defined as VK_FORMAT_UNDEFINED!",
texture->texture_info.format_info()->name);
return nullptr;
}
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
image_info.usage =
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
@ -270,7 +284,6 @@ TextureCache::TextureRegion* TextureCache::AllocateTextureRegion(
"(0x%.8X != 0x%.8X)",
texture->texture_info.format_info()->name, texture->format,
(props.optimalTilingFeatures & required_flags), required_flags);
assert_always();
}
if (texture->texture_info.dimension != Dimension::kCube &&
@ -296,7 +309,7 @@ TextureCache::TextureRegion* TextureCache::AllocateTextureRegion(
image_info.format = texture->format;
image_info.extent = region_size;
image_info.mipLevels = 1;
image_info.mipLevels = texture->texture_info.mip_levels;
image_info.arrayLayers = texture->texture_info.depth + 1;
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
@ -327,6 +340,7 @@ TextureCache::TextureRegion* TextureCache::AllocateTextureRegion(
region->image = image;
region->image_layout = image_info.initialLayout;
region->usage_flags = image_info.usage;
region->allocation = allocation;
region->allocation_info = vma_info;
@ -453,8 +467,7 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
// No texture at this location. Make a new one.
auto texture = AllocateTexture(texture_info, required_flags);
if (!texture) {
// Failed to allocate texture (out of memory?)
assert_always();
// Failed to allocate texture (out of memory)
XELOGE("Vulkan Texture Cache: Failed to allocate texture!");
return nullptr;
}
@ -595,8 +608,7 @@ TextureCache::TextureRegion* TextureCache::DemandRegion(
auto texture =
AllocateTexture(texture_info, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
if (!texture) {
// Failed to allocate texture (out of memory?)
assert_always();
// Failed to allocate texture (out of memory)
XELOGE("Vulkan Texture Cache: Failed to allocate texture!");
return nullptr;
}
@ -664,6 +676,13 @@ TextureCache::TextureRegionView* TextureCache::DemandTextureRegionView(
VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE,
VK_COMPONENT_SWIZZLE_IDENTITY,
};
if (region->texture->texture_info.texture_format ==
TextureFormat::k_4_4_4_4) {
swiz_component_map[0] = VK_COMPONENT_SWIZZLE_A;
swiz_component_map[1] = VK_COMPONENT_SWIZZLE_B;
swiz_component_map[2] = VK_COMPONENT_SWIZZLE_G;
swiz_component_map[3] = VK_COMPONENT_SWIZZLE_R;
}
view_info.components = {
swiz_component_map[(swizzle >> 0) & 0x7],
@ -671,7 +690,8 @@ TextureCache::TextureRegionView* TextureCache::DemandTextureRegionView(
swiz_component_map[(swizzle >> 6) & 0x7],
swiz_component_map[(swizzle >> 9) & 0x7],
};
view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0,
region->texture->texture_info.mip_levels, 0, 1};
if (region->texture->format == VK_FORMAT_D16_UNORM_S8_UINT ||
region->texture->format == VK_FORMAT_D24_UNORM_S8_UINT ||
region->texture->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
@ -786,8 +806,6 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) {
sampler_create_info.addressModeW =
address_mode_map[static_cast<int>(sampler_info.clamp_w)];
sampler_create_info.mipLodBias = sampler_info.lod_bias;
float aniso = 0.f;
switch (sampler_info.aniso_filter) {
case AnisoFilter::kDisabled:
@ -819,8 +837,9 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) {
sampler_create_info.compareEnable = VK_FALSE;
sampler_create_info.compareOp = VK_COMPARE_OP_NEVER;
sampler_create_info.minLod = 0.0f;
sampler_create_info.maxLod = 0.0f;
sampler_create_info.mipLodBias = sampler_info.lod_bias;
sampler_create_info.minLod = float(sampler_info.mip_min_level);
sampler_create_info.maxLod = float(sampler_info.mip_max_level);
sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
sampler_create_info.unnormalizedCoordinates = VK_FALSE;
VkSampler vk_sampler;
@ -938,13 +957,13 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
size_t length) {
switch (endianness) {
case Endian::k8in16:
xe::copy_and_swap_16_aligned(dest, src, length / 2);
xe::copy_and_swap_16_unaligned(dest, src, length / 2);
break;
case Endian::k8in32:
xe::copy_and_swap_32_aligned(dest, src, length / 4);
xe::copy_and_swap_32_unaligned(dest, src, length / 4);
break;
case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word
xe::copy_and_swap_16_in_32_aligned(dest, src, length);
xe::copy_and_swap_16_in_32_unaligned(dest, src, length);
break;
default:
case Endian::kUnspecified:
@ -989,144 +1008,56 @@ void TextureCache::FlushPendingCommands(VkCommandBuffer setup_buffer,
vkBeginCommandBuffer(setup_buffer, &begin_info);
}
void TextureCache::ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
const uint8_t* src, Endian src_endianness) {
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
union {
uint8_t data[8];
struct {
uint8_t r0, g0, r1, g1;
uint32_t xx;
};
} block;
static_assert(sizeof(block) == 8, "CTX1 block mismatch");
const uint32_t bytes_per_block = 8;
TextureSwap(src_endianness, block.data, src, bytes_per_block);
uint8_t cr[4] = {
block.r0, block.r1,
static_cast<uint8_t>(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1),
static_cast<uint8_t>(1.f / 3.f * block.r0 + 2.f / 3.f * block.r1)};
uint8_t cg[4] = {
block.g0, block.g1,
static_cast<uint8_t>(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1),
static_cast<uint8_t>(1.f / 3.f * block.g0 + 2.f / 3.f * block.g1)};
for (uint32_t oy = 0; oy < 4; ++oy) {
for (uint32_t ox = 0; ox < 4; ++ox) {
uint8_t xx = (block.xx >> (((ox + (oy * 4)) * 2))) & 3;
dest[(oy * dest_pitch) + (ox * 2) + 0] = cr[xx];
dest[(oy * dest_pitch) + (ox * 2) + 1] = cg[xx];
}
}
}
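For reference, the 2/3 and 1/3 interpolation above with hypothetical endpoints r0 = 30 and r1 = 90:

// cr[0] = 30, cr[1] = 90
// cr[2] = 2/3 * 30 + 1/3 * 90 = 50
// cr[3] = 1/3 * 30 + 2/3 * 90 = 70
// Each 2-bit index in block.xx then selects one of these four values per
// texel; the green channel is expanded the same way, giving R8G8 output.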
bool TextureCache::ConvertTexture2D(uint8_t* dest,
VkBufferImageCopy* copy_region,
const TextureInfo& src) {
void* host_address = memory_->TranslatePhysical(src.guest_address);
uint32_t mip, const TextureInfo& src) {
uint32_t offset_x = 0;
uint32_t offset_y = 0;
uint32_t address =
TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y);
void* host_address = memory_->TranslatePhysical(address);
// Pitch of the source texture in blocks.
uint32_t block_width = mip == 0
? src.size_2d.block_width
: xe::next_pow2(src.size_2d.block_width) >> mip;
uint32_t logical_width = src.size_2d.logical_width >> mip;
uint32_t logical_height = src.size_2d.logical_height >> mip;
uint32_t input_width = src.size_2d.input_width >> mip;
uint32_t input_height = src.size_2d.input_height >> mip;
// All dimensions must be a multiple of block w/h
logical_width = xe::round_up(logical_width, src.format_info()->block_width);
logical_height =
xe::round_up(logical_height, src.format_info()->block_height);
input_width = xe::round_up(input_width, src.format_info()->block_width);
input_height = xe::round_up(input_height, src.format_info()->block_height);
if (!src.is_tiled) {
uint32_t offset_x, offset_y;
if (src.has_packed_mips &&
TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y)) {
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
src_mem += offset_y * src.size_2d.input_pitch;
src_mem += offset_x * bytes_per_block;
for (uint32_t y = 0;
y < std::min(src.size_2d.block_height, src.size_2d.logical_height);
y++) {
TextureSwap(src.endianness, dest, src_mem, src.size_2d.input_pitch);
src_mem += src.size_2d.input_pitch;
dest += src.size_2d.input_pitch;
}
copy_region->bufferRowLength = src.size_2d.input_width;
copy_region->bufferImageHeight = src.size_2d.input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copy_region->imageExtent = {src.size_2d.logical_width,
src.size_2d.logical_height, 1};
return true;
} else {
// Fast path copy entire image.
TextureSwap(src.endianness, dest, host_address, src.input_length);
copy_region->bufferRowLength = src.size_2d.input_width;
copy_region->bufferImageHeight = src.size_2d.input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copy_region->imageExtent = {src.size_2d.logical_width,
src.size_2d.logical_height, 1};
return true;
}
} else {
// Untile image.
// We could do this in a shader to speed things up, as this is pretty
// slow.
// TODO(benvanik): optimize this inner loop (or work by tiles).
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t output_pitch = src.size_2d.input_width *
src.format_info()->block_width *
src.format_info()->bits_per_pixel / 8;
uint32_t output_row_height = 1;
if (src.texture_format == TextureFormat::k_CTX1) {
// TODO: Can we calculate this?
output_row_height = 4;
}
// Tiled textures can be packed; get the offset into the packed texture.
uint32_t offset_x;
uint32_t offset_y;
TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y);
auto log2_bpp = (bytes_per_block >> 2) +
((bytes_per_block >> 1) >> (bytes_per_block >> 2));
// Offset to the current row, in bytes.
uint32_t output_row_offset = 0;
for (uint32_t y = 0; y < src.size_2d.block_height; y++) {
auto input_row_offset = TextureInfo::TiledOffset2DOuter(
offset_y + y, src.size_2d.block_width, log2_bpp);
// Go block-by-block on this row.
uint32_t output_offset = output_row_offset;
for (uint32_t x = 0; x < src.size_2d.block_width; x++) {
auto input_offset = TextureInfo::TiledOffset2DInner(
offset_x + x, offset_y + y, log2_bpp, input_row_offset);
input_offset >>= log2_bpp;
if (src.texture_format == TextureFormat::k_CTX1) {
// Convert to R8G8.
ConvertTexelCTX1(&dest[output_offset], output_pitch, src_mem,
src.endianness);
} else {
// Generic swap to destination.
TextureSwap(src.endianness, dest + output_offset,
src_mem + input_offset * bytes_per_block,
bytes_per_block);
}
output_offset += bytes_per_block;
}
output_row_offset += output_pitch * output_row_height;
}
copy_region->bufferRowLength = src.size_2d.input_width;
copy_region->bufferImageHeight = src.size_2d.input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copy_region->imageExtent = {src.size_2d.logical_width,
src.size_2d.logical_height, 1};
return true;
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
src_mem += offset_y * src.size_2d.input_pitch;
src_mem += offset_x * bytes_per_block;
TextureSwap(src.endianness, dest, src_mem,
src.size_2d.input_pitch * src.size_2d.logical_height);
} else {
// Untile image.
// We could do this in a shader to speed things up, as this is pretty
// slow.
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
TextureInfo::ConvertTiled(dest, src_mem, src.endianness, src.format_info(),
offset_x, offset_y, block_width, logical_width,
logical_height, input_width);
}
return false;
copy_region->bufferRowLength = input_width;
copy_region->bufferImageHeight = input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 1};
copy_region->imageExtent = {logical_width, logical_height, 1};
return true;
}
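A worked pass through the dimension math above, under assumed values (a hypothetical 256x128 tiled texture with 4x4 blocks, no pitch padding, converting mip 2):

// block_width    = next_pow2(64) >> 2 = 16 blocks (mip-0 pitch is 64 blocks)
// logical extent = (256 >> 2) x (128 >> 2) = 64 x 32 texels (block-aligned)
// input extent   = 64 x 32 texels
// copy_region: bufferRowLength = 64, bufferImageHeight = 32,
//              imageSubresource.mipLevel = 2, imageExtent = {64, 32, 1}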
bool TextureCache::ConvertTextureCube(uint8_t* dest,
@ -1189,13 +1120,13 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest,
}
bool TextureCache::ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
const TextureInfo& src) {
uint32_t mip, const TextureInfo& src) {
switch (src.dimension) {
case Dimension::k1D:
assert_always();
break;
case Dimension::k2D:
return ConvertTexture2D(dest, copy_region, src);
return ConvertTexture2D(dest, copy_region, mip, src);
case Dimension::k3D:
assert_always();
break;
@ -1205,6 +1136,154 @@ bool TextureCache::ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
return false;
}
bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
VkFence completion_fence, Texture* dest,
const TextureInfo& src) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
size_t unpack_length;
if (!ComputeTextureStorage(&unpack_length, src)) {
XELOGW("Failed to compute texture storage");
return false;
}
size_t total_unpack_length = unpack_length;
for (uint32_t i = 1; i < src.mip_levels; i++) {
// Add in more space for mips.
total_unpack_length += TextureInfo::GetMipLinearSize(src, i);
}
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
// Need to have unique memory for every upload for at least one frame. If we
// run out of memory, we need to flush all queued upload commands to the
// GPU.
FlushPendingCommands(command_buffer, completion_fence);
// Uploads have been flushed. Continue.
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
// The staging buffer isn't big enough to hold this texture.
XELOGE(
"TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
total_unpack_length);
assert_always();
return false;
}
}
// Grab some temporary memory for staging.
auto alloc = staging_buffer_.Acquire(total_unpack_length, completion_fence);
assert_not_null(alloc);
if (!alloc) {
XELOGE("%s: Failed to acquire staging memory", __func__);
return false;
}
// DEBUG: Check the source data. If it's completely zeroed out, log a warning.
bool valid = false;
auto src_data = memory_->TranslatePhysical(src.guest_address);
for (uint32_t i = 0; i < src.input_length; i++) {
if (src_data[i] != 0) {
valid = true;
break;
}
}
if (!valid) {
XELOGW(
"Warning: Uploading blank texture at address 0x%.8X "
"(length: 0x%.8X, format: %s)",
src.guest_address, src.input_length, src.format_info()->name);
}
// Invalidate contents of all regions for this texture, except for the base
// region.
for (auto region_it = dest->regions.begin(); region_it != dest->regions.end();
++region_it) {
(*region_it)->region_contents_valid = false;
}
dest->base_region->region_contents_valid = true;
// Upload texture into GPU memory.
// TODO: If the GPU supports it, we can submit a compute batch to convert the
// texture and copy it to its destination. Otherwise, fall back to conversion
// on the CPU.
std::vector<VkBufferImageCopy> copy_regions(src.mip_levels);
// Base MIP
if (!ConvertTexture(reinterpret_cast<uint8_t*>(alloc->host_ptr),
&copy_regions[0], 0, src)) {
XELOGW("Failed to convert texture");
return false;
}
copy_regions[0].bufferOffset = alloc->offset;
copy_regions[0].imageOffset = {0, 0, 0};
// Now upload all the MIPs
VkDeviceSize buffer_offset = unpack_length;
for (uint32_t mip = 1; mip < src.mip_levels; mip++) {
uint8_t* dest = reinterpret_cast<uint8_t*>(alloc->host_ptr) + buffer_offset;
ConvertTexture(dest, &copy_regions[mip], mip, src);
copy_regions[mip].bufferOffset = alloc->offset + buffer_offset;
copy_regions[mip].imageOffset = {0, 0, 0};
// With each mip, the length is divided by 4.
buffer_offset += TextureInfo::GetMipLinearSize(src, mip);
}
// Transition the texture into a transfer destination layout.
VkImageMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.oldLayout = dest->base_region->image_layout;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = dest->base_region->image;
barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, src.mip_levels,
copy_regions[0].imageSubresource.baseArrayLayer,
copy_regions[0].imageSubresource.layerCount};
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
barrier.subresourceRange.aspectMask =
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
// Now move the converted texture into the destination.
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
// Do just a depth upload (for now).
// This assumes depth buffers don't have mips (hopefully they don't)
copy_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
}
vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
dest->base_region->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, src.mip_levels,
copy_regions.data());
// Now transition the texture into a shader readonly source.
barrier.srcAccessMask = barrier.dstAccessMask;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.oldLayout = barrier.newLayout;
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &barrier);
dest->base_region->image_layout = barrier.newLayout;
return true;
}
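Sketch of the staging layout the loop above produces for a hypothetical three-mip texture (offsets are relative to alloc->host_ptr / alloc->offset):

// [0, unpack_length)                          -> converted base mip (copy_regions[0])
// [unpack_length, +GetMipLinearSize(src, 1))  -> mip 1 (copy_regions[1])
// [..., +GetMipLinearSize(src, 2))            -> mip 2 (copy_regions[2])
// A single vkCmdCopyBufferToImage call then uploads all src.mip_levels
// regions into the destination image.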
bool TextureCache::ComputeTextureStorage(size_t* output_length,
const TextureInfo& src) {
if (src.texture_format == TextureFormat::k_CTX1) {
@ -1305,128 +1384,6 @@ void TextureCache::WritebackTexture(Texture* texture) {
wb_staging_buffer_.Scavenge();
}
bool TextureCache::UploadTexture(VkCommandBuffer setup_buffer,
VkFence completion_fence, Texture* dest,
const TextureInfo& src) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
size_t unpack_length;
if (!ComputeTextureStorage(&unpack_length, src)) {
XELOGW("Failed to compute texture storage");
return false;
}
if (!staging_buffer_.CanAcquire(unpack_length)) {
// Need to have unique memory for every upload for at least one frame. If we
// run out of memory, we need to flush all queued upload commands to the
// GPU.
FlushPendingCommands(setup_buffer, completion_fence);
// Uploads have been flushed. Continue.
if (!staging_buffer_.CanAcquire(unpack_length)) {
// The staging buffer isn't big enough to hold this texture.
XELOGE(
"TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
unpack_length);
assert_always();
return false;
}
}
// Grab some temporary memory for staging.
auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
assert_not_null(alloc);
// DEBUG: Check the source data. If it's completely zeroed out, log a warning.
bool valid = false;
auto src_data = memory_->TranslatePhysical(src.guest_address);
for (uint32_t i = 0; i < src.input_length; i++) {
if (src_data[i] != 0) {
valid = true;
break;
}
}
if (!valid) {
XELOGW(
"Warning: Uploading blank texture at address 0x%.8X "
"(length: 0x%.8X, format: %d)",
src.guest_address, src.input_length, src.texture_format);
}
// Invalidate contents of all regions for this texture, except for the base
// region.
for (auto region_it = dest->regions.begin(); region_it != dest->regions.end();
++region_it) {
(*region_it)->region_contents_valid = false;
}
dest->base_region->region_contents_valid = true;
// Upload texture into GPU memory.
// TODO: If the GPU supports it, we can submit a compute batch to convert the
// texture and copy it to its destination. Otherwise, fall back to conversion
// on the CPU.
VkBufferImageCopy copy_region;
if (!ConvertTexture(reinterpret_cast<uint8_t*>(alloc->host_ptr), &copy_region,
src)) {
XELOGW("Failed to convert texture");
return false;
}
// Transition the texture into a transfer destination layout.
VkImageMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.oldLayout = dest->base_region->image_layout;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = dest->base_region->image;
barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1,
copy_region.imageSubresource.baseArrayLayer,
copy_region.imageSubresource.layerCount};
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
barrier.subresourceRange.aspectMask =
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
// Now move the converted texture into the destination.
copy_region.bufferOffset = alloc->offset;
copy_region.imageOffset = {0, 0, 0};
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
// Do just a depth upload (for now).
copy_region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
}
vkCmdCopyBufferToImage(setup_buffer, staging_buffer_.gpu_buffer(),
dest->base_region->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_region);
// Now transition the texture into a shader readonly source.
barrier.srcAccessMask = barrier.dstAccessMask;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.oldLayout = barrier.newLayout;
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &barrier);
dest->base_region->image_layout = barrier.newLayout;
return true;
}
void TextureCache::HashTextureBindings(
XXH64_state_t* hash_state, uint32_t& fetch_mask,
const std::vector<Shader::TextureBinding>& bindings) {

View File

@ -65,6 +65,7 @@ class TextureCache {
VkImage image;
VkImageLayout image_layout;
VkImageUsageFlags usage_flags;
VmaAllocation allocation;
VmaAllocationInfo allocation_info;
@ -172,15 +173,12 @@ class TextureCache {
void FlushPendingCommands(VkCommandBuffer setup_buffer,
VkFence completion_fence);
static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
const uint8_t* src, Endian src_endianness);
bool ConvertTexture2D(uint8_t* dest, VkBufferImageCopy* copy_region,
const TextureInfo& src);
bool ConvertTextureCube(uint8_t* dest, VkBufferImageCopy* copy_region,
uint32_t mip, const TextureInfo& src);
bool ConvertTextureCube(uint8_t* dest, VkBufferImageCopy* copy_regions,
const TextureInfo& src);
bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
const TextureInfo& src);
uint32_t mip, const TextureInfo& src);
bool ComputeTextureStorage(size_t* output_length, const TextureInfo& src);
// Writes a texture back into guest memory. This call is (mostly) asynchronous

View File

@ -915,15 +915,8 @@ bool VulkanCommandProcessor::IssueCopy() {
// vtx_window_offset_enable
assert_true(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x00010000);
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
int16_t window_offset_x = window_offset & 0x7FFF;
int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
// Sign-extension
if (window_offset_x & 0x4000) {
window_offset_x |= 0x8000;
}
if (window_offset_y & 0x4000) {
window_offset_y |= 0x8000;
}
int32_t window_offset_x = window_regs->window_offset.window_x_offset;
int32_t window_offset_y = window_regs->window_offset.window_y_offset;
uint32_t dest_texel_size = uint32_t(GetTexelSize(copy_dest_format));
@ -1027,7 +1020,18 @@ bool VulkanCommandProcessor::IssueCopy() {
std::max(1u, dest_logical_height), &texture_info);
auto texture = texture_cache_->DemandResolveTexture(texture_info);
assert_not_null(texture);
if (!texture) {
// Out of memory.
return false;
}
if (!(texture->base_region->usage_flags &
(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
// Resolve image doesn't support drawing, and we don't support conversion.
return false;
}
texture->in_flight_fence = current_batch_fence_;
// For debugging purposes only (trace viewer)
@ -1188,35 +1192,50 @@ bool VulkanCommandProcessor::IssueCopy() {
resolve_extent,
};
// The destination texture is already offset by the window offset, so the
// offset has been handled and must not be added to the destination
// rectangle again.
VkRect2D dst_rect = {
{resolve_offset.x + window_offset_x,
resolve_offset.y + window_offset_y},
{resolve_offset.x, resolve_offset.y},
resolve_extent,
};
// If the destination rectangle starts outside the window, move its origin
// inside: the Xenos never copies pixel data to negative screen coordinates.
int32_t dst_adj_x =
std::max(dst_rect.offset.x, -window_offset_x) - dst_rect.offset.x;
int32_t dst_adj_y =
std::max(dst_rect.offset.y, -window_offset_y) - dst_rect.offset.y;
if (uint32_t(dst_adj_x) > dst_rect.extent.width ||
uint32_t(dst_adj_y) > dst_rect.extent.height) {
// No-op?
break;
}
dst_rect.offset.x += dst_adj_x;
dst_rect.offset.y += dst_adj_y;
dst_rect.extent.width -= dst_adj_x;
dst_rect.extent.height -= dst_adj_y;
src_rect.extent.width -= dst_adj_x;
src_rect.extent.height -= dst_adj_y;
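A worked example of the clipping above under assumed values (hypothetical window_offset_x = -64, window_offset_y = 0, resolve_offset = (0, 0), 256x256 extent):

// dst_adj_x = max(0, 64) - 0 = 64, dst_adj_y = 0
// dst_rect:  offset.x 0 -> 64, extent.width 256 -> 192
// src_rect:  extent.width 256 -> 192
// The 64 columns that would land at negative window coordinates are clipped
// instead of copied.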
VkViewport viewport = {
float(-window_offset_x),
float(-window_offset_y),
float(copy_dest_pitch),
float(copy_dest_height),
0.f,
1.f,
0.f, 0.f, float(copy_dest_pitch), float(copy_dest_height), 0.f, 1.f,
};
uint32_t scissor_tl_x = window_regs->window_scissor_tl.tl_x;
uint32_t scissor_br_x = window_regs->window_scissor_br.br_x;
uint32_t scissor_tl_y = window_regs->window_scissor_tl.tl_y;
uint32_t scissor_br_y = window_regs->window_scissor_br.br_y;
// Clamp the values to destination dimensions.
scissor_tl_x = std::min(scissor_tl_x, copy_dest_pitch);
scissor_br_x = std::min(scissor_br_x, copy_dest_pitch);
scissor_tl_y = std::min(scissor_tl_y, copy_dest_height);
scissor_br_y = std::min(scissor_br_y, copy_dest_height);
VkRect2D scissor = {
{
int32_t(window_regs->window_scissor_tl.tl_x.value()),
int32_t(window_regs->window_scissor_tl.tl_y.value()),
},
{
window_regs->window_scissor_br.br_x.value() -
window_regs->window_scissor_tl.tl_x.value(),
window_regs->window_scissor_br.br_y.value() -
window_regs->window_scissor_tl.tl_y.value(),
},
{int32_t(scissor_tl_x), int32_t(scissor_tl_y)},
{scissor_br_x - scissor_tl_x, scissor_br_y - scissor_tl_y},
};
blitter_->BlitTexture2D(

View File

@ -601,20 +601,18 @@ enum Type3Opcode {
};
// clang-format on
template <uint16_t index, uint16_t count, bool one_reg = false>
constexpr inline uint32_t MakePacketType0() {
inline uint32_t MakePacketType0(uint16_t index, uint16_t count,
bool one_reg = false) {
// ttcccccc cccccccc oiiiiiii iiiiiiii
static_assert(index <= 0x7FFF, "index must be <= 0x7FFF");
static_assert(count >= 1 && count <= 0x4000,
"count must be >= 1 and <= 0x4000");
assert(index <= 0x7FFF);
assert(count >= 1 && count <= 0x4000);
return (0u << 30) | (((count - 1) & 0x3FFF) << 16) | (index & 0x7FFF);
}
template <uint16_t index_1, uint16_t index_2>
constexpr inline uint32_t MakePacketType1() {
inline uint32_t MakePacketType1(uint16_t index_1, uint16_t index_2) {
// tt?????? ??222222 22222111 11111111
static_assert(index_1 <= 0x7FF, "index_1 must be <= 0x7FF");
static_assert(index_2 <= 0x7FF, "index_2 must be <= 0x7FF");
assert(index_1 <= 0x7FF);
assert(index_2 <= 0x7FF);
return (1u << 30) | ((index_2 & 0x7FF) << 11) | (index_1 & 0x7FF);
}
@ -623,12 +621,11 @@ constexpr inline uint32_t MakePacketType2() {
return (2u << 30);
}
template <Type3Opcode opcode, uint16_t count, bool predicate = false>
constexpr inline uint32_t MakePacketType3() {
inline uint32_t MakePacketType3(Type3Opcode opcode, uint16_t count,
bool predicate = false) {
// ttcccccc cccccccc ?ooooooo ???????p
static_assert(opcode <= 0x7F, "opcode must be <= 0x7F");
static_assert(count >= 1 && count <= 0x4000,
"count must be >= 1 and <= 0x4000");
assert(opcode <= 0x7F);
assert(count >= 1 && count <= 0x4000);
return (3u << 30) | (((count - 1) & 0x3FFF) << 16) | ((opcode & 0x7F) << 8) |
(predicate ? 1 : 0);
}
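Worked packet values for the helpers above (the index and opcode are illustrative only, not taken from this commit):

// MakePacketType0(0x0003, 2)
//   == (0u << 30) | ((2 - 1) << 16) | 0x0003           == 0x00010003
// MakePacketType3(Type3Opcode(0x2D), 4)
//   == (3u << 30) | ((4 - 1) << 16) | (0x2D << 8) | 0  == 0xC0032D00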

View File

@ -17,8 +17,8 @@
#include "xenia/base/main.h"
#include "xenia/base/threading.h"
#include "xenia/hid/input_system.h"
#include "xenia/ui/gl/gl_provider.h"
#include "xenia/ui/imgui_drawer.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/window.h"
// Available input drivers:
@ -67,7 +67,7 @@ std::vector<std::unique_ptr<hid::InputDriver>> CreateInputDrivers(
std::unique_ptr<xe::ui::GraphicsProvider> CreateDemoGraphicsProvider(
xe::ui::Window* window) {
return xe::ui::gl::GLProvider::Create(window);
return xe::ui::vulkan::VulkanProvider::Create(window);
}
void DrawInputStatus();

View File

@ -26,12 +26,23 @@ project("xenia-hid-demo")
"gflags",
"glew",
"imgui",
"volk",
"xenia-base",
"xenia-hid",
"xenia-hid-nop",
"xenia-ui",
"xenia-ui-gl",
"xenia-ui-vulkan",
})
filter("platforms:Linux")
links({
"X11",
"xcb",
"X11-xcb",
"GL",
"vulkan",
})
filter()
flags({
"WinMain", -- Use WinMain instead of main.
})

View File

@ -160,13 +160,13 @@ DECLARE_XBOXKRNL_EXPORT(VdSetDisplayModeOverride,
ExportTag::kVideo | ExportTag::kStub);
dword_result_t VdInitializeEngines(unknown_t unk0, function_t callback,
lpvoid_t arg, lpunknown_t unk2_ptr,
lpunknown_t unk3_ptr) {
lpvoid_t arg, lpdword_t pfp_ptr,
lpdword_t me_ptr) {
// r3 = 0x4F810000
// r4 = function ptr (cleanup callback?)
// r5 = function arg
// r6 = register init cmds(?)
// r7 = gpu init cmds(?)
// r6 = PFP Microcode
// r7 = ME Microcode
return 1;
}
DECLARE_XBOXKRNL_EXPORT(VdInitializeEngines,
@ -205,12 +205,6 @@ DECLARE_XBOXKRNL_EXPORT(VdSetGraphicsInterruptCallback, ExportTag::kVideo);
void VdInitializeRingBuffer(lpvoid_t ptr, int_t log2_size) {
// r3 = result of MmGetPhysicalAddress
// r4 = log2(size)
// r4 is or'd with 0x802 and then stuffed into CP_RB_CNTL
// According to AMD docs, this corresponds to RB_BUFSZ, which is the log2 of
// the actual size.
// 0x8 is RB_BLKSZ, or number of words gpu will read before updating the
// host read pointer.
// So being or'd with 0x2 makes the ring buffer size always a multiple of 4.
// Buffer pointers are from MmAllocatePhysicalMemory with WRITE_COMBINE.
auto graphics_system = kernel_state()->emulator()->graphics_system();
graphics_system->InitializeRingBuffer(ptr, log2_size);
@ -374,7 +368,7 @@ void VdSwap(lpvoid_t buffer_ptr, // ptr into primary ringbuffer
// Write in the texture fetch.
dwords[offset++] =
xenos::MakePacketType0<gpu::XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0, 6>();
xenos::MakePacketType0(gpu::XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0, 6);
dwords[offset++] = fetch.dword_0;
dwords[offset++] = fetch.dword_1;
dwords[offset++] = fetch.dword_2;
@ -382,7 +376,7 @@ void VdSwap(lpvoid_t buffer_ptr, // ptr into primary ringbuffer
dwords[offset++] = fetch.dword_4;
dwords[offset++] = fetch.dword_5;
dwords[offset++] = xenos::MakePacketType3<xenos::PM4_XE_SWAP, 4>();
dwords[offset++] = xenos::MakePacketType3(xenos::PM4_XE_SWAP, 4);
dwords[offset++] = 'SWAP';
dwords[offset++] = (*frontbuffer_ptr) & 0x1FFFFFFF;

View File

@ -1,315 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/gl/blitter.h"
#include <string>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/gl/gl_context.h"
namespace xe {
namespace ui {
namespace gl {
Blitter::Blitter()
: vertex_program_(0),
color_fragment_program_(0),
depth_fragment_program_(0),
color_pipeline_(0),
depth_pipeline_(0),
vbo_(0),
vao_(0),
nearest_sampler_(0),
linear_sampler_(0),
scratch_framebuffer_(0) {}
Blitter::~Blitter() = default;
bool Blitter::Initialize() {
const std::string header =
R"(
#version 450
#extension GL_ARB_explicit_uniform_location : require
#extension GL_ARB_shading_language_420pack : require
precision highp float;
precision highp int;
layout(std140, column_major) uniform;
layout(std430, column_major) buffer;
)";
const std::string vs_source = header +
R"(
layout(location = 0) uniform vec4 src_uv;
out gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[];
};
layout(location = 0) in vec2 vfetch_pos;
layout(location = 0) out vec2 vtx_uv;
void main() {
gl_Position = vec4(vfetch_pos.xy * vec2(2.0, -2.0) -
vec2(1.0, -1.0), 0.0, 1.0);
vtx_uv = vfetch_pos.xy * src_uv.zw + src_uv.xy;
})";
const std::string color_fs_source = header +
R"(
layout(location = 1) uniform sampler2D src_texture;
layout(location = 2) uniform bool swap;
layout(location = 0) in vec2 vtx_uv;
layout(location = 0) out vec4 oC;
void main() {
oC = texture(src_texture, vtx_uv);
if (!swap) oC = oC.bgra;
})";
const std::string depth_fs_source = header +
R"(
layout(location = 1) uniform sampler2D src_texture;
layout(location = 0) in vec2 vtx_uv;
layout(location = 0) out vec4 oC;
void main() {
gl_FragDepth = texture(src_texture, vtx_uv).r;
})";
auto vs_source_str = vs_source.c_str();
vertex_program_ = glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &vs_source_str);
auto color_fs_source_str = color_fs_source.c_str();
color_fragment_program_ =
glCreateShaderProgramv(GL_FRAGMENT_SHADER, 1, &color_fs_source_str);
auto depth_fs_source_str = depth_fs_source.c_str();
depth_fragment_program_ =
glCreateShaderProgramv(GL_FRAGMENT_SHADER, 1, &depth_fs_source_str);
glCreateProgramPipelines(1, &color_pipeline_);
glUseProgramStages(color_pipeline_, GL_VERTEX_SHADER_BIT, vertex_program_);
glUseProgramStages(color_pipeline_, GL_FRAGMENT_SHADER_BIT,
color_fragment_program_);
glCreateProgramPipelines(1, &depth_pipeline_);
glUseProgramStages(depth_pipeline_, GL_VERTEX_SHADER_BIT, vertex_program_);
glUseProgramStages(depth_pipeline_, GL_FRAGMENT_SHADER_BIT,
depth_fragment_program_);
glCreateBuffers(1, &vbo_);
static const GLfloat vbo_data[] = {
0, 0, 1, 0, 0, 1, 1, 1,
};
glNamedBufferStorage(vbo_, sizeof(vbo_data), vbo_data, 0);
glCreateVertexArrays(1, &vao_);
glEnableVertexArrayAttrib(vao_, 0);
glVertexArrayAttribBinding(vao_, 0, 0);
glVertexArrayAttribFormat(vao_, 0, 2, GL_FLOAT, GL_FALSE, 0);
glVertexArrayVertexBuffer(vao_, 0, vbo_, 0, sizeof(GLfloat) * 2);
glCreateSamplers(1, &nearest_sampler_);
glSamplerParameteri(nearest_sampler_, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glSamplerParameteri(nearest_sampler_, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glSamplerParameteri(nearest_sampler_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glSamplerParameteri(nearest_sampler_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glCreateSamplers(1, &linear_sampler_);
glSamplerParameteri(linear_sampler_, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glSamplerParameteri(linear_sampler_, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glSamplerParameteri(linear_sampler_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glSamplerParameteri(linear_sampler_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glCreateFramebuffers(1, &scratch_framebuffer_);
return true;
}
void Blitter::Shutdown() {
glDeleteFramebuffers(1, &scratch_framebuffer_);
glDeleteProgram(vertex_program_);
glDeleteProgram(color_fragment_program_);
glDeleteProgram(depth_fragment_program_);
glDeleteProgramPipelines(1, &color_pipeline_);
glDeleteProgramPipelines(1, &depth_pipeline_);
glDeleteBuffers(1, &vbo_);
glDeleteVertexArrays(1, &vao_);
glDeleteSamplers(1, &nearest_sampler_);
glDeleteSamplers(1, &linear_sampler_);
}
struct SavedState {
GLboolean scissor_test_enabled;
GLboolean depth_test_enabled;
GLboolean depth_mask_enabled;
GLint depth_func;
GLboolean stencil_test_enabled;
GLboolean cull_face_enabled;
GLint cull_face;
GLint front_face;
GLint polygon_mode;
GLboolean color_mask_0_enabled[4];
GLboolean blend_0_enabled;
GLint draw_buffer;
GLfloat viewport[4];
GLint program_pipeline;
GLint vertex_array;
GLint texture_0;
GLint sampler_0;
void Save() {
scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST);
depth_test_enabled = glIsEnabled(GL_DEPTH_TEST);
glGetBooleanv(GL_DEPTH_WRITEMASK, &depth_mask_enabled);
glGetIntegerv(GL_DEPTH_FUNC, &depth_func);
stencil_test_enabled = glIsEnabled(GL_STENCIL_TEST);
cull_face_enabled = glIsEnabled(GL_CULL_FACE);
glGetIntegerv(GL_CULL_FACE_MODE, &cull_face);
glGetIntegerv(GL_FRONT_FACE, &front_face);
glGetIntegerv(GL_POLYGON_MODE, &polygon_mode);
glGetBooleani_v(GL_COLOR_WRITEMASK, 0,
reinterpret_cast<GLboolean*>(&color_mask_0_enabled));
blend_0_enabled = glIsEnabledi(GL_BLEND, 0);
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &draw_buffer);
glGetFloati_v(GL_VIEWPORT, 0, viewport);
glGetIntegerv(GL_PROGRAM_PIPELINE_BINDING, &program_pipeline);
glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &vertex_array);
glGetIntegerv(GL_TEXTURE_BINDING_2D, &texture_0);
glGetIntegerv(GL_SAMPLER_BINDING, &sampler_0);
}
void Restore() {
scissor_test_enabled ? glEnable(GL_SCISSOR_TEST)
: glDisable(GL_SCISSOR_TEST);
depth_test_enabled ? glEnable(GL_DEPTH_TEST) : glDisable(GL_DEPTH_TEST);
glDepthMask(depth_mask_enabled);
glDepthFunc(depth_func);
stencil_test_enabled ? glEnable(GL_STENCIL_TEST)
: glDisable(GL_STENCIL_TEST);
cull_face_enabled ? glEnable(GL_CULL_FACE) : glDisable(GL_CULL_FACE);
glCullFace(cull_face);
glFrontFace(front_face);
glPolygonMode(GL_FRONT_AND_BACK, polygon_mode);
glColorMaski(0, color_mask_0_enabled[0], color_mask_0_enabled[1],
color_mask_0_enabled[2], color_mask_0_enabled[3]);
blend_0_enabled ? glEnablei(GL_BLEND, 0) : glDisablei(GL_BLEND, 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw_buffer);
glViewportIndexedf(0, viewport[0], viewport[1], viewport[2], viewport[3]);
glBindProgramPipeline(program_pipeline);
glBindVertexArray(vertex_array);
glBindTexture(GL_TEXTURE_2D, texture_0);
glBindSampler(0, sampler_0);
}
};
void Blitter::Draw(GLuint src_texture, Rect2D src_rect, Rect2D dest_rect,
GLenum filter) {
assert_not_zero(src_texture);
glDisable(GL_SCISSOR_TEST);
glDisable(GL_STENCIL_TEST);
glDisablei(GL_BLEND, 0);
glEnable(GL_CULL_FACE);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glBindVertexArray(vao_);
glBindTextures(0, 1, &src_texture);
switch (filter) {
default:
case GL_NEAREST:
glBindSampler(0, nearest_sampler_);
break;
case GL_LINEAR:
glBindSampler(0, linear_sampler_);
break;
}
glViewportIndexedf(0, GLfloat(dest_rect.x), GLfloat(dest_rect.y),
GLfloat(dest_rect.width), GLfloat(dest_rect.height));
// TODO(benvanik): avoid this?
GLint src_texture_width;
glGetTextureLevelParameteriv(src_texture, 0, GL_TEXTURE_WIDTH,
&src_texture_width);
GLint src_texture_height;
glGetTextureLevelParameteriv(src_texture, 0, GL_TEXTURE_HEIGHT,
&src_texture_height);
glProgramUniform4f(vertex_program_, 0,
src_rect.x / static_cast<float>(src_texture_width),
src_rect.y / static_cast<float>(src_texture_height),
src_rect.width / static_cast<float>(src_texture_width),
src_rect.height / static_cast<float>(src_texture_height));
// Useful for seeing the entire framebuffer/etc:
// glProgramUniform4f(vertex_program_, 0, 0.0f, 0.0f, 1.0f, 1.0f);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
void Blitter::BlitTexture2D(GLuint src_texture, Rect2D src_rect,
Rect2D dest_rect, GLenum filter,
bool swap_channels) {
SavedState state;
state.Save();
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
glStencilMask(0xFF);
glBindProgramPipeline(color_pipeline_);
glProgramUniform1i(color_fragment_program_, 2, swap_channels ? 1 : 0);
Draw(src_texture, src_rect, dest_rect, filter);
state.Restore();
}
void Blitter::CopyColorTexture2D(GLuint src_texture, Rect2D src_rect,
GLuint dest_texture, Rect2D dest_rect,
GLenum filter, bool swap_channels) {
SavedState state;
state.Save();
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
glBindProgramPipeline(color_pipeline_);
glProgramUniform1i(color_fragment_program_, 2, swap_channels ? 1 : 0);
glNamedFramebufferTexture(scratch_framebuffer_, GL_COLOR_ATTACHMENT0,
dest_texture, 0);
glNamedFramebufferDrawBuffer(scratch_framebuffer_, GL_COLOR_ATTACHMENT0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, scratch_framebuffer_);
Draw(src_texture, src_rect, dest_rect, filter);
glNamedFramebufferDrawBuffer(scratch_framebuffer_, GL_NONE);
glNamedFramebufferTexture(scratch_framebuffer_, GL_COLOR_ATTACHMENT0, GL_NONE,
0);
state.Restore();
}
void Blitter::CopyDepthTexture(GLuint src_texture, Rect2D src_rect,
GLuint dest_texture, Rect2D dest_rect) {
SavedState state;
state.Save();
glColorMaski(0, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_ALWAYS);
glDepthMask(GL_TRUE);
glBindProgramPipeline(depth_pipeline_);
glNamedFramebufferTexture(scratch_framebuffer_, GL_DEPTH_STENCIL_ATTACHMENT,
dest_texture, 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, scratch_framebuffer_);
Draw(src_texture, src_rect, dest_rect, GL_NEAREST);
glNamedFramebufferTexture(scratch_framebuffer_, GL_DEPTH_STENCIL_ATTACHMENT,
GL_NONE, 0);
state.Restore();
}
} // namespace gl
} // namespace ui
} // namespace xe
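For reference, a minimal usage sketch of the blitter above, assuming a current GL 4.5 context; src_tex and the rectangle sizes are hypothetical, and BlitTexture2D draws into whatever framebuffer is bound at call time:

// Sketch only: src_tex and the rectangle sizes are placeholders.
xe::ui::gl::Blitter blitter;
bool ok = blitter.Initialize();  // Needs the GL 4.5 DSA entry points used above.
if (ok) {
  // Stretch a 1280x720 color texture onto the currently bound framebuffer,
  // swapping red/blue and filtering bilinearly.
  blitter.BlitTexture2D(src_tex, xe::ui::gl::Rect2D(0, 0, 1280, 720),
                        xe::ui::gl::Rect2D(0, 0, 1920, 1080), GL_LINEAR,
                        /*swap_channels=*/true);
}
blitter.Shutdown();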

View File

@ -1,70 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_BLITTER_H_
#define XENIA_UI_GL_BLITTER_H_
#include <memory>
#include "xenia/ui/gl/gl.h"
namespace xe {
namespace ui {
namespace gl {
struct Rect2D {
int32_t x;
int32_t y;
int32_t width;
int32_t height;
Rect2D() : x(0), y(0), width(0), height(0) {}
Rect2D(int32_t x_, int32_t y_, int32_t width_, int32_t height_)
: x(x_), y(y_), width(width_), height(height_) {}
int32_t right() const { return x + width; }
int32_t bottom() const { return y + height; }
};
class Blitter {
public:
Blitter();
~Blitter();
bool Initialize();
void Shutdown();
void BlitTexture2D(GLuint src_texture, Rect2D src_rect, Rect2D dest_rect,
GLenum filter, bool swap_channels);
void CopyColorTexture2D(GLuint src_texture, Rect2D src_rect,
GLuint dest_texture, Rect2D dest_rect, GLenum filter,
bool swap_channels);
void CopyDepthTexture(GLuint src_texture, Rect2D src_rect,
GLuint dest_texture, Rect2D dest_rect);
private:
void Draw(GLuint src_texture, Rect2D src_rect, Rect2D dest_rect,
GLenum filter);
GLuint vertex_program_;
GLuint color_fragment_program_;
GLuint depth_fragment_program_;
GLuint color_pipeline_;
GLuint depth_pipeline_;
GLuint vbo_;
GLuint vao_;
GLuint nearest_sampler_;
GLuint linear_sampler_;
GLuint scratch_framebuffer_;
};
} // namespace gl
} // namespace ui
} // namespace xe
#endif // XENIA_UI_GL_BLITTER_H_

View File

@ -1,139 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/gl/circular_buffer.h"
#include <algorithm>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
namespace xe {
namespace ui {
namespace gl {
CircularBuffer::CircularBuffer(size_t capacity, size_t alignment)
: capacity_(capacity),
alignment_(alignment),
write_head_(0),
dirty_start_(UINT64_MAX),
dirty_end_(0),
buffer_(0),
gpu_base_(0),
host_base_(nullptr) {}
CircularBuffer::~CircularBuffer() { Shutdown(); }
bool CircularBuffer::Initialize() {
glCreateBuffers(1, &buffer_);
glNamedBufferStorage(buffer_, capacity_, nullptr,
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
host_base_ = reinterpret_cast<uint8_t*>(glMapNamedBufferRange(
buffer_, 0, capacity_,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_PERSISTENT_BIT));
assert_not_null(host_base_);
if (!host_base_) {
return false;
}
return true;
}
void CircularBuffer::Shutdown() {
if (!buffer_) {
return;
}
glUnmapNamedBuffer(buffer_);
glDeleteBuffers(1, &buffer_);
buffer_ = 0;
}
bool CircularBuffer::CanAcquire(size_t length) {
size_t aligned_length = xe::round_up(length, alignment_);
return write_head_ + aligned_length <= capacity_;
}
CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) {
// Addresses must always be a multiple of the alignment (256 by default).
size_t aligned_length = xe::round_up(length, alignment_);
assert_true(aligned_length <= capacity_, "Request too large");
if (write_head_ + aligned_length > capacity_) {
// Flush and wait.
WaitUntilClean();
}
Allocation allocation;
allocation.host_ptr = host_base_ + write_head_;
allocation.gpu_ptr = gpu_base_ + write_head_;
allocation.offset = write_head_;
allocation.length = length;
allocation.aligned_length = aligned_length;
allocation.cache_key = 0;
write_head_ += aligned_length;
return allocation;
}
bool CircularBuffer::AcquireCached(uint32_t key, size_t length,
Allocation* out_allocation) {
uint64_t full_key = key | (length << 32);
auto it = allocation_cache_.find(full_key);
if (it != allocation_cache_.end()) {
uintptr_t write_head = it->second;
size_t aligned_length = xe::round_up(length, alignment_);
out_allocation->host_ptr = host_base_ + write_head;
out_allocation->gpu_ptr = gpu_base_ + write_head;
out_allocation->offset = write_head;
out_allocation->length = length;
out_allocation->aligned_length = aligned_length;
out_allocation->cache_key = full_key;
return true;
} else {
*out_allocation = Acquire(length);
out_allocation->cache_key = full_key;
return false;
}
}
void CircularBuffer::Discard(Allocation allocation) {
write_head_ -= allocation.aligned_length;
}
void CircularBuffer::Commit(Allocation allocation) {
uintptr_t start = allocation.gpu_ptr - gpu_base_;
uintptr_t end = start + allocation.aligned_length;
dirty_start_ = std::min(dirty_start_, start);
dirty_end_ = std::max(dirty_end_, end);
assert_true(dirty_end_ <= capacity_);
if (allocation.cache_key) {
allocation_cache_.insert({allocation.cache_key, allocation.offset});
}
}
void CircularBuffer::Flush() {
if (dirty_start_ == dirty_end_ || dirty_start_ == UINT64_MAX) {
return;
}
glFlushMappedNamedBufferRange(buffer_, dirty_start_,
dirty_end_ - dirty_start_);
dirty_start_ = UINT64_MAX;
dirty_end_ = 0;
}
void CircularBuffer::ClearCache() { allocation_cache_.clear(); }
void CircularBuffer::WaitUntilClean() {
Flush();
glFinish();
write_head_ = 0;
ClearCache();
}
} // namespace gl
} // namespace ui
} // namespace xe
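A hedged usage sketch of the acquire/commit/flush cycle implemented above; upload_data/upload_size are placeholders, and binding the range as a uniform buffer is only one illustrative consumer:

// Sketch only: upload_data/upload_size and the binding point are hypothetical.
xe::ui::gl::CircularBuffer scratch(16 * 1024 * 1024);  // 16 MiB, 256-byte aligned.
scratch.Initialize();
auto alloc = scratch.Acquire(upload_size);
std::memcpy(alloc.host_ptr, upload_data, upload_size);
scratch.Commit(alloc);  // Widens the dirty range.
scratch.Flush();        // glFlushMappedNamedBufferRange over the dirty span.
glBindBufferRange(GL_UNIFORM_BUFFER, 0, scratch.handle(), alloc.offset,
                  GLsizeiptr(alloc.aligned_length));
// ... issue draws that consume the data ...
scratch.WaitUntilClean();  // Flush + glFinish, then rewind the write head.
scratch.Shutdown();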

View File

@ -1,71 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_CIRCULAR_BUFFER_H_
#define XENIA_UI_GL_CIRCULAR_BUFFER_H_
#include <unordered_map>
#include "xenia/ui/gl/gl.h"
namespace xe {
namespace ui {
namespace gl {
// TODO(benvanik): uh, make this circular.
// TODO(benvanik): fences to prevent this from ever flushing.
class CircularBuffer {
public:
CircularBuffer(size_t capacity, size_t alignment = 256);
~CircularBuffer();
struct Allocation {
void* host_ptr;
GLuint64 gpu_ptr;
size_t offset;
size_t length;
size_t aligned_length;
uint64_t cache_key; // 0 if caching disabled.
};
bool Initialize();
void Shutdown();
GLuint handle() const { return buffer_; }
GLuint64 gpu_handle() const { return gpu_base_; }
size_t capacity() const { return capacity_; }
bool CanAcquire(size_t length);
Allocation Acquire(size_t length);
bool AcquireCached(uint32_t key, size_t length, Allocation* out_allocation);
void Discard(Allocation allocation);
void Commit(Allocation allocation);
void Flush();
void ClearCache();
void WaitUntilClean();
private:
size_t capacity_;
size_t alignment_;
uintptr_t write_head_;
uintptr_t dirty_start_;
uintptr_t dirty_end_;
GLuint buffer_;
GLuint64 gpu_base_;
uint8_t* host_base_;
std::unordered_map<uint64_t, uintptr_t> allocation_cache_;
};
} // namespace gl
} // namespace ui
} // namespace xe
#endif // XENIA_UI_GL_CIRCULAR_BUFFER_H_

View File

@ -1,32 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_GL_H_
#define XENIA_UI_GL_GL_H_
#include "xenia/base/platform.h"
#include "third_party/GL/glew.h"
typedef struct GLEWContextStruct GLEWContext;
extern "C" GLEWContext* glewGetContext();
#if XE_PLATFORM_WIN32
// We avoid including wglew.h here as it includes windows.h and pollutes the
// global namespace. As we don't need wglew in most places, we only include it
// where required.
typedef struct WGLEWContextStruct WGLEWContext;
extern "C" WGLEWContext* wglewGetContext();
#elif XE_PLATFORM_LINUX
typedef struct GLXEWContextStruct GLXEWContext;
extern "C" GLXEWContext* glxewGetContext();
#endif
#endif // XENIA_UI_GL_GL_H_

View File

@ -1,268 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/gl/gl_context.h"
#include <gflags/gflags.h>
#include <mutex>
#include <string>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/ui/gl/gl_immediate_drawer.h"
#include "xenia/ui/window.h"
DEFINE_bool(thread_safe_gl, false,
"Only allow one GL context to be active at a time.");
DEFINE_bool(disable_gl_context_reset, false,
"Do not aggressively reset the GL context (helps with capture "
"programs such as OBS or FRAPS).");
DEFINE_bool(random_clear_color, false, "Randomizes GL clear color.");
DEFINE_bool(gl_debug, false, "Enable OpenGL debug validation layer.");
DEFINE_bool(gl_debug_output, false, "Dump ARB_debug_output to stderr.");
DEFINE_bool(gl_debug_output_synchronous, true,
"ARB_debug_output will synchronize to be thread safe.");
namespace xe {
namespace ui {
namespace gl {
std::recursive_mutex GLContext::global_gl_mutex_;
void GLContext::FatalGLError(std::string error) {
xe::FatalError(
error +
"\nEnsure you have the latest drivers for your GPU and that it supports "
"OpenGL 4.5. See http://xenia.jp/faq/ for more information and a list"
"of supported GPUs.");
}
GLContext::GLContext(GraphicsProvider* provider, Window* target_window)
: GraphicsContext(provider, target_window) {}
GLContext::~GLContext() {}
void GLContext::AssertExtensionsPresent() {
if (!MakeCurrent()) {
FatalGLError("Unable to make GL context current.");
return;
}
// Check shader version at least 4.5 (matching GL 4.5).
auto glsl_version_raw =
reinterpret_cast<const char*>(glGetString(GL_SHADING_LANGUAGE_VERSION));
std::string glsl_version(glsl_version_raw);
if (glsl_version.find("4.5") == std::string::npos &&
glsl_version.find("4.6") == std::string::npos) {
FatalGLError("OpenGL GLSL version 4.50 or higher is required.");
return;
}
if (!GLEW_ARB_bindless_texture || !glMakeTextureHandleResidentARB) {
FatalGLError("OpenGL extension ARB_bindless_texture is required.");
return;
}
if (!GLEW_ARB_fragment_coord_conventions) {
FatalGLError(
"OpenGL extension ARB_fragment_coord_conventions is required.");
return;
}
ClearCurrent();
}
void GLContext::DebugMessage(GLenum source, GLenum type, GLuint id,
GLenum severity, GLsizei length,
const GLchar* message) {
const char* source_name = nullptr;
switch (source) {
case GL_DEBUG_SOURCE_API_ARB:
source_name = "OpenGL";
break;
case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB:
source_name = "Windows";
break;
case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB:
source_name = "Shader Compiler";
break;
case GL_DEBUG_SOURCE_THIRD_PARTY_ARB:
source_name = "Third Party";
break;
case GL_DEBUG_SOURCE_APPLICATION_ARB:
source_name = "Application";
break;
case GL_DEBUG_SOURCE_OTHER_ARB:
source_name = "Other";
break;
default:
source_name = "(unknown source)";
break;
}
const char* type_name = nullptr;
switch (type) {
case GL_DEBUG_TYPE_ERROR:
type_name = "error";
break;
case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
type_name = "deprecated behavior";
break;
case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
type_name = "undefined behavior";
break;
case GL_DEBUG_TYPE_PORTABILITY:
type_name = "portability";
break;
case GL_DEBUG_TYPE_PERFORMANCE:
type_name = "performance";
break;
case GL_DEBUG_TYPE_OTHER:
type_name = "message";
break;
case GL_DEBUG_TYPE_MARKER:
type_name = "marker";
break;
case GL_DEBUG_TYPE_PUSH_GROUP:
type_name = "push group";
break;
case GL_DEBUG_TYPE_POP_GROUP:
type_name = "pop group";
break;
default:
type_name = "(unknown type)";
break;
}
const char* severity_name = nullptr;
switch (severity) {
case GL_DEBUG_SEVERITY_HIGH_ARB:
severity_name = "high";
break;
case GL_DEBUG_SEVERITY_MEDIUM_ARB:
severity_name = "medium";
break;
case GL_DEBUG_SEVERITY_LOW_ARB:
severity_name = "low";
break;
case GL_DEBUG_SEVERITY_NOTIFICATION:
severity_name = "notification";
break;
default:
severity_name = "(unknown severity)";
break;
}
XELOGE("GL4 %s: %s(%s) %d: %s", source_name, type_name, severity_name, id,
message);
}
void GLAPIENTRY GLContext::DebugMessageThunk(GLenum source, GLenum type,
GLuint id, GLenum severity,
GLsizei length,
const GLchar* message,
GLvoid* user_param) {
reinterpret_cast<GLContext*>(user_param)
->DebugMessage(source, type, id, severity, length, message);
}
void GLContext::SetupDebugging() {
if (!FLAGS_gl_debug || !FLAGS_gl_debug_output) {
return;
}
glEnable(GL_DEBUG_OUTPUT);
// Synchronous output hurts, but is required if we want to line up the logs.
if (FLAGS_gl_debug_output_synchronous) {
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
} else {
glDisable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
}
// Enable everything by default.
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL,
GL_TRUE);
// Disable annoying messages.
GLuint disable_message_ids[] = {
0x00020004, // Usage warning: Generic vertex attribute array 0 uses a
// pointer with a small value (0x0000000000000000). Is this
// intended to be used as an offset into a buffer object?
};
glDebugMessageControl(GL_DEBUG_SOURCE_API, GL_DEBUG_TYPE_OTHER, GL_DONT_CARE,
GLsizei(xe::countof(disable_message_ids)),
disable_message_ids, GL_FALSE);
// Callback will be made from driver threads.
glDebugMessageCallback(reinterpret_cast<GLDEBUGPROC>(&DebugMessageThunk),
this);
}
ImmediateDrawer* GLContext::immediate_drawer() {
return immediate_drawer_.get();
}
bool GLContext::WasLost() {
if (!robust_access_supported_) {
// Can't determine if we lost the context.
return false;
}
if (context_lost_) {
return true;
}
auto status = glGetGraphicsResetStatusARB();
if (status != GL_NO_ERROR) {
// Graphics card reset.
XELOGE("============= TDR detected on context %p! Context %s =============",
handle(), status == GL_GUILTY_CONTEXT_RESET ? "guilty" : "innocent");
context_lost_ = true;
return true;
}
return false;
}
std::unique_ptr<RawImage> GLContext::Capture() {
GraphicsContextLock lock(this);
std::unique_ptr<RawImage> raw_image(new RawImage());
raw_image->width = target_window_->width();
raw_image->stride = raw_image->width * 4;
raw_image->height = target_window_->height();
raw_image->data.resize(raw_image->stride * raw_image->height);
glReadPixels(0, 0, target_window_->width(), target_window_->height(), GL_RGBA,
GL_UNSIGNED_BYTE, raw_image->data.data());
// Flip vertically in-place.
size_t yt = 0;
size_t yb = (raw_image->height - 1) * raw_image->stride;
while (yt < yb) {
for (size_t i = 0; i < raw_image->stride; ++i) {
std::swap(raw_image->data[yt + i], raw_image->data[yb + i]);
}
yt += raw_image->stride;
yb -= raw_image->stride;
}
return raw_image;
}
} // namespace gl
} // namespace ui
} // namespace xe
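Capture() and the shared-context paths lean on GraphicsContextLock, which is declared in xenia/ui/graphics_context.h and is not part of this diff. A plausible minimal sketch of that RAII helper, assuming it simply makes the context current for the scope (names and details here are assumptions, not the actual header):

// Assumed shape only; the real helper lives in xenia/ui/graphics_context.h.
class GraphicsContextLock {
 public:
  explicit GraphicsContextLock(GraphicsContext* context) : context_(context) {
    was_current_ = context_->is_current();
    if (!was_current_) {
      context_->MakeCurrent();
    }
  }
  ~GraphicsContextLock() {
    if (!was_current_) {
      context_->ClearCurrent();
    }
  }

 private:
  bool was_current_ = false;
  GraphicsContext* context_ = nullptr;
};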

View File

@ -1,93 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_GL_CONTEXT_H_
#define XENIA_UI_GL_GL_CONTEXT_H_
#include <gflags/gflags.h>
#include <memory>
#include <mutex>
#include "xenia/ui/gl/blitter.h"
#include "xenia/ui/gl/gl.h"
#include "xenia/ui/graphics_context.h"
DECLARE_bool(thread_safe_gl);
DECLARE_bool(disable_gl_context_reset);
DECLARE_bool(random_clear_color);
DECLARE_bool(gl_debug);
DECLARE_bool(gl_debug_output);
DECLARE_bool(gl_debug_output_synchronous);
namespace xe {
namespace ui {
namespace gl {
class GLImmediateDrawer;
class GLProvider;
class GLContext : public GraphicsContext {
public:
~GLContext() override;
ImmediateDrawer* immediate_drawer() override;
virtual bool is_current() override = 0;
virtual bool MakeCurrent() override = 0;
virtual void ClearCurrent() override = 0;
bool WasLost() override;
virtual void BeginSwap() override = 0;
virtual void EndSwap() override = 0;
std::unique_ptr<RawImage> Capture() override;
Blitter* blitter() { return &blitter_; }
protected:
Blitter blitter_;
std::unique_ptr<GLImmediateDrawer> immediate_drawer_;
static std::recursive_mutex global_gl_mutex_;
bool context_lost_ = false;
bool robust_access_supported_ = false;
static void FatalGLError(std::string error);
virtual bool Initialize(GLContext* share_context) = 0;
virtual void* handle() = 0;
GLContext(GraphicsProvider* provider, Window* target_window);
void SetupDebugging();
void AssertExtensionsPresent();
void DebugMessage(GLenum source, GLenum type, GLuint id, GLenum severity,
GLsizei length, const GLchar* message);
private:
friend class GLProvider;
static std::unique_ptr<GLContext> Create(GraphicsProvider* provider,
Window* target_window,
GLContext* share_context = nullptr);
static std::unique_ptr<GLContext> CreateOffscreen(GraphicsProvider* provider,
GLContext* parent_context);
private:
static void GLAPIENTRY DebugMessageThunk(GLenum source, GLenum type,
GLuint id, GLenum severity,
GLsizei length,
const GLchar* message,
GLvoid* user_param);
};
} // namespace gl
} // namespace ui
} // namespace xe
#endif // XENIA_UI_GL_GL_CONTEXT_H_

View File

@ -1,315 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/gl/gl_context_win.h"
#include <gflags/gflags.h>
#include <mutex>
#include <string>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/platform_win.h"
#include "xenia/base/profiling.h"
#include "xenia/ui/gl/gl_immediate_drawer.h"
#include "xenia/ui/window.h"
#include "third_party/GL/wglew.h"
namespace xe {
namespace ui {
namespace gl {
thread_local GLEWContext* tls_glew_context_ = nullptr;
thread_local WGLEWContext* tls_wglew_context_ = nullptr;
extern "C" GLEWContext* glewGetContext() { return tls_glew_context_; }
extern "C" WGLEWContext* wglewGetContext() { return tls_wglew_context_; }
std::unique_ptr<GLContext> GLContext::Create(GraphicsProvider* provider,
Window* target_window,
GLContext* share_context) {
auto context =
std::unique_ptr<GLContext>(new WGLContext(provider, target_window));
if (!context->Initialize(share_context)) {
return nullptr;
}
context->AssertExtensionsPresent();
return context;
}
std::unique_ptr<GLContext> GLContext::CreateOffscreen(
GraphicsProvider* provider, GLContext* parent_context) {
return WGLContext::CreateOffscreen(provider,
static_cast<WGLContext*>(parent_context));
}
WGLContext::WGLContext(GraphicsProvider* provider, Window* target_window)
: GLContext(provider, target_window) {
glew_context_.reset(new GLEWContext());
wglew_context_.reset(new WGLEWContext());
}
WGLContext::~WGLContext() {
MakeCurrent();
blitter_.Shutdown();
immediate_drawer_.reset();
ClearCurrent();
if (glrc_) {
wglDeleteContext(glrc_);
}
if (dc_) {
ReleaseDC(HWND(target_window_->native_handle()), dc_);
}
}
bool WGLContext::Initialize(GLContext* share_context_) {
WGLContext* share_context = static_cast<WGLContext*>(share_context_);
dc_ = GetDC(HWND(target_window_->native_handle()));
PIXELFORMATDESCRIPTOR pfd = {0};
pfd.nSize = sizeof(pfd);
pfd.nVersion = 1;
pfd.dwFlags = PFD_DOUBLEBUFFER | PFD_SUPPORT_OPENGL | PFD_DRAW_TO_WINDOW;
pfd.iPixelType = PFD_TYPE_RGBA;
pfd.cColorBits = 32;
pfd.cDepthBits = 32;
pfd.iLayerType = PFD_MAIN_PLANE;
int pixel_format = ChoosePixelFormat(dc_, &pfd);
if (!pixel_format) {
FatalGLError("Unable to choose pixel format.");
return false;
}
if (!SetPixelFormat(dc_, pixel_format, &pfd)) {
FatalGLError("Unable to set pixel format.");
return false;
}
HGLRC temp_context = wglCreateContext(dc_);
if (!temp_context) {
FatalGLError("Unable to create temporary GL context.");
return false;
}
wglMakeCurrent(dc_, temp_context);
tls_glew_context_ = glew_context_.get();
tls_wglew_context_ = wglew_context_.get();
if (glewInit() != GLEW_OK) {
FatalGLError("Unable to initialize GLEW.");
return false;
}
if (wglewInit() != GLEW_OK) {
FatalGLError("Unable to initialize WGLEW.");
return false;
}
if (!WGLEW_ARB_create_context) {
FatalGLError("WGL_ARG_create_context not supported by GL ICD.");
return false;
}
if (GLEW_ARB_robustness) {
robust_access_supported_ = true;
}
int context_flags = 0;
if (FLAGS_gl_debug) {
context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB;
}
if (robust_access_supported_) {
context_flags |= WGL_CONTEXT_ROBUST_ACCESS_BIT_ARB;
}
int attrib_list[] = {
WGL_CONTEXT_MAJOR_VERSION_ARB,
4,
WGL_CONTEXT_MINOR_VERSION_ARB,
5,
WGL_CONTEXT_FLAGS_ARB,
context_flags,
WGL_CONTEXT_PROFILE_MASK_ARB,
WGL_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB,
WGL_CONTEXT_RESET_NOTIFICATION_STRATEGY_ARB,
robust_access_supported_ ? WGL_LOSE_CONTEXT_ON_RESET_ARB : 0,
0};
glrc_ = wglCreateContextAttribsARB(
dc_, share_context ? share_context->glrc_ : nullptr, attrib_list);
wglMakeCurrent(nullptr, nullptr);
wglDeleteContext(temp_context);
if (!glrc_) {
FatalGLError("Unable to create real GL context.");
return false;
}
if (!MakeCurrent()) {
FatalGLError("Could not make real GL context current.");
return false;
}
XELOGI("Successfully created OpenGL context:");
XELOGI(" GL_VENDOR: %s", glGetString(GL_VENDOR));
XELOGI(" GL_VERSION: %s", glGetString(GL_VERSION));
XELOGI(" GL_RENDERER: %s", glGetString(GL_RENDERER));
XELOGI(" GL_SHADING_LANGUAGE_VERSION: %s",
glGetString(GL_SHADING_LANGUAGE_VERSION));
while (glGetError()) {
// Clearing errors.
}
SetupDebugging();
if (!blitter_.Initialize()) {
FatalGLError("Unable to initialize blitter.");
ClearCurrent();
return false;
}
immediate_drawer_ = std::make_unique<GLImmediateDrawer>(this);
ClearCurrent();
return true;
}
std::unique_ptr<WGLContext> WGLContext::CreateOffscreen(
GraphicsProvider* provider, WGLContext* parent_context) {
assert_not_null(parent_context->glrc_);
HGLRC new_glrc = nullptr;
{
GraphicsContextLock context_lock(parent_context);
int context_flags = 0;
if (FLAGS_gl_debug) {
context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB;
}
bool robust_access_supported = parent_context->robust_access_supported_;
if (robust_access_supported) {
context_flags |= WGL_CONTEXT_ROBUST_ACCESS_BIT_ARB;
}
int attrib_list[] = {
WGL_CONTEXT_MAJOR_VERSION_ARB,
4,
WGL_CONTEXT_MINOR_VERSION_ARB,
5,
WGL_CONTEXT_FLAGS_ARB,
context_flags,
WGL_CONTEXT_PROFILE_MASK_ARB,
WGL_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB,
WGL_CONTEXT_RESET_NOTIFICATION_STRATEGY_ARB,
robust_access_supported ? WGL_LOSE_CONTEXT_ON_RESET_ARB : 0,
0};
new_glrc = wglCreateContextAttribsARB(parent_context->dc_,
parent_context->glrc_, attrib_list);
if (!new_glrc) {
FatalGLError("Could not create shared context.");
return nullptr;
}
}
auto new_context = std::unique_ptr<WGLContext>(
new WGLContext(provider, parent_context->target_window_));
new_context->glrc_ = new_glrc;
new_context->dc_ =
GetDC(HWND(parent_context->target_window_->native_handle()));
new_context->robust_access_supported_ =
parent_context->robust_access_supported_;
if (!new_context->MakeCurrent()) {
FatalGLError("Could not make new GL context current.");
return nullptr;
}
if (!glGetString(GL_EXTENSIONS)) {
new_context->ClearCurrent();
FatalGLError("New GL context did not have extensions.");
return nullptr;
}
if (glewInit() != GLEW_OK) {
new_context->ClearCurrent();
FatalGLError("Unable to initialize GLEW on shared context.");
return nullptr;
}
if (wglewInit() != GLEW_OK) {
new_context->ClearCurrent();
FatalGLError("Unable to initialize WGLEW on shared context.");
return nullptr;
}
new_context->SetupDebugging();
if (!new_context->blitter_.Initialize()) {
FatalGLError("Unable to initialize blitter on shared context.");
return nullptr;
}
new_context->ClearCurrent();
return new_context;
}
bool WGLContext::is_current() {
return tls_glew_context_ == glew_context_.get();
}
bool WGLContext::MakeCurrent() {
SCOPE_profile_cpu_f("gpu");
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.lock();
}
if (!wglMakeCurrent(dc_, glrc_)) {
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.unlock();
}
FatalGLError("Unable to make GL context current.");
return false;
}
tls_glew_context_ = glew_context_.get();
tls_wglew_context_ = wglew_context_.get();
return true;
}
void WGLContext::ClearCurrent() {
if (!FLAGS_disable_gl_context_reset) {
wglMakeCurrent(nullptr, nullptr);
}
tls_glew_context_ = nullptr;
tls_wglew_context_ = nullptr;
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.unlock();
}
}
void WGLContext::BeginSwap() {
SCOPE_profile_cpu_i("gpu", "xe::ui::gl::WGLContext::BeginSwap");
float clear_color[] = {238 / 255.0f, 238 / 255.0f, 238 / 255.0f, 1.0f};
if (FLAGS_random_clear_color) {
clear_color[0] =
rand() / static_cast<float>(RAND_MAX); // NOLINT(runtime/threadsafe_fn)
clear_color[1] = 1.0f;
clear_color[2] = 0.0f;
clear_color[3] = 1.0f;
}
glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
}
void WGLContext::EndSwap() {
SCOPE_profile_cpu_i("gpu", "xe::ui::gl::WGLContext::EndSwap");
SwapBuffers(dc_);
}
} // namespace gl
} // namespace ui
} // namespace xe

View File

@ -1,64 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_WGL_CONTEXT_H_
#define XENIA_UI_GL_WGL_CONTEXT_H_
#include <gflags/gflags.h>
#include <memory>
#include "xenia/ui/gl/blitter.h"
#include "xenia/ui/gl/gl.h"
#include "xenia/ui/gl/gl_context.h"
#include "xenia/ui/graphics_context.h"
typedef struct HDC__* HDC;
typedef struct HGLRC__* HGLRC;
namespace xe {
namespace ui {
namespace gl {
class GLImmediateDrawer;
class GLProvider;
class WGLContext : public GLContext {
public:
~WGLContext() override;
bool is_current() override;
bool MakeCurrent() override;
void ClearCurrent() override;
void BeginSwap() override;
void EndSwap() override;
protected:
friend class GLContext;
WGLContext(GraphicsProvider* provider, Window* target_window);
static std::unique_ptr<WGLContext> CreateOffscreen(
GraphicsProvider* provider, WGLContext* parent_context);
bool Initialize(GLContext* share_context) override;
void* handle() override { return glrc_; }
private:
HDC dc_ = nullptr;
HGLRC glrc_ = nullptr;
std::unique_ptr<GLEWContext> glew_context_;
std::unique_ptr<WGLEWContext> wglew_context_;
};
} // namespace gl
} // namespace ui
} // namespace xe
#endif  // XENIA_UI_GL_WGL_CONTEXT_H_

View File

@ -1,323 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/gl/gl_context_x11.h"
#include <gflags/gflags.h>
#include <gdk/gdkx.h>
#include <mutex>
#include <string>
#include "third_party/GL/glxew.h"
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/platform_linux.h"
#include "xenia/base/profiling.h"
#include "xenia/ui/gl/gl_immediate_drawer.h"
#include "xenia/ui/window.h"
namespace xe {
namespace ui {
namespace gl {
thread_local GLEWContext* tls_glew_context_ = nullptr;
thread_local GLXEWContext* tls_glxew_context_ = nullptr;
extern "C" GLEWContext* glewGetContext() { return tls_glew_context_; }
extern "C" GLXEWContext* glxewGetContext() { return tls_glxew_context_; }
std::unique_ptr<GLContext> GLContext::Create(GraphicsProvider* provider,
Window* target_window,
GLContext* share_context) {
auto context =
std::unique_ptr<GLContext>(new GLXContext(provider, target_window));
if (!context->Initialize(share_context)) {
return nullptr;
}
context->AssertExtensionsPresent();
return context;
}
std::unique_ptr<GLContext> GLContext::CreateOffscreen(
GraphicsProvider* provider, GLContext* parent_context) {
return GLXContext::CreateOffscreen(provider,
static_cast<GLXContext*>(parent_context));
}
GLXContext::GLXContext(GraphicsProvider* provider, Window* target_window)
: GLContext(provider, target_window) {
glew_context_.reset(new GLEWContext());
glxew_context_.reset(new GLXEWContext());
}
GLXContext::~GLXContext() {
MakeCurrent();
blitter_.Shutdown();
immediate_drawer_.reset();
ClearCurrent();
if (glx_context_) {
glXDestroyContext(disp_, glx_context_);
}
if (draw_area_) {
gtk_widget_destroy(draw_area_);
}
}
bool GLXContext::Initialize(GLContext* share_context) {
GtkWidget* window = GTK_WIDGET(target_window_->native_handle());
GtkWidget* draw_area = gtk_drawing_area_new();
int32_t width;
int32_t height;
gtk_window_get_size(GTK_WINDOW(window), &width, &height);
gtk_widget_set_size_request(draw_area, width, height);
gtk_container_add(GTK_CONTAINER(window), draw_area);
GdkVisual* visual = gdk_screen_get_system_visual(gdk_screen_get_default());
GdkDisplay* gdk_display = gtk_widget_get_display(window);
Display* display = gdk_x11_display_get_xdisplay(gdk_display);
disp_ = display;
::Window root = gdk_x11_get_default_root_xwindow();
static int vis_attrib_list[] = {GLX_RGBA, GLX_DEPTH_SIZE, 24,
GLX_DOUBLEBUFFER, None};
XVisualInfo* vi = glXChooseVisual(display, 0, vis_attrib_list);
if (vi == NULL) {
FatalGLError("No matching visuals for X display");
return false;
}
cmap_ = XCreateColormap(display, root, vi->visual, AllocNone);
::GLXContext temp_context = glXCreateContext(display, vi, NULL, GL_TRUE);
if (!temp_context) {
FatalGLError("Unable to create temporary GLX context");
return false;
}
xid_ = GDK_WINDOW_XID(gtk_widget_get_window(window));
glXMakeCurrent(display, xid_, temp_context);
tls_glew_context_ = glew_context_.get();
tls_glxew_context_ = glxew_context_.get();
if (glewInit() != GLEW_OK) {
FatalGLError("Unable to initialize GLEW.");
return false;
}
if (glxewInit() != GLEW_OK) {
FatalGLError("Unable to initialize GLXEW.");
return false;
}
if (!GLXEW_ARB_create_context) {
FatalGLError("GLX_ARB_create_context not supported by GL ICD.");
return false;
}
if (GLEW_ARB_robustness) {
robust_access_supported_ = true;
}
int context_flags = 0;
if (FLAGS_gl_debug) {
context_flags |= GLX_CONTEXT_DEBUG_BIT_ARB;
}
if (robust_access_supported_) {
context_flags |= GLX_CONTEXT_ROBUST_ACCESS_BIT_ARB;
}
int attrib_list[] = {
GLX_CONTEXT_MAJOR_VERSION_ARB,
4,
GLX_CONTEXT_MINOR_VERSION_ARB,
5,
GLX_CONTEXT_FLAGS_ARB,
context_flags,
GLX_CONTEXT_PROFILE_MASK_ARB,
GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB,
GLX_CONTEXT_RESET_NOTIFICATION_STRATEGY_ARB,
robust_access_supported_ ? GLX_LOSE_CONTEXT_ON_RESET_ARB : 0,
0};
GLXContext* share_context_glx = static_cast<GLXContext*>(share_context);
glx_context_ = glXCreateContextAttribsARB(
display, nullptr,
share_context ? share_context_glx->glx_context_ : nullptr, True,
attrib_list);
glXMakeCurrent(display, 0, nullptr);
glXDestroyContext(display, temp_context);
if (!glx_context_) {
FatalGLError("Unable to create real GL context.");
return false;
}
if (!MakeCurrent()) {
FatalGLError("Could not make real GL context current.");
return false;
}
XELOGI("Successfully created OpenGL context:");
XELOGI(" GL_VENDOR: %s", glGetString(GL_VENDOR));
XELOGI(" GL_VERSION: %s", glGetString(GL_VERSION));
XELOGI(" GL_RENDERER: %s", glGetString(GL_RENDERER));
XELOGI(" GL_SHADING_LANGUAGE_VERSION: %s",
glGetString(GL_SHADING_LANGUAGE_VERSION));
while (glGetError()) {
// Clearing errors.
}
SetupDebugging();
if (!blitter_.Initialize()) {
FatalGLError("Unable to initialize blitter.");
ClearCurrent();
return false;
}
immediate_drawer_ = std::make_unique<GLImmediateDrawer>(this);
ClearCurrent();
return true;
}
std::unique_ptr<GLXContext> GLXContext::CreateOffscreen(
GraphicsProvider* provider, GLXContext* parent_context) {
assert_not_null(parent_context->glx_context_);
::GLXContext new_glrc;
{
GraphicsContextLock context_lock(parent_context);
int context_flags = 0;
if (FLAGS_gl_debug) {
context_flags |= GLX_CONTEXT_DEBUG_BIT_ARB;
}
bool robust_access_supported = parent_context->robust_access_supported_;
if (robust_access_supported) {
context_flags |= GLX_CONTEXT_ROBUST_ACCESS_BIT_ARB;
}
int attrib_list[] = {
GLX_CONTEXT_MAJOR_VERSION_ARB,
4,
GLX_CONTEXT_MINOR_VERSION_ARB,
5,
GLX_CONTEXT_FLAGS_ARB,
context_flags,
GLX_CONTEXT_PROFILE_MASK_ARB,
GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB,
GLX_CONTEXT_RESET_NOTIFICATION_STRATEGY_ARB,
robust_access_supported ? GLX_LOSE_CONTEXT_ON_RESET_ARB : 0,
0};
new_glrc = glXCreateContextAttribsARB(parent_context->disp_, nullptr,
parent_context->glx_context_, True,
attrib_list);
if (!new_glrc) {
FatalGLError("Could not create shared context.");
return nullptr;
}
}
auto new_context = std::unique_ptr<GLXContext>(
new GLXContext(provider, parent_context->target_window_));
new_context->glx_context_ = new_glrc;
new_context->window_ = parent_context->window_;
new_context->draw_area_ = parent_context->draw_area_;
new_context->disp_ = parent_context->disp_;
new_context->xid_ = parent_context->xid_;
new_context->robust_access_supported_ =
parent_context->robust_access_supported_;
if (!new_context->MakeCurrent()) {
FatalGLError("Could not make new GL context current.");
return nullptr;
}
if (!glGetString(GL_EXTENSIONS)) {
new_context->ClearCurrent();
FatalGLError("New GL context did not have extensions.");
return nullptr;
}
if (glewInit() != GLEW_OK) {
new_context->ClearCurrent();
FatalGLError("Unable to initialize GLEW on shared context.");
return nullptr;
}
if (glxewInit() != GLEW_OK) {
new_context->ClearCurrent();
FatalGLError("Unable to initialize GLXEW on shared context.");
return nullptr;
}
new_context->SetupDebugging();
if (!new_context->blitter_.Initialize()) {
FatalGLError("Unable to initialize blitter on shared context.");
return nullptr;
}
new_context->ClearCurrent();
return new_context;
}
bool GLXContext::is_current() {
return tls_glew_context_ == glew_context_.get();
}
bool GLXContext::MakeCurrent() {
SCOPE_profile_cpu_f("gpu");
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.lock();
}
if (!glXMakeCurrent(disp_, xid_, glx_context_)) {
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.unlock();
}
FatalGLError("Unable to make GL context current.");
return false;
}
tls_glew_context_ = glew_context_.get();
tls_glxew_context_ = glxew_context_.get();
return true;
}
void GLXContext::ClearCurrent() {
if (!FLAGS_disable_gl_context_reset) {
glXMakeCurrent(disp_, 0, nullptr);
}
tls_glew_context_ = nullptr;
tls_glxew_context_ = nullptr;
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.unlock();
}
}
void GLXContext::BeginSwap() {
SCOPE_profile_cpu_i("gpu", "xe::ui::gl::GLXContext::BeginSwap");
float clear_color[] = {238 / 255.0f, 238 / 255.0f, 238 / 255.0f, 1.0f};
if (FLAGS_random_clear_color) {
clear_color[0] =
rand() / static_cast<float>(RAND_MAX); // NOLINT(runtime/threadsafe_fn)
clear_color[1] = 1.0f;
clear_color[2] = 0.0f;
clear_color[3] = 1.0f;
}
glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
}
void GLXContext::EndSwap() {
SCOPE_profile_cpu_i("gpu", "xe::ui::gl::GLXContext::EndSwap");
glXSwapBuffers(disp_, xid_);
}
} // namespace gl
} // namespace ui
} // namespace xe

View File

@ -1,69 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_GLX_CONTEXT_H_
#define XENIA_UI_GL_GLX_CONTEXT_H_
#include <gflags/gflags.h>
#include <memory>
#include "third_party/GL/glxew.h"
#include "xenia/base/platform_linux.h"
#include "xenia/ui/gl/blitter.h"
#include "xenia/ui/gl/gl.h"
#include "xenia/ui/gl/gl_context.h"
#include "xenia/ui/graphics_context.h"
DECLARE_bool(thread_safe_gl);
namespace xe {
namespace ui {
namespace gl {
class GLImmediateDrawer;
class GLProvider;
class GLXContext : public GLContext {
public:
~GLXContext() override;
bool is_current() override;
bool MakeCurrent() override;
void ClearCurrent() override;
void BeginSwap() override;
void EndSwap() override;
protected:
static std::unique_ptr<GLXContext> CreateOffscreen(
GraphicsProvider* provider, GLXContext* parent_context);
bool Initialize(GLContext* share_context) override;
void* handle() override { return glx_context_; }
private:
friend class GLContext;
GLXContext(GraphicsProvider* provider, Window* target_window);
std::unique_ptr<GLEWContext> glew_context_;
std::unique_ptr<GLXEWContext> glxew_context_;
::GLXContext glx_context_;
GtkWidget* window_;
GtkWidget* draw_area_;
Colormap cmap_;
Display* disp_;
int xid_;
};
} // namespace gl
} // namespace ui
} // namespace xe
#endif  // XENIA_UI_GL_GLX_CONTEXT_H_

View File

@ -1,283 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/gl/gl_immediate_drawer.h"
#include <string>
#include "xenia/base/assert.h"
#include "xenia/ui/graphics_context.h"
namespace xe {
namespace ui {
namespace gl {
constexpr uint32_t kMaxDrawVertices = 64 * 1024;
constexpr uint32_t kMaxDrawIndices = 64 * 1024;
class GLImmediateTexture : public ImmediateTexture {
public:
GLImmediateTexture(uint32_t width, uint32_t height,
ImmediateTextureFilter filter, bool repeat)
: ImmediateTexture(width, height) {
GLuint gl_handle;
glCreateTextures(GL_TEXTURE_2D, 1, &gl_handle);
GLenum gl_filter = GL_NEAREST;
switch (filter) {
case ImmediateTextureFilter::kNearest:
gl_filter = GL_NEAREST;
break;
case ImmediateTextureFilter::kLinear:
gl_filter = GL_LINEAR;
break;
}
glTextureParameteri(gl_handle, GL_TEXTURE_MIN_FILTER, gl_filter);
glTextureParameteri(gl_handle, GL_TEXTURE_MAG_FILTER, gl_filter);
glTextureParameteri(gl_handle, GL_TEXTURE_WRAP_S,
repeat ? GL_REPEAT : GL_CLAMP_TO_EDGE);
glTextureParameteri(gl_handle, GL_TEXTURE_WRAP_T,
repeat ? GL_REPEAT : GL_CLAMP_TO_EDGE);
glTextureStorage2D(gl_handle, 1, GL_RGBA8, width, height);
handle = static_cast<uintptr_t>(gl_handle);
}
~GLImmediateTexture() override {
GLuint gl_handle = static_cast<GLuint>(handle);
glDeleteTextures(1, &gl_handle);
}
};
GLImmediateDrawer::GLImmediateDrawer(GraphicsContext* graphics_context)
: ImmediateDrawer(graphics_context) {
glCreateBuffers(1, &vertex_buffer_);
glNamedBufferStorage(vertex_buffer_,
kMaxDrawVertices * sizeof(ImmediateVertex), nullptr,
GL_DYNAMIC_STORAGE_BIT);
glCreateBuffers(1, &index_buffer_);
glNamedBufferStorage(index_buffer_, kMaxDrawIndices * sizeof(uint16_t),
nullptr, GL_DYNAMIC_STORAGE_BIT);
glCreateVertexArrays(1, &vao_);
glEnableVertexArrayAttrib(vao_, 0);
glVertexArrayAttribBinding(vao_, 0, 0);
glVertexArrayAttribFormat(vao_, 0, 2, GL_FLOAT, GL_FALSE,
offsetof(ImmediateVertex, x));
glEnableVertexArrayAttrib(vao_, 1);
glVertexArrayAttribBinding(vao_, 1, 0);
glVertexArrayAttribFormat(vao_, 1, 2, GL_FLOAT, GL_FALSE,
offsetof(ImmediateVertex, u));
glEnableVertexArrayAttrib(vao_, 2);
glVertexArrayAttribBinding(vao_, 2, 0);
glVertexArrayAttribFormat(vao_, 2, 4, GL_UNSIGNED_BYTE, GL_TRUE,
offsetof(ImmediateVertex, color));
glVertexArrayVertexBuffer(vao_, 0, vertex_buffer_, 0,
sizeof(ImmediateVertex));
InitializeShaders();
}
GLImmediateDrawer::~GLImmediateDrawer() {
GraphicsContextLock lock(graphics_context_);
glDeleteBuffers(1, &vertex_buffer_);
glDeleteBuffers(1, &index_buffer_);
glDeleteVertexArrays(1, &vao_);
glDeleteProgram(program_);
}
void GLImmediateDrawer::InitializeShaders() {
const std::string header =
R"(
#version 450
#extension GL_ARB_explicit_uniform_location : require
#extension GL_ARB_shading_language_420pack : require
precision highp float;
layout(std140, column_major) uniform;
layout(std430, column_major) buffer;
)";
const std::string vertex_shader_source = header +
R"(
layout(location = 0) uniform mat4 projection_matrix;
layout(location = 0) in vec2 in_pos;
layout(location = 1) in vec2 in_uv;
layout(location = 2) in vec4 in_color;
layout(location = 0) out vec2 vtx_uv;
layout(location = 1) out vec4 vtx_color;
void main() {
gl_Position = projection_matrix * vec4(in_pos.xy, 0.0, 1.0);
vtx_uv = in_uv;
vtx_color = in_color;
})";
const std::string fragment_shader_source = header +
R"(
layout(location = 1) uniform sampler2D texture_sampler;
layout(location = 2) uniform int restrict_texture_samples;
layout(location = 0) in vec2 vtx_uv;
layout(location = 1) in vec4 vtx_color;
layout(location = 0) out vec4 out_color;
void main() {
out_color = vtx_color;
if (restrict_texture_samples == 0 || vtx_uv.x <= 1.0) {
vec4 tex_color = texture(texture_sampler, vtx_uv);
out_color *= tex_color;
// TODO(benvanik): microprofiler shadows.
}
})";
GLuint vertex_shader = glCreateShader(GL_VERTEX_SHADER);
const char* vertex_shader_source_ptr = vertex_shader_source.c_str();
GLint vertex_shader_source_length = GLint(vertex_shader_source.size());
glShaderSource(vertex_shader, 1, &vertex_shader_source_ptr,
&vertex_shader_source_length);
glCompileShader(vertex_shader);
GLuint fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
const char* fragment_shader_source_ptr = fragment_shader_source.c_str();
GLint fragment_shader_source_length = GLint(fragment_shader_source.size());
glShaderSource(fragment_shader, 1, &fragment_shader_source_ptr,
&fragment_shader_source_length);
glCompileShader(fragment_shader);
program_ = glCreateProgram();
glAttachShader(program_, vertex_shader);
glAttachShader(program_, fragment_shader);
glLinkProgram(program_);
glDeleteShader(vertex_shader);
glDeleteShader(fragment_shader);
}
std::unique_ptr<ImmediateTexture> GLImmediateDrawer::CreateTexture(
uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool repeat,
const uint8_t* data) {
GraphicsContextLock lock(graphics_context_);
auto texture =
std::make_unique<GLImmediateTexture>(width, height, filter, repeat);
if (data) {
UpdateTexture(texture.get(), data);
}
return std::unique_ptr<ImmediateTexture>(texture.release());
}
void GLImmediateDrawer::UpdateTexture(ImmediateTexture* texture,
const uint8_t* data) {
GraphicsContextLock lock(graphics_context_);
glTextureSubImage2D(static_cast<GLuint>(texture->handle), 0, 0, 0,
texture->width, texture->height, GL_RGBA,
GL_UNSIGNED_BYTE, data);
}
void GLImmediateDrawer::Begin(int render_target_width,
int render_target_height) {
was_current_ = graphics_context_->is_current();
if (!was_current_) {
graphics_context_->MakeCurrent();
}
// Setup render state.
glEnablei(GL_BLEND, 0);
glBlendEquationi(0, GL_FUNC_ADD);
glBlendFunci(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glDisable(GL_DEPTH_TEST);
glDisable(GL_SCISSOR_TEST);
// Prepare drawing resources.
glUseProgram(program_);
glBindVertexArray(vao_);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer_);
// Setup orthographic projection matrix and viewport.
const float ortho_projection[4][4] = {
{2.0f / render_target_width, 0.0f, 0.0f, 0.0f},
{0.0f, 2.0f / -render_target_height, 0.0f, 0.0f},
{0.0f, 0.0f, -1.0f, 0.0f},
{-1.0f, 1.0f, 0.0f, 1.0f},
};
glProgramUniformMatrix4fv(program_, 0, 1, GL_FALSE, &ortho_projection[0][0]);
glViewport(0, 0, render_target_width, render_target_height);
}
void GLImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {
assert_true(batch.vertex_count <= kMaxDrawVertices);
glNamedBufferSubData(vertex_buffer_, 0,
batch.vertex_count * sizeof(ImmediateVertex),
batch.vertices);
if (batch.indices) {
assert_true(batch.index_count <= kMaxDrawIndices);
glNamedBufferSubData(index_buffer_, 0, batch.index_count * sizeof(uint16_t),
batch.indices);
}
batch_has_index_buffer_ = !!batch.indices;
}
void GLImmediateDrawer::Draw(const ImmediateDraw& draw) {
if (draw.scissor) {
glEnable(GL_SCISSOR_TEST);
glScissorIndexed(0, draw.scissor_rect[0], draw.scissor_rect[1],
draw.scissor_rect[2], draw.scissor_rect[3]);
} else {
glDisable(GL_SCISSOR_TEST);
}
if (draw.alpha_blend) {
glEnablei(GL_BLEND, 0);
glBlendEquationi(0, GL_FUNC_ADD);
glBlendFunci(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
} else {
glDisablei(GL_BLEND, 0);
}
if (draw.texture_handle) {
glBindTextureUnit(0, static_cast<GLuint>(draw.texture_handle));
} else {
glBindTextureUnit(0, 0);
}
glProgramUniform1i(program_, 2, draw.restrict_texture_samples ? 1 : 0);
GLenum mode = GL_TRIANGLES;
switch (draw.primitive_type) {
case ImmediatePrimitiveType::kLines:
mode = GL_LINES;
break;
case ImmediatePrimitiveType::kTriangles:
mode = GL_TRIANGLES;
break;
}
if (batch_has_index_buffer_) {
glDrawElementsBaseVertex(
mode, draw.count, GL_UNSIGNED_SHORT,
reinterpret_cast<void*>(draw.index_offset * sizeof(uint16_t)),
draw.base_vertex);
} else {
glDrawArrays(mode, draw.base_vertex, draw.count);
}
}
void GLImmediateDrawer::EndDrawBatch() { glFlush(); }
void GLImmediateDrawer::End() {
// Restore modified state.
glDisable(GL_SCISSOR_TEST);
glBindTextureUnit(0, 0);
glUseProgram(0);
glBindVertexArray(0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
if (!was_current_) {
graphics_context_->ClearCurrent();
}
}
} // namespace gl
} // namespace ui
} // namespace xe
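A hedged sketch of how a caller drives the drawer above for one batch per frame; the vertex/index arrays, the texture, and the exact field types of ImmediateDrawBatch/ImmediateDraw are assumptions based on their use in this file:

// Sketch only: context, verts/vert_count, idx/idx_count, and tex are placeholders.
xe::ui::ImmediateDrawer* drawer = context->immediate_drawer();
drawer->Begin(window_width, window_height);
xe::ui::ImmediateDrawBatch batch = {};
batch.vertices = verts;           // ImmediateVertex array.
batch.vertex_count = vert_count;
batch.indices = idx;              // Optional uint16_t indices.
batch.index_count = idx_count;
drawer->BeginDrawBatch(batch);
xe::ui::ImmediateDraw draw = {};
draw.primitive_type = xe::ui::ImmediatePrimitiveType::kTriangles;
draw.count = idx_count;
draw.texture_handle = tex->handle;  // Texture from CreateTexture().
draw.alpha_blend = true;
drawer->Draw(draw);
drawer->EndDrawBatch();
drawer->End();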

View File

@ -1,56 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_GL_IMMEDIATE_DRAWER_H_
#define XENIA_UI_GL_GL_IMMEDIATE_DRAWER_H_
#include <memory>
#include "xenia/ui/gl/gl.h"
#include "xenia/ui/immediate_drawer.h"
namespace xe {
namespace ui {
namespace gl {
class GLImmediateDrawer : public ImmediateDrawer {
public:
GLImmediateDrawer(GraphicsContext* graphics_context);
~GLImmediateDrawer() override;
std::unique_ptr<ImmediateTexture> CreateTexture(uint32_t width,
uint32_t height,
ImmediateTextureFilter filter,
bool repeat,
const uint8_t* data) override;
void UpdateTexture(ImmediateTexture* texture, const uint8_t* data) override;
void Begin(int render_target_width, int render_target_height) override;
void BeginDrawBatch(const ImmediateDrawBatch& batch) override;
void Draw(const ImmediateDraw& draw) override;
void EndDrawBatch() override;
void End() override;
private:
void InitializeShaders();
GLuint program_ = 0;
GLuint vao_ = 0;
GLuint vertex_buffer_ = 0;
GLuint index_buffer_ = 0;
bool was_current_ = false;
bool batch_has_index_buffer_ = false;
};
} // namespace gl
} // namespace ui
} // namespace xe
#endif // XENIA_UI_GL_GL_IMMEDIATE_DRAWER_H_

View File

@ -1,49 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/gl/gl_provider.h"
#include "xenia/ui/gl/gl_context.h"
#include "xenia/ui/window.h"
namespace xe {
namespace ui {
namespace gl {
std::unique_ptr<GraphicsProvider> GLProvider::Create(Window* main_window) {
std::unique_ptr<GLProvider> provider(new GLProvider(main_window));
//
return std::unique_ptr<GraphicsProvider>(provider.release());
}
GLProvider::GLProvider(Window* main_window) : GraphicsProvider(main_window) {}
GLProvider::~GLProvider() = default;
std::unique_ptr<GraphicsContext> GLProvider::CreateContext(
Window* target_window) {
auto share_context = main_window_->context();
return std::unique_ptr<GraphicsContext>(
GLContext::Create(this, target_window,
static_cast<GLContext*>(share_context))
.release());
}
std::unique_ptr<GraphicsContext> GLProvider::CreateOffscreenContext() {
auto share_context = main_window_->context();
return std::unique_ptr<GraphicsContext>(
GLContext::CreateOffscreen(this, static_cast<GLContext*>(share_context))
.release());
}
} // namespace gl
} // namespace ui
} // namespace xe
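A hedged sketch of typical provider/context creation with the classes above; main_window and child_window are placeholder xe::ui::Window* values:

// Sketch only: main_window/child_window are hypothetical windows.
auto provider = xe::ui::gl::GLProvider::Create(main_window);
// On-screen context for an additional window (shares with the main context).
auto context = provider->CreateContext(child_window);
// Offscreen context for a worker thread (texture uploads, shader compiles).
auto offscreen = provider->CreateOffscreenContext();
offscreen->MakeCurrent();
// ... GL work off the UI thread ...
offscreen->ClearCurrent();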

View File

@ -1,40 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_GL_GL_PROVIDER_H_
#define XENIA_UI_GL_GL_PROVIDER_H_
#include <memory>
#include "xenia/ui/graphics_provider.h"
namespace xe {
namespace ui {
namespace gl {
class GLProvider : public GraphicsProvider {
public:
~GLProvider() override;
static std::unique_ptr<GraphicsProvider> Create(Window* main_window);
std::unique_ptr<GraphicsContext> CreateContext(
Window* target_window) override;
std::unique_ptr<GraphicsContext> CreateOffscreenContext() override;
protected:
explicit GLProvider(Window* main_window);
};
} // namespace gl
} // namespace ui
} // namespace xe
#endif // XENIA_UI_GL_GL_PROVIDER_H_

View File

@ -1,30 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <string>
#include <vector>
#include "xenia/base/main.h"
#include "xenia/ui/gl/gl_provider.h"
#include "xenia/ui/window.h"
namespace xe {
namespace ui {
int window_demo_main(const std::vector<std::wstring>& args);
std::unique_ptr<GraphicsProvider> CreateDemoGraphicsProvider(Window* window) {
return xe::ui::gl::GLProvider::Create(window);
}
} // namespace ui
} // namespace xe
DEFINE_ENTRY_POINT(L"xenia-ui-window-gl-demo", L"xenia-ui-window-gl-demo",
xe::ui::window_demo_main);

View File

@ -1,56 +0,0 @@
project_root = "../../../.."
include(project_root.."/tools/build")
group("src")
project("xenia-ui-gl")
uuid("623300e3-0085-4ccc-af46-d60e88cb43aa")
kind("StaticLib")
language("C++")
links({
"glew",
"xenia-base",
"xenia-ui",
})
defines({
"GLEW_STATIC=1",
"GLEW_MX=1",
})
includedirs({
project_root.."/third_party/gflags/src",
})
local_platform_files()
removefiles({"*_demo.cc"})
group("demos")
project("xenia-ui-window-gl-demo")
uuid("e0a687e5-d1f4-4c18-b2f7-012c53ec1ee4")
kind("WindowedApp")
language("C++")
links({
"gflags",
"glew",
"imgui",
"xenia-base",
"xenia-ui",
"xenia-ui-gl",
})
flags({
"WinMain", -- Use WinMain instead of main.
})
defines({
"GLEW_STATIC=1",
"GLEW_MX=1",
})
includedirs({
project_root.."/third_party/gflags/src",
})
files({
"../window_demo.cc",
"gl_window_demo.cc",
project_root.."/src/xenia/base/main_"..platform_suffix..".cc",
})
files({
})
resincludedirs({
project_root,
})

View File

@ -31,12 +31,22 @@ project("xenia-ui-window-vulkan-demo")
links({
"gflags",
"imgui",
"vulkan-loader",
"volk",
"xenia-base",
"xenia-ui",
"xenia-ui-spirv",
"xenia-ui-vulkan",
})
filter("platforms:Linux")
links({
"X11",
"xcb",
"X11-xcb",
"GL",
"vulkan",
})
filter()
flags({
"WinMain", -- Use WinMain instead of main.
})

View File

@ -22,13 +22,10 @@
#error Platform not yet supported.
#endif // XE_PLATFORM_WIN32
// We are statically linked with the loader, so use function prototypes.
#define VK_PROTOTYPES
// We use a loader with its own function prototypes.
#include "third_party/volk/volk.h"
#include "third_party/vulkan/vulkan.h"
// NOTE: header order matters here, unfortunately:
#include "third_party/vulkan/vk_lunarg_debug_marker.h"
#define XELOGVK XELOGI
DECLARE_bool(vulkan_validation);
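The hunk above swaps the statically linked loader prototypes for volk, which resolves Vulkan entry points at runtime. A hedged sketch of the resulting initialization order (not taken from xenia's actual Vulkan provider; function and variable names other than the volk/Vulkan API are assumptions):

// Sketch only; real instance creation lives elsewhere in the tree.
#include "third_party/volk/volk.h"

bool InitVulkanViaVolk() {
  if (volkInitialize() != VK_SUCCESS) {
    return false;  // No Vulkan loader/ICD present on this machine.
  }
  VkApplicationInfo app_info = {VK_STRUCTURE_TYPE_APPLICATION_INFO};
  app_info.apiVersion = VK_MAKE_VERSION(1, 0, 0);
  VkInstanceCreateInfo instance_info = {VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO};
  instance_info.pApplicationInfo = &app_info;
  VkInstance instance = VK_NULL_HANDLE;
  if (vkCreateInstance(&instance_info, nullptr, &instance) != VK_SUCCESS) {
    return false;
  }
  volkLoadInstance(instance);  // Resolves instance-level entry points.
  return true;
}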

View File

@ -169,7 +169,7 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) {
queue_info.queueFamilyIndex = i;
queue_info.queueCount = family_props.queueCount;
queue_priorities[i].resize(queue_count, 0.f);
queue_priorities[i].resize(family_props.queueCount, 0.f);
if (i == ideal_queue_family_index) {
// Prioritize the first queue on the primary queue family.
queue_priorities[i][0] = 1.0f;
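The one-line change above sizes each priority array by its own family's queueCount instead of the ideal family's queue_count. A hedged sketch of the surrounding pattern (variable names mirror the hunk but are assumptions; assumes <vector> and the Vulkan headers are available):

// Sketch only: illustrates per-family priority arrays feeding queue creation.
std::vector<VkDeviceQueueCreateInfo> queue_infos(queue_family_count);
std::vector<std::vector<float>> queue_priorities(queue_family_count);
for (uint32_t i = 0; i < queue_family_count; ++i) {
  const VkQueueFamilyProperties& family_props = queue_family_props[i];
  queue_priorities[i].resize(family_props.queueCount, 0.0f);
  if (i == ideal_queue_family_index) {
    queue_priorities[i][0] = 1.0f;  // Prioritize the primary queue.
  }
  VkDeviceQueueCreateInfo& queue_info = queue_infos[i];
  queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
  queue_info.queueFamilyIndex = i;
  queue_info.queueCount = family_props.queueCount;
  queue_info.pQueuePriorities = queue_priorities[i].data();
}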

Some files were not shown because too many files have changed in this diff.