Textures and such.

parent 5b2672a1b8
commit dfc260b86e
@@ -36,7 +36,7 @@ T align(T value, T alignment) {
// Rounds the given number up to the next highest multiple.
template <typename T, typename V>
T round_up(T value, V multiple) {
return value ? (value + multiple - 1 - (value - 1) % multiple) : multiple;
return value ? (((value + multiple - 1) / multiple) * multiple) : multiple;
}
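Reviewer note: the new expression is the standard divide-then-multiply round-up. A quick check of the arithmetic with illustrative values (assuming integer division, as in the template above):

    // round_up(257, 256): ((257 + 255) / 256) * 256 = (512 / 256) * 256 = 512
    // round_up(256, 256): ((256 + 255) / 256) * 256 = (511 / 256) * 256 = 256
    // round_up(0,   256): value is zero, so the function returns multiple = 256
    assert(poly::round_up(257, 256) == 512);
    assert(poly::round_up(256, 256) == 256);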
inline float saturate(float value) {

@@ -27,29 +27,41 @@ CircularBuffer::CircularBuffer(size_t capacity)
gpu_base_(0),
host_base_(nullptr) {}
CircularBuffer::~CircularBuffer() {
glUnmapNamedBuffer(buffer_);
glDeleteBuffers(1, &buffer_);
}
CircularBuffer::~CircularBuffer() { Shutdown(); }
bool CircularBuffer::Initialize() {
glCreateBuffers(1, &buffer_);
glNamedBufferStorage(buffer_, capacity_, nullptr,
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
host_base_ = reinterpret_cast<uint8_t*>(glMapNamedBufferRange(
buffer_, 0, capacity_, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
GL_MAP_UNSYNCHRONIZED_BIT |
GL_MAP_PERSISTENT_BIT));
buffer_, 0, capacity_,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_PERSISTENT_BIT));
assert_not_null(host_base_);
if (!host_base_) {
return false;
}
glMakeNamedBufferResidentNV(buffer_, GL_WRITE_ONLY);
glGetNamedBufferParameterui64vNV(buffer_, GL_BUFFER_GPU_ADDRESS_NV,
&gpu_base_);
if (GLEW_NV_shader_buffer_load) {
// To use this bindlessly we must make it resident.
glMakeNamedBufferResidentNV(buffer_, GL_WRITE_ONLY);
glGetNamedBufferParameterui64vNV(buffer_, GL_BUFFER_GPU_ADDRESS_NV,
&gpu_base_);
}
return true;
}
void CircularBuffer::Shutdown() {
if (!buffer_) {
return;
}
glUnmapNamedBuffer(buffer_);
if (GLEW_NV_shader_buffer_load) {
glMakeNamedBufferNonResidentNV(buffer_);
}
glDeleteBuffers(1, &buffer_);
buffer_ = 0;
}
CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) {
// Addresses must always be % 256.
length = poly::round_up(length, 256);
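Reviewer note: the 256-byte alignment matches the usual GL requirement for buffer range bindings; a sketch of how the real limit could be queried instead of hard-coding it (hypothetical, not part of this change):

    GLint ssbo_align = 0;
    glGetIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &ssbo_align);  // commonly 256 or less
    // length = poly::round_up(length, static_cast<size_t>(ssbo_align));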

@@ -64,6 +76,7 @@ CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) {
Allocation allocation;
allocation.host_ptr = host_base_ + write_head_;
allocation.gpu_ptr = gpu_base_ + write_head_;
allocation.offset = write_head_;
allocation.length = length;
write_head_ += length;
return allocation;

@@ -26,10 +26,12 @@ class CircularBuffer {
struct Allocation {
void* host_ptr;
GLuint64 gpu_ptr;
size_t offset;
size_t length;
};
bool Initialize();
void Shutdown();
GLuint handle() const { return buffer_; }
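Reviewer note: a minimal sketch of how a caller is expected to use the new API, based on the Acquire/Commit pattern visible later in command_processor.cc (the type MyData and the sizes are illustrative only):

    CircularBuffer scratch(64 * 1024 * 1024);
    if (!scratch.Initialize()) { /* handle failure */ }
    auto alloc = scratch.Acquire(sizeof(MyData));            // rounded up to 256 bytes internally
    std::memcpy(alloc.host_ptr, &my_data, sizeof(MyData));   // write through the persistent mapping
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, scratch.handle(),
                      alloc.offset, alloc.length);
    scratch.Commit(std::move(alloc));  // presumably flushes the range, given GL_MAP_FLUSH_EXPLICIT_BIT
    scratch.Shutdown();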

@@ -16,6 +16,8 @@
#include <xenia/gpu/gl4/gl4_gpu-private.h>
#include <xenia/gpu/gl4/gl4_graphics_system.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/sampler_info.h>
#include <xenia/gpu/texture_info.h>
#include <xenia/gpu/xenos.h>
#include <third_party/xxhash/xxhash.h>

@@ -36,7 +38,7 @@ const GLuint kAnyTarget = UINT_MAX;
// All uncached vertex/index data goes here. If it fills up we need to sync
// with the GPU, so this should be large enough to prevent that in a normal
// frame.
const size_t kScratchBufferCapacity = 64 * 1024 * 1024;
const size_t kScratchBufferCapacity = 256 * 1024 * 1024;
CommandProcessor::CachedPipeline::CachedPipeline() = default;

@@ -61,6 +63,7 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system)
write_ptr_index_(0),
bin_select_(0xFFFFFFFFull),
bin_mask_(0xFFFFFFFFull),
has_bindless_vbos_(false),
active_vertex_shader_(nullptr),
active_pixel_shader_(nullptr),
active_framebuffer_(nullptr),

@@ -152,29 +155,34 @@ void CommandProcessor::WorkerMain() {
}
bool CommandProcessor::SetupGL() {
// Uniform buffer that stores the per-draw state (constants, etc).
glCreateBuffers(1, &uniform_data_buffer_);
glBindBuffer(GL_UNIFORM_BUFFER, uniform_data_buffer_);
glNamedBufferStorage(uniform_data_buffer_, 16 * 1024, nullptr,
GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT);
// Circular buffer holding scratch vertex/index data.
if (!scratch_buffer_.Initialize()) {
PLOGE("Unable to initialize scratch buffer");
return false;
}
// Texture cache that keeps track of any textures/samplers used.
if (!texture_cache_.Initialize(&scratch_buffer_)) {
PLOGE("Unable to initialize texture cache");
return false;
}
GLuint vertex_array;
glGenVertexArrays(1, &vertex_array);
glBindVertexArray(vertex_array);
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
if (GLEW_NV_vertex_buffer_unified_memory) {
has_bindless_vbos_ = true;
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
}
return true;
}
void CommandProcessor::ShutdownGL() {
glDeleteBuffers(1, &uniform_data_buffer_);
texture_cache_.Shutdown();
scratch_buffer_.Shutdown();
}
void CommandProcessor::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) {

@@ -264,6 +272,7 @@ void CommandProcessor::PrepareForWait() {
// make interrupt callbacks from the GPU so that we don't have to do a full
// synchronize here.
glFlush();
glFinish();
if (FLAGS_thread_safe_gl) {
context_->ClearCurrent();

@@ -1142,6 +1151,8 @@ void CommandProcessor::PrepareDraw(DrawCommand* draw_command) {
// Generic stuff.
cmd.start_index = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32;
cmd.base_vertex = 0;
cmd.state_data = nullptr;
}
bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {

@@ -1158,6 +1169,18 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
return IssueCopy(draw_command);
}
// TODO(benvanik): actually cache things >_>
texture_cache_.Clear();
// Allocate a state data block.
// Everything the shaders access lives here.
auto allocation = scratch_buffer_.Acquire(sizeof(UniformDataBlock));
cmd.state_data = reinterpret_cast<UniformDataBlock*>(allocation.host_ptr);
if (!cmd.state_data) {
PLOGE("Unable to allocate uniform data buffer");
return false;
}
if (!UpdateRenderTargets(draw_command)) {
PLOGE("Unable to setup render targets");
return false;

@@ -1172,17 +1195,15 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
PLOGE("Unable to setup render state");
return false;
}
if (!UpdateConstants(draw_command)) {
PLOGE("Unable to update shader constants");
return false;
}
if (!UpdateShaders(draw_command)) {
PLOGE("Unable to prepare draw shaders");
return false;
}
// if (!PopulateSamplers(draw_command)) {
// XELOGE("Unable to prepare draw samplers");
// return false;
//}
if (!PopulateIndexBuffer(draw_command)) {
PLOGE("Unable to setup index buffer");
return false;

@@ -1191,6 +1212,10 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
PLOGE("Unable to setup vertex buffers");
return false;
}
if (!PopulateSamplers(draw_command)) {
PLOGE("Unable to prepare draw samplers");
return false;
}
GLenum prim_type = 0;
switch (cmd.prim_type) {

@@ -1228,6 +1253,7 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
break;
case PrimitiveType::kQuadList:
prim_type = GL_LINES_ADJACENCY;
return false;
/*if
(vs->DemandGeometryShader(D3D11VertexShaderResource::QUAD_LIST_SHADER,
&geometry_shader)) {

@@ -1237,10 +1263,15 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
default:
case PrimitiveType::kUnknown0x07:
prim_type = GL_POINTS;
XELOGE("D3D11: unsupported primitive type %d", cmd.prim_type);
XELOGE("unsupported primitive type %d", cmd.prim_type);
break;
}
// Commit the state buffer - nothing can change after this.
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, scratch_buffer_.handle(),
allocation.offset, allocation.length);
scratch_buffer_.Commit(std::move(allocation));
// HACK HACK HACK
glDisable(GL_DEPTH_TEST);

@@ -1254,13 +1285,108 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
prim_type, cmd.index_count,
cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
: GL_UNSIGNED_SHORT,
reinterpret_cast<void*>(cmd.start_index * element_size),
reinterpret_cast<void*>(cmd.index_buffer.buffer_offset +
cmd.start_index * element_size),
cmd.base_vertex);
} else {
// Auto draw.
glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
}
// Hacky draw counter.
if (false) {
static int draw_count = 0;
glEnable(GL_SCISSOR_TEST);
glScissor(20, 0, 20, 20);
float red[] = {0, draw_count / 100.0f, 0, 1.0f};
draw_count = (draw_count + 1) % 100;
glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
red);
glDisable(GL_SCISSOR_TEST);
}
return true;
}
bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
auto& regs = *register_file_;
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
// RB_SURFACE_INFO
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
// Get/create all color render targets, if we are using them.
// In depth-only mode we don't need them.
GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE};
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
if (enable_mode == ModeControl::kColorDepth) {
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
};
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
for (int n = 0; n < poly::countof(color_info); n++) {
uint32_t write_mask = (color_mask >> (n * 4)) & 0xF;
if (!write_mask) {
// Unused, so keep disabled and set to wildcard so we'll take any
// framebuffer that has it.
continue;
}
uint32_t color_base = color_info[n] & 0xFFF;
auto color_format =
static_cast<ColorRenderTargetFormat>((color_info[n] >> 16) & 0xF);
color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa,
color_base, color_format);
draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n;
glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2),
!!(write_mask & 0x4), !!(write_mask & 0x8));
}
}
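Reviewer note: worked example of the 4-bit-per-target write mask decode above (the register value is illustrative):

    // color_mask = 0x0000F00F: RT0 -> 0xF (RGBA on), RT1 -> 0x0, RT2 -> 0x0, RT3 -> 0xF.
    // For n = 3: write_mask = (0x0000F00F >> 12) & 0xF = 0xF
    //   -> glColorMaski(3, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE)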
// Get/create depth buffer, but only if we are going to use it.
uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
bool uses_depth =
(depth_control & 0x00000002) || (depth_control & 0x00000004);
uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16;
bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0);
GLuint depth_target = kAnyTarget;
if (uses_depth && uses_stencil) {
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
uint32_t depth_base = depth_info & 0xFFF;
auto depth_format =
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base,
depth_format);
// TODO(benvanik): when a game switches does it expect to keep the same
// depth buffer contents?
}
// Get/create a framebuffer with the required targets.
// Note that none may be returned if we really don't need one.
auto cached_framebuffer = GetFramebuffer(color_targets, depth_target);
active_framebuffer_ = cached_framebuffer;
if (!active_framebuffer_) {
// Nothing to do.
return true;
}
// Setup just the targets we want.
glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4,
draw_buffers);
// Make active.
// TODO(benvanik): can we do this all named?
// TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
return true;
}

@@ -1272,57 +1398,24 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
auto& regs = *register_file_;
union float4 {
float v[4];
struct {
float x, y, z, w;
};
};
struct UniformDataBlock {
float4 window_offset; // tx,ty,rt_w,rt_h
float4 window_scissor; // x0,y0,x1,y1
float4 viewport_offset; // tx,ty,tz,?
float4 viewport_scale; // sx,sy,sz,?
// TODO(benvanik): vertex format xyzw?
float4 alpha_test; // alpha test enable, func, ref, ?
// Register data from 0x4000 to 0x4927.
// SHADER_CONSTANT_000_X...
float4 float_consts[512];
// SHADER_CONSTANT_FETCH_00_0...
uint32_t fetch_consts[32 * 6];
// SHADER_CONSTANT_BOOL_000_031...
int32_t bool_consts[8];
// SHADER_CONSTANT_LOOP_00...
int32_t loop_consts[32];
};
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
"Need <=16k uniform data");
auto allocation = scratch_buffer_.Acquire(16 * 1024);
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(allocation.host_ptr);
if (!buffer_ptr) {
PLOGE("Unable to allocate uniform data buffer");
return false;
}
auto state_data = draw_command->state_data;
// Window parameters.
// See r200UpdateWindow:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
buffer_ptr->window_offset.x = float(window_offset & 0x7FFF);
buffer_ptr->window_offset.y = float((window_offset >> 16) & 0x7FFF);
state_data->window_offset.x = float(window_offset & 0x7FFF);
state_data->window_offset.y = float((window_offset >> 16) & 0x7FFF);
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
buffer_ptr->window_scissor.x = float(window_scissor_tl & 0x7FFF);
buffer_ptr->window_scissor.y = float((window_scissor_tl >> 16) & 0x7FFF);
buffer_ptr->window_scissor.z = float(window_scissor_br & 0x7FFF);
buffer_ptr->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
state_data->window_scissor.x = float(window_scissor_tl & 0x7FFF);
state_data->window_scissor.y = float((window_scissor_tl >> 16) & 0x7FFF);
state_data->window_scissor.z = float(window_scissor_br & 0x7FFF);
state_data->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
// HACK: no clue where to get these values.
buffer_ptr->window_offset.z = 1280;
buffer_ptr->window_offset.w = 720;
state_data->window_offset.z = 1280;
state_data->window_offset.w = 720;
// Whether each of the viewport settings is enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf

@@ -1338,20 +1431,20 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
vport_yoffset_enable == vport_zoffset_enable);
// Viewport scaling. Only enabled if the flags are all set.
buffer_ptr->viewport_scale.x =
state_data->viewport_scale.x =
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1; // 640
buffer_ptr->viewport_offset.x = vport_xoffset_enable
state_data->viewport_offset.x = vport_xoffset_enable
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
: 0; // 640
buffer_ptr->viewport_scale.y = vport_yscale_enable
state_data->viewport_scale.y = vport_yscale_enable
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
: 1; // -360
buffer_ptr->viewport_offset.y = vport_yoffset_enable
state_data->viewport_offset.y = vport_yoffset_enable
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: 0; // 360
buffer_ptr->viewport_scale.z =
state_data->viewport_scale.z =
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1; // 1
buffer_ptr->viewport_offset.z =
state_data->viewport_offset.z =
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0; // 0
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
// = false: multiply the X, Y coordinates by 1/W0.

@@ -1365,15 +1458,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
// TODO(benvanik): pass to shaders? disable transform? etc?
glViewport(0, 0, 1280, 720);
// Copy over all constants.
// TODO(benvanik): partial updates, etc. We could use shader constant access
// knowledge that we get at compile time to only upload those constants
// required.
std::memcpy(
&buffer_ptr->float_consts, &regs[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
sizeof(buffer_ptr->float_consts) + sizeof(buffer_ptr->fetch_consts) +
sizeof(buffer_ptr->loop_consts) + sizeof(buffer_ptr->bool_consts));
// Scissoring.
int32_t screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32;
int32_t screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;

@@ -1424,10 +1508,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
// Deprecated in GL, implemented in shader.
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
buffer_ptr->alpha_test.x =
state_data->alpha_test.x =
(color_control & 0x4) ? 1.0f : 0.0f; // ALPAHTESTENABLE
buffer_ptr->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC
buffer_ptr->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
state_data->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC
state_data->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
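Reviewer note: per the comment above, the consuming fragment shader presumably does something along these lines with the packed alpha_test vector (illustrative sketch only, not part of this diff):

    // if (state.alpha_test.x != 0.0)                         // ALPHATESTENABLE
    //   if (!passes(oC[0].a, state.alpha_test.y /* func */,
    //               state.alpha_test.z /* ref */)) discard;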
static const GLenum blend_map[] = {
/* 0 */ GL_ZERO,

@@ -1575,91 +1659,23 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
}
// Stash - program setup will bind this to uniforms.
draw_command->state_data_gpu_ptr = allocation.gpu_ptr;
scratch_buffer_.Commit(std::move(allocation));
return true;
}
bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
bool CommandProcessor::UpdateConstants(DrawCommand* draw_command) {
auto& regs = *register_file_;
auto state_data = draw_command->state_data;
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
// TODO(benvanik): partial updates, etc. We could use shader constant access
// knowledge that we get at compile time to only upload those constants
// required. If we did this as a variable length then we could really cut
// down on state block sizes.
// RB_SURFACE_INFO
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
// Get/create all color render targets, if we are using them.
// In depth-only mode we don't need them.
GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE};
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
if (enable_mode == ModeControl::kColorDepth) {
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
};
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
for (int n = 0; n < poly::countof(color_info); n++) {
uint32_t write_mask = (color_mask >> (n * 4)) & 0xF;
if (!write_mask) {
// Unused, so keep disabled and set to wildcard so we'll take any
// framebuffer that has it.
continue;
}
uint32_t color_base = color_info[n] & 0xFFF;
auto color_format =
static_cast<ColorRenderTargetFormat>((color_info[n] >> 16) & 0xF);
color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa,
color_base, color_format);
draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n;
glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2),
!!(write_mask & 0x4), !!(write_mask & 0x8));
}
}
// Get/create depth buffer, but only if we are going to use it.
uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
bool uses_depth =
(depth_control & 0x00000002) || (depth_control & 0x00000004);
uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16;
bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0);
GLuint depth_target = kAnyTarget;
if (uses_depth && uses_stencil) {
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
uint32_t depth_base = depth_info & 0xFFF;
auto depth_format =
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base,
depth_format);
// TODO(benvanik): when a game switches does it expect to keep the same
// depth buffer contents?
}
// Get/create a framebuffer with the required targets.
// Note that none may be returned if we really don't need one.
auto cached_framebuffer = GetFramebuffer(color_targets, depth_target);
active_framebuffer_ = cached_framebuffer;
if (!active_framebuffer_) {
// Nothing to do.
return true;
}
// Setup just the targets we want.
glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4,
draw_buffers);
// Make active.
// TODO(benvanik): can we do this all named?
// TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
// Copy over all constants.
std::memcpy(
&state_data->float_consts, &regs[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
sizeof(state_data->float_consts) + sizeof(state_data->fetch_consts) +
sizeof(state_data->loop_consts) + sizeof(state_data->bool_consts));
return true;
}

@@ -1718,28 +1734,10 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
glUseProgramStages(pipeline, GL_GEOMETRY_SHADER_BIT, geometry_program);
glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, fragment_program);
// HACK: layout(location=0) on a bindless uniform crashes nvidia driver.
GLint vertex_state_loc = glGetUniformLocation(vertex_program, "state");
assert_true(vertex_state_loc == 0);
GLint geometry_state_loc =
geometry_program ? glGetUniformLocation(geometry_program, "state") : -1;
assert_true(geometry_state_loc == -1 || geometry_state_loc == 0);
GLint fragment_state_loc = glGetUniformLocation(fragment_program, "state");
assert_true(fragment_state_loc == -1 || fragment_state_loc == 0);
cached_pipeline->handles.default_pipeline = pipeline;
}
// TODO(benvanik): do we need to do this for all stages if the locations
// match?
glProgramUniformHandleui64ARB(vertex_program, 0, cmd.state_data_gpu_ptr);
/*if (geometry_program && geometry_state_loc != -1) {
glProgramUniformHandleui64ARB(geometry_program, 0, cmd.state_data_gpu_ptr);
}*/
/*if (fragment_state_loc != -1) {
glProgramUniformHandleui64ARB(fragment_program, 0,
cmd.state_data_gpu_ptr);
}*/
// NOTE: we don't yet have our state data pointer - that comes at the end.
glBindProgramPipeline(cached_pipeline->handles.default_pipeline);

@@ -1759,10 +1757,10 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) {
assert_true(info.endianness == Endian::k8in16 ||
info.endianness == Endian::k8in32);
auto allocation = scratch_buffer_.Acquire(cmd.index_count *
(info.format == IndexFormat::kInt32
? sizeof(uint32_t)
: sizeof(uint16_t)));
size_t total_size =
cmd.index_count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
: sizeof(uint16_t));
auto allocation = scratch_buffer_.Acquire(total_size);
if (info.format == IndexFormat::kInt32) {
poly::copy_and_swap_32_aligned(

@@ -1776,9 +1774,14 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) {
cmd.index_count);
}
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, allocation.gpu_ptr,
allocation.length);
if (has_bindless_vbos_) {
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, allocation.gpu_ptr,
allocation.length);
} else {
// Offset is used in glDrawElements.
cmd.index_buffer.buffer_offset = allocation.offset;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, scratch_buffer_.handle());
}
scratch_buffer_.Commit(std::move(allocation));
return true;
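Reviewer note: the scratch copy is also where the big-endian guest index data gets byte-swapped; as a stand-in for what poly::copy_and_swap_16_aligned presumably does for the 16-bit path (an illustrative equivalent, not the poly implementation):

    void copy_and_swap_16(uint16_t* dest, const uint16_t* src, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        dest[i] = static_cast<uint16_t>((src[i] << 8) | (src[i] >> 8));  // 8-in-16 swap
      }
    }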

@@ -1792,7 +1795,8 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
const auto& buffer_inputs = active_vertex_shader_->buffer_inputs();
for (size_t n = 0; n < buffer_inputs.count; n++) {
uint32_t el_index = 0;
for (uint32_t n = 0; n < buffer_inputs.count; n++) {
const auto& desc = buffer_inputs.descs[n];
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;

@@ -1826,7 +1830,11 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
reinterpret_cast<const uint32_t*>(membase_ + (fetch->address << 2)),
fetch->size);
uint32_t el_index = 0;
if (!has_bindless_vbos_) {
glBindVertexBuffer(n, scratch_buffer_.handle(), allocation.offset,
desc.stride_words * 4);
}
for (uint32_t i = 0; i < desc.element_count; ++i) {
const auto& el = desc.elements[i];
auto comp_count = GetVertexFormatComponentCount(el.format);

@@ -1882,13 +1890,19 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
assert_unhandled_case(el.format);
break;
}
size_t offset = el.offset_words * sizeof(uint32_t);
glEnableVertexAttribArray(el_index);
glVertexAttribFormatNV(el_index, comp_count, comp_type, el.is_normalized,
desc.stride_words * sizeof(uint32_t));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index,
allocation.gpu_ptr + offset,
allocation.length - offset);
if (has_bindless_vbos_) {
glVertexAttribFormatNV(el_index, comp_count, comp_type,
el.is_normalized,
desc.stride_words * sizeof(uint32_t));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index,
allocation.gpu_ptr + (el.offset_words * 4),
allocation.length - (el.offset_words * 4));
} else {
glVertexAttribBinding(el_index, n);
glVertexAttribFormat(el_index, comp_count, comp_type, el.is_normalized,
el.offset_words * 4);
}
++el_index;
}

@@ -1899,6 +1913,82 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
return true;
}
bool CommandProcessor::PopulateSamplers(DrawCommand* draw_command) {
SCOPE_profile_cpu_f("gpu");
auto& regs = *register_file_;
// VS and PS samplers are shared, but may be used exclusively.
// We walk each and setup lazily.
bool has_setup_sampler[32] = {false};
// Vertex texture samplers.
const auto& vertex_sampler_inputs = active_vertex_shader_->sampler_inputs();
for (size_t i = 0; i < vertex_sampler_inputs.count; ++i) {
const auto& desc = vertex_sampler_inputs.descs[i];
if (has_setup_sampler[desc.fetch_slot]) {
continue;
}
has_setup_sampler[desc.fetch_slot] = true;
if (!PopulateSampler(draw_command, desc)) {
return false;
}
}
// Pixel shader texture sampler.
const auto& pixel_sampler_inputs = active_pixel_shader_->sampler_inputs();
for (size_t i = 0; i < pixel_sampler_inputs.count; ++i) {
const auto& desc = pixel_sampler_inputs.descs[i];
if (has_setup_sampler[desc.fetch_slot]) {
continue;
}
has_setup_sampler[desc.fetch_slot] = true;
if (!PopulateSampler(draw_command, desc)) {
return false;
}
}
return true;
}
bool CommandProcessor::PopulateSampler(DrawCommand* draw_command,
const Shader::SamplerDesc& desc) {
auto& regs = *register_file_;
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6;
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
auto& fetch = group->texture_fetch;
// ?
assert_true(fetch.type == 0x2);
TextureInfo texture_info;
if (!TextureInfo::Prepare(fetch, &texture_info)) {
XELOGE("Unable to parse texture fetcher info");
return false; // invalid texture used
}
SamplerInfo sampler_info;
if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) {
XELOGE("Unable to parse sampler info");
return false; // invalid texture used
}
uint32_t guest_base = fetch.address << 12;
void* host_base = membase_ + guest_base;
auto entry_view = texture_cache_.Demand(host_base, texture_info.input_length,
texture_info, sampler_info);
if (!entry_view) {
// Unable to create/fetch/etc.
XELOGE("Failed to demand texture");
return false;
}
// Shaders will use bindless to fetch right from it.
draw_command->state_data->texture_samplers[desc.fetch_slot] =
entry_view->texture_sampler_handle;
return true;
}
bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
auto& regs = *register_file_;

@@ -2045,7 +2135,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
case CopyCommand::kConstantOne:
case CopyCommand::kNull:
default:
assert_unhandled_case(copy_command);
// assert_unhandled_case(copy_command);
return false;
}
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

@@ -20,6 +20,7 @@
#include <xenia/gpu/gl4/circular_buffer.h>
#include <xenia/gpu/gl4/gl_context.h>
#include <xenia/gpu/gl4/gl4_shader.h>
#include <xenia/gpu/gl4/texture_cache.h>
#include <xenia/gpu/register_file.h>
#include <xenia/gpu/xenos.h>
#include <xenia/memory.h>

@@ -40,6 +41,39 @@ struct SwapParameters {
GLenum attachment;
};
// This must match the layout in gl4_shader.cc.
struct UniformDataBlock {
union float4 {
float v[4];
struct {
float x, y, z, w;
};
};
float4 window_offset; // tx,ty,rt_w,rt_h
float4 window_scissor; // x0,y0,x1,y1
float4 viewport_offset; // tx,ty,tz,?
float4 viewport_scale; // sx,sy,sz,?
// TODO(benvanik): vertex format xyzw?
float4 alpha_test; // alpha test enable, func, ref, ?
// TODO(benvanik): overlay with fetch_consts below?
uint64_t texture_samplers[32];
// Register data from 0x4000 to 0x4927.
// SHADER_CONSTANT_000_X...
float4 float_consts[512];
// SHADER_CONSTANT_FETCH_00_0...
uint32_t fetch_consts[32 * 6];
// SHADER_CONSTANT_BOOL_000_031...
int32_t bool_consts[8];
// SHADER_CONSTANT_LOOP_00...
int32_t loop_consts[32];
};
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
"Need <=16k uniform data");
// TODO(benvanik): move more of the enums in here?
struct DrawCommand {
PrimitiveType prim_type;

@@ -54,6 +88,7 @@ struct DrawCommand {
size_t size;
xenos::Endian endianness;
xenos::IndexFormat format;
size_t buffer_offset;
} index_buffer;
// Texture samplers.

@@ -63,11 +98,9 @@ struct DrawCommand {
// SamplerStateResource* sampler_state;
};
SamplerInput vertex_shader_samplers[32];
size_t vertex_shader_sampler_count;
SamplerInput pixel_shader_samplers[32];
size_t pixel_shader_sampler_count;
GLuint64 state_data_gpu_ptr;
UniformDataBlock* state_data;
};
class CommandProcessor {

@@ -195,11 +228,15 @@ class CommandProcessor {
void PrepareDraw(DrawCommand* draw_command);
bool IssueDraw(DrawCommand* draw_command);
bool UpdateState(DrawCommand* draw_command);
bool UpdateRenderTargets(DrawCommand* draw_command);
bool UpdateState(DrawCommand* draw_command);
bool UpdateConstants(DrawCommand* draw_command);
bool UpdateShaders(DrawCommand* draw_command);
bool PopulateIndexBuffer(DrawCommand* draw_command);
bool PopulateVertexBuffers(DrawCommand* draw_command);
bool PopulateSamplers(DrawCommand* draw_command);
bool PopulateSampler(DrawCommand* draw_command,
const Shader::SamplerDesc& desc);
bool IssueCopy(DrawCommand* draw_command);
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],

@@ -237,7 +274,7 @@ class CommandProcessor {
uint64_t bin_select_;
uint64_t bin_mask_;
GLuint uniform_data_buffer_;
bool has_bindless_vbos_;
std::vector<std::unique_ptr<GL4Shader>> all_shaders_;
std::unordered_map<uint64_t, GL4Shader*> shader_cache_;

@@ -251,7 +288,7 @@ class CommandProcessor {
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
std::vector<std::unique_ptr<CachedPipeline>> all_pipelines_;
std::unordered_map<uint64_t, CachedPipeline*> cached_pipelines_;
TextureCache texture_cache_;
CircularBuffer scratch_buffer_;
DrawCommand draw_command_;

@@ -17,6 +17,9 @@
DECLARE_bool(thread_safe_gl);
DECLARE_bool(gl_debug_output);
DECLARE_bool(gl_debug_output_synchronous);
namespace xe {
namespace gpu {
namespace gl4 {

@@ -15,6 +15,10 @@
DEFINE_bool(thread_safe_gl, false,
"Only allow one GL context to be active at a time.");
DEFINE_bool(gl_debug_output, false, "Dump ARB_debug_output to stderr.");
DEFINE_bool(gl_debug_output_synchronous, true,
"ARB_debug_output will synchronize to be thread safe.");
namespace xe {
namespace gpu {
namespace gl4 {

@@ -35,7 +35,6 @@ const std::string header =
"#extension GL_ARB_explicit_uniform_location : require\n"
"#extension GL_ARB_shading_language_420pack : require\n"
"#extension GL_ARB_shader_storage_buffer_object : require\n"
"#extension GL_NV_shader_buffer_load : require\n"
"precision highp float;\n"
"precision highp int;\n"
"layout(std140, column_major) uniform;\n"

@@ -46,6 +45,7 @@ const std::string header =
" vec4 viewport_offset;\n"
" vec4 viewport_scale;\n"
" vec4 alpha_test;\n"
" uvec2 texture_samplers[32];\n"
" vec4 float_consts[512];\n"
" uint fetch_consts[32 * 6];\n"
" int bool_consts[8];\n"

@@ -55,7 +55,9 @@ const std::string header =
" vec4 o[16];\n"
"};\n"
"\n"
"uniform StateData* state;\n";
"layout(binding = 0) buffer State {\n"
" StateData state;\n"
"};\n";
bool GL4Shader::PrepareVertexShader(
const xenos::xe_gpu_program_cntl_t& program_cntl) {

@@ -69,20 +71,20 @@ bool GL4Shader::PrepareVertexShader(
// TODO(benvanik): piecewise viewport_enable -> offset/scale logic.
" if (false) {\n"
" } else {\n"
/*" pos.xy = pos.xy / vec2(state->window_offset.z / 2.0, "
"-state->window_offset.w / 2.0) + vec2(-1.0, 1.0);\n"
/*" pos.xy = pos.xy / vec2(state.window_offset.z / 2.0, "
"-state.window_offset.w / 2.0) + vec2(-1.0, 1.0);\n"
" pos.zw = vec2(0.0, 1.0);\n"*/
" pos.xy = pos.xy / vec2(1280.0 / 2.0, "
"-720.0 / 2.0) + vec2(-1.0, 1.0);\n"
" //pos.zw = vec2(0.0, 1.0);\n"
" }\n"
" pos.x = pos.x * state->viewport_scale.x + \n"
" state->viewport_offset.x;\n"
" pos.y = pos.y * state->viewport_scale.y + \n"
" state->viewport_offset.y;\n"
" pos.z = pos.z * state->viewport_scale.z + \n"
" state->viewport_offset.z;\n"
" pos.xy += state->window_offset.xy;\n"
" pos.x = pos.x * state.viewport_scale.x + \n"
" state.viewport_offset.x;\n"
" pos.y = pos.y * state.viewport_scale.y + \n"
" state.viewport_offset.y;\n"
" pos.z = pos.z * state.viewport_scale.z + \n"
" state.viewport_offset.z;\n"
" pos.xy += state.window_offset.xy;\n"
" return pos;\n"
"}\n";
std::string source =

@@ -105,6 +107,8 @@ bool GL4Shader::PrepareVertexShader(
" gl_Position = applyViewport(gl_Position);\n"
"}\n";
// glGetTextureSamplerHandleARB()
std::string translated_source =
shader_translator_.TranslateVertexShader(this, program_cntl);
if (translated_source.empty()) {

@@ -135,9 +139,9 @@ bool GL4Shader::PreparePixelShader(
"void processFragment();\n"
"void main() {\n"
" for (int i = 0; i < oC.length(); ++i) {\n"
" oC[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
" oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n"
" }\n" +
(program_cntl.ps_export_depth ? " gl_FragDepth = 0.0\n" : "") +
(program_cntl.ps_export_depth ? " gl_FragDepth = 0.0;\n" : "") +
" processFragment();\n"
"}\n";

@@ -28,25 +28,21 @@ static const char chan_names[] = {
const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) {
switch (el.format) {
case VertexFormat::k_32:
return el.is_signed ? "int" : "uint";
case VertexFormat::k_32_FLOAT:
return "float";
case VertexFormat::k_16_16:
case VertexFormat::k_32_32:
return el.is_signed ? "ivec2" : "uvec2";
case VertexFormat::k_16_16_FLOAT:
case VertexFormat::k_32_32_FLOAT:
return "vec2";
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
return "int3"; // ?
case VertexFormat::k_32_32_32_FLOAT:
return "vec3";
case VertexFormat::k_8_8_8_8:
case VertexFormat::k_2_10_10_10:
case VertexFormat::k_16_16_16_16:
case VertexFormat::k_32_32_32_32:
return el.is_signed ? "ivec4" : "uvec4";
case VertexFormat::k_16_16_16_16_FLOAT:
case VertexFormat::k_32_32_32_32_FLOAT:
return "vec4";

@@ -58,14 +54,13 @@ const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) {
}
GL4ShaderTranslator::GL4ShaderTranslator()
: output_(kOutputCapacity), tex_fetch_index_(0), dwords_(nullptr) {}
: output_(kOutputCapacity), dwords_(nullptr) {}
GL4ShaderTranslator::~GL4ShaderTranslator() = default;
void GL4ShaderTranslator::Reset(GL4Shader* shader) {
output_.Reset();
shader_type_ = shader->type();
tex_fetch_index_ = 0;
dwords_ = shader->data();
}

@@ -76,8 +71,6 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
// Normal shaders only, for now.
assert_true(program_cntl.vs_export_mode == 0);
AppendTextureHeader(vertex_shader->sampler_inputs());
// Add vertex shader input.
uint32_t el_index = 0;
const auto& buffer_inputs = vertex_shader->buffer_inputs();

@@ -102,7 +95,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
// Add temporaries for any registers we may use.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= temp_regs; n++) {
Append(" vec4 r%d = state->float_consts[%d];\n", n, n);
Append(" vec4 r%d = state.float_consts[%d];\n", n, n);
}
Append(" vec4 t;\n");

@@ -129,15 +122,13 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
// If the same PS is used with different VS that output different amounts
// (and less than the number of required registers), things may die.
AppendTextureHeader(pixel_shader->sampler_inputs());
// Pixel shader main() header.
Append("void processFragment() {\n");
// Add temporary registers.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) {
Append(" vec4 r%d = state->float_consts[%d];\n", n, n + 256);
Append(" vec4 r%d = state.float_consts[%d];\n", n, n + 256);
}
Append(" vec4 t;\n");
Append(" float s;\n"); // scalar result (used for RETAIN_PREV)

@@ -161,42 +152,6 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
return output_.to_string();
}
void GL4ShaderTranslator::AppendTextureHeader(
const GL4Shader::SamplerInputs& sampler_inputs) {
bool fetch_setup[32] = {false};
// 1 texture per constant slot, 1 sampler per fetch.
for (uint32_t n = 0; n < sampler_inputs.count; n++) {
const auto& input = sampler_inputs.descs[n];
const auto& fetch = input.tex_fetch;
// Add texture, if needed.
if (!fetch_setup[fetch.const_idx]) {
fetch_setup[fetch.const_idx] = true;
const char* texture_type = nullptr;
switch (fetch.dimension) {
case DIMENSION_1D:
texture_type = "Texture1D";
break;
default:
case DIMENSION_2D:
texture_type = "Texture2D";
break;
case DIMENSION_3D:
texture_type = "Texture3D";
break;
case DIMENSION_CUBE:
texture_type = "TextureCube";
break;
}
Append("%s x_texture_%d;\n", texture_type, fetch.const_idx);
}
// Add sampler.
Append("SamplerState x_sampler_%d;\n", n);
}
}
void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
uint32_t swiz, uint32_t negate,
uint32_t abs_constants) {

@@ -217,7 +172,7 @@ void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
if (abs_constants) {
Append("abs(");
}
Append("state->float_consts[%u]", is_pixel_shader() ? num + 256 : num);
Append("state.float_consts[%u]", is_pixel_shader() ? num + 256 : num);
if (abs_constants) {
Append(")");
}

@@ -258,9 +213,12 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) {
case 0:
Append("oC[0]");
break;
case 61:
// Write to t, as we need to splice just x out of it.
Append("t");
break;
default:
// TODO(benvanik): other render targets?
// TODO(benvanik): depth?
assert_always();
break;
}

@@ -282,7 +240,10 @@ void GL4ShaderTranslator::AppendDestReg(uint32_t num, uint32_t mask,
void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask,
uint32_t dst_exp) {
if (mask != 0xF) {
if (num == 61) {
// gl_FragDepth handling to just get x from the temp result.
Append(" gl_FragDepth = t.x;\n");
} else if (mask != 0xF) {
// Masking.
Append(" ");
AppendDestRegName(num, dst_exp);

@@ -399,7 +360,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) {
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);

@@ -685,7 +646,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) {
if (alu.vector_clamp) {
Append(", 0.0, 1.0)");
}
Append(";\n");
Append(".xxxx;\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
return true;
}

@@ -706,7 +667,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) {
if (alu.vector_clamp) {
Append(", 0.0, 1.0)");
}
Append(";\n");
Append(".xxxx;\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
return true;
}

@@ -730,7 +691,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
if (alu.vector_clamp) {
Append(", 0.0, 1.0)");
}
Append(";\n");
Append(".xxxx;\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
return true;
}

@@ -1402,20 +1363,27 @@ bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
int sync) {
int src_component_count = 0;
const char* sampler_type;
switch (tex->dimension) {
case DIMENSION_1D:
src_component_count = 1;
sampler_type = "sampler1D";
break;
default:
case DIMENSION_2D:
src_component_count = 2;
sampler_type = "sampler2D";
break;
case DIMENSION_3D:
src_component_count = 3;
sampler_type = "sampler3D";
break;
case DIMENSION_CUBE:
src_component_count = 3;
sampler_type = "samplerCube";
break;
default:
assert_unhandled_case(tex->dimension);
return false;
}
// Disassemble.

@@ -1500,10 +1468,10 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
Append("\n");
// Translate.
Append(" t = ");
Append("x_texture_%d.Sample(x_sampler_%d, r%u.", tex->const_idx,
tex_fetch_index_++, // hacky way to line up to tex buffers
tex->src_reg);
// TODO(benvanik): if sampler == null, set to invalid color.
Append(" t = texture(");
Append("%s(state.texture_samplers[%d])", sampler_type, tex->const_idx & 0xF);
Append(", r%u.", tex->src_reg);
src_swiz = tex->src_swiz;
for (int i = 0; i < src_component_count; i++) {
Append("%c", chan_names[src_swiz & 0x3]);

@@ -1511,6 +1479,26 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
}
Append(");\n");
|
||||
|
||||
// Output texture coordinates as color.
|
||||
// TODO(benvanik): only if texture is invalid?
|
||||
// Append(" t = vec4(r%u.", tex->src_reg);
|
||||
// src_swiz = tex->src_swiz;
|
||||
// for (int i = 0; i < src_component_count; i++) {
|
||||
// Append("%c", chan_names[src_swiz & 0x3]);
|
||||
// src_swiz >>= 2;
|
||||
//}
|
||||
// switch (src_component_count) {
|
||||
// case 1:
|
||||
// Append(", 0.0, 0.0, 1.0);\n");
|
||||
// break;
|
||||
// case 2:
|
||||
// Append(", 0.0, 1.0);\n");
|
||||
// break;
|
||||
// case 3:
|
||||
// Append(", 1.0);\n");
|
||||
// break;
|
||||
//}
|
||||
|
||||
Append(" r%u.xyzw = vec4(", tex->dst_reg);
|
||||
uint32_t dst_swiz = tex->dst_swiz;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
|
@ -1524,6 +1512,7 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
|
|||
} else if ((dst_swiz & 0x7) == 6) {
|
||||
// ?
|
||||
Append("?");
|
||||
assert_always();
|
||||
} else if ((dst_swiz & 0x7) == 7) {
|
||||
Append("r%u.%c", tex->dst_reg, chan_names[i]);
|
||||
} else {
|
||||
|
|
|
@ -39,7 +39,6 @@ class GL4ShaderTranslator {
|
|||
|
||||
protected:
|
||||
ShaderType shader_type_;
|
||||
uint32_t tex_fetch_index_;
|
||||
const uint32_t* dwords_;
|
||||
|
||||
static const int kOutputCapacity = 64 * 1024;
|
||||
|
@ -56,8 +55,6 @@ class GL4ShaderTranslator {
|
|||
va_end(args);
|
||||
}
|
||||
|
||||
void AppendTextureHeader(const GL4Shader::SamplerInputs& sampler_inputs);
|
||||
|
||||
void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
|
||||
uint32_t abs);
|
||||
void AppendDestRegName(uint32_t num, uint32_t dst_exp);
|
||||
|
|
|
@ -115,6 +115,8 @@ bool GLContext::Initialize(HWND hwnd) {
|
|||
// Clearing errors.
|
||||
}
|
||||
|
||||
SetupDebugging();
|
||||
|
||||
ClearCurrent();
|
||||
|
||||
return true;
|
||||
|
@ -160,11 +162,120 @@ std::unique_ptr<GLContext> GLContext::CreateShared() {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
SetupDebugging();
|
||||
|
||||
new_context->ClearCurrent();
|
||||
|
||||
return new_context;
|
||||
}
|
||||
|
||||
void GLContext::DebugMessage(GLenum source, GLenum type, GLuint id,
|
||||
GLenum severity, GLsizei length,
|
||||
const GLchar* message) {
|
||||
const char* source_name = nullptr;
|
||||
switch (source) {
|
||||
case GL_DEBUG_SOURCE_API_ARB:
|
||||
source_name = "OpenGL";
|
||||
break;
|
||||
case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB:
|
||||
source_name = "Windows";
|
||||
break;
|
||||
case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB:
|
||||
source_name = "Shader Compiler";
|
||||
break;
|
||||
case GL_DEBUG_SOURCE_THIRD_PARTY_ARB:
|
||||
source_name = "Third Party";
|
||||
break;
|
||||
case GL_DEBUG_SOURCE_APPLICATION_ARB:
|
||||
source_name = "Application";
|
||||
break;
|
||||
case GL_DEBUG_SOURCE_OTHER_ARB:
|
||||
source_name = "Other";
|
||||
break;
|
||||
default:
|
||||
source_name = "(unknown source)";
|
||||
break;
|
||||
}
|
||||
|
||||
const char* type_name = nullptr;
|
||||
switch (type) {
|
||||
case GL_DEBUG_TYPE_ERROR:
|
||||
type_name = "error";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
|
||||
type_name = "deprecated behavior";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
|
||||
type_name = "undefined behavior";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_PORTABILITY:
|
||||
type_name = "portability";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_PERFORMANCE:
|
||||
type_name = "performance";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_OTHER:
|
||||
type_name = "message";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_MARKER:
|
||||
type_name = "marker";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_PUSH_GROUP:
|
||||
type_name = "push group";
|
||||
break;
|
||||
case GL_DEBUG_TYPE_POP_GROUP:
|
||||
type_name = "pop group";
|
||||
break;
|
||||
default:
|
||||
type_name = "(unknown type)";
|
||||
break;
|
||||
}
|
||||
|
||||
const char* severity_name = nullptr;
|
||||
switch (severity) {
|
||||
case GL_DEBUG_SEVERITY_HIGH_ARB:
|
||||
severity_name = "high";
|
||||
break;
|
||||
case GL_DEBUG_SEVERITY_MEDIUM_ARB:
|
||||
severity_name = "medium";
|
||||
break;
|
||||
case GL_DEBUG_SEVERITY_LOW_ARB:
|
||||
severity_name = "low";
|
||||
break;
|
||||
case GL_DEBUG_SEVERITY_NOTIFICATION:
|
||||
severity_name = "notification";
|
||||
break;
|
||||
default:
|
||||
severity_name = "(unknown severity)";
|
||||
break;
|
||||
}
|
||||
|
||||
XELOGE("GL4 %s: %s(%s) %d: %s", source_name, type_name, severity_name, id,
|
||||
message);
|
||||
}
|
||||
|
||||
void GLAPIENTRY
|
||||
GLContext::DebugMessageThunk(GLenum source, GLenum type, GLuint id,
|
||||
GLenum severity, GLsizei length,
|
||||
const GLchar* message, GLvoid* user_param) {
|
||||
reinterpret_cast<GLContext*>(user_param)
|
||||
->DebugMessage(source, type, id, severity, length, message);
|
||||
}
|
||||
|
||||
void GLContext::SetupDebugging() {
|
||||
if (!FLAGS_gl_debug_output) {
|
||||
return;
|
||||
}
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
if (FLAGS_gl_debug_output_synchronous) {
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
}
|
||||
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL,
|
||||
GL_TRUE);
|
||||
glDebugMessageCallback(reinterpret_cast<GLDEBUGPROC>(&DebugMessageThunk),
|
||||
this);
|
||||
}
|
||||
|
||||
bool GLContext::MakeCurrent() {
|
||||
if (FLAGS_thread_safe_gl) {
|
||||
global_gl_mutex_.lock();
|
||||
|
|
|
@ -35,6 +35,13 @@ class GLContext {
|
|||
void ClearCurrent();
|
||||
|
||||
private:
|
||||
void SetupDebugging();
|
||||
void DebugMessage(GLenum source, GLenum type, GLuint id, GLenum severity,
|
||||
GLsizei length, const GLchar* message);
|
||||
static void GLAPIENTRY
|
||||
DebugMessageThunk(GLenum source, GLenum type, GLuint id, GLenum severity,
|
||||
GLsizei length, const GLchar* message, GLvoid* user_param);
|
||||
|
||||
HWND hwnd_;
|
||||
HDC dc_;
|
||||
HGLRC glrc_;
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
'gl4_shader_translator.h',
|
||||
'gl_context.cc',
|
||||
'gl_context.h',
|
||||
'texture_cache.cc',
|
||||
'texture_cache.h',
|
||||
],
|
||||
|
||||
'conditions': [
|
||||
|
|
|
@ -0,0 +1,497 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/gl4/texture_cache.h>
|
||||
|
||||
#include <poly/assert.h>
|
||||
#include <poly/math.h>
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
extern "C" GLEWContext* glewGetContext();
|
||||
extern "C" WGLEWContext* wglewGetContext();
|
||||
|
||||
TextureCache::TextureCache() {
|
||||
//
|
||||
}
|
||||
|
||||
TextureCache::~TextureCache() { Shutdown(); }

bool TextureCache::Initialize(CircularBuffer* scratch_buffer) {
  scratch_buffer_ = scratch_buffer;
  return true;
}

void TextureCache::Shutdown() {
  Clear();
  //
}

void TextureCache::Clear() {
  for (auto& entry : entries_) {
    for (auto& view : entry.views) {
      glMakeTextureHandleNonResidentARB(view.texture_sampler_handle);
      glDeleteSamplers(1, &view.sampler);
    }
    glDeleteTextures(1, &entry.base_texture);
  }
  entries_.clear();
}

TextureCache::EntryView* TextureCache::Demand(void* host_base, size_t length,
                                              const TextureInfo& texture_info,
                                              const SamplerInfo& sampler_info) {
  entries_.emplace_back(Entry());
  auto& entry = entries_.back();
  entry.texture_info = texture_info;

  GLenum target;
  switch (texture_info.dimension) {
    case Dimension::k1D:
      target = GL_TEXTURE_1D;
      break;
    case Dimension::k2D:
      target = GL_TEXTURE_2D;
      break;
    case Dimension::k3D:
      target = GL_TEXTURE_3D;
      break;
    case Dimension::kCube:
      target = GL_TEXTURE_CUBE_MAP;
      break;
    default:
      assert_unhandled_case(texture_info.dimension);
      return nullptr;
  }

  // Setup the base texture.
  glCreateTextures(target, 1, &entry.base_texture);
  if (!SetupTexture(entry.base_texture, texture_info)) {
    PLOGE("Unable to setup texture parameters");
    return nullptr;
  }

  // Upload/convert.
  bool uploaded = false;
  switch (texture_info.dimension) {
    case Dimension::k2D:
      uploaded = UploadTexture2D(entry.base_texture, host_base, length,
                                 texture_info, sampler_info);
      break;
    case Dimension::k1D:
    case Dimension::k3D:
    case Dimension::kCube:
      assert_unhandled_case(texture_info.dimension);
      return nullptr;
  }
  if (!uploaded) {
    PLOGE("Failed to convert/upload texture");
    return nullptr;
  }

  entry.views.emplace_back(EntryView());
  auto& entry_view = entry.views.back();
  entry_view.sampler_info = sampler_info;

  // Setup the sampler.
  glCreateSamplers(1, &entry_view.sampler);
  if (!SetupSampler(entry_view.sampler, texture_info, sampler_info)) {
    PLOGE("Unable to setup texture sampler parameters");
    return nullptr;
  }

  // Get the uvec2 handle to the texture/sampler pair and make it resident.
  // The handle can be passed directly to the shader.
  entry_view.texture_sampler_handle =
      glGetTextureSamplerHandleARB(entry.base_texture, entry_view.sampler);
  if (!entry_view.texture_sampler_handle) {
    return nullptr;
  }
  glMakeTextureHandleResidentARB(entry_view.texture_sampler_handle);

  return &entry_view;
}
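
// Illustrative sketch only (not called by the cache): a renderer that exposes
// an ARB_bindless_texture sampler uniform could feed the returned view
// straight into a program. `program` and `location` are assumed to come from
// the caller's shader setup; they are not part of this change.
static void BindDemandedView(GLuint program, GLint location,
                             TextureCache::EntryView* view) {
  if (view) {
    glProgramUniformHandleui64ARB(program, location,
                                  view->texture_sampler_handle);
  }
}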

bool TextureCache::SetupTexture(GLuint texture,
                                const TextureInfo& texture_info) {
  // TODO(benvanik): texture mip levels.
  glTextureParameteri(texture, GL_TEXTURE_BASE_LEVEL, 0);
  glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, 1);

  // Pre-shader swizzle.
  // TODO(benvanik): can this be dynamic? Maybe per view?
  // We may have to emulate this in the shader.
  uint32_t swizzle_r = texture_info.swizzle & 0x7;
  uint32_t swizzle_g = (texture_info.swizzle >> 3) & 0x7;
  uint32_t swizzle_b = (texture_info.swizzle >> 6) & 0x7;
  uint32_t swizzle_a = (texture_info.swizzle >> 9) & 0x7;
  static const GLenum swizzle_map[] = {
      GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA, GL_ZERO, GL_ONE,
  };
  glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_R, swizzle_map[swizzle_r]);
  glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_G, swizzle_map[swizzle_g]);
  glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_B, swizzle_map[swizzle_b]);
  glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_A, swizzle_map[swizzle_a]);

  return true;
}
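
// The fetch constant packs four 3-bit swizzle sources (R, G, B, A, 0, 1) into
// the low 12 bits, as decoded above; e.g. the identity swizzle r,g,b,a packs
// as 0x688 (worked check):
static_assert((0 | (1 << 3) | (2 << 6) | (3 << 9)) == 0x688,
              "identity swizzle encoding");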

bool TextureCache::SetupSampler(GLuint sampler,
                                const TextureInfo& texture_info,
                                const SamplerInfo& sampler_info) {
  // TODO(benvanik): border color from texture fetch.
  GLfloat border_color[4] = {0.0f};
  glSamplerParameterfv(sampler, GL_TEXTURE_BORDER_COLOR, border_color);

  // TODO(benvanik): setup LODs for mipmapping.
  glSamplerParameterf(sampler, GL_TEXTURE_LOD_BIAS, 0.0f);
  glSamplerParameterf(sampler, GL_TEXTURE_MIN_LOD, 0.0f);
  glSamplerParameterf(sampler, GL_TEXTURE_MAX_LOD, 0.0f);

  // Texture wrapping modes.
  // TODO(benvanik): not sure if the middle ones are correct.
  static const GLenum wrap_map[] = {
      GL_REPEAT,                      //
      GL_MIRRORED_REPEAT,             //
      GL_CLAMP_TO_EDGE,               //
      GL_MIRROR_CLAMP_TO_EDGE,        //
      GL_CLAMP_TO_BORDER,             // ?
      GL_MIRROR_CLAMP_TO_BORDER_EXT,  // ?
      GL_CLAMP_TO_BORDER,             //
      GL_MIRROR_CLAMP_TO_BORDER_EXT,  //
  };
  glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S,
                      wrap_map[sampler_info.clamp_u]);
  glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T,
                      wrap_map[sampler_info.clamp_v]);
  glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R,
                      wrap_map[sampler_info.clamp_w]);

  // Texture level filtering.
  GLenum min_filter;
  switch (sampler_info.min_filter) {
    case ucode::TEX_FILTER_POINT:
      switch (sampler_info.mip_filter) {
        case ucode::TEX_FILTER_BASEMAP:
          min_filter = GL_NEAREST;
          break;
        case ucode::TEX_FILTER_POINT:
          // min_filter = GL_NEAREST_MIPMAP_NEAREST;
          min_filter = GL_NEAREST;
          break;
        case ucode::TEX_FILTER_LINEAR:
          // min_filter = GL_NEAREST_MIPMAP_LINEAR;
          min_filter = GL_NEAREST;
          break;
        default:
          assert_unhandled_case(sampler_info.mip_filter);
          return false;
      }
      break;
    case ucode::TEX_FILTER_LINEAR:
      switch (sampler_info.mip_filter) {
        case ucode::TEX_FILTER_BASEMAP:
          min_filter = GL_LINEAR;
          break;
        case ucode::TEX_FILTER_POINT:
          // min_filter = GL_LINEAR_MIPMAP_NEAREST;
          min_filter = GL_LINEAR;
          break;
        case ucode::TEX_FILTER_LINEAR:
          // min_filter = GL_LINEAR_MIPMAP_LINEAR;
          min_filter = GL_LINEAR;
          break;
        default:
          assert_unhandled_case(sampler_info.mip_filter);
          return false;
      }
      break;
    default:
      assert_unhandled_case(sampler_info.min_filter);
      return false;
  }
  GLenum mag_filter;
  switch (sampler_info.mag_filter) {
    case ucode::TEX_FILTER_POINT:
      mag_filter = GL_NEAREST;
      break;
    case ucode::TEX_FILTER_LINEAR:
      mag_filter = GL_LINEAR;
      break;
    default:
      assert_unhandled_case(sampler_info.mag_filter);
      return false;
  }
  glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, min_filter);
  glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, mag_filter);

  // TODO(benvanik): anisotropic filtering.
  // GL_TEXTURE_MAX_ANISOTROPY_EXT

  return true;
}
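
// Sketch for the anisotropic-filtering TODO above (assumes
// EXT_texture_filter_anisotropic is available; the desired level would come
// from the fetch constant once it is decoded, so `desired_level` here is a
// placeholder parameter).
static void SetupAnisotropy(GLuint sampler, GLfloat desired_level) {
  GLfloat max_supported = 1.0f;
  glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &max_supported);
  glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY_EXT,
                      desired_level < max_supported ? desired_level
                                                    : max_supported);
}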

void TextureSwap(Endian endianness, void* dest, const void* src,
                 size_t length) {
  switch (endianness) {
    case Endian::k8in16:
      poly::copy_and_swap_16_aligned(reinterpret_cast<uint16_t*>(dest),
                                     reinterpret_cast<const uint16_t*>(src),
                                     length / 2);
      break;
    case Endian::k8in32:
      poly::copy_and_swap_32_aligned(reinterpret_cast<uint32_t*>(dest),
                                     reinterpret_cast<const uint32_t*>(src),
                                     length / 4);
      break;
    case Endian::k16in32:
      // TODO(benvanik): make more efficient.
      /*for (uint32_t i = 0; i < length; i += 4, src += 4, dest += 4) {
        uint32_t value = *(uint32_t*)src;
        *(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16);
      }*/
      assert_always("16in32 not supported");
      break;
    default:
    case Endian::kUnspecified:
      std::memcpy(dest, src, length);
      break;
  }
}
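
// Minimal sketch of the k16in32 halfword swap described by the commented-out
// loop above; it is not wired into TextureSwap yet (assumes length is a
// multiple of 4 bytes).
static void Swap16In32(void* dest, const void* src, size_t length) {
  auto dest_ptr = reinterpret_cast<uint32_t*>(dest);
  auto src_ptr = reinterpret_cast<const uint32_t*>(src);
  for (size_t i = 0; i < length / 4; ++i) {
    uint32_t value = src_ptr[i];
    dest_ptr[i] = (value >> 16) | (value << 16);
  }
}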

bool TextureCache::UploadTexture2D(GLuint texture, void* host_base,
                                   size_t length,
                                   const TextureInfo& texture_info,
                                   const SamplerInfo& sampler_info) {
  assert_true(length == texture_info.input_length);

  GLenum internal_format = GL_RGBA8;
  GLenum format = GL_RGBA;
  GLenum type = GL_UNSIGNED_BYTE;
  // https://code.google.com/p/glsnewton/source/browse/trunk/Source/uDDSLoader.pas?r=62
  // http://dench.flatlib.jp/opengl/textures
  // http://fossies.org/linux/WebKit/Source/ThirdParty/ANGLE/src/libGLESv2/formatutils.cpp
  switch (texture_info.format) {
    case TextureFormat::k_8:
      internal_format = GL_R8;
      format = GL_RED;
      type = GL_UNSIGNED_BYTE;
      break;
    case TextureFormat::k_1_5_5_5:
      internal_format = GL_RGB5_A1;
      format = GL_BGRA;
      type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      break;
    case TextureFormat::k_5_6_5:
      internal_format = GL_RGB565;
      format = GL_RGB;
      type = GL_UNSIGNED_SHORT_5_6_5;
      break;
    case TextureFormat::k_2_10_10_10:
    case TextureFormat::k_2_10_10_10_AS_16_16_16_16:
      internal_format = GL_RGB10_A2;
      format = GL_RGBA;
      type = GL_UNSIGNED_INT_2_10_10_10_REV;
      break;
    case TextureFormat::k_10_11_11:
    case TextureFormat::k_10_11_11_AS_16_16_16_16:
      // ?
      internal_format = GL_R11F_G11F_B10F;
      format = GL_RGB;
      type = GL_UNSIGNED_INT_10F_11F_11F_REV;
      break;
    case TextureFormat::k_11_11_10:
    case TextureFormat::k_11_11_10_AS_16_16_16_16:
      internal_format = GL_R11F_G11F_B10F;
      format = GL_RGB;
      type = GL_UNSIGNED_INT_10F_11F_11F_REV;
      break;
    case TextureFormat::k_8_8_8_8:
    case TextureFormat::k_8_8_8_8_AS_16_16_16_16:
      internal_format = GL_RGBA8;
      format = GL_RGBA;
      type = GL_UNSIGNED_BYTE;
      break;
    case TextureFormat::k_4_4_4_4:
      internal_format = GL_RGBA4;
      format = GL_RGBA;
      type = GL_UNSIGNED_SHORT_4_4_4_4;
      break;
    case TextureFormat::k_16_FLOAT:
      internal_format = GL_R16F;
      format = GL_RED;
      type = GL_HALF_FLOAT;
      break;
    case TextureFormat::k_16_16_FLOAT:
      internal_format = GL_RG16F;
      format = GL_RG;
      type = GL_HALF_FLOAT;
      break;
    case TextureFormat::k_16_16_16_16_FLOAT:
      internal_format = GL_RGBA16F;
      format = GL_RGBA;
      type = GL_HALF_FLOAT;
      break;
    case TextureFormat::k_32_FLOAT:
      internal_format = GL_R32F;
      format = GL_RED;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_32_32_FLOAT:
      internal_format = GL_RG32F;
      format = GL_RG;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_32_32_32_FLOAT:
      internal_format = GL_RGB32F;
      format = GL_RGB;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_32_32_32_32_FLOAT:
      internal_format = GL_RGBA32F;
      format = GL_RGBA;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_DXT1:
    case TextureFormat::k_DXT1_AS_16_16_16_16:
      // or GL_COMPRESSED_RGB_S3TC_DXT1_EXT?
      internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
      break;
    case TextureFormat::k_DXT2_3:
    case TextureFormat::k_DXT2_3_AS_16_16_16_16:
      internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
      break;
    case TextureFormat::k_DXT4_5:
    case TextureFormat::k_DXT4_5_AS_16_16_16_16:
      internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
      break;
    case TextureFormat::k_24_8:
      internal_format = GL_DEPTH24_STENCIL8;
      format = GL_DEPTH_STENCIL;
      type = GL_UNSIGNED_INT_24_8;
      break;
    case TextureFormat::k_24_8_FLOAT:
      internal_format = GL_DEPTH24_STENCIL8;
      format = GL_DEPTH_STENCIL;
      type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
      break;
    default:
    case TextureFormat::k_1_REVERSE:
    case TextureFormat::k_1:
    case TextureFormat::k_6_5_5:
    case TextureFormat::k_8_A:
    case TextureFormat::k_8_B:
    case TextureFormat::k_8_8:
    case TextureFormat::k_Cr_Y1_Cb_Y0:
    case TextureFormat::k_Y1_Cr_Y0_Cb:
    case TextureFormat::k_8_8_8_8_A:
    case TextureFormat::k_16:
    case TextureFormat::k_16_16:
    case TextureFormat::k_16_16_16_16:
    case TextureFormat::k_16_EXPAND:
    case TextureFormat::k_16_16_EXPAND:
    case TextureFormat::k_16_16_16_16_EXPAND:
    case TextureFormat::k_32_32:
    case TextureFormat::k_32_32_32_32:
    case TextureFormat::k_32_AS_8:
    case TextureFormat::k_32_AS_8_8:
    case TextureFormat::k_16_MPEG:
    case TextureFormat::k_16_16_MPEG:
    case TextureFormat::k_8_INTERLACED:
    case TextureFormat::k_32_AS_8_INTERLACED:
    case TextureFormat::k_32_AS_8_8_INTERLACED:
    case TextureFormat::k_16_INTERLACED:
    case TextureFormat::k_16_MPEG_INTERLACED:
    case TextureFormat::k_16_16_MPEG_INTERLACED:
    case TextureFormat::k_DXN:
    case TextureFormat::k_DXT3A:
    case TextureFormat::k_DXT5A:
    case TextureFormat::k_CTX1:
    case TextureFormat::k_DXT3A_AS_1_1_1_1:
      assert_unhandled_case(texture_info.format);
      return false;
  }

  size_t unpack_length = texture_info.input_length;
  glTextureStorage2D(texture, 1, internal_format,
                     texture_info.size_2d.output_width,
                     texture_info.size_2d.output_height);
  assert_true(unpack_length % 4 == 0);

  auto allocation = scratch_buffer_->Acquire(unpack_length);

  if (!texture_info.is_tiled) {
    TextureSwap(texture_info.endianness, allocation.host_ptr, host_base,
                unpack_length);
    /*const uint8_t* src = reinterpret_cast<const uint8_t*>(host_base);
    uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
    for (uint32_t y = 0; y < texture_info.size_2d.block_height; y++) {
      for (uint32_t x = 0; x < texture_info.size_2d.logical_pitch;
           x += texture_info.texel_pitch) {
        TextureSwap(texture_info.endianness, dest + x, src + x,
                    texture_info.texel_pitch);
      }
      src += texture_info.size_2d.input_pitch;
      dest += texture_info.size_2d.input_pitch;
    }*/
    // std::memcpy(dest, src, unpack_length);
  } else {
    uint8_t* src = reinterpret_cast<uint8_t*>(host_base);
    uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
    uint32_t output_pitch =
        (texture_info.size_2d.output_width / texture_info.block_size) *
        texture_info.texel_pitch;
    auto bpp =
        (texture_info.texel_pitch >> 2) +
        ((texture_info.texel_pitch >> 1) >> (texture_info.texel_pitch >> 2));
    for (uint32_t y = 0, output_base_offset = 0;
         y < texture_info.size_2d.block_height;
         y++, output_base_offset += output_pitch) {
      auto input_base_offset = TextureInfo::TiledOffset2DOuter(
          y, (texture_info.size_2d.input_width / texture_info.block_size), bpp);
      for (uint32_t x = 0, output_offset = output_base_offset;
           x < texture_info.size_2d.block_width;
           x++, output_offset += texture_info.texel_pitch) {
        auto input_offset =
            TextureInfo::TiledOffset2DInner(x, y, bpp, input_base_offset) >>
            bpp;
        TextureSwap(texture_info.endianness, dest + output_offset,
                    src + input_offset * texture_info.texel_pitch,
                    texture_info.texel_pitch);
      }
    }
  }
  size_t unpack_offset = allocation.offset;
  scratch_buffer_->Commit(std::move(allocation));

  // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
  // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
  glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
  if (texture_info.is_compressed) {
    glCompressedTextureSubImage2D(texture, 0, 0, 0,
                                  texture_info.size_2d.output_width,
                                  texture_info.size_2d.output_height, format,
                                  static_cast<GLsizei>(unpack_length),
                                  reinterpret_cast<void*>(unpack_offset));
  } else {
    glTextureSubImage2D(texture, 0, 0, 0, texture_info.size_2d.output_width,
                        texture_info.size_2d.output_height, format, type,
                        reinterpret_cast<void*>(unpack_offset));
  }
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
  return true;
}
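
// Sanity check for the log2 trick used in the tiled path of UploadTexture2D
// above: for the block pitches TextureInfo currently produces (1, 2, 4, 8 and
// 16 bytes), (p >> 2) + ((p >> 1) >> (p >> 2)) evaluates to log2(p).
static_assert(((1 >> 2) + ((1 >> 1) >> (1 >> 2))) == 0, "log2(1)");
static_assert(((4 >> 2) + ((4 >> 1) >> (4 >> 2))) == 2, "log2(4)");
static_assert(((8 >> 2) + ((8 >> 1) >> (8 >> 2))) == 3, "log2(8)");
static_assert(((16 >> 2) + ((16 >> 1) >> (16 >> 2))) == 4, "log2(16)");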

}  // namespace gl4
}  // namespace gpu
}  // namespace xe

@@ -0,0 +1,65 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_GL4_TEXTURE_CACHE_H_
#define XENIA_GPU_GL4_TEXTURE_CACHE_H_

#include <vector>

#include <xenia/gpu/gl4/circular_buffer.h>
#include <xenia/gpu/gl4/gl_context.h>
#include <xenia/gpu/sampler_info.h>
#include <xenia/gpu/texture_info.h>

namespace xe {
namespace gpu {
namespace gl4 {

class TextureCache {
 public:
  struct EntryView {
    SamplerInfo sampler_info;
    GLuint sampler;
    GLuint64 texture_sampler_handle;
  };
  struct Entry {
    TextureInfo texture_info;
    GLuint base_texture;
    std::vector<EntryView> views;
  };

  TextureCache();
  ~TextureCache();

  bool Initialize(CircularBuffer* scratch_buffer);
  void Shutdown();
  void Clear();

  EntryView* Demand(void* host_base, size_t length,
                    const TextureInfo& texture_info,
                    const SamplerInfo& sampler_info);

 private:
  bool SetupTexture(GLuint texture, const TextureInfo& texture_info);
  bool SetupSampler(GLuint sampler, const TextureInfo& texture_info,
                    const SamplerInfo& sampler_info);

  bool UploadTexture2D(GLuint texture, void* host_base, size_t length,
                       const TextureInfo& texture_info,
                       const SamplerInfo& sampler_info);

  CircularBuffer* scratch_buffer_;
  std::vector<Entry> entries_;
};

}  // namespace gl4
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_GL4_TEXTURE_CACHE_H_

@@ -74,17 +74,32 @@ LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
                            LPARAM lParam) {
  switch (message) {
    case WM_PAINT: {
      GLContextLock context_lock(&context_);
      // TODO(benvanik): is viewport needed?
      glViewport(0, 0, width_, height_);
      float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f};
      glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
      if (current_paint_callback_) {
        current_paint_callback_();
        current_paint_callback_ = nullptr;
      {
        GLContextLock context_lock(&context_);
        wglSwapIntervalEXT(0);

        // TODO(benvanik): is viewport needed?
        glViewport(0, 0, width_, height_);
        float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f};
        glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);

        if (current_paint_callback_) {
          current_paint_callback_();
          current_paint_callback_ = nullptr;
        }

        // TODO(benvanik): profiler present.
        // Profiler::Present();

        // Hacky swap timer.
        static int swap_count = 0;
        glEnable(GL_SCISSOR_TEST);
        glScissor(0, 0, 20, 20);
        float red[] = {swap_count / 60.0f, 0, 0, 1.0f};
        swap_count = (swap_count + 1) % 60;
        glClearNamedFramebufferfv(0, GL_COLOR, 0, red);
        glDisable(GL_SCISSOR_TEST);
      }
      // TODO(benvanik): profiler present.
      // Profiler::Present();
      SwapBuffers(context_.dc());
    } break;
  }

@@ -0,0 +1,31 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include <xenia/gpu/sampler_info.h>

namespace xe {
namespace gpu {

bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
                          const ucode::instr_fetch_tex_t& fetch_instr,
                          SamplerInfo* out_info) {
  out_info->min_filter = static_cast<ucode::instr_tex_filter_t>(
      fetch_instr.min_filter == 3 ? fetch.min_filter : fetch_instr.min_filter);
  out_info->mag_filter = static_cast<ucode::instr_tex_filter_t>(
      fetch_instr.mag_filter == 3 ? fetch.mag_filter : fetch_instr.mag_filter);
  out_info->mip_filter = static_cast<ucode::instr_tex_filter_t>(
      fetch_instr.mip_filter == 3 ? fetch.mip_filter : fetch_instr.mip_filter);
  out_info->clamp_u = fetch.clamp_x;
  out_info->clamp_v = fetch.clamp_y;
  out_info->clamp_w = fetch.clamp_z;
  return true;
}
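
// Illustrative sketch (not used elsewhere in this change): because SamplerInfo
// is a plain value type with operator==, a caller can prepare one per texture
// fetch instruction and compare it against a previously cached copy to decide
// whether an existing sampler object can be reused.
static bool CanReuseSampler(const SamplerInfo& cached,
                            const xenos::xe_gpu_texture_fetch_t& fetch,
                            const ucode::instr_fetch_tex_t& fetch_instr) {
  SamplerInfo incoming;
  if (!SamplerInfo::Prepare(fetch, fetch_instr, &incoming)) {
    return false;
  }
  return cached == incoming;
}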

}  // namespace gpu
}  // namespace xe

@@ -0,0 +1,41 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_SAMPLER_INFO_H_
#define XENIA_GPU_SAMPLER_INFO_H_

#include <xenia/gpu/ucode.h>
#include <xenia/gpu/xenos.h>

namespace xe {
namespace gpu {

struct SamplerInfo {
  ucode::instr_tex_filter_t min_filter;
  ucode::instr_tex_filter_t mag_filter;
  ucode::instr_tex_filter_t mip_filter;
  uint32_t clamp_u;
  uint32_t clamp_v;
  uint32_t clamp_w;

  static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
                      const ucode::instr_fetch_tex_t& fetch_instr,
                      SamplerInfo* out_info);

  bool operator==(const SamplerInfo& other) const {
    return min_filter == other.min_filter && mag_filter == other.mag_filter &&
           mip_filter == other.mip_filter && clamp_u == other.clamp_u &&
           clamp_v == other.clamp_v && clamp_w == other.clamp_w;
  }
};

}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_SAMPLER_INFO_H_

@@ -172,6 +172,8 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
    return;
  }

  assert_true(vtx->const_index <= 0x1F);

  uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
  auto& inputs = buffer_inputs_;
  BufferDescElement* el = nullptr;

@@ -240,10 +242,12 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
void Shader::GatherTextureFetch(const instr_fetch_tex_t* tex) {
  // TODO(benvanik): check dest_swiz to see if we are writing anything.

  assert_true(tex->const_idx < 0x1F);

  assert_true(sampler_inputs_.count + 1 < poly::countof(sampler_inputs_.descs));
  auto& input = sampler_inputs_.descs[sampler_inputs_.count++];
  input.input_index = sampler_inputs_.count - 1;
  input.fetch_slot = tex->const_idx & 0xF;  // ?
  input.fetch_slot = tex->const_idx & 0xF;  // ??????????????????????????????
  input.tex_fetch = *tex;

  // Format mangling, size estimation, etc.

@@ -9,8 +9,12 @@
    'register_file.cc',
    'register_file.h',
    'register_table.inc',
    'sampler_info.cc',
    'sampler_info.h',
    'shader.cc',
    'shader.h',
    'texture_info.cc',
    'texture_info.h',
    'ucode.h',
    'ucode_disassembler.cc',
    'ucode_disassembler.h',

@@ -0,0 +1,239 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include <xenia/gpu/texture_info.h>

#include <poly/math.h>

namespace xe {
namespace gpu {

using namespace xe::gpu::ucode;
using namespace xe::gpu::xenos;

bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
                          TextureInfo* out_info) {
  // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
  // a2xx_sq_surfaceformat

  auto& info = *out_info;
  info.swizzle = fetch.swizzle;

  info.dimension = static_cast<Dimension>(fetch.dimension);
  switch (info.dimension) {
    case Dimension::k1D:
      info.width = fetch.size_1d.width;
      break;
    case Dimension::k2D:
      info.width = fetch.size_2d.width;
      info.height = fetch.size_2d.height;
      break;
    case Dimension::k3D:
    case Dimension::kCube:
      info.width = fetch.size_3d.width;
      info.height = fetch.size_3d.height;
      info.depth = fetch.size_3d.depth;
      break;
  }
  info.endianness = static_cast<Endian>(fetch.endianness);

  info.block_size = 0;
  info.texel_pitch = 0;
  info.is_tiled = fetch.tiled;
  info.is_compressed = false;
  info.input_length = 0;
  info.format = static_cast<TextureFormat>(fetch.format);
  switch (fetch.format) {
    case FMT_8:
      info.block_size = 1;
      info.texel_pitch = 1;
      break;
    case FMT_1_5_5_5:
      info.block_size = 1;
      info.texel_pitch = 2;
      break;
    case FMT_8_8_8_8:
    case FMT_8_8_8_8_AS_16_16_16_16:
      info.block_size = 1;
      info.texel_pitch = 4;
      break;
    case FMT_4_4_4_4:
      info.block_size = 1;
      info.texel_pitch = 2;
      break;
    case FMT_16_16_16_16_FLOAT:
      info.block_size = 1;
      info.texel_pitch = 8;
      break;
    case FMT_32_FLOAT:
      info.block_size = 1;
      info.texel_pitch = 4;
      break;
    case FMT_DXT1:
      info.block_size = 4;
      info.texel_pitch = 8;
      info.is_compressed = true;
      break;
    case FMT_DXT2_3:
    case FMT_DXT4_5:
      info.block_size = 4;
      info.texel_pitch = 16;
      info.is_compressed = true;
      break;
    case FMT_DXT1_AS_16_16_16_16:
      // TODO(benvanik): conversion?
      info.block_size = 4;
      info.texel_pitch = 8;
      info.is_compressed = true;
      break;
    case FMT_DXT2_3_AS_16_16_16_16:
    case FMT_DXT4_5_AS_16_16_16_16:
      // TODO(benvanik): conversion?
      info.block_size = 4;
      info.texel_pitch = 16;
      info.is_compressed = true;
      break;
    case FMT_1_REVERSE:
    case FMT_1:
    case FMT_5_6_5:
    case FMT_6_5_5:
    case FMT_2_10_10_10:
    case FMT_8_A:
    case FMT_8_B:
    case FMT_8_8:
    case FMT_Cr_Y1_Cb_Y0:
    case FMT_Y1_Cr_Y0_Cb:
    case FMT_5_5_5_1:
    case FMT_8_8_8_8_A:
    case FMT_10_11_11:
    case FMT_11_11_10:
    case FMT_24_8:
    case FMT_24_8_FLOAT:
    case FMT_16:
    case FMT_16_16:
    case FMT_16_16_16_16:
    case FMT_16_EXPAND:
    case FMT_16_16_EXPAND:
    case FMT_16_16_16_16_EXPAND:
    case FMT_16_FLOAT:
    case FMT_16_16_FLOAT:
    case FMT_32:
    case FMT_32_32:
    case FMT_32_32_32_32:
    case FMT_32_32_FLOAT:
    case FMT_32_32_32_32_FLOAT:
    case FMT_32_AS_8:
    case FMT_32_AS_8_8:
    case FMT_16_MPEG:
    case FMT_16_16_MPEG:
    case FMT_8_INTERLACED:
    case FMT_32_AS_8_INTERLACED:
    case FMT_32_AS_8_8_INTERLACED:
    case FMT_16_INTERLACED:
    case FMT_16_MPEG_INTERLACED:
    case FMT_16_16_MPEG_INTERLACED:
    case FMT_DXN:
    case FMT_2_10_10_10_AS_16_16_16_16:
    case FMT_10_11_11_AS_16_16_16_16:
    case FMT_11_11_10_AS_16_16_16_16:
    case FMT_32_32_32_FLOAT:
    case FMT_DXT3A:
    case FMT_DXT5A:
    case FMT_CTX1:
    case FMT_DXT3A_AS_1_1_1_1:
      PLOGE("Unhandled texture format");
      return false;
    default:
      assert_unhandled_case(fetch.format);
      return false;
  }

  // Must be called here when we know the format.
  switch (info.dimension) {
    case Dimension::k1D:
      info.CalculateTextureSizes1D(fetch);
      break;
    case Dimension::k2D:
      info.CalculateTextureSizes2D(fetch);
      break;
    case Dimension::k3D:
      // TODO(benvanik): calculate size.
      return false;
    case Dimension::kCube:
      // TODO(benvanik): calculate size.
      return false;
  }

  return true;
}

void TextureInfo::CalculateTextureSizes1D(const xe_gpu_texture_fetch_t& fetch) {
  // ?
  size_1d.width = fetch.size_1d.width;
}

void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
  size_2d.logical_width = 1 + fetch.size_2d.width;
  size_2d.logical_height = 1 + fetch.size_2d.height;

  size_2d.block_width = size_2d.logical_width / block_size;
  size_2d.block_height = size_2d.logical_height / block_size;

  if (!is_compressed) {
    // Dimensions must be a multiple of 32 and the row pitch must also be a
    // multiple of 256 bytes.
    uint32_t bytes_per_block = block_size * block_size * texel_pitch;
    uint32_t width_multiple = 32;
    if (bytes_per_block) {
      uint32_t minimum_multiple = 256 / bytes_per_block;
      if (width_multiple < minimum_multiple) {
        width_multiple = minimum_multiple;
      }
    }
    size_2d.input_width = poly::round_up(size_2d.logical_width, width_multiple);
    size_2d.input_height = poly::round_up(size_2d.logical_height, 32);
    size_2d.output_width = size_2d.logical_width;
    size_2d.output_height = size_2d.logical_height;
  } else {
    // Compressed textures must be aligned to 128x128.
    size_2d.input_width = poly::round_up(size_2d.logical_width, 128);
    size_2d.input_height = poly::round_up(size_2d.logical_height, 128);
    size_2d.output_width = poly::next_pow2(size_2d.logical_width);
    size_2d.output_height = poly::next_pow2(size_2d.logical_height);
  }

  size_2d.logical_pitch = (size_2d.logical_width / block_size) * texel_pitch;
  size_2d.input_pitch = (size_2d.input_width / block_size) * texel_pitch;

  if (!is_tiled) {
    input_length = size_2d.block_height * size_2d.logical_pitch;
  } else {
    input_length = size_2d.block_height * size_2d.logical_pitch;  // ?
  }
}
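
// Worked example of the sizing rules above (sketch): a 100x100 k_DXT1 texture
// (block_size = 4, texel_pitch = 8, compressed) resolves to:
//   block_width/block_height   = 25
//   input_width/input_height   = 128 (rounded up to the 128 alignment)
//   output_width/output_height = 128 (next power of two)
//   logical_pitch = (100 / 4) * 8 = 200 bytes
//   input_pitch   = (128 / 4) * 8 = 256 bytes
//   input_length  = 25 * 200 = 5000 bytes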

// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
                                         uint32_t log_bpp) {
  uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
  uint32_t micro = ((y & 6) << 2) << log_bpp;
  return macro + ((micro & ~15) << 1) + (micro & 15) +
         ((y & 8) << (3 + log_bpp)) + ((y & 1) << 4);
}

uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
                                         uint32_t base_offset) {
  uint32_t macro = (x >> 5) << (bpp + 7);
  uint32_t micro = (x & 7) << bpp;
  uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
  return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +
         ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
}
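
// Usage sketch (mirrors the untiling loop in gl4/texture_cache.cc): the two
// helpers above combine to produce the source element index of tiled block
// (x, y); the caller then multiplies by the texel pitch to get a byte offset.
static uint32_t TiledBlockElement(uint32_t x, uint32_t y,
                                  uint32_t width_in_blocks, uint32_t log_bpp) {
  uint32_t base = TextureInfo::TiledOffset2DOuter(y, width_in_blocks, log_bpp);
  return TextureInfo::TiledOffset2DInner(x, y, log_bpp, base) >> log_bpp;
}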

}  // namespace gpu
}  // namespace xe

@@ -0,0 +1,140 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2014 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_TEXTURE_INFO_H_
#define XENIA_GPU_TEXTURE_INFO_H_

#include <xenia/gpu/ucode.h>
#include <xenia/gpu/xenos.h>

namespace xe {
namespace gpu {

// a2xx_sq_surfaceformat
enum class TextureFormat : uint32_t {
  k_1_REVERSE = 0,
  k_1 = 1,
  k_8 = 2,
  k_1_5_5_5 = 3,
  k_5_6_5 = 4,
  k_6_5_5 = 5,
  k_8_8_8_8 = 6,
  k_2_10_10_10 = 7,
  k_8_A = 8,
  k_8_B = 9,
  k_8_8 = 10,
  k_Cr_Y1_Cb_Y0 = 11,
  k_Y1_Cr_Y0_Cb = 12,
  // ? hole
  k_8_8_8_8_A = 14,
  k_4_4_4_4 = 15,
  k_10_11_11 = 16,
  k_11_11_10 = 17,
  k_DXT1 = 18,
  k_DXT2_3 = 19,
  k_DXT4_5 = 20,
  // ? hole
  k_24_8 = 22,
  k_24_8_FLOAT = 23,
  k_16 = 24,
  k_16_16 = 25,
  k_16_16_16_16 = 26,
  k_16_EXPAND = 27,
  k_16_16_EXPAND = 28,
  k_16_16_16_16_EXPAND = 29,
  k_16_FLOAT = 30,
  k_16_16_FLOAT = 31,
  k_16_16_16_16_FLOAT = 32,
  k_32 = 33,
  k_32_32 = 34,
  k_32_32_32_32 = 35,
  k_32_FLOAT = 36,
  k_32_32_FLOAT = 37,
  k_32_32_32_32_FLOAT = 38,
  k_32_AS_8 = 39,
  k_32_AS_8_8 = 40,
  k_16_MPEG = 41,
  k_16_16_MPEG = 42,
  k_8_INTERLACED = 43,
  k_32_AS_8_INTERLACED = 44,
  k_32_AS_8_8_INTERLACED = 45,
  k_16_INTERLACED = 46,
  k_16_MPEG_INTERLACED = 47,
  k_16_16_MPEG_INTERLACED = 48,
  k_DXN = 49,
  k_8_8_8_8_AS_16_16_16_16 = 50,
  k_DXT1_AS_16_16_16_16 = 51,
  k_DXT2_3_AS_16_16_16_16 = 52,
  k_DXT4_5_AS_16_16_16_16 = 53,
  k_2_10_10_10_AS_16_16_16_16 = 54,
  k_10_11_11_AS_16_16_16_16 = 55,
  k_11_11_10_AS_16_16_16_16 = 56,
  k_32_32_32_FLOAT = 57,
  k_DXT3A = 58,
  k_DXT5A = 59,
  k_CTX1 = 60,
  k_DXT3A_AS_1_1_1_1 = 61,

  kUnknown = 0xFFFFFFFFu,
};

struct TextureInfo {
  uint32_t swizzle;
  Dimension dimension;
  uint32_t width;
  uint32_t height;
  uint32_t depth;
  uint32_t block_size;
  uint32_t texel_pitch;
  xenos::Endian endianness;
  bool is_tiled;
  bool is_compressed;
  uint32_t input_length;

  TextureFormat format;

  union {
    struct {
      uint32_t width;
    } size_1d;
    struct {
      uint32_t logical_width;
      uint32_t logical_height;
      uint32_t block_width;
      uint32_t block_height;
      uint32_t input_width;
      uint32_t input_height;
      uint32_t output_width;
      uint32_t output_height;
      uint32_t logical_pitch;
      uint32_t input_pitch;
    } size_2d;
    struct {
    } size_3d;
    struct {
    } size_cube;
  };

  static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
                      TextureInfo* out_info);

  static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
                                     uint32_t log_bpp);
  static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
                                     uint32_t base_offset);

 private:
  void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch);
  void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch);
};

}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_TEXTURE_INFO_H_

@@ -35,6 +35,13 @@ enum class PrimitiveType : uint32_t {
  kQuadList = 0x0D,
};

enum class Dimension : uint32_t {
  k1D = 0,
  k2D = 1,
  k3D = 2,
  kCube = 3,
};

namespace xenos {

typedef enum {
