Textures and such.

This commit is contained in:
Ben Vanik 2014-12-30 14:10:30 -08:00
parent 5b2672a1b8
commit dfc260b86e
23 changed files with 1598 additions and 296 deletions

View File

@ -36,7 +36,7 @@ T align(T value, T alignment) {
// Rounds |value| up to the nearest multiple of |multiple|.
// A value that is already an exact multiple is returned unchanged, and a zero
// value yields |multiple| itself (never 0), e.g. round_up(0, 256) == 256.
// |multiple| must be non-zero.
template <typename T, typename V>
T round_up(T value, V multiple) {
  if (!value) {
    return multiple;
  }
  // Work from the remainder instead of (value + multiple - 1) so that the
  // intermediate sum cannot overflow when the rounded result itself fits.
  const auto remainder = value % multiple;
  return remainder ? value + (multiple - remainder) : value;
}
inline float saturate(float value) {

View File

@ -27,29 +27,41 @@ CircularBuffer::CircularBuffer(size_t capacity)
gpu_base_(0),
host_base_(nullptr) {}
CircularBuffer::~CircularBuffer() {
glUnmapNamedBuffer(buffer_);
glDeleteBuffers(1, &buffer_);
}
CircularBuffer::~CircularBuffer() { Shutdown(); }
bool CircularBuffer::Initialize() {
glCreateBuffers(1, &buffer_);
glNamedBufferStorage(buffer_, capacity_, nullptr,
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
host_base_ = reinterpret_cast<uint8_t*>(glMapNamedBufferRange(
buffer_, 0, capacity_, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
GL_MAP_UNSYNCHRONIZED_BIT |
GL_MAP_PERSISTENT_BIT));
buffer_, 0, capacity_,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_PERSISTENT_BIT));
assert_not_null(host_base_);
if (!host_base_) {
return false;
}
glMakeNamedBufferResidentNV(buffer_, GL_WRITE_ONLY);
glGetNamedBufferParameterui64vNV(buffer_, GL_BUFFER_GPU_ADDRESS_NV,
&gpu_base_);
if (GLEW_NV_shader_buffer_load) {
// To use this bindlessly we must make it resident.
glMakeNamedBufferResidentNV(buffer_, GL_WRITE_ONLY);
glGetNamedBufferParameterui64vNV(buffer_, GL_BUFFER_GPU_ADDRESS_NV,
&gpu_base_);
}
return true;
}
// Releases the GL buffer object backing this circular buffer.
// Does nothing when no buffer object exists, and clears buffer_ afterwards so
// that a repeated call is harmless.
void CircularBuffer::Shutdown() {
  if (buffer_) {
    // Drop the mapping before the buffer object goes away.
    glUnmapNamedBuffer(buffer_);
    if (GLEW_NV_shader_buffer_load) {
      // Undo the residency request made when the extension is available.
      glMakeNamedBufferNonResidentNV(buffer_);
    }
    glDeleteBuffers(1, &buffer_);
    buffer_ = 0;
  }
}
CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) {
// Addresses must always be % 256.
length = poly::round_up(length, 256);
@ -64,6 +76,7 @@ CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) {
Allocation allocation;
allocation.host_ptr = host_base_ + write_head_;
allocation.gpu_ptr = gpu_base_ + write_head_;
allocation.offset = write_head_;
allocation.length = length;
write_head_ += length;
return allocation;

View File

@ -26,10 +26,12 @@ class CircularBuffer {
// A region of the circular buffer handed out by Acquire().
struct Allocation {
// CPU-visible address of the region (host_base_ + write offset).
void* host_ptr;
// GPU address of the region (gpu_base_ + write offset). gpu_base_ is only
// queried when NV_shader_buffer_load is available; otherwise the base is 0.
GLuint64 gpu_ptr;
// Byte offset of the region from the start of the GL buffer object.
size_t offset;
// Size of the region in bytes, after Acquire() has rounded it up.
size_t length;
};
bool Initialize();
void Shutdown();
GLuint handle() const { return buffer_; }

View File

@ -16,6 +16,8 @@
#include <xenia/gpu/gl4/gl4_gpu-private.h>
#include <xenia/gpu/gl4/gl4_graphics_system.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/sampler_info.h>
#include <xenia/gpu/texture_info.h>
#include <xenia/gpu/xenos.h>
#include <third_party/xxhash/xxhash.h>
@ -36,7 +38,7 @@ const GLuint kAnyTarget = UINT_MAX;
// All uncached vertex/index data goes here. If it fills up we need to sync
// with the GPU, so this should be large enough to prevent that in a normal
// frame.
const size_t kScratchBufferCapacity = 64 * 1024 * 1024;
const size_t kScratchBufferCapacity = 256 * 1024 * 1024;
CommandProcessor::CachedPipeline::CachedPipeline() = default;
@ -61,6 +63,7 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system)
write_ptr_index_(0),
bin_select_(0xFFFFFFFFull),
bin_mask_(0xFFFFFFFFull),
has_bindless_vbos_(false),
active_vertex_shader_(nullptr),
active_pixel_shader_(nullptr),
active_framebuffer_(nullptr),
@ -152,29 +155,34 @@ void CommandProcessor::WorkerMain() {
}
bool CommandProcessor::SetupGL() {
// Uniform buffer that stores the per-draw state (constants, etc).
glCreateBuffers(1, &uniform_data_buffer_);
glBindBuffer(GL_UNIFORM_BUFFER, uniform_data_buffer_);
glNamedBufferStorage(uniform_data_buffer_, 16 * 1024, nullptr,
GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT);
// Circular buffer holding scratch vertex/index data.
if (!scratch_buffer_.Initialize()) {
PLOGE("Unable to initialize scratch buffer");
return false;
}
// Texture cache that keeps track of any textures/samplers used.
if (!texture_cache_.Initialize(&scratch_buffer_)) {
PLOGE("Unable to initialize texture cache");
return false;
}
GLuint vertex_array;
glGenVertexArrays(1, &vertex_array);
glBindVertexArray(vertex_array);
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
if (GLEW_NV_vertex_buffer_unified_memory) {
has_bindless_vbos_ = true;
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
}
return true;
}
void CommandProcessor::ShutdownGL() {
glDeleteBuffers(1, &uniform_data_buffer_);
texture_cache_.Shutdown();
scratch_buffer_.Shutdown();
}
void CommandProcessor::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) {
@ -264,6 +272,7 @@ void CommandProcessor::PrepareForWait() {
// make interrupt callbacks from the GPU so that we don't have to do a full
// synchronize here.
glFlush();
glFinish();
if (FLAGS_thread_safe_gl) {
context_->ClearCurrent();
@ -1142,6 +1151,8 @@ void CommandProcessor::PrepareDraw(DrawCommand* draw_command) {
// Generic stuff.
cmd.start_index = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32;
cmd.base_vertex = 0;
cmd.state_data = nullptr;
}
bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
@ -1158,6 +1169,18 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
return IssueCopy(draw_command);
}
// TODO(benvanik): actually cache things >_>
texture_cache_.Clear();
// Allocate a state data block.
// Everything the shaders access lives here.
auto allocation = scratch_buffer_.Acquire(sizeof(UniformDataBlock));
cmd.state_data = reinterpret_cast<UniformDataBlock*>(allocation.host_ptr);
if (!cmd.state_data) {
PLOGE("Unable to allocate uniform data buffer");
return false;
}
if (!UpdateRenderTargets(draw_command)) {
PLOGE("Unable to setup render targets");
return false;
@ -1172,17 +1195,15 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
PLOGE("Unable to setup render state");
return false;
}
if (!UpdateConstants(draw_command)) {
PLOGE("Unable to update shader constants");
return false;
}
if (!UpdateShaders(draw_command)) {
PLOGE("Unable to prepare draw shaders");
return false;
}
// if (!PopulateSamplers(draw_command)) {
// XELOGE("Unable to prepare draw samplers");
// return false;
//}
if (!PopulateIndexBuffer(draw_command)) {
PLOGE("Unable to setup index buffer");
return false;
@ -1191,6 +1212,10 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
PLOGE("Unable to setup vertex buffers");
return false;
}
if (!PopulateSamplers(draw_command)) {
PLOGE("Unable to prepare draw samplers");
return false;
}
GLenum prim_type = 0;
switch (cmd.prim_type) {
@ -1228,6 +1253,7 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
break;
case PrimitiveType::kQuadList:
prim_type = GL_LINES_ADJACENCY;
return false;
/*if
(vs->DemandGeometryShader(D3D11VertexShaderResource::QUAD_LIST_SHADER,
&geometry_shader)) {
@ -1237,10 +1263,15 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
default:
case PrimitiveType::kUnknown0x07:
prim_type = GL_POINTS;
XELOGE("D3D11: unsupported primitive type %d", cmd.prim_type);
XELOGE("unsupported primitive type %d", cmd.prim_type);
break;
}
// Commit the state buffer - nothing can change after this.
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, scratch_buffer_.handle(),
allocation.offset, allocation.length);
scratch_buffer_.Commit(std::move(allocation));
// HACK HACK HACK
glDisable(GL_DEPTH_TEST);
@ -1254,13 +1285,108 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
prim_type, cmd.index_count,
cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT
: GL_UNSIGNED_SHORT,
reinterpret_cast<void*>(cmd.start_index * element_size),
reinterpret_cast<void*>(cmd.index_buffer.buffer_offset +
cmd.start_index * element_size),
cmd.base_vertex);
} else {
// Auto draw.
glDrawArrays(prim_type, cmd.start_index, cmd.index_count);
}
// Hacky draw counter.
if (false) {
static int draw_count = 0;
glEnable(GL_SCISSOR_TEST);
glScissor(20, 0, 20, 20);
float red[] = {0, draw_count / 100.0f, 0, 1.0f};
draw_count = (draw_count + 1) % 100;
glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
red);
glDisable(GL_SCISSOR_TEST);
}
return true;
}
// Selects (creating on demand) the color and depth render targets described by
// the current RB_* registers, programs the per-target color write masks, and
// binds a framebuffer containing those targets as the draw framebuffer.
// |draw_command| is not read; all inputs come from the register file.
// Currently always returns true, including when no framebuffer is required
// (in which case active_framebuffer_ is left null).
bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
  auto& regs = *register_file_;

  auto enable_mode =
      static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);

  // RB_SURFACE_INFO
  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
  uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
  uint32_t surface_pitch = surface_info & 0x3FFF;
  auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);

  // Get/create all color render targets, if we are using them.
  // In depth-only mode we don't need them.
  GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE};
  GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
  if (enable_mode == ModeControl::kColorDepth) {
    uint32_t color_info[4] = {
        regs[XE_GPU_REG_RB_COLOR_INFO].u32,
        regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
        regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
        regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
    };
    // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
    uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
    // Unsigned index: avoids a signed/unsigned comparison against countof().
    for (uint32_t n = 0; n < poly::countof(color_info); n++) {
      // Each render target owns one 4-bit group of the write mask.
      uint32_t write_mask = (color_mask >> (n * 4)) & 0xF;
      if (!write_mask) {
        // Unused, so keep disabled and set to wildcard so we'll take any
        // framebuffer that has it.
        continue;
      }
      uint32_t color_base = color_info[n] & 0xFFF;
      auto color_format =
          static_cast<ColorRenderTargetFormat>((color_info[n] >> 16) & 0xF);
      color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa,
                                              color_base, color_format);
      draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n;
      glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2),
                   !!(write_mask & 0x4), !!(write_mask & 0x8));
    }
  }

  // Get/create depth buffer, but only if we are going to use it.
  uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
  uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
  bool uses_depth =
      (depth_control & 0x00000002) || (depth_control & 0x00000004);
  uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16;
  bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0);
  GLuint depth_target = kAnyTarget;
  // The depth target is needed as soon as EITHER depth or stencil is in use;
  // requiring both would leave depth-only (or stencil-only) draws without one.
  if (uses_depth || uses_stencil) {
    uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
    uint32_t depth_base = depth_info & 0xFFF;
    auto depth_format =
        static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
    depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base,
                                        depth_format);
    // TODO(benvanik): when a game switches does it expect to keep the same
    //     depth buffer contents?
  }

  // Get/create a framebuffer with the required targets.
  // Note that none may be returned if we really don't need one.
  auto cached_framebuffer = GetFramebuffer(color_targets, depth_target);
  active_framebuffer_ = cached_framebuffer;
  if (!active_framebuffer_) {
    // Nothing to do.
    return true;
  }

  // Setup just the targets we want.
  glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4,
                                draw_buffers);

  // Make active.
  // TODO(benvanik): can we do this all named?
  // TODO(benvanik): do we want this on READ too?
  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);

  return true;
}
@ -1272,57 +1398,24 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
auto& regs = *register_file_;
union float4 {
float v[4];
struct {
float x, y, z, w;
};
};
struct UniformDataBlock {
float4 window_offset; // tx,ty,rt_w,rt_h
float4 window_scissor; // x0,y0,x1,y1
float4 viewport_offset; // tx,ty,tz,?
float4 viewport_scale; // sx,sy,sz,?
// TODO(benvanik): vertex format xyzw?
float4 alpha_test; // alpha test enable, func, ref, ?
// Register data from 0x4000 to 0x4927.
// SHADER_CONSTANT_000_X...
float4 float_consts[512];
// SHADER_CONSTANT_FETCH_00_0...
uint32_t fetch_consts[32 * 6];
// SHADER_CONSTANT_BOOL_000_031...
int32_t bool_consts[8];
// SHADER_CONSTANT_LOOP_00...
int32_t loop_consts[32];
};
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
"Need <=16k uniform data");
auto allocation = scratch_buffer_.Acquire(16 * 1024);
auto buffer_ptr = reinterpret_cast<UniformDataBlock*>(allocation.host_ptr);
if (!buffer_ptr) {
PLOGE("Unable to allocate uniform data buffer");
return false;
}
auto state_data = draw_command->state_data;
// Window parameters.
// See r200UpdateWindow:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
buffer_ptr->window_offset.x = float(window_offset & 0x7FFF);
buffer_ptr->window_offset.y = float((window_offset >> 16) & 0x7FFF);
state_data->window_offset.x = float(window_offset & 0x7FFF);
state_data->window_offset.y = float((window_offset >> 16) & 0x7FFF);
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
buffer_ptr->window_scissor.x = float(window_scissor_tl & 0x7FFF);
buffer_ptr->window_scissor.y = float((window_scissor_tl >> 16) & 0x7FFF);
buffer_ptr->window_scissor.z = float(window_scissor_br & 0x7FFF);
buffer_ptr->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
state_data->window_scissor.x = float(window_scissor_tl & 0x7FFF);
state_data->window_scissor.y = float((window_scissor_tl >> 16) & 0x7FFF);
state_data->window_scissor.z = float(window_scissor_br & 0x7FFF);
state_data->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF);
// HACK: no clue where to get these values.
buffer_ptr->window_offset.z = 1280;
buffer_ptr->window_offset.w = 720;
state_data->window_offset.z = 1280;
state_data->window_offset.w = 720;
// Whether each of the viewport settings is enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
@ -1338,20 +1431,20 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
vport_yoffset_enable == vport_zoffset_enable);
// Viewport scaling. Only enabled if the flags are all set.
buffer_ptr->viewport_scale.x =
state_data->viewport_scale.x =
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1; // 640
buffer_ptr->viewport_offset.x = vport_xoffset_enable
state_data->viewport_offset.x = vport_xoffset_enable
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
: 0; // 640
buffer_ptr->viewport_scale.y = vport_yscale_enable
state_data->viewport_scale.y = vport_yscale_enable
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
: 1; // -360
buffer_ptr->viewport_offset.y = vport_yoffset_enable
state_data->viewport_offset.y = vport_yoffset_enable
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: 0; // 360
buffer_ptr->viewport_scale.z =
state_data->viewport_scale.z =
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1; // 1
buffer_ptr->viewport_offset.z =
state_data->viewport_offset.z =
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0; // 0
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
// = false: multiply the X, Y coordinates by 1/W0.
@ -1365,15 +1458,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
// TODO(benvanik): pass to shaders? disable transform? etc?
glViewport(0, 0, 1280, 720);
// Copy over all constants.
// TODO(benvanik): partial updates, etc. We could use shader constant access
// knowledge that we get at compile time to only upload those constants
// required.
std::memcpy(
&buffer_ptr->float_consts, &regs[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
sizeof(buffer_ptr->float_consts) + sizeof(buffer_ptr->fetch_consts) +
sizeof(buffer_ptr->loop_consts) + sizeof(buffer_ptr->bool_consts));
// Scissoring.
int32_t screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32;
int32_t screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
@ -1424,10 +1508,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
// Deprecated in GL, implemented in shader.
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
buffer_ptr->alpha_test.x =
state_data->alpha_test.x =
(color_control & 0x4) ? 1.0f : 0.0f; // ALPAHTESTENABLE
buffer_ptr->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC
buffer_ptr->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
state_data->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC
state_data->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
static const GLenum blend_map[] = {
/* 0 */ GL_ZERO,
@ -1575,91 +1659,23 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
stencil_op_map[(depth_control & 0x0001C000) >> 14]);
}
// Stash - program setup will bind this to uniforms.
draw_command->state_data_gpu_ptr = allocation.gpu_ptr;
scratch_buffer_.Commit(std::move(allocation));
return true;
}
bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
bool CommandProcessor::UpdateConstants(DrawCommand* draw_command) {
auto& regs = *register_file_;
auto state_data = draw_command->state_data;
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
// TODO(benvanik): partial updates, etc. We could use shader constant access
// knowledge that we get at compile time to only upload those constants
// required. If we did this as a variable length then we could really cut
// down on state block sizes.
// RB_SURFACE_INFO
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
// Get/create all color render targets, if we are using them.
// In depth-only mode we don't need them.
GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE};
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
if (enable_mode == ModeControl::kColorDepth) {
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
};
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
for (int n = 0; n < poly::countof(color_info); n++) {
uint32_t write_mask = (color_mask >> (n * 4)) & 0xF;
if (!write_mask) {
// Unused, so keep disabled and set to wildcard so we'll take any
// framebuffer that has it.
continue;
}
uint32_t color_base = color_info[n] & 0xFFF;
auto color_format =
static_cast<ColorRenderTargetFormat>((color_info[n] >> 16) & 0xF);
color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa,
color_base, color_format);
draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n;
glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2),
!!(write_mask & 0x4), !!(write_mask & 0x8));
}
}
// Get/create depth buffer, but only if we are going to use it.
uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
bool uses_depth =
(depth_control & 0x00000002) || (depth_control & 0x00000004);
uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16;
bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0);
GLuint depth_target = kAnyTarget;
if (uses_depth && uses_stencil) {
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
uint32_t depth_base = depth_info & 0xFFF;
auto depth_format =
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base,
depth_format);
// TODO(benvanik): when a game switches does it expect to keep the same
// depth buffer contents?
}
// Get/create a framebuffer with the required targets.
// Note that none may be returned if we really don't need one.
auto cached_framebuffer = GetFramebuffer(color_targets, depth_target);
active_framebuffer_ = cached_framebuffer;
if (!active_framebuffer_) {
// Nothing to do.
return true;
}
// Setup just the targets we want.
glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4,
draw_buffers);
// Make active.
// TODO(benvanik): can we do this all named?
// TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
// Copy over all constants.
std::memcpy(
&state_data->float_consts, &regs[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
sizeof(state_data->float_consts) + sizeof(state_data->fetch_consts) +
sizeof(state_data->loop_consts) + sizeof(state_data->bool_consts));
return true;
}
@ -1718,28 +1734,10 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
glUseProgramStages(pipeline, GL_GEOMETRY_SHADER_BIT, geometry_program);
glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, fragment_program);
// HACK: layout(location=0) on a bindless uniform crashes nvidia driver.
GLint vertex_state_loc = glGetUniformLocation(vertex_program, "state");
assert_true(vertex_state_loc == 0);
GLint geometry_state_loc =
geometry_program ? glGetUniformLocation(geometry_program, "state") : -1;
assert_true(geometry_state_loc == -1 || geometry_state_loc == 0);
GLint fragment_state_loc = glGetUniformLocation(fragment_program, "state");
assert_true(fragment_state_loc == -1 || fragment_state_loc == 0);
cached_pipeline->handles.default_pipeline = pipeline;
}
// TODO(benvanik): do we need to do this for all stages if the locations
// match?
glProgramUniformHandleui64ARB(vertex_program, 0, cmd.state_data_gpu_ptr);
/*if (geometry_program && geometry_state_loc != -1) {
glProgramUniformHandleui64ARB(geometry_program, 0, cmd.state_data_gpu_ptr);
}*/
/*if (fragment_state_loc != -1) {
glProgramUniformHandleui64ARB(fragment_program, 0,
cmd.state_data_gpu_ptr);
}*/
// NOTE: we don't yet have our state data pointer - that comes at the end.
glBindProgramPipeline(cached_pipeline->handles.default_pipeline);
@ -1759,10 +1757,10 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) {
assert_true(info.endianness == Endian::k8in16 ||
info.endianness == Endian::k8in32);
auto allocation = scratch_buffer_.Acquire(cmd.index_count *
(info.format == IndexFormat::kInt32
? sizeof(uint32_t)
: sizeof(uint16_t)));
size_t total_size =
cmd.index_count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
: sizeof(uint16_t));
auto allocation = scratch_buffer_.Acquire(total_size);
if (info.format == IndexFormat::kInt32) {
poly::copy_and_swap_32_aligned(
@ -1776,9 +1774,14 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) {
cmd.index_count);
}
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, allocation.gpu_ptr,
allocation.length);
if (has_bindless_vbos_) {
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, allocation.gpu_ptr,
allocation.length);
} else {
// Offset is used in glDrawElements.
cmd.index_buffer.buffer_offset = allocation.offset;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, scratch_buffer_.handle());
}
scratch_buffer_.Commit(std::move(allocation));
return true;
@ -1792,7 +1795,8 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
const auto& buffer_inputs = active_vertex_shader_->buffer_inputs();
for (size_t n = 0; n < buffer_inputs.count; n++) {
uint32_t el_index = 0;
for (uint32_t n = 0; n < buffer_inputs.count; n++) {
const auto& desc = buffer_inputs.descs[n];
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
@ -1826,7 +1830,11 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
reinterpret_cast<const uint32_t*>(membase_ + (fetch->address << 2)),
fetch->size);
uint32_t el_index = 0;
if (!has_bindless_vbos_) {
glBindVertexBuffer(n, scratch_buffer_.handle(), allocation.offset,
desc.stride_words * 4);
}
for (uint32_t i = 0; i < desc.element_count; ++i) {
const auto& el = desc.elements[i];
auto comp_count = GetVertexFormatComponentCount(el.format);
@ -1882,13 +1890,19 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
assert_unhandled_case(el.format);
break;
}
size_t offset = el.offset_words * sizeof(uint32_t);
glEnableVertexAttribArray(el_index);
glVertexAttribFormatNV(el_index, comp_count, comp_type, el.is_normalized,
desc.stride_words * sizeof(uint32_t));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index,
allocation.gpu_ptr + offset,
allocation.length - offset);
if (has_bindless_vbos_) {
glVertexAttribFormatNV(el_index, comp_count, comp_type,
el.is_normalized,
desc.stride_words * sizeof(uint32_t));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index,
allocation.gpu_ptr + (el.offset_words * 4),
allocation.length - (el.offset_words * 4));
} else {
glVertexAttribBinding(el_index, n);
glVertexAttribFormat(el_index, comp_count, comp_type, el.is_normalized,
el.offset_words * 4);
}
++el_index;
}
@ -1899,6 +1913,82 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) {
return true;
}
// Resolves every texture sampler referenced by the active vertex and pixel
// shaders for this draw.
// Fetch slots are shared between the two stages, so each slot is populated at
// most once even when both shaders reference it.
// Returns false as soon as any sampler fails to populate, true otherwise.
bool CommandProcessor::PopulateSamplers(DrawCommand* draw_command) {
  SCOPE_profile_cpu_f("gpu");

  // VS and PS samplers are shared, but may be used exclusively.
  // We walk each and setup lazily.
  bool has_setup_sampler[32] = {false};

  // Vertex texture samplers.
  const auto& vertex_sampler_inputs = active_vertex_shader_->sampler_inputs();
  for (size_t i = 0; i < vertex_sampler_inputs.count; ++i) {
    const auto& desc = vertex_sampler_inputs.descs[i];
    if (has_setup_sampler[desc.fetch_slot]) {
      // Already handled for this draw.
      continue;
    }
    has_setup_sampler[desc.fetch_slot] = true;
    if (!PopulateSampler(draw_command, desc)) {
      return false;
    }
  }

  // Pixel shader texture sampler.
  const auto& pixel_sampler_inputs = active_pixel_shader_->sampler_inputs();
  for (size_t i = 0; i < pixel_sampler_inputs.count; ++i) {
    const auto& desc = pixel_sampler_inputs.descs[i];
    if (has_setup_sampler[desc.fetch_slot]) {
      // Already handled, possibly by the vertex shader pass above.
      continue;
    }
    has_setup_sampler[desc.fetch_slot] = true;
    if (!PopulateSampler(draw_command, desc)) {
      return false;
    }
  }

  return true;
}
// Resolves the texture/sampler pair for a single shader sampler input and
// stores its handle into the draw's state block at index desc.fetch_slot.
// Returns false if the fetch constant cannot be parsed or the texture cache
// cannot provide an entry.
bool CommandProcessor::PopulateSampler(DrawCommand* draw_command,
                                       const Shader::SamplerDesc& desc) {
  auto& regs = *register_file_;

  // Fetch constants are laid out 6 registers per slot.
  int fetch_reg = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6;
  auto fetch_group =
      reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[fetch_reg]);
  auto& fetch = fetch_group->texture_fetch;

  // Only fetch constants of type 0x2 are expected here.
  assert_true(fetch.type == 0x2);

  // Decode the texture description from the fetch constant.
  TextureInfo texture_info;
  if (!TextureInfo::Prepare(fetch, &texture_info)) {
    XELOGE("Unable to parse texture fetcher info");
    return false;
  }

  // Decode the sampler state from the fetch constant and fetch instruction.
  SamplerInfo sampler_info;
  if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) {
    XELOGE("Unable to parse sampler info");
    return false;
  }

  // The fetch address is shifted left by 12 to form the guest byte address,
  // which is then translated into host memory.
  uint32_t guest_address = fetch.address << 12;
  void* host_address = membase_ + guest_address;
  auto entry_view = texture_cache_.Demand(
      host_address, texture_info.input_length, texture_info, sampler_info);
  if (!entry_view) {
    // Unable to create/fetch/etc.
    XELOGE("Failed to demand texture");
    return false;
  }

  // Shaders will use bindless to fetch right from it.
  draw_command->state_data->texture_samplers[desc.fetch_slot] =
      entry_view->texture_sampler_handle;

  return true;
}
bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
auto& regs = *register_file_;
@ -2045,7 +2135,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
case CopyCommand::kConstantOne:
case CopyCommand::kNull:
default:
assert_unhandled_case(copy_command);
// assert_unhandled_case(copy_command);
return false;
}
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

View File

@ -20,6 +20,7 @@
#include <xenia/gpu/gl4/circular_buffer.h>
#include <xenia/gpu/gl4/gl_context.h>
#include <xenia/gpu/gl4/gl4_shader.h>
#include <xenia/gpu/gl4/texture_cache.h>
#include <xenia/gpu/register_file.h>
#include <xenia/gpu/xenos.h>
#include <xenia/memory.h>
@ -40,6 +41,39 @@ struct SwapParameters {
GLenum attachment;
};
// Per-draw state block read by the shaders.
// IssueDraw acquires one of these from the scratch buffer for every draw,
// fills it in via the Update*/Populate* steps, and binds that range to
// GL_SHADER_STORAGE_BUFFER binding 0 before drawing.
// This must match the layout in gl4_shader.cc.
// NOTE(review): the arrays below are tightly packed on the host side; confirm
// that the GLSL buffer block uses the same array strides.
struct UniformDataBlock {
// Four floats addressable either by index or by component name.
union float4 {
float v[4];
struct {
float x, y, z, w;
};
};
float4 window_offset; // tx,ty,rt_w,rt_h
float4 window_scissor; // x0,y0,x1,y1
float4 viewport_offset; // tx,ty,tz,?
float4 viewport_scale; // sx,sy,sz,?
// TODO(benvanik): vertex format xyzw?
float4 alpha_test; // alpha test enable, func, ref, ?
// Texture/sampler handles written by PopulateSampler, indexed by fetch slot.
// TODO(benvanik): overlay with fetch_consts below?
uint64_t texture_samplers[32];
// The four arrays below are filled by UpdateConstants with a single memcpy
// that starts at float_consts and spans all four, so they must stay
// contiguous with nothing inserted between them.
// Register data from 0x4000 to 0x4927.
// SHADER_CONSTANT_000_X...
float4 float_consts[512];
// SHADER_CONSTANT_FETCH_00_0...
uint32_t fetch_consts[32 * 6];
// SHADER_CONSTANT_BOOL_000_031...
int32_t bool_consts[8];
// SHADER_CONSTANT_LOOP_00...
int32_t loop_consts[32];
};
// Guards the block size against a 16KB upper bound.
static_assert(sizeof(UniformDataBlock) <= 16 * 1024,
"Need <=16k uniform data");
// TODO(benvanik): move more of the enums in here?
struct DrawCommand {
PrimitiveType prim_type;
@ -54,6 +88,7 @@ struct DrawCommand {
size_t size;
xenos::Endian endianness;
xenos::IndexFormat format;
size_t buffer_offset;
} index_buffer;
// Texture samplers.
@ -63,11 +98,9 @@ struct DrawCommand {
// SamplerStateResource* sampler_state;
};
SamplerInput vertex_shader_samplers[32];
size_t vertex_shader_sampler_count;
SamplerInput pixel_shader_samplers[32];
size_t pixel_shader_sampler_count;
GLuint64 state_data_gpu_ptr;
UniformDataBlock* state_data;
};
class CommandProcessor {
@ -195,11 +228,15 @@ class CommandProcessor {
void PrepareDraw(DrawCommand* draw_command);
bool IssueDraw(DrawCommand* draw_command);
bool UpdateState(DrawCommand* draw_command);
bool UpdateRenderTargets(DrawCommand* draw_command);
bool UpdateState(DrawCommand* draw_command);
bool UpdateConstants(DrawCommand* draw_command);
bool UpdateShaders(DrawCommand* draw_command);
bool PopulateIndexBuffer(DrawCommand* draw_command);
bool PopulateVertexBuffers(DrawCommand* draw_command);
bool PopulateSamplers(DrawCommand* draw_command);
bool PopulateSampler(DrawCommand* draw_command,
const Shader::SamplerDesc& desc);
bool IssueCopy(DrawCommand* draw_command);
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
@ -237,7 +274,7 @@ class CommandProcessor {
uint64_t bin_select_;
uint64_t bin_mask_;
GLuint uniform_data_buffer_;
bool has_bindless_vbos_;
std::vector<std::unique_ptr<GL4Shader>> all_shaders_;
std::unordered_map<uint64_t, GL4Shader*> shader_cache_;
@ -251,7 +288,7 @@ class CommandProcessor {
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
std::vector<std::unique_ptr<CachedPipeline>> all_pipelines_;
std::unordered_map<uint64_t, CachedPipeline*> cached_pipelines_;
TextureCache texture_cache_;
CircularBuffer scratch_buffer_;
DrawCommand draw_command_;

View File

@ -17,6 +17,9 @@
DECLARE_bool(thread_safe_gl);
DECLARE_bool(gl_debug_output);
DECLARE_bool(gl_debug_output_synchronous);
namespace xe {
namespace gpu {
namespace gl4 {

View File

@ -15,6 +15,10 @@
DEFINE_bool(thread_safe_gl, false,
"Only allow one GL context to be active at a time.");
DEFINE_bool(gl_debug_output, false, "Dump ARB_debug_output to stderr.");
DEFINE_bool(gl_debug_output_synchronous, true,
"ARB_debug_output will synchronize to be thread safe.");
namespace xe {
namespace gpu {
namespace gl4 {

View File

@ -35,7 +35,6 @@ const std::string header =
"#extension GL_ARB_explicit_uniform_location : require\n"
"#extension GL_ARB_shading_language_420pack : require\n"
"#extension GL_ARB_shader_storage_buffer_object : require\n"
"#extension GL_NV_shader_buffer_load : require\n"
"precision highp float;\n"
"precision highp int;\n"
"layout(std140, column_major) uniform;\n"
@ -46,6 +45,7 @@ const std::string header =
" vec4 viewport_offset;\n"
" vec4 viewport_scale;\n"
" vec4 alpha_test;\n"
" uvec2 texture_samplers[32];\n"
" vec4 float_consts[512];\n"
" uint fetch_consts[32 * 6];\n"
" int bool_consts[8];\n"
@ -55,7 +55,9 @@ const std::string header =
" vec4 o[16];\n"
"};\n"
"\n"
"uniform StateData* state;\n";
"layout(binding = 0) buffer State {\n"
" StateData state;\n"
"};\n";
bool GL4Shader::PrepareVertexShader(
const xenos::xe_gpu_program_cntl_t& program_cntl) {
@ -69,20 +71,20 @@ bool GL4Shader::PrepareVertexShader(
// TODO(benvanik): piecewise viewport_enable -> offset/scale logic.
" if (false) {\n"
" } else {\n"
/*" pos.xy = pos.xy / vec2(state->window_offset.z / 2.0, "
"-state->window_offset.w / 2.0) + vec2(-1.0, 1.0);\n"
/*" pos.xy = pos.xy / vec2(state.window_offset.z / 2.0, "
"-state.window_offset.w / 2.0) + vec2(-1.0, 1.0);\n"
" pos.zw = vec2(0.0, 1.0);\n"*/
" pos.xy = pos.xy / vec2(1280.0 / 2.0, "
"-720.0 / 2.0) + vec2(-1.0, 1.0);\n"
" //pos.zw = vec2(0.0, 1.0);\n"
" }\n"
" pos.x = pos.x * state->viewport_scale.x + \n"
" state->viewport_offset.x;\n"
" pos.y = pos.y * state->viewport_scale.y + \n"
" state->viewport_offset.y;\n"
" pos.z = pos.z * state->viewport_scale.z + \n"
" state->viewport_offset.z;\n"
" pos.xy += state->window_offset.xy;\n"
" pos.x = pos.x * state.viewport_scale.x + \n"
" state.viewport_offset.x;\n"
" pos.y = pos.y * state.viewport_scale.y + \n"
" state.viewport_offset.y;\n"
" pos.z = pos.z * state.viewport_scale.z + \n"
" state.viewport_offset.z;\n"
" pos.xy += state.window_offset.xy;\n"
" return pos;\n"
"}\n";
std::string source =
@ -105,6 +107,8 @@ bool GL4Shader::PrepareVertexShader(
" gl_Position = applyViewport(gl_Position);\n"
"}\n";
// glGetTextureSamplerHandleARB()
std::string translated_source =
shader_translator_.TranslateVertexShader(this, program_cntl);
if (translated_source.empty()) {
@ -135,9 +139,9 @@ bool GL4Shader::PreparePixelShader(
"void processFragment();\n"
"void main() {\n"
" for (int i = 0; i < oC.length(); ++i) {\n"
" oC[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
" oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n"
" }\n" +
(program_cntl.ps_export_depth ? " gl_FragDepth = 0.0\n" : "") +
(program_cntl.ps_export_depth ? " gl_FragDepth = 0.0;\n" : "") +
" processFragment();\n"
"}\n";

View File

@ -28,25 +28,21 @@ static const char chan_names[] = {
const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) {
switch (el.format) {
case VertexFormat::k_32:
return el.is_signed ? "int" : "uint";
case VertexFormat::k_32_FLOAT:
return "float";
case VertexFormat::k_16_16:
case VertexFormat::k_32_32:
return el.is_signed ? "ivec2" : "uvec2";
case VertexFormat::k_16_16_FLOAT:
case VertexFormat::k_32_32_FLOAT:
return "vec2";
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
return "int3"; // ?
case VertexFormat::k_32_32_32_FLOAT:
return "vec3";
case VertexFormat::k_8_8_8_8:
case VertexFormat::k_2_10_10_10:
case VertexFormat::k_16_16_16_16:
case VertexFormat::k_32_32_32_32:
return el.is_signed ? "ivec4" : "uvec4";
case VertexFormat::k_16_16_16_16_FLOAT:
case VertexFormat::k_32_32_32_32_FLOAT:
return "vec4";
@ -58,14 +54,13 @@ const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) {
}
GL4ShaderTranslator::GL4ShaderTranslator()
: output_(kOutputCapacity), tex_fetch_index_(0), dwords_(nullptr) {}
: output_(kOutputCapacity), dwords_(nullptr) {}
GL4ShaderTranslator::~GL4ShaderTranslator() = default;
void GL4ShaderTranslator::Reset(GL4Shader* shader) {
output_.Reset();
shader_type_ = shader->type();
tex_fetch_index_ = 0;
dwords_ = shader->data();
}
@ -76,8 +71,6 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
// Normal shaders only, for now.
assert_true(program_cntl.vs_export_mode == 0);
AppendTextureHeader(vertex_shader->sampler_inputs());
// Add vertex shader input.
uint32_t el_index = 0;
const auto& buffer_inputs = vertex_shader->buffer_inputs();
@ -102,7 +95,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
// Add temporaries for any registers we may use.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= temp_regs; n++) {
Append(" vec4 r%d = state->float_consts[%d];\n", n, n);
Append(" vec4 r%d = state.float_consts[%d];\n", n, n);
}
Append(" vec4 t;\n");
@ -129,15 +122,13 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
// If the same PS is used with different VS that output different amounts
// (and less than the number of required registers), things may die.
AppendTextureHeader(pixel_shader->sampler_inputs());
// Pixel shader main() header.
Append("void processFragment() {\n");
// Add temporary registers.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) {
Append(" vec4 r%d = state->float_consts[%d];\n", n, n + 256);
Append(" vec4 r%d = state.float_consts[%d];\n", n, n + 256);
}
Append(" vec4 t;\n");
Append(" float s;\n"); // scalar result (used for RETAIN_PREV)
@ -161,42 +152,6 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
return output_.to_string();
}
void GL4ShaderTranslator::AppendTextureHeader(
const GL4Shader::SamplerInputs& sampler_inputs) {
bool fetch_setup[32] = {false};
// 1 texture per constant slot, 1 sampler per fetch.
for (uint32_t n = 0; n < sampler_inputs.count; n++) {
const auto& input = sampler_inputs.descs[n];
const auto& fetch = input.tex_fetch;
// Add texture, if needed.
if (!fetch_setup[fetch.const_idx]) {
fetch_setup[fetch.const_idx] = true;
const char* texture_type = nullptr;
switch (fetch.dimension) {
case DIMENSION_1D:
texture_type = "Texture1D";
break;
default:
case DIMENSION_2D:
texture_type = "Texture2D";
break;
case DIMENSION_3D:
texture_type = "Texture3D";
break;
case DIMENSION_CUBE:
texture_type = "TextureCube";
break;
}
Append("%s x_texture_%d;\n", texture_type, fetch.const_idx);
}
// Add sampler.
Append("SamplerState x_sampler_%d;\n", n);
}
}
void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
uint32_t swiz, uint32_t negate,
uint32_t abs_constants) {
@ -217,7 +172,7 @@ void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
if (abs_constants) {
Append("abs(");
}
Append("state->float_consts[%u]", is_pixel_shader() ? num + 256 : num);
Append("state.float_consts[%u]", is_pixel_shader() ? num + 256 : num);
if (abs_constants) {
Append(")");
}
@ -258,9 +213,12 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) {
case 0:
Append("oC[0]");
break;
case 61:
// Write to t, as we need to splice just x out of it.
Append("t");
break;
default:
// TODO(benvanik): other render targets?
// TODO(benvanik): depth?
assert_always();
break;
}
@ -282,7 +240,10 @@ void GL4ShaderTranslator::AppendDestReg(uint32_t num, uint32_t mask,
void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask,
uint32_t dst_exp) {
if (mask != 0xF) {
if (num == 61) {
// gl_FragDepth handling to just get x from the temp result.
Append(" gl_FragDepth = t.x;\n");
} else if (mask != 0xF) {
// Masking.
Append(" ");
AppendDestRegName(num, dst_exp);
@ -399,7 +360,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) {
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -685,7 +646,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) {
if (alu.vector_clamp) {
Append(", 0.0, 1.0)");
}
Append(";\n");
Append(".xxxx;\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
return true;
}
@ -706,7 +667,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) {
if (alu.vector_clamp) {
Append(", 0.0, 1.0)");
}
Append(";\n");
Append(".xxxx;\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
return true;
}
@ -730,7 +691,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
if (alu.vector_clamp) {
Append(", 0.0, 1.0)");
}
Append(";\n");
Append(".xxxx;\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
return true;
}
@ -1402,20 +1363,27 @@ bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
int sync) {
int src_component_count = 0;
const char* sampler_type;
switch (tex->dimension) {
case DIMENSION_1D:
src_component_count = 1;
sampler_type = "sampler1D";
break;
default:
case DIMENSION_2D:
src_component_count = 2;
sampler_type = "sampler2D";
break;
case DIMENSION_3D:
src_component_count = 3;
sampler_type = "sampler3D";
break;
case DIMENSION_CUBE:
src_component_count = 3;
sampler_type = "samplerCube";
break;
default:
assert_unhandled_case(tex->dimension);
return false;
}
// Disassemble.
@ -1500,10 +1468,10 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
Append("\n");
// Translate.
Append(" t = ");
Append("x_texture_%d.Sample(x_sampler_%d, r%u.", tex->const_idx,
tex_fetch_index_++, // hacky way to line up to tex buffers
tex->src_reg);
// TODO(benvanik): if sampler == null, set to invalid color.
Append(" t = texture(");
Append("%s(state.texture_samplers[%d])", sampler_type, tex->const_idx & 0xF);
Append(", r%u.", tex->src_reg);
src_swiz = tex->src_swiz;
for (int i = 0; i < src_component_count; i++) {
Append("%c", chan_names[src_swiz & 0x3]);
@ -1511,6 +1479,26 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
}
Append(");\n");
// Output texture coordinates as color.
// TODO(benvanik): only if texture is invalid?
// Append(" t = vec4(r%u.", tex->src_reg);
// src_swiz = tex->src_swiz;
// for (int i = 0; i < src_component_count; i++) {
// Append("%c", chan_names[src_swiz & 0x3]);
// src_swiz >>= 2;
//}
// switch (src_component_count) {
// case 1:
// Append(", 0.0, 0.0, 1.0);\n");
// break;
// case 2:
// Append(", 0.0, 1.0);\n");
// break;
// case 3:
// Append(", 1.0);\n");
// break;
//}
Append(" r%u.xyzw = vec4(", tex->dst_reg);
uint32_t dst_swiz = tex->dst_swiz;
for (int i = 0; i < 4; i++) {
@ -1524,6 +1512,7 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
} else if ((dst_swiz & 0x7) == 6) {
// ?
Append("?");
assert_always();
} else if ((dst_swiz & 0x7) == 7) {
Append("r%u.%c", tex->dst_reg, chan_names[i]);
} else {

View File

@ -39,7 +39,6 @@ class GL4ShaderTranslator {
protected:
ShaderType shader_type_;
uint32_t tex_fetch_index_;
const uint32_t* dwords_;
static const int kOutputCapacity = 64 * 1024;
@ -56,8 +55,6 @@ class GL4ShaderTranslator {
va_end(args);
}
void AppendTextureHeader(const GL4Shader::SamplerInputs& sampler_inputs);
void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
uint32_t abs);
void AppendDestRegName(uint32_t num, uint32_t dst_exp);

View File

@ -115,6 +115,8 @@ bool GLContext::Initialize(HWND hwnd) {
// Clearing errors.
}
SetupDebugging();
ClearCurrent();
return true;
@ -160,11 +162,120 @@ std::unique_ptr<GLContext> GLContext::CreateShared() {
return nullptr;
}
SetupDebugging();
new_context->ClearCurrent();
return new_context;
}
// Formats a GL debug-output message and writes it to the error log.
//
// source, type and severity are mapped to short human readable names; values
// that are not recognized are logged with an "(unknown ...)" placeholder.
// id is the message id and is logged as an unsigned value (GLuint).
// length is not used: the message is handed to the logger through %s, so it
// must be NUL-terminated.
void GLContext::DebugMessage(GLenum source, GLenum type, GLuint id,
                             GLenum severity, GLsizei length,
                             const GLchar* message) {
  const char* source_name = "(unknown source)";
  switch (source) {
    case GL_DEBUG_SOURCE_API_ARB:
      source_name = "OpenGL";
      break;
    case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB:
      source_name = "Windows";
      break;
    case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB:
      source_name = "Shader Compiler";
      break;
    case GL_DEBUG_SOURCE_THIRD_PARTY_ARB:
      source_name = "Third Party";
      break;
    case GL_DEBUG_SOURCE_APPLICATION_ARB:
      source_name = "Application";
      break;
    case GL_DEBUG_SOURCE_OTHER_ARB:
      source_name = "Other";
      break;
    default:
      break;
  }

  const char* type_name = "(unknown type)";
  switch (type) {
    case GL_DEBUG_TYPE_ERROR:
      type_name = "error";
      break;
    case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
      type_name = "deprecated behavior";
      break;
    case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
      type_name = "undefined behavior";
      break;
    case GL_DEBUG_TYPE_PORTABILITY:
      type_name = "portability";
      break;
    case GL_DEBUG_TYPE_PERFORMANCE:
      type_name = "performance";
      break;
    case GL_DEBUG_TYPE_OTHER:
      type_name = "message";
      break;
    case GL_DEBUG_TYPE_MARKER:
      type_name = "marker";
      break;
    case GL_DEBUG_TYPE_PUSH_GROUP:
      type_name = "push group";
      break;
    case GL_DEBUG_TYPE_POP_GROUP:
      type_name = "pop group";
      break;
    default:
      break;
  }

  const char* severity_name = "(unknown severity)";
  switch (severity) {
    case GL_DEBUG_SEVERITY_HIGH_ARB:
      severity_name = "high";
      break;
    case GL_DEBUG_SEVERITY_MEDIUM_ARB:
      severity_name = "medium";
      break;
    case GL_DEBUG_SEVERITY_LOW_ARB:
      severity_name = "low";
      break;
    case GL_DEBUG_SEVERITY_NOTIFICATION:
      severity_name = "notification";
      break;
    default:
      break;
  }

  // id is a GLuint, so it is printed with %u rather than %d.
  XELOGE("GL4 %s: %s(%s) %u: %s", source_name, type_name, severity_name, id,
         message);
}
// Static trampoline registered with the GL debug-output callback.
// user_param is the GLContext pointer that SetupDebugging() passed when
// registering; the message is forwarded unchanged to its DebugMessage().
void GLAPIENTRY
GLContext::DebugMessageThunk(GLenum source, GLenum type, GLuint id,
                             GLenum severity, GLsizei length,
                             const GLchar* message, GLvoid* user_param) {
  auto* context = static_cast<GLContext*>(user_param);
  context->DebugMessage(source, type, id, severity, length, message);
}
// Enables GL debug output when --gl_debug_output is set and routes every
// message to DebugMessageThunk with this context as the user parameter.
// Does nothing when the flag is off.
void GLContext::SetupDebugging() {
  if (FLAGS_gl_debug_output) {
    glEnable(GL_DEBUG_OUTPUT);
    if (FLAGS_gl_debug_output_synchronous) {
      glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
    }

    // Enable all sources/types/severities (no id filter list).
    glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, nullptr,
                          GL_TRUE);

    // The thunk's user parameter type differs from GLDEBUGPROC's, hence the
    // function pointer cast.
    auto callback = reinterpret_cast<GLDEBUGPROC>(&DebugMessageThunk);
    glDebugMessageCallback(callback, this);
  }
}
bool GLContext::MakeCurrent() {
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.lock();

View File

@ -35,6 +35,13 @@ class GLContext {
void ClearCurrent();
private:
void SetupDebugging();
void DebugMessage(GLenum source, GLenum type, GLuint id, GLenum severity,
GLsizei length, const GLchar* message);
static void GLAPIENTRY
DebugMessageThunk(GLenum source, GLenum type, GLuint id, GLenum severity,
GLsizei length, const GLchar* message, GLvoid* user_param);
HWND hwnd_;
HDC dc_;
HGLRC glrc_;

View File

@ -16,6 +16,8 @@
'gl4_shader_translator.h',
'gl_context.cc',
'gl_context.h',
'texture_cache.cc',
'texture_cache.h',
],
'conditions': [

View File

@ -0,0 +1,497 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/gl4/texture_cache.h>
#include <poly/assert.h>
#include <poly/math.h>
#include <xenia/gpu/gpu-private.h>
namespace xe {
namespace gpu {
namespace gl4 {
using namespace xe::gpu::xenos;
extern "C" GLEWContext* glewGetContext();
extern "C" WGLEWContext* wglewGetContext();
// Constructs an empty cache. The scratch buffer pointer is cleared here so it
// never holds an indeterminate value before Initialize() supplies one.
TextureCache::TextureCache() : scratch_buffer_(nullptr) {}
// Releases everything the cache still holds by delegating to Shutdown().
TextureCache::~TextureCache() {
  Shutdown();
}
// Stores the scratch buffer that UploadTexture2D() stages texel data in.
// The buffer is not owned by the cache. Always returns true.
bool TextureCache::Initialize(CircularBuffer* scratch_buffer) {
  this->scratch_buffer_ = scratch_buffer;
  return true;
}
// Tears the cache down; currently this only drops all cached entries.
void TextureCache::Shutdown() {
  // All GL objects are owned by the entries, so clearing them is sufficient.
  Clear();
}
// Destroys every cached texture together with its sampler views and empties
// the entry list.
void TextureCache::Clear() {
  for (auto& entry : entries_) {
    for (auto& view : entry.views) {
      // A view may have been recorded without a handle (handle creation can
      // fail in Demand()); only real handles may be made non-resident.
      if (view.texture_sampler_handle) {
        glMakeTextureHandleNonResidentARB(view.texture_sampler_handle);
      }
      glDeleteSamplers(1, &view.sampler);
    }
    glDeleteTextures(1, &entry.base_texture);
  }
  entries_.clear();
}
// Creates a GL texture for texture_info, uploads the guest data found at
// host_base, creates a sampler for sampler_info and returns the view that
// pairs the two through a resident bindless handle.
//
// host_base/length: source texel data and its size in bytes.
// Returns the new view on success, or nullptr on any failure (unsupported
// dimension, parameter setup failure, upload failure, or no handle). On
// failure every GL object created by this call is destroyed and no entry is
// added to the cache.
//
// NOTE(review): the returned pointer refers to storage owned by the cache;
// callers should not hold it across Clear() - confirm intended lifetime.
TextureCache::EntryView* TextureCache::Demand(void* host_base, size_t length,
                                              const TextureInfo& texture_info,
                                              const SamplerInfo& sampler_info) {
  GLenum target;
  switch (texture_info.dimension) {
    case Dimension::k1D:
      target = GL_TEXTURE_1D;
      break;
    case Dimension::k2D:
      target = GL_TEXTURE_2D;
      break;
    case Dimension::k3D:
      target = GL_TEXTURE_3D;
      break;
    case Dimension::kCube:
      target = GL_TEXTURE_CUBE_MAP;
      break;
    default:
      // Never create a texture with an indeterminate target.
      assert_unhandled_case(texture_info.dimension);
      return nullptr;
  }

  GLuint texture = 0;
  GLuint sampler = 0;

  // Destroys whatever this call created so a failure leaves nothing behind.
  auto fail = [&texture, &sampler]() -> EntryView* {
    if (sampler) {
      glDeleteSamplers(1, &sampler);
    }
    if (texture) {
      glDeleteTextures(1, &texture);
    }
    return nullptr;
  };

  // Setup the base texture.
  glCreateTextures(target, 1, &texture);
  if (!SetupTexture(texture, texture_info)) {
    PLOGE("Unable to setup texture parameters");
    return fail();
  }

  // Upload/convert.
  bool uploaded = false;
  switch (texture_info.dimension) {
    case Dimension::k2D:
      uploaded = UploadTexture2D(texture, host_base, length, texture_info,
                                 sampler_info);
      break;
    case Dimension::k1D:
    case Dimension::k3D:
    case Dimension::kCube:
      assert_unhandled_case(texture_info.dimension);
      return fail();
  }
  if (!uploaded) {
    PLOGE("Failed to convert/upload texture");
    return fail();
  }

  // Setup the sampler.
  glCreateSamplers(1, &sampler);
  if (!SetupSampler(sampler, texture_info, sampler_info)) {
    PLOGE("Unable to setup texture sampler parameters");
    return fail();
  }

  // Get the uvec2 handle to the texture/sampler pair and make it resident.
  // The handle can be passed directly to the shader.
  GLuint64 handle = glGetTextureSamplerHandleARB(texture, sampler);
  if (!handle) {
    return fail();
  }
  glMakeTextureHandleResidentARB(handle);

  // Everything succeeded: only now record the entry and its first view.
  entries_.emplace_back(Entry());
  auto& entry = entries_.back();
  entry.texture_info = texture_info;
  entry.base_texture = texture;
  entry.views.emplace_back(EntryView());
  auto& entry_view = entry.views.back();
  entry_view.sampler_info = sampler_info;
  entry_view.sampler = sampler;
  entry_view.texture_sampler_handle = handle;
  return &entry_view;
}
// Applies the per-texture parameters (mip level range and channel swizzle)
// described by texture_info to the given GL texture.
//
// Returns false, without touching the swizzle state, when any of the four
// 3-bit swizzle selectors has a value the mapping table does not cover.
bool TextureCache::SetupTexture(GLuint texture,
                                const TextureInfo& texture_info) {
  // TODO(benvanik): texture mip levels.
  glTextureParameteri(texture, GL_TEXTURE_BASE_LEVEL, 0);
  glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, 1);

  // Pre-shader swizzle.
  // TODO(benvanik): can this be dynamic? Maybe per view?
  // We may have to emulate this in the shader.
  static const GLenum swizzle_map[] = {
      GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA, GL_ZERO, GL_ONE,
  };
  static const uint32_t swizzle_map_count =
      sizeof(swizzle_map) / sizeof(swizzle_map[0]);
  static const GLenum swizzle_params[4] = {
      GL_TEXTURE_SWIZZLE_R, GL_TEXTURE_SWIZZLE_G, GL_TEXTURE_SWIZZLE_B,
      GL_TEXTURE_SWIZZLE_A,
  };

  // Each channel selector is 3 bits wide (0-7) but only 0-5 are mapped, so
  // validate all four before indexing the table.
  uint32_t selectors[4];
  for (uint32_t i = 0; i < 4; ++i) {
    selectors[i] = (texture_info.swizzle >> (3 * i)) & 0x7;
    if (selectors[i] >= swizzle_map_count) {
      assert_unhandled_case(texture_info.swizzle);
      return false;
    }
  }
  for (uint32_t i = 0; i < 4; ++i) {
    glTextureParameteri(texture, swizzle_params[i], swizzle_map[selectors[i]]);
  }

  return true;
}
// Configures a GL sampler object from sampler_info: border color, LOD range,
// wrap modes for all three coordinates and min/mag filtering.
//
// texture_info is currently unused. Returns false when a filter value is not
// one of the handled ucode::TEX_FILTER_* cases; the border/LOD/wrap state has
// already been applied by then.
bool TextureCache::SetupSampler(GLuint sampler, const TextureInfo& texture_info,
                                const SamplerInfo& sampler_info) {
  // TODO(benvanik): border color from texture fetch.
  GLfloat border_color[4] = {0.0f};
  glSamplerParameterfv(sampler, GL_TEXTURE_BORDER_COLOR, border_color);

  // TODO(benvanik): setup LODs for mipmapping.
  glSamplerParameterf(sampler, GL_TEXTURE_LOD_BIAS, 0.0f);
  glSamplerParameterf(sampler, GL_TEXTURE_MIN_LOD, 0.0f);
  glSamplerParameterf(sampler, GL_TEXTURE_MAX_LOD, 0.0f);

  // Texture wrapping modes.
  // TODO(benvanik): not sure if the middle ones are correct.
  static const GLenum wrap_map[] = {
      GL_REPEAT,                      //
      GL_MIRRORED_REPEAT,             //
      GL_CLAMP_TO_EDGE,               //
      GL_MIRROR_CLAMP_TO_EDGE,        //
      GL_CLAMP_TO_BORDER,             // ?
      GL_MIRROR_CLAMP_TO_BORDER_EXT,  // ?
      GL_CLAMP_TO_BORDER,             //
      GL_MIRROR_CLAMP_TO_BORDER_EXT,  //
  };
  glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S,
                      wrap_map[sampler_info.clamp_u]);
  glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T,
                      wrap_map[sampler_info.clamp_v]);
  glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R,
                      wrap_map[sampler_info.clamp_w]);

  // Texture level filtering.
  // The mipmapped variants are commented out: every handled mip filter
  // currently selects the non-mipmapped minification filter.
  GLenum min_filter;
  switch (sampler_info.min_filter) {
    case ucode::TEX_FILTER_POINT:
      switch (sampler_info.mip_filter) {
        case ucode::TEX_FILTER_BASEMAP:
          min_filter = GL_NEAREST;
          break;
        case ucode::TEX_FILTER_POINT:
          // min_filter = GL_NEAREST_MIPMAP_NEAREST;
          min_filter = GL_NEAREST;
          break;
        case ucode::TEX_FILTER_LINEAR:
          // min_filter = GL_NEAREST_MIPMAP_LINEAR;
          min_filter = GL_NEAREST;
          break;
        default:
          assert_unhandled_case(sampler_info.mip_filter);
          return false;
      }
      break;
    case ucode::TEX_FILTER_LINEAR:
      switch (sampler_info.mip_filter) {
        case ucode::TEX_FILTER_BASEMAP:
          min_filter = GL_LINEAR;
          break;
        case ucode::TEX_FILTER_POINT:
          // min_filter = GL_LINEAR_MIPMAP_NEAREST;
          min_filter = GL_LINEAR;
          break;
        case ucode::TEX_FILTER_LINEAR:
          // min_filter = GL_LINEAR_MIPMAP_LINEAR;
          min_filter = GL_LINEAR;
          break;
        default:
          assert_unhandled_case(sampler_info.mip_filter);
          return false;
      }
      break;
    default:
      assert_unhandled_case(sampler_info.min_filter);
      return false;
  }
  GLenum mag_filter;
  switch (sampler_info.mag_filter) {
    case ucode::TEX_FILTER_POINT:
      mag_filter = GL_NEAREST;
      break;
    case ucode::TEX_FILTER_LINEAR:
      mag_filter = GL_LINEAR;
      break;
    default:
      // Report the value that was switched on, not the still-unset local.
      assert_unhandled_case(sampler_info.mag_filter);
      return false;
  }
  glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, min_filter);
  glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, mag_filter);

  // TODO(benvanik): anisotropic filtering.
  // GL_TEXTURE_MAX_ANISOTROPY_EXT

  return true;
}
// Copies length bytes of texel data from src to dest, applying the byte
// reordering selected by endianness:
//   k8in16  - swap the bytes of every 16-bit word,
//   k8in32  - reverse the bytes of every 32-bit word,
//   k16in32 - swap the two 16-bit halves of every 32-bit word,
//   other   - plain copy.
// For the swapping modes only whole words are processed; trailing bytes that
// do not fill a word are not written.
void TextureSwap(Endian endianness, void* dest, const void* src,
                 size_t length) {
  switch (endianness) {
    case Endian::k8in16:
      poly::copy_and_swap_16_aligned(reinterpret_cast<uint16_t*>(dest),
                                     reinterpret_cast<const uint16_t*>(src),
                                     length / 2);
      break;
    case Endian::k8in32:
      poly::copy_and_swap_32_aligned(reinterpret_cast<uint32_t*>(dest),
                                     reinterpret_cast<const uint32_t*>(src),
                                     length / 4);
      break;
    case Endian::k16in32: {
      // TODO(benvanik): make more efficient.
      // memcpy is used for the loads/stores so no alignment is assumed.
      auto dest_bytes = reinterpret_cast<uint8_t*>(dest);
      auto src_bytes = reinterpret_cast<const uint8_t*>(src);
      for (size_t i = 0; i + sizeof(uint32_t) <= length;
           i += sizeof(uint32_t)) {
        uint32_t value;
        std::memcpy(&value, src_bytes + i, sizeof(value));
        value = (value >> 16) | (value << 16);
        std::memcpy(dest_bytes + i, &value, sizeof(value));
      }
    } break;
    default:
    case Endian::kUnspecified:
      std::memcpy(dest, src, length);
      break;
  }
}
// Allocates storage for a 2D texture and fills level 0 from guest data.
//
// Steps:
//  1. Map texture_info.format to a GL internal format plus the pixel
//     format/type describing the source data (compressed formats use the
//     same enum for both and ignore type).
//  2. Allocate one level of immutable storage at the output dimensions.
//  3. Copy the source into a scratch buffer allocation, byte swapping per
//     texture_info.endianness; tiled sources are untiled block by block.
//  4. Upload from the scratch buffer through GL_PIXEL_UNPACK_BUFFER.
//
// host_base/length: source data; length must equal texture_info.input_length.
// sampler_info is currently unused.
// Returns false if the texture format is not handled, true otherwise.
bool TextureCache::UploadTexture2D(GLuint texture, void* host_base,
                                   size_t length,
                                   const TextureInfo& texture_info,
                                   const SamplerInfo& sampler_info) {
  assert_true(length == texture_info.input_length);

  GLenum internal_format = GL_RGBA8;
  GLenum format = GL_RGBA;
  GLenum type = GL_UNSIGNED_BYTE;
  // https://code.google.com/p/glsnewton/source/browse/trunk/Source/uDDSLoader.pas?r=62
  // http://dench.flatlib.jp/opengl/textures
  // http://fossies.org/linux/WebKit/Source/ThirdParty/ANGLE/src/libGLESv2/formatutils.cpp
  switch (texture_info.format) {
    case TextureFormat::k_8:
      internal_format = GL_R8;
      // Single channel pixel data is GL_RED; GL_R is not a pixel format.
      format = GL_RED;
      type = GL_UNSIGNED_BYTE;
      break;
    case TextureFormat::k_1_5_5_5:
      internal_format = GL_RGB5_A1;
      format = GL_BGRA;
      type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      break;
    case TextureFormat::k_5_6_5:
      internal_format = GL_RGB565;
      format = GL_RGB;
      type = GL_UNSIGNED_SHORT_5_6_5;
      break;
    case TextureFormat::k_2_10_10_10:
    case TextureFormat::k_2_10_10_10_AS_16_16_16_16:
      internal_format = GL_RGB10_A2;
      format = GL_RGBA;
      type = GL_UNSIGNED_INT_2_10_10_10_REV;
      break;
    case TextureFormat::k_10_11_11:
    case TextureFormat::k_10_11_11_AS_16_16_16_16:
      // ?
      internal_format = GL_R11F_G11F_B10F;
      format = GL_RGB;
      type = GL_UNSIGNED_INT_10F_11F_11F_REV;
      break;
    case TextureFormat::k_11_11_10:
    case TextureFormat::k_11_11_10_AS_16_16_16_16:
      internal_format = GL_R11F_G11F_B10F;
      format = GL_RGB;
      type = GL_UNSIGNED_INT_10F_11F_11F_REV;
      break;
    case TextureFormat::k_8_8_8_8:
    case TextureFormat::k_8_8_8_8_AS_16_16_16_16:
      internal_format = GL_RGBA8;
      format = GL_RGBA;
      type = GL_UNSIGNED_BYTE;
      break;
    case TextureFormat::k_4_4_4_4:
      internal_format = GL_RGBA4;
      format = GL_RGBA;
      type = GL_UNSIGNED_SHORT_4_4_4_4;
      break;
    case TextureFormat::k_16_FLOAT:
      internal_format = GL_R16F;
      format = GL_RED;
      type = GL_HALF_FLOAT;
      break;
    case TextureFormat::k_16_16_FLOAT:
      internal_format = GL_RG16F;
      format = GL_RG;
      type = GL_HALF_FLOAT;
      break;
    case TextureFormat::k_16_16_16_16_FLOAT:
      internal_format = GL_RGBA16F;
      format = GL_RGBA;
      type = GL_HALF_FLOAT;
      break;
    case TextureFormat::k_32_FLOAT:
      internal_format = GL_R32F;
      // Single channel pixel data is GL_RED; GL_R is not a pixel format.
      format = GL_RED;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_32_32_FLOAT:
      internal_format = GL_RG32F;
      format = GL_RG;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_32_32_32_FLOAT:
      internal_format = GL_RGB32F;
      format = GL_RGB;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_32_32_32_32_FLOAT:
      internal_format = GL_RGBA32F;
      format = GL_RGBA;
      type = GL_FLOAT;
      break;
    case TextureFormat::k_DXT1:
    case TextureFormat::k_DXT1_AS_16_16_16_16:
      // or GL_COMPRESSED_RGB_S3TC_DXT1_EXT?
      internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
      break;
    case TextureFormat::k_DXT2_3:
    case TextureFormat::k_DXT2_3_AS_16_16_16_16:
      internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
      break;
    case TextureFormat::k_DXT4_5:
    case TextureFormat::k_DXT4_5_AS_16_16_16_16:
      internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
      break;
    case TextureFormat::k_24_8:
      internal_format = GL_DEPTH24_STENCIL8;
      format = GL_DEPTH_STENCIL;
      type = GL_UNSIGNED_INT_24_8;
      break;
    case TextureFormat::k_24_8_FLOAT:
      internal_format = GL_DEPTH24_STENCIL8;
      format = GL_DEPTH_STENCIL;
      type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
      break;
    default:
    case TextureFormat::k_1_REVERSE:
    case TextureFormat::k_1:
    case TextureFormat::k_6_5_5:
    case TextureFormat::k_8_A:
    case TextureFormat::k_8_B:
    case TextureFormat::k_8_8:
    case TextureFormat::k_Cr_Y1_Cb_Y0:
    case TextureFormat::k_Y1_Cr_Y0_Cb:
    case TextureFormat::k_8_8_8_8_A:
    case TextureFormat::k_16:
    case TextureFormat::k_16_16:
    case TextureFormat::k_16_16_16_16:
    case TextureFormat::k_16_EXPAND:
    case TextureFormat::k_16_16_EXPAND:
    case TextureFormat::k_16_16_16_16_EXPAND:
    case TextureFormat::k_32_32:
    case TextureFormat::k_32_32_32_32:
    case TextureFormat::k_32_AS_8:
    case TextureFormat::k_32_AS_8_8:
    case TextureFormat::k_16_MPEG:
    case TextureFormat::k_16_16_MPEG:
    case TextureFormat::k_8_INTERLACED:
    case TextureFormat::k_32_AS_8_INTERLACED:
    case TextureFormat::k_32_AS_8_8_INTERLACED:
    case TextureFormat::k_16_INTERLACED:
    case TextureFormat::k_16_MPEG_INTERLACED:
    case TextureFormat::k_16_16_MPEG_INTERLACED:
    case TextureFormat::k_DXN:
    case TextureFormat::k_DXT3A:
    case TextureFormat::k_DXT5A:
    case TextureFormat::k_CTX1:
    case TextureFormat::k_DXT3A_AS_1_1_1_1:
      assert_unhandled_case(texture_info.format);
      return false;
  }

  size_t unpack_length = texture_info.input_length;
  glTextureStorage2D(texture, 1, internal_format,
                     texture_info.size_2d.output_width,
                     texture_info.size_2d.output_height);
  assert_true(unpack_length % 4 == 0);

  // Stage the (swapped/untiled) texels in the scratch buffer.
  auto allocation = scratch_buffer_->Acquire(unpack_length);

  if (!texture_info.is_tiled) {
    // Linear source: a single swapping copy of the whole input.
    TextureSwap(texture_info.endianness, allocation.host_ptr, host_base,
                unpack_length);
    /*const uint8_t* src = reinterpret_cast<const uint8_t*>(host_base);
    uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
    for (uint32_t y = 0; y < texture_info.size_2d.block_height; y++) {
    for (uint32_t x = 0; x < texture_info.size_2d.logical_pitch;
    x += texture_info.texel_pitch) {
    TextureSwap(texture_info.endianness, dest + x, src + x,
    texture_info.texel_pitch);
    }
    src += texture_info.size_2d.input_pitch;
    dest += texture_info.size_2d.input_pitch;
    }*/
    // std::memcpy(dest, src, unpack_length);
  } else {
    // Tiled source: look up each block's tiled offset and write the blocks
    // out row by row with a pitch derived from the output width.
    uint8_t* src = reinterpret_cast<uint8_t*>(host_base);
    uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
    uint32_t output_pitch =
        (texture_info.size_2d.output_width / texture_info.block_size) *
        texture_info.texel_pitch;
    // Value handed to the TiledOffset2D helpers and used as the shift below;
    // derived purely from texel_pitch.
    auto bpp =
        (texture_info.texel_pitch >> 2) +
        ((texture_info.texel_pitch >> 1) >> (texture_info.texel_pitch >> 2));
    for (uint32_t y = 0, output_base_offset = 0;
         y < texture_info.size_2d.block_height;
         y++, output_base_offset += output_pitch) {
      auto input_base_offset = TextureInfo::TiledOffset2DOuter(
          y, (texture_info.size_2d.input_width / texture_info.block_size), bpp);
      for (uint32_t x = 0, output_offset = output_base_offset;
           x < texture_info.size_2d.block_width;
           x++, output_offset += texture_info.texel_pitch) {
        auto input_offset =
            TextureInfo::TiledOffset2DInner(x, y, bpp, input_base_offset) >>
            bpp;
        TextureSwap(texture_info.endianness, dest + output_offset,
                    src + input_offset * texture_info.texel_pitch,
                    texture_info.texel_pitch);
      }
    }
  }
  // Remember the offset before the allocation is handed back via Commit().
  size_t unpack_offset = allocation.offset;
  scratch_buffer_->Commit(std::move(allocation));

  // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
  // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
  glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

  // With a pixel unpack buffer bound, the data pointer argument of the upload
  // calls is a byte offset into that buffer.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
  if (texture_info.is_compressed) {
    glCompressedTextureSubImage2D(texture, 0, 0, 0,
                                  texture_info.size_2d.output_width,
                                  texture_info.size_2d.output_height, format,
                                  static_cast<GLsizei>(unpack_length),
                                  reinterpret_cast<void*>(unpack_offset));
  } else {
    glTextureSubImage2D(texture, 0, 0, 0, texture_info.size_2d.output_width,
                        texture_info.size_2d.output_height, format, type,
                        reinterpret_cast<void*>(unpack_offset));
  }
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
  return true;
}
} // namespace gl4
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,65 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_TEXTURE_CACHE_H_
#define XENIA_GPU_GL4_TEXTURE_CACHE_H_
#include <vector>
#include <xenia/gpu/gl4/circular_buffer.h>
#include <xenia/gpu/gl4/gl_context.h>
#include <xenia/gpu/sampler_info.h>
#include <xenia/gpu/texture_info.h>
namespace xe {
namespace gpu {
namespace gl4 {
// Cache of GL textures created from guest texture descriptions, together
// with the sampler objects and texture/sampler handles used to read them.
//
// The cache owns the GL objects it creates and releases them in Clear(),
// which Shutdown() and the destructor call. Because of that ownership the
// type is not copyable: a copy would delete the same GL names twice.
class TextureCache {
 public:
  // One sampler configuration applied to a cached texture.
  struct EntryView {
    // Sampler state this view was created for.
    SamplerInfo sampler_info;
    // GL sampler object name.
    GLuint sampler;
    // Handle for the texture+sampler pair; 0 if none was obtained.
    GLuint64 texture_sampler_handle;
  };
  // One cached texture and all of the views created for it.
  struct Entry {
    // Description the texture was created from.
    TextureInfo texture_info;
    // GL texture object name.
    GLuint base_texture;
    // Sampler views that reference base_texture.
    std::vector<EntryView> views;
  };

  TextureCache();
  ~TextureCache();

  // Owns GL objects; copying would lead to double deletion.
  TextureCache(const TextureCache&) = delete;
  TextureCache& operator=(const TextureCache&) = delete;

  // Stores the (non-owned) scratch buffer used for uploads.
  bool Initialize(CircularBuffer* scratch_buffer);
  // Releases all cached resources.
  void Shutdown();
  // Destroys every cached texture, sampler and handle.
  void Clear();

  // Creates and uploads a texture for the given description and returns the
  // view pairing it with a sampler, or nullptr on failure.
  EntryView* Demand(void* host_base, size_t length,
                    const TextureInfo& texture_info,
                    const SamplerInfo& sampler_info);

 private:
  // Applies per-texture parameters (levels, swizzle).
  bool SetupTexture(GLuint texture, const TextureInfo& texture_info);
  // Applies sampler parameters (wrap, filter, LOD, border).
  bool SetupSampler(GLuint sampler, const TextureInfo& texture_info,
                    const SamplerInfo& sampler_info);

  // Converts and uploads 2D texture data into the given texture.
  bool UploadTexture2D(GLuint texture, void* host_base, size_t length,
                       const TextureInfo& texture_info,
                       const SamplerInfo& sampler_info);

  // Staging buffer for uploads; set by Initialize(), not owned.
  CircularBuffer* scratch_buffer_;
  // All textures currently held by the cache.
  std::vector<Entry> entries_;
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_TEXTURE_CACHE_H_

View File

@ -74,17 +74,32 @@ LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
LPARAM lParam) {
switch (message) {
case WM_PAINT: {
GLContextLock context_lock(&context_);
// TODO(benvanik): is viewport needed?
glViewport(0, 0, width_, height_);
float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f};
glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
if (current_paint_callback_) {
current_paint_callback_();
current_paint_callback_ = nullptr;
{
GLContextLock context_lock(&context_);
wglSwapIntervalEXT(0);
// TODO(benvanik): is viewport needed?
glViewport(0, 0, width_, height_);
float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f};
glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
if (current_paint_callback_) {
current_paint_callback_();
current_paint_callback_ = nullptr;
}
// TODO(benvanik): profiler present.
// Profiler::Present();
// Hacky swap timer.
static int swap_count = 0;
glEnable(GL_SCISSOR_TEST);
glScissor(0, 0, 20, 20);
float red[] = {swap_count / 60.0f, 0, 0, 1.0f};
swap_count = (swap_count + 1) % 60;
glClearNamedFramebufferfv(0, GL_COLOR, 0, red);
glDisable(GL_SCISSOR_TEST);
}
// TODO(benvanik): profiler present.
// Profiler::Present();
SwapBuffers(context_.dc());
} break;
}

View File

@ -0,0 +1,31 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/sampler_info.h>
namespace xe {
namespace gpu {
// Fills out_info from a texture fetch constant and the fetch instruction
// that references it.
//
// For each of the min/mag/mip filters the instruction's value is used unless
// it is 3, in which case the value from the fetch constant is taken instead
// (presumably 3 means "use the fetch constant" - confirm against the ucode
// definitions). Clamp modes always come from the fetch constant.
// Always returns true.
bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
                          const ucode::instr_fetch_tex_t& fetch_instr,
                          SamplerInfo* out_info) {
  // Minification filter.
  if (fetch_instr.min_filter == 3) {
    out_info->min_filter =
        static_cast<ucode::instr_tex_filter_t>(fetch.min_filter);
  } else {
    out_info->min_filter =
        static_cast<ucode::instr_tex_filter_t>(fetch_instr.min_filter);
  }

  // Magnification filter.
  if (fetch_instr.mag_filter == 3) {
    out_info->mag_filter =
        static_cast<ucode::instr_tex_filter_t>(fetch.mag_filter);
  } else {
    out_info->mag_filter =
        static_cast<ucode::instr_tex_filter_t>(fetch_instr.mag_filter);
  }

  // Mip filter.
  if (fetch_instr.mip_filter == 3) {
    out_info->mip_filter =
        static_cast<ucode::instr_tex_filter_t>(fetch.mip_filter);
  } else {
    out_info->mip_filter =
        static_cast<ucode::instr_tex_filter_t>(fetch_instr.mip_filter);
  }

  // Clamp modes: x/y/z of the fetch constant map to u/v/w.
  out_info->clamp_u = fetch.clamp_x;
  out_info->clamp_v = fetch.clamp_y;
  out_info->clamp_w = fetch.clamp_z;

  return true;
}
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,41 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SAMPLER_INFO_H_
#define XENIA_GPU_SAMPLER_INFO_H_
#include <xenia/gpu/ucode.h>
#include <xenia/gpu/xenos.h>
namespace xe {
namespace gpu {
struct SamplerInfo {
ucode::instr_tex_filter_t min_filter;
ucode::instr_tex_filter_t mag_filter;
ucode::instr_tex_filter_t mip_filter;
uint32_t clamp_u;
uint32_t clamp_v;
uint32_t clamp_w;
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
const ucode::instr_fetch_tex_t& fetch_instr,
SamplerInfo* out_info);
bool operator==(const SamplerInfo& other) const {
return min_filter == other.min_filter && mag_filter == other.mag_filter &&
mip_filter == other.mip_filter && clamp_u == other.clamp_u &&
clamp_v == other.clamp_v && clamp_w == other.clamp_w;
}
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SAMPLER_INFO_H_

View File

@ -172,6 +172,8 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
return;
}
assert_true(vtx->const_index <= 0x1F);
uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
auto& inputs = buffer_inputs_;
BufferDescElement* el = nullptr;
@ -240,10 +242,12 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
void Shader::GatherTextureFetch(const instr_fetch_tex_t* tex) {
// TODO(benvanik): check dest_swiz to see if we are writing anything.
assert_true(tex->const_idx < 0x1F);
assert_true(sampler_inputs_.count + 1 < poly::countof(sampler_inputs_.descs));
auto& input = sampler_inputs_.descs[sampler_inputs_.count++];
input.input_index = sampler_inputs_.count - 1;
input.fetch_slot = tex->const_idx & 0xF; // ?
input.fetch_slot = tex->const_idx & 0xF; // ??????????????????????????????
input.tex_fetch = *tex;
// Format mangling, size estimation, etc.

View File

@ -9,8 +9,12 @@
'register_file.cc',
'register_file.h',
'register_table.inc',
'sampler_info.cc',
'sampler_info.h',
'shader.cc',
'shader.h',
'texture_info.cc',
'texture_info.h',
'ucode.h',
'ucode_disassembler.cc',
'ucode_disassembler.h',

View File

@ -0,0 +1,239 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/texture_info.h>
#include <poly/math.h>
namespace xe {
namespace gpu {
using namespace xe::gpu::ucode;
using namespace xe::gpu::xenos;
// Decodes a texture fetch constant into |out_info|.
//
// Copies swizzle, dimension, the raw size fields, endianness and the tiled
// flag from |fetch|, derives block_size / texel_pitch / is_compressed from the
// format, and finally computes the per-dimension size fields.
//
// Returns true on success. Returns false when the format is not handled yet
// or when the texture is 3D / cube (size calculation is not implemented); in
// that case |out_info| is only partially filled and should not be used.
bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
                          TextureInfo* out_info) {
  // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
  // a2xx_sq_surfaceformat

  auto& info = *out_info;
  info.swizzle = fetch.swizzle;

  info.dimension = static_cast<Dimension>(fetch.dimension);
  // Give every extent a defined value first: the switch below only writes the
  // extents that exist for the given dimension, and leaving the others
  // indeterminate would make any later read/compare of the struct unreliable.
  info.width = 0;
  info.height = 0;
  info.depth = 0;
  switch (info.dimension) {
    case Dimension::k1D:
      info.width = fetch.size_1d.width;
      break;
    case Dimension::k2D:
      info.width = fetch.size_2d.width;
      info.height = fetch.size_2d.height;
      break;
    case Dimension::k3D:
    case Dimension::kCube:
      info.width = fetch.size_3d.width;
      info.height = fetch.size_3d.height;
      info.depth = fetch.size_3d.depth;
      break;
  }
  info.endianness = static_cast<Endian>(fetch.endianness);

  // Defaults; overwritten per format below.
  info.block_size = 0;
  info.texel_pitch = 0;
  info.is_tiled = fetch.tiled;
  info.is_compressed = false;
  info.input_length = 0;
  info.format = static_cast<TextureFormat>(fetch.format);
  switch (fetch.format) {
    case FMT_8:
      info.block_size = 1;
      info.texel_pitch = 1;
      break;
    case FMT_1_5_5_5:
      info.block_size = 1;
      info.texel_pitch = 2;
      break;
    case FMT_8_8_8_8:
    case FMT_8_8_8_8_AS_16_16_16_16:
      info.block_size = 1;
      info.texel_pitch = 4;
      break;
    case FMT_4_4_4_4:
      info.block_size = 1;
      info.texel_pitch = 2;
      break;
    case FMT_16_16_16_16_FLOAT:
      info.block_size = 1;
      info.texel_pitch = 8;
      break;
    case FMT_32_FLOAT:
      info.block_size = 1;
      info.texel_pitch = 4;
      break;
    case FMT_DXT1:
      info.block_size = 4;
      info.texel_pitch = 8;
      info.is_compressed = true;
      break;
    case FMT_DXT2_3:
    case FMT_DXT4_5:
      info.block_size = 4;
      info.texel_pitch = 16;
      info.is_compressed = true;
      break;
    case FMT_DXT1_AS_16_16_16_16:
      // TODO(benvanik): conversion?
      info.block_size = 4;
      info.texel_pitch = 8;
      info.is_compressed = true;
      break;
    case FMT_DXT2_3_AS_16_16_16_16:
    case FMT_DXT4_5_AS_16_16_16_16:
      // TODO(benvanik): conversion?
      info.block_size = 4;
      info.texel_pitch = 16;
      info.is_compressed = true;
      break;
    // Known formats that are not supported yet.
    case FMT_1_REVERSE:
    case FMT_1:
    case FMT_5_6_5:
    case FMT_6_5_5:
    case FMT_2_10_10_10:
    case FMT_8_A:
    case FMT_8_B:
    case FMT_8_8:
    case FMT_Cr_Y1_Cb_Y0:
    case FMT_Y1_Cr_Y0_Cb:
    case FMT_5_5_5_1:
    case FMT_8_8_8_8_A:
    case FMT_10_11_11:
    case FMT_11_11_10:
    case FMT_24_8:
    case FMT_24_8_FLOAT:
    case FMT_16:
    case FMT_16_16:
    case FMT_16_16_16_16:
    case FMT_16_EXPAND:
    case FMT_16_16_EXPAND:
    case FMT_16_16_16_16_EXPAND:
    case FMT_16_FLOAT:
    case FMT_16_16_FLOAT:
    case FMT_32:
    case FMT_32_32:
    case FMT_32_32_32_32:
    case FMT_32_32_FLOAT:
    case FMT_32_32_32_32_FLOAT:
    case FMT_32_AS_8:
    case FMT_32_AS_8_8:
    case FMT_16_MPEG:
    case FMT_16_16_MPEG:
    case FMT_8_INTERLACED:
    case FMT_32_AS_8_INTERLACED:
    case FMT_32_AS_8_8_INTERLACED:
    case FMT_16_INTERLACED:
    case FMT_16_MPEG_INTERLACED:
    case FMT_16_16_MPEG_INTERLACED:
    case FMT_DXN:
    case FMT_2_10_10_10_AS_16_16_16_16:
    case FMT_10_11_11_AS_16_16_16_16:
    case FMT_11_11_10_AS_16_16_16_16:
    case FMT_32_32_32_FLOAT:
    case FMT_DXT3A:
    case FMT_DXT5A:
    case FMT_CTX1:
    case FMT_DXT3A_AS_1_1_1_1:
      PLOGE("Unhandled texture format");
      return false;
    default:
      assert_unhandled_case(fetch.format);
      return false;
  }

  // Must be called here when we know the format.
  switch (info.dimension) {
    case Dimension::k1D:
      info.CalculateTextureSizes1D(fetch);
      break;
    case Dimension::k2D:
      info.CalculateTextureSizes2D(fetch);
      break;
    case Dimension::k3D:
      // TODO(benvanik): calculate size.
      return false;
    case Dimension::kCube:
      // TODO(benvanik): calculate size.
      return false;
  }

  return true;
}
// Fills the 1D size fields from |fetch|. Only the width is recorded, and it is
// stored exactly as it appears in the fetch constant.
// NOTE(review): the 2D path adds 1 to the fetch sizes while this one does not;
// the original author flagged this with "?" - confirm which is intended.
void TextureInfo::CalculateTextureSizes1D(const xe_gpu_texture_fetch_t& fetch) {
  const uint32_t fetch_width = fetch.size_1d.width;
  size_1d.width = fetch_width;
}
// Fills size_2d.* and input_length for a 2D texture.
//
// Expects block_size, texel_pitch and is_compressed to already be set for the
// texture's format (Prepare does this before calling), with block_size != 0.
//
// - logical_*: texture size in texels (fetch size fields plus one).
// - block_*:   size in blocks, rounded up so partially covered edge blocks
//              are counted.
// - input_*:   logical size padded to the alignment used by the source data.
// - output_*:  size of the resulting texture.
// - *_pitch:   size of one row of blocks, in texel_pitch units.
void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
  size_2d.logical_width = 1 + fetch.size_2d.width;
  size_2d.logical_height = 1 + fetch.size_2d.height;

  // Ceiling division: a compressed texture whose size is not a multiple of
  // the block size still stores whole blocks along its right/bottom edges.
  // Truncating here would yield 0 blocks (and a 0 input_length) for textures
  // smaller than one block.
  size_2d.block_width = (size_2d.logical_width + block_size - 1) / block_size;
  size_2d.block_height =
      (size_2d.logical_height + block_size - 1) / block_size;

  if (!is_compressed) {
    // must be 32x32 but also must have a pitch that is a multiple of 256 bytes
    uint32_t bytes_per_block = block_size * block_size * texel_pitch;
    uint32_t width_multiple = 32;
    if (bytes_per_block) {
      uint32_t minimum_multiple = 256 / bytes_per_block;
      if (width_multiple < minimum_multiple) {
        width_multiple = minimum_multiple;
      }
    }

    size_2d.input_width = poly::round_up(size_2d.logical_width, width_multiple);
    size_2d.input_height = poly::round_up(size_2d.logical_height, 32);
    size_2d.output_width = size_2d.logical_width;
    size_2d.output_height = size_2d.logical_height;
  } else {
    // must be 128x128
    size_2d.input_width = poly::round_up(size_2d.logical_width, 128);
    size_2d.input_height = poly::round_up(size_2d.logical_height, 128);
    size_2d.output_width = poly::next_pow2(size_2d.logical_width);
    size_2d.output_height = poly::next_pow2(size_2d.logical_height);
  }

  size_2d.logical_pitch = size_2d.block_width * texel_pitch;
  // input_width is a multiple of 32 or more, so this division is exact for
  // the block sizes in use (1 and 4).
  size_2d.input_pitch = (size_2d.input_width / block_size) * texel_pitch;

  // Tiled and linear textures currently use the same length.
  // TODO(review): the tiled case was marked "?" by the original author; it may
  // need to account for the padded input size instead.
  input_length = size_2d.block_height * size_2d.logical_pitch;
}
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
//
// Computes the row-dependent part of a tiled 2D texture offset for row |y| of
// a surface that is |width| wide. The result is meant to be passed as
// |base_offset| to TiledOffset2DInner.
// NOTE(review): |log_bpp| is presumably log2 of the bytes per element - confirm
// against callers.
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
                                         uint32_t log_bpp) {
  // Contribution of the 32-row group that contains y.
  const uint32_t rows_of_32 = y >> 5;
  const uint32_t columns_of_32 = width >> 5;
  const uint32_t macro_term = (rows_of_32 * columns_of_32) << (log_bpp + 7);

  // Contribution of bits 1-2 of y, split around bit 4 of the scaled value.
  const uint32_t micro_term = ((y & 6) << 2) << log_bpp;

  uint32_t offset = macro_term;
  offset += (micro_term & ~15) << 1;
  offset += micro_term & 15;
  offset += (y & 8) << (3 + log_bpp);
  offset += (y & 1) << 4;
  return offset;
}
// Combines the column-dependent part of a tiled 2D texture offset with
// |base_offset| (the value produced by TiledOffset2DOuter for the same row)
// and applies the final bit shuffle that yields the offset for (x, y).
// NOTE(review): |bpp| is used as a shift amount here, so it is presumably the
// same log2 value as TiledOffset2DOuter's |log_bpp| - confirm against callers.
uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
                                         uint32_t base_offset) {
  // Column contribution: the 32-column group plus the low three bits of x.
  const uint32_t macro_term = (x >> 5) << (bpp + 7);
  const uint32_t micro_term = (x & 7) << bpp;
  const uint32_t column_term =
      macro_term + ((micro_term & ~15) << 1) + (micro_term & 15);
  const uint32_t combined = base_offset + column_term;

  // Spread the combined offset and mix in the remaining x/y bits.
  const uint32_t high_bits = (combined & ~511) << 3;
  const uint32_t mid_bits = (combined & 448) << 2;
  const uint32_t low_bits = combined & 63;
  const uint32_t y_bit4 = (y & 16) << 7;
  const uint32_t bank = ((((y & 8) >> 2) + (x >> 3)) & 3) << 6;

  return high_bits + mid_bits + low_bits + y_bit4 + bank;
}
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,140 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_TEXTURE_INFO_H_
#define XENIA_GPU_TEXTURE_INFO_H_
#include <xenia/gpu/ucode.h>
#include <xenia/gpu/xenos.h>
namespace xe {
namespace gpu {
// Texture formats, numbered to match the a2xx_sq_surfaceformat values.
// TextureInfo::Prepare casts the fetch constant's format field directly to
// this enum, so the numeric values must not be changed.
enum class TextureFormat : uint32_t {
k_1_REVERSE = 0,
k_1 = 1,
k_8 = 2,
k_1_5_5_5 = 3,
k_5_6_5 = 4,
k_6_5_5 = 5,
k_8_8_8_8 = 6,
k_2_10_10_10 = 7,
k_8_A = 8,
k_8_B = 9,
k_8_8 = 10,
k_Cr_Y1_Cb_Y0 = 11,
k_Y1_Cr_Y0_Cb = 12,
// Value 13 has no enumerator here (hole in the numbering; meaning unknown).
k_8_8_8_8_A = 14,
k_4_4_4_4 = 15,
k_10_11_11 = 16,
k_11_11_10 = 17,
k_DXT1 = 18,
k_DXT2_3 = 19,
k_DXT4_5 = 20,
// Value 21 has no enumerator here (hole in the numbering; meaning unknown).
k_24_8 = 22,
k_24_8_FLOAT = 23,
k_16 = 24,
k_16_16 = 25,
k_16_16_16_16 = 26,
k_16_EXPAND = 27,
k_16_16_EXPAND = 28,
k_16_16_16_16_EXPAND = 29,
k_16_FLOAT = 30,
k_16_16_FLOAT = 31,
k_16_16_16_16_FLOAT = 32,
k_32 = 33,
k_32_32 = 34,
k_32_32_32_32 = 35,
k_32_FLOAT = 36,
k_32_32_FLOAT = 37,
k_32_32_32_32_FLOAT = 38,
k_32_AS_8 = 39,
k_32_AS_8_8 = 40,
k_16_MPEG = 41,
k_16_16_MPEG = 42,
k_8_INTERLACED = 43,
k_32_AS_8_INTERLACED = 44,
k_32_AS_8_8_INTERLACED = 45,
k_16_INTERLACED = 46,
k_16_MPEG_INTERLACED = 47,
k_16_16_MPEG_INTERLACED = 48,
k_DXN = 49,
k_8_8_8_8_AS_16_16_16_16 = 50,
k_DXT1_AS_16_16_16_16 = 51,
k_DXT2_3_AS_16_16_16_16 = 52,
k_DXT4_5_AS_16_16_16_16 = 53,
k_2_10_10_10_AS_16_16_16_16 = 54,
k_10_11_11_AS_16_16_16_16 = 55,
k_11_11_10_AS_16_16_16_16 = 56,
k_32_32_32_FLOAT = 57,
k_DXT3A = 58,
k_DXT5A = 59,
k_CTX1 = 60,
k_DXT3A_AS_1_1_1_1 = 61,
// Sentinel for "no/unrecognized format"; not a hardware format value.
kUnknown = 0xFFFFFFFFu,
};
// Description of a guest texture decoded from a texture fetch constant.
// Populate with TextureInfo::Prepare; the fields are only meaningful when
// Prepare returned true.
struct TextureInfo {
// Swizzle field copied from the fetch constant.
uint32_t swizzle;
// 1D / 2D / 3D / cube; selects which member of the size union is used.
Dimension dimension;
// Size fields copied unmodified from the fetch constant.
// NOTE(review): the 2D size calculation adds 1 to the fetch sizes, so these
// look like "size minus one" values - confirm before using them as counts.
uint32_t width;
uint32_t height;
uint32_t depth;
// Edge length of one block in texels: Prepare uses 1 for uncompressed formats
// and 4 for the DXT formats.
uint32_t block_size;
// Size of one block as set per format by Prepare (e.g. 4 for 8_8_8_8, 8 for
// DXT1); presumably bytes - TODO confirm.
uint32_t texel_pitch;
xenos::Endian endianness;
// Tiled flag copied from the fetch constant.
bool is_tiled;
// True for the DXT formats.
bool is_compressed;
// Length of the source texture data as computed by the size calculation
// (block rows times logical pitch for 2D).
uint32_t input_length;
TextureFormat format;
// Per-dimension size details; only the member matching |dimension| is valid.
union {
struct {
uint32_t width;
} size_1d;
struct {
// Size in texels.
uint32_t logical_width;
uint32_t logical_height;
// Size in blocks of block_size texels.
uint32_t block_width;
uint32_t block_height;
// Logical size rounded up to the alignment of the source data.
uint32_t input_width;
uint32_t input_height;
// Size of the resulting texture.
uint32_t output_width;
uint32_t output_height;
// Row sizes: blocks per row times texel_pitch, for the logical and the
// padded input width respectively.
uint32_t logical_pitch;
uint32_t input_pitch;
} size_2d;
// 3D and cube sizes are not calculated yet; these are placeholders.
struct {
} size_3d;
struct {
} size_cube;
};
// Decodes |fetch| into |out_info|. Returns false for formats or dimensions
// that are not handled.
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
TextureInfo* out_info);
// Row-dependent part of a tiled 2D offset; feed the result to
// TiledOffset2DInner as |base_offset|.
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp);
// Final tiled 2D offset for (x, y) given the row's |base_offset|.
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
uint32_t base_offset);
private:
// Helpers used by Prepare once the format-dependent fields are known.
void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch);
void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch);
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_TEXTURE_INFO_H_

View File

@ -35,6 +35,13 @@ enum class PrimitiveType : uint32_t {
kQuadList = 0x0D,
};
// Texture dimensionality. TextureInfo::Prepare casts the fetch constant's
// dimension field directly to this enum, so the numeric values must stay in
// sync with that encoding.
enum class Dimension : uint32_t {
k1D = 0,
k2D = 1,
k3D = 2,
kCube = 3,
};
namespace xenos {
typedef enum {