diff --git a/src/poly/math.h b/src/poly/math.h index 0fbbb0702..49edd2dc0 100644 --- a/src/poly/math.h +++ b/src/poly/math.h @@ -36,7 +36,7 @@ T align(T value, T alignment) { // Rounds the given number up to the next highest multiple. template T round_up(T value, V multiple) { - return value ? (value + multiple - 1 - (value - 1) % multiple) : multiple; + return value ? (((value + multiple - 1) / multiple) * multiple) : multiple; } inline float saturate(float value) { diff --git a/src/xenia/gpu/gl4/circular_buffer.cc b/src/xenia/gpu/gl4/circular_buffer.cc index a7d456c5a..d2a342646 100644 --- a/src/xenia/gpu/gl4/circular_buffer.cc +++ b/src/xenia/gpu/gl4/circular_buffer.cc @@ -27,29 +27,41 @@ CircularBuffer::CircularBuffer(size_t capacity) gpu_base_(0), host_base_(nullptr) {} -CircularBuffer::~CircularBuffer() { - glUnmapNamedBuffer(buffer_); - glDeleteBuffers(1, &buffer_); -} +CircularBuffer::~CircularBuffer() { Shutdown(); } bool CircularBuffer::Initialize() { glCreateBuffers(1, &buffer_); glNamedBufferStorage(buffer_, capacity_, nullptr, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); host_base_ = reinterpret_cast(glMapNamedBufferRange( - buffer_, 0, capacity_, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | - GL_MAP_UNSYNCHRONIZED_BIT | - GL_MAP_PERSISTENT_BIT)); + buffer_, 0, capacity_, + GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_PERSISTENT_BIT)); assert_not_null(host_base_); if (!host_base_) { return false; } - glMakeNamedBufferResidentNV(buffer_, GL_WRITE_ONLY); - glGetNamedBufferParameterui64vNV(buffer_, GL_BUFFER_GPU_ADDRESS_NV, - &gpu_base_); + + if (GLEW_NV_shader_buffer_load) { + // To use this bindlessly we must make it resident. + glMakeNamedBufferResidentNV(buffer_, GL_WRITE_ONLY); + glGetNamedBufferParameterui64vNV(buffer_, GL_BUFFER_GPU_ADDRESS_NV, + &gpu_base_); + } return true; } +void CircularBuffer::Shutdown() { + if (!buffer_) { + return; + } + glUnmapNamedBuffer(buffer_); + if (GLEW_NV_shader_buffer_load) { + glMakeNamedBufferNonResidentNV(buffer_); + } + glDeleteBuffers(1, &buffer_); + buffer_ = 0; +} + CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) { // Addresses must always be % 256. length = poly::round_up(length, 256); @@ -64,6 +76,7 @@ CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) { Allocation allocation; allocation.host_ptr = host_base_ + write_head_; allocation.gpu_ptr = gpu_base_ + write_head_; + allocation.offset = write_head_; allocation.length = length; write_head_ += length; return allocation; diff --git a/src/xenia/gpu/gl4/circular_buffer.h b/src/xenia/gpu/gl4/circular_buffer.h index dde0e41d1..987ce746c 100644 --- a/src/xenia/gpu/gl4/circular_buffer.h +++ b/src/xenia/gpu/gl4/circular_buffer.h @@ -26,10 +26,12 @@ class CircularBuffer { struct Allocation { void* host_ptr; GLuint64 gpu_ptr; + size_t offset; size_t length; }; bool Initialize(); + void Shutdown(); GLuint handle() const { return buffer_; } diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index dd1fef43f..c54c541d8 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -36,7 +38,7 @@ const GLuint kAnyTarget = UINT_MAX; // All uncached vertex/index data goes here. If it fills up we need to sync // with the GPU, so this should be large enough to prevent that in a normal // frame. 
-const size_t kScratchBufferCapacity = 64 * 1024 * 1024; +const size_t kScratchBufferCapacity = 256 * 1024 * 1024; CommandProcessor::CachedPipeline::CachedPipeline() = default; @@ -61,6 +63,7 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) write_ptr_index_(0), bin_select_(0xFFFFFFFFull), bin_mask_(0xFFFFFFFFull), + has_bindless_vbos_(false), active_vertex_shader_(nullptr), active_pixel_shader_(nullptr), active_framebuffer_(nullptr), @@ -152,29 +155,34 @@ void CommandProcessor::WorkerMain() { } bool CommandProcessor::SetupGL() { - // Uniform buffer that stores the per-draw state (constants, etc). - glCreateBuffers(1, &uniform_data_buffer_); - glBindBuffer(GL_UNIFORM_BUFFER, uniform_data_buffer_); - glNamedBufferStorage(uniform_data_buffer_, 16 * 1024, nullptr, - GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT); - // Circular buffer holding scratch vertex/index data. if (!scratch_buffer_.Initialize()) { PLOGE("Unable to initialize scratch buffer"); return false; } + // Texture cache that keeps track of any textures/samplers used. + if (!texture_cache_.Initialize(&scratch_buffer_)) { + PLOGE("Unable to initialize texture cache"); + return false; + } + GLuint vertex_array; glGenVertexArrays(1, &vertex_array); glBindVertexArray(vertex_array); - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + + if (GLEW_NV_vertex_buffer_unified_memory) { + has_bindless_vbos_ = true; + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + } return true; } void CommandProcessor::ShutdownGL() { - glDeleteBuffers(1, &uniform_data_buffer_); + texture_cache_.Shutdown(); + scratch_buffer_.Shutdown(); } void CommandProcessor::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) { @@ -264,6 +272,7 @@ void CommandProcessor::PrepareForWait() { // make interrupt callbacks from the GPU so that we don't have to do a full // synchronize here. glFlush(); + glFinish(); if (FLAGS_thread_safe_gl) { context_->ClearCurrent(); @@ -1142,6 +1151,8 @@ void CommandProcessor::PrepareDraw(DrawCommand* draw_command) { // Generic stuff. cmd.start_index = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32; cmd.base_vertex = 0; + + cmd.state_data = nullptr; } bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { @@ -1158,6 +1169,18 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { return IssueCopy(draw_command); } + // TODO(benvanik): actually cache things >_> + texture_cache_.Clear(); + + // Allocate a state data block. + // Everything the shaders access lives here. 
+ auto allocation = scratch_buffer_.Acquire(sizeof(UniformDataBlock)); + cmd.state_data = reinterpret_cast(allocation.host_ptr); + if (!cmd.state_data) { + PLOGE("Unable to allocate uniform data buffer"); + return false; + } + if (!UpdateRenderTargets(draw_command)) { PLOGE("Unable to setup render targets"); return false; @@ -1172,17 +1195,15 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { PLOGE("Unable to setup render state"); return false; } - + if (!UpdateConstants(draw_command)) { + PLOGE("Unable to update shader constants"); + return false; + } if (!UpdateShaders(draw_command)) { PLOGE("Unable to prepare draw shaders"); return false; } - // if (!PopulateSamplers(draw_command)) { - // XELOGE("Unable to prepare draw samplers"); - // return false; - //} - if (!PopulateIndexBuffer(draw_command)) { PLOGE("Unable to setup index buffer"); return false; @@ -1191,6 +1212,10 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { PLOGE("Unable to setup vertex buffers"); return false; } + if (!PopulateSamplers(draw_command)) { + PLOGE("Unable to prepare draw samplers"); + return false; + } GLenum prim_type = 0; switch (cmd.prim_type) { @@ -1228,6 +1253,7 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { break; case PrimitiveType::kQuadList: prim_type = GL_LINES_ADJACENCY; + return false; /*if (vs->DemandGeometryShader(D3D11VertexShaderResource::QUAD_LIST_SHADER, &geometry_shader)) { @@ -1237,10 +1263,15 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { default: case PrimitiveType::kUnknown0x07: prim_type = GL_POINTS; - XELOGE("D3D11: unsupported primitive type %d", cmd.prim_type); + XELOGE("unsupported primitive type %d", cmd.prim_type); break; } + // Commit the state buffer - nothing can change after this. + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, scratch_buffer_.handle(), + allocation.offset, allocation.length); + scratch_buffer_.Commit(std::move(allocation)); + // HACK HACK HACK glDisable(GL_DEPTH_TEST); @@ -1254,13 +1285,108 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { prim_type, cmd.index_count, cmd.index_buffer.format == IndexFormat::kInt32 ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT, - reinterpret_cast(cmd.start_index * element_size), + reinterpret_cast(cmd.index_buffer.buffer_offset + + cmd.start_index * element_size), cmd.base_vertex); } else { // Auto draw. glDrawArrays(prim_type, cmd.start_index, cmd.index_count); } + // Hacky draw counter. + if (false) { + static int draw_count = 0; + glEnable(GL_SCISSOR_TEST); + glScissor(20, 0, 20, 20); + float red[] = {0, draw_count / 100.0f, 0, 1.0f}; + draw_count = (draw_count + 1) % 100; + glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0, + red); + glDisable(GL_SCISSOR_TEST); + } + + return true; +} + +bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { + auto& regs = *register_file_; + + auto enable_mode = + static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + + // RB_SURFACE_INFO + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_pitch = surface_info & 0x3FFF; + auto surface_msaa = static_cast((surface_info >> 16) & 0x3); + + // Get/create all color render targets, if we are using them. + // In depth-only mode we don't need them. 
+ GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE}; + GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget}; + if (enable_mode == ModeControl::kColorDepth) { + uint32_t color_info[4] = { + regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, + regs[XE_GPU_REG_RB_COLOR2_INFO].u32, + regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + }; + // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE + uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + for (int n = 0; n < poly::countof(color_info); n++) { + uint32_t write_mask = (color_mask >> (n * 4)) & 0xF; + if (!write_mask) { + // Unused, so keep disabled and set to wildcard so we'll take any + // framebuffer that has it. + continue; + } + uint32_t color_base = color_info[n] & 0xFFF; + auto color_format = + static_cast((color_info[n] >> 16) & 0xF); + color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa, + color_base, color_format); + draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n; + glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2), + !!(write_mask & 0x4), !!(write_mask & 0x8)); + } + } + + // Get/create depth buffer, but only if we are going to use it. + uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; + uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; + bool uses_depth = + (depth_control & 0x00000002) || (depth_control & 0x00000004); + uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16; + bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0); + GLuint depth_target = kAnyTarget; + if (uses_depth && uses_stencil) { + uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + uint32_t depth_base = depth_info & 0xFFF; + auto depth_format = + static_cast((depth_info >> 16) & 0x1); + depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base, + depth_format); + // TODO(benvanik): when a game switches does it expect to keep the same + // depth buffer contents? + } + + // Get/create a framebuffer with the required targets. + // Note that none may be returned if we really don't need one. + auto cached_framebuffer = GetFramebuffer(color_targets, depth_target); + active_framebuffer_ = cached_framebuffer; + if (!active_framebuffer_) { + // Nothing to do. + return true; + } + + // Setup just the targets we want. + glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4, + draw_buffers); + + // Make active. + // TODO(benvanik): can we do this all named? + // TODO(benvanik): do we want this on READ too? + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer); + return true; } @@ -1272,57 +1398,24 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { auto& regs = *register_file_; - union float4 { - float v[4]; - struct { - float x, y, z, w; - }; - }; - struct UniformDataBlock { - float4 window_offset; // tx,ty,rt_w,rt_h - float4 window_scissor; // x0,y0,x1,y1 - float4 viewport_offset; // tx,ty,tz,? - float4 viewport_scale; // sx,sy,sz,? - // TODO(benvanik): vertex format xyzw? - - float4 alpha_test; // alpha test enable, func, ref, ? - - // Register data from 0x4000 to 0x4927. - // SHADER_CONSTANT_000_X... - float4 float_consts[512]; - // SHADER_CONSTANT_FETCH_00_0... - uint32_t fetch_consts[32 * 6]; - // SHADER_CONSTANT_BOOL_000_031... - int32_t bool_consts[8]; - // SHADER_CONSTANT_LOOP_00... 
- int32_t loop_consts[32]; - }; - static_assert(sizeof(UniformDataBlock) <= 16 * 1024, - "Need <=16k uniform data"); - - auto allocation = scratch_buffer_.Acquire(16 * 1024); - auto buffer_ptr = reinterpret_cast(allocation.host_ptr); - if (!buffer_ptr) { - PLOGE("Unable to allocate uniform data buffer"); - return false; - } + auto state_data = draw_command->state_data; // Window parameters. // See r200UpdateWindow: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - buffer_ptr->window_offset.x = float(window_offset & 0x7FFF); - buffer_ptr->window_offset.y = float((window_offset >> 16) & 0x7FFF); + state_data->window_offset.x = float(window_offset & 0x7FFF); + state_data->window_offset.y = float((window_offset >> 16) & 0x7FFF); uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - buffer_ptr->window_scissor.x = float(window_scissor_tl & 0x7FFF); - buffer_ptr->window_scissor.y = float((window_scissor_tl >> 16) & 0x7FFF); - buffer_ptr->window_scissor.z = float(window_scissor_br & 0x7FFF); - buffer_ptr->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF); + state_data->window_scissor.x = float(window_scissor_tl & 0x7FFF); + state_data->window_scissor.y = float((window_scissor_tl >> 16) & 0x7FFF); + state_data->window_scissor.z = float(window_scissor_br & 0x7FFF); + state_data->window_scissor.w = float((window_scissor_br >> 16) & 0x7FFF); // HACK: no clue where to get these values. - buffer_ptr->window_offset.z = 1280; - buffer_ptr->window_offset.w = 720; + state_data->window_offset.z = 1280; + state_data->window_offset.w = 720; // Whether each of the viewport settings is enabled. // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf @@ -1338,20 +1431,20 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { vport_yoffset_enable == vport_zoffset_enable); // Viewport scaling. Only enabled if the flags are all set. - buffer_ptr->viewport_scale.x = + state_data->viewport_scale.x = vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1; // 640 - buffer_ptr->viewport_offset.x = vport_xoffset_enable + state_data->viewport_offset.x = vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0; // 640 - buffer_ptr->viewport_scale.y = vport_yscale_enable + state_data->viewport_scale.y = vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1; // -360 - buffer_ptr->viewport_offset.y = vport_yoffset_enable + state_data->viewport_offset.y = vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0; // 360 - buffer_ptr->viewport_scale.z = + state_data->viewport_scale.z = vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1; // 1 - buffer_ptr->viewport_offset.z = + state_data->viewport_offset.z = vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0; // 0 // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. // = false: multiply the X, Y coordinates by 1/W0. @@ -1365,15 +1458,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { // TODO(benvanik): pass to shaders? disable transform? etc? glViewport(0, 0, 1280, 720); - // Copy over all constants. - // TODO(benvanik): partial updates, etc. We could use shader constant access - // knowledge that we get at compile time to only upload those constants - // required. 
- std::memcpy( - &buffer_ptr->float_consts, ®s[XE_GPU_REG_SHADER_CONSTANT_000_X].f32, - sizeof(buffer_ptr->float_consts) + sizeof(buffer_ptr->fetch_consts) + - sizeof(buffer_ptr->loop_consts) + sizeof(buffer_ptr->bool_consts)); - // Scissoring. int32_t screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; int32_t screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32; @@ -1424,10 +1508,10 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { // Deprecated in GL, implemented in shader. // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32; - buffer_ptr->alpha_test.x = + state_data->alpha_test.x = (color_control & 0x4) ? 1.0f : 0.0f; // ALPAHTESTENABLE - buffer_ptr->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC - buffer_ptr->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + state_data->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC + state_data->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32; static const GLenum blend_map[] = { /* 0 */ GL_ZERO, @@ -1575,91 +1659,23 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { stencil_op_map[(depth_control & 0x0001C000) >> 14]); } - // Stash - program setup will bind this to uniforms. - draw_command->state_data_gpu_ptr = allocation.gpu_ptr; - scratch_buffer_.Commit(std::move(allocation)); - return true; } -bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { +bool CommandProcessor::UpdateConstants(DrawCommand* draw_command) { auto& regs = *register_file_; + auto state_data = draw_command->state_data; - auto enable_mode = - static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + // TODO(benvanik): partial updates, etc. We could use shader constant access + // knowledge that we get at compile time to only upload those constants + // required. If we did this as a variable length then we could really cut + // down on state block sizes. - // RB_SURFACE_INFO - // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = surface_info & 0x3FFF; - auto surface_msaa = static_cast((surface_info >> 16) & 0x3); - - // Get/create all color render targets, if we are using them. - // In depth-only mode we don't need them. - GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE}; - GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget}; - if (enable_mode == ModeControl::kColorDepth) { - uint32_t color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, - regs[XE_GPU_REG_RB_COLOR3_INFO].u32, - }; - // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE - uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; - for (int n = 0; n < poly::countof(color_info); n++) { - uint32_t write_mask = (color_mask >> (n * 4)) & 0xF; - if (!write_mask) { - // Unused, so keep disabled and set to wildcard so we'll take any - // framebuffer that has it. - continue; - } - uint32_t color_base = color_info[n] & 0xFFF; - auto color_format = - static_cast((color_info[n] >> 16) & 0xF); - color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa, - color_base, color_format); - draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n; - glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2), - !!(write_mask & 0x4), !!(write_mask & 0x8)); - } - } - - // Get/create depth buffer, but only if we are going to use it. 
- uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; - bool uses_depth = - (depth_control & 0x00000002) || (depth_control & 0x00000004); - uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16; - bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0); - GLuint depth_target = kAnyTarget; - if (uses_depth && uses_stencil) { - uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - uint32_t depth_base = depth_info & 0xFFF; - auto depth_format = - static_cast((depth_info >> 16) & 0x1); - depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base, - depth_format); - // TODO(benvanik): when a game switches does it expect to keep the same - // depth buffer contents? - } - - // Get/create a framebuffer with the required targets. - // Note that none may be returned if we really don't need one. - auto cached_framebuffer = GetFramebuffer(color_targets, depth_target); - active_framebuffer_ = cached_framebuffer; - if (!active_framebuffer_) { - // Nothing to do. - return true; - } - - // Setup just the targets we want. - glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4, - draw_buffers); - - // Make active. - // TODO(benvanik): can we do this all named? - // TODO(benvanik): do we want this on READ too? - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer); + // Copy over all constants. + std::memcpy( + &state_data->float_consts, ®s[XE_GPU_REG_SHADER_CONSTANT_000_X].f32, + sizeof(state_data->float_consts) + sizeof(state_data->fetch_consts) + + sizeof(state_data->loop_consts) + sizeof(state_data->bool_consts)); return true; } @@ -1718,28 +1734,10 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) { glUseProgramStages(pipeline, GL_GEOMETRY_SHADER_BIT, geometry_program); glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, fragment_program); - // HACK: layout(location=0) on a bindless uniform crashes nvidia driver. - GLint vertex_state_loc = glGetUniformLocation(vertex_program, "state"); - assert_true(vertex_state_loc == 0); - GLint geometry_state_loc = - geometry_program ? glGetUniformLocation(geometry_program, "state") : -1; - assert_true(geometry_state_loc == -1 || geometry_state_loc == 0); - GLint fragment_state_loc = glGetUniformLocation(fragment_program, "state"); - assert_true(fragment_state_loc == -1 || fragment_state_loc == 0); - cached_pipeline->handles.default_pipeline = pipeline; } - // TODO(benvanik): do we need to do this for all stages if the locations - // match? - glProgramUniformHandleui64ARB(vertex_program, 0, cmd.state_data_gpu_ptr); - /*if (geometry_program && geometry_state_loc != -1) { - glProgramUniformHandleui64ARB(geometry_program, 0, cmd.state_data_gpu_ptr); - }*/ - /*if (fragment_state_loc != -1) { - glProgramUniformHandleui64ARB(fragment_program, 0, - cmd.state_data_gpu_ptr); - }*/ + // NOTE: we don't yet have our state data pointer - that comes at the end. glBindProgramPipeline(cached_pipeline->handles.default_pipeline); @@ -1759,10 +1757,10 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) { assert_true(info.endianness == Endian::k8in16 || info.endianness == Endian::k8in32); - auto allocation = scratch_buffer_.Acquire(cmd.index_count * - (info.format == IndexFormat::kInt32 - ? sizeof(uint32_t) - : sizeof(uint16_t))); + size_t total_size = + cmd.index_count * (info.format == IndexFormat::kInt32 ? 
sizeof(uint32_t) + : sizeof(uint16_t)); + auto allocation = scratch_buffer_.Acquire(total_size); if (info.format == IndexFormat::kInt32) { poly::copy_and_swap_32_aligned( @@ -1776,9 +1774,14 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) { cmd.index_count); } - glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, allocation.gpu_ptr, - allocation.length); - + if (has_bindless_vbos_) { + glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, allocation.gpu_ptr, + allocation.length); + } else { + // Offset is used in glDrawElements. + cmd.index_buffer.buffer_offset = allocation.offset; + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, scratch_buffer_.handle()); + } scratch_buffer_.Commit(std::move(allocation)); return true; @@ -1792,7 +1795,8 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { const auto& buffer_inputs = active_vertex_shader_->buffer_inputs(); - for (size_t n = 0; n < buffer_inputs.count; n++) { + uint32_t el_index = 0; + for (uint32_t n = 0; n < buffer_inputs.count; n++) { const auto& desc = buffer_inputs.descs[n]; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6; @@ -1826,7 +1830,11 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { reinterpret_cast(membase_ + (fetch->address << 2)), fetch->size); - uint32_t el_index = 0; + if (!has_bindless_vbos_) { + glBindVertexBuffer(n, scratch_buffer_.handle(), allocation.offset, + desc.stride_words * 4); + } + for (uint32_t i = 0; i < desc.element_count; ++i) { const auto& el = desc.elements[i]; auto comp_count = GetVertexFormatComponentCount(el.format); @@ -1882,13 +1890,19 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { assert_unhandled_case(el.format); break; } - size_t offset = el.offset_words * sizeof(uint32_t); glEnableVertexAttribArray(el_index); - glVertexAttribFormatNV(el_index, comp_count, comp_type, el.is_normalized, - desc.stride_words * sizeof(uint32_t)); - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index, - allocation.gpu_ptr + offset, - allocation.length - offset); + if (has_bindless_vbos_) { + glVertexAttribFormatNV(el_index, comp_count, comp_type, + el.is_normalized, + desc.stride_words * sizeof(uint32_t)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, el_index, + allocation.gpu_ptr + (el.offset_words * 4), + allocation.length - (el.offset_words * 4)); + } else { + glVertexAttribBinding(el_index, n); + glVertexAttribFormat(el_index, comp_count, comp_type, el.is_normalized, + el.offset_words * 4); + } ++el_index; } @@ -1899,6 +1913,82 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { return true; } +bool CommandProcessor::PopulateSamplers(DrawCommand* draw_command) { + SCOPE_profile_cpu_f("gpu"); + + auto& regs = *register_file_; + + // VS and PS samplers are shared, but may be used exclusively. + // We walk each and setup lazily. + bool has_setup_sampler[32] = {false}; + + // Vertex texture samplers. + const auto& vertex_sampler_inputs = active_vertex_shader_->sampler_inputs(); + for (size_t i = 0; i < vertex_sampler_inputs.count; ++i) { + const auto& desc = vertex_sampler_inputs.descs[i]; + if (has_setup_sampler[desc.fetch_slot]) { + continue; + } + has_setup_sampler[desc.fetch_slot] = true; + if (!PopulateSampler(draw_command, desc)) { + return false; + } + } + + // Pixel shader texture sampler. 
+ const auto& pixel_sampler_inputs = active_pixel_shader_->sampler_inputs(); + for (size_t i = 0; i < pixel_sampler_inputs.count; ++i) { + const auto& desc = pixel_sampler_inputs.descs[i]; + if (has_setup_sampler[desc.fetch_slot]) { + continue; + } + has_setup_sampler[desc.fetch_slot] = true; + if (!PopulateSampler(draw_command, desc)) { + return false; + } + } + + return true; +} + +bool CommandProcessor::PopulateSampler(DrawCommand* draw_command, + const Shader::SamplerDesc& desc) { + auto& regs = *register_file_; + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6; + auto group = reinterpret_cast(®s.values[r]); + auto& fetch = group->texture_fetch; + + // ? + assert_true(fetch.type == 0x2); + + TextureInfo texture_info; + if (!TextureInfo::Prepare(fetch, &texture_info)) { + XELOGE("Unable to parse texture fetcher info"); + return false; // invalid texture used + } + SamplerInfo sampler_info; + if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) { + XELOGE("Unable to parse sampler info"); + return false; // invalid texture used + } + + uint32_t guest_base = fetch.address << 12; + void* host_base = membase_ + guest_base; + auto entry_view = texture_cache_.Demand(host_base, texture_info.input_length, + texture_info, sampler_info); + if (!entry_view) { + // Unable to create/fetch/etc. + XELOGE("Failed to demand texture"); + return false; + } + + // Shaders will use bindless to fetch right from it. + draw_command->state_data->texture_samplers[desc.fetch_slot] = + entry_view->texture_sampler_handle; + + return true; +} + bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { auto& regs = *register_file_; @@ -2045,7 +2135,7 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { case CopyCommand::kConstantOne: case CopyCommand::kNull: default: - assert_unhandled_case(copy_command); + // assert_unhandled_case(copy_command); return false; } glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index ff8441215..0cbfaec1b 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,39 @@ struct SwapParameters { GLenum attachment; }; +// This must match the layout in gl4_shader.cc. +struct UniformDataBlock { + union float4 { + float v[4]; + struct { + float x, y, z, w; + }; + }; + + float4 window_offset; // tx,ty,rt_w,rt_h + float4 window_scissor; // x0,y0,x1,y1 + float4 viewport_offset; // tx,ty,tz,? + float4 viewport_scale; // sx,sy,sz,? + // TODO(benvanik): vertex format xyzw? + + float4 alpha_test; // alpha test enable, func, ref, ? + + // TODO(benvanik): overlay with fetch_consts below? + uint64_t texture_samplers[32]; + + // Register data from 0x4000 to 0x4927. + // SHADER_CONSTANT_000_X... + float4 float_consts[512]; + // SHADER_CONSTANT_FETCH_00_0... + uint32_t fetch_consts[32 * 6]; + // SHADER_CONSTANT_BOOL_000_031... + int32_t bool_consts[8]; + // SHADER_CONSTANT_LOOP_00... + int32_t loop_consts[32]; +}; +static_assert(sizeof(UniformDataBlock) <= 16 * 1024, + "Need <=16k uniform data"); + // TODO(benvanik): move more of the enums in here? struct DrawCommand { PrimitiveType prim_type; @@ -54,6 +88,7 @@ struct DrawCommand { size_t size; xenos::Endian endianness; xenos::IndexFormat format; + size_t buffer_offset; } index_buffer; // Texture samplers. 
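For orientation, this is the per-draw flow the new UniformDataBlock supports, consolidated from IssueDraw()/UpdateState()/UpdateConstants() in command_processor.cc into one illustrative sketch; it reuses the patch's CircularBuffer and UniformDataBlock types and is not itself part of the patch:

```cpp
// Carve a state block out of the persistently mapped scratch buffer.
CircularBuffer::Allocation alloc =
    scratch_buffer_.Acquire(sizeof(UniformDataBlock));
auto* state = reinterpret_cast<UniformDataBlock*>(alloc.host_ptr);

// UpdateState()/UpdateConstants()/PopulateSamplers() write into *state here:
// viewport/window parameters, alpha test, float/fetch/bool/loop constants,
// and the bindless texture_samplers handles.

// Expose exactly this sub-range to the shaders as SSBO binding 0, which is
// what "layout(binding = 0) buffer State { StateData state; };" in
// gl4_shader.cc reads from.
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, scratch_buffer_.handle(),
                  alloc.offset, alloc.length);

// Commit() retires the allocation; with GL_MAP_FLUSH_EXPLICIT_BIT set on the
// mapping, this is presumably also where the written range gets flushed.
scratch_buffer_.Commit(std::move(alloc));
```

From there the shaders read the block directly (state.float_consts[n], state.texture_samplers[i]), replacing the old GL_NV_shader_buffer_load "state->" pointer path.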
@@ -63,11 +98,9 @@ struct DrawCommand { // SamplerStateResource* sampler_state; }; SamplerInput vertex_shader_samplers[32]; - size_t vertex_shader_sampler_count; SamplerInput pixel_shader_samplers[32]; - size_t pixel_shader_sampler_count; - GLuint64 state_data_gpu_ptr; + UniformDataBlock* state_data; }; class CommandProcessor { @@ -195,11 +228,15 @@ class CommandProcessor { void PrepareDraw(DrawCommand* draw_command); bool IssueDraw(DrawCommand* draw_command); - bool UpdateState(DrawCommand* draw_command); bool UpdateRenderTargets(DrawCommand* draw_command); + bool UpdateState(DrawCommand* draw_command); + bool UpdateConstants(DrawCommand* draw_command); bool UpdateShaders(DrawCommand* draw_command); bool PopulateIndexBuffer(DrawCommand* draw_command); bool PopulateVertexBuffers(DrawCommand* draw_command); + bool PopulateSamplers(DrawCommand* draw_command); + bool PopulateSampler(DrawCommand* draw_command, + const Shader::SamplerDesc& desc); bool IssueCopy(DrawCommand* draw_command); CachedFramebuffer* GetFramebuffer(GLuint color_targets[4], @@ -237,7 +274,7 @@ class CommandProcessor { uint64_t bin_select_; uint64_t bin_mask_; - GLuint uniform_data_buffer_; + bool has_bindless_vbos_; std::vector> all_shaders_; std::unordered_map shader_cache_; @@ -251,7 +288,7 @@ class CommandProcessor { std::vector cached_depth_render_targets_; std::vector> all_pipelines_; std::unordered_map cached_pipelines_; - + TextureCache texture_cache_; CircularBuffer scratch_buffer_; DrawCommand draw_command_; diff --git a/src/xenia/gpu/gl4/gl4_gpu-private.h b/src/xenia/gpu/gl4/gl4_gpu-private.h index da11370f5..2b24c7f26 100644 --- a/src/xenia/gpu/gl4/gl4_gpu-private.h +++ b/src/xenia/gpu/gl4/gl4_gpu-private.h @@ -17,6 +17,9 @@ DECLARE_bool(thread_safe_gl); +DECLARE_bool(gl_debug_output); +DECLARE_bool(gl_debug_output_synchronous); + namespace xe { namespace gpu { namespace gl4 { diff --git a/src/xenia/gpu/gl4/gl4_gpu.cc b/src/xenia/gpu/gl4/gl4_gpu.cc index 6a3fe49b8..c27a4fb75 100644 --- a/src/xenia/gpu/gl4/gl4_gpu.cc +++ b/src/xenia/gpu/gl4/gl4_gpu.cc @@ -15,6 +15,10 @@ DEFINE_bool(thread_safe_gl, false, "Only allow one GL context to be active at a time."); +DEFINE_bool(gl_debug_output, false, "Dump ARB_debug_output to stderr."); +DEFINE_bool(gl_debug_output_synchronous, true, + "ARB_debug_output will synchronize to be thread safe."); + namespace xe { namespace gpu { namespace gl4 { diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index 2994ab627..a6b2df2ae 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -35,7 +35,6 @@ const std::string header = "#extension GL_ARB_explicit_uniform_location : require\n" "#extension GL_ARB_shading_language_420pack : require\n" "#extension GL_ARB_shader_storage_buffer_object : require\n" - "#extension GL_NV_shader_buffer_load : require\n" "precision highp float;\n" "precision highp int;\n" "layout(std140, column_major) uniform;\n" @@ -46,6 +45,7 @@ const std::string header = " vec4 viewport_offset;\n" " vec4 viewport_scale;\n" " vec4 alpha_test;\n" + " uvec2 texture_samplers[32];\n" " vec4 float_consts[512];\n" " uint fetch_consts[32 * 6];\n" " int bool_consts[8];\n" @@ -55,7 +55,9 @@ const std::string header = " vec4 o[16];\n" "};\n" "\n" - "uniform StateData* state;\n"; + "layout(binding = 0) buffer State {\n" + " StateData state;\n" + "};\n"; bool GL4Shader::PrepareVertexShader( const xenos::xe_gpu_program_cntl_t& program_cntl) { @@ -69,20 +71,20 @@ bool GL4Shader::PrepareVertexShader( // TODO(benvanik): 
piecewise viewport_enable -> offset/scale logic. " if (false) {\n" " } else {\n" - /*" pos.xy = pos.xy / vec2(state->window_offset.z / 2.0, " - "-state->window_offset.w / 2.0) + vec2(-1.0, 1.0);\n" + /*" pos.xy = pos.xy / vec2(state.window_offset.z / 2.0, " + "-state.window_offset.w / 2.0) + vec2(-1.0, 1.0);\n" " pos.zw = vec2(0.0, 1.0);\n"*/ " pos.xy = pos.xy / vec2(1280.0 / 2.0, " "-720.0 / 2.0) + vec2(-1.0, 1.0);\n" " //pos.zw = vec2(0.0, 1.0);\n" " }\n" - " pos.x = pos.x * state->viewport_scale.x + \n" - " state->viewport_offset.x;\n" - " pos.y = pos.y * state->viewport_scale.y + \n" - " state->viewport_offset.y;\n" - " pos.z = pos.z * state->viewport_scale.z + \n" - " state->viewport_offset.z;\n" - " pos.xy += state->window_offset.xy;\n" + " pos.x = pos.x * state.viewport_scale.x + \n" + " state.viewport_offset.x;\n" + " pos.y = pos.y * state.viewport_scale.y + \n" + " state.viewport_offset.y;\n" + " pos.z = pos.z * state.viewport_scale.z + \n" + " state.viewport_offset.z;\n" + " pos.xy += state.window_offset.xy;\n" " return pos;\n" "}\n"; std::string source = @@ -105,6 +107,8 @@ bool GL4Shader::PrepareVertexShader( " gl_Position = applyViewport(gl_Position);\n" "}\n"; + // glGetTextureSamplerHandleARB() + std::string translated_source = shader_translator_.TranslateVertexShader(this, program_cntl); if (translated_source.empty()) { @@ -135,9 +139,9 @@ bool GL4Shader::PreparePixelShader( "void processFragment();\n" "void main() {\n" " for (int i = 0; i < oC.length(); ++i) {\n" - " oC[i] = vec4(0.0, 0.0, 0.0, 0.0);\n" + " oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n" " }\n" + - (program_cntl.ps_export_depth ? " gl_FragDepth = 0.0\n" : "") + + (program_cntl.ps_export_depth ? " gl_FragDepth = 0.0;\n" : "") + " processFragment();\n" "}\n"; diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.cc b/src/xenia/gpu/gl4/gl4_shader_translator.cc index f0b0c5bed..9ff76d3f0 100644 --- a/src/xenia/gpu/gl4/gl4_shader_translator.cc +++ b/src/xenia/gpu/gl4/gl4_shader_translator.cc @@ -28,25 +28,21 @@ static const char chan_names[] = { const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) { switch (el.format) { case VertexFormat::k_32: - return el.is_signed ? "int" : "uint"; case VertexFormat::k_32_FLOAT: return "float"; case VertexFormat::k_16_16: case VertexFormat::k_32_32: - return el.is_signed ? "ivec2" : "uvec2"; case VertexFormat::k_16_16_FLOAT: case VertexFormat::k_32_32_FLOAT: return "vec2"; case VertexFormat::k_10_11_11: case VertexFormat::k_11_11_10: - return "int3"; // ? case VertexFormat::k_32_32_32_FLOAT: return "vec3"; case VertexFormat::k_8_8_8_8: case VertexFormat::k_2_10_10_10: case VertexFormat::k_16_16_16_16: case VertexFormat::k_32_32_32_32: - return el.is_signed ? "ivec4" : "uvec4"; case VertexFormat::k_16_16_16_16_FLOAT: case VertexFormat::k_32_32_32_32_FLOAT: return "vec4"; @@ -58,14 +54,13 @@ const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) { } GL4ShaderTranslator::GL4ShaderTranslator() - : output_(kOutputCapacity), tex_fetch_index_(0), dwords_(nullptr) {} + : output_(kOutputCapacity), dwords_(nullptr) {} GL4ShaderTranslator::~GL4ShaderTranslator() = default; void GL4ShaderTranslator::Reset(GL4Shader* shader) { output_.Reset(); shader_type_ = shader->type(); - tex_fetch_index_ = 0; dwords_ = shader->data(); } @@ -76,8 +71,6 @@ std::string GL4ShaderTranslator::TranslateVertexShader( // Normal shaders only, for now. 
assert_true(program_cntl.vs_export_mode == 0); - AppendTextureHeader(vertex_shader->sampler_inputs()); - // Add vertex shader input. uint32_t el_index = 0; const auto& buffer_inputs = vertex_shader->buffer_inputs(); @@ -102,7 +95,7 @@ std::string GL4ShaderTranslator::TranslateVertexShader( // Add temporaries for any registers we may use. uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; for (uint32_t n = 0; n <= temp_regs; n++) { - Append(" vec4 r%d = state->float_consts[%d];\n", n, n); + Append(" vec4 r%d = state.float_consts[%d];\n", n, n); } Append(" vec4 t;\n"); @@ -129,15 +122,13 @@ std::string GL4ShaderTranslator::TranslatePixelShader( // If the same PS is used with different VS that output different amounts // (and less than the number of required registers), things may die. - AppendTextureHeader(pixel_shader->sampler_inputs()); - // Pixel shader main() header. Append("void processFragment() {\n"); // Add temporary registers. uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) { - Append(" vec4 r%d = state->float_consts[%d];\n", n, n + 256); + Append(" vec4 r%d = state.float_consts[%d];\n", n, n + 256); } Append(" vec4 t;\n"); Append(" float s;\n"); // scalar result (used for RETAIN_PREV) @@ -161,42 +152,6 @@ std::string GL4ShaderTranslator::TranslatePixelShader( return output_.to_string(); } -void GL4ShaderTranslator::AppendTextureHeader( - const GL4Shader::SamplerInputs& sampler_inputs) { - bool fetch_setup[32] = {false}; - - // 1 texture per constant slot, 1 sampler per fetch. - for (uint32_t n = 0; n < sampler_inputs.count; n++) { - const auto& input = sampler_inputs.descs[n]; - const auto& fetch = input.tex_fetch; - - // Add texture, if needed. - if (!fetch_setup[fetch.const_idx]) { - fetch_setup[fetch.const_idx] = true; - const char* texture_type = nullptr; - switch (fetch.dimension) { - case DIMENSION_1D: - texture_type = "Texture1D"; - break; - default: - case DIMENSION_2D: - texture_type = "Texture2D"; - break; - case DIMENSION_3D: - texture_type = "Texture3D"; - break; - case DIMENSION_CUBE: - texture_type = "TextureCube"; - break; - } - Append("%s x_texture_%d;\n", texture_type, fetch.const_idx); - } - - // Add sampler. - Append("SamplerState x_sampler_%d;\n", n); - } -} - void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, uint32_t abs_constants) { @@ -217,7 +172,7 @@ void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, if (abs_constants) { Append("abs("); } - Append("state->float_consts[%u]", is_pixel_shader() ? num + 256 : num); + Append("state.float_consts[%u]", is_pixel_shader() ? num + 256 : num); if (abs_constants) { Append(")"); } @@ -258,9 +213,12 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) { case 0: Append("oC[0]"); break; + case 61: + // Write to t, as we need to splice just x out of it. + Append("t"); + break; default: // TODO(benvanik): other render targets? - // TODO(benvanik): depth? assert_always(); break; } @@ -282,7 +240,10 @@ void GL4ShaderTranslator::AppendDestReg(uint32_t num, uint32_t mask, void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask, uint32_t dst_exp) { - if (mask != 0xF) { + if (num == 61) { + // gl_FragDepth handling to just get x from the temp result. + Append(" gl_FragDepth = t.x;\n"); + } else if (mask != 0xF) { // Masking. 
Append(" "); AppendDestRegName(num, dst_exp); @@ -399,7 +360,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) { alu.abs_constants); Append(")"); if (alu.vector_clamp) { - Append(")"); + Append(", 0.0, 1.0)"); } Append(";\n"); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); @@ -685,7 +646,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) { if (alu.vector_clamp) { Append(", 0.0, 1.0)"); } - Append(";\n"); + Append(".xxxx;\n"); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); return true; } @@ -706,7 +667,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) { if (alu.vector_clamp) { Append(", 0.0, 1.0)"); } - Append(";\n"); + Append(".xxxx;\n"); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); return true; } @@ -730,7 +691,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) { if (alu.vector_clamp) { Append(", 0.0, 1.0)"); } - Append(";\n"); + Append(".xxxx;\n"); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); return true; } @@ -1402,20 +1363,27 @@ bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx, bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex, int sync) { int src_component_count = 0; + const char* sampler_type; switch (tex->dimension) { case DIMENSION_1D: src_component_count = 1; + sampler_type = "sampler1D"; break; - default: case DIMENSION_2D: src_component_count = 2; + sampler_type = "sampler2D"; break; case DIMENSION_3D: src_component_count = 3; + sampler_type = "sampler3D"; break; case DIMENSION_CUBE: src_component_count = 3; + sampler_type = "samplerCube"; break; + default: + assert_unhandled_case(tex->dimension); + return false; } // Disassemble. @@ -1500,10 +1468,10 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex, Append("\n"); // Translate. - Append(" t = "); - Append("x_texture_%d.Sample(x_sampler_%d, r%u.", tex->const_idx, - tex_fetch_index_++, // hacky way to line up to tex buffers - tex->src_reg); + // TODO(benvanik): if sampler == null, set to invalid color. + Append(" t = texture("); + Append("%s(state.texture_samplers[%d])", sampler_type, tex->const_idx & 0xF); + Append(", r%u.", tex->src_reg); src_swiz = tex->src_swiz; for (int i = 0; i < src_component_count; i++) { Append("%c", chan_names[src_swiz & 0x3]); @@ -1511,6 +1479,26 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex, } Append(");\n"); + // Output texture coordinates as color. + // TODO(benvanik): only if texture is invalid? + // Append(" t = vec4(r%u.", tex->src_reg); + // src_swiz = tex->src_swiz; + // for (int i = 0; i < src_component_count; i++) { + // Append("%c", chan_names[src_swiz & 0x3]); + // src_swiz >>= 2; + //} + // switch (src_component_count) { + // case 1: + // Append(", 0.0, 0.0, 1.0);\n"); + // break; + // case 2: + // Append(", 0.0, 1.0);\n"); + // break; + // case 3: + // Append(", 1.0);\n"); + // break; + //} + Append(" r%u.xyzw = vec4(", tex->dst_reg); uint32_t dst_swiz = tex->dst_swiz; for (int i = 0; i < 4; i++) { @@ -1524,6 +1512,7 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex, } else if ((dst_swiz & 0x7) == 6) { // ? 
Append("?"); + assert_always(); } else if ((dst_swiz & 0x7) == 7) { Append("r%u.%c", tex->dst_reg, chan_names[i]); } else { diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.h b/src/xenia/gpu/gl4/gl4_shader_translator.h index 64da30b04..984483744 100644 --- a/src/xenia/gpu/gl4/gl4_shader_translator.h +++ b/src/xenia/gpu/gl4/gl4_shader_translator.h @@ -39,7 +39,6 @@ class GL4ShaderTranslator { protected: ShaderType shader_type_; - uint32_t tex_fetch_index_; const uint32_t* dwords_; static const int kOutputCapacity = 64 * 1024; @@ -56,8 +55,6 @@ class GL4ShaderTranslator { va_end(args); } - void AppendTextureHeader(const GL4Shader::SamplerInputs& sampler_inputs); - void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, uint32_t abs); void AppendDestRegName(uint32_t num, uint32_t dst_exp); diff --git a/src/xenia/gpu/gl4/gl_context.cc b/src/xenia/gpu/gl4/gl_context.cc index 3f4f48a01..bd1e85160 100644 --- a/src/xenia/gpu/gl4/gl_context.cc +++ b/src/xenia/gpu/gl4/gl_context.cc @@ -115,6 +115,8 @@ bool GLContext::Initialize(HWND hwnd) { // Clearing errors. } + SetupDebugging(); + ClearCurrent(); return true; @@ -160,11 +162,120 @@ std::unique_ptr GLContext::CreateShared() { return nullptr; } + SetupDebugging(); + new_context->ClearCurrent(); return new_context; } +void GLContext::DebugMessage(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar* message) { + const char* source_name = nullptr; + switch (source) { + case GL_DEBUG_SOURCE_API_ARB: + source_name = "OpenGL"; + break; + case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB: + source_name = "Windows"; + break; + case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB: + source_name = "Shader Compiler"; + break; + case GL_DEBUG_SOURCE_THIRD_PARTY_ARB: + source_name = "Third Party"; + break; + case GL_DEBUG_SOURCE_APPLICATION_ARB: + source_name = "Application"; + break; + case GL_DEBUG_SOURCE_OTHER_ARB: + source_name = "Other"; + break; + default: + source_name = "(unknown source)"; + break; + } + + const char* type_name = nullptr; + switch (type) { + case GL_DEBUG_TYPE_ERROR: + type_name = "error"; + break; + case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR: + type_name = "deprecated behavior"; + break; + case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR: + type_name = "undefined behavior"; + break; + case GL_DEBUG_TYPE_PORTABILITY: + type_name = "portability"; + break; + case GL_DEBUG_TYPE_PERFORMANCE: + type_name = "performance"; + break; + case GL_DEBUG_TYPE_OTHER: + type_name = "message"; + break; + case GL_DEBUG_TYPE_MARKER: + type_name = "marker"; + break; + case GL_DEBUG_TYPE_PUSH_GROUP: + type_name = "push group"; + break; + case GL_DEBUG_TYPE_POP_GROUP: + type_name = "pop group"; + break; + default: + type_name = "(unknown type)"; + break; + } + + const char* severity_name = nullptr; + switch (severity) { + case GL_DEBUG_SEVERITY_HIGH_ARB: + severity_name = "high"; + break; + case GL_DEBUG_SEVERITY_MEDIUM_ARB: + severity_name = "medium"; + break; + case GL_DEBUG_SEVERITY_LOW_ARB: + severity_name = "low"; + break; + case GL_DEBUG_SEVERITY_NOTIFICATION: + severity_name = "notification"; + break; + default: + severity_name = "(unknown severity)"; + break; + } + + XELOGE("GL4 %s: %s(%s) %d: %s", source_name, type_name, severity_name, id, + message); +} + +void GLAPIENTRY +GLContext::DebugMessageThunk(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar* message, GLvoid* user_param) { + reinterpret_cast(user_param) + ->DebugMessage(source, type, id, severity, length, message); +} + +void 
GLContext::SetupDebugging() { + if (!FLAGS_gl_debug_output) { + return; + } + glEnable(GL_DEBUG_OUTPUT); + if (FLAGS_gl_debug_output_synchronous) { + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + } + glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, + GL_TRUE); + glDebugMessageCallback(reinterpret_cast(&DebugMessageThunk), + this); +} + bool GLContext::MakeCurrent() { if (FLAGS_thread_safe_gl) { global_gl_mutex_.lock(); diff --git a/src/xenia/gpu/gl4/gl_context.h b/src/xenia/gpu/gl4/gl_context.h index 68386b773..196c99a15 100644 --- a/src/xenia/gpu/gl4/gl_context.h +++ b/src/xenia/gpu/gl4/gl_context.h @@ -35,6 +35,13 @@ class GLContext { void ClearCurrent(); private: + void SetupDebugging(); + void DebugMessage(GLenum source, GLenum type, GLuint id, GLenum severity, + GLsizei length, const GLchar* message); + static void GLAPIENTRY + DebugMessageThunk(GLenum source, GLenum type, GLuint id, GLenum severity, + GLsizei length, const GLchar* message, GLvoid* user_param); + HWND hwnd_; HDC dc_; HGLRC glrc_; diff --git a/src/xenia/gpu/gl4/sources.gypi b/src/xenia/gpu/gl4/sources.gypi index 3f0c349ce..bdeeae80a 100644 --- a/src/xenia/gpu/gl4/sources.gypi +++ b/src/xenia/gpu/gl4/sources.gypi @@ -16,6 +16,8 @@ 'gl4_shader_translator.h', 'gl_context.cc', 'gl_context.h', + 'texture_cache.cc', + 'texture_cache.h', ], 'conditions': [ diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc new file mode 100644 index 000000000..0aff172bd --- /dev/null +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -0,0 +1,497 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +using namespace xe::gpu::xenos; + +extern "C" GLEWContext* glewGetContext(); +extern "C" WGLEWContext* wglewGetContext(); + +TextureCache::TextureCache() { + // +} + +TextureCache::~TextureCache() { Shutdown(); } + +bool TextureCache::Initialize(CircularBuffer* scratch_buffer) { + scratch_buffer_ = scratch_buffer; + return true; +} + +void TextureCache::Shutdown() { + Clear(); + // +} + +void TextureCache::Clear() { + for (auto& entry : entries_) { + for (auto& view : entry.views) { + glMakeTextureHandleNonResidentARB(view.texture_sampler_handle); + glDeleteSamplers(1, &view.sampler); + } + glDeleteTextures(1, &entry.base_texture); + } + entries_.clear(); +} + +TextureCache::EntryView* TextureCache::Demand(void* host_base, size_t length, + const TextureInfo& texture_info, + const SamplerInfo& sampler_info) { + entries_.emplace_back(Entry()); + auto& entry = entries_.back(); + entry.texture_info = texture_info; + + GLenum target; + switch (texture_info.dimension) { + case Dimension::k1D: + target = GL_TEXTURE_1D; + break; + case Dimension::k2D: + target = GL_TEXTURE_2D; + break; + case Dimension::k3D: + target = GL_TEXTURE_3D; + break; + case Dimension::kCube: + target = GL_TEXTURE_CUBE_MAP; + break; + } + + // Setup the base texture. + glCreateTextures(target, 1, &entry.base_texture); + if (!SetupTexture(entry.base_texture, texture_info)) { + PLOGE("Unable to setup texture parameters"); + return false; + } + + // Upload/convert. 
+ bool uploaded = false; + switch (texture_info.dimension) { + case Dimension::k2D: + uploaded = UploadTexture2D(entry.base_texture, host_base, length, + texture_info, sampler_info); + break; + case Dimension::k1D: + case Dimension::k3D: + case Dimension::kCube: + assert_unhandled_case(texture_info.dimension); + return false; + } + if (!uploaded) { + PLOGE("Failed to convert/upload texture"); + return false; + } + + entry.views.emplace_back(EntryView()); + auto& entry_view = entry.views.back(); + entry_view.sampler_info = sampler_info; + + // Setup the sampler. + glCreateSamplers(1, &entry_view.sampler); + if (!SetupSampler(entry_view.sampler, texture_info, sampler_info)) { + PLOGE("Unable to setup texture sampler parameters"); + return false; + } + + // Get the uvec2 handle to the texture/sampler pair and make it resident. + // The handle can be passed directly to the shader. + entry_view.texture_sampler_handle = + glGetTextureSamplerHandleARB(entry.base_texture, entry_view.sampler); + if (!entry_view.texture_sampler_handle) { + return nullptr; + } + glMakeTextureHandleResidentARB(entry_view.texture_sampler_handle); + + return &entry_view; +} + +bool TextureCache::SetupTexture(GLuint texture, + const TextureInfo& texture_info) { + // TODO(benvanik): texture mip levels. + glTextureParameteri(texture, GL_TEXTURE_BASE_LEVEL, 0); + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, 1); + + // Pre-shader swizzle. + // TODO(benvanik): can this be dynamic? Maybe per view? + // We may have to emulate this in the shader. + uint32_t swizzle_r = texture_info.swizzle & 0x7; + uint32_t swizzle_g = (texture_info.swizzle >> 3) & 0x7; + uint32_t swizzle_b = (texture_info.swizzle >> 6) & 0x7; + uint32_t swizzle_a = (texture_info.swizzle >> 9) & 0x7; + static const GLenum swizzle_map[] = { + GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA, GL_ZERO, GL_ONE, + }; + glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_R, swizzle_map[swizzle_r]); + glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_G, swizzle_map[swizzle_g]); + glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_B, swizzle_map[swizzle_b]); + glTextureParameteri(texture, GL_TEXTURE_SWIZZLE_A, swizzle_map[swizzle_a]); + + return true; +} + +bool TextureCache::SetupSampler(GLuint sampler, const TextureInfo& texture_info, + const SamplerInfo& sampler_info) { + // TODO(benvanik): border color from texture fetch. + GLfloat border_color[4] = {0.0f}; + glSamplerParameterfv(sampler, GL_TEXTURE_BORDER_COLOR, border_color); + + // TODO(benvanik): setup LODs for mipmapping. + glSamplerParameterf(sampler, GL_TEXTURE_LOD_BIAS, 0.0f); + glSamplerParameterf(sampler, GL_TEXTURE_MIN_LOD, 0.0f); + glSamplerParameterf(sampler, GL_TEXTURE_MAX_LOD, 0.0f); + + // Texture wrapping modes. + // TODO(benvanik): not sure if the middle ones are correct. + static const GLenum wrap_map[] = { + GL_REPEAT, // + GL_MIRRORED_REPEAT, // + GL_CLAMP_TO_EDGE, // + GL_MIRROR_CLAMP_TO_EDGE, // + GL_CLAMP_TO_BORDER, // ? + GL_MIRROR_CLAMP_TO_BORDER_EXT, // ? + GL_CLAMP_TO_BORDER, // + GL_MIRROR_CLAMP_TO_BORDER_EXT, // + }; + glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, + wrap_map[sampler_info.clamp_u]); + glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, + wrap_map[sampler_info.clamp_v]); + glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, + wrap_map[sampler_info.clamp_w]); + + // Texture level filtering. 
+ GLenum min_filter; + switch (sampler_info.min_filter) { + case ucode::TEX_FILTER_POINT: + switch (sampler_info.mip_filter) { + case ucode::TEX_FILTER_BASEMAP: + min_filter = GL_NEAREST; + break; + case ucode::TEX_FILTER_POINT: + // min_filter = GL_NEAREST_MIPMAP_NEAREST; + min_filter = GL_NEAREST; + break; + case ucode::TEX_FILTER_LINEAR: + // min_filter = GL_NEAREST_MIPMAP_LINEAR; + min_filter = GL_NEAREST; + break; + default: + assert_unhandled_case(sampler_info.mip_filter); + return false; + } + break; + case ucode::TEX_FILTER_LINEAR: + switch (sampler_info.mip_filter) { + case ucode::TEX_FILTER_BASEMAP: + min_filter = GL_LINEAR; + break; + case ucode::TEX_FILTER_POINT: + // min_filter = GL_LINEAR_MIPMAP_NEAREST; + min_filter = GL_LINEAR; + break; + case ucode::TEX_FILTER_LINEAR: + // min_filter = GL_LINEAR_MIPMAP_LINEAR; + min_filter = GL_LINEAR; + break; + default: + assert_unhandled_case(sampler_info.mip_filter); + return false; + } + break; + default: + assert_unhandled_case(sampler_info.min_filter); + return false; + } + GLenum mag_filter; + switch (sampler_info.mag_filter) { + case ucode::TEX_FILTER_POINT: + mag_filter = GL_NEAREST; + break; + case ucode::TEX_FILTER_LINEAR: + mag_filter = GL_LINEAR; + break; + default: + assert_unhandled_case(mag_filter); + return false; + } + glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, min_filter); + glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, mag_filter); + + // TODO(benvanik): anisotropic filtering. + // GL_TEXTURE_MAX_ANISOTROPY_EXT + + return true; +} + +void TextureSwap(Endian endianness, void* dest, const void* src, + size_t length) { + switch (endianness) { + case Endian::k8in16: + poly::copy_and_swap_16_aligned(reinterpret_cast(dest), + reinterpret_cast(src), + length / 2); + break; + case Endian::k8in32: + poly::copy_and_swap_32_aligned(reinterpret_cast(dest), + reinterpret_cast(src), + length / 4); + break; + case Endian::k16in32: + // TODO(benvanik): make more efficient. + /*for (uint32_t i = 0; i < length; i += 4, src += 4, dest += 4) { + uint32_t value = *(uint32_t*)src; + *(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16); + }*/ + assert_always("16in32 not supported"); + break; + default: + case Endian::kUnspecified: + std::memcpy(dest, src, length); + break; + } +} + +bool TextureCache::UploadTexture2D(GLuint texture, void* host_base, + size_t length, + const TextureInfo& texture_info, + const SamplerInfo& sampler_info) { + assert_true(length == texture_info.input_length); + + GLenum internal_format = GL_RGBA8; + GLenum format = GL_RGBA; + GLenum type = GL_UNSIGNED_BYTE; + // https://code.google.com/p/glsnewton/source/browse/trunk/Source/uDDSLoader.pas?r=62 + // http://dench.flatlib.jp/opengl/textures + // http://fossies.org/linux/WebKit/Source/ThirdParty/ANGLE/src/libGLESv2/formatutils.cpp + switch (texture_info.format) { + case TextureFormat::k_8: + internal_format = GL_R8; + format = GL_R; + type = GL_UNSIGNED_BYTE; + break; + case TextureFormat::k_1_5_5_5: + internal_format = GL_RGB5_A1; + format = GL_BGRA; + type = GL_UNSIGNED_SHORT_1_5_5_5_REV; + break; + case TextureFormat::k_5_6_5: + internal_format = GL_RGB565; + format = GL_RGB; + type = GL_UNSIGNED_SHORT_5_6_5; + break; + case TextureFormat::k_2_10_10_10: + case TextureFormat::k_2_10_10_10_AS_16_16_16_16: + internal_format = GL_RGB10_A2; + format = GL_RGBA; + type = GL_UNSIGNED_INT_2_10_10_10_REV; + break; + case TextureFormat::k_10_11_11: + case TextureFormat::k_10_11_11_AS_16_16_16_16: + // ? 
+ internal_format = GL_R11F_G11F_B10F; + format = GL_RGB; + type = GL_UNSIGNED_INT_10F_11F_11F_REV; + break; + case TextureFormat::k_11_11_10: + case TextureFormat::k_11_11_10_AS_16_16_16_16: + internal_format = GL_R11F_G11F_B10F; + format = GL_RGB; + type = GL_UNSIGNED_INT_10F_11F_11F_REV; + break; + case TextureFormat::k_8_8_8_8: + case TextureFormat::k_8_8_8_8_AS_16_16_16_16: + internal_format = GL_RGBA8; + format = GL_RGBA; + type = GL_UNSIGNED_BYTE; + break; + case TextureFormat::k_4_4_4_4: + internal_format = GL_RGBA4; + format = GL_RGBA; + type = GL_UNSIGNED_SHORT_4_4_4_4; + break; + case TextureFormat::k_16_FLOAT: + internal_format = GL_R16F; + format = GL_RED; + type = GL_HALF_FLOAT; + break; + case TextureFormat::k_16_16_FLOAT: + internal_format = GL_RG16F; + format = GL_RG; + type = GL_HALF_FLOAT; + break; + case TextureFormat::k_16_16_16_16_FLOAT: + internal_format = GL_RGBA16F; + format = GL_RGBA; + type = GL_HALF_FLOAT; + break; + case TextureFormat::k_32_FLOAT: + internal_format = GL_R32F; + format = GL_R; + type = GL_FLOAT; + break; + case TextureFormat::k_32_32_FLOAT: + internal_format = GL_RG32F; + format = GL_RG; + type = GL_FLOAT; + break; + case TextureFormat::k_32_32_32_FLOAT: + internal_format = GL_RGB32F; + format = GL_RGB; + type = GL_FLOAT; + break; + case TextureFormat::k_32_32_32_32_FLOAT: + internal_format = GL_RGBA32F; + format = GL_RGBA; + type = GL_FLOAT; + break; + case TextureFormat::k_DXT1: + case TextureFormat::k_DXT1_AS_16_16_16_16: + // or GL_COMPRESSED_RGB_S3TC_DXT1_EXT? + internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; + break; + case TextureFormat::k_DXT2_3: + case TextureFormat::k_DXT2_3_AS_16_16_16_16: + internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; + break; + case TextureFormat::k_DXT4_5: + case TextureFormat::k_DXT4_5_AS_16_16_16_16: + internal_format = format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; + break; + case TextureFormat::k_24_8: + internal_format = GL_DEPTH24_STENCIL8; + format = GL_DEPTH_STENCIL; + type = GL_UNSIGNED_INT_24_8; + break; + case TextureFormat::k_24_8_FLOAT: + internal_format = GL_DEPTH24_STENCIL8; + format = GL_DEPTH_STENCIL; + type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + break; + default: + case TextureFormat::k_1_REVERSE: + case TextureFormat::k_1: + case TextureFormat::k_6_5_5: + case TextureFormat::k_8_A: + case TextureFormat::k_8_B: + case TextureFormat::k_8_8: + case TextureFormat::k_Cr_Y1_Cb_Y0: + case TextureFormat::k_Y1_Cr_Y0_Cb: + case TextureFormat::k_8_8_8_8_A: + case TextureFormat::k_16: + case TextureFormat::k_16_16: + case TextureFormat::k_16_16_16_16: + case TextureFormat::k_16_EXPAND: + case TextureFormat::k_16_16_EXPAND: + case TextureFormat::k_16_16_16_16_EXPAND: + case TextureFormat::k_32_32: + case TextureFormat::k_32_32_32_32: + case TextureFormat::k_32_AS_8: + case TextureFormat::k_32_AS_8_8: + case TextureFormat::k_16_MPEG: + case TextureFormat::k_16_16_MPEG: + case TextureFormat::k_8_INTERLACED: + case TextureFormat::k_32_AS_8_INTERLACED: + case TextureFormat::k_32_AS_8_8_INTERLACED: + case TextureFormat::k_16_INTERLACED: + case TextureFormat::k_16_MPEG_INTERLACED: + case TextureFormat::k_16_16_MPEG_INTERLACED: + case TextureFormat::k_DXN: + case TextureFormat::k_DXT3A: + case TextureFormat::k_DXT5A: + case TextureFormat::k_CTX1: + case TextureFormat::k_DXT3A_AS_1_1_1_1: + assert_unhandled_case(texture_info.format); + return false; + } + + size_t unpack_length = texture_info.input_length; + glTextureStorage2D(texture, 1, internal_format, + texture_info.size_2d.output_width, 
+                     texture_info.size_2d.output_height);
+  assert_true(unpack_length % 4 == 0);
+
+  auto allocation = scratch_buffer_->Acquire(unpack_length);
+
+  if (!texture_info.is_tiled) {
+    TextureSwap(texture_info.endianness, allocation.host_ptr, host_base,
+                unpack_length);
+    /*const uint8_t* src = reinterpret_cast<const uint8_t*>(host_base);
+    uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
+    for (uint32_t y = 0; y < texture_info.size_2d.block_height; y++) {
+      for (uint32_t x = 0; x < texture_info.size_2d.logical_pitch;
+           x += texture_info.texel_pitch) {
+        TextureSwap(texture_info.endianness, dest + x, src + x,
+                    texture_info.texel_pitch);
+      }
+      src += texture_info.size_2d.input_pitch;
+      dest += texture_info.size_2d.input_pitch;
+    }*/
+    // std::memcpy(dest, src, unpack_length);
+  } else {
+    uint8_t* src = reinterpret_cast<uint8_t*>(host_base);
+    uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
+    uint32_t output_pitch =
+        (texture_info.size_2d.output_width / texture_info.block_size) *
+        texture_info.texel_pitch;
+    auto bpp =
+        (texture_info.texel_pitch >> 2) +
+        ((texture_info.texel_pitch >> 1) >> (texture_info.texel_pitch >> 2));
+    for (uint32_t y = 0, output_base_offset = 0;
+         y < texture_info.size_2d.block_height;
+         y++, output_base_offset += output_pitch) {
+      auto input_base_offset = TextureInfo::TiledOffset2DOuter(
+          y, (texture_info.size_2d.input_width / texture_info.block_size), bpp);
+      for (uint32_t x = 0, output_offset = output_base_offset;
+           x < texture_info.size_2d.block_width;
+           x++, output_offset += texture_info.texel_pitch) {
+        auto input_offset =
+            TextureInfo::TiledOffset2DInner(x, y, bpp, input_base_offset) >>
+            bpp;
+        TextureSwap(texture_info.endianness, dest + output_offset,
+                    src + input_offset * texture_info.texel_pitch,
+                    texture_info.texel_pitch);
+      }
+    }
+  }
+  size_t unpack_offset = allocation.offset;
+  scratch_buffer_->Commit(std::move(allocation));
+
+  // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
+  // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
+  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
+  if (texture_info.is_compressed) {
+    glCompressedTextureSubImage2D(texture, 0, 0, 0,
+                                  texture_info.size_2d.output_width,
+                                  texture_info.size_2d.output_height, format,
+                                  static_cast<GLsizei>(unpack_length),
+                                  reinterpret_cast<void*>(unpack_offset));
+  } else {
+    glTextureSubImage2D(texture, 0, 0, 0, texture_info.size_2d.output_width,
+                        texture_info.size_2d.output_height, format, type,
+                        reinterpret_cast<void*>(unpack_offset));
+  }
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+  return true;
+}
+
+}  // namespace gl4
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/gl4/texture_cache.h b/src/xenia/gpu/gl4/texture_cache.h
new file mode 100644
index 000000000..f4816d981
--- /dev/null
+++ b/src/xenia/gpu/gl4/texture_cache.h
@@ -0,0 +1,65 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_GL4_TEXTURE_CACHE_H_
+#define XENIA_GPU_GL4_TEXTURE_CACHE_H_
+
+#include <vector>
+
+#include <xenia/gpu/gl4/circular_buffer.h>
+#include <xenia/gpu/gl4/gl_context.h>
+#include <xenia/gpu/sampler_info.h>
+#include <xenia/gpu/texture_info.h>
+
+namespace xe {
+namespace gpu {
+namespace gl4 {
+
+class TextureCache {
+ public:
+  struct EntryView {
+    SamplerInfo sampler_info;
+    GLuint sampler;
+    GLuint64 texture_sampler_handle;
+  };
+  struct Entry {
+    TextureInfo texture_info;
+    GLuint base_texture;
+    std::vector<EntryView> views;
+  };
+
+  TextureCache();
+  ~TextureCache();
+
+  bool Initialize(CircularBuffer* scratch_buffer);
+  void Shutdown();
+  void Clear();
+
+  EntryView* Demand(void* host_base, size_t length,
+                    const TextureInfo& texture_info,
+                    const SamplerInfo& sampler_info);
+
+ private:
+  bool SetupTexture(GLuint texture, const TextureInfo& texture_info);
+  bool SetupSampler(GLuint sampler, const TextureInfo& texture_info,
+                    const SamplerInfo& sampler_info);
+
+  bool UploadTexture2D(GLuint texture, void* host_base, size_t length,
+                       const TextureInfo& texture_info,
+                       const SamplerInfo& sampler_info);
+
+  CircularBuffer* scratch_buffer_;
+  std::vector<Entry> entries_;
+};
+
+}  // namespace gl4
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_GL4_TEXTURE_CACHE_H_
diff --git a/src/xenia/gpu/gl4/wgl_control.cc b/src/xenia/gpu/gl4/wgl_control.cc
index 5571a1784..032ed7c9d 100644
--- a/src/xenia/gpu/gl4/wgl_control.cc
+++ b/src/xenia/gpu/gl4/wgl_control.cc
@@ -74,17 +74,32 @@ LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
                             LPARAM lParam) {
   switch (message) {
     case WM_PAINT: {
-      GLContextLock context_lock(&context_);
-      // TODO(benvanik): is viewport needed?
-      glViewport(0, 0, width_, height_);
-      float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f};
-      glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
-      if (current_paint_callback_) {
-        current_paint_callback_();
-        current_paint_callback_ = nullptr;
+      {
+        GLContextLock context_lock(&context_);
+        wglSwapIntervalEXT(0);
+
+        // TODO(benvanik): is viewport needed?
+        glViewport(0, 0, width_, height_);
+        float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f};
+        glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
+
+        if (current_paint_callback_) {
+          current_paint_callback_();
+          current_paint_callback_ = nullptr;
+        }
+
+        // TODO(benvanik): profiler present.
+        // Profiler::Present();
+
+        // Hacky swap timer.
+        static int swap_count = 0;
+        glEnable(GL_SCISSOR_TEST);
+        glScissor(0, 0, 20, 20);
+        float red[] = {swap_count / 60.0f, 0, 0, 1.0f};
+        swap_count = (swap_count + 1) % 60;
+        glClearNamedFramebufferfv(0, GL_COLOR, 0, red);
+        glDisable(GL_SCISSOR_TEST);
       }
-      // TODO(benvanik): profiler present.
-      // Profiler::Present();
       SwapBuffers(context_.dc());
     } break;
   }
diff --git a/src/xenia/gpu/sampler_info.cc b/src/xenia/gpu/sampler_info.cc
new file mode 100644
index 000000000..f260f7bfc
--- /dev/null
+++ b/src/xenia/gpu/sampler_info.cc
@@ -0,0 +1,31 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/sampler_info.h>
+
+namespace xe {
+namespace gpu {
+
+bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
+                          const ucode::instr_fetch_tex_t& fetch_instr,
+                          SamplerInfo* out_info) {
+  out_info->min_filter = static_cast<ucode::instr_tex_filter_t>(
+      fetch_instr.min_filter == 3 ? fetch.min_filter : fetch_instr.min_filter);
+  out_info->mag_filter = static_cast<ucode::instr_tex_filter_t>(
+      fetch_instr.mag_filter == 3 ? fetch.mag_filter : fetch_instr.mag_filter);
+  out_info->mip_filter = static_cast<ucode::instr_tex_filter_t>(
+      fetch_instr.mip_filter == 3 ? fetch.mip_filter : fetch_instr.mip_filter);
+  out_info->clamp_u = fetch.clamp_x;
+  out_info->clamp_v = fetch.clamp_y;
+  out_info->clamp_w = fetch.clamp_z;
+  return true;
+}
+
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/sampler_info.h b/src/xenia/gpu/sampler_info.h
new file mode 100644
index 000000000..9aa764117
--- /dev/null
+++ b/src/xenia/gpu/sampler_info.h
@@ -0,0 +1,41 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SAMPLER_INFO_H_
+#define XENIA_GPU_SAMPLER_INFO_H_
+
+#include <xenia/gpu/ucode.h>
+#include <xenia/gpu/xenos.h>
+
+namespace xe {
+namespace gpu {
+
+struct SamplerInfo {
+  ucode::instr_tex_filter_t min_filter;
+  ucode::instr_tex_filter_t mag_filter;
+  ucode::instr_tex_filter_t mip_filter;
+  uint32_t clamp_u;
+  uint32_t clamp_v;
+  uint32_t clamp_w;
+
+  static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
+                      const ucode::instr_fetch_tex_t& fetch_instr,
+                      SamplerInfo* out_info);
+
+  bool operator==(const SamplerInfo& other) const {
+    return min_filter == other.min_filter && mag_filter == other.mag_filter &&
+           mip_filter == other.mip_filter && clamp_u == other.clamp_u &&
+           clamp_v == other.clamp_v && clamp_w == other.clamp_w;
+  }
+};
+
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SAMPLER_INFO_H_
diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc
index d2cb0bd5d..bbc84f0d6 100644
--- a/src/xenia/gpu/shader.cc
+++ b/src/xenia/gpu/shader.cc
@@ -172,6 +172,8 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
     return;
   }
 
+  assert_true(vtx->const_index <= 0x1F);
+
   uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
   auto& inputs = buffer_inputs_;
   BufferDescElement* el = nullptr;
@@ -240,10 +242,12 @@ void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
 
 void Shader::GatherTextureFetch(const instr_fetch_tex_t* tex) {
   // TODO(benvanik): check dest_swiz to see if we are writing anything.
+  assert_true(tex->const_idx < 0x1F);
+
   assert_true(sampler_inputs_.count + 1 < poly::countof(sampler_inputs_.descs));
   auto& input = sampler_inputs_.descs[sampler_inputs_.count++];
   input.input_index = sampler_inputs_.count - 1;
-  input.fetch_slot = tex->const_idx & 0xF;  // ?
+  input.fetch_slot = tex->const_idx & 0xF;  // ??????????????????????????????
   input.tex_fetch = *tex;
 
   // Format mangling, size estimation, etc.
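
Side note on the filter plumbing above: SamplerInfo::Prepare treats a per-instruction filter value of 3 as "defer to the fetch constant" and lets any other value override it. The standalone sketch below only mirrors that selection expression; ResolveFilter is a hypothetical name used for illustration and does not exist in the tree.

// Hedged sketch: mirrors the `value == 3 ? fetch : instruction` selection in
// SamplerInfo::Prepare above. Not part of the diff.
#include <cassert>
#include <cstdint>

static uint32_t ResolveFilter(uint32_t instr_value, uint32_t fetch_value) {
  // 3 is the sentinel a texture fetch instruction uses to defer to the fetch
  // constant; any other value is an explicit per-instruction override.
  return instr_value == 3 ? fetch_value : instr_value;
}

int main() {
  assert(ResolveFilter(3, 1) == 1);  // instruction defers to the fetch constant
  assert(ResolveFilter(0, 1) == 0);  // explicit instruction value wins
  return 0;
}
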
diff --git a/src/xenia/gpu/sources.gypi b/src/xenia/gpu/sources.gypi
index 416884d5d..ec144c8af 100644
--- a/src/xenia/gpu/sources.gypi
+++ b/src/xenia/gpu/sources.gypi
@@ -9,8 +9,12 @@
     'register_file.cc',
     'register_file.h',
     'register_table.inc',
+    'sampler_info.cc',
+    'sampler_info.h',
     'shader.cc',
     'shader.h',
+    'texture_info.cc',
+    'texture_info.h',
     'ucode.h',
     'ucode_disassembler.cc',
     'ucode_disassembler.h',
diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc
new file mode 100644
index 000000000..8b0aaecae
--- /dev/null
+++ b/src/xenia/gpu/texture_info.cc
@@ -0,0 +1,239 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include <xenia/gpu/texture_info.h>
+
+#include <poly/math.h>
+
+namespace xe {
+namespace gpu {
+
+using namespace xe::gpu::ucode;
+using namespace xe::gpu::xenos;
+
+bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
+                          TextureInfo* out_info) {
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
+  // a2xx_sq_surfaceformat
+
+  auto& info = *out_info;
+  info.swizzle = fetch.swizzle;
+
+  info.dimension = static_cast<Dimension>(fetch.dimension);
+  switch (info.dimension) {
+    case Dimension::k1D:
+      info.width = fetch.size_1d.width;
+      break;
+    case Dimension::k2D:
+      info.width = fetch.size_2d.width;
+      info.height = fetch.size_2d.height;
+      break;
+    case Dimension::k3D:
+    case Dimension::kCube:
+      info.width = fetch.size_3d.width;
+      info.height = fetch.size_3d.height;
+      info.depth = fetch.size_3d.depth;
+      break;
+  }
+  info.endianness = static_cast<Endian>(fetch.endianness);
+
+  info.block_size = 0;
+  info.texel_pitch = 0;
+  info.is_tiled = fetch.tiled;
+  info.is_compressed = false;
+  info.input_length = 0;
+  info.format = static_cast<TextureFormat>(fetch.format);
+  switch (fetch.format) {
+    case FMT_8:
+      info.block_size = 1;
+      info.texel_pitch = 1;
+      break;
+    case FMT_1_5_5_5:
+      info.block_size = 1;
+      info.texel_pitch = 2;
+      break;
+    case FMT_8_8_8_8:
+    case FMT_8_8_8_8_AS_16_16_16_16:
+      info.block_size = 1;
+      info.texel_pitch = 4;
+      break;
+    case FMT_4_4_4_4:
+      info.block_size = 1;
+      info.texel_pitch = 2;
+      break;
+    case FMT_16_16_16_16_FLOAT:
+      info.block_size = 1;
+      info.texel_pitch = 8;
+      break;
+    case FMT_32_FLOAT:
+      info.block_size = 1;
+      info.texel_pitch = 4;
+      break;
+    case FMT_DXT1:
+      info.block_size = 4;
+      info.texel_pitch = 8;
+      info.is_compressed = true;
+      break;
+    case FMT_DXT2_3:
+    case FMT_DXT4_5:
+      info.block_size = 4;
+      info.texel_pitch = 16;
+      info.is_compressed = true;
+      break;
+    case FMT_DXT1_AS_16_16_16_16:
+      // TODO(benvanik): conversion?
+      info.block_size = 4;
+      info.texel_pitch = 8;
+      info.is_compressed = true;
+      break;
+    case FMT_DXT2_3_AS_16_16_16_16:
+    case FMT_DXT4_5_AS_16_16_16_16:
+      // TODO(benvanik): conversion?
+ info.block_size = 4; + info.texel_pitch = 16; + info.is_compressed = true; + break; + case FMT_1_REVERSE: + case FMT_1: + case FMT_5_6_5: + case FMT_6_5_5: + case FMT_2_10_10_10: + case FMT_8_A: + case FMT_8_B: + case FMT_8_8: + case FMT_Cr_Y1_Cb_Y0: + case FMT_Y1_Cr_Y0_Cb: + case FMT_5_5_5_1: + case FMT_8_8_8_8_A: + case FMT_10_11_11: + case FMT_11_11_10: + case FMT_24_8: + case FMT_24_8_FLOAT: + case FMT_16: + case FMT_16_16: + case FMT_16_16_16_16: + case FMT_16_EXPAND: + case FMT_16_16_EXPAND: + case FMT_16_16_16_16_EXPAND: + case FMT_16_FLOAT: + case FMT_16_16_FLOAT: + case FMT_32: + case FMT_32_32: + case FMT_32_32_32_32: + case FMT_32_32_FLOAT: + case FMT_32_32_32_32_FLOAT: + case FMT_32_AS_8: + case FMT_32_AS_8_8: + case FMT_16_MPEG: + case FMT_16_16_MPEG: + case FMT_8_INTERLACED: + case FMT_32_AS_8_INTERLACED: + case FMT_32_AS_8_8_INTERLACED: + case FMT_16_INTERLACED: + case FMT_16_MPEG_INTERLACED: + case FMT_16_16_MPEG_INTERLACED: + case FMT_DXN: + case FMT_2_10_10_10_AS_16_16_16_16: + case FMT_10_11_11_AS_16_16_16_16: + case FMT_11_11_10_AS_16_16_16_16: + case FMT_32_32_32_FLOAT: + case FMT_DXT3A: + case FMT_DXT5A: + case FMT_CTX1: + case FMT_DXT3A_AS_1_1_1_1: + PLOGE("Unhandled texture format"); + return false; + default: + assert_unhandled_case(fetch.format); + return false; + } + + // Must be called here when we know the format. + switch (info.dimension) { + case Dimension::k1D: + info.CalculateTextureSizes1D(fetch); + break; + case Dimension::k2D: + info.CalculateTextureSizes2D(fetch); + break; + case Dimension::k3D: + // TODO(benvanik): calculate size. + return false; + case Dimension::kCube: + // TODO(benvanik): calculate size. + return false; + } + + return true; +} + +void TextureInfo::CalculateTextureSizes1D(const xe_gpu_texture_fetch_t& fetch) { + // ? + size_1d.width = fetch.size_1d.width; +} + +void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) { + size_2d.logical_width = 1 + fetch.size_2d.width; + size_2d.logical_height = 1 + fetch.size_2d.height; + + size_2d.block_width = size_2d.logical_width / block_size; + size_2d.block_height = size_2d.logical_height / block_size; + + if (!is_compressed) { + // must be 32x32 but also must have a pitch that is a multiple of 256 bytes + uint32_t bytes_per_block = block_size * block_size * texel_pitch; + uint32_t width_multiple = 32; + if (bytes_per_block) { + uint32_t minimum_multiple = 256 / bytes_per_block; + if (width_multiple < minimum_multiple) { + width_multiple = minimum_multiple; + } + } + size_2d.input_width = poly::round_up(size_2d.logical_width, width_multiple); + size_2d.input_height = poly::round_up(size_2d.logical_height, 32); + size_2d.output_width = size_2d.logical_width; + size_2d.output_height = size_2d.logical_height; + } else { + // must be 128x128 + size_2d.input_width = poly::round_up(size_2d.logical_width, 128); + size_2d.input_height = poly::round_up(size_2d.logical_height, 128); + size_2d.output_width = poly::next_pow2(size_2d.logical_width); + size_2d.output_height = poly::next_pow2(size_2d.logical_height); + } + + size_2d.logical_pitch = (size_2d.logical_width / block_size) * texel_pitch; + size_2d.input_pitch = (size_2d.input_width / block_size) * texel_pitch; + + if (!is_tiled) { + input_length = size_2d.block_height * size_2d.logical_pitch; + } else { + input_length = size_2d.block_height * size_2d.logical_pitch; // ? 
+  }
+}
+
+// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
+uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
+                                         uint32_t log_bpp) {
+  uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
+  uint32_t micro = ((y & 6) << 2) << log_bpp;
+  return macro + ((micro & ~15) << 1) + (micro & 15) +
+         ((y & 8) << (3 + log_bpp)) + ((y & 1) << 4);
+}
+
+uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
+                                         uint32_t base_offset) {
+  uint32_t macro = (x >> 5) << (bpp + 7);
+  uint32_t micro = (x & 7) << bpp;
+  uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
+  return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +
+         ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
+}
+
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h
new file mode 100644
index 000000000..2cda83426
--- /dev/null
+++ b/src/xenia/gpu/texture_info.h
@@ -0,0 +1,140 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_TEXTURE_INFO_H_
+#define XENIA_GPU_TEXTURE_INFO_H_
+
+#include <xenia/gpu/ucode.h>
+#include <xenia/gpu/xenos.h>
+
+namespace xe {
+namespace gpu {
+
+// a2xx_sq_surfaceformat
+enum class TextureFormat : uint32_t {
+  k_1_REVERSE = 0,
+  k_1 = 1,
+  k_8 = 2,
+  k_1_5_5_5 = 3,
+  k_5_6_5 = 4,
+  k_6_5_5 = 5,
+  k_8_8_8_8 = 6,
+  k_2_10_10_10 = 7,
+  k_8_A = 8,
+  k_8_B = 9,
+  k_8_8 = 10,
+  k_Cr_Y1_Cb_Y0 = 11,
+  k_Y1_Cr_Y0_Cb = 12,
+  // ? hole
+  k_8_8_8_8_A = 14,
+  k_4_4_4_4 = 15,
+  k_10_11_11 = 16,
+  k_11_11_10 = 17,
+  k_DXT1 = 18,
+  k_DXT2_3 = 19,
+  k_DXT4_5 = 20,
+  // ?
hole + k_24_8 = 22, + k_24_8_FLOAT = 23, + k_16 = 24, + k_16_16 = 25, + k_16_16_16_16 = 26, + k_16_EXPAND = 27, + k_16_16_EXPAND = 28, + k_16_16_16_16_EXPAND = 29, + k_16_FLOAT = 30, + k_16_16_FLOAT = 31, + k_16_16_16_16_FLOAT = 32, + k_32 = 33, + k_32_32 = 34, + k_32_32_32_32 = 35, + k_32_FLOAT = 36, + k_32_32_FLOAT = 37, + k_32_32_32_32_FLOAT = 38, + k_32_AS_8 = 39, + k_32_AS_8_8 = 40, + k_16_MPEG = 41, + k_16_16_MPEG = 42, + k_8_INTERLACED = 43, + k_32_AS_8_INTERLACED = 44, + k_32_AS_8_8_INTERLACED = 45, + k_16_INTERLACED = 46, + k_16_MPEG_INTERLACED = 47, + k_16_16_MPEG_INTERLACED = 48, + k_DXN = 49, + k_8_8_8_8_AS_16_16_16_16 = 50, + k_DXT1_AS_16_16_16_16 = 51, + k_DXT2_3_AS_16_16_16_16 = 52, + k_DXT4_5_AS_16_16_16_16 = 53, + k_2_10_10_10_AS_16_16_16_16 = 54, + k_10_11_11_AS_16_16_16_16 = 55, + k_11_11_10_AS_16_16_16_16 = 56, + k_32_32_32_FLOAT = 57, + k_DXT3A = 58, + k_DXT5A = 59, + k_CTX1 = 60, + k_DXT3A_AS_1_1_1_1 = 61, + + kUnknown = 0xFFFFFFFFu, +}; + +struct TextureInfo { + uint32_t swizzle; + Dimension dimension; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t block_size; + uint32_t texel_pitch; + xenos::Endian endianness; + bool is_tiled; + bool is_compressed; + uint32_t input_length; + + TextureFormat format; + + union { + struct { + uint32_t width; + } size_1d; + struct { + uint32_t logical_width; + uint32_t logical_height; + uint32_t block_width; + uint32_t block_height; + uint32_t input_width; + uint32_t input_height; + uint32_t output_width; + uint32_t output_height; + uint32_t logical_pitch; + uint32_t input_pitch; + } size_2d; + struct { + } size_3d; + struct { + } size_cube; + }; + + static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, + TextureInfo* out_info); + + static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width, + uint32_t log_bpp); + static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp, + uint32_t base_offset); + + private: + void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch); + void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch); +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_TEXTURE_INFO_H_ diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index f23ec50f4..e89e4ba97 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -35,6 +35,13 @@ enum class PrimitiveType : uint32_t { kQuadList = 0x0D, }; +enum class Dimension : uint32_t { + k1D = 0, + k2D = 1, + k3D = 2, + kCube = 3, +}; + namespace xenos { typedef enum {
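
For reference, the tiled addressing helpers declared above can be exercised in isolation. The sketch below copies the two formulas from texture_info.cc verbatim and walks a few blocks the same way UploadTexture2D derives its source offsets; the width and bytes-per-texel values are arbitrary sample inputs, not anything dictated by the diff.

// Hedged, standalone illustration of the 2D tiling math. The two helpers are
// copies of TextureInfo::TiledOffset2DOuter/Inner above.
#include <cstdint>
#include <cstdio>

static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
                                   uint32_t log_bpp) {
  uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
  uint32_t micro = ((y & 6) << 2) << log_bpp;
  return macro + ((micro & ~15) << 1) + (micro & 15) +
         ((y & 8) << (3 + log_bpp)) + ((y & 1) << 4);
}

static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
                                   uint32_t base_offset) {
  uint32_t macro = (x >> 5) << (bpp + 7);
  uint32_t micro = (x & 7) << bpp;
  uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
  return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +
         ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
}

int main() {
  // Sample values: 4 bytes per texel (log_bpp == 2) and a 128-texel-wide
  // padded input surface, matching how UploadTexture2D combines the two
  // helpers (outer per row, inner per column, result shifted by log_bpp).
  const uint32_t log_bpp = 2;
  const uint32_t input_width = 128;
  for (uint32_t y = 0; y < 4; ++y) {
    uint32_t row_base = TiledOffset2DOuter(y, input_width, log_bpp);
    for (uint32_t x = 0; x < 4; ++x) {
      uint32_t texel_index =
          TiledOffset2DInner(x, y, log_bpp, row_base) >> log_bpp;
      std::printf("linear (%u,%u) -> tiled texel index %u\n", x, y,
                  texel_index);
    }
  }
  return 0;
}
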