diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index bc97be59d..628b2d871 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -132,8 +132,10 @@ void CommandProcessor::WorkerMain() { bool CommandProcessor::SetupGL() { // Uniform buffer that stores the per-draw state (constants, etc). - glGenBuffers(1, &uniform_data_buffer_); - glNamedBufferStorage(uniform_data_buffer_, 16 * 1024, nullptr, GL_MAP_WRITE_BIT); + glCreateBuffers(1, &uniform_data_buffer_); + glBindBuffer(GL_UNIFORM_BUFFER, uniform_data_buffer_); + glNamedBufferStorage(uniform_data_buffer_, 16 * 1024, nullptr, + GL_MAP_WRITE_BIT | GL_DYNAMIC_STORAGE_BIT); return true; } @@ -857,25 +859,21 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, assert_always(); } - if (!PrepareDraw(&draw_command_)) { - PLOGE("Invalid DRAW_INDX; ignoring"); - return false; - } + PrepareDraw(&draw_command_); draw_command_.prim_type = prim_type; draw_command_.start_index = 0; draw_command_.index_count = index_count; draw_command_.base_vertex = 0; if (src_sel == 0x0) { // Indexed draw. - // TODO(benvanik): detect subregions of larger index buffers - /*driver_->PrepareDrawIndexBuffer( - draw_command_, index_base, index_size, - endianness, - index_32bit ? INDEX_FORMAT_32BIT : INDEX_FORMAT_16BIT);*/ - draw_command_.index_buffer = nullptr; + draw_command_.index_buffer.address = membase_ + index_base; + draw_command_.index_buffer.size = index_size; + draw_command_.index_buffer.endianess = index_endianness; + draw_command_.index_buffer.format = + index_32bit ? IndexFormat::kInt32 : IndexFormat::kInt16; } else if (src_sel == 0x2) { // Auto draw. - draw_command_.index_buffer = nullptr; + draw_command_.index_buffer.address = nullptr; } else { // Unknown source select. assert_always(); @@ -900,14 +898,12 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, reader->CheckRead(indices_size / sizeof(uint32_t)); uint32_t index_ptr = reader->ptr(); reader->Advance(count - 1); - if (!PrepareDraw(&draw_command_)) { - return false; - } + PrepareDraw(&draw_command_); draw_command_.prim_type = prim_type; draw_command_.start_index = 0; draw_command_.index_count = index_count; draw_command_.base_vertex = 0; - draw_command_.index_buffer = nullptr; + draw_command_.index_buffer.address = nullptr; return IssueDraw(&draw_command_); } @@ -1056,26 +1052,62 @@ bool CommandProcessor::LoadShader(ShaderType shader_type, return true; } -bool CommandProcessor::PrepareDraw(DrawCommand* draw_command) { - SCOPE_profile_cpu_f("gpu"); +void CommandProcessor::PrepareDraw(DrawCommand* draw_command) { auto& regs = *register_file_; auto& cmd = *draw_command; // Reset the things we don't modify so that we have clean state. cmd.prim_type = PrimitiveType::kPointList; cmd.index_count = 0; - cmd.index_buffer = nullptr; + cmd.index_buffer.address = nullptr; // Generic stuff. cmd.start_index = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32; cmd.base_vertex = 0; +} + +bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { + SCOPE_profile_cpu_f("gpu"); + auto& regs = *register_file_; + auto enable_mode = + static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + if (enable_mode == ModeControl::kIgnore) { + // Ignored. + return true; + } else if (enable_mode == ModeControl::kCopy) { + // Special copy handling. + return IssueCopy(draw_command); + } if (!UpdateState(draw_command)) { + PLOGE("Unable to setup render state"); return false; } - if (!UpdateRenderTargets()) { + if (!UpdateRenderTargets(draw_command)) { + PLOGE("Unable to setup render targets"); return false; } + + // if (!PopulateShaders(draw_command)) { + // XELOGE("Unable to prepare draw shaders"); + // return false; + //} + // if (!PopulateSamplers(draw_command)) { + // XELOGE("Unable to prepare draw samplers"); + // return false; + //} + + // if (!PopulateIndexBuffer(draw_command)) { + // PLOGE("Unable to setup index buffer"); + // return false; + //} + // if (!PopulateVertexBuffers(draw_command)) { + // XELOGE("Unable to setup vertex buffers"); + // return false; + //} + + // TODO(benvanik): draw. + return true; } @@ -1116,7 +1148,7 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { "Need <=16k uniform data"); auto buffer_ptr = reinterpret_cast( - glMapNamedBufferRange(uniform_data_buffer_, 0, 0, + glMapNamedBufferRange(uniform_data_buffer_, 0, 16 * 1024, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)); if (!buffer_ptr) { PLOGE("Unable to map uniform data buffer"); @@ -1150,7 +1182,7 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0 // Whether each of the viewport settings is enabled. - // We require it to be all or nothing right now. + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; @@ -1161,6 +1193,15 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { assert_true(vport_xscale_enable == vport_yscale_enable == vport_zscale_enable == vport_xoffset_enable == vport_yoffset_enable == vport_zoffset_enable); + // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. + // = false: multiply the X, Y coordinates by 1/W0. + bool vtx_xy_fmt = (vte_control >> 8) & 0x1; + // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. + // = false: multiply the Z coordinate by 1/W0. + bool vtx_z_fmt = (vte_control >> 9) & 0x1; + // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to + // get 1/W0. + bool vtx_w0_fmt = (vte_control >> 10) & 0x1; // TODO(benvanik): pass to shaders? disable transform? etc? glViewport(0, 0, 1280, 720); @@ -1298,24 +1339,24 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { glBlendColor(blend_color[0], blend_color[1], blend_color[2], blend_color[3]); static const GLenum compare_func_map[] = { - /* 0 */ GL_NEVER, - /* 1 */ GL_LESS, - /* 2 */ GL_EQUAL, - /* 3 */ GL_LEQUAL, - /* 4 */ GL_GREATER, - /* 5 */ GL_NOTEQUAL, - /* 6 */ GL_GEQUAL, - /* 7 */ GL_ALWAYS, + /* 0 */ GL_NEVER, + /* 1 */ GL_LESS, + /* 2 */ GL_EQUAL, + /* 3 */ GL_LEQUAL, + /* 4 */ GL_GREATER, + /* 5 */ GL_NOTEQUAL, + /* 6 */ GL_GEQUAL, + /* 7 */ GL_ALWAYS, }; static const GLenum stencil_op_map[] = { - /* 0 */ GL_KEEP, - /* 1 */ GL_ZERO, - /* 2 */ GL_REPLACE, - /* 3 */ GL_INCR_WRAP, - /* 4 */ GL_DECR_WRAP, - /* 5 */ GL_INVERT, - /* 6 */ GL_INCR, - /* 7 */ GL_DECR, + /* 0 */ GL_KEEP, + /* 1 */ GL_ZERO, + /* 2 */ GL_REPLACE, + /* 3 */ GL_INCR_WRAP, + /* 4 */ GL_DECR_WRAP, + /* 5 */ GL_INVERT, + /* 6 */ GL_INCR, + /* 7 */ GL_DECR, }; uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; // A2XX_RB_DEPTHCONTROL_Z_ENABLE @@ -1382,18 +1423,172 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { return true; } -bool CommandProcessor::UpdateRenderTargets() { +bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { auto& regs = *register_file_; + auto enable_mode = + static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + + // RB_SURFACE_INFO + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_pitch = surface_info & 0x3FFF; + auto surface_msaa = static_cast((surface_info >> 16) & 0x3); + + // Get/create all color render targets, if we are using them. + // In depth-only mode we don't need them. + GLuint color_targets[4] = {0, 0, 0, 0}; + if (enable_mode == ModeControl::kColorDepth) { + uint32_t color_info[4] = { + regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, + regs[XE_GPU_REG_RB_COLOR2_INFO].u32, + regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + }; + for (int n = 0; n < poly::countof(color_info); n++) { + uint32_t color_base = color_info[n] & 0xFFF; + auto color_format = + static_cast((color_info[n] >> 16) & 0xF); + color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa, + color_base, color_format); + } + } + + uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + uint32_t depth_base = depth_info & 0xFFF; + auto depth_format = + static_cast((depth_info >> 16) & 0x1); + GLuint depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, + depth_base, depth_format); + // TODO(benvanik): when a game switches does it expect to keep the same + // depth buffer contents? + + // Get/create a framebuffer with the required targets. + GLuint framebuffer = GetFramebuffer(color_targets, depth_target); + glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); + return true; } -bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { - SCOPE_profile_cpu_f("gpu"); - +bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { + // uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32; + // uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF; + // copy_dest_pitch &= 0x3FFF; return true; } +GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch, + MsaaSamples samples, + uint32_t base, + ColorRenderTargetFormat format) { + // Because we don't know the height of anything, we allocate at full res. + // At 2560x2560, it's impossible for EDRAM to fit anymore. + uint32_t width = 2560; + uint32_t height = 2560; + + // NOTE: we strip gamma formats down to normal ones. + if (format == ColorRenderTargetFormat::k8888Gamma) { + format = ColorRenderTargetFormat::k8888; + } + + CachedColorRenderTarget* cached = nullptr; + for (auto& it = cached_color_render_targets_.begin(); + it != cached_color_render_targets_.end(); ++it) { + if (it->base == base && it->width == width && it->height == height && + it->format == format) { + return it->texture; + } + } + cached_color_render_targets_.push_back(CachedColorRenderTarget()); + cached = &cached_color_render_targets_.back(); + cached->base = base; + cached->width = width; + cached->height = height; + cached->format = format; + + GLenum internal_format; + switch (format) { + case ColorRenderTargetFormat::k8888: + case ColorRenderTargetFormat::k8888Gamma: + internal_format = GL_RGBA8; + break; + default: + assert_unhandled_case(format); + return 0; + } + + glCreateTextures(GL_TEXTURE_2D, 1, &cached->texture); + glTextureStorage2D(cached->texture, 1, internal_format, width, height); + + return cached->texture; +} + +GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch, + MsaaSamples samples, + uint32_t base, + DepthRenderTargetFormat format) { + uint32_t width = 2560; + uint32_t height = 2560; + + CachedDepthRenderTarget* cached = nullptr; + for (auto& it = cached_depth_render_targets_.begin(); + it != cached_depth_render_targets_.end(); ++it) { + if (it->base == base && it->width == width && it->height == height && + it->format == format) { + return it->texture; + } + } + cached_depth_render_targets_.push_back(CachedDepthRenderTarget()); + cached = &cached_depth_render_targets_.back(); + cached->base = base; + cached->width = width; + cached->height = height; + cached->format = format; + + GLenum internal_format; + switch (format) { + case DepthRenderTargetFormat::kD24S8: + internal_format = GL_DEPTH24_STENCIL8; + break; + case DepthRenderTargetFormat::kD24FS8: + // TODO(benvanik): not supported in GL? + default: + assert_unhandled_case(format); + return 0; + } + + glCreateTextures(GL_TEXTURE_2D, 1, &cached->texture); + glTextureStorage2D(cached->texture, 1, internal_format, width, height); + + return cached->texture; +} + +GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4], + GLuint depth_target) { + CachedFramebuffer* cached = nullptr; + for (auto& it = cached_framebuffers_.begin(); + it != cached_framebuffers_.end(); ++it) { + if (it->depth_target == depth_target && + it->color_targets[0] == color_targets[0] && + it->color_targets[1] == color_targets[1] && + it->color_targets[2] == color_targets[2] && + it->color_targets[3] == color_targets[3]) { + return it->framebuffer; + } + } + cached_framebuffers_.push_back(CachedFramebuffer()); + cached = &cached_framebuffers_.back(); + glCreateFramebuffers(1, &cached->framebuffer); + for (int i = 0; i < 4; ++i) { + cached->color_targets[i] = color_targets[i]; + glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i, + color_targets[i], 0); + } + cached->depth_target = depth_target; + glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT, + depth_target, 0); + return cached->framebuffer; +} + } // namespace gl4 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index 5839d0c87..de71b15b0 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -41,13 +41,17 @@ struct DrawCommand { // Index buffer, if present. // If index_count > 0 but buffer is nullptr then auto draw. - //IndexBufferResource* index_buffer; - void* index_buffer; + struct { + const uint8_t* address; + size_t size; + xenos::Endian endianess; + xenos::IndexFormat format; + } index_buffer; // Vertex buffers. struct { uint32_t input_index; - //VertexBufferResource* buffer; + // VertexBufferResource* buffer; uint32_t stride; uint32_t offset; } vertex_buffers[96]; @@ -56,8 +60,8 @@ struct DrawCommand { // Texture samplers. struct SamplerInput { uint32_t input_index; - //TextureResource* texture; - //SamplerStateResource* sampler_state; + // TextureResource* texture; + // SamplerStateResource* sampler_state; }; SamplerInput vertex_shader_samplers[32]; size_t vertex_shader_sampler_count; @@ -156,10 +160,21 @@ class CommandProcessor { bool LoadShader(ShaderType shader_type, const uint32_t* address, uint32_t dword_count); - bool PrepareDraw(DrawCommand* draw_command); - bool UpdateState(DrawCommand* draw_command); - bool UpdateRenderTargets(); + void PrepareDraw(DrawCommand* draw_command); bool IssueDraw(DrawCommand* draw_command); + bool UpdateState(DrawCommand* draw_command); + bool UpdateRenderTargets(DrawCommand* draw_command); + // bool PopulateIndexBuffer(DrawCommand* draw_command); + // bool PopulateVertexBuffers(DrawCommand* draw_command); + bool IssueCopy(DrawCommand* draw_command); + + GLuint GetFramebuffer(GLuint color_targets[4], GLuint depth_target); + GLuint GetColorRenderTarget(uint32_t pitch, xenos::MsaaSamples samples, + uint32_t base, + xenos::ColorRenderTargetFormat format); + GLuint GetDepthRenderTarget(uint32_t pitch, xenos::MsaaSamples samples, + uint32_t base, + xenos::DepthRenderTargetFormat format); Memory* memory_; uint8_t* membase_; @@ -195,6 +210,29 @@ class CommandProcessor { GLuint uniform_data_buffer_; DrawCommand draw_command_; + + struct CachedFramebuffer { + GLuint color_targets[4]; + GLuint depth_target; + GLuint framebuffer; + }; + std::vector cached_framebuffers_; + struct CachedColorRenderTarget { + uint32_t base; + uint32_t width; + uint32_t height; + xenos::ColorRenderTargetFormat format; + GLuint texture; + }; + std::vector cached_color_render_targets_; + struct CachedDepthRenderTarget { + uint32_t base; + uint32_t width; + uint32_t height; + xenos::DepthRenderTargetFormat format; + GLuint texture; + }; + std::vector cached_depth_render_targets_; }; } // namespace gl4 diff --git a/src/xenia/gpu/gl4/gl_context.cc b/src/xenia/gpu/gl4/gl_context.cc index 594bde6ef..e20565557 100644 --- a/src/xenia/gpu/gl4/gl_context.cc +++ b/src/xenia/gpu/gl4/gl_context.cc @@ -106,6 +106,10 @@ bool GLContext::Initialize(HWND hwnd) { return false; } + while (glGetError()) { + // Clearing errors. + } + return true; } @@ -145,6 +149,10 @@ std::unique_ptr GLContext::CreateShared() { new_context->ClearCurrent(); MakeCurrent(); + while (glGetError()) { + // Clearing errors. + } + return new_context; } diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc index 009e93af7..334331716 100644 --- a/src/xenia/gpu/shader.cc +++ b/src/xenia/gpu/shader.cc @@ -22,6 +22,9 @@ Shader::Shader(ShaderType shader_type, uint64_t data_hash, : shader_type_(shader_type), data_hash_(data_hash), is_valid_(false) { data_.resize(dword_count); poly::copy_and_swap(data_.data(), dword_ptr, dword_count); + std::memset(&alloc_counts_, 0, sizeof(alloc_counts_)); + std::memset(&buffer_inputs_, 0, sizeof(buffer_inputs_)); + std::memset(&sampler_inputs_, 0, sizeof(sampler_inputs_)); // Disassemble ucode and stash. // TODO(benvanik): debug only. diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 449a7a9be..54e706688 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -51,6 +51,35 @@ enum class Endian : uint32_t { k16in32 = 3, }; +enum class IndexFormat : uint32_t { + kInt16, + kInt32, +}; + +enum class MsaaSamples : uint32_t { + k1X = 0, + k2X = 1, + k4X = 2, +}; + +enum class ColorRenderTargetFormat : uint32_t { + k8888 = 0, // D3DFMT_A8R8G8B8 (or ABGR?) + k8888Gamma = 1, // D3DFMT_A8R8G8B8 with gamma correction + // ... +}; + +enum class DepthRenderTargetFormat : uint32_t { + kD24S8 = 0, + kD24FS8 = 1, +}; + +enum class ModeControl : uint32_t { + kIgnore = 0, + kColorDepth = 4, + kDepth = 5, + kCopy = 6, +}; + #define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \ (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \ ((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))