From 8a099ac99f61871f0492864d7e95a5922e57223d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Mon, 15 Apr 2019 12:43:37 -0400
Subject: [PATCH] Correct Kepler Memory on Linear Pushes.

---
NOTE(review): this patch was reconstructed from a copy whose line breaks,
indentation and some angle-bracket tokens (include names, template
arguments, author e-mail) had been stripped. Hunk line counts were checked
against the hunk headers; indentation is inferred from brace nesting in
4-space clang-format style, so `git apply --ignore-whitespace` may be
needed. Confirm against the upstream commit before relying on it.

 src/video_core/engines/kepler_memory.cpp | 40 +++++++++++++++---------
 src/video_core/engines/kepler_memory.h   | 24 ++++++++++++--
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index cd51a31d78..3ed28f4a78 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -10,6 +10,8 @@
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
+#include "video_core/textures/convert.h"
+#include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
@@ -27,30 +29,40 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
 
     switch (method_call.method) {
     case KEPLERMEMORY_REG_INDEX(exec): {
-        state.write_offset = 0;
+        ProcessExec();
         break;
     }
     case KEPLERMEMORY_REG_INDEX(data): {
-        ProcessData(method_call.argument);
+        ProcessData(method_call.argument, method_call.IsLastCall());
         break;
     }
     }
 }
 
-void KeplerMemory::ProcessData(u32 data) {
-    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
-    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
+void KeplerMemory::ProcessExec() {
+    state.write_offset = 0;
+    state.copy_size = regs.line_length_in * regs.line_count;
+    state.inner_buffer.resize(state.copy_size);
+}
 
-    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might
-    // contain a dirty surface that will have to be written back to memory.
-    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
-    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
-    memory_manager.Write(address, data);
+void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
+    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
+    std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size);
+    state.write_offset += sub_copy_size;
+    if (is_last_call) {
+        UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented");
+        if (regs.exec.linear != 0) {
+            const GPUVAddr address{regs.dest.Address()};
+            const auto host_ptr = memory_manager.GetPointer(address);
+            // We have to invalidate the destination region to evict any outdated surfaces from the
+            // cache. We do this before actually writing the new data because the destination
+            // address might contain a dirty surface that will have to be written back to memory.
 
-    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
-
-    state.write_offset++;
+            rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size);
+            std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size);
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
+    }
 }
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 78b6c3e458..5f892ddad1 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <vector>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -51,7 +52,11 @@ public:
                     u32 address_high;
                     u32 address_low;
                     u32 pitch;
-                    u32 block_dimensions;
+                    union {
+                        BitField<0, 4, u32> block_width;
+                        BitField<4, 4, u32> block_height;
+                        BitField<8, 4, u32> block_depth;
+                    };
                     u32 width;
                     u32 height;
                     u32 depth;
@@ -63,6 +68,18 @@ public:
                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                      address_low);
                     }
+
+                    u32 BlockWidth() const {
+                        return 1U << block_width.Value();
+                    }
+
+                    u32 BlockHeight() const {
+                        return 1U << block_height.Value();
+                    }
+
+                    u32 BlockDepth() const {
+                        return 1U << block_depth.Value();
+                    }
                 } dest;
 
                 struct {
@@ -81,6 +98,8 @@ public:
 
     struct {
         u32 write_offset = 0;
+        u32 copy_size = 0;
+        std::vector<u8> inner_buffer;
     } state{};
 
 private:
@@ -88,7 +107,8 @@ private:
     VideoCore::RasterizerInterface& rasterizer;
     MemoryManager& memory_manager;
 
-    void ProcessData(u32 data);
+    void ProcessExec();
+    void ProcessData(u32 data, bool is_last_call);
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \