mirror of https://git.suyu.dev/suyu/suyu
Merge pull request #6799 from ameerj/vp9-fixes
nvdec: Fix VP9 reference frame refreshes
This commit is contained in:
commit
f183668a87
|
@ -166,8 +166,6 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
|
|||
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
|
||||
} else {
|
||||
cmd_buffer.map_address = object->dma_map_addr;
|
||||
AddBufferMap(object->dma_map_addr, object->size, object->addr,
|
||||
object->status == nvmap::Object::Status::Allocated);
|
||||
}
|
||||
}
|
||||
std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer));
|
||||
|
@ -178,30 +176,11 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
|
|||
}
|
||||
|
||||
NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||
IoctlMapBuffer params{};
|
||||
std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer));
|
||||
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
|
||||
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
|
||||
|
||||
auto& gpu = system.GPU();
|
||||
|
||||
for (auto& cmd_buffer : cmd_buffer_handles) {
|
||||
const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
|
||||
if (!object) {
|
||||
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
|
||||
std::memcpy(output.data(), ¶ms, output.size());
|
||||
return NvResult::InvalidState;
|
||||
}
|
||||
if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) {
|
||||
gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
|
||||
} else {
|
||||
// This occurs quite frequently, however does not seem to impact functionality
|
||||
LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr,
|
||||
object->dma_map_addr);
|
||||
}
|
||||
object->dma_map_addr = 0;
|
||||
}
|
||||
// This is intntionally stubbed.
|
||||
// Skip unmapping buffers here, as to not break the continuity of the VP9 reference frame
|
||||
// addresses, and risk invalidating data before the async GPU thread is done with it
|
||||
std::memset(output.data(), 0, output.size());
|
||||
LOG_DEBUG(Service_NVDRV, "(STUBBED) called");
|
||||
return NvResult::Success;
|
||||
}
|
||||
|
||||
|
@ -212,33 +191,4 @@ NvResult nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input,
|
|||
return NvResult::Success;
|
||||
}
|
||||
|
||||
std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap(
|
||||
GPUVAddr gpu_addr) const {
|
||||
const auto it = std::find_if(
|
||||
buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) {
|
||||
return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr());
|
||||
});
|
||||
|
||||
ASSERT(it != buffer_mappings.end());
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
|
||||
bool is_allocated) {
|
||||
buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated});
|
||||
}
|
||||
|
||||
std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) {
|
||||
const auto iter{buffer_mappings.find(gpu_addr)};
|
||||
if (iter == buffer_mappings.end()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::size_t size = 0;
|
||||
if (iter->second.IsAllocated()) {
|
||||
size = iter->second.Size();
|
||||
}
|
||||
buffer_mappings.erase(iter);
|
||||
return size;
|
||||
}
|
||||
|
||||
} // namespace Service::Nvidia::Devices
|
||||
|
|
|
@ -23,45 +23,6 @@ public:
|
|||
~nvhost_nvdec_common() override;
|
||||
|
||||
protected:
|
||||
class BufferMap final {
|
||||
public:
|
||||
constexpr BufferMap() = default;
|
||||
|
||||
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
|
||||
: start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
|
||||
|
||||
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
|
||||
bool is_allocated_)
|
||||
: start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
|
||||
is_allocated{is_allocated_} {}
|
||||
|
||||
constexpr VAddr StartAddr() const {
|
||||
return start_addr;
|
||||
}
|
||||
|
||||
constexpr VAddr EndAddr() const {
|
||||
return end_addr;
|
||||
}
|
||||
|
||||
constexpr std::size_t Size() const {
|
||||
return end_addr - start_addr;
|
||||
}
|
||||
|
||||
constexpr VAddr CpuAddr() const {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
constexpr bool IsAllocated() const {
|
||||
return is_allocated;
|
||||
}
|
||||
|
||||
private:
|
||||
GPUVAddr start_addr{};
|
||||
GPUVAddr end_addr{};
|
||||
VAddr cpu_addr{};
|
||||
bool is_allocated{};
|
||||
};
|
||||
|
||||
struct IoctlSetNvmapFD {
|
||||
s32_le nvmap_fd{};
|
||||
};
|
||||
|
@ -154,17 +115,11 @@ protected:
|
|||
NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
|
||||
NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
|
||||
|
||||
std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
|
||||
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
|
||||
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
|
||||
|
||||
s32_le nvmap_fd{};
|
||||
u32_le submit_timeout{};
|
||||
std::shared_ptr<nvmap> nvmap_dev;
|
||||
SyncpointManager& syncpoint_manager;
|
||||
std::array<u32, MaxSyncPoints> device_syncpoints{};
|
||||
// This is expected to be ordered, therefore we must use a map, not unordered_map
|
||||
std::map<GPUVAddr, BufferMap> buffer_mappings;
|
||||
};
|
||||
}; // namespace Devices
|
||||
} // namespace Service::Nvidia
|
||||
|
|
|
@ -11,6 +11,9 @@
|
|||
|
||||
namespace Tegra::Decoder {
|
||||
namespace {
|
||||
constexpr u32 diff_update_probability = 252;
|
||||
constexpr u32 frame_sync_code = 0x498342;
|
||||
|
||||
// Default compressed header probabilities once frame context resets
|
||||
constexpr Vp9EntropyProbs default_probs{
|
||||
.y_mode_prob{
|
||||
|
@ -361,8 +364,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
|
|||
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
|
||||
|
||||
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
|
||||
// order: last, golden, altref, current. It may be worthwhile to track the updates done here
|
||||
// to avoid buffering frame data needed for reference frame updating in the header composition.
|
||||
// order: last, golden, altref, current.
|
||||
std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
|
||||
vp9_info.frame_offsets.begin());
|
||||
|
||||
|
@ -384,33 +386,18 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
|
|||
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
|
||||
current_frame.info.bitstream_size);
|
||||
}
|
||||
// Buffer two frames, saving the last show frame info
|
||||
if (!next_next_frame.bit_stream.empty()) {
|
||||
if (!next_frame.bit_stream.empty()) {
|
||||
Vp9FrameContainer temp{
|
||||
.info = current_frame.info,
|
||||
.bit_stream = std::move(current_frame.bit_stream),
|
||||
};
|
||||
next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
|
||||
current_frame.info = next_next_frame.info;
|
||||
current_frame.bit_stream = std::move(next_next_frame.bit_stream);
|
||||
next_next_frame = std::move(temp);
|
||||
|
||||
if (!next_frame.bit_stream.empty()) {
|
||||
Vp9FrameContainer temp2{
|
||||
.info = current_frame.info,
|
||||
.bit_stream = std::move(current_frame.bit_stream),
|
||||
};
|
||||
next_frame.info.show_frame = current_frame.info.last_frame_shown;
|
||||
current_frame.info = next_frame.info;
|
||||
current_frame.bit_stream = std::move(next_frame.bit_stream);
|
||||
next_frame = std::move(temp2);
|
||||
} else {
|
||||
next_frame.info = current_frame.info;
|
||||
next_frame.bit_stream = std::move(current_frame.bit_stream);
|
||||
}
|
||||
next_frame.info.show_frame = current_frame.info.last_frame_shown;
|
||||
current_frame.info = next_frame.info;
|
||||
current_frame.bit_stream = std::move(next_frame.bit_stream);
|
||||
next_frame = std::move(temp);
|
||||
} else {
|
||||
next_next_frame.info = current_frame.info;
|
||||
next_next_frame.bit_stream = std::move(current_frame.bit_stream);
|
||||
next_frame.info = current_frame.info;
|
||||
next_frame.bit_stream = std::move(current_frame.bit_stream);
|
||||
}
|
||||
return current_frame;
|
||||
}
|
||||
|
@ -613,86 +600,64 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
|
|||
|
||||
// Reset context
|
||||
prev_frame_probs = default_probs;
|
||||
swap_next_golden = false;
|
||||
swap_ref_indices = false;
|
||||
loop_filter_ref_deltas.fill(0);
|
||||
loop_filter_mode_deltas.fill(0);
|
||||
|
||||
// allow frames offsets to stabilize before checking for golden frames
|
||||
grace_period = 4;
|
||||
|
||||
// On key frames, all frame slots are set to the current frame,
|
||||
// so the value of the selected slot doesn't really matter.
|
||||
frame_ctxs.fill({current_frame_number, false, default_probs});
|
||||
frame_ctxs.fill(default_probs);
|
||||
|
||||
// intra only, meaning the frame can be recreated with no other references
|
||||
current_frame_info.intra_only = true;
|
||||
|
||||
} else {
|
||||
|
||||
if (!current_frame_info.show_frame) {
|
||||
uncomp_writer.WriteBit(current_frame_info.intra_only);
|
||||
if (!current_frame_info.last_frame_was_key) {
|
||||
swap_next_golden = !swap_next_golden;
|
||||
}
|
||||
} else {
|
||||
current_frame_info.intra_only = false;
|
||||
}
|
||||
if (!current_frame_info.error_resilient_mode) {
|
||||
uncomp_writer.WriteU(0, 2); // Reset frame context.
|
||||
}
|
||||
|
||||
// Last, Golden, Altref frames
|
||||
std::array<s32, 3> ref_frame_index{0, 1, 2};
|
||||
|
||||
// Set when next frame is hidden
|
||||
// altref and golden references are swapped
|
||||
if (swap_next_golden) {
|
||||
ref_frame_index = std::array<s32, 3>{0, 2, 1};
|
||||
const auto& curr_offsets = current_frame_info.frame_offsets;
|
||||
const auto& next_offsets = next_frame.info.frame_offsets;
|
||||
const bool ref_frames_different = curr_offsets[1] != curr_offsets[2];
|
||||
const bool next_references_swap =
|
||||
(next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]);
|
||||
const bool needs_ref_swap = ref_frames_different && next_references_swap;
|
||||
if (needs_ref_swap) {
|
||||
swap_ref_indices = !swap_ref_indices;
|
||||
}
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> refresh_last;
|
||||
BitField<1, 2, u32> refresh_golden;
|
||||
BitField<2, 1, u32> refresh_alt;
|
||||
} refresh_frame_flags;
|
||||
|
||||
// update Last Frame
|
||||
u64 refresh_frame_flags = 1;
|
||||
|
||||
// golden frame may refresh, determined if the next golden frame offset is changed
|
||||
bool golden_refresh = false;
|
||||
if (grace_period <= 0) {
|
||||
for (s32 index = 1; index < 3; ++index) {
|
||||
if (current_frame_info.frame_offsets[index] !=
|
||||
next_frame.info.frame_offsets[index]) {
|
||||
current_frame_info.refresh_frame[index] = true;
|
||||
golden_refresh = true;
|
||||
grace_period = 3;
|
||||
}
|
||||
refresh_frame_flags.raw = 0;
|
||||
for (u32 index = 0; index < 3; ++index) {
|
||||
// Refresh indices that use the current frame as an index
|
||||
if (curr_offsets[3] == next_offsets[index]) {
|
||||
refresh_frame_flags.raw |= 1u << index;
|
||||
}
|
||||
}
|
||||
|
||||
if (current_frame_info.show_frame &&
|
||||
(!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
|
||||
// Update golden frame
|
||||
refresh_frame_flags = swap_next_golden ? 2 : 4;
|
||||
if (swap_ref_indices) {
|
||||
const u32 temp = refresh_frame_flags.refresh_golden;
|
||||
refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value());
|
||||
refresh_frame_flags.refresh_alt.Assign(temp);
|
||||
}
|
||||
|
||||
if (!current_frame_info.show_frame) {
|
||||
// Update altref
|
||||
refresh_frame_flags = swap_next_golden ? 2 : 4;
|
||||
} else if (golden_refresh) {
|
||||
refresh_frame_flags = 3;
|
||||
}
|
||||
|
||||
if (current_frame_info.intra_only) {
|
||||
uncomp_writer.WriteU(frame_sync_code, 24);
|
||||
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
|
||||
uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
|
||||
uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
|
||||
uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
|
||||
uncomp_writer.WriteBit(false); // Render and frame size different.
|
||||
} else {
|
||||
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
|
||||
|
||||
for (s32 index = 1; index < 4; index++) {
|
||||
const bool swap_indices = needs_ref_swap ^ swap_ref_indices;
|
||||
const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2};
|
||||
uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
|
||||
for (size_t index = 1; index < 4; index++) {
|
||||
uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
|
||||
uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
|
||||
}
|
||||
|
||||
uncomp_writer.WriteBit(true); // Frame size with refs.
|
||||
uncomp_writer.WriteBit(false); // Render and frame size different.
|
||||
uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
|
||||
|
@ -714,10 +679,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
|
|||
frame_ctx_idx = 1;
|
||||
}
|
||||
|
||||
uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
|
||||
prev_frame_probs =
|
||||
frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
|
||||
frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
|
||||
uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
|
||||
prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header
|
||||
frame_ctxs[frame_ctx_idx] = current_frame_info.entropy;
|
||||
|
||||
uncomp_writer.WriteU(current_frame_info.first_level, 6);
|
||||
uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
|
||||
|
@ -812,7 +776,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
|
|||
current_frame_info = curr_frame.info;
|
||||
bitstream = std::move(curr_frame.bit_stream);
|
||||
}
|
||||
|
||||
// The uncompressed header routine sets PrevProb parameters needed for the compressed header
|
||||
auto uncomp_writer = ComposeUncompressedHeader();
|
||||
std::vector<u8> compressed_header = ComposeCompressedHeader();
|
||||
|
@ -828,13 +791,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
|
|||
frame.begin() + uncompressed_header.size());
|
||||
std::copy(bitstream.begin(), bitstream.end(),
|
||||
frame.begin() + uncompressed_header.size() + compressed_header.size());
|
||||
|
||||
// keep track of frame number
|
||||
current_frame_number++;
|
||||
grace_period--;
|
||||
|
||||
// don't display hidden frames
|
||||
hidden = !current_frame_info.show_frame;
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
enum class FrameType { KeyFrame = 0, InterFrame = 1 };
|
||||
namespace Decoder {
|
||||
|
||||
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
|
||||
|
@ -124,7 +123,7 @@ public:
|
|||
|
||||
/// Returns true if the most recent frame was a hidden frame.
|
||||
[[nodiscard]] bool WasFrameHidden() const {
|
||||
return hidden;
|
||||
return !current_frame_info.show_frame;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -178,19 +177,12 @@ private:
|
|||
std::array<s8, 4> loop_filter_ref_deltas{};
|
||||
std::array<s8, 2> loop_filter_mode_deltas{};
|
||||
|
||||
bool hidden = false;
|
||||
s64 current_frame_number = -2; // since we buffer 2 frames
|
||||
s32 grace_period = 6; // frame offsets need to stabilize
|
||||
std::array<FrameContexts, 4> frame_ctxs{};
|
||||
Vp9FrameContainer next_frame{};
|
||||
Vp9FrameContainer next_next_frame{};
|
||||
bool swap_next_golden{};
|
||||
std::array<Vp9EntropyProbs, 4> frame_ctxs{};
|
||||
bool swap_ref_indices{};
|
||||
|
||||
Vp9PictureInfo current_frame_info{};
|
||||
Vp9EntropyProbs prev_frame_probs{};
|
||||
|
||||
s32 diff_update_probability = 252;
|
||||
s32 frame_sync_code = 0x498342;
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
|
|
|
@ -296,12 +296,6 @@ struct RefPoolElement {
|
|||
bool refresh{};
|
||||
};
|
||||
|
||||
struct FrameContexts {
|
||||
s64 from;
|
||||
bool adapted;
|
||||
Vp9EntropyProbs probs;
|
||||
};
|
||||
|
||||
#define ASSERT_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
|
Loading…
Reference in New Issue