forked from ShuriZma/suyu
1
0
Fork 0

Merge pull request #7326 from ameerj/vp8

codecs: Implement VP8 video decoding support
This commit is contained in:
Fernando S 2021-11-14 23:03:56 +01:00 committed by GitHub
commit 720970c4c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 183 additions and 26 deletions

View File

@ -600,6 +600,7 @@ if (YUZU_USE_BUNDLED_FFMPEG)
${LIBVA_LIBRARIES})
set(FFmpeg_HWACCEL_FLAGS
--enable-hwaccel=h264_vaapi
--enable-hwaccel=vp8_vaapi
--enable-hwaccel=vp9_vaapi
--enable-libdrm)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
@ -620,6 +621,7 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--enable-ffnvcodec
--enable-nvdec
--enable-hwaccel=h264_nvdec
--enable-hwaccel=vp8_nvdec
--enable-hwaccel=vp9_nvdec
--extra-cflags=-I${CUDA_INCLUDE_DIRS}
)
@ -670,6 +672,7 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-postproc
--disable-swresample
--enable-decoder=h264
--enable-decoder=vp8
--enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}"
--cxx="${CMAKE_CXX_COMPILER}"

View File

@ -15,6 +15,8 @@ add_library(video_core STATIC
command_classes/codecs/codec.h
command_classes/codecs/h264.cpp
command_classes/codecs/h264.h
command_classes/codecs/vp8.cpp
command_classes/codecs/vp8.h
command_classes/codecs/vp9.cpp
command_classes/codecs/vp9.h
command_classes/codecs/vp9_types.h

View File

@ -8,6 +8,7 @@
#include "common/settings.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp8.h"
#include "video_core/command_classes/codecs/vp9.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@ -46,6 +47,7 @@ void AVFrameDeleter(AVFrame* ptr) {
Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
: gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
Codec::~Codec() {
@ -135,7 +137,9 @@ void Codec::Initialize() {
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case NvdecCommon::VideoCodec::Vp9:
case NvdecCommon::VideoCodec::VP8:
return AV_CODEC_ID_VP8;
case NvdecCommon::VideoCodec::VP9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
@ -176,19 +180,27 @@ void Codec::Decode() {
return;
}
bool vp9_hidden_frame = false;
std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) {
frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
} else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
const auto& frame_data = [&]() {
switch (current_codec) {
case Tegra::NvdecCommon::VideoCodec::H264:
return h264_decoder->ComposeFrame(state, is_first_frame);
case Tegra::NvdecCommon::VideoCodec::VP8:
return vp8_decoder->ComposeFrame(state);
case Tegra::NvdecCommon::VideoCodec::VP9:
vp9_decoder->ComposeFrame(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
return vp9_decoder->GetFrameBytes();
default:
UNREACHABLE();
return std::vector<u8>{};
}
}();
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) {
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
return;
}
packet->data = frame_data.data();
packet->data = const_cast<u8*>(frame_data.data());
packet->size = static_cast<s32>(frame_data.size());
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
@ -252,11 +264,11 @@ std::string_view Codec::GetCurrentCodecName() const {
return "None";
case NvdecCommon::VideoCodec::H264:
return "H264";
case NvdecCommon::VideoCodec::Vp8:
case NvdecCommon::VideoCodec::VP8:
return "VP8";
case NvdecCommon::VideoCodec::H265:
return "H265";
case NvdecCommon::VideoCodec::Vp9:
case NvdecCommon::VideoCodec::VP9:
return "VP9";
default:
return "Unknown";

View File

@ -29,6 +29,7 @@ using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
namespace Decoder {
class H264;
class VP8;
class VP9;
} // namespace Decoder
@ -72,6 +73,7 @@ private:
GPU& gpu;
const NvdecCommon::NvdecRegisters& state;
std::unique_ptr<Decoder::H264> h264_decoder;
std::unique_ptr<Decoder::VP8> vp8_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
std::queue<AVFramePtr> av_frames{};

View File

@ -45,8 +45,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
H264::~H264() = default;
const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
H264DecoderContext context;
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));

View File

@ -75,9 +75,9 @@ public:
explicit H264(GPU& gpu);
~H264();
/// Compose the H264 header of the frame for FFmpeg decoding
[[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
/// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame = false);
private:
std::vector<u8> frame;

View File

@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <vector>
#include "video_core/command_classes/codecs/vp8.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
VP8::VP8(GPU& gpu_) : gpu(gpu_) {}
VP8::~VP8() = default;
/// Assembles a complete VP8 frame for FFmpeg: reads the picture info from GPU memory,
/// synthesizes the uncompressed frame header (3 bytes, or 10 bytes for key frames) and
/// copies the bitstream from GPU memory directly behind it.
/// @param state NVDEC registers providing the picture info and bitstream GPU addresses.
/// @returns Reference to the internal frame buffer, which is rewritten on every call.
const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
    VP8PictureInfo picture_info;
    gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info,
                                  sizeof(VP8PictureInfo));

    const bool key_frame = picture_info.key_frame == 1u;
    const size_t payload_size = static_cast<size_t>(picture_info.vld_buffer_size);
    size_t header_size = 3u;
    if (key_frame) {
        header_size = 10u;
    }
    frame.resize(header_size + payload_size);

    // Based on page 30 of the VP8 specification.
    // https://datatracker.ietf.org/doc/rfc6386/
    // The 3-byte frame tag is built as one little-endian value and then split into bytes.
    u32 frame_tag = key_frame ? 0u : 1u;                          // bit 0: 0 = key frame
    frame_tag |= (picture_info.version & 7u) << 1u;               // bits 1-3: version
    frame_tag |= 1u << 4u;                                        // bit 4: show_frame
    frame_tag |= (picture_info.first_part_size & 0x7ffffu) << 5u; // bits 5-23: partition size
    frame[0] = static_cast<u8>(frame_tag & 0xffu);
    frame[1] = static_cast<u8>((frame_tag >> 8u) & 0xffu);
    frame[2] = static_cast<u8>((frame_tag >> 16u) & 0xffu);

    if (key_frame) {
        // Key frames carry a fixed 3-byte start code followed by the frame dimensions.
        static constexpr std::array<u8, 3> start_code{0x9du, 0x01u, 0x2au};
        for (size_t i = 0; i < start_code.size(); ++i) {
            frame[3 + i] = start_code[i];
        }

        // TODO(ameerj): Horizontal/Vertical Scale
        // 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits)
        const u32 width_bits = picture_info.frame_width & 0x3fffu;
        frame[6] = static_cast<u8>(width_bits & 0xffu);
        frame[7] = static_cast<u8>(width_bits >> 8u);
        // 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits)
        const u32 height_bits = picture_info.frame_height & 0x3fffu;
        frame[8] = static_cast<u8>(height_bits & 0xffu);
        frame[9] = static_cast<u8>(height_bits >> 8u);
    }

    // The bitstream payload is placed immediately after the synthesized header.
    u8* const payload_dst = frame.data() + header_size;
    const u64 payload_src = state.frame_bitstream_offset;
    gpu.MemoryManager().ReadBlock(payload_src, payload_dst, payload_size);
    return frame;
}
} // namespace Tegra::Decoder

View File

@ -0,0 +1,74 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
namespace Tegra {
class GPU;
namespace Decoder {
/// Builds VP8 frames for FFmpeg decoding from NVDEC register state.
/// The frame is assembled from a picture info block and a bitstream, both read through the
/// GPU's memory manager (see ComposeFrame in vp8.cpp).
class VP8 {
public:
explicit VP8(GPU& gpu);
~VP8();
/// Compose the VP8 frame for FFmpeg decoding
/// @param state NVDEC registers; picture_info_offset and frame_bitstream_offset are read.
/// @returns Reference to the internal frame buffer (header followed by bitstream data).
[[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state);
private:
// Storage for the composed frame; ComposeFrame resizes and refills it on each call.
std::vector<u8> frame;
// GPU whose memory manager is used to read the picture info and the bitstream.
GPU& gpu;
// Picture info block that ComposeFrame reads from GPU memory at state.picture_info_offset.
// Member order and padding define the binary layout; the total size is pinned to 0xc0 bytes
// by the static_assert below, so fields must not be reordered or resized.
struct VP8PictureInfo {
INSERT_PADDING_WORDS_NOINIT(14);
u16 frame_width; // actual frame width
u16 frame_height; // actual frame height
u8 key_frame; // ComposeFrame treats the value 1 as a key frame
u8 version; // only the low 3 bits are written into the frame header
// One byte viewed either raw or as three bitfields (2 + 3 + 3 bits).
union {
u8 raw;
BitField<0, 2, u8> tile_format;
BitField<2, 3, u8> gob_height;
BitField<5, 3, u8> reserverd_surface_format;
};
u8 error_conceal_on; // 1: error conceal on; 0: off
u32 first_part_size; // the size of first partition(frame header and mb header partition)
u32 hist_buffer_size; // in units of 256
u32 vld_buffer_size; // in units of 1; ComposeFrame uses it as the bitstream byte count
// Current frame buffers
std::array<u32, 2> frame_stride; // [y_c]
u32 luma_top_offset; // offset of luma top field in units of 256
u32 luma_bot_offset; // offset of luma bottom field in units of 256
u32 luma_frame_offset; // offset of luma frame in units of 256
u32 chroma_top_offset; // offset of chroma top field in units of 256
u32 chroma_bot_offset; // offset of chroma bottom field in units of 256
u32 chroma_frame_offset; // offset of chroma frame in units of 256
INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams
// Decode picture buffer related
s8 current_output_memory_layout;
// output NV12/NV24 setting. index 0: golden; 1: altref; 2: last
std::array<s8, 3> output_memory_layout;
u8 segmentation_feature_data_update;
INSERT_PADDING_BYTES_NOINIT(3);
// ucode return result
u32 result_value;
std::array<u32, 8> partition_offset;
INSERT_PADDING_WORDS_NOINIT(3);
};
// Guards the layout above: any accidental change in member sizes or padding fails to compile.
static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
};
} // namespace Decoder
} // namespace Tegra

View File

@ -770,7 +770,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
return uncomp_writer;
}
const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) {
void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
std::vector<u8> bitstream;
{
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
@ -792,7 +792,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters
frame.begin() + uncompressed_header.size());
std::copy(bitstream.begin(), bitstream.end(),
frame.begin() + uncompressed_header.size() + compressed_header.size());
return frame;
}
VpxRangeEncoder::VpxRangeEncoder() {

View File

@ -116,16 +116,20 @@ public:
VP9(VP9&&) = default;
VP9& operator=(VP9&&) = delete;
/// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
/// documentation
[[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
const NvdecCommon::NvdecRegisters& state);
/// Composes the VP9 frame from the GPU state information.
/// Based on the official VP9 spec documentation
void ComposeFrame(const NvdecCommon::NvdecRegisters& state);
/// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const {
return !current_frame_info.show_frame;
}
/// Returns a const reference to the composed frame data.
[[nodiscard]] const std::vector<u8>& GetFrameBytes() const {
return frame;
}
private:
/// Generates compressed header probability updates in the bitstream writer
template <typename T, std::size_t N>

View File

@ -35,7 +35,8 @@ AVFramePtr Nvdec::GetFrame() {
void Nvdec::Execute() {
switch (codec->GetCurrentCodec()) {
case NvdecCommon::VideoCodec::H264:
case NvdecCommon::VideoCodec::Vp9:
case NvdecCommon::VideoCodec::VP8:
case NvdecCommon::VideoCodec::VP9:
codec->Decode();
break;
default:

View File

@ -13,9 +13,9 @@ namespace Tegra::NvdecCommon {
enum class VideoCodec : u64 {
None = 0x0,
H264 = 0x3,
Vp8 = 0x5,
VP8 = 0x5,
H265 = 0x7,
Vp9 = 0x9,
VP9 = 0x9,
};
// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
@ -50,7 +50,10 @@ struct NvdecRegisters {
u64 h264_last_surface_chroma_offset; ///< 0x0858
std::array<u64, 17> surface_luma_offset; ///< 0x0860
std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970
INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970
u64 vp8_prob_data_offset; ///< 0x0A80
u64 vp8_header_partition_buf_offset; ///< 0x0A88
INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90
u64 vp9_entropy_probs_offset; ///< 0x0B80
u64 vp9_backward_updates_offset; ///< 0x0B88
u64 vp9_last_frame_segmap_offset; ///< 0x0B90
@ -81,6 +84,8 @@ ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150);
ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151);
ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);