diff --git a/src/core/mdec.cpp b/src/core/mdec.cpp index 321ddeb19..d553c1677 100644 --- a/src/core/mdec.cpp +++ b/src/core/mdec.cpp @@ -126,63 +126,70 @@ static void YUVToRGB_New(u32 xx, u32 yy, const std::array& Crblk, const static void YUVToMono(const std::array& Yblk); -static StatusRegister s_status = {}; -static bool s_enable_dma_in = false; -static bool s_enable_dma_out = false; +namespace { +struct MDECState +{ + StatusRegister status = {}; + bool enable_dma_in = false; + bool enable_dma_out = false; -// Even though the DMA is in words, we access the FIFO as halfwords. -static InlineFIFOQueue s_data_in_fifo; -static InlineFIFOQueue s_data_out_fifo; -static State s_state = State::Idle; -static u32 s_remaining_halfwords = 0; + // Even though the DMA is in words, we access the FIFO as halfwords. + InlineFIFOQueue data_in_fifo; + InlineFIFOQueue data_out_fifo; + State state = State::Idle; + u32 remaining_halfwords = 0; -static std::array s_iq_uv{}; -static std::array s_iq_y{}; + std::array iq_uv{}; + std::array iq_y{}; -static std::array s_scale_table{}; + std::array scale_table{}; -// blocks, for colour: 0 - Crblk, 1 - Cbblk, 2-5 - Y 1-4 -alignas(VECTOR_ALIGNMENT) static std::array, NUM_BLOCKS> s_blocks; -static u32 s_current_block = 0; // block (0-5) -static u32 s_current_coefficient = 64; // k (in block) -static u16 s_current_q_scale = 0; + // blocks, for colour: 0 - Crblk, 1 - Cbblk, 2-5 - Y 1-4 + alignas(VECTOR_ALIGNMENT) std::array, NUM_BLOCKS> blocks; + u32 current_block = 0; // block (0-5) + u32 current_coefficient = 64; // k (in block) + u16 current_q_scale = 0; -alignas(16) static std::array s_block_rgb{}; -static std::unique_ptr s_block_copy_out_event; + alignas(16) std::array block_rgb{}; + std::unique_ptr block_copy_out_event; -static u32 s_total_blocks_decoded = 0; + u32 total_blocks_decoded = 0; +}; +} // namespace + +ALIGN_TO_CACHE_LINE static MDECState s_state; } // namespace MDEC void MDEC::Initialize() { - s_block_copy_out_event = + s_state.block_copy_out_event = TimingEvents::CreateTimingEvent("MDEC Block Copy Out", 1, 1, &MDEC::CopyOutBlock, nullptr, false); - s_total_blocks_decoded = 0; + s_state.total_blocks_decoded = 0; Reset(); } void MDEC::Shutdown() { - s_block_copy_out_event.reset(); + s_state.block_copy_out_event.reset(); } void MDEC::Reset() { - s_block_copy_out_event->Deactivate(); + s_state.block_copy_out_event->Deactivate(); SoftReset(); } bool MDEC::DoState(StateWrapper& sw) { - sw.Do(&s_status.bits); - sw.Do(&s_enable_dma_in); - sw.Do(&s_enable_dma_out); - sw.Do(&s_data_in_fifo); - sw.Do(&s_data_out_fifo); - sw.Do(&s_state); - sw.Do(&s_remaining_halfwords); - sw.Do(&s_iq_uv); - sw.Do(&s_iq_y); + sw.Do(&s_state.status.bits); + sw.Do(&s_state.enable_dma_in); + sw.Do(&s_state.enable_dma_out); + sw.Do(&s_state.data_in_fifo); + sw.Do(&s_state.data_out_fifo); + sw.Do(&s_state.state); + sw.Do(&s_state.remaining_halfwords); + sw.Do(&s_state.iq_uv); + sw.Do(&s_state.iq_y); if (sw.GetVersion() < 66) [[unlikely]] { @@ -192,19 +199,19 @@ bool MDEC::DoState(StateWrapper& sw) } else { - sw.Do(&s_scale_table); + sw.Do(&s_state.scale_table); } - sw.Do(&s_blocks); - sw.Do(&s_current_block); - sw.Do(&s_current_coefficient); - sw.Do(&s_current_q_scale); - sw.Do(&s_block_rgb); + sw.Do(&s_state.blocks); + sw.Do(&s_state.current_block); + sw.Do(&s_state.current_coefficient); + sw.Do(&s_state.current_q_scale); + sw.Do(&s_state.block_rgb); bool block_copy_out_pending = HasPendingBlockCopyOut(); sw.Do(&block_copy_out_pending); if (sw.IsReading()) - s_block_copy_out_event->SetState(block_copy_out_pending); + s_state.block_copy_out_event->SetState(block_copy_out_pending); return !sw.HasError(); } @@ -218,8 +225,8 @@ u32 MDEC::ReadRegister(u32 offset) case 4: { - TRACE_LOG("MDEC status register -> 0x{:08X}", s_status.bits); - return s_status.bits; + TRACE_LOG("MDEC status register -> 0x{:08X}", s_state.status.bits); + return s_state.status.bits; } [[unlikely]] default: @@ -248,8 +255,8 @@ void MDEC::WriteRegister(u32 offset, u32 value) if (cr.reset) SoftReset(); - s_enable_dma_in = cr.enable_dma_in; - s_enable_dma_out = cr.enable_dma_out; + s_state.enable_dma_in = cr.enable_dma_in; + s_state.enable_dma_out = cr.enable_dma_out; Execute(); return; } @@ -264,93 +271,94 @@ void MDEC::WriteRegister(u32 offset, u32 value) void MDEC::DMARead(u32* words, u32 word_count) { - if (s_data_out_fifo.GetSize() < word_count) [[unlikely]] + if (s_state.data_out_fifo.GetSize() < word_count) [[unlikely]] { - WARNING_LOG("Insufficient data in output FIFO (requested {}, have {})", word_count, s_data_out_fifo.GetSize()); + WARNING_LOG("Insufficient data in output FIFO (requested {}, have {})", word_count, + s_state.data_out_fifo.GetSize()); } - const u32 words_to_read = std::min(word_count, s_data_out_fifo.GetSize()); + const u32 words_to_read = std::min(word_count, s_state.data_out_fifo.GetSize()); if (words_to_read > 0) { - s_data_out_fifo.PopRange(words, words_to_read); + s_state.data_out_fifo.PopRange(words, words_to_read); words += words_to_read; word_count -= words_to_read; } - DEBUG_LOG("DMA read complete, {} bytes left", s_data_out_fifo.GetSize() * sizeof(u32)); - if (s_data_out_fifo.IsEmpty()) + DEBUG_LOG("DMA read complete, {} bytes left", s_state.data_out_fifo.GetSize() * sizeof(u32)); + if (s_state.data_out_fifo.IsEmpty()) Execute(); } void MDEC::DMAWrite(const u32* words, u32 word_count) { - if (s_data_in_fifo.GetSpace() < (word_count * 2)) [[unlikely]] + if (s_state.data_in_fifo.GetSpace() < (word_count * 2)) [[unlikely]] { - WARNING_LOG("Input FIFO overflow (writing {}, space {})", word_count * 2, s_data_in_fifo.GetSpace()); + WARNING_LOG("Input FIFO overflow (writing {}, space {})", word_count * 2, s_state.data_in_fifo.GetSpace()); } - const u32 halfwords_to_write = std::min(word_count * 2, s_data_in_fifo.GetSpace() & ~u32(2)); - s_data_in_fifo.PushRange(reinterpret_cast(words), halfwords_to_write); + const u32 halfwords_to_write = std::min(word_count * 2, s_state.data_in_fifo.GetSpace() & ~u32(2)); + s_state.data_in_fifo.PushRange(reinterpret_cast(words), halfwords_to_write); Execute(); } bool MDEC::HasPendingBlockCopyOut() { - return s_block_copy_out_event->IsActive(); + return s_state.block_copy_out_event->IsActive(); } void MDEC::SoftReset() { - s_status.bits = 0; - s_enable_dma_in = false; - s_enable_dma_out = false; - s_data_in_fifo.Clear(); - s_data_out_fifo.Clear(); - s_state = State::Idle; - s_remaining_halfwords = 0; - s_current_block = 0; - s_current_coefficient = 64; - s_current_q_scale = 0; - s_block_copy_out_event->Deactivate(); + s_state.status.bits = 0; + s_state.enable_dma_in = false; + s_state.enable_dma_out = false; + s_state.data_in_fifo.Clear(); + s_state.data_out_fifo.Clear(); + s_state.state = State::Idle; + s_state.remaining_halfwords = 0; + s_state.current_block = 0; + s_state.current_coefficient = 64; + s_state.current_q_scale = 0; + s_state.block_copy_out_event->Deactivate(); UpdateStatus(); } void MDEC::ResetDecoder() { - s_current_block = 0; - s_current_coefficient = 64; - s_current_q_scale = 0; + s_state.current_block = 0; + s_state.current_coefficient = 64; + s_state.current_q_scale = 0; } void MDEC::UpdateStatus() { - s_status.data_out_fifo_empty = s_data_out_fifo.IsEmpty(); - s_status.data_in_fifo_full = s_data_in_fifo.IsFull(); + s_state.status.data_out_fifo_empty = s_state.data_out_fifo.IsEmpty(); + s_state.status.data_in_fifo_full = s_state.data_in_fifo.IsFull(); - s_status.command_busy = (s_state != State::Idle); - s_status.parameter_words_remaining = Truncate16((s_remaining_halfwords / 2) - 1); - s_status.current_block = (s_current_block + 4) % NUM_BLOCKS; + s_state.status.command_busy = (s_state.state != State::Idle); + s_state.status.parameter_words_remaining = Truncate16((s_state.remaining_halfwords / 2) - 1); + s_state.status.current_block = (s_state.current_block + 4) % NUM_BLOCKS; // we always want data in if it's enabled - const bool data_in_request = s_enable_dma_in && s_data_in_fifo.GetSpace() >= (32 * 2); - s_status.data_in_request = data_in_request; + const bool data_in_request = s_state.enable_dma_in && s_state.data_in_fifo.GetSpace() >= (32 * 2); + s_state.status.data_in_request = data_in_request; DMA::SetRequest(DMA::Channel::MDECin, data_in_request); // we only want to send data out if we have some in the fifo - const bool data_out_request = s_enable_dma_out && !s_data_out_fifo.IsEmpty(); - s_status.data_out_request = data_out_request; + const bool data_out_request = s_state.enable_dma_out && !s_state.data_out_fifo.IsEmpty(); + s_state.status.data_out_request = data_out_request; DMA::SetRequest(DMA::Channel::MDECout, data_out_request); } u32 MDEC::ReadDataRegister() { - if (s_data_out_fifo.IsEmpty()) + if (s_state.data_out_fifo.IsEmpty()) { // Stall the CPU until we're done processing. if (HasPendingBlockCopyOut()) { DEV_LOG("MDEC data out FIFO empty on read - stalling CPU"); - CPU::AddPendingTicks(s_block_copy_out_event->GetTicksUntilNextExecution()); + CPU::AddPendingTicks(s_state.block_copy_out_event->GetTicksUntilNextExecution()); } else { @@ -359,8 +367,8 @@ u32 MDEC::ReadDataRegister() } } - const u32 value = s_data_out_fifo.Pop(); - if (s_data_out_fifo.IsEmpty()) + const u32 value = s_state.data_out_fifo.Pop(); + if (s_state.data_out_fifo.IsEmpty()) Execute(); else UpdateStatus(); @@ -372,8 +380,8 @@ void MDEC::WriteCommandRegister(u32 value) { TRACE_LOG("MDEC command/data register <- 0x{:08X}", value); - s_data_in_fifo.Push(Truncate16(value)); - s_data_in_fifo.Push(Truncate16(value >> 16)); + s_state.data_in_fifo.Push(Truncate16(value)); + s_state.data_in_fifo.Push(Truncate16(value >> 16)); Execute(); } @@ -382,20 +390,21 @@ void MDEC::Execute() { for (;;) { - switch (s_state) + switch (s_state.state) { case State::Idle: { - if (s_data_in_fifo.GetSize() < 2) + if (s_state.data_in_fifo.GetSize() < 2) goto finished; // first word - const CommandWord cw{ZeroExtend32(s_data_in_fifo.Peek(0)) | (ZeroExtend32(s_data_in_fifo.Peek(1)) << 16)}; - s_status.data_output_depth = cw.data_output_depth; - s_status.data_output_signed = cw.data_output_signed; - s_status.data_output_bit15 = cw.data_output_bit15; - s_data_in_fifo.Remove(2); - s_data_out_fifo.Clear(); + const CommandWord cw{ZeroExtend32(s_state.data_in_fifo.Peek(0)) | + (ZeroExtend32(s_state.data_in_fifo.Peek(1)) << 16)}; + s_state.status.data_output_depth = cw.data_output_depth; + s_state.status.data_output_signed = cw.data_output_signed; + s_state.status.data_output_bit15 = cw.data_output_bit15; + s_state.data_in_fifo.Remove(2); + s_state.data_out_fifo.Clear(); u32 num_words; State new_state; @@ -426,8 +435,8 @@ void MDEC::Execute() DEBUG_LOG("MDEC command: 0x{:08X} ({}, {} words in parameter, {} expected)", cw.bits, static_cast(cw.command.GetValue()), cw.parameter_word_count.GetValue(), num_words); - s_remaining_halfwords = num_words * 2; - s_state = new_state; + s_state.remaining_halfwords = num_words * 2; + s_state.state = new_state; UpdateStatus(); continue; } @@ -437,15 +446,15 @@ void MDEC::Execute() if (HandleDecodeMacroblockCommand()) { // we should be writing out now - DebugAssert(s_state == State::WritingMacroblock); + DebugAssert(s_state.state == State::WritingMacroblock); goto finished; } - if (s_remaining_halfwords == 0 && s_current_block != NUM_BLOCKS) + if (s_state.remaining_halfwords == 0 && s_state.current_block != NUM_BLOCKS) { // expecting data, but nothing more will be coming. bail out ResetDecoder(); - s_state = State::Idle; + s_state.state = State::Idle; continue; } @@ -460,22 +469,22 @@ void MDEC::Execute() case State::SetIqTable: { - if (s_data_in_fifo.GetSize() < s_remaining_halfwords) + if (s_state.data_in_fifo.GetSize() < s_state.remaining_halfwords) goto finished; HandleSetQuantTableCommand(); - s_state = State::Idle; + s_state.state = State::Idle; UpdateStatus(); continue; } case State::SetScaleTable: { - if (s_data_in_fifo.GetSize() < s_remaining_halfwords) + if (s_state.data_in_fifo.GetSize() < s_state.remaining_halfwords) goto finished; HandleSetScaleCommand(); - s_state = State::Idle; + s_state.state = State::Idle; UpdateStatus(); continue; } @@ -483,13 +492,13 @@ void MDEC::Execute() case State::NoCommand: { // can potentially have a large amount of halfwords, so eat them as we go - const u32 words_to_consume = std::min(s_remaining_halfwords, s_data_in_fifo.GetSize()); - s_data_in_fifo.Remove(words_to_consume); - s_remaining_halfwords -= words_to_consume; - if (s_remaining_halfwords == 0) + const u32 words_to_consume = std::min(s_state.remaining_halfwords, s_state.data_in_fifo.GetSize()); + s_state.data_in_fifo.Remove(words_to_consume); + s_state.remaining_halfwords -= words_to_consume; + if (s_state.remaining_halfwords == 0) goto finished; - s_state = State::Idle; + s_state.state = State::Idle; UpdateStatus(); continue; } @@ -507,7 +516,7 @@ finished: bool MDEC::HandleDecodeMacroblockCommand() { - if (s_status.data_output_depth <= DataOutputDepth_8Bit) + if (s_state.status.data_output_depth <= DataOutputDepth_8Bit) return DecodeMonoMacroblock(); else return DecodeColoredMacroblock(); @@ -516,33 +525,33 @@ bool MDEC::HandleDecodeMacroblockCommand() bool MDEC::DecodeMonoMacroblock() { // TODO: This should guard the output not the input - if (!s_data_out_fifo.IsEmpty()) + if (!s_state.data_out_fifo.IsEmpty()) return false; if (g_settings.use_old_mdec_routines) [[unlikely]] { - if (!DecodeRLE_Old(s_blocks[0].data(), s_iq_y.data())) + if (!DecodeRLE_Old(s_state.blocks[0].data(), s_state.iq_y.data())) return false; - IDCT_Old(s_blocks[0].data()); + IDCT_Old(s_state.blocks[0].data()); } else { - if (!DecodeRLE_New(s_blocks[0].data(), s_iq_y.data())) + if (!DecodeRLE_New(s_state.blocks[0].data(), s_state.iq_y.data())) return false; - IDCT_New(s_blocks[0].data()); + IDCT_New(s_state.blocks[0].data()); } - DEBUG_LOG("Decoded mono macroblock, {} words remaining", s_remaining_halfwords / 2); + DEBUG_LOG("Decoded mono macroblock, {} words remaining", s_state.remaining_halfwords / 2); ResetDecoder(); - s_state = State::WritingMacroblock; + s_state.state = State::WritingMacroblock; - YUVToMono(s_blocks[0]); + YUVToMono(s_state.blocks[0]); ScheduleBlockCopyOut(TICKS_PER_BLOCK * 6); - s_total_blocks_decoded++; + s_state.total_blocks_decoded++; return true; } @@ -550,52 +559,54 @@ bool MDEC::DecodeColoredMacroblock() { if (g_settings.use_old_mdec_routines) [[unlikely]] { - for (; s_current_block < NUM_BLOCKS; s_current_block++) + for (; s_state.current_block < NUM_BLOCKS; s_state.current_block++) { - if (!DecodeRLE_Old(s_blocks[s_current_block].data(), (s_current_block >= 2) ? s_iq_y.data() : s_iq_uv.data())) + if (!DecodeRLE_Old(s_state.blocks[s_state.current_block].data(), + (s_state.current_block >= 2) ? s_state.iq_y.data() : s_state.iq_uv.data())) return false; - IDCT_Old(s_blocks[s_current_block].data()); + IDCT_Old(s_state.blocks[s_state.current_block].data()); } - if (!s_data_out_fifo.IsEmpty()) + if (!s_state.data_out_fifo.IsEmpty()) return false; // done decoding - DEBUG_LOG("Decoded colored macroblock, {} words remaining", s_remaining_halfwords / 2); + DEBUG_LOG("Decoded colored macroblock, {} words remaining", s_state.remaining_halfwords / 2); ResetDecoder(); - s_state = State::WritingMacroblock; + s_state.state = State::WritingMacroblock; - YUVToRGB_Old(0, 0, s_blocks[0], s_blocks[1], s_blocks[2]); - YUVToRGB_Old(8, 0, s_blocks[0], s_blocks[1], s_blocks[3]); - YUVToRGB_Old(0, 8, s_blocks[0], s_blocks[1], s_blocks[4]); - YUVToRGB_Old(8, 8, s_blocks[0], s_blocks[1], s_blocks[5]); + YUVToRGB_Old(0, 0, s_state.blocks[0], s_state.blocks[1], s_state.blocks[2]); + YUVToRGB_Old(8, 0, s_state.blocks[0], s_state.blocks[1], s_state.blocks[3]); + YUVToRGB_Old(0, 8, s_state.blocks[0], s_state.blocks[1], s_state.blocks[4]); + YUVToRGB_Old(8, 8, s_state.blocks[0], s_state.blocks[1], s_state.blocks[5]); } else { - for (; s_current_block < NUM_BLOCKS; s_current_block++) + for (; s_state.current_block < NUM_BLOCKS; s_state.current_block++) { - if (!DecodeRLE_New(s_blocks[s_current_block].data(), (s_current_block >= 2) ? s_iq_y.data() : s_iq_uv.data())) + if (!DecodeRLE_New(s_state.blocks[s_state.current_block].data(), + (s_state.current_block >= 2) ? s_state.iq_y.data() : s_state.iq_uv.data())) return false; - IDCT_New(s_blocks[s_current_block].data()); + IDCT_New(s_state.blocks[s_state.current_block].data()); } - if (!s_data_out_fifo.IsEmpty()) + if (!s_state.data_out_fifo.IsEmpty()) return false; // done decoding - DEBUG_LOG("Decoded colored macroblock, {} words remaining", s_remaining_halfwords / 2); + DEBUG_LOG("Decoded colored macroblock, {} words remaining", s_state.remaining_halfwords / 2); ResetDecoder(); - s_state = State::WritingMacroblock; + s_state.state = State::WritingMacroblock; - YUVToRGB_New(0, 0, s_blocks[0], s_blocks[1], s_blocks[2]); - YUVToRGB_New(8, 0, s_blocks[0], s_blocks[1], s_blocks[3]); - YUVToRGB_New(0, 8, s_blocks[0], s_blocks[1], s_blocks[4]); - YUVToRGB_New(8, 8, s_blocks[0], s_blocks[1], s_blocks[5]); + YUVToRGB_New(0, 0, s_state.blocks[0], s_state.blocks[1], s_state.blocks[2]); + YUVToRGB_New(8, 0, s_state.blocks[0], s_state.blocks[1], s_state.blocks[3]); + YUVToRGB_New(0, 8, s_state.blocks[0], s_state.blocks[1], s_state.blocks[4]); + YUVToRGB_New(8, 8, s_state.blocks[0], s_state.blocks[1], s_state.blocks[5]); } - s_total_blocks_decoded += 4; + s_state.total_blocks_decoded += 4; ScheduleBlockCopyOut(TICKS_PER_BLOCK * 6); return true; @@ -606,19 +617,19 @@ void MDEC::ScheduleBlockCopyOut(TickCount ticks) DebugAssert(!HasPendingBlockCopyOut()); DEBUG_LOG("Scheduling block copy out in {} ticks", ticks); - s_block_copy_out_event->SetIntervalAndSchedule(ticks); + s_state.block_copy_out_event->SetIntervalAndSchedule(ticks); } void MDEC::CopyOutBlock(void* param, TickCount ticks, TickCount ticks_late) { - Assert(s_state == State::WritingMacroblock); - s_block_copy_out_event->Deactivate(); + Assert(s_state.state == State::WritingMacroblock); + s_state.block_copy_out_event->Deactivate(); - switch (s_status.data_output_depth) + switch (s_state.status.data_output_depth) { case DataOutputDepth_4Bit: { - const u32* in_ptr = s_block_rgb.data(); + const u32* in_ptr = s_state.block_rgb.data(); for (u32 i = 0; i < (64 / 8); i++) { u32 value = *(in_ptr++) >> 4; @@ -629,21 +640,21 @@ void MDEC::CopyOutBlock(void* param, TickCount ticks, TickCount ticks_late) value |= (*(in_ptr++) >> 4) << 20; value |= (*(in_ptr++) >> 4) << 24; value |= (*(in_ptr++) >> 4) << 28; - s_data_out_fifo.Push(value); + s_state.data_out_fifo.Push(value); } } break; case DataOutputDepth_8Bit: { - const u32* in_ptr = s_block_rgb.data(); + const u32* in_ptr = s_state.block_rgb.data(); for (u32 i = 0; i < (64 / 4); i++) { u32 value = *in_ptr++; value |= *in_ptr++ << 8; value |= *in_ptr++ << 16; value |= *in_ptr++ << 24; - s_data_out_fifo.Push(value); + s_state.data_out_fifo.Push(value); } } break; @@ -654,31 +665,31 @@ void MDEC::CopyOutBlock(void* param, TickCount ticks, TickCount ticks_late) u32 index = 0; u32 state = 0; u32 rgb = 0; - while (index < s_block_rgb.size()) + while (index < s_state.block_rgb.size()) { switch (state) { case 0: - rgb = s_block_rgb[index++]; // RGB- + rgb = s_state.block_rgb[index++]; // RGB- state = 1; break; case 1: - rgb |= (s_block_rgb[index] & 0xFF) << 24; // RGBR - s_data_out_fifo.Push(rgb); - rgb = s_block_rgb[index] >> 8; // GB-- + rgb |= (s_state.block_rgb[index] & 0xFF) << 24; // RGBR + s_state.data_out_fifo.Push(rgb); + rgb = s_state.block_rgb[index] >> 8; // GB-- index++; state = 2; break; case 2: - rgb |= s_block_rgb[index] << 16; // GBRG - s_data_out_fifo.Push(rgb); - rgb = s_block_rgb[index] >> 16; // B--- + rgb |= s_state.block_rgb[index] << 16; // GBRG + s_state.data_out_fifo.Push(rgb); + rgb = s_state.block_rgb[index] >> 16; // B--- index++; state = 3; break; case 3: - rgb |= s_block_rgb[index] << 8; // BRGB - s_data_out_fifo.Push(rgb); + rgb |= s_state.block_rgb[index] << 8; // BRGB + s_state.data_out_fifo.Push(rgb); index++; state = 0; break; @@ -691,44 +702,44 @@ void MDEC::CopyOutBlock(void* param, TickCount ticks, TickCount ticks_late) { if (g_settings.use_old_mdec_routines) [[unlikely]] { - const u16 a = ZeroExtend16(s_status.data_output_bit15.GetValue()) << 15; - for (u32 i = 0; i < static_cast(s_block_rgb.size());) + const u16 a = ZeroExtend16(s_state.status.data_output_bit15.GetValue()) << 15; + for (u32 i = 0; i < static_cast(s_state.block_rgb.size());) { - u32 color = s_block_rgb[i++]; + u32 color = s_state.block_rgb[i++]; u16 r = Truncate16((color >> 3) & 0x1Fu); u16 g = Truncate16((color >> 11) & 0x1Fu); u16 b = Truncate16((color >> 19) & 0x1Fu); const u16 color15a = r | (g << 5) | (b << 10) | (a << 15); - color = s_block_rgb[i++]; + color = s_state.block_rgb[i++]; r = Truncate16((color >> 3) & 0x1Fu); g = Truncate16((color >> 11) & 0x1Fu); b = Truncate16((color >> 19) & 0x1Fu); const u16 color15b = r | (g << 5) | (b << 10) | (a << 15); - s_data_out_fifo.Push(ZeroExtend32(color15a) | (ZeroExtend32(color15b) << 16)); + s_state.data_out_fifo.Push(ZeroExtend32(color15a) | (ZeroExtend32(color15b) << 16)); } } else { - const u32 a = ZeroExtend32(s_status.data_output_bit15.GetValue()) << 15; - for (u32 i = 0; i < static_cast(s_block_rgb.size());) + const u32 a = ZeroExtend32(s_state.status.data_output_bit15.GetValue()) << 15; + for (u32 i = 0; i < static_cast(s_state.block_rgb.size());) { #define E8TO5(color) (std::min((((color) + 4) >> 3), 0x1F)) - u32 color = s_block_rgb[i++]; + u32 color = s_state.block_rgb[i++]; u32 r = E8TO5(color & 0xFFu); u32 g = E8TO5((color >> 8) & 0xFFu); u32 b = E8TO5((color >> 16) & 0xFFu); const u32 color15a = r | (g << 5) | (b << 10) | a; - color = s_block_rgb[i++]; + color = s_state.block_rgb[i++]; r = E8TO5(color & 0xFFu); g = E8TO5((color >> 8) & 0xFFu); b = E8TO5((color >> 16) & 0xFFu); const u32 color15b = r | (g << 5) | (b << 10) | a; #undef E8TO5 - s_data_out_fifo.Push(color15a | (color15b << 16)); + s_state.data_out_fifo.Push(color15a | (color15b << 16)); } } } @@ -738,11 +749,11 @@ void MDEC::CopyOutBlock(void* param, TickCount ticks, TickCount ticks_late) break; } - DEBUG_LOG("Block copied out, fifo size = {} ({} bytes)", s_data_out_fifo.GetSize(), - s_data_out_fifo.GetSize() * sizeof(u32)); + DEBUG_LOG("Block copied out, fifo size = {} ({} bytes)", s_state.data_out_fifo.GetSize(), + s_state.data_out_fifo.GetSize() * sizeof(u32)); // if we've copied out all blocks, command is complete - s_state = (s_remaining_halfwords == 0) ? State::Idle : State::DecodingMacroblock; + s_state.state = (s_state.remaining_halfwords == 0) ? State::Idle : State::DecodingMacroblock; Execute(); } @@ -753,7 +764,7 @@ bool MDEC::DecodeRLE_Old(s16* blk, const u8* qt) 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}}; - if (s_current_coefficient == 64) + if (s_state.current_coefficient == 64) { std::fill_n(blk, 64, s16(0)); @@ -761,11 +772,11 @@ bool MDEC::DecodeRLE_Old(s16* blk, const u8* qt) u16 n; for (;;) { - if (s_data_in_fifo.IsEmpty() || s_remaining_halfwords == 0) + if (s_state.data_in_fifo.IsEmpty() || s_state.remaining_halfwords == 0) return false; - n = s_data_in_fifo.Pop(); - s_remaining_halfwords--; + n = s_state.data_in_fifo.Pop(); + s_state.remaining_halfwords--; if (n == 0xFE00) continue; @@ -773,47 +784,48 @@ bool MDEC::DecodeRLE_Old(s16* blk, const u8* qt) break; } - s_current_coefficient = 0; - s_current_q_scale = (n >> 10) & 0x3F; - s32 val = - SignExtendN<10, s32>(static_cast(n & 0x3FF)) * static_cast(ZeroExtend32(qt[s_current_coefficient])); + s_state.current_coefficient = 0; + s_state.current_q_scale = (n >> 10) & 0x3F; + s32 val = SignExtendN<10, s32>(static_cast(n & 0x3FF)) * + static_cast(ZeroExtend32(qt[s_state.current_coefficient])); - if (s_current_q_scale == 0) + if (s_state.current_q_scale == 0) val = SignExtendN<10, s32>(static_cast(n & 0x3FF)) * 2; val = std::clamp(val, -0x400, 0x3FF); - if (s_current_q_scale > 0) - blk[zagzig[s_current_coefficient]] = static_cast(val); + if (s_state.current_q_scale > 0) + blk[zagzig[s_state.current_coefficient]] = static_cast(val); else - blk[s_current_coefficient] = static_cast(val); + blk[s_state.current_coefficient] = static_cast(val); } - while (!s_data_in_fifo.IsEmpty() && s_remaining_halfwords > 0) + while (!s_state.data_in_fifo.IsEmpty() && s_state.remaining_halfwords > 0) { - u16 n = s_data_in_fifo.Pop(); - s_remaining_halfwords--; + u16 n = s_state.data_in_fifo.Pop(); + s_state.remaining_halfwords--; - s_current_coefficient += ((n >> 10) & 0x3F) + 1; - if (s_current_coefficient < 64) + s_state.current_coefficient += ((n >> 10) & 0x3F) + 1; + if (s_state.current_coefficient < 64) { - s32 val = (SignExtendN<10, s32>(static_cast(n & 0x3FF)) * - static_cast(ZeroExtend32(qt[s_current_coefficient])) * static_cast(s_current_q_scale) + - 4) / - 8; + s32 val = + (SignExtendN<10, s32>(static_cast(n & 0x3FF)) * + static_cast(ZeroExtend32(qt[s_state.current_coefficient])) * static_cast(s_state.current_q_scale) + + 4) / + 8; - if (s_current_q_scale == 0) + if (s_state.current_q_scale == 0) val = SignExtendN<10, s32>(static_cast(n & 0x3FF)) * 2; val = std::clamp(val, -0x400, 0x3FF); - if (s_current_q_scale > 0) - blk[zagzig[s_current_coefficient]] = static_cast(val); + if (s_state.current_q_scale > 0) + blk[zagzig[s_state.current_coefficient]] = static_cast(val); else - blk[s_current_coefficient] = static_cast(val); + blk[s_state.current_coefficient] = static_cast(val); } - if (s_current_coefficient >= 63) + if (s_state.current_coefficient >= 63) { - s_current_coefficient = 64; + s_state.current_coefficient = 64; return true; } } @@ -830,7 +842,7 @@ void MDEC::IDCT_Old(s16* blk) { s64 sum = 0; for (u32 u = 0; u < 8; u++) - sum += s32(blk[u * 8 + x]) * s32(s_scale_table[y * 8 + u]); + sum += s32(blk[u * 8 + x]) * s32(s_state.scale_table[y * 8 + u]); temp_buffer[x + y * 8] = sum; } } @@ -840,7 +852,7 @@ void MDEC::IDCT_Old(s16* blk) { s64 sum = 0; for (u32 u = 0; u < 8; u++) - sum += s64(temp_buffer[u + y * 8]) * s32(s_scale_table[x * 8 + u]); + sum += s64(temp_buffer[u + y * 8]) * s32(s_state.scale_table[x * 8 + u]); blk[x + y * 8] = static_cast(std::clamp(SignExtendN<9, s32>((sum >> 32) + ((sum >> 31) & 1)), -128, 127)); @@ -851,7 +863,7 @@ void MDEC::IDCT_Old(s16* blk) void MDEC::YUVToRGB_Old(u32 xx, u32 yy, const std::array& Crblk, const std::array& Cbblk, const std::array& Yblk) { - const s16 addval = s_status.data_output_signed ? 0 : 0x80; + const s16 addval = s_state.status.data_output_signed ? 0 : 0x80; for (u32 y = 0; y < 8; y++) { for (u32 x = 0; x < 8; x++) @@ -868,9 +880,9 @@ void MDEC::YUVToRGB_Old(u32 xx, u32 yy, const std::array& Crblk, const G = static_cast(std::clamp(static_cast(Y) + G, -128, 127)) + addval; B = static_cast(std::clamp(static_cast(Y) + B, -128, 127)) + addval; - s_block_rgb[(x + xx) + ((y + yy) * 16)] = ZeroExtend32(static_cast(R)) | - (ZeroExtend32(static_cast(G)) << 8) | - (ZeroExtend32(static_cast(B)) << 16); + s_state.block_rgb[(x + xx) + ((y + yy) * 16)] = ZeroExtend32(static_cast(R)) | + (ZeroExtend32(static_cast(G)) << 8) | + (ZeroExtend32(static_cast(B)) << 16); } } } @@ -883,7 +895,7 @@ bool MDEC::DecodeRLE_New(s16* blk, const u8* qt) 28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30, 23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63}}; - if (s_current_coefficient == 64) + if (s_state.current_coefficient == 64) { std::fill_n(blk, 64, s16(0)); @@ -891,11 +903,11 @@ bool MDEC::DecodeRLE_New(s16* blk, const u8* qt) u16 n; for (;;) { - if (s_data_in_fifo.IsEmpty() || s_remaining_halfwords == 0) + if (s_state.data_in_fifo.IsEmpty() || s_state.remaining_halfwords == 0) return false; - n = s_data_in_fifo.Pop(); - s_remaining_halfwords--; + n = s_state.data_in_fifo.Pop(); + s_state.remaining_halfwords--; if (n == 0xFE00) continue; @@ -903,32 +915,33 @@ bool MDEC::DecodeRLE_New(s16* blk, const u8* qt) break; } - s_current_coefficient = 0; - s_current_q_scale = n >> 10; + s_state.current_coefficient = 0; + s_state.current_q_scale = n >> 10; // Store the DCT blocks with an additional 4 bits of precision. const s32 val = SignExtendN<10, s32>(static_cast(n)); - const s32 coeff = (s_current_q_scale == 0) ? (val << 5) : (((val * qt[0]) << 4) + (val ? ((val < 0) ? 8 : -8) : 0)); + const s32 coeff = + (s_state.current_q_scale == 0) ? (val << 5) : (((val * qt[0]) << 4) + (val ? ((val < 0) ? 8 : -8) : 0)); blk[zigzag[0]] = static_cast(std::clamp(coeff, -0x4000, 0x3FFF)); } - while (!s_data_in_fifo.IsEmpty() && s_remaining_halfwords > 0) + while (!s_state.data_in_fifo.IsEmpty() && s_state.remaining_halfwords > 0) { - u16 n = s_data_in_fifo.Pop(); - s_remaining_halfwords--; + u16 n = s_state.data_in_fifo.Pop(); + s_state.remaining_halfwords--; - s_current_coefficient += ((n >> 10) + 1); - if (s_current_coefficient < 64) + s_state.current_coefficient += ((n >> 10) + 1); + if (s_state.current_coefficient < 64) { const s32 val = SignExtendN<10, s32>(n); - const s32 scq = static_cast(s_current_q_scale * qt[s_current_coefficient]); + const s32 scq = static_cast(s_state.current_q_scale * qt[s_state.current_coefficient]); const s32 coeff = (scq == 0) ? (val << 5) : ((((val * scq) >> 3) << 4) + (val ? ((val < 0) ? 8 : -8) : 0)); - blk[zigzag[s_current_coefficient]] = static_cast(std::clamp(coeff, -0x4000, 0x3FFF)); + blk[zigzag[s_state.current_coefficient]] = static_cast(std::clamp(coeff, -0x4000, 0x3FFF)); } - if (s_current_coefficient >= 63) + if (s_state.current_coefficient >= 63) { - s_current_coefficient = 64; + s_state.current_coefficient = 64; return true; } } @@ -950,13 +963,13 @@ void MDEC::IDCT_New(s16* blk) for (u32 x = 0; x < 8; x++) { for (u32 y = 0; y < 8; y++) - temp[y * 8 + x] = IDCTRow(&blk[x * 8], &s_scale_table[y * 8]); + temp[y * 8 + x] = IDCTRow(&blk[x * 8], &s_state.scale_table[y * 8]); } for (u32 x = 0; x < 8; x++) { for (u32 y = 0; y < 8; y++) { - const s32 sum = IDCTRow(&temp[x * 8], &s_scale_table[y * 8]); + const s32 sum = IDCTRow(&temp[x * 8], &s_state.scale_table[y * 8]); blk[x * 8 + y] = static_cast(std::clamp(SignExtendN<9, s32>(sum), -128, 127)); } } @@ -965,7 +978,7 @@ void MDEC::IDCT_New(s16* blk) void MDEC::YUVToRGB_New(u32 xx, u32 yy, const std::array& Crblk, const std::array& Cbblk, const std::array& Yblk) { - const s32 addval = s_status.data_output_signed ? 0 : 0x80; + const s32 addval = s_state.status.data_output_signed ? 0 : 0x80; for (u32 y = 0; y < 8; y++) { for (u32 x = 0; x < 8; x++) @@ -981,7 +994,7 @@ void MDEC::YUVToRGB_New(u32 xx, u32 yy, const std::array& Crblk, const addval; const s32 b = std::clamp(SignExtendN<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval; - s_block_rgb[(x + xx) + ((y + yy) * 16)] = + s_state.block_rgb[(x + xx) + ((y + yy) * 16)] = static_cast(r) | (static_cast(g) << 8) | (static_cast(b) << 16); } } @@ -989,37 +1002,37 @@ void MDEC::YUVToRGB_New(u32 xx, u32 yy, const std::array& Crblk, const void MDEC::YUVToMono(const std::array& Yblk) { - const s32 addval = s_status.data_output_signed ? 0 : 0x80; + const s32 addval = s_state.status.data_output_signed ? 0 : 0x80; for (u32 i = 0; i < 64; i++) - s_block_rgb[i] = static_cast(std::clamp(SignExtendN<9, s32>(Yblk[i]), -128, 127) + addval); + s_state.block_rgb[i] = static_cast(std::clamp(SignExtendN<9, s32>(Yblk[i]), -128, 127) + addval); } void MDEC::HandleSetQuantTableCommand() { - DebugAssert(s_remaining_halfwords >= 32); + DebugAssert(s_state.remaining_halfwords >= 32); // TODO: Remove extra copies.. std::array packed_data; - s_data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); - s_remaining_halfwords -= 32; - std::memcpy(s_iq_y.data(), packed_data.data(), s_iq_y.size()); + s_state.data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); + s_state.remaining_halfwords -= 32; + std::memcpy(s_state.iq_y.data(), packed_data.data(), s_state.iq_y.size()); - if (s_remaining_halfwords > 0) + if (s_state.remaining_halfwords > 0) { - DebugAssert(s_remaining_halfwords >= 32); + DebugAssert(s_state.remaining_halfwords >= 32); - s_data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); - std::memcpy(s_iq_uv.data(), packed_data.data(), s_iq_uv.size()); + s_state.data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); + std::memcpy(s_state.iq_uv.data(), packed_data.data(), s_state.iq_uv.size()); } } void MDEC::HandleSetScaleCommand() { - DebugAssert(s_remaining_halfwords == 64); + DebugAssert(s_state.remaining_halfwords == 64); std::array packed_data; - s_data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); - s_remaining_halfwords -= 32; + s_state.data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); + s_state.remaining_halfwords -= 32; SetScaleMatrix(packed_data.data()); } @@ -1028,7 +1041,7 @@ void MDEC::SetScaleMatrix(const u16* values) for (u32 y = 0; y < 8; y++) { for (u32 x = 0; x < 8; x++) - s_scale_table[y * 8 + x] = values[x * 8 + y]; + s_state.scale_table[y * 8 + x] = values[x * 8 + y]; } } @@ -1048,26 +1061,27 @@ void MDEC::DrawDebugStateWindow() static constexpr std::array output_depths = {{"4-bit", "8-bit", "24-bit", "15-bit"}}; static constexpr std::array block_names = {{"Crblk", "Cbblk", "Y1", "Y2", "Y3", "Y4", "Output"}}; - ImGui::Text("Blocks Decoded: %u", s_total_blocks_decoded); - ImGui::Text("Data-In FIFO Size: %u (%u bytes)", s_data_in_fifo.GetSize(), s_data_in_fifo.GetSize() * 4); - ImGui::Text("Data-Out FIFO Size: %u (%u bytes)", s_data_out_fifo.GetSize(), s_data_out_fifo.GetSize() * 4); - ImGui::Text("DMA Enable: %s%s", s_enable_dma_in ? "In " : "", s_enable_dma_out ? "Out" : ""); - ImGui::Text("Current State: %s", state_names[static_cast(s_state)]); - ImGui::Text("Current Block: %s", block_names[s_current_block]); - ImGui::Text("Current Coefficient: %u", s_current_coefficient); + ImGui::Text("Blocks Decoded: %u", s_state.total_blocks_decoded); + ImGui::Text("Data-In FIFO Size: %u (%u bytes)", s_state.data_in_fifo.GetSize(), s_state.data_in_fifo.GetSize() * 4); + ImGui::Text("Data-Out FIFO Size: %u (%u bytes)", s_state.data_out_fifo.GetSize(), + s_state.data_out_fifo.GetSize() * 4); + ImGui::Text("DMA Enable: %s%s", s_state.enable_dma_in ? "In " : "", s_state.enable_dma_out ? "Out" : ""); + ImGui::Text("Current State: %s", state_names[static_cast(s_state.state)]); + ImGui::Text("Current Block: %s", block_names[s_state.current_block]); + ImGui::Text("Current Coefficient: %u", s_state.current_coefficient); if (ImGui::CollapsingHeader("Status", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Text("Data-Out FIFO Empty: %s", s_status.data_out_fifo_empty ? "Yes" : "No"); - ImGui::Text("Data-In FIFO Full: %s", s_status.data_in_fifo_full ? "Yes" : "No"); - ImGui::Text("Command Busy: %s", s_status.command_busy ? "Yes" : "No"); - ImGui::Text("Data-In Request: %s", s_status.data_in_request ? "Yes" : "No"); - ImGui::Text("Output Depth: %s", output_depths[static_cast(s_status.data_output_depth.GetValue())]); - ImGui::Text("Output Signed: %s", s_status.data_output_signed ? "Yes" : "No"); - ImGui::Text("Output Bit 15: %u", ZeroExtend32(s_status.data_output_bit15.GetValue())); - ImGui::Text("Current Block: %u", ZeroExtend32(s_status.current_block.GetValue())); + ImGui::Text("Data-Out FIFO Empty: %s", s_state.status.data_out_fifo_empty ? "Yes" : "No"); + ImGui::Text("Data-In FIFO Full: %s", s_state.status.data_in_fifo_full ? "Yes" : "No"); + ImGui::Text("Command Busy: %s", s_state.status.command_busy ? "Yes" : "No"); + ImGui::Text("Data-In Request: %s", s_state.status.data_in_request ? "Yes" : "No"); + ImGui::Text("Output Depth: %s", output_depths[static_cast(s_state.status.data_output_depth.GetValue())]); + ImGui::Text("Output Signed: %s", s_state.status.data_output_signed ? "Yes" : "No"); + ImGui::Text("Output Bit 15: %u", ZeroExtend32(s_state.status.data_output_bit15.GetValue())); + ImGui::Text("Current Block: %u", ZeroExtend32(s_state.status.current_block.GetValue())); ImGui::Text("Parameter Words Remaining: %d", - static_cast(SignExtend32(s_status.parameter_words_remaining.GetValue()))); + static_cast(SignExtend32(s_state.status.parameter_words_remaining.GetValue()))); } ImGui::End();