From 51eff82eb6abb5c426c5125b54a7fdbcbeb363ce Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Sun, 18 Oct 2020 14:44:32 +1000
Subject: [PATCH] CPU/Recompiler: Implement lwl/lwr/swl/swr

---
 src/core/cpu_recompiler_code_generator.cpp | 127 +++++++++++++++++++++
 src/core/cpu_recompiler_code_generator.h   |   2 +
 src/core/cpu_recompiler_register_cache.h   |   5 +
 3 files changed, 134 insertions(+)

diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp
index e0f84f416..82e6bcbfa 100644
--- a/src/core/cpu_recompiler_code_generator.cpp
+++ b/src/core/cpu_recompiler_code_generator.cpp
@@ -88,6 +88,16 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
       result = Compile_Load(cbi);
       break;
 
+    case InstructionOp::lwl:
+    case InstructionOp::lwr:
+      result = Compile_LoadLeftRight(cbi);
+      break;
+
+    case InstructionOp::swl:
+    case InstructionOp::swr:
+      result = Compile_StoreLeftRight(cbi);
+      break;
+
     case InstructionOp::sb:
     case InstructionOp::sh:
     case InstructionOp::sw:
@@ -1263,6 +1273,123 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
   return true;
 }
 
+// Emits code for the unaligned load instructions lwl/lwr.
+// The aligned word containing the effective address is loaded and merged with the existing value of rt:
+//   lwl: rt = (rt & (0x00FFFFFF >> shift)) | (mem << (24 - shift))
+//   lwr: rt = (rt & (0xFFFFFF00 << (24 - shift))) | (mem >> shift)
+// where shift = (address & 3) * 8. The merged value is written to rt with WriteGuestRegisterDelayed().
+// Always returns true.
+bool CodeGenerator::Compile_LoadLeftRight(const CodeBlockInstruction& cbi)
+{
+  InstructionPrologue(cbi, 1);
+
+  // effective address = rs + immediate (imm_sext32)
+  Value base = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs);
+  Value offset = Value::FromConstantU32(cbi.instruction.i.imm_sext32());
+  Value address = AddValues(base, offset, false);
+  base.ReleaseAndClear();
+
+  // shift = (address & 3) * 8; address is then rounded down to a multiple of 4
+  Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8
+  address = AndValues(address, Value::FromConstantU32(~u32(3)));
+
+  Value mem = EmitLoadGuestMemory(cbi, address, RegSize_32);
+
+  // hack to bypass load delays: when rt is the current load delay target, merge with the load delay value
+  // instead of reading rt through ReadGuestRegister()
+  Value value;
+  if (cbi.instruction.i.rt == m_register_cache.GetLoadDelayRegister())
+  {
+    const Value& ld_value = m_register_cache.GetLoadDelayValue();
+    if (ld_value.IsInHostRegister())
+      value.SetHostReg(&m_register_cache, ld_value.GetHostRegister(), ld_value.size);
+    else
+      value = ld_value;
+  }
+  else
+  {
+    value = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt, true, true);
+  }
+
+  if (cbi.instruction.op == InstructionOp::lwl)
+  {
+    // the mask keeps the low (24 - shift) bits of rt; the loaded word is shifted up to fill the bits above them
+    Value lhs = AndValues(value, ShrValues(Value::FromConstantU32(0x00FFFFFF), shift));
+    mem = ShlValues(mem, SubValues(Value::FromConstantU32(24), shift, false));
+    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
+  }
+  else
+  {
+    // the mask keeps the high `shift` bits of rt; the loaded word is shifted down to fill the bits below them
+    Value lhs = AndValues(
+      value, ShlValues(Value::FromConstantU32(0xFFFFFF00), SubValues(Value::FromConstantU32(24), shift, false)));
+    EmitShr(mem.GetHostRegister(), mem.GetHostRegister(), RegSize_32, shift);
+    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
+  }
+
+  shift.ReleaseAndClear();
+
+  // pass the merged value and the aligned address to PGXP::CPU_LW when gpu_pgxp_enable is set
+  if (g_settings.gpu_pgxp_enable)
+    EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), mem, address);
+
+  m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(mem));
+
+  InstructionEpilogue(cbi);
+  return true;
+}
+
+// Emits code for the unaligned store instructions swl/swr.
+// Performs a read-modify-write of the aligned word containing the effective address:
+//   swl: mem = (mem & (0xFFFFFF00 << shift)) | (rt >> (24 - shift))
+//   swr: mem = (mem & (0x00FFFFFF >> (24 - shift))) | (rt << shift)
+// where shift = (address & 3) * 8. Always returns true.
+bool CodeGenerator::Compile_StoreLeftRight(const CodeBlockInstruction& cbi)
+{
+  InstructionPrologue(cbi, 1);
+
+  // effective address = rs + immediate (imm_sext32)
+  Value base = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs);
+  Value offset = Value::FromConstantU32(cbi.instruction.i.imm_sext32());
+  Value address = AddValues(base, offset, false);
+  base.ReleaseAndClear();
+
+  // shift = (address & 3) * 8; address is then rounded down to a multiple of 4
+  Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8
+  address = AndValues(address, Value::FromConstantU32(~u32(3)));
+
+  // read the current contents of the aligned word, part of which is kept by the masks below
+  Value mem = EmitLoadGuestMemory(cbi, address, RegSize_32);
+
+  Value reg = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
+
+  if (cbi.instruction.op == InstructionOp::swl)
+  {
+    // the mask keeps the high (24 - shift) bits of the memory word; rt is shifted down to fill the bits below them
+    Value lhs = ShrValues(reg, SubValues(Value::FromConstantU32(24), shift, false));
+    EmitAnd(mem.GetHostRegister(), mem.GetHostRegister(), ShlValues(Value::FromConstantU32(0xFFFFFF00), shift));
+    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
+  }
+  else
+  {
+    // the mask keeps the low `shift` bits of the memory word; rt is shifted up to fill the bits above them
+    Value lhs = ShlValues(reg, shift);
+    mem = AndValues(mem,
+                    ShrValues(Value::FromConstantU32(0x00FFFFFF), SubValues(Value::FromConstantU32(24), shift, false)));
+    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
+  }
+
+  shift.ReleaseAndClear();
+
+  // write the merged word back to the aligned address, then pass it to PGXP::CPU_SW when gpu_pgxp_enable is set
+  EmitStoreGuestMemory(cbi, address, mem);
+  if (g_settings.gpu_pgxp_enable)
+    EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), mem, address);
+
+  InstructionEpilogue(cbi);
+  return true;
+}
+
 bool CodeGenerator::Compile_MoveHiLo(const CodeBlockInstruction& cbi)
 {
   InstructionPrologue(cbi, 1);
diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h
index 26fc77b3d..98e22d4be 100644
--- a/src/core/cpu_recompiler_code_generator.h
+++ b/src/core/cpu_recompiler_code_generator.h
@@ -201,6 +201,8 @@ private:
   bool Compile_Shift(const CodeBlockInstruction& cbi);
   bool Compile_Load(const CodeBlockInstruction& cbi);
   bool Compile_Store(const CodeBlockInstruction& cbi);
+  bool Compile_LoadLeftRight(const CodeBlockInstruction& cbi);
+  bool Compile_StoreLeftRight(const CodeBlockInstruction& cbi);
   bool Compile_MoveHiLo(const CodeBlockInstruction& cbi);
   bool Compile_Add(const CodeBlockInstruction& cbi);
   bool Compile_Subtract(const CodeBlockInstruction& cbi);
diff --git a/src/core/cpu_recompiler_register_cache.h b/src/core/cpu_recompiler_register_cache.h
index c5c3cb4da..df9ddd8e1 100644
--- a/src/core/cpu_recompiler_register_cache.h
+++ b/src/core/cpu_recompiler_register_cache.h
@@ -308,6 +308,11 @@ public:
   /// Stores the specified value to the guest register after the next instruction (load delay).
   void WriteGuestRegisterDelayed(Reg guest_reg, Value&& value);
 
+  /// Returns the current target for a load delay, or Reg::count.
+  Reg GetLoadDelayRegister() const { return m_state.load_delay_register; }
+  /// Returns the value stored for the current load delay (m_state.load_delay_value).
+  const Value& GetLoadDelayValue() const { return m_state.load_delay_value; }
+
   /// Moves load delay to the next load delay, and writes any previous load delay to the destination register.
   void UpdateLoadDelay();
 