Merge pull request #5110 from MerryMage/const-pool
Jit64: Implement a constant pool
This commit is contained in:
commit
d2690568f9
|
@ -42,7 +42,7 @@ public:
|
|||
}
|
||||
|
||||
// Call this before you generate any code.
|
||||
void AllocCodeSpace(size_t size, bool need_low = true)
|
||||
virtual void AllocCodeSpace(size_t size, bool need_low = true)
|
||||
{
|
||||
region_size = size;
|
||||
region = static_cast<u8*>(Common::AllocateExecutableMemory(region_size, need_low));
|
||||
|
@ -51,7 +51,7 @@ public:
|
|||
|
||||
// Always clear code space with breakpoints, so that if someone accidentally executes
|
||||
// uninitialized, it just breaks into the debugger.
|
||||
void ClearCodeSpace()
|
||||
virtual void ClearCodeSpace()
|
||||
{
|
||||
PoisonMemory();
|
||||
ResetCodePtr();
|
||||
|
|
|
@ -245,6 +245,7 @@ if(_M_X86)
|
|||
PowerPC/Jit64/JitRegCache.cpp
|
||||
PowerPC/Jit64/Jit_SystemRegisters.cpp
|
||||
PowerPC/Jit64Common/BlockCache.cpp
|
||||
PowerPC/Jit64Common/ConstantPool.cpp
|
||||
PowerPC/Jit64Common/EmuCodeBlock.cpp
|
||||
PowerPC/Jit64Common/FarCodeCache.cpp
|
||||
PowerPC/Jit64Common/Jit64AsmCommon.cpp
|
||||
|
|
|
@ -244,6 +244,7 @@
|
|||
<ClCompile Include="PowerPC\Interpreter\Interpreter_Paired.cpp" />
|
||||
<ClCompile Include="PowerPC\Interpreter\Interpreter_SystemRegisters.cpp" />
|
||||
<ClCompile Include="PowerPC\Interpreter\Interpreter_Tables.cpp" />
|
||||
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp" />
|
||||
<ClCompile Include="PowerPC\JitILCommon\IR.cpp" />
|
||||
<ClCompile Include="PowerPC\JitILCommon\JitILBase_Branch.cpp" />
|
||||
<ClCompile Include="PowerPC\JitILCommon\JitILBase_FloatingPoint.cpp" />
|
||||
|
@ -486,6 +487,7 @@
|
|||
<ClInclude Include="PowerPC\CachedInterpreter\InterpreterBlockCache.h" />
|
||||
<ClInclude Include="PowerPC\Interpreter\Interpreter.h" />
|
||||
<ClInclude Include="PowerPC\Interpreter\Interpreter_FPUtils.h" />
|
||||
<ClInclude Include="PowerPC\Jit64Common\ConstantPool.h" />
|
||||
<ClInclude Include="PowerPC\Jit64IL\JitIL.h" />
|
||||
<ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
|
||||
<ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />
|
||||
|
|
|
@ -870,6 +870,9 @@
|
|||
<ClCompile Include="IOS\USB\Bluetooth\WiimoteHIDAttr.cpp">
|
||||
<Filter>IOS\USB\Bluetooth</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp">
|
||||
<Filter>PowerPC\Jit64Common</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="BootManager.h" />
|
||||
|
@ -1493,6 +1496,9 @@
|
|||
<ClInclude Include="IOS\MIOS.h">
|
||||
<Filter>IOS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="PowerPC\Jit64Common\ConstantPool.h">
|
||||
<Filter>PowerPC\Jit64Common</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Text Include="CMakeLists.txt" />
|
||||
|
|
|
@ -108,7 +108,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
|||
UCOMISD(xmm, R(xmm));
|
||||
fixups.push_back(J_CC(CC_P));
|
||||
}
|
||||
MOVDDUP(xmm, M(psGeneratedQNaN));
|
||||
MOVDDUP(xmm, MConst(psGeneratedQNaN));
|
||||
for (FixupBranch fixup : fixups)
|
||||
SetJumpTarget(fixup);
|
||||
FixupBranch done = J(true);
|
||||
|
@ -127,7 +127,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
|||
SwitchToFarCode();
|
||||
SetJumpTarget(handle_nan);
|
||||
_assert_msg_(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
|
||||
BLENDVPD(xmm, M(psGeneratedQNaN));
|
||||
BLENDVPD(xmm, MConst(psGeneratedQNaN));
|
||||
for (u32 x : inputs)
|
||||
{
|
||||
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD);
|
||||
|
@ -151,7 +151,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
|||
SetJumpTarget(handle_nan);
|
||||
MOVAPD(tmp, R(clobber));
|
||||
ANDNPD(clobber, R(xmm));
|
||||
ANDPD(tmp, M(psGeneratedQNaN));
|
||||
ANDPD(tmp, MConst(psGeneratedQNaN));
|
||||
ORPD(tmp, R(clobber));
|
||||
MOVAPD(xmm, R(tmp));
|
||||
for (u32 x : inputs)
|
||||
|
@ -350,7 +350,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
ADDSD(XMM1, fpr.R(b));
|
||||
}
|
||||
if (inst.SUBOP5 == 31) // nmadd
|
||||
XORPD(XMM1, M(packed ? psSignBits2 : psSignBits));
|
||||
XORPD(XMM1, MConst(packed ? psSignBits2 : psSignBits));
|
||||
}
|
||||
fpr.BindToRegister(d, !single);
|
||||
if (single)
|
||||
|
@ -385,15 +385,15 @@ void Jit64::fsign(UGeckoInstruction inst)
|
|||
{
|
||||
case 40: // neg
|
||||
avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src,
|
||||
M(packed ? psSignBits2 : psSignBits), packed);
|
||||
MConst(packed ? psSignBits2 : psSignBits), packed);
|
||||
break;
|
||||
case 136: // nabs
|
||||
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits),
|
||||
packed);
|
||||
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src,
|
||||
MConst(packed ? psSignBits2 : psSignBits), packed);
|
||||
break;
|
||||
case 264: // abs
|
||||
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask),
|
||||
packed);
|
||||
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src,
|
||||
MConst(packed ? psAbsMask2 : psAbsMask), packed);
|
||||
break;
|
||||
default:
|
||||
PanicAlert("fsign bleh");
|
||||
|
@ -608,7 +608,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
|
|||
// The upper 32 bits of the result are set to 0xfff80000,
|
||||
// except for -0.0 where they are set to 0xfff80001 (TODO).
|
||||
|
||||
MOVAPD(XMM0, M(half_qnan_and_s32_max));
|
||||
MOVAPD(XMM0, MConst(half_qnan_and_s32_max));
|
||||
MINSD(XMM0, fpr.R(b));
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
|
|
|
@ -623,7 +623,7 @@ void Jit64::mcrfs(UGeckoInstruction inst)
|
|||
}
|
||||
AND(32, R(RSCRATCH), Imm32(mask));
|
||||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
LEA(64, RSCRATCH, M(m_crTable.data()));
|
||||
LEA(64, RSCRATCH, MConst(m_crTable));
|
||||
MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0));
|
||||
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
|
||||
}
|
||||
|
@ -657,14 +657,14 @@ void Jit64::mffsx(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
// MXCSR = s_fpscr_to_mxcsr[FPSCR & 7]
|
||||
static const u32 s_fpscr_to_mxcsr[] = {
|
||||
static const u32 s_fpscr_to_mxcsr[8] = {
|
||||
0x1F80, 0x7F80, 0x5F80, 0x3F80, 0x9F80, 0xFF80, 0xDF80, 0xBF80,
|
||||
};
|
||||
|
||||
// Needs value of FPSCR in RSCRATCH.
|
||||
void Jit64::UpdateMXCSR()
|
||||
{
|
||||
LEA(64, RSCRATCH2, M(&s_fpscr_to_mxcsr));
|
||||
LEA(64, RSCRATCH2, MConst(s_fpscr_to_mxcsr));
|
||||
AND(32, R(RSCRATCH), Imm32(7));
|
||||
LDMXCSR(MComplex(RSCRATCH2, RSCRATCH, SCALE_4, 0));
|
||||
}
|
||||
|
@ -730,7 +730,7 @@ void Jit64::mtfsfix(UGeckoInstruction inst)
|
|||
|
||||
// Field 7 contains NI and RN.
|
||||
if (inst.CRFD == 7)
|
||||
LDMXCSR(M(&s_fpscr_to_mxcsr[imm & 7]));
|
||||
LDMXCSR(MConst(s_fpscr_to_mxcsr, imm & 7));
|
||||
}
|
||||
|
||||
void Jit64::mtfsfx(UGeckoInstruction inst)
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright 2017 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64Common/ConstantPool.h"
|
||||
|
||||
ConstantPool::ConstantPool(Gen::X64CodeBlock* parent) : m_parent(parent)
|
||||
{
|
||||
}
|
||||
|
||||
// Defaulted: the bytes the pool hands out are reserved from the parent code
// block (see Init()), so the pool has no storage of its own to release here.
ConstantPool::~ConstantPool() = default;
|
||||
|
||||
// First-time setup of the pool: runs Init() to reserve the pool's region from
// the parent code block. Asserts that no region has been set up before
// (m_current_ptr is still null).
void ConstantPool::AllocCodeSpace()
|
||||
{
|
||||
_assert_(!m_current_ptr);
|
||||
Init();
|
||||
}
|
||||
|
||||
// Re-creates the pool by running Init() again: every previously recorded
// constant is forgotten and a fresh region is reserved from the parent at its
// current write position.
void ConstantPool::ClearCodeSpace()
|
||||
{
|
||||
Init();
|
||||
}
|
||||
|
||||
// Returns a memory operand that addresses element `index` of the pooled copy
// of the constant stored at `value`.
//
//   value        - address of the original constant. It is also the lookup key:
//                  two requests refer to the same pooled copy only if they pass
//                  the same address.
//   element_size - size in bytes of one element of the constant.
//   num_elements - number of elements stored at `value`.
//   index        - element to address; must be less than num_elements.
//
// The first request for a given address copies element_size * num_elements
// bytes into the pool at an ALIGNMENT-aligned location. Later requests reuse
// that copy and must describe the same total size.
Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size,
                                          size_t num_elements, size_t index)
{
  // An out-of-range index would address bytes past the end of the pooled copy.
  _assert_msg_(DYNA_REC, index < num_elements, "Constant index is out of range.");

  const size_t value_size = element_size * num_elements;
  auto iter = m_const_info.find(value);

  if (iter == m_const_info.end())
  {
    // On success std::align moves m_current_ptr up to the aligned address and
    // subtracts the padding from m_remaining_size; on failure it returns
    // nullptr and leaves both untouched.
    void* ptr = std::align(ALIGNMENT, value_size, m_current_ptr, m_remaining_size);
    _assert_msg_(DYNA_REC, ptr, "Constant pool has run out of space.");
    if (!ptr)
    {
      // NOTE(review): this is only reachable if the assertion above does not
      // abort; its definition is not visible here. Rather than copying through
      // a null pointer, address the original constant directly, as the code
      // did before the pool existed.
      return Gen::M(static_cast<const u8*>(value) + element_size * index);
    }

    // Claim the bytes for this constant.
    m_current_ptr = static_cast<u8*>(ptr) + value_size;
    m_remaining_size -= value_size;

    std::memcpy(ptr, value, value_size);
    iter = m_const_info.emplace(value, ConstantInfo{ptr, value_size}).first;
  }

  const ConstantInfo& info = iter->second;
  // The same address must always be requested with the same total size.
  _assert_msg_(DYNA_REC, info.m_size == value_size,
               "Constant has incorrect size in constant pool.");
  u8* location = static_cast<u8*>(info.m_location);
  return Gen::M(location + element_size * index);
}
|
||||
|
||||
void ConstantPool::Init()
|
||||
{
|
||||
// If execution happens to run to the start of the constant pool, halt.
|
||||
m_parent->INT3();
|
||||
m_parent->AlignCode16();
|
||||
|
||||
// Reserve a block of memory CONST_POOL_SIZE in size.
|
||||
m_current_ptr = m_parent->GetWritableCodePtr();
|
||||
m_parent->ReserveCodeSpace(CONST_POOL_SIZE);
|
||||
|
||||
m_remaining_size = CONST_POOL_SIZE;
|
||||
m_const_info.clear();
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright 2017 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <map>
|
||||
|
||||
namespace Gen
|
||||
{
|
||||
struct OpArg;
|
||||
class X64CodeBlock;
|
||||
}
|
||||
|
||||
// Constants are copied into this pool so that they live at a memory location
|
||||
// that is close to the code that references it. This ensures that the 32-bit
|
||||
// limitation on RIP addressing is not an issue.
|
||||
class ConstantPool
|
||||
{
|
||||
public:
|
||||
static constexpr size_t CONST_POOL_SIZE = 1024 * 32;
|
||||
static constexpr size_t ALIGNMENT = 16;
|
||||
|
||||
explicit ConstantPool(Gen::X64CodeBlock* parent);
|
||||
~ConstantPool();
|
||||
|
||||
// ConstantPool reserves CONST_POOL_SIZE bytes from parent, and uses
|
||||
// that space to store its constants.
|
||||
void AllocCodeSpace();
|
||||
void ClearCodeSpace();
|
||||
|
||||
// Copies the value into the pool if it doesn't exist. Returns a pointer
|
||||
// to existing values if they were already copied. Pointer equality is
|
||||
// used to determine if two constants are the same.
|
||||
Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements,
|
||||
size_t index);
|
||||
|
||||
private:
|
||||
void Init();
|
||||
|
||||
struct ConstantInfo
|
||||
{
|
||||
void* m_location;
|
||||
size_t m_size;
|
||||
};
|
||||
|
||||
Gen::X64CodeBlock* m_parent;
|
||||
void* m_current_ptr = nullptr;
|
||||
size_t m_remaining_size = CONST_POOL_SIZE;
|
||||
std::map<const void*, ConstantInfo> m_const_info;
|
||||
};
|
|
@ -40,6 +40,18 @@ OpArg FixImmediate(int access_size, OpArg arg)
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Clears the code region and then re-creates the constant pool inside it.
void EmuCodeBlock::ClearCodeSpace()
|
||||
{
|
||||
// Base class first, so that the pool is set up again only after the region
// has been cleared.
X64CodeBlock::ClearCodeSpace();
|
||||
m_const_pool.ClearCodeSpace();
|
||||
}
|
||||
|
||||
// Allocates the code region and sets up the constant pool inside it.
// `size` and `need_low` are forwarded to X64CodeBlock::AllocCodeSpace.
void EmuCodeBlock::AllocCodeSpace(size_t size, bool need_low)
|
||||
{
|
||||
// Ask for CONST_POOL_SIZE extra bytes so the pool's reservation does not come
// out of the caller's requested size.
// NOTE(review): the pool setup also emits an INT3 and alignment padding in
// front of its reservation, so slightly fewer than `size` bytes remain.
X64CodeBlock::AllocCodeSpace(size + ConstantPool::CONST_POOL_SIZE, need_low);
|
||||
m_const_pool.AllocCodeSpace();
|
||||
}
|
||||
|
||||
void EmuCodeBlock::MemoryExceptionCheck()
|
||||
{
|
||||
// TODO: We really should untangle the trampolines, exception handlers and
|
||||
|
@ -836,16 +848,16 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg
|
|||
// mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1);
|
||||
if (input.IsSimpleReg() && cpu_info.bAVX)
|
||||
{
|
||||
VPAND(tmp, input.GetSimpleReg(), M(psRoundBit));
|
||||
VPAND(output, input.GetSimpleReg(), M(psMantissaTruncate));
|
||||
VPAND(tmp, input.GetSimpleReg(), MConst(psRoundBit));
|
||||
VPAND(output, input.GetSimpleReg(), MConst(psMantissaTruncate));
|
||||
PADDQ(output, R(tmp));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!input.IsSimpleReg(output))
|
||||
MOVAPD(output, input);
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true);
|
||||
PAND(output, M(psMantissaTruncate));
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), MConst(psRoundBit), true, true);
|
||||
PAND(output, MConst(psMantissaTruncate));
|
||||
PADDQ(output, R(tmp));
|
||||
}
|
||||
}
|
||||
|
@ -890,7 +902,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
MOVSD(XMM1, R(src));
|
||||
|
||||
// Grab Exponent
|
||||
PAND(XMM1, M(&double_exponent));
|
||||
PAND(XMM1, MConst(double_exponent));
|
||||
PSRLQ(XMM1, 52);
|
||||
MOVD_xmm(R(RSCRATCH), XMM1);
|
||||
|
||||
|
@ -909,15 +921,15 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
|
||||
// xmm1 = fraction | 0x0010000000000000
|
||||
MOVSD(XMM1, R(src));
|
||||
PAND(XMM1, M(&double_fraction));
|
||||
POR(XMM1, M(&double_explicit_top_bit));
|
||||
PAND(XMM1, MConst(double_fraction));
|
||||
POR(XMM1, MConst(double_explicit_top_bit));
|
||||
|
||||
// fraction >> shift
|
||||
PSRLQ(XMM1, R(XMM0));
|
||||
|
||||
// OR the sign bit in.
|
||||
MOVSD(XMM0, R(src));
|
||||
PAND(XMM0, M(&double_sign_bit));
|
||||
PAND(XMM0, MConst(double_sign_bit));
|
||||
PSRLQ(XMM0, 32);
|
||||
POR(XMM1, R(XMM0));
|
||||
|
||||
|
@ -930,12 +942,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
|
||||
// We want bits 0, 1
|
||||
MOVSD(XMM1, R(src));
|
||||
PAND(XMM1, M(&double_top_two_bits));
|
||||
PAND(XMM1, MConst(double_top_two_bits));
|
||||
PSRLQ(XMM1, 32);
|
||||
|
||||
// And 5 through to 34
|
||||
MOVSD(XMM0, R(src));
|
||||
PAND(XMM0, M(&double_bottom_bits));
|
||||
PAND(XMM0, MConst(double_bottom_bits));
|
||||
PSRLQ(XMM0, 29);
|
||||
|
||||
// OR them together
|
||||
|
@ -967,8 +979,8 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
// Here, check to see if the source is small enough that it will result in a denormal, and pass it
|
||||
// to the x87 unit
|
||||
// if it is.
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), M(&double_sign_bit), true, true);
|
||||
UCOMISD(XMM0, M(&min_norm_single));
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), MConst(double_sign_bit), true, true);
|
||||
UCOMISD(XMM0, MConst(min_norm_single));
|
||||
FixupBranch nanConversion = J_CC(CC_P, true);
|
||||
FixupBranch denormalConversion = J_CC(CC_B, true);
|
||||
CVTSD2SS(dst, R(src));
|
||||
|
@ -982,7 +994,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
FixupBranch continue1 = J_CC(CC_C, true);
|
||||
// Clear the quiet bit of the SNaN, which was 0 (signalling) but got set to 1 (quiet) by
|
||||
// conversion.
|
||||
ANDPS(dst, M(&single_qnan_bit));
|
||||
ANDPS(dst, MConst(single_qnan_bit));
|
||||
FixupBranch continue2 = J(true);
|
||||
|
||||
SetJumpTarget(denormalConversion);
|
||||
|
@ -1025,7 +1037,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
|
|||
SetJumpTarget(nanConversion);
|
||||
TEST(32, R(gprsrc), Imm32(0x00400000));
|
||||
FixupBranch continue1 = J_CC(CC_NZ, true);
|
||||
ANDPD(dst, M(&double_qnan_bit));
|
||||
ANDPD(dst, MConst(double_qnan_bit));
|
||||
FixupBranch continue2 = J(true);
|
||||
SwitchToNearCode();
|
||||
|
||||
|
@ -1057,7 +1069,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
{
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it.
|
||||
PTEST(xmm, M(psDoubleExp));
|
||||
PTEST(xmm, MConst(psDoubleExp));
|
||||
FixupBranch maxExponent = J_CC(CC_C);
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
|
||||
|
@ -1067,7 +1079,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
continue1 = J();
|
||||
|
||||
SetJumpTarget(maxExponent);
|
||||
PTEST(xmm, M(psDoubleFrac));
|
||||
PTEST(xmm, MConst(psDoubleFrac));
|
||||
FixupBranch notNAN = J_CC(CC_Z);
|
||||
|
||||
// Max exponent + mantissa: PPC_FPCLASS_QNAN
|
||||
|
@ -1097,10 +1109,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
else
|
||||
{
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
TEST(64, R(RSCRATCH), M(psDoubleExp));
|
||||
TEST(64, R(RSCRATCH), MConst(psDoubleExp));
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
AND(64, R(RSCRATCH), M(psDoubleNoSign));
|
||||
CMP(64, R(RSCRATCH), M(psDoubleExp));
|
||||
AND(64, R(RSCRATCH), MConst(psDoubleNoSign));
|
||||
CMP(64, R(RSCRATCH), MConst(psDoubleExp));
|
||||
FixupBranch nan =
|
||||
J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative
|
||||
FixupBranch infinity = J_CC(CC_E);
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
||||
#include "Core/PowerPC/Jit64Common/ConstantPool.h"
|
||||
#include "Core/PowerPC/Jit64Common/FarCodeCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/TrampolineInfo.h"
|
||||
|
||||
|
@ -22,12 +23,27 @@ class Mapping;
|
|||
class EmuCodeBlock : public Gen::X64CodeBlock
|
||||
{
|
||||
public:
|
||||
void ClearCodeSpace() override;
|
||||
void AllocCodeSpace(size_t size, bool need_low = true) override;
|
||||
|
||||
void MemoryExceptionCheck();
|
||||
|
||||
// Simple functions to switch between near and far code emitting
|
||||
void SwitchToFarCode();
|
||||
void SwitchToNearCode();
|
||||
|
||||
// Returns a memory operand for the constant pool's copy of a single object.
// The pool identifies constants by the address of `value`, so pass an object
// with a stable address (such as a static or global) rather than a temporary.
template <typename T>
|
||||
Gen::OpArg MConst(const T& value)
|
||||
{
|
||||
// One element of sizeof(T) bytes, addressed at index 0.
return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0);
|
||||
}
|
||||
|
||||
// Array overload: the whole N-element array is placed in the constant pool and
// the returned operand addresses element `index` of that copy (the first
// element by default). The array's address identifies the constant.
template <typename T, size_t N>
|
||||
Gen::OpArg MConst(const T (&value)[N], size_t index = 0)
|
||||
{
|
||||
// NOTE(review): `index` is not range-checked here; callers are presumably
// expected to pass index < N.
return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index);
|
||||
}
|
||||
|
||||
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
|
||||
BitSet32 registers_in_use);
|
||||
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
|
||||
|
@ -105,6 +121,7 @@ public:
|
|||
void Clear();
|
||||
|
||||
protected:
|
||||
ConstantPool m_const_pool{this};
|
||||
FarCodeCache m_far_code;
|
||||
u8* m_near_code; // Backed up when we switch to far code.
|
||||
|
||||
|
|
|
@ -180,8 +180,6 @@ void CommonAsmRoutines::GenMfcr()
|
|||
X64Reg tmp = RSCRATCH2;
|
||||
X64Reg cr_val = RSCRATCH_EXTRA;
|
||||
XOR(32, R(dst), R(dst));
|
||||
// we only need to zero the high bits of tmp once
|
||||
XOR(32, R(tmp), R(tmp));
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
static const u32 m_flagTable[8] = {0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9};
|
||||
|
@ -190,9 +188,13 @@ void CommonAsmRoutines::GenMfcr()
|
|||
|
||||
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
|
||||
|
||||
// Upper bits of tmp need to be zeroed.
|
||||
// Note: tmp is used later for address calculations and thus
|
||||
// can't be zero-ed once. This also prevents partial
|
||||
// register stalls due to SETcc.
|
||||
XOR(32, R(tmp), R(tmp));
|
||||
// EQ: Bits 31-0 == 0; set flag bit 1
|
||||
TEST(32, R(cr_val), R(cr_val));
|
||||
// FIXME: is there a better way to do this without the partial register merging?
|
||||
SETcc(CC_Z, R(tmp));
|
||||
LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0));
|
||||
|
||||
|
@ -204,7 +206,8 @@ void CommonAsmRoutines::GenMfcr()
|
|||
// SO: Bit 61 set; set flag bit 0
|
||||
// LT: Bit 62 set; set flag bit 3
|
||||
SHR(64, R(cr_val), Imm8(61));
|
||||
OR(32, R(dst), MScaled(cr_val, SCALE_4, PtrOffset(m_flagTable)));
|
||||
LEA(64, tmp, MConst(m_flagTable));
|
||||
OR(32, R(dst), MComplex(tmp, cr_val, SCALE_4, 0));
|
||||
}
|
||||
RET();
|
||||
|
||||
|
@ -297,11 +300,12 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
|
|||
if (quantize == -1)
|
||||
{
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS)));
|
||||
LEA(64, RSCRATCH, MConst(m_quantizeTableS));
|
||||
MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH));
|
||||
}
|
||||
else if (quantize > 0)
|
||||
{
|
||||
MULSS(XMM0, M(&m_quantizeTableS[quantize * 2]));
|
||||
MULSS(XMM0, MConst(m_quantizeTableS, quantize * 2));
|
||||
}
|
||||
|
||||
switch (type)
|
||||
|
@ -309,20 +313,20 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
|
|||
case QUANTIZE_U8:
|
||||
XORPS(XMM1, R(XMM1));
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M(&m_255));
|
||||
MINSS(XMM0, MConst(m_255));
|
||||
break;
|
||||
case QUANTIZE_S8:
|
||||
MAXSS(XMM0, M(&m_m128));
|
||||
MINSS(XMM0, M(&m_127));
|
||||
MAXSS(XMM0, MConst(m_m128));
|
||||
MINSS(XMM0, MConst(m_127));
|
||||
break;
|
||||
case QUANTIZE_U16:
|
||||
XORPS(XMM1, R(XMM1));
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M(m_65535));
|
||||
MINSS(XMM0, MConst(m_65535));
|
||||
break;
|
||||
case QUANTIZE_S16:
|
||||
MAXSS(XMM0, M(&m_m32768));
|
||||
MINSS(XMM0, M(&m_32767));
|
||||
MAXSS(XMM0, MConst(m_m32768));
|
||||
MINSS(XMM0, MConst(m_32767));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -335,12 +339,13 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
|
|||
if (quantize == -1)
|
||||
{
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS)));
|
||||
LEA(64, RSCRATCH, MConst(m_quantizeTableS));
|
||||
MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
}
|
||||
else if (quantize > 0)
|
||||
{
|
||||
MOVQ_xmm(XMM1, M(&m_quantizeTableS[quantize * 2]));
|
||||
MOVQ_xmm(XMM1, MConst(m_quantizeTableS, quantize * 2));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
}
|
||||
|
||||
|
@ -358,7 +363,7 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
|
|||
// is out of int32 range while it's OK for large negatives, it isn't for positives
|
||||
// I don't know whether the overflow actually happens in any games but it potentially can
|
||||
// cause problems, so we need some clamping
|
||||
MINPS(XMM0, M(m_65535));
|
||||
MINPS(XMM0, MConst(m_65535));
|
||||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
|
||||
switch (type)
|
||||
|
@ -419,7 +424,7 @@ void QuantizedMemoryRoutines::GenQuantizedStoreFloat(bool single, bool isInline)
|
|||
{
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
PSHUFB(XMM0, M(pbswapShuffle2x4));
|
||||
PSHUFB(XMM0, MConst(pbswapShuffle2x4));
|
||||
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||
}
|
||||
else
|
||||
|
@ -492,13 +497,14 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
|
|||
if (quantize == -1)
|
||||
{
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS)));
|
||||
LEA(64, RSCRATCH, MConst(m_dequantizeTableS));
|
||||
MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH));
|
||||
}
|
||||
else if (quantize > 0)
|
||||
{
|
||||
MULSS(XMM0, M(&m_dequantizeTableS[quantize * 2]));
|
||||
MULSS(XMM0, MConst(m_dequantizeTableS, quantize * 2));
|
||||
}
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
UNPCKLPS(XMM0, MConst(m_one));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -564,12 +570,13 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
|
|||
if (quantize == -1)
|
||||
{
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS)));
|
||||
LEA(64, RSCRATCH, MConst(m_dequantizeTableS));
|
||||
MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
}
|
||||
else if (quantize > 0)
|
||||
{
|
||||
MOVQ_xmm(XMM1, M(&m_dequantizeTableS[quantize * 2]));
|
||||
MOVQ_xmm(XMM1, MConst(m_dequantizeTableS, quantize * 2));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
}
|
||||
}
|
||||
|
@ -597,7 +604,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
|
|||
else if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVD_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA));
|
||||
PSHUFB(XMM0, M(pbswapShuffle1x4));
|
||||
PSHUFB(XMM0, MConst(pbswapShuffle1x4));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -605,7 +612,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
|
|||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
}
|
||||
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
UNPCKLPS(XMM0, MConst(m_one));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -623,7 +630,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
|
|||
else if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVQ_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA));
|
||||
PSHUFB(XMM0, M(pbswapShuffle2x4));
|
||||
PSHUFB(XMM0, MConst(pbswapShuffle2x4));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -1753,7 +1753,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = fregURegWithMov(RI, I);
|
||||
alignas(16) static const u32 ssSignBits[4] = {0x80000000};
|
||||
Jit->PXOR(reg, M(ssSignBits));
|
||||
Jit->PXOR(reg, Jit->MConst(ssSignBits));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1765,7 +1765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = fregURegWithMov(RI, I);
|
||||
alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL};
|
||||
Jit->PXOR(reg, M(sdSignBits));
|
||||
Jit->PXOR(reg, Jit->MConst(sdSignBits));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1777,7 +1777,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = fregURegWithMov(RI, I);
|
||||
alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000};
|
||||
Jit->PXOR(reg, M(psSignBits));
|
||||
Jit->PXOR(reg, Jit->MConst(psSignBits));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
alignas(16) const u8 pbswapShuffle1x4[16] = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
alignas(16) const u8 pbswapShuffle2x4[16] = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
|
||||
alignas(16) const float m_quantizeTableS[] = {
|
||||
alignas(16) const float m_quantizeTableS[128] = {
|
||||
(1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1),
|
||||
(1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3),
|
||||
(1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5),
|
||||
|
@ -42,7 +42,7 @@ alignas(16) const float m_quantizeTableS[] = {
|
|||
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
|
||||
};
|
||||
|
||||
alignas(16) const float m_dequantizeTableS[] = {
|
||||
alignas(16) const float m_dequantizeTableS[128] = {
|
||||
1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
|
||||
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3),
|
||||
1.0 / (1ULL << 4), 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 5),
|
||||
|
@ -77,4 +77,4 @@ alignas(16) const float m_dequantizeTableS[] = {
|
|||
(1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1),
|
||||
};
|
||||
|
||||
alignas(16) const float m_one[] = {1.0f, 0.0f, 0.0f, 0.0f};
|
||||
alignas(16) const float m_one[4] = {1.0f, 0.0f, 0.0f, 0.0f};
|
||||
|
|
|
@ -8,9 +8,9 @@
|
|||
|
||||
alignas(16) extern const u8 pbswapShuffle1x4[16];
|
||||
alignas(16) extern const u8 pbswapShuffle2x4[16];
|
||||
alignas(16) extern const float m_one[];
|
||||
alignas(16) extern const float m_quantizeTableS[];
|
||||
alignas(16) extern const float m_dequantizeTableS[];
|
||||
alignas(16) extern const float m_one[4];
|
||||
alignas(16) extern const float m_quantizeTableS[128];
|
||||
alignas(16) extern const float m_dequantizeTableS[128];
|
||||
|
||||
class CommonAsmRoutinesBase
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue