Merge pull request #5110 from MerryMage/const-pool

Jit64: Implement a constant pool
This commit is contained in:
Anthony 2017-03-20 13:29:57 -07:00 committed by GitHub
commit d2690568f9
14 changed files with 232 additions and 68 deletions

View File

@ -42,7 +42,7 @@ public:
} }
// Call this before you generate any code. // Call this before you generate any code.
void AllocCodeSpace(size_t size, bool need_low = true) virtual void AllocCodeSpace(size_t size, bool need_low = true)
{ {
region_size = size; region_size = size;
region = static_cast<u8*>(Common::AllocateExecutableMemory(region_size, need_low)); region = static_cast<u8*>(Common::AllocateExecutableMemory(region_size, need_low));
@ -51,7 +51,7 @@ public:
// Always clear code space with breakpoints, so that if someone accidentally executes // Always clear code space with breakpoints, so that if someone accidentally executes
// uninitialized, it just breaks into the debugger. // uninitialized, it just breaks into the debugger.
void ClearCodeSpace() virtual void ClearCodeSpace()
{ {
PoisonMemory(); PoisonMemory();
ResetCodePtr(); ResetCodePtr();

View File

@ -245,6 +245,7 @@ if(_M_X86)
PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/JitRegCache.cpp
PowerPC/Jit64/Jit_SystemRegisters.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp
PowerPC/Jit64Common/BlockCache.cpp PowerPC/Jit64Common/BlockCache.cpp
PowerPC/Jit64Common/ConstantPool.cpp
PowerPC/Jit64Common/EmuCodeBlock.cpp PowerPC/Jit64Common/EmuCodeBlock.cpp
PowerPC/Jit64Common/FarCodeCache.cpp PowerPC/Jit64Common/FarCodeCache.cpp
PowerPC/Jit64Common/Jit64AsmCommon.cpp PowerPC/Jit64Common/Jit64AsmCommon.cpp

View File

@ -244,6 +244,7 @@
<ClCompile Include="PowerPC\Interpreter\Interpreter_Paired.cpp" /> <ClCompile Include="PowerPC\Interpreter\Interpreter_Paired.cpp" />
<ClCompile Include="PowerPC\Interpreter\Interpreter_SystemRegisters.cpp" /> <ClCompile Include="PowerPC\Interpreter\Interpreter_SystemRegisters.cpp" />
<ClCompile Include="PowerPC\Interpreter\Interpreter_Tables.cpp" /> <ClCompile Include="PowerPC\Interpreter\Interpreter_Tables.cpp" />
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp" />
<ClCompile Include="PowerPC\JitILCommon\IR.cpp" /> <ClCompile Include="PowerPC\JitILCommon\IR.cpp" />
<ClCompile Include="PowerPC\JitILCommon\JitILBase_Branch.cpp" /> <ClCompile Include="PowerPC\JitILCommon\JitILBase_Branch.cpp" />
<ClCompile Include="PowerPC\JitILCommon\JitILBase_FloatingPoint.cpp" /> <ClCompile Include="PowerPC\JitILCommon\JitILBase_FloatingPoint.cpp" />
@ -486,6 +487,7 @@
<ClInclude Include="PowerPC\CachedInterpreter\InterpreterBlockCache.h" /> <ClInclude Include="PowerPC\CachedInterpreter\InterpreterBlockCache.h" />
<ClInclude Include="PowerPC\Interpreter\Interpreter.h" /> <ClInclude Include="PowerPC\Interpreter\Interpreter.h" />
<ClInclude Include="PowerPC\Interpreter\Interpreter_FPUtils.h" /> <ClInclude Include="PowerPC\Interpreter\Interpreter_FPUtils.h" />
<ClInclude Include="PowerPC\Jit64Common\ConstantPool.h" />
<ClInclude Include="PowerPC\Jit64IL\JitIL.h" /> <ClInclude Include="PowerPC\Jit64IL\JitIL.h" />
<ClInclude Include="PowerPC\Jit64\FPURegCache.h" /> <ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
<ClInclude Include="PowerPC\Jit64\GPRRegCache.h" /> <ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />

View File

@ -870,6 +870,9 @@
<ClCompile Include="IOS\USB\Bluetooth\WiimoteHIDAttr.cpp"> <ClCompile Include="IOS\USB\Bluetooth\WiimoteHIDAttr.cpp">
<Filter>IOS\USB\Bluetooth</Filter> <Filter>IOS\USB\Bluetooth</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="PowerPC\Jit64Common\ConstantPool.cpp">
<Filter>PowerPC\Jit64Common</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="BootManager.h" /> <ClInclude Include="BootManager.h" />
@ -1493,6 +1496,9 @@
<ClInclude Include="IOS\MIOS.h"> <ClInclude Include="IOS\MIOS.h">
<Filter>IOS</Filter> <Filter>IOS</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="PowerPC\Jit64Common\ConstantPool.h">
<Filter>PowerPC\Jit64Common</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Text Include="CMakeLists.txt" /> <Text Include="CMakeLists.txt" />

View File

@ -108,7 +108,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
UCOMISD(xmm, R(xmm)); UCOMISD(xmm, R(xmm));
fixups.push_back(J_CC(CC_P)); fixups.push_back(J_CC(CC_P));
} }
MOVDDUP(xmm, M(psGeneratedQNaN)); MOVDDUP(xmm, MConst(psGeneratedQNaN));
for (FixupBranch fixup : fixups) for (FixupBranch fixup : fixups)
SetJumpTarget(fixup); SetJumpTarget(fixup);
FixupBranch done = J(true); FixupBranch done = J(true);
@ -127,7 +127,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
_assert_msg_(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0"); _assert_msg_(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
BLENDVPD(xmm, M(psGeneratedQNaN)); BLENDVPD(xmm, MConst(psGeneratedQNaN));
for (u32 x : inputs) for (u32 x : inputs)
{ {
avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD); avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD);
@ -151,7 +151,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
SetJumpTarget(handle_nan); SetJumpTarget(handle_nan);
MOVAPD(tmp, R(clobber)); MOVAPD(tmp, R(clobber));
ANDNPD(clobber, R(xmm)); ANDNPD(clobber, R(xmm));
ANDPD(tmp, M(psGeneratedQNaN)); ANDPD(tmp, MConst(psGeneratedQNaN));
ORPD(tmp, R(clobber)); ORPD(tmp, R(clobber));
MOVAPD(xmm, R(tmp)); MOVAPD(xmm, R(tmp));
for (u32 x : inputs) for (u32 x : inputs)
@ -350,7 +350,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
ADDSD(XMM1, fpr.R(b)); ADDSD(XMM1, fpr.R(b));
} }
if (inst.SUBOP5 == 31) // nmadd if (inst.SUBOP5 == 31) // nmadd
XORPD(XMM1, M(packed ? psSignBits2 : psSignBits)); XORPD(XMM1, MConst(packed ? psSignBits2 : psSignBits));
} }
fpr.BindToRegister(d, !single); fpr.BindToRegister(d, !single);
if (single) if (single)
@ -385,15 +385,15 @@ void Jit64::fsign(UGeckoInstruction inst)
{ {
case 40: // neg case 40: // neg
avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src, avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src,
M(packed ? psSignBits2 : psSignBits), packed); MConst(packed ? psSignBits2 : psSignBits), packed);
break; break;
case 136: // nabs case 136: // nabs
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits), avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src,
packed); MConst(packed ? psSignBits2 : psSignBits), packed);
break; break;
case 264: // abs case 264: // abs
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask), avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src,
packed); MConst(packed ? psAbsMask2 : psAbsMask), packed);
break; break;
default: default:
PanicAlert("fsign bleh"); PanicAlert("fsign bleh");
@ -608,7 +608,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
// The upper 32 bits of the result are set to 0xfff80000, // The upper 32 bits of the result are set to 0xfff80000,
// except for -0.0 where they are set to 0xfff80001 (TODO). // except for -0.0 where they are set to 0xfff80001 (TODO).
MOVAPD(XMM0, M(half_qnan_and_s32_max)); MOVAPD(XMM0, MConst(half_qnan_and_s32_max));
MINSD(XMM0, fpr.R(b)); MINSD(XMM0, fpr.R(b));
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {

View File

@ -623,7 +623,7 @@ void Jit64::mcrfs(UGeckoInstruction inst)
} }
AND(32, R(RSCRATCH), Imm32(mask)); AND(32, R(RSCRATCH), Imm32(mask));
MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
LEA(64, RSCRATCH, M(m_crTable.data())); LEA(64, RSCRATCH, MConst(m_crTable));
MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0)); MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0));
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
} }
@ -657,14 +657,14 @@ void Jit64::mffsx(UGeckoInstruction inst)
} }
// MXCSR = s_fpscr_to_mxcsr[FPSCR & 7] // MXCSR = s_fpscr_to_mxcsr[FPSCR & 7]
static const u32 s_fpscr_to_mxcsr[] = { static const u32 s_fpscr_to_mxcsr[8] = {
0x1F80, 0x7F80, 0x5F80, 0x3F80, 0x9F80, 0xFF80, 0xDF80, 0xBF80, 0x1F80, 0x7F80, 0x5F80, 0x3F80, 0x9F80, 0xFF80, 0xDF80, 0xBF80,
}; };
// Needs value of FPSCR in RSCRATCH. // Needs value of FPSCR in RSCRATCH.
void Jit64::UpdateMXCSR() void Jit64::UpdateMXCSR()
{ {
LEA(64, RSCRATCH2, M(&s_fpscr_to_mxcsr)); LEA(64, RSCRATCH2, MConst(s_fpscr_to_mxcsr));
AND(32, R(RSCRATCH), Imm32(7)); AND(32, R(RSCRATCH), Imm32(7));
LDMXCSR(MComplex(RSCRATCH2, RSCRATCH, SCALE_4, 0)); LDMXCSR(MComplex(RSCRATCH2, RSCRATCH, SCALE_4, 0));
} }
@ -730,7 +730,7 @@ void Jit64::mtfsfix(UGeckoInstruction inst)
// Field 7 contains NI and RN. // Field 7 contains NI and RN.
if (inst.CRFD == 7) if (inst.CRFD == 7)
LDMXCSR(M(&s_fpscr_to_mxcsr[imm & 7])); LDMXCSR(MConst(s_fpscr_to_mxcsr, imm & 7));
} }
void Jit64::mtfsfx(UGeckoInstruction inst) void Jit64::mtfsfx(UGeckoInstruction inst)

View File

@ -0,0 +1,67 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <utility>
#include "Common/Assert.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64Common/ConstantPool.h"
// Binds the pool to the code block whose memory it will carve constants from.
// No space is reserved until AllocCodeSpace() is called.
ConstantPool::ConstantPool(Gen::X64CodeBlock* parent) : m_parent(parent)
{
}
ConstantPool::~ConstantPool() = default;
// One-time setup: reserves the pool region inside the parent code block.
// Asserts that the pool has not already been initialized (m_current_ptr is
// null until the first Init()).
void ConstantPool::AllocCodeSpace()
{
_assert_(!m_current_ptr);
Init();
}
// Drops all pooled constants and re-reserves the pool region; invoked when
// the owning code block is cleared (see EmuCodeBlock::ClearCodeSpace).
void ConstantPool::ClearCodeSpace()
{
Init();
}
// Returns an OpArg referencing a copy of `value` stored inside the pool.
// Constants are deduplicated by the address of the source object: the first
// request copies element_size * num_elements bytes into the pool (aligned to
// ALIGNMENT); later requests for the same address reuse that copy. `index`
// selects one element within a pooled array.
Gen::OpArg ConstantPool::GetConstantOpArg(const void* value, size_t element_size,
                                          size_t num_elements, size_t index)
{
  const size_t total_size = element_size * num_elements;

  auto entry = m_const_info.find(value);
  if (entry == m_const_info.end())
  {
    // Carve an aligned chunk out of the remaining pool space. std::align
    // bumps m_current_ptr to the aligned address and shrinks
    // m_remaining_size by any padding it skipped over.
    void* storage = std::align(ALIGNMENT, total_size, m_current_ptr, m_remaining_size);
    _assert_msg_(DYNA_REC, storage, "Constant pool has run out of space.");

    std::memcpy(storage, value, total_size);
    m_current_ptr = static_cast<u8*>(storage) + total_size;
    m_remaining_size -= total_size;
    entry = m_const_info.emplace(value, ConstantInfo{storage, total_size}).first;
  }

  const ConstantInfo& info = entry->second;
  _assert_msg_(DYNA_REC, info.m_size == total_size,
               "Constant has incorrect size in constant pool.");
  return Gen::M(static_cast<u8*>(info.m_location) + element_size * index);
}
void ConstantPool::Init()
{
// If execution happens to run to the start of the constant pool, halt.
m_parent->INT3();
m_parent->AlignCode16();
// Reserve a block of memory CONST_POOL_SIZE in size.
m_current_ptr = m_parent->GetWritableCodePtr();
m_parent->ReserveCodeSpace(CONST_POOL_SIZE);
m_remaining_size = CONST_POOL_SIZE;
m_const_info.clear();
}

View File

@ -0,0 +1,52 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <map>
namespace Gen
{
struct OpArg;
class X64CodeBlock;
}
// Constants are copied into this pool so that they live at a memory location
// that is close to the code that references it. This ensures that the 32-bit
// limitation on RIP addressing is not an issue.
class ConstantPool
{
public:
// Bytes reserved out of the parent code block for pooled constants.
static constexpr size_t CONST_POOL_SIZE = 1024 * 32;
// Every pooled constant is placed on this byte boundary.
static constexpr size_t ALIGNMENT = 16;
explicit ConstantPool(Gen::X64CodeBlock* parent);
~ConstantPool();
// ConstantPool reserves CONST_POOL_SIZE bytes from parent, and uses
// that space to store its constants.
void AllocCodeSpace();
void ClearCodeSpace();
// Copies the value into the pool if it doesn't exist. Returns a pointer
// to existing values if they were already copied. Pointer equality is
// used to determine if two constants are the same.
Gen::OpArg GetConstantOpArg(const void* value, size_t element_size, size_t num_elements,
size_t index);
private:
void Init();
// Location and byte size of one constant already copied into the pool.
struct ConstantInfo
{
void* m_location;
size_t m_size;
};
Gen::X64CodeBlock* m_parent;
// Next free byte in the pool; null until AllocCodeSpace() runs.
void* m_current_ptr = nullptr;
size_t m_remaining_size = CONST_POOL_SIZE;
// Maps a constant's source address to its copy inside the pool.
std::map<const void*, ConstantInfo> m_const_info;
};

View File

@ -40,6 +40,18 @@ OpArg FixImmediate(int access_size, OpArg arg)
} }
} // Anonymous namespace } // Anonymous namespace
// Clears the code region and then re-initializes the constant pool that
// lives inside it (the base clear poisons the pool's bytes too).
void EmuCodeBlock::ClearCodeSpace()
{
X64CodeBlock::ClearCodeSpace();
m_const_pool.ClearCodeSpace();
}
// Allocates the requested code space plus room for the constant pool, then
// lets the pool reserve its region out of the enlarged block. Keeping the
// pool inside the same allocation guarantees it stays within RIP range.
void EmuCodeBlock::AllocCodeSpace(size_t size, bool need_low)
{
X64CodeBlock::AllocCodeSpace(size + ConstantPool::CONST_POOL_SIZE, need_low);
m_const_pool.AllocCodeSpace();
}
void EmuCodeBlock::MemoryExceptionCheck() void EmuCodeBlock::MemoryExceptionCheck()
{ {
// TODO: We really should untangle the trampolines, exception handlers and // TODO: We really should untangle the trampolines, exception handlers and
@ -836,16 +848,16 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg
// mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1); // mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1);
if (input.IsSimpleReg() && cpu_info.bAVX) if (input.IsSimpleReg() && cpu_info.bAVX)
{ {
VPAND(tmp, input.GetSimpleReg(), M(psRoundBit)); VPAND(tmp, input.GetSimpleReg(), MConst(psRoundBit));
VPAND(output, input.GetSimpleReg(), M(psMantissaTruncate)); VPAND(output, input.GetSimpleReg(), MConst(psMantissaTruncate));
PADDQ(output, R(tmp)); PADDQ(output, R(tmp));
} }
else else
{ {
if (!input.IsSimpleReg(output)) if (!input.IsSimpleReg(output))
MOVAPD(output, input); MOVAPD(output, input);
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true); avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), MConst(psRoundBit), true, true);
PAND(output, M(psMantissaTruncate)); PAND(output, MConst(psMantissaTruncate));
PADDQ(output, R(tmp)); PADDQ(output, R(tmp));
} }
} }
@ -890,7 +902,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
MOVSD(XMM1, R(src)); MOVSD(XMM1, R(src));
// Grab Exponent // Grab Exponent
PAND(XMM1, M(&double_exponent)); PAND(XMM1, MConst(double_exponent));
PSRLQ(XMM1, 52); PSRLQ(XMM1, 52);
MOVD_xmm(R(RSCRATCH), XMM1); MOVD_xmm(R(RSCRATCH), XMM1);
@ -909,15 +921,15 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// xmm1 = fraction | 0x0010000000000000 // xmm1 = fraction | 0x0010000000000000
MOVSD(XMM1, R(src)); MOVSD(XMM1, R(src));
PAND(XMM1, M(&double_fraction)); PAND(XMM1, MConst(double_fraction));
POR(XMM1, M(&double_explicit_top_bit)); POR(XMM1, MConst(double_explicit_top_bit));
// fraction >> shift // fraction >> shift
PSRLQ(XMM1, R(XMM0)); PSRLQ(XMM1, R(XMM0));
// OR the sign bit in. // OR the sign bit in.
MOVSD(XMM0, R(src)); MOVSD(XMM0, R(src));
PAND(XMM0, M(&double_sign_bit)); PAND(XMM0, MConst(double_sign_bit));
PSRLQ(XMM0, 32); PSRLQ(XMM0, 32);
POR(XMM1, R(XMM0)); POR(XMM1, R(XMM0));
@ -930,12 +942,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// We want bits 0, 1 // We want bits 0, 1
MOVSD(XMM1, R(src)); MOVSD(XMM1, R(src));
PAND(XMM1, M(&double_top_two_bits)); PAND(XMM1, MConst(double_top_two_bits));
PSRLQ(XMM1, 32); PSRLQ(XMM1, 32);
// And 5 through to 34 // And 5 through to 34
MOVSD(XMM0, R(src)); MOVSD(XMM0, R(src));
PAND(XMM0, M(&double_bottom_bits)); PAND(XMM0, MConst(double_bottom_bits));
PSRLQ(XMM0, 29); PSRLQ(XMM0, 29);
// OR them together // OR them together
@ -967,8 +979,8 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// Here, check to see if the source is small enough that it will result in a denormal, and pass it // Here, check to see if the source is small enough that it will result in a denormal, and pass it
// to the x87 unit // to the x87 unit
// if it is. // if it is.
avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), M(&double_sign_bit), true, true); avx_op(&XEmitter::VPAND, &XEmitter::PAND, XMM0, R(src), MConst(double_sign_bit), true, true);
UCOMISD(XMM0, M(&min_norm_single)); UCOMISD(XMM0, MConst(min_norm_single));
FixupBranch nanConversion = J_CC(CC_P, true); FixupBranch nanConversion = J_CC(CC_P, true);
FixupBranch denormalConversion = J_CC(CC_B, true); FixupBranch denormalConversion = J_CC(CC_B, true);
CVTSD2SS(dst, R(src)); CVTSD2SS(dst, R(src));
@ -982,7 +994,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
FixupBranch continue1 = J_CC(CC_C, true); FixupBranch continue1 = J_CC(CC_C, true);
// Clear the quiet bit of the SNaN, which was 0 (signalling) but got set to 1 (quiet) by // Clear the quiet bit of the SNaN, which was 0 (signalling) but got set to 1 (quiet) by
// conversion. // conversion.
ANDPS(dst, M(&single_qnan_bit)); ANDPS(dst, MConst(single_qnan_bit));
FixupBranch continue2 = J(true); FixupBranch continue2 = J(true);
SetJumpTarget(denormalConversion); SetJumpTarget(denormalConversion);
@ -1025,7 +1037,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
SetJumpTarget(nanConversion); SetJumpTarget(nanConversion);
TEST(32, R(gprsrc), Imm32(0x00400000)); TEST(32, R(gprsrc), Imm32(0x00400000));
FixupBranch continue1 = J_CC(CC_NZ, true); FixupBranch continue1 = J_CC(CC_NZ, true);
ANDPD(dst, M(&double_qnan_bit)); ANDPD(dst, MConst(double_qnan_bit));
FixupBranch continue2 = J(true); FixupBranch continue2 = J(true);
SwitchToNearCode(); SwitchToNearCode();
@ -1057,7 +1069,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
{ {
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it. SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it.
PTEST(xmm, M(psDoubleExp)); PTEST(xmm, MConst(psDoubleExp));
FixupBranch maxExponent = J_CC(CC_C); FixupBranch maxExponent = J_CC(CC_C);
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
@ -1067,7 +1079,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
continue1 = J(); continue1 = J();
SetJumpTarget(maxExponent); SetJumpTarget(maxExponent);
PTEST(xmm, M(psDoubleFrac)); PTEST(xmm, MConst(psDoubleFrac));
FixupBranch notNAN = J_CC(CC_Z); FixupBranch notNAN = J_CC(CC_Z);
// Max exponent + mantissa: PPC_FPCLASS_QNAN // Max exponent + mantissa: PPC_FPCLASS_QNAN
@ -1097,10 +1109,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
else else
{ {
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
TEST(64, R(RSCRATCH), M(psDoubleExp)); TEST(64, R(RSCRATCH), MConst(psDoubleExp));
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
AND(64, R(RSCRATCH), M(psDoubleNoSign)); AND(64, R(RSCRATCH), MConst(psDoubleNoSign));
CMP(64, R(RSCRATCH), M(psDoubleExp)); CMP(64, R(RSCRATCH), MConst(psDoubleExp));
FixupBranch nan = FixupBranch nan =
J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative
FixupBranch infinity = J_CC(CC_E); FixupBranch infinity = J_CC(CC_E);

View File

@ -10,6 +10,7 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64Common/ConstantPool.h"
#include "Core/PowerPC/Jit64Common/FarCodeCache.h" #include "Core/PowerPC/Jit64Common/FarCodeCache.h"
#include "Core/PowerPC/Jit64Common/TrampolineInfo.h" #include "Core/PowerPC/Jit64Common/TrampolineInfo.h"
@ -22,12 +23,27 @@ class Mapping;
class EmuCodeBlock : public Gen::X64CodeBlock class EmuCodeBlock : public Gen::X64CodeBlock
{ {
public: public:
void ClearCodeSpace() override;
void AllocCodeSpace(size_t size, bool need_low = true) override;
void MemoryExceptionCheck(); void MemoryExceptionCheck();
// Simple functions to switch between near and far code emitting // Simple functions to switch between near and far code emitting
void SwitchToFarCode(); void SwitchToFarCode();
void SwitchToNearCode(); void SwitchToNearCode();
// Stages a single value in the constant pool and returns an operand that
// addresses the pooled copy. The pool deduplicates by the address of
// `value`, so pass a long-lived object (e.g. a static), not a temporary.
template <typename T>
Gen::OpArg MConst(const T& value)
{
return m_const_pool.GetConstantOpArg(&value, sizeof(T), 1, 0);
}
// Array overload: pools the whole array and returns an operand addressing
// element `index` (defaults to the first element).
template <typename T, size_t N>
Gen::OpArg MConst(const T (&value)[N], size_t index = 0)
{
return m_const_pool.GetConstantOpArg(&value, sizeof(T), N, index);
}
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr, Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
BitSet32 registers_in_use); BitSet32 registers_in_use);
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
@ -105,6 +121,7 @@ public:
void Clear(); void Clear();
protected: protected:
ConstantPool m_const_pool{this};
FarCodeCache m_far_code; FarCodeCache m_far_code;
u8* m_near_code; // Backed up when we switch to far code. u8* m_near_code; // Backed up when we switch to far code.

View File

@ -180,8 +180,6 @@ void CommonAsmRoutines::GenMfcr()
X64Reg tmp = RSCRATCH2; X64Reg tmp = RSCRATCH2;
X64Reg cr_val = RSCRATCH_EXTRA; X64Reg cr_val = RSCRATCH_EXTRA;
XOR(32, R(dst), R(dst)); XOR(32, R(dst), R(dst));
// we only need to zero the high bits of tmp once
XOR(32, R(tmp), R(tmp));
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
static const u32 m_flagTable[8] = {0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9}; static const u32 m_flagTable[8] = {0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9};
@ -190,9 +188,13 @@ void CommonAsmRoutines::GenMfcr()
MOV(64, R(cr_val), PPCSTATE(cr_val[i])); MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
// Upper bits of tmp need to be zeroed.
// Note: tmp is used later for address calculations and thus
// can't be zero-ed once. This also prevents partial
// register stalls due to SETcc.
XOR(32, R(tmp), R(tmp));
// EQ: Bits 31-0 == 0; set flag bit 1 // EQ: Bits 31-0 == 0; set flag bit 1
TEST(32, R(cr_val), R(cr_val)); TEST(32, R(cr_val), R(cr_val));
// FIXME: is there a better way to do this without the partial register merging?
SETcc(CC_Z, R(tmp)); SETcc(CC_Z, R(tmp));
LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0)); LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0));
@ -204,7 +206,8 @@ void CommonAsmRoutines::GenMfcr()
// SO: Bit 61 set; set flag bit 0 // SO: Bit 61 set; set flag bit 0
// LT: Bit 62 set; set flag bit 3 // LT: Bit 62 set; set flag bit 3
SHR(64, R(cr_val), Imm8(61)); SHR(64, R(cr_val), Imm8(61));
OR(32, R(dst), MScaled(cr_val, SCALE_4, PtrOffset(m_flagTable))); LEA(64, tmp, MConst(m_flagTable));
OR(32, R(dst), MComplex(tmp, cr_val, SCALE_4, 0));
} }
RET(); RET();
@ -297,11 +300,12 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
if (quantize == -1) if (quantize == -1)
{ {
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS))); LEA(64, RSCRATCH, MConst(m_quantizeTableS));
MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH));
} }
else if (quantize > 0) else if (quantize > 0)
{ {
MULSS(XMM0, M(&m_quantizeTableS[quantize * 2])); MULSS(XMM0, MConst(m_quantizeTableS, quantize * 2));
} }
switch (type) switch (type)
@ -309,20 +313,20 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
case QUANTIZE_U8: case QUANTIZE_U8:
XORPS(XMM1, R(XMM1)); XORPS(XMM1, R(XMM1));
MAXSS(XMM0, R(XMM1)); MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M(&m_255)); MINSS(XMM0, MConst(m_255));
break; break;
case QUANTIZE_S8: case QUANTIZE_S8:
MAXSS(XMM0, M(&m_m128)); MAXSS(XMM0, MConst(m_m128));
MINSS(XMM0, M(&m_127)); MINSS(XMM0, MConst(m_127));
break; break;
case QUANTIZE_U16: case QUANTIZE_U16:
XORPS(XMM1, R(XMM1)); XORPS(XMM1, R(XMM1));
MAXSS(XMM0, R(XMM1)); MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M(m_65535)); MINSS(XMM0, MConst(m_65535));
break; break;
case QUANTIZE_S16: case QUANTIZE_S16:
MAXSS(XMM0, M(&m_m32768)); MAXSS(XMM0, MConst(m_m32768));
MINSS(XMM0, M(&m_32767)); MINSS(XMM0, MConst(m_32767));
break; break;
default: default:
break; break;
@ -335,12 +339,13 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
if (quantize == -1) if (quantize == -1)
{ {
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_quantizeTableS))); LEA(64, RSCRATCH, MConst(m_quantizeTableS));
MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
} }
else if (quantize > 0) else if (quantize > 0)
{ {
MOVQ_xmm(XMM1, M(&m_quantizeTableS[quantize * 2])); MOVQ_xmm(XMM1, MConst(m_quantizeTableS, quantize * 2));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
} }
@ -358,7 +363,7 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
// is out of int32 range while it's OK for large negatives, it isn't for positives // is out of int32 range while it's OK for large negatives, it isn't for positives
// I don't know whether the overflow actually happens in any games but it potentially can // I don't know whether the overflow actually happens in any games but it potentially can
// cause problems, so we need some clamping // cause problems, so we need some clamping
MINPS(XMM0, M(m_65535)); MINPS(XMM0, MConst(m_65535));
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
switch (type) switch (type)
@ -419,7 +424,7 @@ void QuantizedMemoryRoutines::GenQuantizedStoreFloat(bool single, bool isInline)
{ {
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
{ {
PSHUFB(XMM0, M(pbswapShuffle2x4)); PSHUFB(XMM0, MConst(pbswapShuffle2x4));
MOVQ_xmm(R(RSCRATCH), XMM0); MOVQ_xmm(R(RSCRATCH), XMM0);
} }
else else
@ -492,13 +497,14 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
if (quantize == -1) if (quantize == -1)
{ {
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS))); LEA(64, RSCRATCH, MConst(m_dequantizeTableS));
MULSS(XMM0, MRegSum(RSCRATCH2, RSCRATCH));
} }
else if (quantize > 0) else if (quantize > 0)
{ {
MULSS(XMM0, M(&m_dequantizeTableS[quantize * 2])); MULSS(XMM0, MConst(m_dequantizeTableS, quantize * 2));
} }
UNPCKLPS(XMM0, M(m_one)); UNPCKLPS(XMM0, MConst(m_one));
} }
else else
{ {
@ -564,12 +570,13 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
if (quantize == -1) if (quantize == -1)
{ {
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, PtrOffset(m_dequantizeTableS))); LEA(64, RSCRATCH, MConst(m_dequantizeTableS));
MOVQ_xmm(XMM1, MRegSum(RSCRATCH2, RSCRATCH));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
} }
else if (quantize > 0) else if (quantize > 0)
{ {
MOVQ_xmm(XMM1, M(&m_dequantizeTableS[quantize * 2])); MOVQ_xmm(XMM1, MConst(m_dequantizeTableS, quantize * 2));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
} }
} }
@ -597,7 +604,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
else if (cpu_info.bSSSE3) else if (cpu_info.bSSSE3)
{ {
MOVD_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA)); MOVD_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA));
PSHUFB(XMM0, M(pbswapShuffle1x4)); PSHUFB(XMM0, MConst(pbswapShuffle1x4));
} }
else else
{ {
@ -605,7 +612,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
} }
UNPCKLPS(XMM0, M(m_one)); UNPCKLPS(XMM0, MConst(m_one));
} }
else else
{ {
@ -623,7 +630,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
else if (cpu_info.bSSSE3) else if (cpu_info.bSSSE3)
{ {
MOVQ_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA)); MOVQ_xmm(XMM0, MRegSum(RMEM, RSCRATCH_EXTRA));
PSHUFB(XMM0, M(pbswapShuffle2x4)); PSHUFB(XMM0, MConst(pbswapShuffle2x4));
} }
else else
{ {

View File

@ -1753,7 +1753,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregURegWithMov(RI, I); X64Reg reg = fregURegWithMov(RI, I);
alignas(16) static const u32 ssSignBits[4] = {0x80000000}; alignas(16) static const u32 ssSignBits[4] = {0x80000000};
Jit->PXOR(reg, M(ssSignBits)); Jit->PXOR(reg, Jit->MConst(ssSignBits));
RI.fregs[reg] = I; RI.fregs[reg] = I;
fregNormalRegClear(RI, I); fregNormalRegClear(RI, I);
break; break;
@ -1765,7 +1765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregURegWithMov(RI, I); X64Reg reg = fregURegWithMov(RI, I);
alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL}; alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL};
Jit->PXOR(reg, M(sdSignBits)); Jit->PXOR(reg, Jit->MConst(sdSignBits));
RI.fregs[reg] = I; RI.fregs[reg] = I;
fregNormalRegClear(RI, I); fregNormalRegClear(RI, I);
break; break;
@ -1777,7 +1777,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregURegWithMov(RI, I); X64Reg reg = fregURegWithMov(RI, I);
alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000}; alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000};
Jit->PXOR(reg, M(psSignBits)); Jit->PXOR(reg, Jit->MConst(psSignBits));
RI.fregs[reg] = I; RI.fregs[reg] = I;
fregNormalRegClear(RI, I); fregNormalRegClear(RI, I);
break; break;

View File

@ -7,7 +7,7 @@
alignas(16) const u8 pbswapShuffle1x4[16] = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; alignas(16) const u8 pbswapShuffle1x4[16] = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
alignas(16) const u8 pbswapShuffle2x4[16] = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; alignas(16) const u8 pbswapShuffle2x4[16] = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
alignas(16) const float m_quantizeTableS[] = { alignas(16) const float m_quantizeTableS[128] = {
(1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1), (1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1),
(1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3), (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3),
(1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5), (1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5),
@ -42,7 +42,7 @@ alignas(16) const float m_quantizeTableS[] = {
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
}; };
alignas(16) const float m_dequantizeTableS[] = { alignas(16) const float m_dequantizeTableS[128] = {
1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), 1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3), 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3),
1.0 / (1ULL << 4), 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 5), 1.0 / (1ULL << 4), 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 5),
@ -77,4 +77,4 @@ alignas(16) const float m_dequantizeTableS[] = {
(1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1), (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1),
}; };
alignas(16) const float m_one[] = {1.0f, 0.0f, 0.0f, 0.0f}; alignas(16) const float m_one[4] = {1.0f, 0.0f, 0.0f, 0.0f};

View File

@ -8,9 +8,9 @@
alignas(16) extern const u8 pbswapShuffle1x4[16]; alignas(16) extern const u8 pbswapShuffle1x4[16];
alignas(16) extern const u8 pbswapShuffle2x4[16]; alignas(16) extern const u8 pbswapShuffle2x4[16];
alignas(16) extern const float m_one[]; alignas(16) extern const float m_one[4];
alignas(16) extern const float m_quantizeTableS[]; alignas(16) extern const float m_quantizeTableS[128];
alignas(16) extern const float m_dequantizeTableS[]; alignas(16) extern const float m_dequantizeTableS[128];
class CommonAsmRoutinesBase class CommonAsmRoutinesBase
{ {