Add code for frsqrtex to the JIT. Disable the table-based implementation in the interpreter until we find something that it actually fixes; so far it seems like it breaks stuff.

Assorted cleanup of JIT flags that we don't need anymore.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4867 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2010-01-17 11:47:35 +00:00
parent 936664314f
commit f50e3cf5fe
13 changed files with 83 additions and 31 deletions

View File

@ -405,6 +405,8 @@ void fresx(UGeckoInstruction _inst)
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
} }
// #define USE_ACCURATE_FRSQRTEX
void frsqrtex(UGeckoInstruction _inst) void frsqrtex(UGeckoInstruction _inst)
{ {
double b = rPS0(_inst.FB); double b = rPS0(_inst.FB);
@ -415,6 +417,7 @@ void frsqrtex(UGeckoInstruction _inst)
} }
else else
{ {
#ifdef USE_ACCURATE_FRSQRTEX
if (b == 0.0) { if (b == 0.0) {
SetFPException(FPSCR_ZX); SetFPException(FPSCR_ZX);
riPS0(_inst.FD) = 0x7ff0000000000000; riPS0(_inst.FD) = 0x7ff0000000000000;
@ -436,6 +439,11 @@ void frsqrtex(UGeckoInstruction _inst)
outa |= frsqrtex_lut[idx] >> 12; outa |= frsqrtex_lut[idx] >> 12;
riPS0(_inst.FD) = ((u64)outa << 32) + (u64)outb; riPS0(_inst.FD) = ((u64)outa << 32) + (u64)outb;
} }
#else
if (b == 0.0)
SetFPException(FPSCR_ZX);
rPS0(_inst.FD) = ForceDouble(1.0 / sqrt(b));
#endif
} }
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));

View File

@ -443,7 +443,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.blockStart = em_address; js.blockStart = em_address;
js.fifoBytesThisBlock = 0; js.fifoBytesThisBlock = 0;
js.curBlock = b; js.curBlock = b;
js.blockSetsQuantizers = false;
js.block_flags = 0; js.block_flags = 0;
js.cancel = false; js.cancel = false;

View File

@ -110,7 +110,6 @@ private:
int block_flags; int block_flags;
bool isLastInstruction; bool isLastInstruction;
bool blockSetsQuantizers;
int fifoBytesThisBlock; int fifoBytesThisBlock;
@ -247,6 +246,7 @@ public:
void ps_muls(UGeckoInstruction inst); void ps_muls(UGeckoInstruction inst);
void fp_arith_s(UGeckoInstruction inst); void fp_arith_s(UGeckoInstruction inst);
void frsqrtex(UGeckoInstruction inst);
void fcmpx(UGeckoInstruction inst); void fcmpx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst); void fmrx(UGeckoInstruction inst);

View File

@ -71,6 +71,9 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
fpr.UnlockAll(); fpr.UnlockAll();
} }
static const double one_const = 1.0f;
void Jit64::fp_arith_s(UGeckoInstruction inst) void Jit64::fp_arith_s(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
@ -79,6 +82,20 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
Default(inst); return; Default(inst); return;
} }
if (inst.SUBOP5 == 26) {
// frsqrtex
int d = inst.FD;
int b = inst.FB;
fpr.Lock(b, d);
fpr.LoadToX64(d, true, true);
MOVSD(XMM0, M((void *)&one_const));
SQRTSD(XMM1, fpr.R(b));
DIVSD(XMM0, R(XMM1));
MOVSD(fpr.R(d), XMM0);
fpr.UnlockAll();
return;
}
if (inst.SUBOP5 != 18 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21 && if (inst.SUBOP5 != 18 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21 &&
inst.SUBOP5 != 25) { inst.SUBOP5 != 25) {
Default(inst); return; Default(inst); return;
@ -253,3 +270,5 @@ void Jit64::fcmpx(UGeckoInstruction inst)
SetJumpTarget(continue3); SetJumpTarget(continue3);
fpr.UnlockAll(); fpr.UnlockAll();
} }

View File

@ -275,9 +275,7 @@ void Jit64::stfs(UGeckoInstruction inst)
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
} }
CVTSD2SS(XMM0, fpr.R(s)); CVTSD2SS(XMM0, fpr.R(s));
MOVSS(M(&temp32), XMM0); SafeWriteFloatToReg(XMM0, ABI_PARAM2);
MOV(32, R(ABI_PARAM1), M(&temp32));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();

View File

@ -51,9 +51,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(LoadStorePaired) JITDISABLE(LoadStorePaired)
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
if (js.blockSetsQuantizers || !inst.RA) if (!inst.RA)
{ {
// TODO: Support these cases if it becomes necessary. // TODO: Support these cases if it becomes necessary.
Default(inst); Default(inst);
@ -105,12 +104,13 @@ void Jit64::psq_st(UGeckoInstruction inst)
MOV(32, gpr.R(a), R(ECX)); MOV(32, gpr.R(a), R(ECX));
MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I])); MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]));
MOVZX(32, 8, EDX, R(AL)); MOVZX(32, 8, EDX, R(AL));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register!
#ifdef _M_IX86 #ifdef _M_IX86
SHL(32, R(EDX), Imm8(2)); int addr_shift = 2;
#else #else
SHL(32, R(EDX), Imm8(3)); int addr_shift = 3;
#endif #endif
SHL(32, R(EDX), Imm8(addr_shift));
if (inst.W) { if (inst.W) {
// One value // One value
XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
@ -130,14 +130,20 @@ void Jit64::psq_l(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(LoadStorePaired) JITDISABLE(LoadStorePaired)
js.block_flags |= BLOCK_USE_GQR0 << inst.I; if (!inst.RA)
if (js.blockSetsQuantizers || !inst.RA || inst.W)
{ {
Default(inst); Default(inst);
return; return;
} }
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
if (inst.W) {
// PanicAlert("Single ps load: %i %i", gqr.ST_TYPE, gqr.ST_SCALE);
Default(inst);
return;
}
bool update = inst.OPCD == 57; bool update = inst.OPCD == 57;
int offset = inst.SIMM_12; int offset = inst.SIMM_12;

View File

@ -52,7 +52,6 @@ void Jit64::mtspr(UGeckoInstruction inst)
case SPR_GQR0 + 5: case SPR_GQR0 + 5:
case SPR_GQR0 + 6: case SPR_GQR0 + 6:
case SPR_GQR0 + 7: case SPR_GQR0 + 7:
js.blockSetsQuantizers = true;
// Prevent recompiler from compiling in old quantizer values. // Prevent recompiler from compiling in old quantizer values.
// If the value changed, destroy all blocks using this quantizer // If the value changed, destroy all blocks using this quantizer
// This will create a little bit of block churn, but hopefully not too bad. // This will create a little bit of block churn, but hopefully not too bad.

View File

@ -397,8 +397,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitB
js.blockStart = em_address; js.blockStart = em_address;
js.fifoBytesThisBlock = 0; js.fifoBytesThisBlock = 0;
js.curBlock = b; js.curBlock = b;
js.blockSetsQuantizers = false;
js.block_flags = 0;
js.cancel = false; js.cancel = false;
//Analyze the block, collect all instructions it is made of (including inlining, //Analyze the block, collect all instructions it is made of (including inlining,
@ -464,7 +462,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitB
// Perform actual code generation // Perform actual code generation
WriteCode(); WriteCode();
b->flags = js.block_flags;
b->codeSize = (u32)(GetCodePtr() - normalEntry); b->codeSize = (u32)(GetCodePtr() - normalEntry);
b->originalSize = size; b->originalSize = size;
return normalEntry; return normalEntry;

View File

@ -98,10 +98,8 @@ private:
UGeckoInstruction next_inst; // for easy peephole opt. UGeckoInstruction next_inst; // for easy peephole opt.
int instructionNumber; int instructionNumber;
int downcountAmount; int downcountAmount;
int block_flags;
bool isLastInstruction; bool isLastInstruction;
bool blockSetsQuantizers;
bool forceUnsafeLoad; bool forceUnsafeLoad;
int fifoBytesThisBlock; int fifoBytesThisBlock;

View File

@ -299,6 +299,9 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
// Easy! // Easy!
const u8* storeSingleFloat = AlignCode4(); const u8* storeSingleFloat = AlignCode4();
SafeWriteFloatToReg(XMM0, ECX);
RET();
/*
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3) {
PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
// TODO: SafeWriteFloat // TODO: SafeWriteFloat
@ -309,8 +312,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
MOVSS(M(&psTemp[0]), XMM0); MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0])); MOV(32, R(EAX), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, true); SafeWriteRegToReg(EAX, ECX, 32, 0, true);
} }*/
RET();
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
SHR(32, R(EAX), Imm8(6)); SHR(32, R(EAX), Imm8(6));
@ -336,8 +338,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
const u8* storeSingleU16 = AlignCode4(); // Used by MKWii const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
SHR(32, R(EAX), Imm8(6)); SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); MULSS(XMM0, R(XMM1));
MULPS(XMM0, R(XMM1));
PXOR(XMM1, R(XMM1)); PXOR(XMM1, R(XMM1));
MAXSS(XMM0, R(XMM1)); MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535)); MINSS(XMM0, M((void *)&m_65535));

View File

@ -42,13 +42,6 @@
#define JIT_ICACHE_INVALID_BYTE 0x14 #define JIT_ICACHE_INVALID_BYTE 0x14
#define JIT_ICACHE_INVALID_WORD 0x14141414 #define JIT_ICACHE_INVALID_WORD 0x14141414
enum BlockFlag
{
BLOCK_USE_GQR0 = 0x1, BLOCK_USE_GQR1 = 0x2, BLOCK_USE_GQR2 = 0x4, BLOCK_USE_GQR3 = 0x8,
BLOCK_USE_GQR4 = 0x10, BLOCK_USE_GQR5 = 0x20, BLOCK_USE_GQR6 = 0x40, BLOCK_USE_GQR7 = 0x80,
};
// TODO(ector) - optimize this struct for size // TODO(ector) - optimize this struct for size
struct JitBlock struct JitBlock
{ {

View File

@ -18,6 +18,7 @@
#include "Common.h" #include "Common.h"
#include "Thunk.h" #include "Thunk.h"
#include "CPUDetect.h"
#include "../PowerPC.h" #include "../PowerPC.h"
#include "../../Core.h" #include "../../Core.h"
#include "../../HW/GPFifo.h" #include "../../HW/GPFifo.h"
@ -139,6 +140,36 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
SetJumpTarget(arg2); SetJumpTarget(arg2);
} }
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
static u32 GC_ALIGNED16(float_buffer);
// Emits code that stores the low 32-bit float of xmm_value to the emulated
// address held in reg_addr.
//
// xmm_value: XMM register holding the single-precision value to store.
// reg_addr:  general-purpose register holding the destination address.
//
// The emitted code uses EAX as scratch and goes through the static
// float_buffer. In the (currently disabled) SSSE3 path xmm_value is also
// shuffled in place and, on 32-bit builds, reg_addr is masked in place.
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
{
// Test the address against the mask 0x0C000000; the resulting flags are only
// consumed by the J_CC in the disabled branch below.
// NOTE(review): presumably these bits separate plain RAM from addresses that
// need the slow Memory::Write_U32 path - confirm against SafeWriteRegToReg.
TEST(32, R(reg_addr), Imm32(0x0C000000));
// The "false &&" makes this branch dead: the else branch is always emitted.
if (false && cpu_info.bSSSE3) {
// This path should be faster but for some reason it causes errors so I've disabled it.
// Masked bits zero -> jump to the direct store at "argh".
FixupBranch argh = J_CC(CC_Z);
// Otherwise: move the float through float_buffer into EAX, byte-swap it and
// call Memory::Write_U32(EAX, reg_addr) through a thunk.
// NOTE(review): if Memory::Write_U32 already byte-swaps its value, the BSWAP
// here would swap twice - possibly the source of the errors mentioned above;
// confirm before re-enabling.
MOVSS(M(&float_buffer), xmm_value);
MOV(32, R(EAX), M(&float_buffer));
BSWAP(32, EAX);
ABI_CallFunctionRR(thunks.ProtectFunction(((void *)&Memory::Write_U32), 2), EAX, reg_addr);
FixupBranch arg2 = J();
SetJumpTarget(argh);
// Direct store: reverse the low four bytes in the XMM register using the
// pbswapShuffle1x4 table, then write the 32-bit value to memory.
PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
#ifdef _M_IX86
// 32-bit build: mask the address and use Memory::base as a displacement.
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
#else
// Other builds: address is RBX + reg_addr.
// NOTE(review): presumably RBX holds the emulated memory base - confirm.
MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
#endif
SetJumpTarget(arg2);
} else {
// Active path: move the float through float_buffer into EAX, then delegate
// to SafeWriteRegToReg as a 32-bit write at offset 0 with swap enabled.
// This function does not branch on the TEST emitted above in this path.
MOVSS(M(&float_buffer), xmm_value);
MOV(32, R(EAX), M(&float_buffer));
SafeWriteRegToReg(EAX, reg_addr, 32, 0, true);
}
}
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address) void EmuCodeBlock::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
{ {
#ifdef _M_X64 #ifdef _M_X64

View File

@ -29,6 +29,9 @@ public:
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true); void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
// Trashes both inputs and EAX.
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr);
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address); void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address); void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
void JitClearCA(); void JitClearCA();