Merge branch 'dspjit'

Conflicts:
	Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp
	Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp
	Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp
	Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp
This commit is contained in:
Pierre 2013-04-21 12:28:35 +02:00
commit 9a8dd7963e
11 changed files with 447 additions and 395 deletions

View File

@ -755,6 +755,10 @@ void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src)
Write8(0x0F);
Write8(0xB7);
}
else if (sbits == 32 && dbits == 64)
{
Write8(0x8B);
}
else
{
Crash();
@ -1055,10 +1059,8 @@ void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(t
void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2)
{
#ifdef _DEBUG
#ifndef _M_X64
_assert_msg_(DYNA_REC, !a1.IsSimpleReg() || !a2.IsSimpleReg() || a1.GetSimpleReg() != a2.GetSimpleReg(), "Redundant MOV @ %p - bug in JIT?",
code);
#endif
code);
#endif
WriteNormalOp(this, bits, nrmMOV, a1, a2);
}

View File

@ -107,8 +107,8 @@ public:
void dsp_op_write_reg_imm(int reg, u16 val);
void dsp_conditional_extend_accum(int reg);
void dsp_conditional_extend_accum_imm(int reg, u16 val);
void dsp_op_read_reg_dont_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend = NONE);
void dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend = NONE);
void dsp_op_read_reg_and_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend = NONE);
// Commands
void dar(const UDSPInstruction opc);

View File

@ -451,7 +451,7 @@ void addp(const UDSPInstruction opc)
dsp_set_long_acc(dreg, res);
res = dsp_get_long_acc(dreg);
Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, prod, res));
Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, prod, res));
}
// ADDAXL $acD, $axS.l

View File

@ -737,12 +737,12 @@ void DSPEmitter::addp(const UDSPInstruction opc)
ADD(64, R(RAX), R(RDX));
// dsp_set_long_acc(dreg, res);
// res = dsp_get_long_acc(dreg);
// Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, prod, res));
// Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, prod, res));
if (FlagsNeeded())
{
MOV(64, R(RCX), R(RAX));
set_long_acc(dreg, RCX);
Update_SR_Register64_Carry2(EAX, tmp1);
Update_SR_Register64_Carry(EAX, tmp1);
}
else
{
@ -1557,16 +1557,17 @@ void DSPEmitter::lsrn(const UDSPInstruction opc)
// acc <<= -shift;
// }
CMP(64, R(RDX), Imm8(0));
CMP(64, R(RDX), Imm8(0));//is this actually worth the branch cost?
FixupBranch zero = J_CC(CC_E);
TEST(16, R(RAX), Imm16(0x3f));
TEST(16, R(RAX), Imm16(0x3f));//is this actually worth the branch cost?
FixupBranch noShift = J_CC(CC_Z);
MOVZX(64, 16, RCX, R(RAX));
AND(16, R(RCX), Imm16(0x3f));
//CL gets automatically masked with 0x3f on IA32/AMD64
//MOVZX(64, 16, RCX, R(RAX));
//AND(16, R(RCX), Imm16(0x3f));
TEST(16, R(RAX), Imm16(0x40));
FixupBranch shiftLeft = J_CC(CC_Z);
NEG(16, R(RCX));
ADD(16, R(RCX), Imm16(0x40));
//ADD(16, R(RCX), Imm16(0x40));
SHL(64, R(RDX), R(RCX));
FixupBranch exit = J();
SetJumpTarget(shiftLeft);

View File

@ -322,7 +322,8 @@ void DSPEmitter::loop(const UDSPInstruction opc)
{
u16 reg = opc & 0x1f;
// u16 cnt = g_dsp.r[reg];
dsp_op_read_reg(reg, RDX, ZERO);
//todo: check if we can use normal variant here
dsp_op_read_reg_dont_saturate(reg, RDX, ZERO);
u16 loop_pc = compilePC + 1;
CMP(16, R(EDX), Imm16(0));
@ -391,7 +392,8 @@ void DSPEmitter::bloop(const UDSPInstruction opc)
{
u16 reg = opc & 0x1f;
// u16 cnt = g_dsp.r[reg];
dsp_op_read_reg(reg, RDX, ZERO);
//todo: check if we can use normal variant here
dsp_op_read_reg_dont_saturate(reg, RDX, ZERO);
u16 loop_pc = dsp_imem_read(compilePC + 1);
CMP(16, R(EDX), Imm16(0));

View File

@ -59,11 +59,8 @@ void DSPEmitter::mv(const UDSPInstruction opc)
{
u8 sreg = (opc & 0x3) + DSP_REG_ACL0;
u8 dreg = ((opc >> 2) & 0x3);
if (sreg >= DSP_REG_ACM0) {
dsp_op_read_reg_and_saturate(sreg, RBX, ZERO);
storeIndex = dreg + DSP_REG_AXL0;
} else
pushExtValueFromReg(dreg + DSP_REG_AXL0, sreg);
dsp_op_read_reg(sreg, RBX, ZERO);
storeIndex = dreg + DSP_REG_AXL0;
}
// S @$arD, $acS.S
@ -80,10 +77,7 @@ void DSPEmitter::s(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (sreg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(sreg, tmp1, ZERO);
else
dsp_op_read_reg(sreg, tmp1, ZERO);
dsp_op_read_reg(sreg, tmp1, ZERO);
// u16 val = g_dsp.r[src];
dmem_write(tmp1);
@ -105,10 +99,7 @@ void DSPEmitter::sn(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (sreg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(sreg, tmp1, ZERO);
else
dsp_op_read_reg(sreg, tmp1, ZERO);
dsp_op_read_reg(sreg, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -178,7 +169,7 @@ void DSPEmitter::ls(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -205,7 +196,7 @@ void DSPEmitter::lsn(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -231,7 +222,7 @@ void DSPEmitter::lsm(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -258,7 +249,7 @@ void DSPEmitter::lsnm(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -283,7 +274,7 @@ void DSPEmitter::sl(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -309,7 +300,7 @@ void DSPEmitter::sln(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -335,7 +326,7 @@ void DSPEmitter::slm(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -361,7 +352,7 @@ void DSPEmitter::slnm(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO);
dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO);
dmem_write(tmp1);
gpr.putXReg(tmp1);
@ -640,15 +631,8 @@ void DSPEmitter::ldaxnm(const UDSPInstruction opc)
increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3);
}
// Emits a read of DSP register sreg into RBX (requesting ZERO extension) and
// records the destination register index dreg in storeIndex.
void DSPEmitter::pushExtValueFromReg(u16 dreg, u16 sreg)
{
dsp_op_read_reg(sreg, RBX, ZERO);
storeIndex = dreg;
}
// Push the value at the address held in g_dsp.r[sreg] into EBX and store the
// destination index in storeIndex
void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg)
{
// u16 addr = g_dsp.r[addr];

View File

@ -26,10 +26,7 @@ void DSPEmitter::srs(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (reg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(reg, tmp1, ZERO);
else
dsp_op_read_reg(reg, tmp1, ZERO);
dsp_op_read_reg(reg, tmp1, ZERO);
dsp_op_read_reg(DSP_REG_CR, RAX, ZERO);
SHL(16, R(EAX), Imm8(8));
OR(16, R(EAX), Imm16(opc & 0xFF));
@ -87,10 +84,7 @@ void DSPEmitter::sr(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (reg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(reg, tmp1);
else
dsp_op_read_reg(reg, tmp1);
dsp_op_read_reg(reg, tmp1);
dmem_write_imm(address, tmp1);
gpr.putXReg(tmp1);
@ -213,10 +207,7 @@ void DSPEmitter::srr(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (sreg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(sreg, tmp1);
else
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(dreg, RAX, ZERO);
dmem_write(tmp1);
@ -235,10 +226,7 @@ void DSPEmitter::srrd(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (sreg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(sreg, tmp1);
else
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(dreg, RAX, ZERO);
dmem_write(tmp1);
@ -259,10 +247,7 @@ void DSPEmitter::srri(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (sreg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(sreg, tmp1);
else
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(dreg, RAX, ZERO);
dmem_write(tmp1);
@ -283,10 +268,7 @@ void DSPEmitter::srrn(const UDSPInstruction opc)
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
if (sreg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(sreg, tmp1);
else
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(sreg, tmp1);
dsp_op_read_reg(dreg, RAX, ZERO);
dmem_write(tmp1);

View File

@ -9,310 +9,6 @@
#include "x64ABI.h"
using namespace Gen;
// Emits code that advances the pointer of DSP stack `stack_reg` (wrapping it
// with DSP_STACK_MASK) and copies the current g_dsp.r.st[stack_reg] value
// into the stack slot the new pointer selects.
//clobbers:
//EAX/RAX (holds the stack pointer, then the zero-extended slot index)
//expects:
//nothing stated by the original author
void DSPEmitter::dsp_reg_stack_push(int stack_reg)
{
//g_dsp.reg_stack_ptr[stack_reg]++;
//g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK;
MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg]));
ADD(8, R(AL), Imm8(1));
AND(8, R(AL), Imm8(DSP_STACK_MASK));
MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL));
// Borrow a scratch host register from the register cache for the copy.
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
//g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg];
MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg]));
// Widen the 8-bit pointer so it can be used as an address index.
#ifdef _M_IX86 // All32
MOVZX(32, 8, EAX, R(AL));
#else
MOVZX(64, 8, RAX, R(AL));
#endif
// EAX + EAX*1 doubles the index, i.e. scales it to the 16-bit slot size.
MOV(16, MComplex(EAX, EAX, 1,
PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)), R(tmp1));
gpr.putXReg(tmp1);
}
// Emits code that reloads g_dsp.r.st[stack_reg] from the stack slot selected
// by the current stack pointer and then decrements the pointer (wrapping it
// with DSP_STACK_MASK).
//clobbers:
//EAX/RAX (holds the zero-extended slot index, then AL the new pointer)
//expects:
//nothing stated by the original author
void DSPEmitter::dsp_reg_stack_pop(int stack_reg)
{
//g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]];
MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg]));
// Borrow a scratch host register from the register cache for the copy.
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
// Widen the 8-bit pointer so it can be used as an address index.
#ifdef _M_IX86 // All32
MOVZX(32, 8, EAX, R(AL));
#else
MOVZX(64, 8, RAX, R(AL));
#endif
// EAX + EAX*1 doubles the index, i.e. scales it to the 16-bit slot size.
MOV(16, R(tmp1), MComplex(EAX, EAX, 1,
PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)));
MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1));
gpr.putXReg(tmp1);
//g_dsp.reg_stack_ptr[stack_reg]--;
//g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK;
SUB(8, R(AL), Imm8(1));
AND(8, R(AL), Imm8(DSP_STACK_MASK));
MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL));
}
// Emits code that pushes the current value of stack register `stack_reg`
// and then stores the 16-bit value held in host_sreg as its new value.
// The value is staged in EDX because dsp_reg_stack_push overwrites EAX.
void DSPEmitter::dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg)
{
if (host_sreg != EDX) {
MOV(16, R(EDX), R(host_sreg));
}
dsp_reg_stack_push(stack_reg);
//g_dsp.r[DSP_REG_ST0 + stack_reg] = val;
MOV(16, M(&g_dsp.r.st[stack_reg]), R(EDX));
}
// Emits code that reads the current 16-bit value of stack register
// `stack_reg` into host_dreg and then pops the stack.
// The value is staged in EDX because dsp_reg_stack_pop overwrites EAX.
void DSPEmitter::dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg)
{
//u16 val = g_dsp.r[DSP_REG_ST0 + stack_reg];
MOV(16, R(EDX), M(&g_dsp.r.st[stack_reg]));
dsp_reg_stack_pop(stack_reg);
if (host_dreg != EDX) {
MOV(16, R(host_dreg), R(EDX));
}
}
// Emits code that pushes the current value of stack register `stack_reg`
// and then stores the compile-time constant `val` as its new value.
void DSPEmitter::dsp_reg_store_stack_imm(int stack_reg, u16 val)
{
dsp_reg_stack_push(stack_reg);
//g_dsp.r[DSP_REG_ST0 + stack_reg] = val;
MOV(16, M(&g_dsp.r.st[stack_reg]), Imm16(val));
}
// Emits code that writes the value in host_sreg to DSP register `reg`.
// Stack registers go through the push-and-store helper; every other
// register is written through the register cache.
void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg)
{
	switch (reg & 0x1f)
	{
	// Stack registers.
	case DSP_REG_ST0:
	case DSP_REG_ST1:
	case DSP_REG_ST2:
	case DSP_REG_ST3:
		dsp_reg_store_stack(reg - DSP_REG_ST0, host_sreg);
		return;

	// 8-bit sign extended registers; no extension is emitted here, so they
	// share the plain register-cache write with all remaining registers.
	case DSP_REG_ACH0:
	case DSP_REG_ACH1:
	default:
		gpr.writeReg(reg, R(host_sreg));
		return;
	}
}
void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val)
{
switch (reg & 0x1f) {
// 8-bit sign extended registers. Should look at prod.h too...
case DSP_REG_ACH0:
case DSP_REG_ACH1:
gpr.writeReg(reg, Imm16((u16)(s16)(s8)(u8)val));
break;
// Stack registers.
case DSP_REG_ST0:
case DSP_REG_ST1:
case DSP_REG_ST2:
case DSP_REG_ST3:
dsp_reg_store_stack_imm(reg - DSP_REG_ST0, val);
break;
default:
gpr.writeReg(reg, Imm16(val));
break;
}
}
// For reg == DSP_REG_ACM0/ACM1 only: emits a runtime check of
// SR_40_MODE_BIT in the DSP status register and, when the bit is set,
// rewrites the high and low parts of the matching accumulator from its
// middle part (see the pseudo-code comments below). Any other register
// emits nothing.
void DSPEmitter::dsp_conditional_extend_accum(int reg)
{
switch (reg)
{
case DSP_REG_ACM0:
case DSP_REG_ACM1:
{
OpArg sr_reg;
gpr.getReg(DSP_REG_SR,sr_reg);
// Snapshot the register cache so the conditional path can be flushed
// back to this state before the two paths rejoin.
DSPJitRegCache c(gpr);
TEST(16, sr_reg, Imm16(SR_40_MODE_BIT));
// Skip the extension when the mode bit is clear.
FixupBranch not_40bit = J_CC(CC_Z,true);
//if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT)
//{
// Sign extend into whole accum.
//u16 val = g_dsp.r[reg];
get_acc_m(reg - DSP_REG_ACM0, EAX);
// Keep only the upper 16 bits of the 32-bit value for the high part.
// NOTE(review): presumably get_acc_m sign-extends into EAX - confirm.
SHR(32, R(EAX), Imm8(16));
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000;
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0;
set_acc_h(reg - DSP_REG_ACM0, R(RAX));
set_acc_l(reg - DSP_REG_ACM0, Imm16(0));
//}
gpr.flushRegs(c);
SetJumpTarget(not_40bit);
gpr.putReg(DSP_REG_SR, false);
}
}
}
// Constant-value variant of dsp_conditional_extend_accum. For
// reg == DSP_REG_ACM0/ACM1 only: emits a runtime check of SR_40_MODE_BIT
// and, when the bit is set, writes 0xffff or 0x0000 (chosen at compile time
// from bit 15 of `val`) to the accumulator's high part and 0 to its low
// part. Any other register emits nothing.
void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val)
{
switch (reg)
{
case DSP_REG_ACM0:
case DSP_REG_ACM1:
{
OpArg sr_reg;
gpr.getReg(DSP_REG_SR,sr_reg);
// Snapshot the register cache so the conditional path can be flushed
// back to this state before the two paths rejoin.
DSPJitRegCache c(gpr);
TEST(16, sr_reg, Imm16(SR_40_MODE_BIT));
// Skip the extension when the mode bit is clear.
FixupBranch not_40bit = J_CC(CC_Z, true);
//if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT)
//{
// Sign extend into whole accum.
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000;
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0;
set_acc_h(reg - DSP_REG_ACM0, Imm16((val & 0x8000)?0xffff:0x0000));
set_acc_l(reg - DSP_REG_ACM0, Imm16(0));
//}
gpr.flushRegs(c);
SetJumpTarget(not_40bit);
gpr.putReg(DSP_REG_SR, false);
}
}
}
// Emits code that loads DSP register `reg` into host_dreg. Stack registers
// are read (and popped) through dsp_reg_load_stack and then widened as
// requested by `extend`; all other registers are read straight from the
// register cache, which handles the extension itself.
void DSPEmitter::dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend)
{
	const int masked_reg = reg & 0x1f;
	const bool is_stack_reg = masked_reg == DSP_REG_ST0 || masked_reg == DSP_REG_ST1 ||
	                          masked_reg == DSP_REG_ST2 || masked_reg == DSP_REG_ST3;
	if (!is_stack_reg)
	{
		gpr.readReg(reg, host_dreg, extend);
		return;
	}

	dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg);

	// Widen the 16-bit value to the native register width if asked to.
	if (extend == SIGN)
	{
#ifdef _M_IX86 // All32
		MOVSX(32, 16, host_dreg, R(host_dreg));
#else
		MOVSX(64, 16, host_dreg, R(host_dreg));
#endif
	}
	else if (extend == ZERO)
	{
#ifdef _M_IX86 // All32
		MOVZX(32, 16, host_dreg, R(host_dreg));
#else
		MOVZX(64, 16, host_dreg, R(host_dreg));
#endif
	}
	// NONE (and any other value): leave the upper bits untouched.
}
// Emits code that reads the middle part of an accumulator (reg must be
// DSP_REG_ACM0 or DSP_REG_ACM1) into host_dreg. When SR_40_MODE_BIT is set
// at runtime and the accumulator does not fit in a sign-extended 32-bit
// value, a saturated constant is loaded instead: 0x7fff for positive values,
// 0x8000 (0xffff8000 when extend == SIGN) for negative ones.
void DSPEmitter::dsp_op_read_reg_and_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend)
{
//we already know this is ACCM0 or ACCM1
#ifdef _M_IX86 // All32
// 32-bit hosts: read the middle part up front, check the high part later.
gpr.readReg(reg, host_dreg, extend);
#else
// 64-bit hosts: work on the whole accumulator register.
OpArg acc_reg;
gpr.getReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, acc_reg);
#endif
OpArg sr_reg;
gpr.getReg(DSP_REG_SR,sr_reg);
// Snapshot of the register cache, flushed back to before the paths rejoin.
DSPJitRegCache c(gpr);
TEST(16, sr_reg, Imm16(SR_40_MODE_BIT));
// Mode bit clear: no saturation check is needed.
FixupBranch not_40bit = J_CC(CC_Z, true);
#ifdef _M_IX86 // All32
DSPJitRegCache c2(gpr);
gpr.putReg(DSP_REG_SR, false);
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
gpr.readReg(reg-DSP_REG_ACM0+DSP_REG_ACH0, tmp1, NONE);
// Build tmp1 = (high << 16) | middle and compare it with the sign-extended
// middle part; they are equal exactly when no saturation is required.
MOVSX(32,16,host_dreg,R(host_dreg));
SHL(32, R(tmp1), Imm8(16));
MOV(16,R(tmp1),R(host_dreg));
CMP(32,R(host_dreg), R(tmp1));
FixupBranch no_saturate = J_CC(CC_Z);
CMP(32,R(tmp1),Imm32(0));
FixupBranch negative = J_CC(CC_LE);
MOV(32,R(host_dreg),Imm32(0x7fff));//this works for all extend modes
FixupBranch done_positive = J();
SetJumpTarget(negative);
if (extend == NONE || extend == ZERO)
MOV(32,R(host_dreg),Imm32(0x00008000));
else
MOV(32,R(host_dreg),Imm32(0xffff8000));
FixupBranch done_negative = J();
SetJumpTarget(no_saturate);
// host_dreg was sign-extended for the comparison; undo that for ZERO.
if (extend == ZERO)
MOVZX(32,16,host_dreg,R(host_dreg));
SetJumpTarget(done_positive);
SetJumpTarget(done_negative);
gpr.putXReg(tmp1);
gpr.flushRegs(c2);
SetJumpTarget(not_40bit);
gpr.flushRegs(c);
#else
// The accumulator needs saturating when sign-extending its low 32 bits
// does not reproduce the full 64-bit value.
MOVSX(64,32,host_dreg,acc_reg);
CMP(64,R(host_dreg),acc_reg);
FixupBranch no_saturate = J_CC(CC_Z);
CMP(64,acc_reg,Imm32(0));
FixupBranch negative = J_CC(CC_LE);
MOV(64,R(host_dreg),Imm32(0x7fff));//this works for all extend modes
FixupBranch done_positive = J();
SetJumpTarget(negative);
if (extend == NONE || extend == ZERO)
MOV(64,R(host_dreg),Imm32(0x00008000));
else
MOV(64,R(host_dreg),Imm32(0xffff8000));
FixupBranch done_negative = J();
// Unsaturated path (also used when the mode bit is clear): take the
// accumulator shifted right by 16, logically or arithmetically per `extend`.
SetJumpTarget(no_saturate);
SetJumpTarget(not_40bit);
MOV(64, R(host_dreg), acc_reg);
if (extend == NONE || extend == ZERO)
SHR(64, R(host_dreg), Imm8(16));
else
SAR(64, R(host_dreg), Imm8(16));
SetJumpTarget(done_positive);
SetJumpTarget(done_negative);
gpr.flushRegs(c);
gpr.putReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, false);
#endif
gpr.putReg(DSP_REG_SR, false);
}
// MRR $D, $S
// 0001 11dd ddds ssss
// Move value from register $S to register $D.
@ -321,10 +17,7 @@ void DSPEmitter::mrr(const UDSPInstruction opc)
u8 sreg = opc & 0x1f;
u8 dreg = (opc >> 5) & 0x1f;
if (sreg >= DSP_REG_ACM0)
dsp_op_read_reg_and_saturate(sreg, EDX);
else
dsp_op_read_reg(sreg, EDX);
dsp_op_read_reg(sreg, EDX);
dsp_op_write_reg(dreg, EDX);
dsp_conditional_extend_accum(dreg);
}

View File

@ -255,6 +255,18 @@ void DSPJitRegCache::flushRegs(DSPJitRegCache &cache, bool emit)
regs[i].last_use_ctr = cache.regs[i].last_use_ctr;
}
//sync the freely used xregs
if (!emit) {
for(i = 0; i < NUMXREGS; i++) {
if (cache.xregs[i].guest_reg == DSP_REG_USED &&
xregs[i].guest_reg == DSP_REG_NONE)
xregs[i].guest_reg = DSP_REG_USED;
if (cache.xregs[i].guest_reg == DSP_REG_NONE &&
xregs[i].guest_reg == DSP_REG_USED)
xregs[i].guest_reg = DSP_REG_NONE;
}
}
//consistency checks
for(i = 0; i < NUMXREGS; i++)
{
@ -389,7 +401,7 @@ void DSPJitRegCache::loadRegs(bool emit)
for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++)
{
if (regs[i].host_reg != INVALID_REG)
movToHostReg(i,regs[i].host_reg);
movToHostReg(i,regs[i].host_reg, emit);
}
if (emit)
@ -519,7 +531,7 @@ void DSPJitRegCache::popRegs() {
for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++)
{
if (regs[i].host_reg != INVALID_REG)
movToHostReg(i,regs[i].host_reg);
movToHostReg(i,regs[i].host_reg, true);
}
}
@ -916,17 +928,38 @@ void DSPJitRegCache::writeReg(int dreg, OpArg arg)
{
OpArg reg;
getReg(dreg, reg, false);
switch(regs[dreg].size)
if (arg.IsImm())
{
case 2: emitter.MOV(16, reg, arg); break;
case 4: emitter.MOV(32, reg, arg); break;
switch(regs[dreg].size)
{
case 2: emitter.MOV(16, reg, Imm16(arg.offset)); break;
case 4: emitter.MOV(32, reg, Imm32(arg.offset)); break;
#ifdef _M_X64
case 8: emitter.MOV(64, reg, arg); break;
case 8:
if ((s32)arg.offset == (s64)arg.offset)
emitter.MOV(64, reg, Imm32(arg.offset));
else
emitter.MOV(64, reg, Imm64(arg.offset));
break;
#endif
default:
_assert_msg_(DSPLLE, 0, "unsupported memory size");
break;
default:
_assert_msg_(DSPLLE, 0, "unsupported memory size");
break;
}
}
else
{
switch(regs[dreg].size)
{
case 2: emitter.MOV(16, reg, arg); break;
case 4: emitter.MOV(32, reg, arg); break;
#ifdef _M_X64
case 8: emitter.MOV(64, reg, arg); break;
#endif
default:
_assert_msg_(DSPLLE, 0, "unsupported memory size");
break;
}
}
putReg(dreg, true);
}
@ -1042,8 +1075,7 @@ void DSPJitRegCache::getXReg(X64Reg reg)
if (xregs[reg].guest_reg != DSP_REG_NONE)
spillXReg(reg);
_assert_msg_(DSPLLE, xregs[reg].guest_reg != DSP_REG_NONE, "register already in use");
_assert_msg_(DSPLLE, xregs[reg].guest_reg == DSP_REG_NONE, "register already in use");
xregs[reg].guest_reg = DSP_REG_USED;
}

View File

@ -11,6 +11,352 @@
using namespace Gen;
// Emits code that advances the pointer of DSP stack `stack_reg` (wrapping it
// with DSP_STACK_MASK) and copies the current g_dsp.r.st[stack_reg] value
// into the stack slot the new pointer selects.
//clobbers:
//EAX/RAX (holds the stack pointer, then the zero-extended slot index)
//expects:
//nothing stated by the original author
void DSPEmitter::dsp_reg_stack_push(int stack_reg)
{
//g_dsp.reg_stack_ptr[stack_reg]++;
//g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK;
MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg]));
ADD(8, R(AL), Imm8(1));
AND(8, R(AL), Imm8(DSP_STACK_MASK));
MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL));
// Borrow a scratch host register from the register cache for the copy.
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
//g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg];
MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg]));
// Widen the 8-bit pointer so it can be used as an address index.
#ifdef _M_IX86 // All32
MOVZX(32, 8, EAX, R(AL));
#else
MOVZX(64, 8, RAX, R(AL));
#endif
// EAX + EAX*1 doubles the index, i.e. scales it to the 16-bit slot size.
MOV(16, MComplex(EAX, EAX, 1,
PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)), R(tmp1));
gpr.putXReg(tmp1);
}
// Emits code that reloads g_dsp.r.st[stack_reg] from the stack slot selected
// by the current stack pointer and then decrements the pointer (wrapping it
// with DSP_STACK_MASK).
//clobbers:
//EAX/RAX (holds the zero-extended slot index, then AL the new pointer)
//expects:
//nothing stated by the original author
void DSPEmitter::dsp_reg_stack_pop(int stack_reg)
{
//g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]];
MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg]));
// Borrow a scratch host register from the register cache for the copy.
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
// Widen the 8-bit pointer so it can be used as an address index.
#ifdef _M_IX86 // All32
MOVZX(32, 8, EAX, R(AL));
#else
MOVZX(64, 8, RAX, R(AL));
#endif
// EAX + EAX*1 doubles the index, i.e. scales it to the 16-bit slot size.
MOV(16, R(tmp1), MComplex(EAX, EAX, 1,
PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)));
MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1));
gpr.putXReg(tmp1);
//g_dsp.reg_stack_ptr[stack_reg]--;
//g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK;
SUB(8, R(AL), Imm8(1));
AND(8, R(AL), Imm8(DSP_STACK_MASK));
MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL));
}
// Emits code that pushes the current value of stack register `stack_reg`
// and then stores the 16-bit value held in host_sreg as its new value.
// The value is staged in EDX because dsp_reg_stack_push overwrites EAX.
void DSPEmitter::dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg)
{
if (host_sreg != EDX)
{
MOV(16, R(EDX), R(host_sreg));
}
dsp_reg_stack_push(stack_reg);
//g_dsp.r[DSP_REG_ST0 + stack_reg] = val;
MOV(16, M(&g_dsp.r.st[stack_reg]), R(EDX));
}
// Emits code that reads the current 16-bit value of stack register
// `stack_reg` into host_dreg and then pops the stack.
// The value is staged in EDX because dsp_reg_stack_pop overwrites EAX.
void DSPEmitter::dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg)
{
//u16 val = g_dsp.r[DSP_REG_ST0 + stack_reg];
MOV(16, R(EDX), M(&g_dsp.r.st[stack_reg]));
dsp_reg_stack_pop(stack_reg);
if (host_dreg != EDX)
{
MOV(16, R(host_dreg), R(EDX));
}
}
// Emits code that pushes the current value of stack register `stack_reg`
// and then stores the compile-time constant `val` as its new value.
void DSPEmitter::dsp_reg_store_stack_imm(int stack_reg, u16 val)
{
dsp_reg_stack_push(stack_reg);
//g_dsp.r[DSP_REG_ST0 + stack_reg] = val;
MOV(16, M(&g_dsp.r.st[stack_reg]), Imm16(val));
}
// Emits code that writes the value in host_sreg to DSP register `reg`.
// Stack registers go through the push-and-store helper; every other
// register is written through the register cache.
void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg)
{
	switch (reg & 0x1f)
	{
	// Stack registers.
	case DSP_REG_ST0:
	case DSP_REG_ST1:
	case DSP_REG_ST2:
	case DSP_REG_ST3:
		dsp_reg_store_stack(reg - DSP_REG_ST0, host_sreg);
		return;

	// 8-bit sign extended registers; no extension is emitted here, so they
	// share the plain register-cache write with all remaining registers.
	case DSP_REG_ACH0:
	case DSP_REG_ACH1:
	default:
		gpr.writeReg(reg, R(host_sreg));
		return;
	}
}
void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val)
{
switch (reg & 0x1f)
{
// 8-bit sign extended registers. Should look at prod.h too...
case DSP_REG_ACH0:
case DSP_REG_ACH1:
gpr.writeReg(reg, Imm16((u16)(s16)(s8)(u8)val));
break;
// Stack registers.
case DSP_REG_ST0:
case DSP_REG_ST1:
case DSP_REG_ST2:
case DSP_REG_ST3:
dsp_reg_store_stack_imm(reg - DSP_REG_ST0, val);
break;
default:
gpr.writeReg(reg, Imm16(val));
break;
}
}
// For reg == DSP_REG_ACM0/ACM1 only: emits a runtime check of
// SR_40_MODE_BIT in the DSP status register and, when the bit is set,
// rewrites the high and low parts of the matching accumulator from its
// middle part (see the pseudo-code comments below). Any other register
// emits nothing.
void DSPEmitter::dsp_conditional_extend_accum(int reg)
{
switch (reg)
{
case DSP_REG_ACM0:
case DSP_REG_ACM1:
{
OpArg sr_reg;
gpr.getReg(DSP_REG_SR,sr_reg);
// Snapshot the register cache so the conditional path can be flushed
// back to this state before the two paths rejoin.
DSPJitRegCache c(gpr);
TEST(16, sr_reg, Imm16(SR_40_MODE_BIT));
// Skip the extension when the mode bit is clear.
FixupBranch not_40bit = J_CC(CC_Z,true);
//if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT)
//{
// Sign extend into whole accum.
//u16 val = g_dsp.r[reg];
get_acc_m(reg - DSP_REG_ACM0, EAX);
// Keep only the upper 16 bits of the 32-bit value for the high part.
// NOTE(review): presumably get_acc_m sign-extends into EAX - confirm.
SHR(32, R(EAX), Imm8(16));
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000;
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0;
set_acc_h(reg - DSP_REG_ACM0, R(RAX));
set_acc_l(reg - DSP_REG_ACM0, Imm16(0));
//}
gpr.flushRegs(c);
SetJumpTarget(not_40bit);
gpr.putReg(DSP_REG_SR, false);
}
}
}
// Constant-value variant of dsp_conditional_extend_accum. For
// reg == DSP_REG_ACM0/ACM1 only: emits a runtime check of SR_40_MODE_BIT
// and, when the bit is set, writes 0xffff or 0x0000 (chosen at compile time
// from bit 15 of `val`) to the accumulator's high part and 0 to its low
// part. Any other register emits nothing.
void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val)
{
switch (reg)
{
case DSP_REG_ACM0:
case DSP_REG_ACM1:
{
OpArg sr_reg;
gpr.getReg(DSP_REG_SR,sr_reg);
// Snapshot the register cache so the conditional path can be flushed
// back to this state before the two paths rejoin.
DSPJitRegCache c(gpr);
TEST(16, sr_reg, Imm16(SR_40_MODE_BIT));
// Skip the extension when the mode bit is clear.
FixupBranch not_40bit = J_CC(CC_Z, true);
//if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT)
//{
// Sign extend into whole accum.
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000;
//g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0;
set_acc_h(reg - DSP_REG_ACM0, Imm16((val & 0x8000)?0xffff:0x0000));
set_acc_l(reg - DSP_REG_ACM0, Imm16(0));
//}
gpr.flushRegs(c);
SetJumpTarget(not_40bit);
gpr.putReg(DSP_REG_SR, false);
}
}
}
// Emits code that loads DSP register `reg` into host_dreg without any
// accumulator saturation. Stack registers are read (and popped) through
// dsp_reg_load_stack and then widened as requested by `extend`; all other
// registers are read straight from the register cache, which handles the
// extension itself.
void DSPEmitter::dsp_op_read_reg_dont_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend)
{
	const int masked_reg = reg & 0x1f;
	const bool is_stack_reg = masked_reg == DSP_REG_ST0 || masked_reg == DSP_REG_ST1 ||
	                          masked_reg == DSP_REG_ST2 || masked_reg == DSP_REG_ST3;
	if (!is_stack_reg)
	{
		gpr.readReg(reg, host_dreg, extend);
		return;
	}

	dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg);

	// Widen the 16-bit value to the native register width if asked to.
	if (extend == SIGN)
	{
#ifdef _M_IX86 // All32
		MOVSX(32, 16, host_dreg, R(host_dreg));
#else
		MOVSX(64, 16, host_dreg, R(host_dreg));
#endif
	}
	else if (extend == ZERO)
	{
#ifdef _M_IX86 // All32
		MOVZX(32, 16, host_dreg, R(host_dreg));
#else
		MOVZX(64, 16, host_dreg, R(host_dreg));
#endif
	}
	// NONE (and any other value): leave the upper bits untouched.
}
// Emits code that loads DSP register `reg` into host_dreg, widened as
// requested by `extend`.
//  - Stack registers (ST0-ST3) are read and popped via dsp_reg_load_stack.
//  - ACM0/ACM1 are read with saturation: when SR_40_MODE_BIT is set at
//    runtime and the accumulator does not fit in a sign-extended 32-bit
//    value, 0x7fff (positive) or 0x8000 / 0xffff8000 (negative, the latter
//    when extend == SIGN) is loaded instead of the middle part.
//  - Every other register is read straight from the register cache.
void DSPEmitter::dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend)
{
switch (reg & 0x1f)
{
case DSP_REG_ST0:
case DSP_REG_ST1:
case DSP_REG_ST2:
case DSP_REG_ST3:
dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg);
// Widen the 16-bit value to the native register width if asked to.
switch(extend)
{
case SIGN:
#ifdef _M_IX86 // All32
MOVSX(32, 16, host_dreg, R(host_dreg));
#else
MOVSX(64, 16, host_dreg, R(host_dreg));
#endif
break;
case ZERO:
#ifdef _M_IX86 // All32
MOVZX(32, 16, host_dreg, R(host_dreg));
#else
MOVZX(64, 16, host_dreg, R(host_dreg));
#endif
break;
case NONE:
default:
break;
}
return;
case DSP_REG_ACM0:
case DSP_REG_ACM1:
{
//we already know this is ACCM0 or ACCM1
#ifdef _M_IX86 // All32
// 32-bit hosts: read the middle part up front, check the high part later.
gpr.readReg(reg, host_dreg, extend);
#else
// 64-bit hosts: work on the whole accumulator register.
OpArg acc_reg;
gpr.getReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, acc_reg);
#endif
OpArg sr_reg;
gpr.getReg(DSP_REG_SR,sr_reg);
// Snapshot of the register cache, flushed back to before the paths rejoin.
DSPJitRegCache c(gpr);
TEST(16, sr_reg, Imm16(SR_40_MODE_BIT));
// Mode bit clear: no saturation check is needed.
FixupBranch not_40bit = J_CC(CC_Z, true);
#ifdef _M_IX86 // All32
DSPJitRegCache c2(gpr);
gpr.putReg(DSP_REG_SR, false);
X64Reg tmp1;
gpr.getFreeXReg(tmp1);
gpr.readReg(reg-DSP_REG_ACM0+DSP_REG_ACH0, tmp1, NONE);
// Build tmp1 = (high << 16) | middle and compare it with the sign-extended
// middle part; they are equal exactly when no saturation is required.
MOVSX(32,16,host_dreg,R(host_dreg));
SHL(32, R(tmp1), Imm8(16));
MOV(16,R(tmp1),R(host_dreg));
CMP(32,R(host_dreg), R(tmp1));
FixupBranch no_saturate = J_CC(CC_Z);
CMP(32,R(tmp1),Imm32(0));
FixupBranch negative = J_CC(CC_LE);
MOV(32,R(host_dreg),Imm32(0x7fff));//this works for all extend modes
FixupBranch done_positive = J();
SetJumpTarget(negative);
if (extend == NONE || extend == ZERO)
MOV(32,R(host_dreg),Imm32(0x00008000));
else
MOV(32,R(host_dreg),Imm32(0xffff8000));
FixupBranch done_negative = J();
SetJumpTarget(no_saturate);
// host_dreg was sign-extended for the comparison; undo that for ZERO.
if (extend == ZERO)
MOVZX(32,16,host_dreg,R(host_dreg));
SetJumpTarget(done_positive);
SetJumpTarget(done_negative);
gpr.putXReg(tmp1);
gpr.flushRegs(c2);
SetJumpTarget(not_40bit);
gpr.flushRegs(c);
#else
// The accumulator needs saturating when sign-extending its low 32 bits
// does not reproduce the full 64-bit value.
MOVSX(64,32,host_dreg,acc_reg);
CMP(64,R(host_dreg),acc_reg);
FixupBranch no_saturate = J_CC(CC_Z);
CMP(64,acc_reg,Imm32(0));
FixupBranch negative = J_CC(CC_LE);
MOV(64,R(host_dreg),Imm32(0x7fff));//this works for all extend modes
FixupBranch done_positive = J();
SetJumpTarget(negative);
if (extend == NONE || extend == ZERO)
MOV(64,R(host_dreg),Imm32(0x00008000));
else
MOV(64,R(host_dreg),Imm32(0xffff8000));
FixupBranch done_negative = J();
// Unsaturated path (also used when the mode bit is clear): take the
// accumulator shifted right by 16, logically or arithmetically per `extend`.
SetJumpTarget(no_saturate);
SetJumpTarget(not_40bit);
MOV(64, R(host_dreg), acc_reg);
if (extend == NONE || extend == ZERO)
SHR(64, R(host_dreg), Imm8(16));
else
SAR(64, R(host_dreg), Imm8(16));
SetJumpTarget(done_positive);
SetJumpTarget(done_negative);
gpr.flushRegs(c);
gpr.putReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, false);
#endif
gpr.putReg(DSP_REG_SR, false);
}
return;
default:
gpr.readReg(reg, host_dreg, extend);
return;
}
}
// addr math
//
// These functions detect overflow by checking if
@ -119,13 +465,14 @@ void DSPEmitter::increase_addr_reg(int reg, int _ix_reg)
//eax = dar
XOR(32, R(EAX), R(ECX));
XOR(32, R(EAX), R(tmp1));
LEA(32, ECX, MRegSum(EDX, EDX));
OR(32, R(ECX), Imm8(2));
AND(32, R(EAX), R(ECX));
//if (ix >= 0)
TEST(32, R(ECX), R(ECX));
FixupBranch negative = J_CC(CC_S);
LEA(32, ECX, MRegSum(EDX, EDX));
OR(32, R(ECX), Imm8(2));
AND(32, R(EAX), R(ECX));
//if (dar > wr)
CMP(32, R(EAX), R(EDX));
FixupBranch done = J_CC(CC_BE);
@ -136,6 +483,10 @@ void DSPEmitter::increase_addr_reg(int reg, int _ix_reg)
//else
SetJumpTarget(negative);
LEA(32, ECX, MRegSum(EDX, EDX));
OR(32, R(ECX), Imm8(2));
AND(32, R(EAX), R(ECX));
//if ((((nar + wr + 1) ^ nar) & dar) <= wr)
LEA(32, ECX, MComplex(tmp1, EDX, 1, 1));
XOR(32, R(ECX), R(tmp1));
@ -184,13 +535,14 @@ void DSPEmitter::decrease_addr_reg(int reg)
//eax = dar
XOR(32, R(EAX), R(ECX));
XOR(32, R(EAX), R(tmp1));
LEA(32, ECX, MRegSum(EDX, EDX));
OR(32, R(ECX), Imm8(2));
AND(32, R(EAX), R(ECX));
//if ((u32)ix > 0xFFFF8000) ==> (~ix < 0x00007FFF)
CMP(32, R(ECX), Imm32(0x00007FFF));
FixupBranch positive = J_CC(CC_AE);
LEA(32, ECX, MRegSum(EDX, EDX));
OR(32, R(ECX), Imm8(2));
AND(32, R(EAX), R(ECX));
//if (dar > wr)
CMP(32, R(EAX), R(EDX));
FixupBranch done = J_CC(CC_BE);
@ -201,6 +553,10 @@ void DSPEmitter::decrease_addr_reg(int reg)
//else
SetJumpTarget(positive);
LEA(32, ECX, MRegSum(EDX, EDX));
OR(32, R(ECX), Imm8(2));
AND(32, R(EAX), R(ECX));
//if ((((nar + wr + 1) ^ nar) & dar) <= wr)
LEA(32, ECX, MComplex(tmp1, EDX, 1, 1));
XOR(32, R(ECX), R(tmp1));

View File

@ -1997,7 +1997,7 @@ void Jit64::slwx(UGeckoInstruction inst)
}
else
{
MOV(32, gpr.R(a), gpr.R(a));
MOVZX(64, 32, gpr.R(a).GetSimpleReg(), gpr.R(a));
}
gpr.UnlockAll();
gpr.UnlockAllX();