Core/DSPCore: Improve the interpreter's address register add/sub and convert it to assembler for the JIT. Replace the JIT ToMask() with a different variant. Remove superfluous zeroWriteBackLog calls (added by me).

Core/Common: Don't bother creating a string and calling into a Log's trigger() when no one is listening. Change AtomicLoadAcquire for gcc to just keep the compiler from reordering memory accesses around it instead of doing a full memory barrier, per the comment in the win32 variant.

Core/AudioCommon: Fix use of an uninitialized variable inside libalsa.

Microbenchmarking results for the ToMask variants (1 000 000 000 iterations):

cpu \ variant              | shifts | bit scan
intel mobile C2D @ 2.5GHz  | 5.5s   | 4.0s
amd athlon64x2 @ 3GHz      | 6.1s   | 6.4s

(including some constant overhead identical to both variants)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6667 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
0e737235a8
commit
c33f46406e
|
@ -123,6 +123,7 @@ bool AlsaSound::AlsaInit()
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dir = 0;
|
||||||
err = snd_pcm_hw_params_set_rate_near(handle, hwparams, &sample_rate, &dir);
|
err = snd_pcm_hw_params_set_rate_near(handle, hwparams, &sample_rate, &dir);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
{
|
{
|
||||||
|
|
|
@ -57,8 +57,12 @@ inline u32 AtomicLoad(volatile u32& src) {
|
||||||
return src; // 32-bit reads are always atomic.
|
return src; // 32-bit reads are always atomic.
|
||||||
}
|
}
|
||||||
inline u32 AtomicLoadAcquire(volatile u32& src) {
|
inline u32 AtomicLoadAcquire(volatile u32& src) {
|
||||||
__sync_synchronize(); // TODO: May not be necessary.
|
//keep the compiler from caching any memory references
|
||||||
return src;
|
u32 result = src; // 32-bit reads are always atomic.
|
||||||
|
//__sync_synchronize(); // TODO: May not be necessary.
|
||||||
|
// Compiler instruction only. x86 loads always have acquire semantics.
|
||||||
|
__asm__ __volatile__ ( "":::"memory" );
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void AtomicOr(volatile u32& target, u32 value) {
|
inline void AtomicOr(volatile u32& target, u32 value) {
|
||||||
|
|
|
@ -116,7 +116,7 @@ void LogManager::Log(LogTypes::LOG_LEVELS level, LogTypes::LOG_TYPE type,
|
||||||
char msg[MAX_MSGLEN * 2];
|
char msg[MAX_MSGLEN * 2];
|
||||||
LogContainer *log = m_Log[type];
|
LogContainer *log = m_Log[type];
|
||||||
|
|
||||||
if (! log->isEnable() || level > log->getLevel())
|
if (! log->isEnable() || level > log->getLevel() || ! log->hasListeners())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
CharArrayFromFormatV(temp, MAX_MSGLEN, format, args);
|
CharArrayFromFormatV(temp, MAX_MSGLEN, format, args);
|
||||||
|
|
|
@ -89,6 +89,7 @@ public:
|
||||||
void setLevel(LogTypes::LOG_LEVELS level) {
|
void setLevel(LogTypes::LOG_LEVELS level) {
|
||||||
m_level = level;
|
m_level = level;
|
||||||
}
|
}
|
||||||
|
bool hasListeners() const { return listeners.size() > 0; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
char m_fullName[128];
|
char m_fullName[128];
|
||||||
|
|
|
@ -259,7 +259,7 @@ private:
|
||||||
|
|
||||||
void Update_SR_Register(Gen::X64Reg val = Gen::EAX);
|
void Update_SR_Register(Gen::X64Reg val = Gen::EAX);
|
||||||
|
|
||||||
void ToMask(Gen::X64Reg value_reg = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI);
|
void ToMask(Gen::X64Reg value_reg = Gen::EDI);
|
||||||
void dsp_increment_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI);
|
void dsp_increment_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI);
|
||||||
void dsp_decrement_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI);
|
void dsp_decrement_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI);
|
||||||
void get_long_prod(Gen::X64Reg long_prod = Gen::RAX);
|
void get_long_prod(Gen::X64Reg long_prod = Gen::RAX);
|
||||||
|
|
|
@ -68,10 +68,10 @@ inline u16 dsp_increase_addr_reg(u16 reg, s16 ix)
|
||||||
u16 m = ToMask(wr) | 1;
|
u16 m = ToMask(wr) | 1;
|
||||||
u16 nar = ar+ix;
|
u16 nar = ar+ix;
|
||||||
if (ix >= 0) {
|
if (ix >= 0) {
|
||||||
if((ar&m)+(ix&m) -m-1 >= 0)
|
if((ar&m) + (int)(ix&m) -(int)m-1 >= 0)
|
||||||
nar -= wr+1;
|
nar -= wr+1;
|
||||||
} else {
|
} else {
|
||||||
if((ar&m)+(ix&m) -m-1 < m-wr)
|
if((ar&m) + (int)(ix&m) -(int)m-1 < m-wr)
|
||||||
nar += wr+1;
|
nar += wr+1;
|
||||||
}
|
}
|
||||||
return nar;
|
return nar;
|
||||||
|
@ -82,13 +82,12 @@ inline u16 dsp_decrease_addr_reg(u16 reg, s16 ix)
|
||||||
u16 ar = g_dsp.r[reg];
|
u16 ar = g_dsp.r[reg];
|
||||||
u16 wr = g_dsp.r[reg+8];
|
u16 wr = g_dsp.r[reg+8];
|
||||||
u16 m = ToMask(wr) | 1;
|
u16 m = ToMask(wr) | 1;
|
||||||
ix = -ix-1;
|
u16 nar = ar-ix;
|
||||||
u16 nar = ar+ix+1;
|
if ((u16)ix > 0x8000) { // equiv: ix < 0 && ix != -0x8000
|
||||||
if (ix-1 >= 0) {
|
if((ar&m) - (int)(ix&m) >= 0)
|
||||||
if((ar&m)+(ix&m) -m >= 0)
|
|
||||||
nar -= wr+1;
|
nar -= wr+1;
|
||||||
} else {
|
} else {
|
||||||
if((ar&m)+(ix&m) -m < m-wr)
|
if((ar&m) - (int)(ix&m) < m-wr)
|
||||||
nar += wr+1;
|
nar += wr+1;
|
||||||
}
|
}
|
||||||
return nar;
|
return nar;
|
||||||
|
|
|
@ -37,8 +37,8 @@ const DSPOPCTemplate opcodes[] =
|
||||||
|
|
||||||
{"DAR", 0x0004, 0xfffc, DSPInterpreter::dar, &DSPEmitter::dar, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
{"DAR", 0x0004, 0xfffc, DSPInterpreter::dar, &DSPEmitter::dar, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
||||||
{"IAR", 0x0008, 0xfffc, DSPInterpreter::iar, &DSPEmitter::iar, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
{"IAR", 0x0008, 0xfffc, DSPInterpreter::iar, &DSPEmitter::iar, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
||||||
{"SUBARN", 0x000c, 0xfffc, DSPInterpreter::subarn, NULL/*&DSPEmitter::subarn*/, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
{"SUBARN", 0x000c, 0xfffc, DSPInterpreter::subarn, &DSPEmitter::subarn, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
||||||
{"ADDARN", 0x0010, 0xfff0, DSPInterpreter::addarn, NULL/*&DSPEmitter::addarn*/, 1, 2, {{P_REG, 1, 0, 0, 0x0003}, {P_REG04, 1, 0, 2, 0x000c}}, false, false, false, false, false},
|
{"ADDARN", 0x0010, 0xfff0, DSPInterpreter::addarn, &DSPEmitter::addarn, 1, 2, {{P_REG, 1, 0, 0, 0x0003}, {P_REG04, 1, 0, 2, 0x000c}}, false, false, false, false, false},
|
||||||
|
|
||||||
{"HALT", 0x0021, 0xffff, DSPInterpreter::halt, NULL, 1, 0, {}, false, true, true, false, false},
|
{"HALT", 0x0021, 0xffff, DSPInterpreter::halt, NULL, 1, 0, {}, false, true, true, false, false},
|
||||||
|
|
||||||
|
|
|
@ -370,7 +370,6 @@ void DSPEmitter::nx(const UDSPInstruction opc)
|
||||||
void DSPEmitter::dar(const UDSPInstruction opc)
|
void DSPEmitter::dar(const UDSPInstruction opc)
|
||||||
{
|
{
|
||||||
// g_dsp.r[opc & 0x3] = dsp_decrement_addr_reg(opc & 0x3);
|
// g_dsp.r[opc & 0x3] = dsp_decrement_addr_reg(opc & 0x3);
|
||||||
zeroWriteBackLog(opc);
|
|
||||||
decrement_addr_reg(opc & 0x3);
|
decrement_addr_reg(opc & 0x3);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -381,7 +380,6 @@ void DSPEmitter::dar(const UDSPInstruction opc)
|
||||||
void DSPEmitter::iar(const UDSPInstruction opc)
|
void DSPEmitter::iar(const UDSPInstruction opc)
|
||||||
{
|
{
|
||||||
// g_dsp.r[opc & 0x3] = dsp_increment_addr_reg(opc & 0x3);
|
// g_dsp.r[opc & 0x3] = dsp_increment_addr_reg(opc & 0x3);
|
||||||
zeroWriteBackLog(opc);
|
|
||||||
increment_addr_reg(opc & 0x3);
|
increment_addr_reg(opc & 0x3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -393,7 +391,6 @@ void DSPEmitter::subarn(const UDSPInstruction opc)
|
||||||
{
|
{
|
||||||
// u8 dreg = opc & 0x3;
|
// u8 dreg = opc & 0x3;
|
||||||
// g_dsp.r[dreg] = dsp_decrease_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + dreg]);
|
// g_dsp.r[dreg] = dsp_decrease_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + dreg]);
|
||||||
zeroWriteBackLog(opc);
|
|
||||||
decrease_addr_reg(opc & 0x3);
|
decrease_addr_reg(opc & 0x3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -408,7 +405,6 @@ void DSPEmitter::addarn(const UDSPInstruction opc)
|
||||||
// g_dsp.r[dreg] = dsp_increase_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + sreg]);
|
// g_dsp.r[dreg] = dsp_increase_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + sreg]);
|
||||||
|
|
||||||
// From looking around it is always called with the matching index register
|
// From looking around it is always called with the matching index register
|
||||||
zeroWriteBackLog(opc);
|
|
||||||
increase_addr_reg(opc & 0x3);
|
increase_addr_reg(opc & 0x3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -448,7 +444,6 @@ void DSPEmitter::sbclr(const UDSPInstruction opc)
|
||||||
{
|
{
|
||||||
u8 bit = (opc & 0x7) + 6;
|
u8 bit = (opc & 0x7) + 6;
|
||||||
|
|
||||||
zeroWriteBackLog(opc);
|
|
||||||
clrCompileSR(1 << bit);
|
clrCompileSR(1 << bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -460,10 +455,10 @@ void DSPEmitter::sbset(const UDSPInstruction opc)
|
||||||
{
|
{
|
||||||
u8 bit = (opc & 0x7) + 6;
|
u8 bit = (opc & 0x7) + 6;
|
||||||
|
|
||||||
zeroWriteBackLog(opc);
|
|
||||||
setCompileSR(1 << bit);
|
setCompileSR(1 << bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 1000 1bbb xxxx xxxx, bbb >= 010
|
||||||
// This is a bunch of flag setters, flipping bits in SR. So far so good,
|
// This is a bunch of flag setters, flipping bits in SR. So far so good,
|
||||||
// but it's harder to know exactly what effect they have.
|
// but it's harder to know exactly what effect they have.
|
||||||
void DSPEmitter::srbith(const UDSPInstruction opc)
|
void DSPEmitter::srbith(const UDSPInstruction opc)
|
||||||
|
|
|
@ -27,126 +27,141 @@
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
|
||||||
// Performs the hashing required by increment/increase/decrease_addr_reg
|
// Performs the hashing required by increment/increase/decrease_addr_reg
|
||||||
void DSPEmitter::ToMask(X64Reg value_reg, X64Reg temp_reg)
|
// clobbers RCX
|
||||||
|
void DSPEmitter::ToMask(X64Reg value_reg)
|
||||||
{
|
{
|
||||||
MOV(16, R(temp_reg), R(value_reg));
|
#if 0
|
||||||
SHR(16, R(temp_reg), Imm8(8));
|
MOV(16, R(CX), R(value_reg));
|
||||||
OR(16, R(value_reg), R(temp_reg));
|
SHR(16, R(CX), Imm8(8));
|
||||||
MOV(16, R(temp_reg), R(value_reg));
|
OR(16, R(value_reg), R(CX));
|
||||||
SHR(16, R(temp_reg), Imm8(4));
|
MOV(16, R(CX), R(value_reg));
|
||||||
OR(16, R(value_reg), R(temp_reg));
|
SHR(16, R(CX), Imm8(4));
|
||||||
MOV(16, R(temp_reg), R(value_reg));
|
OR(16, R(value_reg), R(CX));
|
||||||
SHR(16, R(temp_reg), Imm8(2));
|
MOV(16, R(CX), R(value_reg));
|
||||||
OR(16, R(value_reg), R(temp_reg));
|
SHR(16, R(CX), Imm8(2));
|
||||||
MOV(16, R(temp_reg), R(value_reg));
|
OR(16, R(value_reg), R(CX));
|
||||||
SHR(16, R(temp_reg), Imm8(1));
|
MOV(16, R(CX), R(value_reg));
|
||||||
OR(16, R(value_reg), R(temp_reg));
|
SHR(16, R(CX), Imm8(1));
|
||||||
}
|
OR(16, R(value_reg), R(CX));
|
||||||
|
MOVZX(32,16,value_reg, R(value_reg));
|
||||||
|
#else
|
||||||
|
BSR(16, CX, R(value_reg));
|
||||||
|
FixupBranch undef = J_CC(CC_Z); //CX is written, but undefined
|
||||||
|
|
||||||
// HORRIBLE UGLINESS, someone please fix.
|
MOV(32, R(value_reg), Imm32(2));
|
||||||
// See http://code.google.com/p/dolphin-emu/source/detail?r=3125
|
SHL(32, R(value_reg), R(CL));
|
||||||
void DSPEmitter::dsp_increment_one(X64Reg ar, X64Reg wr, X64Reg wr_pow, X64Reg temp_reg)
|
SUB(32, R(value_reg), Imm32(1));
|
||||||
{
|
//don't waste an instruction on jumping over an effective noop
|
||||||
// if ((tmp & tmb) == tmb)
|
|
||||||
MOV(16, R(temp_reg), R(ar));
|
|
||||||
AND(16, R(temp_reg), R(wr_pow));
|
|
||||||
CMP(16, R(temp_reg), R(wr_pow));
|
|
||||||
FixupBranch not_equal = J_CC(CC_NE);
|
|
||||||
|
|
||||||
// tmp -= wr_reg
|
SetJumpTarget(undef);
|
||||||
SUB(16, R(ar), R(wr));
|
#endif
|
||||||
|
OR(16, R(value_reg), Imm16(1));
|
||||||
FixupBranch end = J();
|
XOR(64, R(RCX), R(RCX));
|
||||||
SetJumpTarget(not_equal);
|
|
||||||
|
|
||||||
// else tmp++
|
|
||||||
ADD(16, R(ar), Imm16(1));
|
|
||||||
SetJumpTarget(end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// EAX = g_dsp.r[reg]
|
// EAX = g_dsp.r[reg]
|
||||||
// EDX = g_dsp.r[DSP_REG_WR0 + reg]
|
// EDX = g_dsp.r[DSP_REG_WR0 + reg]
|
||||||
|
//clobbers RCX
|
||||||
void DSPEmitter::increment_addr_reg(int reg)
|
void DSPEmitter::increment_addr_reg(int reg)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
u16 ar = g_dsp.r[reg];
|
||||||
|
u16 wr = g_dsp.r[reg+8];
|
||||||
|
u16 nar = ar+1;
|
||||||
|
//this works, because nar^ar will have all the bits from the highest
|
||||||
|
//changed bit downwards set(true only for +1!)
|
||||||
|
//based on an idea by Mylek
|
||||||
|
if((nar^ar)>=((wr<<1)|1))
|
||||||
|
nar -= wr+1;
|
||||||
|
*/
|
||||||
|
|
||||||
// s16 tmp = g_dsp.r[reg];
|
// s16 tmp = g_dsp.r[reg];
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOV(16, R(EAX), M(&g_dsp.r[reg]));
|
MOV(16, R(AX), M(&g_dsp.r[reg]));
|
||||||
MOV(16, R(EDX), M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
MOV(16, R(DX), M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
||||||
#else
|
#else
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOV(16, R(EAX), MDisp(R11,reg*2));
|
MOV(16, R(AX), MDisp(R11,reg*2));
|
||||||
MOV(16, R(EDX), MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
MOV(16, R(DX), MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ToMask(WR0), calculating it into EDI
|
MOV(16,R(DI), R(AX));
|
||||||
MOV(16, R(EDI), R(EDX));
|
ADD(16,R(AX), Imm16(1));
|
||||||
ToMask(EDI);
|
XOR(16,R(DI), R(AX));
|
||||||
|
MOV(16,R(SI), R(DX));
|
||||||
|
|
||||||
dsp_increment_one(EAX, EDX, EDI);
|
SHL(16,R(SI), Imm8(1));
|
||||||
|
OR(16,R(SI), Imm16(3));
|
||||||
|
CMP(16,R(DI), R(SI));
|
||||||
|
FixupBranch nowrap = J_CC(CC_L);
|
||||||
|
|
||||||
|
SUB(16,R(AX), R(DX));
|
||||||
|
SUB(16,R(AX), Imm16(1));
|
||||||
|
|
||||||
|
SetJumpTarget(nowrap);
|
||||||
|
|
||||||
// g_dsp.r[reg] = tmp;
|
// g_dsp.r[reg] = tmp;
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOV(16, M(&g_dsp.r[reg]), R(EAX));
|
MOV(16, M(&g_dsp.r[reg]), R(AX));
|
||||||
#else
|
#else
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOV(16, MDisp(R11,reg*2), R(EAX));
|
MOV(16, MDisp(R11,reg*2), R(AX));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// See http://code.google.com/p/dolphin-emu/source/detail?r=3125
|
|
||||||
void DSPEmitter::dsp_decrement_one(X64Reg ar, X64Reg wr, X64Reg wr_pow, X64Reg temp_reg)
|
|
||||||
{
|
|
||||||
// compute min from wr_pow and ar
|
|
||||||
// min = (tmb+1-ar)&tmb;
|
|
||||||
LEA(16, temp_reg, MDisp(wr_pow, 1));
|
|
||||||
SUB(16, R(temp_reg), R(ar));
|
|
||||||
AND(16, R(temp_reg), R(wr_pow));
|
|
||||||
|
|
||||||
// wr < min
|
|
||||||
CMP(16, R(wr), R(temp_reg));
|
|
||||||
FixupBranch wr_lt_min = J_CC(CC_B);
|
|
||||||
// !min
|
|
||||||
TEST(16, R(temp_reg), R(temp_reg));
|
|
||||||
FixupBranch min_zero = J_CC(CC_Z);
|
|
||||||
|
|
||||||
// ar--;
|
|
||||||
SUB(16, R(ar), Imm16(1));
|
|
||||||
FixupBranch end = J();
|
|
||||||
|
|
||||||
// ar += wr;
|
|
||||||
SetJumpTarget(wr_lt_min);
|
|
||||||
SetJumpTarget(min_zero);
|
|
||||||
ADD(16, R(ar), R(wr));
|
|
||||||
|
|
||||||
SetJumpTarget(end);
|
|
||||||
}
|
|
||||||
|
|
||||||
// EAX = g_dsp.r[reg]
|
// EAX = g_dsp.r[reg]
|
||||||
// EDX = g_dsp.r[DSP_REG_WR0 + reg]
|
// EDX = g_dsp.r[DSP_REG_WR0 + reg]
|
||||||
|
//clobbers RCX
|
||||||
void DSPEmitter::decrement_addr_reg(int reg)
|
void DSPEmitter::decrement_addr_reg(int reg)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
u16 ar = g_dsp.r[reg];
|
||||||
|
u16 wr = g_dsp.r[reg+8];
|
||||||
|
u16 m = ToMask(wr) | 1;
|
||||||
|
u16 nar = ar-1;
|
||||||
|
if((ar&m) - 1 < m-wr)
|
||||||
|
nar += wr+1;
|
||||||
|
return nar;
|
||||||
|
*/
|
||||||
|
|
||||||
// s16 ar = g_dsp.r[reg];
|
// s16 ar = g_dsp.r[reg];
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOV(16, R(EAX), M(&g_dsp.r[reg]));
|
MOV(16, R(AX), M(&g_dsp.r[reg]));
|
||||||
MOV(16, R(EDX), M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
MOVZX(32, 16, EDX, M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
||||||
#else
|
#else
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOV(16, R(EAX), MDisp(R11,reg*2));
|
MOV(16, R(AX), MDisp(R11,reg*2));
|
||||||
MOV(16, R(EDX), MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
MOVZX(32, 16, EDX, MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ToMask(WR0), calculating it into EDI
|
// ToMask(WR0), calculating it into EDI
|
||||||
MOV(16, R(EDI), R(EDX));
|
//u16 m = ToMask(wr) | 1;
|
||||||
ToMask(EDI);
|
MOV(16, R(DI), R(DX));
|
||||||
|
ToMask(DI);
|
||||||
|
|
||||||
dsp_decrement_one(EAX, EDX, EDI);
|
//u16 nar = ar-1;
|
||||||
|
MOV(16, R(CX), R(AX));
|
||||||
|
SUB(16, R(AX), Imm16(1));
|
||||||
|
|
||||||
|
//(ar&m) - 1
|
||||||
|
AND(32, R(ECX), R(EDI));
|
||||||
|
SUB(32, R(ECX), Imm32(1));
|
||||||
|
|
||||||
|
//m-wr
|
||||||
|
SUB(32, R(EDI), R(EDX));
|
||||||
|
CMP(32, R(ECX), R(EDI));
|
||||||
|
FixupBranch out1 = J_CC(CC_GE);
|
||||||
|
ADD(16,R(AX),R(DX));
|
||||||
|
ADD(16,R(AX),Imm16(1));
|
||||||
|
|
||||||
|
SetJumpTarget(out1);
|
||||||
|
|
||||||
// g_dsp.r[reg] = tmp;
|
// g_dsp.r[reg] = tmp;
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOV(16, M(&g_dsp.r[reg]), R(EAX));
|
MOV(16, M(&g_dsp.r[reg]), R(AX));
|
||||||
#else
|
#else
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOV(16, MDisp(R11,reg*2), R(EAX));
|
MOV(16, MDisp(R11,reg*2), R(AX));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,71 +172,70 @@ void DSPEmitter::decrement_addr_reg(int reg)
|
||||||
// EDI = tomask(EDX)
|
// EDI = tomask(EDX)
|
||||||
void DSPEmitter::increase_addr_reg(int reg)
|
void DSPEmitter::increase_addr_reg(int reg)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
u16 ar = g_dsp.r[reg];
|
||||||
|
u16 wr = g_dsp.r[reg+8];
|
||||||
|
u16 ix = g_dsp.r[reg+4];
|
||||||
|
u16 m = ToMask(wr) | 1;
|
||||||
|
u16 nar = ar+ix;
|
||||||
|
if (ix >= 0) {
|
||||||
|
if((ar&m) + (ix&m) -(int)m-1 >= 0)
|
||||||
|
nar -= wr+1;
|
||||||
|
} else {
|
||||||
|
if((ar&m) + (ix&m) -(int)m-1 < m-wr)
|
||||||
|
nar += wr+1;
|
||||||
|
}
|
||||||
|
return nar;
|
||||||
|
*/
|
||||||
|
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOVZX(32, 16, ECX, M(&g_dsp.r[DSP_REG_IX0 + reg]));
|
MOV(16, R(SI), M(&g_dsp.r[DSP_REG_IX0 + reg]));
|
||||||
|
MOV(16, R(AX), M(&g_dsp.r[reg]));
|
||||||
|
MOVZX(32, 16, EDX, M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
||||||
#else
|
#else
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOVZX(32, 16, ECX, MDisp(R11,(DSP_REG_IX0 + reg)*2));
|
MOV(16, R(SI), MDisp(R11,(DSP_REG_IX0 + reg)*2));
|
||||||
#endif
|
MOV(16, R(AX), MDisp(R11,reg*2));
|
||||||
// IX0 == 0, bail out
|
MOVZX(32, 16, EDX, MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
||||||
|
|
||||||
TEST(16, R(ECX), R(ECX));
|
|
||||||
// code too long for a 5-byte jump
|
|
||||||
// TODO: optimize a bit, maybe merge loops?
|
|
||||||
FixupBranch end = J_CC(CC_Z, true);
|
|
||||||
|
|
||||||
#ifdef _M_IX86 // All32
|
|
||||||
MOV(16, R(EAX), M(&g_dsp.r[reg]));
|
|
||||||
MOV(16, R(EDX), M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
|
||||||
#else
|
|
||||||
MOV(16, R(EAX), MDisp(R11,reg*2));
|
|
||||||
MOV(16, R(EDX), MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ToMask(WR0), calculating it into EDI
|
// ToMask(WR0), calculating it into EDI
|
||||||
MOV(16, R(EDI), R(EDX));
|
//u16 m = ToMask(wr) | 1;
|
||||||
ToMask(EDI);
|
MOV(16, R(DI), R(DX));
|
||||||
|
ToMask(DI);
|
||||||
|
|
||||||
// IX0 > 0
|
//u16 nar = ar+ix;
|
||||||
// TODO: ToMask flushes flags set by TEST,
|
MOV(16, R(CX), R(AX));
|
||||||
// needs another CMP here.
|
ADD(16, R(AX), R(SI));
|
||||||
CMP(16, R(ECX), Imm16(0));
|
|
||||||
FixupBranch negative = J_CC(CC_L);
|
|
||||||
|
|
||||||
JumpTarget loop_pos = GetCodePtr();
|
//(ar&m) + (ix&m) -(int)m-1
|
||||||
|
AND(32, R(ECX), R(EDI));
|
||||||
|
AND(32, R(ESI), R(EDI));
|
||||||
|
ADD(32, R(ECX), R(ESI));
|
||||||
|
SUB(32, R(ECX), R(EDI));
|
||||||
|
SUB(32, R(ECX), Imm32(1));
|
||||||
|
|
||||||
// dsp_increment
|
TEST(16,R(SI), Imm16(0x8000));
|
||||||
dsp_increment_one(EAX, EDX, EDI);
|
FixupBranch negative = J_CC(CC_NZ);
|
||||||
|
|
||||||
SUB(16, R(ECX), Imm16(1)); // value--
|
CMP(32, R(ECX), Imm32(0));
|
||||||
#ifdef _M_IX86 // All32
|
FixupBranch out1 = J_CC(CC_L);
|
||||||
CMP(16, M(&g_dsp.r[DSP_REG_IX0 + reg]), Imm16(127));
|
SUB(16,R(AX),R(DX));
|
||||||
#else
|
SUB(16,R(AX),Imm16(1));
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
FixupBranch out2 = J();
|
||||||
CMP(16, MDisp(R11,(DSP_REG_IX0 + reg)*2), Imm16(127));
|
|
||||||
#endif
|
|
||||||
FixupBranch dbg = J_CC(CC_NE);
|
|
||||||
CMP(16, R(ECX), Imm16(1));
|
|
||||||
FixupBranch dbg2 = J_CC(CC_NE);
|
|
||||||
INT3();
|
|
||||||
SetJumpTarget(dbg2);
|
|
||||||
SetJumpTarget(dbg);
|
|
||||||
CMP(16, R(ECX), Imm16(0)); // value > 0
|
|
||||||
J_CC(CC_G, loop_pos);
|
|
||||||
FixupBranch end_pos = J();
|
|
||||||
|
|
||||||
// else, IX0 < 0
|
|
||||||
SetJumpTarget(negative);
|
SetJumpTarget(negative);
|
||||||
JumpTarget loop_neg = GetCodePtr();
|
|
||||||
|
|
||||||
// dsp_decrement
|
//m-wr
|
||||||
dsp_decrement_one(EAX, EDX, EDI);
|
SUB(32, R(EDI), R(EDX));
|
||||||
|
CMP(32, R(ECX), R(EDI));
|
||||||
|
FixupBranch out3 = J_CC(CC_GE);
|
||||||
|
ADD(16,R(AX),R(DX));
|
||||||
|
ADD(16,R(AX),Imm16(1));
|
||||||
|
|
||||||
ADD(16, R(ECX), Imm16(1)); // value++
|
SetJumpTarget(out1);
|
||||||
CMP(16, R(ECX), Imm16(0)); // value < 0
|
SetJumpTarget(out2);
|
||||||
J_CC(CC_L, loop_neg);
|
SetJumpTarget(out3);
|
||||||
|
|
||||||
SetJumpTarget(end_pos);
|
|
||||||
|
|
||||||
// g_dsp.r[reg] = tmp;
|
// g_dsp.r[reg] = tmp;
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
|
@ -230,8 +244,6 @@ void DSPEmitter::increase_addr_reg(int reg)
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOV(16, MDisp(R11,reg*2), R(EAX));
|
MOV(16, MDisp(R11,reg*2), R(EAX));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SetJumpTarget(end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decrease addr register according to the correspond ix register
|
// Decrease addr register according to the correspond ix register
|
||||||
|
@ -241,58 +253,68 @@ void DSPEmitter::increase_addr_reg(int reg)
|
||||||
// EDI = tomask(EDX)
|
// EDI = tomask(EDX)
|
||||||
void DSPEmitter::decrease_addr_reg(int reg)
|
void DSPEmitter::decrease_addr_reg(int reg)
|
||||||
{
|
{
|
||||||
#ifdef _M_IX86 // All32
|
/*
|
||||||
MOV(16, R(ECX), M(&g_dsp.r[DSP_REG_IX0 + reg]));
|
u16 ar = g_dsp.r[reg];
|
||||||
#else
|
u16 wr = g_dsp.r[reg+8];
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
u16 ix = g_dsp.r[reg+4];
|
||||||
MOV(16, R(ECX), MDisp(R11,(DSP_REG_IX0 + reg)*2));
|
u16 m = ToMask(wr) | 1;
|
||||||
#endif
|
u16 nar = ar-ix; //!!
|
||||||
// IX0 == 0, bail out
|
if ((u16)ix > 0x8000) { // equiv: ix < 0 && ix != -0x8000 //!!
|
||||||
TEST(16, R(ECX), R(ECX));
|
if((ar&m) - (int)(ix&m) >= 0) //!!
|
||||||
// code too long for a 5-byte jump
|
nar -= wr+1;
|
||||||
// TODO: optimize a bit, maybe merge loops?
|
} else {
|
||||||
FixupBranch end = J_CC(CC_Z, true);
|
if((ar&m) - (int)(ix&m) < m-wr) //!!
|
||||||
|
nar += wr+1;
|
||||||
|
}
|
||||||
|
return nar;
|
||||||
|
*/
|
||||||
|
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOV(16, R(EAX), M(&g_dsp.r[reg]));
|
MOV(16, R(SI), M(&g_dsp.r[DSP_REG_IX0 + reg]));
|
||||||
MOV(16, R(EDX), M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
MOV(16, R(AX), M(&g_dsp.r[reg]));
|
||||||
|
MOVZX(32, 16, EDX, M(&g_dsp.r[DSP_REG_WR0 + reg]));
|
||||||
#else
|
#else
|
||||||
MOV(16, R(EAX), MDisp(R11,reg*2));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOV(16, R(EDX), MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
MOV(16, R(SI), MDisp(R11,(DSP_REG_IX0 + reg)*2));
|
||||||
|
MOV(16, R(AX), MDisp(R11,reg*2));
|
||||||
|
MOVZX(32, 16, EDX, MDisp(R11,(DSP_REG_WR0 + reg)*2));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ToMask(WR0), calculating it into EDI
|
// ToMask(WR0), calculating it into EDI
|
||||||
MOV(16, R(EDI), R(EDX));
|
//u16 m = ToMask(wr) | 1;
|
||||||
ToMask(EDI);
|
MOV(16, R(DI), R(DX));
|
||||||
|
ToMask(DI);
|
||||||
|
|
||||||
// IX0 > 0
|
//u16 nar = ar-ix;
|
||||||
// TODO: ToMask flushes flags set by TEST,
|
MOV(16, R(CX), R(AX));
|
||||||
// needs another CMP here.
|
SUB(16, R(AX), R(SI));
|
||||||
CMP(16, R(ECX), Imm16(0));
|
|
||||||
FixupBranch negative = J_CC(CC_L);
|
|
||||||
|
|
||||||
JumpTarget loop_pos = GetCodePtr();
|
//(ar&m) - (ix&m)
|
||||||
|
AND(32, R(ECX), R(EDI));
|
||||||
|
AND(32, R(ESI), R(EDI));
|
||||||
|
SUB(32, R(ECX), R(ESI));
|
||||||
|
|
||||||
// dsp_decrement
|
CMP(16,R(SI), Imm16(0x8000));
|
||||||
dsp_decrement_one(EAX, EDX, EDI);
|
FixupBranch negative = J_CC(CC_BE);
|
||||||
|
|
||||||
SUB(16, R(ECX), Imm16(1)); // value--
|
CMP(32, R(ECX), Imm32(0));
|
||||||
CMP(16, R(ECX), Imm16(0)); // value > 0
|
FixupBranch out1 = J_CC(CC_L);
|
||||||
J_CC(CC_G, loop_pos);
|
SUB(16,R(AX),R(DX));
|
||||||
FixupBranch end_pos = J();
|
SUB(16,R(AX),Imm16(1));
|
||||||
|
FixupBranch out2 = J();
|
||||||
|
|
||||||
// else, IX0 < 0
|
|
||||||
SetJumpTarget(negative);
|
SetJumpTarget(negative);
|
||||||
JumpTarget loop_neg = GetCodePtr();
|
|
||||||
|
|
||||||
// dsp_increment
|
//m-wr
|
||||||
dsp_increment_one(EAX, EDX, EDI);
|
SUB(32, R(EDI), R(EDX));
|
||||||
|
CMP(32, R(ECX), R(EDI));
|
||||||
|
FixupBranch out3 = J_CC(CC_GE);
|
||||||
|
ADD(16,R(AX),R(DX));
|
||||||
|
ADD(16,R(AX),Imm16(1));
|
||||||
|
|
||||||
ADD(16, R(ECX), Imm16(1)); // value++
|
SetJumpTarget(out1);
|
||||||
CMP(16, R(ECX), Imm16(0)); // value < 0
|
SetJumpTarget(out2);
|
||||||
J_CC(CC_L, loop_neg);
|
SetJumpTarget(out3);
|
||||||
|
|
||||||
SetJumpTarget(end_pos);
|
|
||||||
|
|
||||||
// g_dsp.r[reg] = tmp;
|
// g_dsp.r[reg] = tmp;
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
|
@ -301,8 +323,6 @@ void DSPEmitter::decrease_addr_reg(int reg)
|
||||||
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
MOV(16, MDisp(R11,reg*2), R(EAX));
|
MOV(16, MDisp(R11,reg*2), R(EAX));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SetJumpTarget(end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue