dsp: fix SHIFTER and ACC shift values. Simplify
This commit is contained in:
parent
713705a6fe
commit
a50ed2cc7d
|
@ -95,6 +95,7 @@ void WriteReg(u32 addr,u32 data)
|
|||
dsp_writenmem(addr);
|
||||
dsp_writenmem(addr+1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (sz==1)
|
||||
WriteAicaReg<1>(addr,data);
|
||||
|
|
|
@ -99,7 +99,7 @@ public:
|
|||
Mov(ADRS_REG, 0);
|
||||
Ldr(MDEC_CT, dsp_operand(&DSP->regs.MDEC_CT));
|
||||
|
||||
#ifndef __ANDROID__
|
||||
#if 0
|
||||
Instruction* instr_cur = GetBuffer()->GetEndAddress<Instruction*>();
|
||||
DEBUG_LOG(AICA_ARM, "DSP PROLOGUE");
|
||||
Disassemble(instr_start, instr_cur);
|
||||
|
@ -115,7 +115,6 @@ public:
|
|||
if (op.XSEL || op.YRL || (op.ADRL && op.SHIFT != 3))
|
||||
{
|
||||
verify(op.IRA < 0x38);
|
||||
bool sign_extend = true;
|
||||
if (op.IRA <= 0x1f)
|
||||
//INPUTS = DSP->MEMS[op.IRA];
|
||||
Ldr(INPUTS, dsp_operand(DSP->MEMS, op.IRA));
|
||||
|
@ -134,12 +133,7 @@ public:
|
|||
else
|
||||
{
|
||||
Mov(INPUTS, 0);
|
||||
sign_extend = false;
|
||||
}
|
||||
|
||||
// sign extend 24 bits
|
||||
if (sign_extend)
|
||||
Sbfiz(INPUTS, INPUTS, 0, 24);
|
||||
}
|
||||
|
||||
if (op.IWT)
|
||||
|
@ -165,8 +159,6 @@ public:
|
|||
Mov(w1, MDEC_CT);
|
||||
Bfc(w1, 7, 25);
|
||||
Ldr(B, dsp_operand(DSP->TEMP, x1));
|
||||
// sign extend 24 bits
|
||||
Sbfiz(B, B, 0, 24);
|
||||
}
|
||||
if (op.NEGB)
|
||||
//B = 0 - B;
|
||||
|
@ -190,8 +182,6 @@ public:
|
|||
Mov(w1, MDEC_CT);
|
||||
Bfc(w1, 7, 25);
|
||||
Ldr(X, dsp_operand(DSP->TEMP, x1));
|
||||
// sign extend 24 bits
|
||||
Sbfiz(X, X, 0, 24);
|
||||
}
|
||||
|
||||
// Y
|
||||
|
@ -199,9 +189,6 @@ public:
|
|||
{
|
||||
//Y = FRC_REG;
|
||||
Mov(Y, FRC_REG);
|
||||
//Y <<= 19;
|
||||
//Y >>= 19;
|
||||
// FRC_REG has 13 bits
|
||||
}
|
||||
else if (op.YSEL == 1)
|
||||
{
|
||||
|
@ -210,8 +197,8 @@ public:
|
|||
Sbfx(Y, Y, 3, 13);
|
||||
}
|
||||
else if (op.YSEL == 2)
|
||||
//Y = (Y_REG >> 11) & 0x1FFF;
|
||||
Sbfx(Y, Y_REG, 11, 13);
|
||||
//Y = Y_REG >> 11;
|
||||
Asr(Y, Y_REG, 11);
|
||||
else if (op.YSEL == 3)
|
||||
//Y = (Y_REG >> 4) & 0x0FFF;
|
||||
Sbfx(Y, Y_REG, 4, 12);
|
||||
|
@ -226,21 +213,19 @@ public:
|
|||
// There's a 1-step delay at the output of the X*Y + B adder. So we use the ACC value from the previous step.
|
||||
if (op.SHIFT == 0)
|
||||
{
|
||||
//SHIFTED = ACC >> 2; // 26 bits -> 24 bits
|
||||
Asr(SHIFTED, ACC, 2);
|
||||
// SHIFTED = clamp(SHIFTED, -0x80000, 0x7FFFF)
|
||||
// SHIFTED = clamp(ACC, -0x80000, 0x7FFFF)
|
||||
Mov(w0, 0x80000);
|
||||
Neg(w1, w0);
|
||||
Cmp(SHIFTED, w1);
|
||||
Csel(SHIFTED, w1, SHIFTED, lt);
|
||||
Cmp(ACC, w1);
|
||||
Csel(SHIFTED, w1, ACC, lt);
|
||||
Sub(w0, w0, 1);
|
||||
Cmp(SHIFTED, w0);
|
||||
Csel(SHIFTED, w0, SHIFTED, gt);
|
||||
}
|
||||
else if (op.SHIFT == 1)
|
||||
{
|
||||
//SHIFTED = ACC >> 1; // 26 bits -> 24 bits and x2 scale
|
||||
Asr(SHIFTED, ACC, 1);
|
||||
//SHIFTED = ACC << 1; // x2 scale
|
||||
Lsl(SHIFTED, ACC, 1);
|
||||
// SHIFTED = clamp(SHIFTED, -0x80000, 0x7FFFF)
|
||||
Mov(w0, 0x80000);
|
||||
Neg(w1, w0);
|
||||
|
@ -252,40 +237,28 @@ public:
|
|||
}
|
||||
else if (op.SHIFT == 2)
|
||||
{
|
||||
//SHIFTED = ACC >> 1;
|
||||
Asr(SHIFTED, ACC, 1);
|
||||
|
||||
// sign extend 24 bits
|
||||
Sbfiz(SHIFTED, SHIFTED, 0, 24);
|
||||
//SHIFTED = ACC << 1; // x2 scale
|
||||
Lsl(SHIFTED, ACC, 1);
|
||||
}
|
||||
else if (op.SHIFT == 3)
|
||||
{
|
||||
//SHIFTED = ACC >> 2;
|
||||
Asr(SHIFTED, ACC, 2);
|
||||
// sign extend 24 bits
|
||||
Sbfiz(SHIFTED, SHIFTED, 0, 24);
|
||||
//SHIFTED = ACC;
|
||||
Mov(SHIFTED, ACC);
|
||||
}
|
||||
}
|
||||
|
||||
// ACCUM
|
||||
//s64 v = ((s64)X * (s64)Y) >> 10; // magic value from dynarec. 1 sign bit + 24-1 bits + 13-1 bits -> 26 bits?
|
||||
//ACC = (((s64)X * (s64)Y) >> 12) + B;
|
||||
const Register& X64 = Register::GetXRegFromCode(X_alias->GetCode());
|
||||
const Register& Y64 = Register::GetXRegFromCode(Y.GetCode());
|
||||
Sxtw(X64, *X_alias);
|
||||
Sxtw(Y64, Y);
|
||||
Mul(x0, X64, Y64);
|
||||
Asr(x0, x0, 10);
|
||||
// sign extend 26 bits
|
||||
Asr(x0, x0, 12);
|
||||
if (op.ZERO)
|
||||
Sbfiz(ACC, w0, 0, 26);
|
||||
Mov(ACC, w0);
|
||||
else
|
||||
{
|
||||
Sbfiz(w0, w0, 0, 26);
|
||||
//ACC = v + B;
|
||||
Add(ACC, w0, B);
|
||||
// sign extend 26 bits
|
||||
Sbfiz(ACC, ACC, 0, 26);
|
||||
}
|
||||
|
||||
if (op.TWT)
|
||||
{
|
||||
|
@ -304,8 +277,8 @@ public:
|
|||
//FRC_REG = SHIFTED & 0x0FFF;
|
||||
Ubfx(FRC_REG, SHIFTED, 0, 12);
|
||||
else
|
||||
//FRC_REG = (SHIFTED >> 11) & 0x1FFF;
|
||||
Ubfx(FRC_REG, SHIFTED, 11, 13);
|
||||
//FRC_REG = SHIFTED >> 11;
|
||||
Asr(FRC_REG, SHIFTED, 11);
|
||||
}
|
||||
|
||||
if (step & 1)
|
||||
|
@ -339,24 +312,21 @@ public:
|
|||
if (op.ADRL)
|
||||
{
|
||||
if (op.SHIFT == 3)
|
||||
//ADRS_REG = (SHIFTED >> 12) & 0xFFF;
|
||||
Ubfx(ADRS_REG, SHIFTED, 12, 12);
|
||||
//ADRS_REG = SHIFTED >> 12;
|
||||
Asr(ADRS_REG, SHIFTED, 12);
|
||||
else
|
||||
//ADRS_REG = (INPUTS >> 16);
|
||||
Ubfx(ADRS_REG, INPUTS, 16, 16);
|
||||
//ADRS_REG = INPUTS >> 16;
|
||||
Asr(ADRS_REG, INPUTS, 16);
|
||||
}
|
||||
|
||||
if (op.EWT)
|
||||
{
|
||||
// 4 ????
|
||||
//DSPData->EFREG[op.EWA] += SHIFTED >> 4; // x86 dynarec uses = instead of +=
|
||||
//DSPData->EFREG[op.EWA] = SHIFTED >> 4;
|
||||
MemOperand mem_operand = dspdata_operand(DSPData->EFREG, op.EWA);
|
||||
Ldr(w1, mem_operand);
|
||||
Asr(w2, SHIFTED, 4);
|
||||
Add(w1, w1, w2);
|
||||
Asr(w1, SHIFTED, 4);
|
||||
Str(w1, mem_operand);
|
||||
}
|
||||
#ifndef __ANDROID__
|
||||
#if 0
|
||||
instr_cur = GetBuffer()->GetEndAddress<Instruction*>();
|
||||
DEBUG_LOG(AICA_ARM, "DSP STEP %d: %04x %04x %04x %04x", step, mpro[0], mpro[1], mpro[2], mpro[3]);
|
||||
Disassemble(instr_start, instr_cur);
|
||||
|
@ -378,7 +348,7 @@ public:
|
|||
Ldp(x19, x20, MemOperand(sp, 80));
|
||||
Ldp(x29, x30, MemOperand(sp, 96, PostIndex));
|
||||
Ret();
|
||||
#ifndef __ANDROID__
|
||||
#if 0
|
||||
instr_cur = GetBuffer()->GetEndAddress<Instruction*>();
|
||||
DEBUG_LOG(AICA_ARM, "DSP EPILOGUE");
|
||||
Disassemble(instr_start, instr_cur);
|
||||
|
@ -534,9 +504,7 @@ void dsp_step()
|
|||
dsp_recompile();
|
||||
}
|
||||
|
||||
#ifdef __ANDROID__
|
||||
((void (*)())&dsp.DynCode)();
|
||||
#endif
|
||||
}
|
||||
|
||||
void dsp_writenmem(u32 addr)
|
||||
|
|
|
@ -122,9 +122,6 @@ void AICADSP_Step(struct dsp_t *DSP)
|
|||
else
|
||||
INPUTS = 0;
|
||||
|
||||
INPUTS <<= 8;
|
||||
INPUTS >>= 8;
|
||||
|
||||
if (IWT)
|
||||
{
|
||||
u32 IWA = (IPtr[1] >> 1) & 0x1F;
|
||||
|
@ -141,34 +138,28 @@ void AICADSP_Step(struct dsp_t *DSP)
|
|||
if (BSEL)
|
||||
B = ACC;
|
||||
else
|
||||
{
|
||||
B = DSP->TEMP[(TRA + DSP->regs.MDEC_CT) & 0x7F];
|
||||
B <<= 8; //Sign extend
|
||||
B >>= 8;
|
||||
}
|
||||
if (NEGB)
|
||||
B = 0 - B;
|
||||
B = -B;
|
||||
}
|
||||
else
|
||||
{
|
||||
B = 0;
|
||||
}
|
||||
|
||||
// X
|
||||
if (XSEL)
|
||||
X = INPUTS;
|
||||
else
|
||||
{
|
||||
X = DSP->TEMP[(TRA + DSP->regs.MDEC_CT) & 0x7F];
|
||||
X <<= 8;
|
||||
X >>= 8;
|
||||
}
|
||||
|
||||
// Y
|
||||
if (YSEL == 0)
|
||||
Y = FRC_REG;
|
||||
else if (YSEL == 1)
|
||||
Y = DSPData->COEF[COEF] >> 3; //COEF is 16 bits
|
||||
Y = ((s32)(s16)DSPData->COEF[COEF]) >> 3; //COEF is 16 bits
|
||||
else if (YSEL == 2)
|
||||
Y = (Y_REG >> 11) & 0x1FFF;
|
||||
Y = Y_REG >> 11;
|
||||
else if (YSEL == 3)
|
||||
Y = (Y_REG >> 4) & 0x0FFF;
|
||||
|
||||
|
@ -179,43 +170,31 @@ void AICADSP_Step(struct dsp_t *DSP)
|
|||
// There's a 1-step delay at the output of the X*Y + B adder. So we use the ACC value from the previous step.
|
||||
if (SHIFT == 0)
|
||||
{
|
||||
SHIFTED = ACC >> 2; // 26 bits -> 24 bits
|
||||
if (SHIFTED > 0x0007FFFF)
|
||||
SHIFTED = 0x0007FFFF;
|
||||
if (SHIFTED < (-0x00080000))
|
||||
SHIFTED = -0x00080000;
|
||||
SHIFTED = ACC;
|
||||
if (SHIFTED > 0x007FFFFF)
|
||||
SHIFTED = 0x007FFFFF;
|
||||
if (SHIFTED < (-0x00800000))
|
||||
SHIFTED = -0x00800000;
|
||||
}
|
||||
else if (SHIFT == 1)
|
||||
{
|
||||
SHIFTED = ACC >> 1; // 26 bits -> 24 bits and x2 scale
|
||||
if (SHIFTED > 0x0007FFFF)
|
||||
SHIFTED = 0x0007FFFF;
|
||||
if (SHIFTED < (-0x00080000))
|
||||
SHIFTED = -0x00080000;
|
||||
SHIFTED = ACC << 1; // x2 scale
|
||||
if (SHIFTED > 0x007FFFFF)
|
||||
SHIFTED = 0x007FFFFF;
|
||||
if (SHIFTED < (-0x00800000))
|
||||
SHIFTED = -0x00800000;
|
||||
}
|
||||
else if (SHIFT == 2)
|
||||
{
|
||||
SHIFTED = ACC >> 1;
|
||||
SHIFTED <<= 8;
|
||||
SHIFTED >>= 8;
|
||||
SHIFTED = ACC << 1; // x2 scale
|
||||
}
|
||||
else if (SHIFT == 3)
|
||||
{
|
||||
SHIFTED = ACC >> 2;
|
||||
SHIFTED <<= 8;
|
||||
SHIFTED >>= 8;
|
||||
SHIFTED = ACC;
|
||||
}
|
||||
|
||||
// ACCUM
|
||||
Y <<= 19;
|
||||
Y >>= 19;
|
||||
|
||||
s64 v = ((s64)X * (s64)Y) >> 10; // magic value from dynarec. 1 sign bit + 24-1 bits + 13-1 bits -> 26 bits?
|
||||
v <<= 6; // 26 bits only
|
||||
v >>= 6;
|
||||
ACC = v + B;
|
||||
ACC <<= 6; // 26 bits only
|
||||
ACC >>= 6;
|
||||
ACC = (((s64)X * (s64)Y) >> 12) + B;
|
||||
|
||||
if (TWT)
|
||||
{
|
||||
|
@ -228,7 +207,7 @@ void AICADSP_Step(struct dsp_t *DSP)
|
|||
if (SHIFT == 3)
|
||||
FRC_REG = SHIFTED & 0x0FFF;
|
||||
else
|
||||
FRC_REG = (SHIFTED >> 11) & 0x1FFF;
|
||||
FRC_REG = SHIFTED >> 11;
|
||||
}
|
||||
|
||||
if (step & 1)
|
||||
|
@ -282,16 +261,15 @@ void AICADSP_Step(struct dsp_t *DSP)
|
|||
if (ADRL)
|
||||
{
|
||||
if (SHIFT == 3)
|
||||
ADRS_REG = (SHIFTED >> 12) & 0xFFF;
|
||||
ADRS_REG = SHIFTED >> 12;
|
||||
else
|
||||
ADRS_REG = (INPUTS >> 16);
|
||||
ADRS_REG = INPUTS >> 16;
|
||||
}
|
||||
|
||||
if (EWT)
|
||||
{
|
||||
u32 EWA = (IPtr[2] >> 8) & 0x0F;
|
||||
// 4 ????
|
||||
DSPData->EFREG[EWA] += SHIFTED >> 4; // dynarec uses = instead of +=
|
||||
DSPData->EFREG[EWA] = SHIFTED >> 4;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue