844 lines
19 KiB
C++
844 lines
19 KiB
C++
#include "dsp.h"
|
||
#include "aica_mem.h"
|
||
#include "hw/aica/aica_if.h"
|
||
#include "oslib/oslib.h"
|
||
|
||
/*
|
||
DSP rec_v1
|
||
|
||
Tries to emulate a guesstimation of the aica dsp, by directly emitting x86 opcodes.
|
||
|
||
This was my first dsp implementation, as implemented for nullDC 1.0.3.
|
||
|
||
This was derived from a schematic I drew for the dsp, based on
|
||
liberal interpretation of known specs, the saturn dsp, digital
|
||
electronics assumptions, as well as "best-fitted" my typical
|
||
test game suite.
|
||
|
||
|
||
Initiall code by skmp, now part of the reicast project.
|
||
See LICENSE & COPYRIGHT files further details
|
||
*/
|
||
|
||
DECL_ALIGN(4096) dsp_t dsp;
|
||
|
||
//float format is ?
|
||
u16 DYNACALL PACK(s32 val)
|
||
{
|
||
u32 temp;
|
||
int sign,exponent,k;
|
||
|
||
sign = (val >> 23) & 0x1;
|
||
temp = (val ^ (val << 1)) & 0xFFFFFF;
|
||
exponent = 0;
|
||
for (k=0; k<12; k++)
|
||
{
|
||
if (temp & 0x800000)
|
||
break;
|
||
temp <<= 1;
|
||
exponent += 1;
|
||
}
|
||
if (exponent < 12)
|
||
val = (val << exponent) & 0x3FFFFF;
|
||
else
|
||
val <<= 11;
|
||
val >>= 11;
|
||
val |= sign << 15;
|
||
val |= exponent << 11;
|
||
|
||
return (u16)val;
|
||
}
|
||
|
||
s32 DYNACALL UNPACK(u16 val)
|
||
{
|
||
int sign,exponent,mantissa;
|
||
s32 uval;
|
||
|
||
sign = (val >> 15) & 0x1;
|
||
exponent = (val >> 11) & 0xF;
|
||
mantissa = val & 0x7FF;
|
||
uval = mantissa << 11;
|
||
if (exponent > 11)
|
||
exponent = 11;
|
||
else
|
||
uval |= (sign ^ 1) << 22;
|
||
uval |= sign << 23;
|
||
uval <<= 8;
|
||
uval >>= 8;
|
||
uval >>= exponent;
|
||
|
||
return uval;
|
||
}
|
||
|
||
void DecodeInst(u32 *IPtr,_INST *i)
|
||
{
|
||
i->TRA=(IPtr[0]>>9)&0x7F;
|
||
i->TWT=(IPtr[0]>>8)&0x01;
|
||
i->TWA=(IPtr[0]>>1)&0x7F;
|
||
|
||
i->XSEL=(IPtr[1]>>15)&0x01;
|
||
i->YSEL=(IPtr[1]>>13)&0x03;
|
||
i->IRA=(IPtr[1]>>7)&0x3F;
|
||
i->IWT=(IPtr[1]>>6)&0x01;
|
||
i->IWA=(IPtr[1]>>1)&0x1F;
|
||
|
||
i->TABLE=(IPtr[2]>>15)&0x01;
|
||
i->MWT=(IPtr[2]>>14)&0x01;
|
||
i->MRD=(IPtr[2]>>13)&0x01;
|
||
i->EWT=(IPtr[2]>>12)&0x01;
|
||
i->EWA=(IPtr[2]>>8)&0x0F;
|
||
i->ADRL=(IPtr[2]>>7)&0x01;
|
||
i->FRCL=(IPtr[2]>>6)&0x01;
|
||
i->SHIFT=(IPtr[2]>>4)&0x03;
|
||
i->YRL=(IPtr[2]>>3)&0x01;
|
||
i->NEGB=(IPtr[2]>>2)&0x01;
|
||
i->ZERO=(IPtr[2]>>1)&0x01;
|
||
i->BSEL=(IPtr[2]>>0)&0x01;
|
||
|
||
i->NOFL=(IPtr[3]>>15)&1; //????
|
||
//i->COEF=(IPtr[3]>>9)&0x3f;
|
||
|
||
i->MASA=(IPtr[3]>>9)&0x3f; //???
|
||
i->ADREB=(IPtr[3]>>8)&0x1;
|
||
i->NXADR=(IPtr[3]>>7)&0x1;
|
||
}
|
||
|
||
#if HOST_CPU == CPU_X86 && FEAT_DSPREC == DYNAREC_JIT
|
||
#include "emitter/x86_emitter.h"
|
||
|
||
const bool SUPPORT_NOFL=false;
|
||
|
||
|
||
#define assert verify
|
||
|
||
#pragma warning(disable:4311)
|
||
|
||
#define DYNBUF 0x10000
|
||
/*
|
||
//#define USEFLOATPACK
|
||
//pack s24 to s1e4s11
|
||
naked u16 packasm(s32 val)
|
||
{
|
||
__asm
|
||
{
|
||
mov edx,ecx; //eax will be sign
|
||
and edx,0x80000; //get the sign
|
||
|
||
jz poz;
|
||
neg ecx;
|
||
|
||
poz:
|
||
bsr eax,ecx;
|
||
jz _zero;
|
||
|
||
//24 -> 11
|
||
//13 -> 0
|
||
//12..0 -> 0
|
||
sub eax,11;
|
||
cmovs eax,0; //if <0 -> 0
|
||
|
||
shr ecx,eax; //shift out mantissa as needed (yeah i know, no rounding here and all .. )
|
||
|
||
shr eax,12; //[14:12] is exp
|
||
or edx,ecx; //merge [15] | [11:0]
|
||
or eax,edx; //merge [14:12] | ([15] | [11:0]), result on eax
|
||
ret;
|
||
|
||
_zero:
|
||
xor eax,eax;
|
||
ret;
|
||
}
|
||
}
|
||
//ONLY lower 16 bits are valid, rest are ignored but do movzx to avoid partial stalls :)
|
||
naked s32 unpackasm(u32 val)
|
||
{
|
||
__asm
|
||
{
|
||
mov eax,ecx; //get mantissa bits
|
||
and ecx,0x7FF; //
|
||
|
||
shl eax,11; //get shift factor (shift)
|
||
mov edx,eax; //keep a copy for the sign
|
||
and eax,0xF; //get shift factor (mask)
|
||
|
||
shl ecx,eax; //shift mantissa to normal position
|
||
|
||
test edx,0x10; //signed ?
|
||
jnz _negme;
|
||
|
||
ret; //nop, return as is
|
||
|
||
_negme:
|
||
//yep, negate and return
|
||
neg eax;
|
||
ret;
|
||
|
||
}
|
||
}*/
|
||
|
||
void dsp_init()
|
||
{
|
||
memset(&dsp,0,sizeof(dsp));
|
||
memset(DSPData,0,sizeof(*DSPData));
|
||
|
||
dsp.dyndirty=true;
|
||
dsp.RBL=0x2000-1;
|
||
dsp.RBP=0;
|
||
dsp.regs.MDEC_CT=1;
|
||
|
||
|
||
//os_MakeExecutable(dsp.DynCode,sizeof(dsp.DynCode));
|
||
#if HOST_OS == OS_WINDOWS
|
||
DWORD old;
|
||
VirtualProtect(dsp.DynCode, sizeof(dsp.DynCode), PAGE_EXECUTE_READWRITE, &old);
|
||
#endif
|
||
|
||
}
|
||
void dsp_recompile();
|
||
|
||
void* dyna_realloc(void*ptr,u32 oldsize,u32 newsize)
|
||
{
|
||
return dsp.DynCode;
|
||
}
|
||
void _dsp_debug_step_start()
|
||
{
|
||
memset(&dsp.regs_init,0,sizeof(dsp.regs_init));
|
||
}
|
||
void _dsp_debug_step_end()
|
||
{
|
||
verify(dsp.regs_init.MAD_OUT);
|
||
verify(dsp.regs_init.MEM_ADDR);
|
||
verify(dsp.regs_init.MEM_RD_DATA);
|
||
verify(dsp.regs_init.MEM_WT_DATA);
|
||
verify(dsp.regs_init.FRC_REG);
|
||
verify(dsp.regs_init.ADRS_REG);
|
||
verify(dsp.regs_init.Y_REG);
|
||
|
||
//verify(dsp.regs_init.MDEC_CT); // -> its done on C
|
||
verify(dsp.regs_init.MWT_1);
|
||
verify(dsp.regs_init.MRD_1);
|
||
// verify(dsp.regs_init.MADRS); //THAT WAS not real, MEM_ADDR is the deal ;p
|
||
verify(dsp.regs_init.MEMS);
|
||
verify(dsp.regs_init.NOFL_1);
|
||
verify(dsp.regs_init.NOFL_2);
|
||
verify(dsp.regs_init.TEMPS);
|
||
verify(dsp.regs_init.EFREG);
|
||
}
|
||
#define nwtn(x) verify(!dsp.regs_init.x)
|
||
#define wtn(x) nwtn(x);dsp.regs_init.x=true;
|
||
|
||
//sign extend to 32 bits
|
||
void dsp_rec_se(x86_block& x86e,x86_gpr_reg reg,u32 src_sz,u32 dst_sz=0xFF)
|
||
{
|
||
if (dst_sz==0xFF)
|
||
dst_sz=src_sz;
|
||
//24 -> 32 (pad to 32 bits)
|
||
x86e.Emit(op_shl32,reg,32-src_sz);
|
||
//32 -> 24 (MSB propagation)
|
||
x86e.Emit(op_sar32,reg,32-dst_sz);
|
||
}
|
||
//Reads : MWT_1,MRD_1,MEM_ADDR
|
||
//Writes : Wire MEM_RD_DATA_NV
|
||
void dsp_rec_DRAM_CI(x86_block& x86e,_INST& prev_op,u32 step,x86_gpr_reg MEM_RD_DATA_NV)
|
||
{
|
||
nwtn(MWT_1);
|
||
nwtn(MRD_1);
|
||
nwtn(MEM_ADDR);
|
||
nwtn(MEM_WT_DATA);
|
||
|
||
//Request : step x (odd step)
|
||
//Operation : x+1 (even step)
|
||
//Data avail : x+2 (odd step, can request again)
|
||
if (!(step&1))
|
||
{
|
||
//Get and mask ram address :)
|
||
x86e.Emit(op_mov32,EAX,&dsp.regs.MEM_ADDR);
|
||
x86e.Emit(op_and32,EAX,AICA_RAM_MASK);
|
||
|
||
x86e.Emit(op_add32,EAX,(unat)aica_ram.data);
|
||
|
||
//prev. opcode did a mem read request ?
|
||
if (prev_op.MRD)
|
||
{
|
||
//Do the read [MEM_ADDRS] -> MEM_RD_DATA_NV
|
||
x86e.Emit(op_movsx16to32,MEM_RD_DATA_NV,x86_mrm(EAX));
|
||
}
|
||
//prev. opcode did a mem write request ?
|
||
if (prev_op.MWT)
|
||
{
|
||
//Do the write [MEM_ADDRS] <-MEM_WT_DATA
|
||
x86e.Emit(op_mov32,EDX,&dsp.regs.MEM_WT_DATA);
|
||
x86e.Emit(op_mov16,x86_mrm(EAX),EDX);
|
||
}
|
||
}
|
||
}
|
||
//Reads : ADRS_REG,MADRS,MDEC_CT
|
||
//Writes : MEM_ADDR
|
||
void dsp_rec_MEM_AGU(x86_block& x86e,_INST& op,u32 step)
|
||
{
|
||
nwtn(ADRS_REG);
|
||
nwtn(MEM_ADDR);
|
||
|
||
//These opcode fields are valid on odd steps (mem req. is only allowed then)
|
||
//MEM Request : step x
|
||
//Mem operation : step x+1 (address is available at this point)
|
||
if (step&1)
|
||
{
|
||
//Addrs is 16:1
|
||
x86e.Emit(op_mov32,EAX,&DSPData->MADRS[op.MASA]);
|
||
|
||
//Added if ADREB
|
||
if (op.ADREB)
|
||
x86e.Emit(op_add32,EAX,&dsp.regs.ADRS_REG);
|
||
|
||
//+1 if NXADR is set
|
||
if (op.NXADR)
|
||
x86e.Emit(op_add32,EAX,1);
|
||
|
||
//RBL warp around is here, according to docs, but that seems to cause _very_ bad results
|
||
// if (!op.TABLE)
|
||
// x86e.Emit(op_and32,EAX,dsp.RBL);
|
||
|
||
//MDEC_CT is added if !TABLE
|
||
if (!op.TABLE)
|
||
x86e.Emit(op_add32,EAX,&dsp.regs.MDEC_CT);
|
||
|
||
//RBL/RBP are constants for the program
|
||
//Apply RBL if !TABLE
|
||
//Else limit to 16 bit add
|
||
//*update* always limit to 16 bit add adter MDEC_CT ?
|
||
if (!op.TABLE)
|
||
x86e.Emit(op_and32,EAX,dsp.RBL);
|
||
else
|
||
x86e.Emit(op_and32,EAX,0xFFFF);
|
||
|
||
//Calculate the value !
|
||
//EAX*2 b/c it points to sample (16:1 of the address)
|
||
x86e.Emit(op_lea32,EDX,x86_mrm(EAX,sib_scale_2,x86_ptr::create(dsp.RBP)));
|
||
|
||
//Save the result to MEM_ADDR
|
||
x86e.Emit(op_mov32,&dsp.regs.MEM_ADDR,EDX);
|
||
}
|
||
wtn(MEM_ADDR);
|
||
}
|
||
//Reads : MEMS,MIXS,EXTS
|
||
//Writes : INPUTS (Wire)
|
||
void dsp_rec_INPUTS(x86_block& x86e,_INST& op,x86_gpr_reg INPUTS)
|
||
{
|
||
nwtn(MEMS);
|
||
|
||
//nwtn(MIXS); -> these are read only :)
|
||
//nwtn(EXTS);
|
||
|
||
//INPUTS is 24 bit, we convert everything to that
|
||
//Maby we dont need to convert, but just to sign extend ?
|
||
if(op.IRA<0x20)
|
||
{
|
||
x86e.Emit(op_mov32,INPUTS,&dsp.MEMS[op.IRA]);
|
||
dsp_rec_se(x86e,INPUTS,24);
|
||
}
|
||
else if(op.IRA<0x30)
|
||
{
|
||
x86e.Emit(op_mov32,INPUTS,&dsp.MIXS[op.IRA-0x20]);
|
||
dsp_rec_se(x86e,INPUTS,20,24);
|
||
}
|
||
else if(op.IRA<0x32)
|
||
{
|
||
x86e.Emit(op_mov32,ESI,&DSPData->EXTS[op.IRA-0x30]);
|
||
//x86e.Emit(op_shl32,INPUTS,8);
|
||
dsp_rec_se(x86e,INPUTS,16,24);
|
||
}
|
||
|
||
//Sign extend to 32 bits
|
||
//dsp_rec_se(x86e,INPUTS,24);
|
||
}
|
||
//Reads : MEM_RD_DATA,NO_FLT2
|
||
//Writes : MEMS
|
||
void dsp_rec_MEMS_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS)
|
||
{
|
||
nwtn(MEM_RD_DATA);
|
||
nwtn(NOFL_2);
|
||
|
||
//MEMS write reads from MEM_RD_DATA register (MEM_RD_DATA -> Converter -> MEMS).
|
||
//The converter's nofl flag has 2 steps delay (so that it can be set with the MRQ).
|
||
if (op.IWT)
|
||
{
|
||
x86e.Emit(op_movsx16to32,ECX,&dsp.regs.MEM_RD_DATA);
|
||
x86e.Emit(op_mov32,EAX,ECX);
|
||
|
||
//Pad and signed extend EAX
|
||
//x86e.Emit(op_shl32,EAX,16);
|
||
//x86e.Emit(op_sar32,EAX,8);
|
||
x86e.Emit(op_shl32,EAX,8);
|
||
|
||
if (SUPPORT_NOFL)
|
||
{
|
||
x86_Label* no_fl=x86e.CreateLabel(false,8);//no float conversions
|
||
|
||
//Do we have to convert ?
|
||
x86e.Emit(op_cmp32,&dsp.regs.NOFL_2,1);
|
||
x86e.Emit(op_je,no_fl);
|
||
{
|
||
//Convert !
|
||
x86e.Emit(op_call,x86_ptr_imm(UNPACK));
|
||
}
|
||
x86e.MarkLabel(no_fl);
|
||
}
|
||
x86e.Emit(op_mov32,&dsp.MEMS[op.IWA],EAX);
|
||
}
|
||
|
||
wtn(MEMS);
|
||
}
|
||
//Reads : MEM_RD_DATA_NV (Wire)
|
||
//Writes : MEM_RD_DATA
|
||
void dsp_rec_MEM_RD_DATA_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg MEM_RD_DATA_NV)
|
||
{
|
||
//Request : step x (odd step)
|
||
//Operation : x+1 (even step)
|
||
//Data avail : x+2 (odd step, can request again)
|
||
//The MEM_RD_DATA_NV wire exists only on even steps
|
||
if (!(step&1))
|
||
{
|
||
x86e.Emit(op_mov32,&dsp.regs.MEM_RD_DATA,MEM_RD_DATA_NV);
|
||
}
|
||
|
||
wtn(MEM_RD_DATA);
|
||
}
|
||
|
||
x86_mrm_t dsp_reg_GenerateTempsAddrs(x86_block& x86e,u32 TEMPS_NUM,x86_gpr_reg TEMPSaddrsreg)
|
||
{
|
||
x86e.Emit(op_mov32,TEMPSaddrsreg,&dsp.regs.MDEC_CT);
|
||
x86e.Emit(op_add32,TEMPSaddrsreg,TEMPS_NUM);
|
||
x86e.Emit(op_and32,TEMPSaddrsreg,127);
|
||
return x86_mrm(ECX,sib_scale_4,dsp.TEMP);
|
||
}
|
||
//Reads : INPUTS,TEMP,FRC_REG,COEF,Y_REG
|
||
//Writes : MAD_OUT_NV (Wire)
|
||
void dsp_rec_MAD(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS,x86_gpr_reg MAD_OUT_NV)
|
||
{
|
||
bool use_TEMP=op.XSEL==0 || (op.BSEL==0 && op.ZERO==0);
|
||
|
||
//TEMPS (if used) on ECX
|
||
const x86_gpr_reg TEMPS_reg=ECX;
|
||
if (use_TEMP)
|
||
{
|
||
//read temps
|
||
x86e.Emit(op_mov32,TEMPS_reg,dsp_reg_GenerateTempsAddrs(x86e,op.TRA,TEMPS_reg));
|
||
dsp_rec_se(x86e,TEMPS_reg,24);
|
||
}
|
||
|
||
x86_reg mul_x_input;
|
||
//X : 24 bits
|
||
if (op.XSEL==1)
|
||
{
|
||
//X=INPUTS
|
||
mul_x_input=INPUTS;
|
||
//x86e.Emit(op_mov32,EDX,INPUTS);
|
||
}
|
||
else
|
||
{
|
||
//X=TEMPS
|
||
mul_x_input=TEMPS_reg;
|
||
//x86e.Emit(op_mov32,EDX,TEMPS_reg);
|
||
}
|
||
|
||
//MUL Y in : EAX
|
||
//Y : 13 bits
|
||
switch(op.YSEL)
|
||
{
|
||
case 0:
|
||
//Y=FRC_REG[13]
|
||
x86e.Emit(op_mov32,EAX,&dsp.regs.FRC_REG);
|
||
dsp_rec_se(x86e,EAX,13);
|
||
break;
|
||
|
||
case 1:
|
||
//Y=COEF[13]
|
||
x86e.Emit(op_mov32,EAX,&DSPData->COEF[step]);
|
||
dsp_rec_se(x86e,EAX,16,13);
|
||
break;
|
||
|
||
case 2:
|
||
//Y=Y_REG[23:11] (Y_REG is 19 bits, INPUTS[23:4], so that is realy 19:7)
|
||
x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG);
|
||
dsp_rec_se(x86e,EAX,19,13);
|
||
break;
|
||
|
||
case 3:
|
||
//Y=0'Y_REG[15:4] (Y_REG is 19 bits, INPUTS[23:4], so that is realy 11:0)
|
||
x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG);
|
||
x86e.Emit(op_and32,0xFFF);//Clear bit 13+
|
||
break;
|
||
}
|
||
|
||
//Do the mul -- maby it has overflow protection ?
|
||
//24+13=37, -11 = 26
|
||
//that can be >>1 or >>2 on the shifter after the mul
|
||
x86e.Emit(op_imul32,mul_x_input);
|
||
//*NOTE* here, shrd is unsigned, but we have EDX signed, and we may only shift up to 11 bits from it
|
||
//so it works just fine :)
|
||
x86e.Emit(op_shrd32,EAX,EDX,10);
|
||
|
||
//cut the upper bits so that it is 26 bits signed
|
||
dsp_rec_se(x86e,EAX,26);
|
||
|
||
//Adder, takes MUL_OUT at EAX
|
||
//Adds B (EDX)
|
||
//Outputs EAX
|
||
|
||
if (!op.ZERO) //if zero is set the adder has no effect
|
||
{
|
||
if (op.BSEL==1)
|
||
{
|
||
//B=MAD_OUT[??]
|
||
//mad out is stored on s32 format, so no need for sign extension
|
||
x86e.Emit(op_mov32,EDX,&dsp.regs.MAD_OUT);
|
||
}
|
||
else
|
||
{
|
||
//B=TEMP[??]
|
||
//TEMPS is already sign extended, so no need for it
|
||
//Just converting 24 -> 26 bits using lea
|
||
x86e.Emit(op_lea32,EDX,x86_mrm(TEMPS_reg,sib_scale_4,0));
|
||
}
|
||
//Gating is applied here normally (ZERO).
|
||
//NEGB then inverts the value (NOT) (or 0 , if gated) and the adder adds +1 if NEGB is set.
|
||
//However, (~X)+1 = -X , and (~0)+1=0 so i skip the add
|
||
if (op.NEGB)
|
||
{
|
||
x86e.Emit(op_neg32,EDX);
|
||
}
|
||
|
||
//Add hm, is there overflow protection here ?
|
||
//The result of mul is on EAX, we modify that
|
||
x86e.Emit(op_add32,EAX,EDX);
|
||
}
|
||
|
||
//cut the upper bits so that it is 26 bits signed
|
||
dsp_rec_se(x86e,EAX,26);
|
||
|
||
//Write to MAD_OUT_NV wire :)
|
||
x86e.Emit(op_mov32,MAD_OUT_NV,EAX);
|
||
}
|
||
|
||
//Reads : INPUTS,MAD_OUT
|
||
//Writes : EFREG,TEMP,FRC_REG,ADRS_REG,MEM_WT_DATA
|
||
void dsp_rec_EFO_FB(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS)
|
||
{
|
||
nwtn(MAD_OUT);
|
||
//MAD_OUT is s32, no sign extension needed
|
||
x86e.Emit(op_mov32,EAX,&dsp.regs.MAD_OUT);
|
||
//sh .. l ?
|
||
switch(op.SHIFT)
|
||
{
|
||
case 0:
|
||
x86e.Emit(op_sar32,EAX,2);
|
||
//×1 Protected
|
||
x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288
|
||
x86e.Emit(op_cmp32,EAX,EDX);
|
||
x86e.Emit(op_cmovl32,EAX,EDX);
|
||
x86e.Emit(op_neg32,EDX);
|
||
x86e.Emit(op_cmp32,EAX,EDX);
|
||
x86e.Emit(op_cmovg32,EAX,EDX);
|
||
//protect !
|
||
break;
|
||
case 1:
|
||
//×2 Protected
|
||
x86e.Emit(op_sar32,EAX,1);
|
||
|
||
x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288
|
||
x86e.Emit(op_cmp32,EAX,EDX);
|
||
x86e.Emit(op_cmovl32,EAX,EDX);
|
||
x86e.Emit(op_not32,EDX);
|
||
x86e.Emit(op_cmp32,EAX,EDX);
|
||
x86e.Emit(op_cmovg32,EAX,EDX);
|
||
//protect !
|
||
break;
|
||
case 2:
|
||
//×2 Not protected
|
||
x86e.Emit(op_sar32,EAX,1);
|
||
dsp_rec_se(x86e,EAX,24);
|
||
break;
|
||
case 3:
|
||
//×1 Not protected
|
||
x86e.Emit(op_sar32,EAX,1);
|
||
x86e.Emit(op_shl32,EAX,2);
|
||
dsp_rec_se(x86e,EAX,24);
|
||
break;
|
||
}
|
||
|
||
//Write EFREG ?
|
||
if (op.EWT)
|
||
{
|
||
x86e.Emit(op_mov32,EDX,EAX);
|
||
//top 16 bits ? or lower 16 ?
|
||
//i use top 16, following the same rule as the input
|
||
x86e.Emit(op_sar32,EDX,4);
|
||
|
||
//write :)
|
||
x86e.Emit(op_mov16,&DSPData->EFREG[op.EWA],DX);
|
||
}
|
||
|
||
//Write TEMPS ?
|
||
if (op.TWT)
|
||
{
|
||
//Temps is 24 bit, stored as s32 (no conversion required)
|
||
|
||
//write it
|
||
x86e.Emit(op_mov32,dsp_reg_GenerateTempsAddrs(x86e,op.TWA,ECX),EAX);
|
||
}
|
||
|
||
//COMMON TO FRC_REG and ADRS_REG
|
||
//interpolation mode : shift1=1=shift0
|
||
//non interpolation : shift1!=1 && shift0!=1 ? ( why && ?) -- i implement it as ||
|
||
|
||
//Write to FRC_REG ?
|
||
if (op.FRCL)
|
||
{
|
||
if (op.SHIFT==3)
|
||
{
|
||
//FRC_REG[12:0]=Shift[23:11]
|
||
x86e.Emit(op_mov32,ECX,EAX);
|
||
x86e.Emit(op_sar32,ECX,11);
|
||
}
|
||
else
|
||
{
|
||
//FRC_REG[12:0]=0'Shift[11:0]
|
||
x86e.Emit(op_mov32,ECX,EAX);
|
||
x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 12 and up are 0'd
|
||
}
|
||
x86e.Emit(op_mov32,&dsp.regs.FRC_REG,ECX);
|
||
}
|
||
|
||
//Write to ADDRS_REG ?
|
||
if (op.ADRL)
|
||
{
|
||
if (op.SHIFT==3)
|
||
{
|
||
//ADRS_REG[11:0]=Shift[23,23,23,23,23,22:16]
|
||
x86e.Emit(op_mov32,ECX,EAX);
|
||
x86e.Emit(op_shl32,ECX,8); //bit31=bit 23
|
||
x86e.Emit(op_sar32,ECX,24); //bit 0 = bit16 (16+8=24)
|
||
}
|
||
else
|
||
{
|
||
//ADRS_REG[11:0]=0'Shift[23:12]
|
||
x86e.Emit(op_mov32,ECX,EAX);
|
||
x86e.Emit(op_sar32,ECX,12);
|
||
x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 11 and up are 0'd
|
||
}
|
||
x86e.Emit(op_mov32,&dsp.regs.ADRS_REG,ECX);
|
||
}
|
||
|
||
//MEM_WT_DATA write
|
||
//This kills off any non protected regs (EAX,EDX,ECX)
|
||
{
|
||
//pack ?
|
||
if (!op.NOFL && SUPPORT_NOFL)
|
||
{ //yes
|
||
x86e.Emit(op_mov32,ECX,EAX);
|
||
x86e.Emit(op_call,x86_ptr_imm(PACK));
|
||
}
|
||
else
|
||
{ //shift (look @ EFREG write for more info)
|
||
x86e.Emit(op_sar32,EAX,8);
|
||
}
|
||
//data in on EAX
|
||
x86e.Emit(op_mov32,&dsp.regs.MEM_WT_DATA,EAX);
|
||
}
|
||
|
||
//more stuff here
|
||
wtn(EFREG);
|
||
wtn(TEMPS);
|
||
wtn(FRC_REG);
|
||
wtn(ADRS_REG);
|
||
wtn(MEM_WT_DATA);
|
||
}
|
||
void dsp_recompile()
|
||
{
|
||
dsp.dyndirty=false;
|
||
|
||
x86_block x86e;
|
||
x86e.Init(dyna_realloc,dyna_realloc);
|
||
|
||
x86e.Emit(op_push32,EBX);
|
||
x86e.Emit(op_push32,EBP);
|
||
x86e.Emit(op_push32,ESI);
|
||
x86e.Emit(op_push32,EDI);
|
||
|
||
//OK.
|
||
//Input comes from mems, mixs and exts, as well as possible memory reads and writes
|
||
//mems is read/write (memory loads go there), mixs and exts are read only.
|
||
//There are various delays (registers) so i need to properly emulate (more on that later)
|
||
|
||
//Registers that can be written : MIXS,FRC_REG,ADRS_REG,EFREG,TEMP
|
||
|
||
//MRD, MWT, NOFL, TABLE, NXADR, ADREB, and MASA[4:0]
|
||
//Only allowed on odd steps, when counting from 1 (2,4,6, ...).That is even steps when counting from 0 (1,3,5, ...)
|
||
for(int step=0;step<128;++step)
|
||
{
|
||
u32* mpro=DSPData->MPRO+step*4;
|
||
u32 prev_step=(step-1)&127;
|
||
u32* prev_mpro=DSPData->MPRO+prev_step*4;
|
||
//if its a nop just go to the next opcode
|
||
//No, don't really do that, we need to propage opcode bits :p
|
||
//if (mpro[0]==0 && mpro[1]==0 && mpro[2]== 0 && mpro[3]==0)
|
||
// continue;
|
||
|
||
_INST op;
|
||
_INST prev_op;
|
||
DecodeInst(mpro,&op);
|
||
DecodeInst(prev_mpro,&prev_op);
|
||
|
||
//printf("[%d] "
|
||
// "TRA %d,TWT %d,TWA %d,XSEL %d,YSEL %d,IRA %d,IWT %d,IWA %d,TABLE %d,MWT %d,MRD %d,EWT %d,EWA %d,ADRL %d,FRCL %d,SHIFT %d,YRL %d,NEGB %d,ZERO %d,BSEL %d,NOFL %d,MASA %d,ADREB %d,NXADR %d\n"
|
||
// ,step
|
||
// ,op.TRA,op.TWT,op.TWA,op.XSEL,op.YSEL,op.IRA,op.IWT,op.IWA,op.TABLE,op.MWT,op.MRD,op.EWT,op.EWA,op.ADRL,op.FRCL,op.SHIFT,op.YRL,op.NEGB,op.ZERO,op.BSEL,op.NOFL,op.MASA,op.ADREB,op.NXADR);
|
||
|
||
//Dynarec !
|
||
_dsp_debug_step_start();
|
||
//DSP regs are on memory
|
||
//Wires stay on x86 regs, written to memory as fast as possible
|
||
|
||
//EDI=MEM_RD_DATA_NV
|
||
dsp_rec_DRAM_CI(x86e,prev_op,step,EDI);
|
||
|
||
//;)
|
||
//Address Generation Unit ! nothing spectacular really ...
|
||
dsp_rec_MEM_AGU(x86e,op,step);
|
||
|
||
//Calculate INPUTS wire
|
||
//ESI : INPUTS
|
||
dsp_rec_INPUTS(x86e,op,ESI);
|
||
|
||
//:o ?
|
||
//Write the MEMS register
|
||
dsp_rec_MEMS_WRITE(x86e,op,step,ESI);
|
||
|
||
//Write the MEM_RD_DATA regiter
|
||
//Last use of MEM_RD_DATA_NV(EDI)
|
||
dsp_rec_MEM_RD_DATA_WRITE(x86e,op,step,EDI);
|
||
//EDI is now free :D
|
||
|
||
//EDI is used for MAD_OUT_NV
|
||
//Mul-add
|
||
dsp_rec_MAD(x86e,op,step,ESI,EDI);
|
||
|
||
//Effect output/ Feedback
|
||
dsp_rec_EFO_FB(x86e,op,step,ESI);
|
||
|
||
//Write MAD_OUT_NV
|
||
{
|
||
x86e.Emit(op_mov32,&dsp.regs.MAD_OUT,EDI);
|
||
wtn(MAD_OUT);
|
||
}
|
||
//These are implemented here :p
|
||
|
||
//Inputs -> Y reg
|
||
//Last use of inputs (ESI) and its destructive at that ;p
|
||
{
|
||
if (op.YRL)
|
||
{
|
||
x86e.Emit(op_sar32,ESI,4);//[23:4]
|
||
x86e.Emit(op_mov32,&dsp.regs.Y_REG,ESI);
|
||
|
||
}
|
||
wtn(Y_REG);
|
||
}
|
||
|
||
//NOFL delay propagation :)
|
||
{
|
||
//NOFL_2=NOFL_1
|
||
x86e.Emit(op_mov32,EAX,&dsp.regs.NOFL_1);
|
||
x86e.Emit(op_mov32,&dsp.regs.NOFL_2,EAX);
|
||
//NOFL_1 = NOFL
|
||
x86e.Emit(op_mov32,&dsp.regs.NOFL_1,op.NOFL);
|
||
|
||
wtn(NOFL_2);
|
||
wtn(NOFL_1);
|
||
}
|
||
|
||
//MWT_1/MRD_1 propagation
|
||
{
|
||
//MWT_1=MWT
|
||
x86e.Emit(op_mov32,&dsp.regs.MWT_1,op.MWT);
|
||
//MRD_1=MRD
|
||
x86e.Emit(op_mov32,&dsp.regs.MRD_1,op.MRD);
|
||
|
||
wtn(MWT_1);
|
||
wtn(MRD_1);
|
||
}
|
||
|
||
_dsp_debug_step_end();
|
||
}
|
||
|
||
//Need to decrement MDEC_CT here :)
|
||
x86e.Emit(op_pop32,EDI);
|
||
x86e.Emit(op_pop32,ESI);
|
||
x86e.Emit(op_pop32,EBP);
|
||
x86e.Emit(op_pop32,EBX);
|
||
x86e.Emit(op_ret);
|
||
x86e.Generate();
|
||
}
|
||
|
||
|
||
|
||
void dsp_print_mame();
|
||
void dsp_step_mame();
|
||
void dsp_emu_grandia();
|
||
void dsp_step()
|
||
{
|
||
//clear output reg
|
||
memset(DSPData->EFREG,0,sizeof(DSPData->EFREG));
|
||
|
||
if (dsp.dyndirty)
|
||
{
|
||
dsp.dyndirty=false;
|
||
//dsp_print_mame();
|
||
dsp_recompile();
|
||
}
|
||
//dsp_step_mame();
|
||
//dsp_emu_grandia();
|
||
|
||
//run the code :p
|
||
((void (*)())&dsp.DynCode)();
|
||
|
||
dsp.regs.MDEC_CT--;
|
||
if (dsp.regs.MDEC_CT==0)
|
||
dsp.regs.MDEC_CT=dsp.RBL;
|
||
//here ? or before ?
|
||
//memset(DSP->MIXS,0,4*16);
|
||
}
|
||
|
||
void dsp_writenmem(u32 addr)
|
||
{
|
||
addr-=0x3000;
|
||
//COEF : native
|
||
//MEMS : native
|
||
//MPRO : native
|
||
if (addr>=0x400 && addr<0xC00)
|
||
{
|
||
dsp.dyndirty=true;
|
||
}
|
||
|
||
/*
|
||
//buffered DSP state
|
||
//24 bit wide
|
||
u32 TEMP[128];
|
||
//24 bit wide
|
||
u32 MEMS[32];
|
||
//20 bit wide
|
||
s32 MIXS[16];
|
||
*/
|
||
}
|
||
|
||
void dsp_readmem(u32 addr)
|
||
{
|
||
//nothing ? :p
|
||
}
|
||
|
||
void dsp_term()
|
||
{
|
||
}
|
||
#endif
|