flycast/core/hw/aica/dsp.cpp

838 lines
19 KiB
C++
Raw Normal View History

2018-07-29 19:47:30 +00:00
#include "dsp.h"
2013-12-19 17:10:14 +00:00
#include "aica_mem.h"
#include "hw/aica/aica_if.h"
#include "oslib/oslib.h"
2014-12-17 06:12:08 +00:00
/*
DSP rec_v1
Tries to emulate a guesstimation of the aica dsp, by directly emitting x86 opcodes.
This was my first dsp implementation, as implemented for nullDC 1.0.3.
This was derived from a schematic I drew for the dsp, based on
liberal interpretation of known specs, the saturn dsp, digital
electronics assumptions, as well as "best-fitted" my typical
test game suite.
Initial code by skmp, now part of the reicast project.
See LICENSE & COPYRIGHT files for further details
*/
DECL_ALIGN(4096) dsp_t dsp;
2013-12-19 17:10:14 +00:00
2018-07-29 19:47:30 +00:00
//Packs a 24 bit signed sample into the 16 bit DSP float format :
//	bit 15     : sign (copy of bit 23 of the input)
//	bits 14:11 : exponent, 0..12 (how far the value was shifted left to normalise it)
//	bits 10:0  : mantissa
//Only the low 24 bits of val take part in the conversion.
u16 DYNACALL PACK(s32 val)
{
	//Work on the raw bit pattern so that shifting a negative sample is well defined
	const u32 raw = (u32)val;
	const u32 sign = (raw >> 23) & 0x1;

	//Bit n of temp is set when bits n and n-1 of the input differ, so the number
	//of clear bits at the top (from bit 23 down) is the number of redundant sign bits
	u32 temp = (raw ^ (raw << 1)) & 0xFFFFFF;
	u32 exponent = 0;
	while (exponent < 12 && !(temp & 0x800000))
	{
		temp <<= 1;
		exponent++;
	}

	u32 mantissa;
	if (exponent < 12)
		mantissa = (raw << exponent) & 0x3FFFFF;	//normalised : sign bit and its inverse are dropped
	else
		mantissa = raw << 11;				//denormal : keep the low bits as they are

	//Keep exactly 11 bits. Without the mask a negative denormal leaked its
	//sign extension into bits 11..15 and overwrote the exponent field.
	mantissa = (mantissa >> 11) & 0x7FF;

	return (u16)((sign << 15) | (exponent << 11) | mantissa);
}
//Expands a 16 bit DSP float (bit 15 sign, bits 14:11 exponent, bits 10:0 mantissa)
//into a sign extended 24 bit sample.
s32 DYNACALL UNPACK(u16 val)
{
	const u32 sign = (val >> 15) & 0x1;
	u32 exponent = (val >> 11) & 0xF;
	const u32 mantissa = val & 0x7FF;

	//Mantissa goes to bits 21:11, bit 22 and bit 23 are rebuilt below
	u32 bits = mantissa << 11;
	if (exponent > 11)
	{
		//Denormal : the exponent is capped and bit 22 is a plain copy of the sign.
		//Leaving bit 22 clear here made negative denormals decode wrong
		//(0xE7FF, the packed form of -1, came back as -2049).
		exponent = 11;
		bits |= sign << 22;
	}
	else
	{
		//Normalised : bit 22 is always the inverse of the sign
		bits |= (sign ^ 1) << 22;
	}
	bits |= sign << 23;

	//Sign extend 24 -> 32 bits, then undo the normalisation shift
	s32 uval = (s32)(bits << 8);
	uval >>= 8;
	uval >>= exponent;
	return uval;
}
2019-01-23 20:14:51 +00:00
//Splits one microprogram step (four words at IPtr) into the fields of *i.
void DecodeInst(u32 *IPtr,_INST *i)
{
	//Word 0 : TEMP read/write addressing
	const u32 word0 = IPtr[0];
	i->TRA = (word0 >> 9) & 0x7F;
	i->TWT = (word0 >> 8) & 0x01;
	i->TWA = (word0 >> 1) & 0x7F;

	//Word 1 : multiplier input selection and INPUTS/MEMS addressing
	const u32 word1 = IPtr[1];
	i->XSEL = (word1 >> 15) & 0x01;
	i->YSEL = (word1 >> 13) & 0x03;
	i->IRA  = (word1 >> 7) & 0x3F;
	i->IWT  = (word1 >> 6) & 0x01;
	i->IWA  = (word1 >> 1) & 0x1F;

	//Word 2 : memory access, output and adder control bits
	const u32 word2 = IPtr[2];
	i->TABLE = (word2 >> 15) & 0x01;
	i->MWT   = (word2 >> 14) & 0x01;
	i->MRD   = (word2 >> 13) & 0x01;
	i->EWT   = (word2 >> 12) & 0x01;
	i->EWA   = (word2 >> 8) & 0x0F;
	i->ADRL  = (word2 >> 7) & 0x01;
	i->FRCL  = (word2 >> 6) & 0x01;
	i->SHIFT = (word2 >> 4) & 0x03;
	i->YRL   = (word2 >> 3) & 0x01;
	i->NEGB  = (word2 >> 2) & 0x01;
	i->ZERO  = (word2 >> 1) & 0x01;
	i->BSEL  = (word2 >> 0) & 0x01;

	//Word 3 : address generation (bit positions marked as uncertain by the original author)
	const u32 word3 = IPtr[3];
	i->NOFL  = (word3 >> 15) & 1;		//????
	//i->COEF=(word3>>9)&0x3f;
	i->MASA  = (word3 >> 9) & 0x3f;	//???
	i->ADREB = (word3 >> 8) & 0x1;
	i->NXADR = (word3 >> 7) & 0x1;
}
2015-07-25 06:39:35 +00:00
#if HOST_CPU == CPU_X86 && FEAT_DSPREC == DYNAREC_JIT
2013-12-19 17:10:14 +00:00
#include "emitter/x86_emitter.h"
//When false the recompiler never emits calls to PACK/UNPACK : both float
//conversion paths in the emitted code are guarded by this flag.
const bool SUPPORT_NOFL=false;
//assert() in this file goes through verify()
#define assert verify
//NOTE(review): presumably silences MSVC C4311 (pointer truncation) -- confirm
#pragma warning(disable:4311)
2014-12-17 06:12:08 +00:00
#define DYNBUF 0x10000
2013-12-19 17:10:14 +00:00
/*
//#define USEFLOATPACK
//pack s24 to s1e4s11
naked u16 packasm(s32 val)
{
__asm
{
mov edx,ecx; //eax will be sign
and edx,0x80000; //get the sign
2013-12-19 17:10:14 +00:00
jz poz;
neg ecx;
poz:
bsr eax,ecx;
jz _zero;
//24 -> 11
//13 -> 0
//12..0 -> 0
sub eax,11;
cmovs eax,0; //if <0 -> 0
2013-12-19 17:10:14 +00:00
shr ecx,eax; //shift out mantissa as needed (yeah i know, no rounding here and all .. )
2013-12-19 17:10:14 +00:00
shr eax,12; //[14:12] is exp
or edx,ecx; //merge [15] | [11:0]
or eax,edx; //merge [14:12] | ([15] | [11:0]), result on eax
2013-12-19 17:10:14 +00:00
ret;
_zero:
xor eax,eax;
ret;
}
}
//ONLY lower 16 bits are valid, rest are ignored but do movzx to avoid partial stalls :)
2013-12-19 17:10:14 +00:00
naked s32 unpackasm(u32 val)
{
__asm
{
mov eax,ecx; //get mantissa bits
and ecx,0x7FF; //
2013-12-19 17:10:14 +00:00
shl eax,11; //get shift factor (shift)
mov edx,eax; //keep a copy for the sign
and eax,0xF; //get shift factor (mask)
2013-12-19 17:10:14 +00:00
shl ecx,eax; //shift mantissa to normal position
2013-12-19 17:10:14 +00:00
test edx,0x10; //signed ?
2013-12-19 17:10:14 +00:00
jnz _negme;
ret; //nop, return as is
2013-12-19 17:10:14 +00:00
_negme:
//yep, negate and return
neg eax;
ret;
}
}*/
//Clears the DSP state and the DSP data registers, applies the few non-zero
//defaults and flags the microprogram as needing (re)compilation.
void dsp_init()
{
	//Everything starts zeroed
	memset(&dsp,0,sizeof(dsp));
	memset(DSPData,0,sizeof(*DSPData));

	//Ring buffer defaults
	dsp.RBP=0;
	dsp.RBL=0x2000-1;

	//Non-zero start value for the decrementing counter (dsp_step reloads it when it reaches 0)
	dsp.regs.MDEC_CT=1;

	//No code has been generated yet
	dsp.dyndirty=true;

	//Hand the code buffer to mem_region_set_exec (presumably marks it executable -- confirm)
	mem_region_set_exec(dsp.DynCode, sizeof(dsp.DynCode));
}
void dsp_recompile();
//Allocation callback passed to x86_block::Init by dsp_recompile.
//All three arguments are ignored : the fixed dsp.DynCode buffer is always returned.
void* dyna_realloc(void*ptr,u32 oldsize,u32 newsize)
{
return dsp.DynCode;
}
//Clears every "written this step" flag in dsp.regs_init.
//Called by dsp_recompile before the code for a step is emitted.
void _dsp_debug_step_start()
{
memset(&dsp.regs_init,0,sizeof(dsp.regs_init));
}
//Checks (through verify) that every tracked register had its dsp.regs_init flag
//set while the code for the current step was emitted.
//Called by dsp_recompile after the last emitter of a step.
void _dsp_debug_step_end()
{
verify(dsp.regs_init.MAD_OUT);
verify(dsp.regs_init.MEM_ADDR);
verify(dsp.regs_init.MEM_RD_DATA);
verify(dsp.regs_init.MEM_WT_DATA);
verify(dsp.regs_init.FRC_REG);
verify(dsp.regs_init.ADRS_REG);
verify(dsp.regs_init.Y_REG);
//MDEC_CT is not checked here, it is updated from C code
//verify(dsp.regs_init.MDEC_CT); // -> its done on C
verify(dsp.regs_init.MWT_1);
verify(dsp.regs_init.MRD_1);
//MADRS is not tracked, MEM_ADDR is checked instead
// verify(dsp.regs_init.MADRS); //THAT WAS not real, MEM_ADDR is the deal ;p
verify(dsp.regs_init.MEMS);
verify(dsp.regs_init.NOFL_1);
verify(dsp.regs_init.NOFL_2);
verify(dsp.regs_init.TEMPS);
verify(dsp.regs_init.EFREG);
}
2018-07-29 19:47:30 +00:00
//Ordering checks used while emitting one step (flags live in dsp.regs_init) :
//nwtn(x) : verify that register x has Not been WriTteN yet in this step
//wtn(x)  : same check, then mark register x as written
//wtn is wrapped in do/while(0) so that it stays a single statement, e.g. under an unbraced if/else
#define nwtn(x) verify(!dsp.regs_init.x)
#define wtn(x) do { nwtn(x); dsp.regs_init.x=true; } while (0)
2013-12-19 17:10:14 +00:00
//Emits a sign extension of the low src_sz bits of reg.
//The value is first moved to the top of the register (shl) and then brought
//back down with an arithmetic shift (sar). With the default dst_sz the result
//is the src_sz bit value sign extended to 32 bits; with a different dst_sz the
//result is additionally rescaled to dst_sz bits (only the top dst_sz bits are
//kept when dst_sz is smaller than src_sz).
void dsp_rec_se(x86_block& x86e,x86_gpr_reg reg,u32 src_sz,u32 dst_sz=0xFF)
{
	//0xFF means "same size as the source"
	const u32 result_bits = (dst_sz==0xFF) ? src_sz : dst_sz;

	//Pad up to 32 bits ...
	x86e.Emit(op_shl32,reg,32-src_sz);
	//... and propagate the MSB back down
	x86e.Emit(op_sar32,reg,32-result_bits);
}
//Reads : MWT_1,MRD_1,MEM_ADDR
//Writes : Wire MEM_RD_DATA_NV
//Emits the memory access requested by the previous step.
//Timing, as modelled here :
//	Request    : step x   (odd step)
//	Operation  : step x+1 (even step)
//	Data avail : step x+2 (odd step, a new request can be made)
void dsp_rec_DRAM_CI(x86_block& x86e,_INST& prev_op,u32 step,x86_gpr_reg MEM_RD_DATA_NV)
{
	//None of these may have been updated yet in this step
	nwtn(MWT_1);
	nwtn(MRD_1);
	nwtn(MEM_ADDR);
	nwtn(MEM_WT_DATA);

	//The access itself only happens on even steps
	if (step&1)
		return;

	//EAX = host address : masked MEM_ADDR plus the base of the sound ram
	x86e.Emit(op_mov32,EAX,&dsp.regs.MEM_ADDR);
	x86e.Emit(op_and32,EAX,AICA_RAM_MASK);
	x86e.Emit(op_add32,EAX,(unat)aica_ram.data);

	//Read requested by the previous opcode : [EAX] -> MEM_RD_DATA_NV (16 bit, sign extended)
	if (prev_op.MRD)
	{
		x86e.Emit(op_movsx16to32,MEM_RD_DATA_NV,x86_mrm(EAX));
	}

	//Write requested by the previous opcode : MEM_WT_DATA -> [EAX] (16 bit store)
	if (prev_op.MWT)
	{
		x86e.Emit(op_mov32,EDX,&dsp.regs.MEM_WT_DATA);
		x86e.Emit(op_mov16,x86_mrm(EAX),EDX);
	}
}
//Reads : ADRS_REG,MADRS,MDEC_CT
//Writes : MEM_ADDR
//Emits the address generation for a memory request.
//The address fields of the opcode are only honoured on odd steps :
//	MEM request   : step x
//	MEM operation : step x+1 (the address is available by then)
void dsp_rec_MEM_AGU(x86_block& x86e,_INST& op,u32 step)
{
	nwtn(ADRS_REG);
	nwtn(MEM_ADDR);

	const bool odd_step = (step&1)!=0;
	if (odd_step)
	{
		//Base address from the MADRS table (sample units, 16:1)
		x86e.Emit(op_mov32,EAX,&DSPData->MADRS[op.MASA]);

		//Optional offset from ADRS_REG
		if (op.ADREB)
			x86e.Emit(op_add32,EAX,&dsp.regs.ADRS_REG);

		//Optional +1
		if (op.NXADR)
			x86e.Emit(op_add32,EAX,1);

		//The docs place the RBL wrap around before MDEC_CT is added, but that
		//gave _very_ bad results, so the masking is done after the add instead :
		// if (!op.TABLE)
		//	x86e.Emit(op_and32,EAX,dsp.RBL);

		//RBL/RBP are baked into the generated code as constants
		if (op.TABLE)
		{
			//Table access : no MDEC_CT, just limit to a 16 bit add
			x86e.Emit(op_and32,EAX,0xFFFF);
		}
		else
		{
			//Ring buffer access : add MDEC_CT, then wrap with RBL
			x86e.Emit(op_add32,EAX,&dsp.regs.MDEC_CT);
			x86e.Emit(op_and32,EAX,dsp.RBL);
		}

		//EDX = EAX*2 + RBP (EAX counts samples, so it is scaled by 2)
		x86e.Emit(op_lea32,EDX,x86_mrm(EAX,sib_scale_2,x86_ptr::create(dsp.RBP)));

		//Latch the result
		x86e.Emit(op_mov32,&dsp.regs.MEM_ADDR,EDX);
	}

	wtn(MEM_ADDR);
}
//Reads : MEMS,MIXS,EXTS
//Writes : INPUTS (Wire)
//Emits the load of the INPUTS wire, selected by op.IRA :
//	0x00..0x1F : MEMS[IRA]      (24 bit)
//	0x20..0x2F : MIXS[IRA-0x20] (20 bit, rescaled to 24)
//	0x30..0x31 : EXTS[IRA-0x30] (16 bit, rescaled to 24)
//For any other IRA nothing is emitted and INPUTS keeps whatever value the
//register held before.
void dsp_rec_INPUTS(x86_block& x86e,_INST& op,x86_gpr_reg INPUTS)
{
	nwtn(MEMS);
	//MIXS and EXTS are read only for the DSP, so they need no ordering check
	//nwtn(MIXS);
	//nwtn(EXTS);

	//INPUTS is 24 bit, every source is converted to that
	//Maybe there is no need to convert, only to sign extend ?
	if(op.IRA<0x20)
	{
		x86e.Emit(op_mov32,INPUTS,&dsp.MEMS[op.IRA]);
		dsp_rec_se(x86e,INPUTS,24);
	}
	else if(op.IRA<0x30)
	{
		x86e.Emit(op_mov32,INPUTS,&dsp.MIXS[op.IRA-0x20]);
		dsp_rec_se(x86e,INPUTS,20,24);
	}
	else if(op.IRA<0x32)
	{
		//Load into the INPUTS register like the other branches do. This used to be
		//hard-coded to ESI, which only worked because the caller passes ESI.
		x86e.Emit(op_mov32,INPUTS,&DSPData->EXTS[op.IRA-0x30]);
		//x86e.Emit(op_shl32,INPUTS,8);
		dsp_rec_se(x86e,INPUTS,16,24);
	}

	//Sign extend to 32 bits
	//dsp_rec_se(x86e,INPUTS,24);
}
//Reads : MEM_RD_DATA,NO_FLT2
//Writes : MEMS
//Emits the MEMS[IWA] update (MEM_RD_DATA -> converter -> MEMS) when op.IWT is set.
//The converter's NOFL flag is delayed by 2 steps so that it can be set together
//with the memory request.
void dsp_rec_MEMS_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS)
{
	nwtn(MEM_RD_DATA);
	nwtn(NOFL_2);

	if (op.IWT)
	{
		//ECX = 16 bit sample, sign extended; EAX = the same sample scaled up to 24 bit
		x86e.Emit(op_movsx16to32,ECX,&dsp.regs.MEM_RD_DATA);
		x86e.Emit(op_mov32,EAX,ECX);
		//Earlier variant of the padding, kept for reference :
		//x86e.Emit(op_shl32,EAX,16);
		//x86e.Emit(op_sar32,EAX,8);
		x86e.Emit(op_shl32,EAX,8);

		if (SUPPORT_NOFL)
		{
			//Jump target used when no float conversion is wanted
			x86_Label* skip_unpack=x86e.CreateLabel(false,8);

			//NOFL_2 == 1 -> keep the linear value already in EAX
			x86e.Emit(op_cmp32,&dsp.regs.NOFL_2,1);
			x86e.Emit(op_je,skip_unpack);

			//Otherwise convert through UNPACK
			x86e.Emit(op_call,x86_ptr_imm(UNPACK));

			x86e.MarkLabel(skip_unpack);
		}

		x86e.Emit(op_mov32,&dsp.MEMS[op.IWA],EAX);
	}

	wtn(MEMS);
}
//Reads : MEM_RD_DATA_NV (Wire)
//Writes : MEM_RD_DATA
//Emits the latch of the memory read wire into the MEM_RD_DATA register.
//	Request    : step x   (odd step)
//	Operation  : step x+1 (even step)
//	Data avail : step x+2 (odd step, can request again)
void dsp_rec_MEM_RD_DATA_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg MEM_RD_DATA_NV)
{
	//The MEM_RD_DATA_NV wire only exists on even steps
	const bool even_step = (step&1)==0;
	if (even_step)
	{
		x86e.Emit(op_mov32,&dsp.regs.MEM_RD_DATA,MEM_RD_DATA_NV);
	}

	wtn(MEM_RD_DATA);
}
//Emits the computation of a TEMP ring index into TEMPSaddrsreg :
//	TEMPSaddrsreg = (MDEC_CT + TEMPS_NUM) & 127
//and returns the memory operand addressing dsp.TEMP[TEMPSaddrsreg] (4 byte entries).
//TEMPSaddrsreg is clobbered by the emitted code.
x86_mrm_t dsp_reg_GenerateTempsAddrs(x86_block& x86e,u32 TEMPS_NUM,x86_gpr_reg TEMPSaddrsreg)
{
	x86e.Emit(op_mov32,TEMPSaddrsreg,&dsp.regs.MDEC_CT);
	x86e.Emit(op_add32,TEMPSaddrsreg,TEMPS_NUM);
	x86e.Emit(op_and32,TEMPSaddrsreg,127);

	//Index with the register that actually holds the result. This used to be
	//hard-coded to ECX, which only worked because every caller passes ECX.
	return x86_mrm(TEMPSaddrsreg,sib_scale_4,dsp.TEMP);
}
//Reads : INPUTS,TEMP,FRC_REG,COEF,Y_REG
//Writes : MAD_OUT_NV (Wire)
//Emits the multiply-add of one step :
//	X (24 bit) : INPUTS or TEMP[TRA], selected by XSEL
//	Y (13 bit) : FRC_REG, COEF[step] or a slice of Y_REG, selected by YSEL
//	B          : MAD_OUT or TEMP[TRA] (BSEL), optionally negated (NEGB), or nothing (ZERO)
//The 26 bit signed result is left in MAD_OUT_NV.
//Emitted code uses EAX, ECX and EDX as scratch registers.
void dsp_rec_MAD(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS,x86_gpr_reg MAD_OUT_NV)
{
	//TEMP is needed either as the X input or as the B input of the adder
	bool use_TEMP=op.XSEL==0 || (op.BSEL==0 && op.ZERO==0);

	//TEMPS (if used) on ECX
	const x86_gpr_reg TEMPS_reg=ECX;
	if (use_TEMP)
	{
		//read temps (the address computation is emitted first, then the load)
		x86e.Emit(op_mov32,TEMPS_reg,dsp_reg_GenerateTempsAddrs(x86e,op.TRA,TEMPS_reg));
		dsp_rec_se(x86e,TEMPS_reg,24);
	}

	//X : 24 bits
	x86_reg mul_x_input;
	if (op.XSEL==1)
	{
		//X=INPUTS
		mul_x_input=INPUTS;
	}
	else
	{
		//X=TEMPS
		mul_x_input=TEMPS_reg;
	}

	//MUL Y in : EAX
	//Y : 13 bits
	switch(op.YSEL)
	{
	case 0:
		//Y=FRC_REG[13]
		x86e.Emit(op_mov32,EAX,&dsp.regs.FRC_REG);
		dsp_rec_se(x86e,EAX,13);
		break;

	case 1:
		//Y=COEF[13]
		x86e.Emit(op_mov32,EAX,&DSPData->COEF[step]);
		dsp_rec_se(x86e,EAX,16,13);
		break;

	case 2:
		//Y=Y_REG[23:11] (Y_REG is 19 bits, INPUTS[23:4], so that is really 19:7)
		x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG);
		dsp_rec_se(x86e,EAX,19,13);
		break;

	case 3:
		//Y=0'Y_REG[15:4] (Y_REG is 19 bits, INPUTS[23:4], so that is really 11:0)
		x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG);
		//Clear bit 12 and up. The destination register is now named explicitly;
		//the and used to be emitted with only the immediate operand.
		x86e.Emit(op_and32,EAX,0xFFF);
		break;
	}

	//Do the mul -- maybe it has overflow protection ?
	//24+13=37, -11 = 26
	//that can be >>1 or >>2 on the shifter after the mul
	x86e.Emit(op_imul32,mul_x_input);

	//*NOTE* here, shrd is unsigned, but we have EDX signed, and we may only shift
	//up to 11 bits from it, so it works just fine
	x86e.Emit(op_shrd32,EAX,EDX,10);

	//cut the upper bits so that it is 26 bits signed
	dsp_rec_se(x86e,EAX,26);

	//Adder : takes MUL_OUT in EAX, adds B (EDX), result in EAX
	if (!op.ZERO) //if ZERO is set the adder has no effect
	{
		if (op.BSEL==1)
		{
			//B=MAD_OUT[??]
			//MAD_OUT is stored in s32 format, so no need for sign extension
			x86e.Emit(op_mov32,EDX,&dsp.regs.MAD_OUT);
		}
		else
		{
			//B=TEMP[??]
			//TEMPS is already sign extended, so only the 24 -> 26 bit
			//conversion (x4) is needed, done with lea
			x86e.Emit(op_lea32,EDX,x86_mrm(TEMPS_reg,sib_scale_4,0));
		}

		//Gating is normally applied here (ZERO).
		//NEGB then inverts the value (NOT) (or 0, if gated) and the adder adds +1 if NEGB is set.
		//However, (~X)+1 = -X and (~0)+1 = 0, so a plain neg is emitted instead
		if (op.NEGB)
		{
			x86e.Emit(op_neg32,EDX);
		}

		//Add -- is there overflow protection here ?
		//The result of the mul is in EAX, it is modified in place
		x86e.Emit(op_add32,EAX,EDX);
	}

	//cut the upper bits so that it is 26 bits signed
	dsp_rec_se(x86e,EAX,26);

	//Write to the MAD_OUT_NV wire
	x86e.Emit(op_mov32,MAD_OUT_NV,EAX);
}
//Reads : INPUTS,MAD_OUT
2013-12-19 17:10:14 +00:00
//Writes : EFREG,TEMP,FRC_REG,ADRS_REG,MEM_WT_DATA
void dsp_rec_EFO_FB(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS)
{
nwtn(MAD_OUT);
//MAD_OUT is s32, no sign extension needed
2013-12-19 17:10:14 +00:00
x86e.Emit(op_mov32,EAX,&dsp.regs.MAD_OUT);
//sh .. l ?
switch(op.SHIFT)
{
case 0:
x86e.Emit(op_sar32,EAX,2);
//×1 Protected
x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovl32,EAX,EDX);
x86e.Emit(op_neg32,EDX);
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovg32,EAX,EDX);
//protect !
break;
case 1:
//×2 Protected
x86e.Emit(op_sar32,EAX,1);
x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovl32,EAX,EDX);
x86e.Emit(op_not32,EDX);
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovg32,EAX,EDX);
//protect !
break;
case 2:
//×2 Not protected
x86e.Emit(op_sar32,EAX,1);
dsp_rec_se(x86e,EAX,24);
break;
case 3:
//×1 Not protected
x86e.Emit(op_sar32,EAX,1);
x86e.Emit(op_shl32,EAX,2);
dsp_rec_se(x86e,EAX,24);
break;
}
//Write EFREG ?
if (op.EWT)
{
x86e.Emit(op_mov32,EDX,EAX);
//top 16 bits ? or lower 16 ?
//i use top 16, following the same rule as the input
2013-12-19 17:10:14 +00:00
x86e.Emit(op_sar32,EDX,4);
//write :)
x86e.Emit(op_mov16,&DSPData->EFREG[op.EWA],DX);
}
//Write TEMPS ?
if (op.TWT)
{
//Temps is 24 bit, stored as s32 (no conversion required)
2013-12-19 17:10:14 +00:00
//write it
x86e.Emit(op_mov32,dsp_reg_GenerateTempsAddrs(x86e,op.TWA,ECX),EAX);
}
//COMMON TO FRC_REG and ADRS_REG
//interpolation mode : shift1=1=shift0
//non interpolation : shift1!=1 && shift0!=1 ? ( why && ?) -- i implement it as ||
//Write to FRC_REG ?
if (op.FRCL)
{
if (op.SHIFT==3)
{
//FRC_REG[12:0]=Shift[23:11]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_sar32,ECX,11);
}
else
{
//FRC_REG[12:0]=0'Shift[11:0]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 12 and up are 0'd
}
x86e.Emit(op_mov32,&dsp.regs.FRC_REG,ECX);
}
//Write to ADDRS_REG ?
if (op.ADRL)
{
if (op.SHIFT==3)
{
//ADRS_REG[11:0]=Shift[23,23,23,23,23,22:16]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_shl32,ECX,8); //bit31=bit 23
x86e.Emit(op_sar32,ECX,24); //bit 0 = bit16 (16+8=24)
}
else
{
//ADRS_REG[11:0]=0'Shift[23:12]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_sar32,ECX,12);
x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 11 and up are 0'd
}
x86e.Emit(op_mov32,&dsp.regs.ADRS_REG,ECX);
}
//MEM_WT_DATA write
//This kills off any non protected regs (EAX,EDX,ECX)
{
//pack ?
if (!op.NOFL && SUPPORT_NOFL)
{ //yes
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_call,x86_ptr_imm(PACK));
}
else
{ //shift (look @ EFREG write for more info)
x86e.Emit(op_sar32,EAX,8);
}
//data in on EAX
x86e.Emit(op_mov32,&dsp.regs.MEM_WT_DATA,EAX);
}
//more stuff here
wtn(EFREG);
wtn(TEMPS);
wtn(FRC_REG);
wtn(ADRS_REG);
wtn(MEM_WT_DATA);
}
void dsp_recompile()
{
dsp.dyndirty=false;
x86_block x86e;
x86e.Init(dyna_realloc,dyna_realloc);
x86e.Emit(op_push32,EBX);
x86e.Emit(op_push32,EBP);
x86e.Emit(op_push32,ESI);
x86e.Emit(op_push32,EDI);
//OK.
//Input comes from mems, mixs and exts, as well as possible memory reads and writes
//mems is read/write (memory loads go there), mixs and exts are read only.
//There are various delays (registers) so i need to properly emulate (more on that later)
//Registers that can be written : MIXS,FRC_REG,ADRS_REG,EFREG,TEMP
2013-12-19 17:10:14 +00:00
//MRD, MWT, NOFL, TABLE, NXADR, ADREB, and MASA[4:0]
//Only allowed on odd steps, when counting from 1 (2,4,6, ...).That is even steps when counting from 0 (1,3,5, ...)
2013-12-19 17:10:14 +00:00
for(int step=0;step<128;++step)
{
u32* mpro=DSPData->MPRO+step*4;
u32 prev_step=(step-1)&127;
u32* prev_mpro=DSPData->MPRO+prev_step*4;
//if its a nop just go to the next opcode
//No, don't really do that, we need to propage opcode bits :p
2013-12-19 17:10:14 +00:00
//if (mpro[0]==0 && mpro[1]==0 && mpro[2]== 0 && mpro[3]==0)
// continue;
_INST op;
_INST prev_op;
DecodeInst(mpro,&op);
DecodeInst(prev_mpro,&prev_op);
//printf("[%d] "
// "TRA %d,TWT %d,TWA %d,XSEL %d,YSEL %d,IRA %d,IWT %d,IWA %d,TABLE %d,MWT %d,MRD %d,EWT %d,EWA %d,ADRL %d,FRCL %d,SHIFT %d,YRL %d,NEGB %d,ZERO %d,BSEL %d,NOFL %d,MASA %d,ADREB %d,NXADR %d\n"
// ,step
// ,op.TRA,op.TWT,op.TWA,op.XSEL,op.YSEL,op.IRA,op.IWT,op.IWA,op.TABLE,op.MWT,op.MRD,op.EWT,op.EWA,op.ADRL,op.FRCL,op.SHIFT,op.YRL,op.NEGB,op.ZERO,op.BSEL,op.NOFL,op.MASA,op.ADREB,op.NXADR);
//Dynarec !
_dsp_debug_step_start();
//DSP regs are on memory
//Wires stay on x86 regs, written to memory as fast as possible
2013-12-19 17:10:14 +00:00
//EDI=MEM_RD_DATA_NV
dsp_rec_DRAM_CI(x86e,prev_op,step,EDI);
//;)
//Address Generation Unit ! nothing spectacular really ...
2013-12-19 17:10:14 +00:00
dsp_rec_MEM_AGU(x86e,op,step);
//Calculate INPUTS wire
//ESI : INPUTS
dsp_rec_INPUTS(x86e,op,ESI);
//:o ?
//Write the MEMS register
dsp_rec_MEMS_WRITE(x86e,op,step,ESI);
//Write the MEM_RD_DATA regiter
//Last use of MEM_RD_DATA_NV(EDI)
dsp_rec_MEM_RD_DATA_WRITE(x86e,op,step,EDI);
//EDI is now free :D
//EDI is used for MAD_OUT_NV
//Mul-add
dsp_rec_MAD(x86e,op,step,ESI,EDI);
//Effect output/ Feedback
dsp_rec_EFO_FB(x86e,op,step,ESI);
//Write MAD_OUT_NV
{
x86e.Emit(op_mov32,&dsp.regs.MAD_OUT,EDI);
wtn(MAD_OUT);
}
//These are implemented here :p
//Inputs -> Y reg
//Last use of inputs (ESI) and its destructive at that ;p
{
if (op.YRL)
{
x86e.Emit(op_sar32,ESI,4);//[23:4]
x86e.Emit(op_mov32,&dsp.regs.Y_REG,ESI);
}
wtn(Y_REG);
}
//NOFL delay propagation :)
{
//NOFL_2=NOFL_1
x86e.Emit(op_mov32,EAX,&dsp.regs.NOFL_1);
x86e.Emit(op_mov32,&dsp.regs.NOFL_2,EAX);
//NOFL_1 = NOFL
x86e.Emit(op_mov32,&dsp.regs.NOFL_1,op.NOFL);
wtn(NOFL_2);
wtn(NOFL_1);
}
//MWT_1/MRD_1 propagation
{
//MWT_1=MWT
x86e.Emit(op_mov32,&dsp.regs.MWT_1,op.MWT);
//MRD_1=MRD
x86e.Emit(op_mov32,&dsp.regs.MRD_1,op.MRD);
wtn(MWT_1);
wtn(MRD_1);
}
_dsp_debug_step_end();
}
//Need to decrement MDEC_CT here :)
x86e.Emit(op_pop32,EDI);
x86e.Emit(op_pop32,ESI);
x86e.Emit(op_pop32,EBP);
x86e.Emit(op_pop32,EBX);
x86e.Emit(op_ret);
x86e.Generate();
}
void dsp_print_mame();
void dsp_step_mame();
void dsp_emu_grandia();
//Runs the DSP program once : clears the effect output registers, recompiles
//the microprogram if it was flagged dirty, executes the generated code and
//steps the MDEC_CT counter.
void dsp_step()
{
	//clear output reg
	memset(DSPData->EFREG,0,sizeof(DSPData->EFREG));

	//Regenerate the code if the program changed
	if (dsp.dyndirty)
	{
		dsp.dyndirty=false;
		//dsp_print_mame();
		dsp_recompile();
	}

	//Alternative implementations, kept for reference :
	//dsp_step_mame();
	//dsp_emu_grandia();

	//run the code
	typedef void (*dsp_entry_fn)();
	dsp_entry_fn run_program=(dsp_entry_fn)&dsp.DynCode;
	run_program();

	//Count down, reloading from RBL when 0 is reached
	if (--dsp.regs.MDEC_CT==0)
		dsp.regs.MDEC_CT=dsp.RBL;

	//here ? or before ?
	//memset(DSP->MIXS,0,4*16);
}
//Write notification for the DSP register area.
//A write landing in [0x3400, 0x3C00) flags the generated code as stale
//(presumably that window is the MPRO microprogram -- confirm); every other
//address is ignored here.
void dsp_writenmem(u32 addr)
{
	//Offset relative to the start of the DSP area (wraps for addr < 0x3000)
	const u32 offset=addr-0x3000;

	//COEF : native
	//MEMS : native
	//MPRO : native
	const bool needs_recompile = offset>=0x400 && offset<0xC00;
	if (needs_recompile)
	{
		dsp.dyndirty=true;
	}

	/*
	//buffered DSP state
	//24 bit wide
	u32 TEMP[128];
	//24 bit wide
	u32 MEMS[32];
	//20 bit wide
	s32 MIXS[16];
	*/
}
//Read notification for the DSP register area. Intentionally does nothing.
void dsp_readmem(u32 addr)
{
//nothing to do on reads
}
//Shutdown hook. Empty : this implementation releases nothing here.
void dsp_term()
{
}
2018-07-29 19:47:30 +00:00
#endif