flycast/core/hw/aica/dsp.cpp

838 lines
19 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "dsp.h"
#include "aica_mem.h"
#include "hw/aica/aica_if.h"
#include "oslib/oslib.h"
/*
DSP rec_v1
Tries to emulate a guesstimation of the aica dsp, by directly emitting x86 opcodes.
This was my first dsp implementation, as implemented for nullDC 1.0.3.
This was derived from a schematic I drew for the dsp, based on
liberal interpretation of known specs, the saturn dsp, digital
electronics assumptions, as well as "best-fitted" my typical
test game suite.
Initial code by skmp, now part of the reicast project.
See the LICENSE & COPYRIGHT files for further details.
*/
//Global DSP state shared by everything in this file (registers, TEMP/MEMS/MIXS, DynCode buffer).
//NOTE(review): DECL_ALIGN(4096) presumably page-aligns the object so that dsp.DynCode can be
//marked executable by dsp_init() -- confirm against the macro definition.
DECL_ALIGN(4096) dsp_t dsp;
//Packs a 24 bit signed value into the 16 bit DSP float format produced by this function:
// bit  15    : sign (bit 23 of the input)
// bits 14:11 : exponent, 0..12 (number of redundant leading sign bits that were dropped)
// bits 10:0  : mantissa
//Only bits [23:0] of val are used, so both sign extended and raw 24 bit inputs are accepted.
u16 DYNACALL PACK(s32 val)
{
	//Work on the raw bit pattern, left shifting negative signed values is not portable
	const u32 bits = (u32)val;
	const u32 sign = (bits >> 23) & 0x1;

	//A set bit in temp marks a position where two neighbouring input bits differ,
	//so the number of leading zeros (capped at 12) is the number of redundant sign bits
	u32 temp = (bits ^ (bits << 1)) & 0xFFFFFF;
	u32 exponent = 0;
	while (exponent < 12 && !(temp & 0x800000))
	{
		temp <<= 1;
		exponent += 1;
	}

	u32 mantissa;
	if (exponent < 12)
		mantissa = (bits << exponent) & 0x3FFFFF;
	else
		mantissa = bits << 11;

	//Keep only the 11 mantissa bits. Without this mask a negative input with exponent 12
	//leaks its sign extension into the exponent field (e.g. -2 packed as 0xFFFE, not 0xE7FE)
	mantissa = (mantissa >> 11) & 0x7FF;

	return (u16)((sign << 15) | (exponent << 11) | mantissa);
}
//Expands a 16 bit DSP float (sign [15], exponent [14:11], mantissa [10:0]) to a signed value.
//Exponents above 11 are clamped to 11 and, unlike the other cases, get no implicit bit 22.
s32 DYNACALL UNPACK(u16 val)
{
	const int negative = (val >> 15) & 0x1;
	const int raw_exponent = (val >> 11) & 0xF;
	const bool clamped = raw_exponent > 11;
	const int shift = clamped ? 11 : raw_exponent;

	//Mantissa goes to bits [21:11]
	s32 result = (val & 0x7FF) << 11;
	//Bit 22 is the inverse of the sign, except for clamped exponents
	if (!clamped)
		result |= (negative ^ 1) << 22;
	result |= negative << 23;

	//Sign extend from 24 to 32 bits, then scale down by the exponent
	result <<= 8;
	result >>= 8;
	return result >> shift;
}
//Splits one DSP micro program step into its fields.
//IPtr : the four consecutive words of the step (IPtr[0] .. IPtr[3])
//i    : receives the decoded fields
void DecodeInst(u32 *IPtr,_INST *i)
{
	const u32 word0 = IPtr[0];
	const u32 word1 = IPtr[1];
	const u32 word2 = IPtr[2];
	const u32 word3 = IPtr[3];

	//Word 0 : TEMP read/write addressing
	i->TRA = (word0 >> 9) & 0x7F;
	i->TWT = (word0 >> 8) & 0x01;
	i->TWA = (word0 >> 1) & 0x7F;

	//Word 1 : multiplier input selection, INPUTS read/write addressing
	i->XSEL = (word1 >> 15) & 0x01;
	i->YSEL = (word1 >> 13) & 0x03;
	i->IRA = (word1 >> 7) & 0x3F;
	i->IWT = (word1 >> 6) & 0x01;
	i->IWA = (word1 >> 1) & 0x1F;

	//Word 2 : memory requests, output writes, shifter and adder control
	i->TABLE = (word2 >> 15) & 0x01;
	i->MWT = (word2 >> 14) & 0x01;
	i->MRD = (word2 >> 13) & 0x01;
	i->EWT = (word2 >> 12) & 0x01;
	i->EWA = (word2 >> 8) & 0x0F;
	i->ADRL = (word2 >> 7) & 0x01;
	i->FRCL = (word2 >> 6) & 0x01;
	i->SHIFT = (word2 >> 4) & 0x03;
	i->YRL = (word2 >> 3) & 0x01;
	i->NEGB = (word2 >> 2) & 0x01;
	i->ZERO = (word2 >> 1) & 0x01;
	i->BSEL = (word2 >> 0) & 0x01;

	//Word 3 : address generation. The original author marked the NOFL and MASA
	//positions as uncertain; a COEF field at the MASA position is intentionally not decoded
	i->NOFL = (word3 >> 15) & 1;
	i->MASA = (word3 >> 9) & 0x3f;
	i->ADREB = (word3 >> 8) & 0x1;
	i->NXADR = (word3 >> 7) & 0x1;
}
#if HOST_CPU == CPU_X86 && FEAT_DSPREC == DYNAREC_JIT
#include "emitter/x86_emitter.h"
//When false the recompiler never emits calls to PACK/UNPACK (see the SUPPORT_NOFL tests below)
const bool SUPPORT_NOFL=false;
//assert() in this part of the file is routed to verify()
#define assert verify
//NOTE(review): 4311 looks like the MSVC pointer truncation warning -- confirm it is still needed
#pragma warning(disable:4311)
//Not referenced in the visible part of this file
#define DYNBUF 0x10000
/*
//#define USEFLOATPACK
//pack s24 to s1e4s11
naked u16 packasm(s32 val)
{
__asm
{
mov edx,ecx; //eax will be sign
and edx,0x80000; //get the sign
jz poz;
neg ecx;
poz:
bsr eax,ecx;
jz _zero;
//24 -> 11
//13 -> 0
//12..0 -> 0
sub eax,11;
cmovs eax,0; //if <0 -> 0
shr ecx,eax; //shift out mantissa as needed (yeah i know, no rounding here and all .. )
shr eax,12; //[14:12] is exp
or edx,ecx; //merge [15] | [11:0]
or eax,edx; //merge [14:12] | ([15] | [11:0]), result on eax
ret;
_zero:
xor eax,eax;
ret;
}
}
//ONLY lower 16 bits are valid, rest are ignored but do movzx to avoid partial stalls :)
naked s32 unpackasm(u32 val)
{
__asm
{
mov eax,ecx; //get mantissa bits
and ecx,0x7FF; //
shl eax,11; //get shift factor (shift)
mov edx,eax; //keep a copy for the sign
and eax,0xF; //get shift factor (mask)
shl ecx,eax; //shift mantissa to normal position
test edx,0x10; //signed ?
jnz _negme;
ret; //nop, return as is
_negme:
//yep, negate and return
neg eax;
ret;
}
}*/
//Resets the DSP: clears the host side state and the DSPData block, installs the
//start-up values and makes the code buffer executable.
void dsp_init()
{
	//Everything starts zeroed
	memset(&dsp,0,sizeof(dsp));
	memset(DSPData,0,sizeof(*DSPData));

	//Ring buffer defaults
	dsp.RBP=0;
	dsp.RBL=0x2000-1;
	dsp.regs.MDEC_CT=1;

	//Force a recompile on the first dsp_step()
	dsp.dyndirty=true;

	//The generated code is executed straight out of dsp.DynCode
	mem_region_set_exec(dsp.DynCode, sizeof(dsp.DynCode));
}
//Forward declaration, the definition is further down in this file
void dsp_recompile();
//Allocation callback handed to x86_block::Init() by dsp_recompile().
//Nothing is allocated: all three arguments are ignored and the fixed
//dsp.DynCode buffer is returned every time.
void* dyna_realloc(void*ptr,u32 oldsize,u32 newsize)
{
	void* const code_buffer=dsp.DynCode;
	return code_buffer;
}
//Called at the start of every recompiled step: clears all "register was written" flags
//that nwtn()/wtn() and _dsp_debug_step_end() check.
void _dsp_debug_step_start()
{
	memset(&dsp.regs_init, 0, sizeof dsp.regs_init);
}
//Called at the end of every recompiled step: verifies that each tracked register
//was marked as written (through wtn()) exactly once during the step.
//Each register keeps its own verify() so that a failure names the offending register.
void _dsp_debug_step_end()
{
verify(dsp.regs_init.MAD_OUT);
verify(dsp.regs_init.MEM_ADDR);
verify(dsp.regs_init.MEM_RD_DATA);
verify(dsp.regs_init.MEM_WT_DATA);
verify(dsp.regs_init.FRC_REG);
verify(dsp.regs_init.ADRS_REG);
verify(dsp.regs_init.Y_REG);
//verify(dsp.regs_init.MDEC_CT); // -> it is updated from C code (dsp_step), not by the generated code
verify(dsp.regs_init.MWT_1);
verify(dsp.regs_init.MRD_1);
// verify(dsp.regs_init.MADRS); //Not a real register, MEM_ADDR is the one that is tracked
verify(dsp.regs_init.MEMS);
verify(dsp.regs_init.NOFL_1);
verify(dsp.regs_init.NOFL_2);
verify(dsp.regs_init.TEMPS);
verify(dsp.regs_init.EFREG);
}
//Write-order checks used while generating a step (flags live in dsp.regs_init)
//nwtn(x) : verifies that register x has not been marked as written yet in this step
//wtn(x)  : same check, then marks register x as written
//wtn is wrapped in do/while(0) so that it behaves as a single statement (safe under an unbraced if)
#define nwtn(x) verify(!dsp.regs_init.x)
#define wtn(x) do { nwtn(x); dsp.regs_init.x=true; } while(0)
//Emits a sign extension of the low src_sz bits of reg to the full 32 bit register.
//src_sz : width of the value currently held in reg
//dst_sz : width to present the value in, defaults to src_sz (0xFF means "not given").
//         With dst_sz>src_sz the result is additionally scaled up by 2^(dst_sz-src_sz),
//         with dst_sz<src_sz only the top dst_sz bits of the source are kept.
void dsp_rec_se(x86_block& x86e,x86_gpr_reg reg,u32 src_sz,u32 dst_sz=0xFF)
{
	const u32 result_sz=(dst_sz==0xFF)?src_sz:dst_sz;

	//Move the source sign bit up to bit 31 ...
	x86e.Emit(op_shl32,reg,32-src_sz);
	//... and shift back down arithmetically so that it is propagated
	x86e.Emit(op_sar32,reg,32-result_sz);
}
//Reads : MWT_1,MRD_1,MEM_ADDR
//Writes : Wire MEM_RD_DATA_NV
//Emits the sound RAM access that the previous step requested.
//Timing:
// Request    : step x   (odd step)
// Operation  : step x+1 (even step, emitted here)
// Data avail : step x+2 (odd step, a new request can be made)
void dsp_rec_DRAM_CI(x86_block& x86e,_INST& prev_op,u32 step,x86_gpr_reg MEM_RD_DATA_NV)
{
	nwtn(MWT_1);
	nwtn(MRD_1);
	nwtn(MEM_ADDR);
	nwtn(MEM_WT_DATA);

	//Memory operations only happen on even steps
	const bool odd_step=(step&1)!=0;
	if (odd_step)
		return;

	//EAX = host pointer for the masked MEM_ADDR
	x86e.Emit(op_mov32,EAX,&dsp.regs.MEM_ADDR);
	x86e.Emit(op_and32,EAX,AICA_RAM_MASK);
	x86e.Emit(op_add32,EAX,(unat)aica_ram.data);

	//Pending read request : [MEM_ADDR] -> MEM_RD_DATA_NV (16 bit, sign extended)
	if (prev_op.MRD)
	{
		x86e.Emit(op_movsx16to32,MEM_RD_DATA_NV,x86_mrm(EAX));
	}

	//Pending write request : MEM_WT_DATA -> [MEM_ADDR] (low 16 bits)
	if (prev_op.MWT)
	{
		x86e.Emit(op_mov32,EDX,&dsp.regs.MEM_WT_DATA);
		x86e.Emit(op_mov16,x86_mrm(EAX),EDX);
	}
}
//Reads : ADRS_REG,MADRS,MDEC_CT
//Writes : MEM_ADDR
//Address generation unit. The address fields of the opcode are only honoured on odd
//steps (memory requests are only allowed then); the memory operation itself is done
//one step later by dsp_rec_DRAM_CI, when the address is available.
void dsp_rec_MEM_AGU(x86_block& x86e,_INST& op,u32 step)
{
	nwtn(ADRS_REG);
	nwtn(MEM_ADDR);

	const bool request_step=(step&1)!=0;
	if (request_step)
	{
		//Base address, in 16 bit sample units
		x86e.Emit(op_mov32,EAX,&DSPData->MADRS[op.MASA]);

		//Optional ADRS_REG offset
		if (op.ADREB)
			x86e.Emit(op_add32,EAX,&dsp.regs.ADRS_REG);

		//Optional +1
		if (op.NXADR)
			x86e.Emit(op_add32,EAX,1);

		//The docs place the RBL wrap around before the MDEC_CT add, but the original
		//author found that this gives very bad results, so the wrap is applied afterwards.
		//RBL and RBP are baked into the generated code as constants.
		if (op.TABLE)
		{
			//Table access : plain 16 bit address
			x86e.Emit(op_and32,EAX,0xFFFF);
		}
		else
		{
			//Ring buffer access : add the decrementing counter, then wrap to RBL
			x86e.Emit(op_add32,EAX,&dsp.regs.MDEC_CT);
			x86e.Emit(op_and32,EAX,dsp.RBL);
		}

		//Sample index -> byte offset (x2), relative to RBP
		x86e.Emit(op_lea32,EDX,x86_mrm(EAX,sib_scale_2,x86_ptr::create(dsp.RBP)));
		x86e.Emit(op_mov32,&dsp.regs.MEM_ADDR,EDX);
	}

	wtn(MEM_ADDR);
}
//Reads : MEMS,MIXS,EXTS
//Writes : INPUTS (Wire)
//Emits the load of the INPUTS wire into the register given by INPUTS, selected by op.IRA:
// 0x00..0x1F : MEMS[IRA],      24 bit, sign extended
// 0x20..0x2F : MIXS[IRA-0x20], 20 bit, widened to 24 bit (scaled by 16)
// 0x30..0x31 : EXTS[IRA-0x30], 16 bit, widened to 24 bit (scaled by 256)
// other      : nothing is emitted, the register keeps whatever it held before
void dsp_rec_INPUTS(x86_block& x86e,_INST& op,x86_gpr_reg INPUTS)
{
	nwtn(MEMS);
	//MIXS and EXTS are read only for the DSP program, so they are not tracked

	//INPUTS is 24 bit, everything is converted to that
	if(op.IRA<0x20)
	{
		x86e.Emit(op_mov32,INPUTS,&dsp.MEMS[op.IRA]);
		dsp_rec_se(x86e,INPUTS,24);
	}
	else if(op.IRA<0x30)
	{
		x86e.Emit(op_mov32,INPUTS,&dsp.MIXS[op.IRA-0x20]);
		dsp_rec_se(x86e,INPUTS,20,24);
	}
	else if(op.IRA<0x32)
	{
		//Load into the register selected by the caller. This used to be hard coded to ESI,
		//which only worked because dsp_recompile happens to pass ESI as INPUTS
		x86e.Emit(op_mov32,INPUTS,&DSPData->EXTS[op.IRA-0x30]);
		dsp_rec_se(x86e,INPUTS,16,24);
	}
}
//Reads : MEM_RD_DATA,NOFL_2
//Writes : MEMS
//Emits the MEMS[IWA] update when the opcode has IWT set.
//Data path : MEM_RD_DATA -> converter -> MEMS. The converter's NOFL flag is delayed by
//two steps (NOFL_2) so that it can be set together with the memory request.
//step and INPUTS are not used by this stage.
void dsp_rec_MEMS_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS)
{
	nwtn(MEM_RD_DATA);
	nwtn(NOFL_2);

	if (op.IWT)
	{
		//ECX : raw 16 bit sample, sign extended (also the argument of UNPACK)
		x86e.Emit(op_movsx16to32,ECX,&dsp.regs.MEM_RD_DATA);
		//EAX : the same sample scaled up to 24 bit
		x86e.Emit(op_mov32,EAX,ECX);
		x86e.Emit(op_shl32,EAX,8);

		if (SUPPORT_NOFL)
		{
			//Skip the float conversion when NOFL_2 is set
			x86_Label* skip_unpack=x86e.CreateLabel(false,8);
			x86e.Emit(op_cmp32,&dsp.regs.NOFL_2,1);
			x86e.Emit(op_je,skip_unpack);
			//Convert, the result replaces EAX
			x86e.Emit(op_call,x86_ptr_imm(UNPACK));
			x86e.MarkLabel(skip_unpack);
		}

		x86e.Emit(op_mov32,&dsp.MEMS[op.IWA],EAX);
	}

	wtn(MEMS);
}
//Reads : MEM_RD_DATA_NV (Wire)
//Writes : MEM_RD_DATA
//Latches the value read by dsp_rec_DRAM_CI into the MEM_RD_DATA register.
//The MEM_RD_DATA_NV wire only exists on even steps (request on odd step x,
//operation on x+1, data available on x+2), so nothing is emitted on odd steps.
//op is not used by this stage.
void dsp_rec_MEM_RD_DATA_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg MEM_RD_DATA_NV)
{
	const bool even_step=(step&1)==0;
	if (even_step)
		x86e.Emit(op_mov32,&dsp.regs.MEM_RD_DATA,MEM_RD_DATA_NV);

	wtn(MEM_RD_DATA);
}
//Emits the TEMP index calculation and returns the memory operand for that TEMP entry.
//TEMPS_NUM     : TRA/TWA field of the opcode
//TEMPSaddrsreg : register that receives the index ((MDEC_CT+TEMPS_NUM)&127); it is
//                clobbered and must stay untouched until the returned operand was used
//Returns an operand addressing dsp.TEMP[index] (index*4 bytes from dsp.TEMP).
x86_mrm_t dsp_reg_GenerateTempsAddrs(x86_block& x86e,u32 TEMPS_NUM,x86_gpr_reg TEMPSaddrsreg)
{
	x86e.Emit(op_mov32,TEMPSaddrsreg,&dsp.regs.MDEC_CT);
	x86e.Emit(op_add32,TEMPSaddrsreg,TEMPS_NUM);
	//TEMP has 128 entries
	x86e.Emit(op_and32,TEMPSaddrsreg,127);
	//Index with the register that holds the index. This used to be hard coded to ECX,
	//which was only correct because every caller passes ECX
	return x86_mrm(TEMPSaddrsreg,sib_scale_4,dsp.TEMP);
}
//Reads : INPUTS,TEMP,FRC_REG,COEF,Y_REG
//Writes : MAD_OUT_NV (Wire)
//Emits the multiply-add stage of one step:
// X (24 bit)  : INPUTS or TEMP[TRA], selected by XSEL
// Y (13 bit)  : FRC_REG, COEF[step] or a slice of Y_REG, selected by YSEL
// B           : MAD_OUT or TEMP[TRA] (selected by BSEL), optionally negated (NEGB),
//               not added at all when ZERO is set
//The result, cut to 26 bit signed, is left in the MAD_OUT_NV register.
//Clobbers EAX, EDX and (when TEMP is needed) ECX.
void dsp_rec_MAD(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS,x86_gpr_reg MAD_OUT_NV)
{
	//TEMP is needed either as the X input or as the B input of the adder
	bool use_TEMP=op.XSEL==0 || (op.BSEL==0 && op.ZERO==0);
	//TEMPS (if used) on ECX
	const x86_gpr_reg TEMPS_reg=ECX;
	if (use_TEMP)
	{
		//read temps
		x86e.Emit(op_mov32,TEMPS_reg,dsp_reg_GenerateTempsAddrs(x86e,op.TRA,TEMPS_reg));
		dsp_rec_se(x86e,TEMPS_reg,24);
	}

	//X : 24 bits
	x86_reg mul_x_input;
	if (op.XSEL==1)
	{
		//X=INPUTS
		mul_x_input=INPUTS;
	}
	else
	{
		//X=TEMPS
		mul_x_input=TEMPS_reg;
	}

	//MUL Y in : EAX
	//Y : 13 bits
	switch(op.YSEL)
	{
	case 0:
		//Y=FRC_REG[13]
		x86e.Emit(op_mov32,EAX,&dsp.regs.FRC_REG);
		dsp_rec_se(x86e,EAX,13);
		break;
	case 1:
		//Y=COEF[13], the top 13 bits of the 16 bit coefficient
		x86e.Emit(op_mov32,EAX,&DSPData->COEF[step]);
		dsp_rec_se(x86e,EAX,16,13);
		break;
	case 2:
		//Y=Y_REG[23:11] (Y_REG is 19 bits, INPUTS[23:4], so that is really 19:7)
		x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG);
		dsp_rec_se(x86e,EAX,19,13);
		break;
	case 3:
		//Y=0'Y_REG[15:4] (Y_REG is 19 bits, INPUTS[23:4], so that is really 11:0)
		x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG);
		//Clear bit 12 and up. The destination register was missing from this emit,
		//so the mask was never applied to the Y value in EAX
		x86e.Emit(op_and32,EAX,0xFFF);
		break;
	}

	//Do the mul (EDX:EAX = EAX * X) -- maybe the hardware has overflow protection ?
	//24+13=37, -11 = 26
	//that can be >>1 or >>2 on the shifter after the mul
	x86e.Emit(op_imul32,mul_x_input);
	//*NOTE* shrd is unsigned, but EDX is signed and only a few bits are shifted in from it,
	//so it works just fine
	x86e.Emit(op_shrd32,EAX,EDX,10);
	//cut the upper bits so that it is 26 bits signed
	dsp_rec_se(x86e,EAX,26);

	//Adder, takes MUL_OUT at EAX
	//Adds B (EDX)
	//Outputs EAX
	if (!op.ZERO) //if zero is set the adder has no effect
	{
		if (op.BSEL==1)
		{
			//B=MAD_OUT
			//MAD_OUT is stored in s32 format, so no need for sign extension
			x86e.Emit(op_mov32,EDX,&dsp.regs.MAD_OUT);
		}
		else
		{
			//B=TEMP
			//TEMPS is already sign extended, so no need for it
			//Just converting 24 -> 26 bits (x4) using lea
			x86e.Emit(op_lea32,EDX,x86_mrm(TEMPS_reg,sib_scale_4,0));
		}
		//Gating is applied here normally (ZERO).
		//NEGB then inverts the value (NOT) (or 0, if gated) and the adder adds +1 if NEGB is set.
		//However, (~X)+1 = -X and (~0)+1 = 0, so a plain negate is emitted instead
		if (op.NEGB)
		{
			x86e.Emit(op_neg32,EDX);
		}
		//Is there overflow protection here ?
		//The result of mul is on EAX, we modify that
		x86e.Emit(op_add32,EAX,EDX);
	}

	//cut the upper bits so that it is 26 bits signed
	dsp_rec_se(x86e,EAX,26);

	//Write to the MAD_OUT_NV wire
	x86e.Emit(op_mov32,MAD_OUT_NV,EAX);
}
//Reads : INPUTS,MAD_OUT
//Writes : EFREG,TEMP,FRC_REG,ADRS_REG,MEM_WT_DATA
//Emits the output stage of one step: the MAD_OUT register (result of the previous step,
//it is only rewritten after this stage) goes through the shifter selected by op.SHIFT and
//the shifted value (EAX) is then written to EFREG (EWT), TEMP (TWT), FRC_REG (FRCL),
//ADRS_REG (ADRL) and always to MEM_WT_DATA.
//Clobbers EAX, EDX and ECX. step and INPUTS are not used by this stage.
void dsp_rec_EFO_FB(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS)
{
nwtn(MAD_OUT);
//MAD_OUT is s32, no sign extension needed
x86e.Emit(op_mov32,EAX,&dsp.regs.MAD_OUT);
//Shifter. "Protected" modes clamp the result, unprotected modes just cut it to 24 bits
//NOTE(review): the upper clamp bound is built with neg in case 0 (+524288) but with not in
//case 1 (+524287), and case 3 ("x1") shifts differently from case 0 ("x1") -- confirm intent
switch(op.SHIFT)
{
case 0:
x86e.Emit(op_sar32,EAX,2);
//x1 Protected
//Lower bound : EAX=max(EAX,-524288)
x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovl32,EAX,EDX);
//Upper bound : EAX=min(EAX,-EDX)
x86e.Emit(op_neg32,EDX);
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovg32,EAX,EDX);
break;
case 1:
//x2 Protected
x86e.Emit(op_sar32,EAX,1);
//Lower bound : EAX=max(EAX,-524288)
x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovl32,EAX,EDX);
//Upper bound : EAX=min(EAX,~EDX)
x86e.Emit(op_not32,EDX);
x86e.Emit(op_cmp32,EAX,EDX);
x86e.Emit(op_cmovg32,EAX,EDX);
break;
case 2:
//x2 Not protected
x86e.Emit(op_sar32,EAX,1);
dsp_rec_se(x86e,EAX,24);
break;
case 3:
//x1 Not protected
x86e.Emit(op_sar32,EAX,1);
x86e.Emit(op_shl32,EAX,2);
dsp_rec_se(x86e,EAX,24);
break;
}
//Write EFREG ?
if (op.EWT)
{
x86e.Emit(op_mov32,EDX,EAX);
//Top 16 bits or lower 16 ? The original author followed the same rule as for the input.
//The value is shifted right by 4 and its low 16 bits are stored
x86e.Emit(op_sar32,EDX,4);
//write
x86e.Emit(op_mov16,&DSPData->EFREG[op.EWA],DX);
}
//Write TEMPS ?
if (op.TWT)
{
//TEMP is 24 bit, stored as s32 (no conversion required)
//ECX is used (and clobbered) for the TEMP index
x86e.Emit(op_mov32,dsp_reg_GenerateTempsAddrs(x86e,op.TWA,ECX),EAX);
}
//COMMON TO FRC_REG and ADRS_REG
//interpolation mode : shift1=1=shift0
//non interpolation : shift1!=1 && shift0!=1 ? (why && ?) -- implemented as ||
//Write to FRC_REG ?
if (op.FRCL)
{
if (op.SHIFT==3)
{
//FRC_REG[12:0]=Shift[23:11]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_sar32,ECX,11);
}
else
{
//FRC_REG[12:0]=0'Shift[11:0]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 12 and up are 0'd
}
x86e.Emit(op_mov32,&dsp.regs.FRC_REG,ECX);
}
//Write to ADRS_REG ?
if (op.ADRL)
{
if (op.SHIFT==3)
{
//ADRS_REG[11:0]=Shift[23,23,23,23,23,22:16]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_shl32,ECX,8); //bit31=bit 23
x86e.Emit(op_sar32,ECX,24); //bit 0 = bit16 (16+8=24)
}
else
{
//ADRS_REG[11:0]=0'Shift[23:12]
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_sar32,ECX,12);
x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 12 and up are 0'd
}
x86e.Emit(op_mov32,&dsp.regs.ADRS_REG,ECX);
}
//MEM_WT_DATA write
//This kills off any non protected regs (EAX,EDX,ECX)
{
//pack ?
if (!op.NOFL && SUPPORT_NOFL)
{ //yes : PACK takes its argument in ECX and returns the packed value in EAX
x86e.Emit(op_mov32,ECX,EAX);
x86e.Emit(op_call,x86_ptr_imm(PACK));
}
else
{ //no : plain shift down by 8 (see the EFREG write for more info)
x86e.Emit(op_sar32,EAX,8);
}
//data is on EAX
x86e.Emit(op_mov32,&dsp.regs.MEM_WT_DATA,EAX);
}
//All outputs of this stage count as written for this step, even when the opcode
//did not request the individual write
wtn(EFREG);
wtn(TEMPS);
wtn(FRC_REG);
wtn(ADRS_REG);
wtn(MEM_WT_DATA);
}
//Translates the whole 128 step DSP micro program (DSPData->MPRO) into one x86 function
//that is emitted into dsp.DynCode (through dyna_realloc) and clears dsp.dyndirty.
//The generated function saves/restores EBX,EBP,ESI,EDI and runs all 128 steps in sequence.
//MDEC_CT is not touched by the generated code, dsp_step() updates it after every run.
void dsp_recompile()
{
dsp.dyndirty=false;
x86_block x86e;
x86e.Init(dyna_realloc,dyna_realloc);
//Prologue : save the registers that the generated code is not allowed to clobber
x86e.Emit(op_push32,EBX);
x86e.Emit(op_push32,EBP);
x86e.Emit(op_push32,ESI);
x86e.Emit(op_push32,EDI);
//Input comes from mems, mixs and exts, as well as possible memory reads and writes
//mems is read/write (memory loads go there), mixs and exts are read only.
//There are various delays (registers) that need to be emulated properly (more on that later)
//Registers that can be written : MIXS,FRC_REG,ADRS_REG,EFREG,TEMP
//MRD, MWT, NOFL, TABLE, NXADR, ADREB, and MASA[4:0]
//are only allowed on even steps when counting from 1 (2,4,6, ...), that is odd steps when counting from 0 (1,3,5, ...)
for(int step=0;step<128;++step)
{
//Each step is 4 words; the previous step wraps around (step 0 -> step 127)
u32* mpro=DSPData->MPRO+step*4;
u32 prev_step=(step-1)&127;
u32* prev_mpro=DSPData->MPRO+prev_step*4;
//Nops are not skipped: the delayed opcode bits still need to be propagated
//if (mpro[0]==0 && mpro[1]==0 && mpro[2]== 0 && mpro[3]==0)
// continue;
_INST op;
_INST prev_op;
DecodeInst(mpro,&op);
DecodeInst(prev_mpro,&prev_op);
//printf("[%d] "
// "TRA %d,TWT %d,TWA %d,XSEL %d,YSEL %d,IRA %d,IWT %d,IWA %d,TABLE %d,MWT %d,MRD %d,EWT %d,EWA %d,ADRL %d,FRCL %d,SHIFT %d,YRL %d,NEGB %d,ZERO %d,BSEL %d,NOFL %d,MASA %d,ADREB %d,NXADR %d\n"
// ,step
// ,op.TRA,op.TWT,op.TWA,op.XSEL,op.YSEL,op.IRA,op.IWT,op.IWA,op.TABLE,op.MWT,op.MRD,op.EWT,op.EWA,op.ADRL,op.FRCL,op.SHIFT,op.YRL,op.NEGB,op.ZERO,op.BSEL,op.NOFL,op.MASA,op.ADREB,op.NXADR);
//Reset the per step "register written" tracking
_dsp_debug_step_start();
//DSP regs are in memory
//Wires stay in x86 regs, written to memory as soon as possible
//EDI=MEM_RD_DATA_NV
//The memory operation uses the request bits of the previous step
dsp_rec_DRAM_CI(x86e,prev_op,step,EDI);
//Address Generation Unit
dsp_rec_MEM_AGU(x86e,op,step);
//Calculate INPUTS wire
//ESI : INPUTS
dsp_rec_INPUTS(x86e,op,ESI);
//Write the MEMS register
dsp_rec_MEMS_WRITE(x86e,op,step,ESI);
//Write the MEM_RD_DATA register
//Last use of MEM_RD_DATA_NV(EDI)
dsp_rec_MEM_RD_DATA_WRITE(x86e,op,step,EDI);
//EDI is now free and gets reused for MAD_OUT_NV
//Mul-add
dsp_rec_MAD(x86e,op,step,ESI,EDI);
//Effect output / Feedback (still reads the MAD_OUT register of the previous step)
dsp_rec_EFO_FB(x86e,op,step,ESI);
//Write MAD_OUT_NV to the MAD_OUT register
{
x86e.Emit(op_mov32,&dsp.regs.MAD_OUT,EDI);
wtn(MAD_OUT);
}
//These are implemented here
//Inputs -> Y reg
//Last use of inputs (ESI), and it is destructive
{
if (op.YRL)
{
x86e.Emit(op_sar32,ESI,4);//[23:4]
x86e.Emit(op_mov32,&dsp.regs.Y_REG,ESI);
}
wtn(Y_REG);
}
//NOFL delay propagation
{
//NOFL_2=NOFL_1
x86e.Emit(op_mov32,EAX,&dsp.regs.NOFL_1);
x86e.Emit(op_mov32,&dsp.regs.NOFL_2,EAX);
//NOFL_1 = NOFL
x86e.Emit(op_mov32,&dsp.regs.NOFL_1,op.NOFL);
wtn(NOFL_2);
wtn(NOFL_1);
}
//MWT_1/MRD_1 propagation
{
//MWT_1=MWT
x86e.Emit(op_mov32,&dsp.regs.MWT_1,op.MWT);
//MRD_1=MRD
x86e.Emit(op_mov32,&dsp.regs.MRD_1,op.MRD);
wtn(MWT_1);
wtn(MRD_1);
}
//Every tracked register must have been marked as written by now
_dsp_debug_step_end();
}
//MDEC_CT is decremented by dsp_step() after the generated code has run
//Epilogue : restore the saved registers in reverse order and return
x86e.Emit(op_pop32,EDI);
x86e.Emit(op_pop32,ESI);
x86e.Emit(op_pop32,EBP);
x86e.Emit(op_pop32,EBX);
x86e.Emit(op_ret);
x86e.Generate();
}
//Declarations of alternative/debug DSP paths. They are not defined in the visible part of
//this file and are only referenced from commented out calls in dsp_step() below.
void dsp_print_mame();
void dsp_step_mame();
void dsp_emu_grandia();
//Runs the DSP program once: clears the effect outputs, recompiles the program if it was
//modified, executes the generated code and advances the MDEC_CT counter.
void dsp_step()
{
	typedef void (*dsp_code_fn)();

	//clear output reg
	memset(DSPData->EFREG,0,sizeof(DSPData->EFREG));

	//Regenerate the code when the program was written to (see dsp_writenmem)
	if (dsp.dyndirty)
	{
		dsp.dyndirty=false;
		//dsp_print_mame();
		dsp_recompile();
	}

	//Alternative paths, disabled
	//dsp_step_mame();
	//dsp_emu_grandia();

	//run the code, it lives at the start of dsp.DynCode
	dsp_code_fn run_program=(dsp_code_fn)&dsp.DynCode;
	run_program();

	//MDEC_CT counts down and reloads from RBL when it reaches 0
	if (--dsp.regs.MDEC_CT==0)
		dsp.regs.MDEC_CT=dsp.RBL;

	//Clear MIXS here ? or before ?
	//memset(DSP->MIXS,0,4*16);
}
//Notification that a DSP register at addr was written.
//COEF, MEMS and MPRO are kept in their native layout, so nothing is copied here; the
//only effect is that a write to offsets [0x400,0xC00) relative to 0x3000 marks the
//generated code as dirty, which makes the next dsp_step() recompile.
//NOTE(review): that window presumably covers MPRO -- confirm against the register map
void dsp_writenmem(u32 addr)
{
	const u32 offset=addr-0x3000;
	const bool invalidates_code=(offset>=0x400) && (offset<0xC00);

	if (invalidates_code)
		dsp.dyndirty=true;

	//Buffered DSP state, for reference :
	// u32 TEMP[128] -- 24 bit wide
	// u32 MEMS[32]  -- 24 bit wide
	// s32 MIXS[16]  -- 20 bit wide
}
//Notification that a DSP register at addr is about to be read.
//Intentionally empty: this implementation has nothing to synchronise on reads.
void dsp_readmem(u32 addr)
{
//nothing to do
}
//Counterpart of dsp_init(). Intentionally empty: dsp_init() only touches the statically
//allocated dsp/DSPData state, so there is nothing to release here.
void dsp_term()
{
}
#endif