DSP LLE: Remove byteswaps and other bloat from the hot path (do them at load/dma time). Minor speedup, perhaps.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2939 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-04-08 21:46:54 +00:00
parent e5edb4c4ad
commit 8c861ad58d
13 changed files with 65 additions and 77 deletions

View File

@ -32,7 +32,7 @@ namespace DSPInterpreter {
// Logs an "Unrecognized opcode" error containing the raw opcode word and a
// program counter value; it performs no other action.
// NOTE(review): the two ERROR_LOG lines look like the before/after pair of
// this change (g_dsp.err_pc replaced by g_dsp.pc) rather than two log calls.
void unknown(const UDSPInstruction& opc)
{
//_assert_msg_(MASTER_LOG, !g_dsp.exception_in_progress_hack, "assert while exception");
ERROR_LOG(DSPLLE, "LLE: Unrecognized opcode 0x%04x, pc 0x%04x", opc.hex, g_dsp.err_pc);
ERROR_LOG(DSPLLE, "LLE: Unrecognized opcode 0x%04x, pc 0x%04x", opc.hex, g_dsp.pc);
}
// test register and updates SR accordingly
@ -158,7 +158,7 @@ void rti(const UDSPInstruction& opc)
// Sets bit 0x4 in g_dsp.cr and repositions g_dsp.pc.
// NOTE(review): the two pc statements look like the before/after pair of this
// change: pc was formerly restored from g_dsp.err_pc and is now decremented.
// The decrement undoes the increment dsp_fetch_code applies after reading an
// opcode word, presumably leaving pc on the halt opcode - confirm that halt
// is a single-word instruction.
void halt(const UDSPInstruction& opc)
{
g_dsp.cr |= 0x4;
g_dsp.pc = g_dsp.err_pc;
g_dsp.pc--;
}
@ -1688,7 +1688,7 @@ void msubc(const UDSPInstruction& opc)
// Writes one register to data memory via dsp_dmem_write.
// The register index is 0x18 plus bits 8..10 of the opcode; the address is the
// low byte of the opcode interpreted as signed and widened to 16 bits.
// NOTE(review): the two `addr` lines look like the before/after pair of this
// change. Assuming s8/s16/u16 are the usual fixed-width typedefs, both yield
// the same value; the second only spells the conversions out.
void srs(const UDSPInstruction& opc)
{
u8 reg = ((opc.hex >> 8) & 0x7) + 0x18;
u16 addr = (s8)opc.hex;
u16 addr = (u16)(s16)(s8)opc.hex;
dsp_dmem_write(addr, g_dsp.r[reg]);
}
@ -1701,7 +1701,7 @@ void srs(const UDSPInstruction& opc)
// Loads one register from data memory via dsp_dmem_read.
// The register index is 0x18 plus bits 8..10 of the opcode; the address is the
// low byte of the opcode interpreted as signed and widened to 16 bits.
// NOTE(review): the two `addr` lines look like the before/after pair of this
// change. Assuming s8/s16/u16 are the usual fixed-width typedefs, both yield
// the same value; the second only spells the conversions out.
void lrs(const UDSPInstruction& opc)
{
u8 reg = ((opc.hex >> 8) & 0x7) + 0x18;
u16 addr = (s8) opc.hex;
u16 addr = (u16)(s16)(s8)opc.hex;
g_dsp.r[reg] = dsp_dmem_read(addr);
}

View File

@ -25,6 +25,8 @@
namespace DSPInterpreter {
typedef void (*DSPInterpreterFunc)(const UDSPInstruction& opc);
void unknown(const UDSPInstruction& opc);
void call(const UDSPInstruction& opc);
void callr(const UDSPInstruction& opc);

View File

@ -36,6 +36,5 @@ u32 Memory_Read_U32(u32 _uAddress);
void ProfilerStart();
#endif
#endif

View File

@ -102,7 +102,7 @@ bool DumpCWCode(u32 _Address, u32 _Length)
{
for (size_t i = _Address; i < _Address + _Length; i++)
{
u16 val = Common::swap16(g_dsp.iram[i]);
u16 val = g_dsp.iram[i];
fprintf(pFile, " cw 0x%04x \n", val);
}

View File

@ -341,7 +341,7 @@ char* gd_dis_opcode(gd_globals_t* gdg)
}
pc &= 0x0fff;
op1 = Common::swap16(gdg->binbuf[pc]);
op1 = gdg->binbuf[pc];
// find opcode
for (j = 0; j < opcodes_size; j++)

View File

@ -68,6 +68,8 @@ void Update_SR_Register16(s16 _Value)
}
}
// If this always returns 1, Hermes' demo sounds better.
// However, most AX games are negatively affected.
int GetMultiplyModifier()
{
if (g_dsp.r[DSP_REG_SR] & (1 << 13))

View File

@ -90,7 +90,7 @@ void gdsp_mbox_write_l(u8 mbx, u16 val)
if (mbx == GDSP_MBOX_DSP)
{
DEBUG_LOG(DSPLLE, " - DSP writes mail to mbx %i: 0x%08x (pc=0x%04x)", mbx, gdsp_mbox_peek(GDSP_MBOX_DSP), g_dsp.err_pc);
DEBUG_LOG(DSPLLE, " - DSP writes mail to mbx %i: 0x%08x (pc=0x%04x)", mbx, gdsp_mbox_peek(GDSP_MBOX_DSP), g_dsp.pc);
}
}
@ -110,7 +110,7 @@ u16 gdsp_mbox_read_l(u8 mbx)
val = gdsp_mbox[mbx][1];
gdsp_mbox[mbx][0] &= ~0x8000;
DEBUG_LOG(DSPLLE, "- DSP reads mail from mbx %i: %08x (pc=0x%04x)", mbx, gdsp_mbox_peek(mbx), g_dsp.err_pc);
DEBUG_LOG(DSPLLE, "- DSP reads mail from mbx %i: %08x (pc=0x%04x)", mbx, gdsp_mbox_peek(mbx), g_dsp.pc);
if (g_dspInitialize.bOnThread)
g_CriticalSection.Leave();
@ -127,7 +127,6 @@ void gdsp_ifx_write(u16 addr, u16 val)
case 0xfb: // DIRQ
if (val & 0x1)
g_dsp.irq_request();
break;
case 0xfc: // DMBH
@ -211,7 +210,7 @@ void gdsp_idma_in(u16 dsp_addr, u32 addr, u32 size)
for (u32 i = 0; i < size; i += 2)
{
// TODO : this may be different on Wii.
*(u16*)&dst[dsp_addr + i] = *(u16*)&g_dsp.cpu_ram[(addr + i) & 0x0fffffff];
*(u16*)&dst[dsp_addr + i] = Common::swap16(*(u16*)&g_dsp.cpu_ram[(addr + i) & 0x0fffffff]);
}
WriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false);
@ -242,7 +241,7 @@ void gdsp_ddma_in(u16 dsp_addr, u32 addr, u32 size)
for (u32 i = 0; i < size; i += 2)
{
*(u16*)&dst[dsp_addr + i] = *(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF];
*(u16*)&dst[dsp_addr + i] = Common::swap16(*(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF]);
}
INFO_LOG(DSPLLE, "*** ddma_in RAM (0x%08x) -> DRAM_DSP (0x%04x) : size (0x%08x)\n", addr, dsp_addr / 2, size);
@ -261,7 +260,7 @@ void gdsp_ddma_out(u16 dsp_addr, u32 addr, u32 size)
for (u32 i = 0; i < size; i += 2)
{
*(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF] = *(u16*)&src[dsp_addr + i];
*(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF] = Common::swap16(*(u16*)&src[dsp_addr + i]);
}
INFO_LOG(DSPLLE, "*** ddma_out DRAM_DSP (0x%04x) -> RAM (0x%08x) : size (0x%08x)\n", dsp_addr / 2, addr, size);

View File

@ -53,17 +53,17 @@
extern u16 gdsp_ifx_regs[256];
u32 gdsp_mbox_peek(u8 mbx);
void gdsp_mbox_write_h(u8 mbx, u16 val);
void gdsp_mbox_write_l(u8 mbx, u16 val);
void gdsp_mbox_write_h(u8 mbx, u16 val);
void gdsp_mbox_write_l(u8 mbx, u16 val);
u16 gdsp_mbox_read_h(u8 mbx);
u16 gdsp_mbox_read_l(u8 mbx);
void gdsp_ifx_init();
void gdsp_ifx_init();
void gdsp_ifx_write(u16 addr, u16 val);
u16 gdsp_ifx_read(u16 addr);
u16 gdsp_ifx_read(u16 addr);
void gdsp_idma_in(u16 dsp_addr, u32 addr, u32 size);
void gdsp_idma_in(u16 dsp_addr, u32 addr, u32 size);
#endif

View File

@ -89,7 +89,7 @@ void gdsp_init()
// Fill DRAM with a fixed non-zero pattern word (it is not actually zeroed).
for (int i = 0; i < DSP_DRAM_SIZE; i++)
{
g_dsp.dram[i] = 0x0021;
g_dsp.dram[i] = 0x2100;
}
// copied from a real console after the custom UCode has been loaded
@ -145,6 +145,11 @@ bool gdsp_load_irom(const char *fname)
return false;
}
fclose(pFile);
// Byteswap the instruction ROM once at load time so reads need no per-access swap.
for (int i = 0; i < DSP_IROM_SIZE; i++)
g_dsp.irom[i] = Common::swap16(g_dsp.irom[i]);
return true;
}
// Always keep IROM write protected.
@ -166,6 +171,9 @@ bool gdsp_load_coef(const char *fname)
return false;
}
fclose(pFile);
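// Byteswap the coefficient table once at load time.
// NOTE(review): the loop below is bounded by DSP_IROM_SIZE although it walks g_dsp.coef - confirm both regions have the same length, or use a COEF-specific size.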
for (int i = 0; i < DSP_IROM_SIZE; i++)
g_dsp.coef[i] = Common::swap16(g_dsp.coef[i]);
return true;
}
// Always keep COEF write protected. We unprotect only when DMA-ing
@ -213,9 +221,9 @@ void gdsp_step()
{
g_dsp.step_counter++;
#if PROFILE
g_dsp.err_pc = g_dsp.pc;
#if PROFILE
ProfilerAddDelta(g_dsp.err_pc, 1);
if (g_dsp.step_counter == 1)
{
@ -226,7 +234,6 @@ void gdsp_step()
{
ProfilerDump(g_dsp.step_counter);
}
#endif
u16 opc = dsp_fetch_code();
@ -334,8 +341,6 @@ void gdsp_run_cycles(int cycles)
// idle loop and if so we waste some time here. Might be beneficial to slice even further.
while (cycles > 0)
{
if (cr_halt)
return;
gdsp_step();
cycles--;
// We don't bother directly supporting pause - if the main emu pauses,

View File

@ -75,7 +75,9 @@ struct SDSP
{
u16 r[32];
u16 pc;
#if PROFILE
u16 err_pc;
#endif
u16* iram;
u16* dram;
u16* irom;

View File

@ -30,19 +30,14 @@
#include "gdsp_memory.h"
#include "gdsp_interface.h"
// Exchanges the high and low bytes of a 16-bit word.
u16 dsp_swap16(u16 x)
{
    const u16 low_to_high = static_cast<u16>(x << 8);
    const u16 high_to_low = static_cast<u16>(x >> 8);
    return static_cast<u16>(high_to_low | low_to_high);
}
u16 dsp_imem_read(u16 addr)
{
switch (addr >> 12)
{
case 0:
return dsp_swap16(g_dsp.iram[addr & DSP_IRAM_MASK]);
return g_dsp.iram[addr & DSP_IRAM_MASK];
case 8:
return dsp_swap16(g_dsp.irom[addr & DSP_IROM_MASK]);
return g_dsp.irom[addr & DSP_IROM_MASK];
default:
ERROR_LOG(DSPLLE, "%04x DSP ERROR: Executing from invalid (%04x) memory", g_dsp.pc, addr);
return 0;
@ -54,10 +49,10 @@ u16 dsp_dmem_read(u16 addr)
switch (addr >> 12)
{
case 0x0: // 0xxx DRAM
return dsp_swap16(g_dsp.dram[addr & DSP_DRAM_MASK]);
return g_dsp.dram[addr & DSP_DRAM_MASK];
case 0x1: // 1xxx COEF
return dsp_swap16(g_dsp.coef[addr & DSP_COEF_MASK]);
return g_dsp.coef[addr & DSP_COEF_MASK];
// FIXME: unknown addresses used by zelda
/* case 0x2:
@ -74,37 +69,24 @@ u16 dsp_dmem_read(u16 addr)
}
}
// Writes one 16-bit word to the DSP data address space, dispatching on the top
// four bits of addr:
//   0x0   -> stored into g_dsp.dram (index masked with DSP_DRAM_MASK)
//   0x1   -> COEF: nothing is stored, an error is logged
//   0xf   -> forwarded to gdsp_ifx_write
//   other -> an error is logged and the write is dropped
// NOTE(review): every case appears twice because this text shows the removed
// and the added lines of the change together. The first DRAM store passes val
// through dsp_swap16; the second stores val unchanged.
void dsp_dmem_write(u16 addr, u16 val)
{
switch (addr >> 12)
{
case 0x0: // 0xxx DRAM
g_dsp.dram[addr & DSP_DRAM_MASK] = dsp_swap16(val);
break;
case 0x0: // 0xxx DRAM
g_dsp.dram[addr & DSP_DRAM_MASK] = val;
break;
case 0x1: // 1xxx COEF
ERROR_LOG(DSPLLE, "someone writes to COEF");
break;
case 0xf: // Fxxx HW regs
gdsp_ifx_write(addr, val);
break;
default: // error
ERROR_LOG(DSPLLE, "%04x DSP ERROR: Write to UNKNOWN (%04x) memory", g_dsp.pc, addr);
break;
case 0x1: // 1xxx COEF
// NOTE(review): pc is printed with %02x here but with %04x in the other log messages.
ERROR_LOG(DSPLLE, "someone writes to COEF (pc = %02x)", g_dsp.pc);
break;
case 0xf: // Fxxx HW regs
gdsp_ifx_write(addr, val);
break;
default: // error
ERROR_LOG(DSPLLE, "%04x DSP ERROR: Write to UNKNOWN (%04x) memory", g_dsp.pc, addr);
break;
}
}
// Reads the instruction word at the current program counter, then advances the
// counter by one word. The read is done before the increment so that any
// message dsp_imem_read logs still reports the address that was fetched.
u16 dsp_fetch_code()
{
    const u16 fetched_word = dsp_imem_read(g_dsp.pc);
    g_dsp.pc += 1;
    return fetched_word;
}
// Returns the instruction word at the current program counter without
// advancing the counter.
u16 dsp_peek_code()
{
    const u16 next_word = dsp_imem_read(g_dsp.pc);
    return next_word;
}
}

View File

@ -27,10 +27,22 @@
#include "Globals.h"
u16 dsp_fetch_code();
u16 dsp_peek_code();
#include "gdsp_interpreter.h"
u16 dsp_imem_read(u16 addr);
void dsp_dmem_write(u16 addr, u16 val);
u16 dsp_dmem_read(u16 addr);
// Reads the instruction word at the current program counter, then advances the
// counter by one word. The read is done before the increment so that any
// message dsp_imem_read logs still reports the address that was fetched.
inline u16 dsp_fetch_code()
{
    const u16 fetched_word = dsp_imem_read(g_dsp.pc);
    g_dsp.pc += 1;
    return fetched_word;
}
// Returns the instruction word at the current program counter without
// advancing the counter.
inline u16 dsp_peek_code()
{
    const u16 next_word = dsp_imem_read(g_dsp.pc);
    return next_word;
}
#endif

View File

@ -49,9 +49,6 @@ SoundStream *soundStream = NULL;
#define GDSP_MBOX_CPU 0
#define GDSP_MBOX_DSP 1
extern u32 m_addressPBs;
bool AXTask(u32& _uMail);
bool bCanWork = false;
bool bIsRunning = false;
@ -312,18 +309,6 @@ void DSP_WriteMailboxLow(bool _CPUMailbox, u16 _uLowMail)
{
gdsp_mbox_write_l(GDSP_MBOX_CPU, _uLowMail);
u32 uAddress = gdsp_mbox_peek(GDSP_MBOX_CPU);
u16 errpc = g_dsp.err_pc;
DEBUG_LOG(DSPLLE, "CPU writes mail to mbx 0: 0x%08x (pc=0x%04x)\n", uAddress, errpc);
// I couldn't find any better way to detect the AX mails so this had to
// do. Please feel free to change it.
if ((errpc == 0x0054 || errpc == 0x0055) && m_addressPBs == 0)
{
DEBUG_LOG(DSPLLE, "AXTask ======== 0x%08x (pc=0x%04x)", uAddress, errpc);
AXTask(uAddress);
}
}
else
{