Merge pull request #6077 from leoetlino/dsp-fixes

Small DSP accelerator fixes
This commit is contained in:
Leo Lam 2017-09-25 00:02:43 +02:00 committed by GitHub
commit 38a8d04c35
15 changed files with 578 additions and 187 deletions

View File

@ -4,77 +4,65 @@
#include "Core/DSP/DSPAccelerator.h" #include "Core/DSP/DSPAccelerator.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "Core/DSP/DSPCore.h"
#include "Core/DSP/DSPHWInterface.h"
#include "Core/DSP/DSPHost.h"
namespace DSP namespace DSP
{ {
u16 dsp_read_aram_d3() u16 Accelerator::ReadD3()
{ {
// Zelda ucode reads ARAM through 0xffd3.
const u32 EndAddress = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL];
u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL];
u16 val = 0; u16 val = 0;
switch (g_dsp.ifx_regs[DSP_FORMAT]) switch (m_sample_format)
{ {
case 0x5: // u8 reads case 0x5: // u8 reads
val = Host::ReadHostMemory(Address); val = ReadMemory(m_current_address);
Address++; m_current_address++;
break; break;
case 0x6: // u16 reads case 0x6: // u16 reads
val = (Host::ReadHostMemory(Address * 2) << 8) | Host::ReadHostMemory(Address * 2 + 1); val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
Address++; m_current_address++;
break; break;
default: default:
ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", m_sample_format);
break; break;
} }
if (Address >= EndAddress) if (m_current_address >= m_end_address)
{ {
// Set address back to start address. (never seen this here!) // Set address back to start address. (never seen this here!)
Address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL]; m_current_address = m_start_address;
} }
g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16;
g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff;
return val; return val;
} }
void dsp_write_aram_d3(u16 value) void Accelerator::WriteD3(u16 value)
{ {
// Zelda ucode writes a bunch of zeros to ARAM through d3 during // Zelda ucode writes a bunch of zeros to ARAM through d3 during
// initialization. Don't know if it ever does it later, too. // initialization. Don't know if it ever does it later, too.
// Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too) // Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too)
// Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too) // Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too)
u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL];
switch (g_dsp.ifx_regs[DSP_FORMAT]) switch (m_sample_format)
{ {
case 0xA: // u16 writes case 0xA: // u16 writes
Host::WriteHostMemory(value >> 8, Address * 2); WriteMemory(m_current_address * 2, value >> 8);
Host::WriteHostMemory(value & 0xFF, Address * 2 + 1); WriteMemory(m_current_address * 2 + 1, value & 0xFF);
Address++; m_current_address++;
break; break;
default: default:
ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", m_sample_format);
break; break;
} }
g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16;
g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff;
} }
u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format, u16 Accelerator::Read(s16* coefs)
s16* yn1, s16* yn2, u16* pred_scale, s16* coefs,
std::function<void()> end_exception)
{ {
if (m_reads_stopped)
return 0x0000;
u16 val; u16 val;
u8 step_size_bytes = 0; u8 step_size_bytes = 0;
@ -84,69 +72,69 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1
// extension and do/do not use ADPCM. It also remains to be figured out // extension and do/do not use ADPCM. It also remains to be figured out
// whether there's a difference between the usual accelerator "read // whether there's a difference between the usual accelerator "read
// address" and 0xd3. // address" and 0xd3.
switch (sample_format) switch (m_sample_format)
{ {
case 0x00: // ADPCM audio case 0x00: // ADPCM audio
{ {
// ADPCM decoding, not much to explain here. int scale = 1 << (m_pred_scale & 0xF);
if ((*current_address & 15) == 0) int coef_idx = (m_pred_scale >> 4) & 0x7;
{
*pred_scale = Host::ReadHostMemory((*current_address & ~15) >> 1);
*current_address += 2;
}
switch (end_address & 15)
{
case 0: // Tom and Jerry
step_size_bytes = 1;
break;
case 1: // Blazing Angels
step_size_bytes = 0;
break;
default:
step_size_bytes = 2;
break;
}
int scale = 1 << (*pred_scale & 0xF);
int coef_idx = (*pred_scale >> 4) & 0x7;
s32 coef1 = coefs[coef_idx * 2 + 0]; s32 coef1 = coefs[coef_idx * 2 + 0];
s32 coef2 = coefs[coef_idx * 2 + 1]; s32 coef2 = coefs[coef_idx * 2 + 1];
int temp = (*current_address & 1) ? (Host::ReadHostMemory(*current_address >> 1) & 0xF) : int temp = (m_current_address & 1) ? (ReadMemory(m_current_address >> 1) & 0xF) :
(Host::ReadHostMemory(*current_address >> 1) >> 4); (ReadMemory(m_current_address >> 1) >> 4);
if (temp >= 8) if (temp >= 8)
temp -= 16; temp -= 16;
s32 val32 = (scale * temp) + ((0x400 + coef1 * *yn1 + coef2 * *yn2) >> 11); s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
val = static_cast<s16>(MathUtil::Clamp<s32>(val32, -0x7FFF, 0x7FFF)); val = static_cast<s16>(MathUtil::Clamp<s32>(val32, -0x7FFF, 0x7FFF));
step_size_bytes = 2;
*yn2 = *yn1; m_yn2 = m_yn1;
*yn1 = val; m_yn1 = val;
*current_address += 1; m_current_address += 1;
// These two cases are handled in a special way, separate from normal overflow handling:
// the ACCOV exception does not fire at all, the predscale register is not updated,
// and if the end address is 16-byte aligned, the DSP loops to start_address + 1
// instead of start_address.
if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address)
{
m_current_address = m_start_address + 1;
}
else if ((m_end_address & 0xf) == 0x1 && m_current_address == m_end_address - 1)
{
m_current_address = m_start_address;
}
// If any of these special cases were hit, the DSP does not update the predscale register.
else if ((m_current_address & 15) == 0)
{
m_pred_scale = ReadMemory((m_current_address & ~15) >> 1);
m_current_address += 2;
step_size_bytes += 2;
}
break; break;
} }
case 0x0A: // 16-bit PCM audio case 0x0A: // 16-bit PCM audio
val = (Host::ReadHostMemory(*current_address * 2) << 8) | val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
Host::ReadHostMemory(*current_address * 2 + 1); m_yn2 = m_yn1;
*yn2 = *yn1; m_yn1 = val;
*yn1 = val;
step_size_bytes = 2; step_size_bytes = 2;
*current_address += 1; m_current_address += 1;
break; break;
case 0x19: // 8-bit PCM audio case 0x19: // 8-bit PCM audio
val = Host::ReadHostMemory(*current_address) << 8; val = ReadMemory(m_current_address) << 8;
*yn2 = *yn1; m_yn2 = m_yn1;
*yn1 = val; m_yn1 = val;
step_size_bytes = 2; step_size_bytes = 2;
*current_address += 1; m_current_address += 1;
break; break;
default: default:
ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", m_sample_format);
step_size_bytes = 2; step_size_bytes = 2;
*current_address += 1; m_current_address += 1;
val = 0; val = 0;
break; break;
} }
@ -160,30 +148,66 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1
// Somehow, YN1 and YN2 must be initialized with their "loop" values, // Somehow, YN1 and YN2 must be initialized with their "loop" values,
// so yeah, it seems likely that we should raise an exception to let // so yeah, it seems likely that we should raise an exception to let
// the DSP program do that, at least if DSP_FORMAT == 0x0A. // the DSP program do that, at least if DSP_FORMAT == 0x0A.
if (*current_address == (end_address + step_size_bytes - 1)) if (m_current_address == (m_end_address + step_size_bytes - 1))
{ {
// Set address back to start address. // Set address back to start address.
*current_address = start_address; m_current_address = m_start_address;
end_exception(); m_reads_stopped = true;
OnEndException();
} }
SetCurrentAddress(m_current_address);
return val; return val;
} }
u16 dsp_read_accelerator() void Accelerator::DoState(PointerWrap& p)
{ {
const u32 start_address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL]; p.Do(m_start_address);
const u32 end_address = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL]; p.Do(m_end_address);
u32 current_address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL]; p.Do(m_current_address);
p.Do(m_sample_format);
p.Do(m_yn1);
p.Do(m_yn2);
p.Do(m_pred_scale);
p.Do(m_reads_stopped);
}
auto end_address_reached = [] { DSPCore_SetException(EXP_ACCOV); }; constexpr u32 START_END_ADDRESS_MASK = 0x3fffffff;
const u16 val = ReadAccelerator( constexpr u32 CURRENT_ADDRESS_MASK = 0xbfffffff;
start_address, end_address, &current_address, g_dsp.ifx_regs[DSP_FORMAT],
reinterpret_cast<s16*>(&g_dsp.ifx_regs[DSP_YN1]),
reinterpret_cast<s16*>(&g_dsp.ifx_regs[DSP_YN2]), &g_dsp.ifx_regs[DSP_PRED_SCALE],
reinterpret_cast<s16*>(&g_dsp.ifx_regs[DSP_COEF_A1_0]), end_address_reached);
gdsp_ifx_write(DSP_ACCAH, current_address >> 16); void Accelerator::SetStartAddress(u32 address)
gdsp_ifx_write(DSP_ACCAL, current_address & 0xffff); {
return val; m_start_address = address & START_END_ADDRESS_MASK;
}
// Stores the accelerator end address. Only the bits selected by
// START_END_ADDRESS_MASK are kept, so the masking takes effect as soon as the
// value is written rather than when it is later read back.
void Accelerator::SetEndAddress(u32 address)
{
  const u32 masked_address = address & START_END_ADDRESS_MASK;
  m_end_address = masked_address;
}
// Stores the accelerator current address. The current address uses its own
// mask (CURRENT_ADDRESS_MASK), which differs from the one applied to the
// start and end addresses.
void Accelerator::SetCurrentAddress(u32 address)
{
  const u32 masked_address = address & CURRENT_ADDRESS_MASK;
  m_current_address = masked_address;
}
// Stores the sample format register value as-is (no masking or validation).
// The value is interpreted later by the read/write paths, which log an error
// for formats they do not recognise.
void Accelerator::SetSampleFormat(u16 format)
{
m_sample_format = format;
}
// Stores the YN1 register (the most recent output sample kept by the decoder).
// Unlike SetYn2(), writing YN1 does not touch the stopped-reads state.
void Accelerator::SetYn1(s16 yn1)
{
m_yn1 = yn1;
}
// Stores the YN2 register. Writing YN2 also clears the internal stopped-reads
// flag, which is what lets the accelerator return data again after it has
// signalled the end of the sample range.
void Accelerator::SetYn2(s16 yn2)
{
  // Any write to YN2 re-enables reads, regardless of the value written.
  m_reads_stopped = false;
  m_yn2 = yn2;
}
void Accelerator::SetPredScale(u16 pred_scale)
{
m_pred_scale = pred_scale & 0x7f;
} }
} // namespace DSP } // namespace DSP

View File

@ -4,18 +4,56 @@
#pragma once #pragma once
#include <functional>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
class PointerWrap;
namespace DSP namespace DSP
{ {
u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format, class Accelerator
s16* yn1, s16* yn2, u16* pred_scale, s16* coefs, {
std::function<void()> end_exception); public:
virtual ~Accelerator() = default;
u16 dsp_read_accelerator(); u16 Read(s16* coefs);
// Zelda ucode reads ARAM through 0xffd3.
u16 ReadD3();
void WriteD3(u16 value);
u16 dsp_read_aram_d3(); u32 GetStartAddress() const { return m_start_address; }
void dsp_write_aram_d3(u16 value); u32 GetEndAddress() const { return m_end_address; }
u32 GetCurrentAddress() const { return m_current_address; }
u16 GetSampleFormat() const { return m_sample_format; }
s16 GetYn1() const { return m_yn1; }
s16 GetYn2() const { return m_yn2; }
u16 GetPredScale() const { return m_pred_scale; }
void SetStartAddress(u32 address);
void SetEndAddress(u32 address);
void SetCurrentAddress(u32 address);
void SetSampleFormat(u16 format);
void SetYn1(s16 yn1);
void SetYn2(s16 yn2);
void SetPredScale(u16 pred_scale);
void DoState(PointerWrap& p);
protected:
virtual void OnEndException() = 0;
virtual u8 ReadMemory(u32 address) = 0;
virtual void WriteMemory(u32 address, u8 value) = 0;
// DSP accelerator registers.
u32 m_start_address = 0;
u32 m_end_address = 0;
u32 m_current_address = 0;
u16 m_sample_format = 0;
s16 m_yn1 = 0;
s16 m_yn2 = 0;
u16 m_pred_scale = 0;
// When an ACCOV is triggered, the accelerator stops reading back anything
// and updating the current address register, unless the YN2 register is written to.
// This is kept track of internally; this state is not exposed via any register.
bool m_reads_stopped = false;
};
} // namespace DSP } // namespace DSP

View File

@ -15,12 +15,14 @@
#include "Common/MemoryUtil.h" #include "Common/MemoryUtil.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "Core/DSP/DSPAccelerator.h"
#include "Core/DSP/DSPAnalyzer.h" #include "Core/DSP/DSPAnalyzer.h"
#include "Core/DSP/DSPHWInterface.h" #include "Core/DSP/DSPHWInterface.h"
#include "Core/DSP/DSPHost.h" #include "Core/DSP/DSPHost.h"
#include "Core/DSP/Interpreter/DSPIntUtil.h" #include "Core/DSP/Interpreter/DSPIntUtil.h"
#include "Core/DSP/Interpreter/DSPInterpreter.h" #include "Core/DSP/Interpreter/DSPInterpreter.h"
#include "Core/DSP/Jit/DSPEmitter.h" #include "Core/DSP/Jit/DSPEmitter.h"
#include "Core/HW/DSP.h"
namespace DSP namespace DSP
{ {
@ -111,11 +113,21 @@ static void DSPCore_FreeMemoryPages()
g_dsp.irom = g_dsp.iram = g_dsp.dram = g_dsp.coef = nullptr; g_dsp.irom = g_dsp.iram = g_dsp.dram = g_dsp.coef = nullptr;
} }
// Accelerator backend used by the LLE DSP core: memory accesses are forwarded
// to the host interface and the end-of-range event is turned into a DSP
// exception.
class LLEAccelerator final : public Accelerator
{
protected:
  // Reads one byte through the host memory interface.
  u8 ReadMemory(u32 address) override
  {
    return Host::ReadHostMemory(address);
  }

  // Writes one byte through the host memory interface. Note that the host
  // function takes (value, address), the reverse of this method's parameters.
  void WriteMemory(u32 address, u8 value) override
  {
    Host::WriteHostMemory(value, address);
  }

  // Raises the accelerator overflow exception on the emulated DSP.
  void OnEndException() override
  {
    DSPCore_SetException(EXP_ACCOV);
  }
};
bool DSPCore_Init(const DSPInitOptions& opts) bool DSPCore_Init(const DSPInitOptions& opts)
{ {
g_dsp.step_counter = 0; g_dsp.step_counter = 0;
g_init_hax = false; g_init_hax = false;
g_dsp.accelerator = std::make_unique<LLEAccelerator>();
g_dsp.irom = static_cast<u16*>(Common::AllocateMemoryPages(DSP_IROM_BYTE_SIZE)); g_dsp.irom = static_cast<u16*>(Common::AllocateMemoryPages(DSP_IROM_BYTE_SIZE));
g_dsp.iram = static_cast<u16*>(Common::AllocateMemoryPages(DSP_IRAM_BYTE_SIZE)); g_dsp.iram = static_cast<u16*>(Common::AllocateMemoryPages(DSP_IRAM_BYTE_SIZE));
g_dsp.dram = static_cast<u16*>(Common::AllocateMemoryPages(DSP_DRAM_BYTE_SIZE)); g_dsp.dram = static_cast<u16*>(Common::AllocateMemoryPages(DSP_DRAM_BYTE_SIZE));

View File

@ -16,6 +16,8 @@
namespace DSP namespace DSP
{ {
class Accelerator;
namespace JIT namespace JIT
{ {
namespace x86 namespace x86
@ -299,6 +301,8 @@ struct SDSP
// Accelerator / DMA / other hardware registers. Not GPRs. // Accelerator / DMA / other hardware registers. Not GPRs.
std::array<u16, 256> ifx_regs; std::array<u16, 256> ifx_regs;
std::unique_ptr<Accelerator> accelerator;
// When state saving, all of the above can just be memcpy'd into the save state. // When state saving, all of the above can just be memcpy'd into the save state.
// The below needs special handling. // The below needs special handling.
u16* iram; u16* iram;

View File

@ -98,7 +98,7 @@ u16 gdsp_mbox_read_l(Mailbox mbx)
return (u16)value; return (u16)value;
} }
void gdsp_ifx_write(u32 addr, u32 val) void gdsp_ifx_write(u32 addr, u16 val)
{ {
g_dsp_cap->LogIFXWrite(addr, val); g_dsp_cap->LogIFXWrite(addr, val);
@ -136,10 +136,6 @@ void gdsp_ifx_write(u32 addr, u32 val)
g_dsp.ifx_regs[DSP_DSBL] = 0; g_dsp.ifx_regs[DSP_DSBL] = 0;
break; break;
case DSP_ACDATA1: // Accelerator write (Zelda type) - "UnkZelda"
dsp_write_aram_d3(val);
break;
case DSP_GAIN: case DSP_GAIN:
if (val) if (val)
{ {
@ -151,21 +147,45 @@ void gdsp_ifx_write(u32 addr, u32 val)
case DSP_DSCR: case DSP_DSCR:
g_dsp.ifx_regs[addr & 0xFF] = val; g_dsp.ifx_regs[addr & 0xFF] = val;
break; break;
/*
case DSP_ACCAL:
dsp_step_accelerator();
break;
*/
// Masking occurs for the start and end addresses as soon as the registers are written to.
case DSP_ACSAH: case DSP_ACSAH:
case DSP_ACEAH: g_dsp.accelerator->SetStartAddress(val << 16 |
g_dsp.ifx_regs[addr & 0xff] = val & 0x3fff; static_cast<u16>(g_dsp.accelerator->GetStartAddress()));
break;
case DSP_ACSAL:
g_dsp.accelerator->SetStartAddress(
static_cast<u16>(g_dsp.accelerator->GetStartAddress() >> 16) << 16 | val);
break;
case DSP_ACEAH:
g_dsp.accelerator->SetEndAddress(val << 16 |
static_cast<u16>(g_dsp.accelerator->GetEndAddress()));
break;
case DSP_ACEAL:
g_dsp.accelerator->SetEndAddress(
static_cast<u16>(g_dsp.accelerator->GetEndAddress() >> 16) << 16 | val);
break; break;
// This also happens for the current address, but with a different mask.
case DSP_ACCAH: case DSP_ACCAH:
g_dsp.ifx_regs[addr & 0xff] = val & 0xbfff; g_dsp.accelerator->SetCurrentAddress(val << 16 |
static_cast<u16>(g_dsp.accelerator->GetCurrentAddress()));
break;
case DSP_ACCAL:
g_dsp.accelerator->SetCurrentAddress(
static_cast<u16>(g_dsp.accelerator->GetCurrentAddress() >> 16) << 16 | val);
break;
case DSP_FORMAT:
g_dsp.accelerator->SetSampleFormat(val);
break;
case DSP_YN1:
g_dsp.accelerator->SetYn1(val);
break;
case DSP_YN2:
g_dsp.accelerator->SetYn2(val);
break;
case DSP_PRED_SCALE:
g_dsp.accelerator->SetPredScale(val);
break;
case DSP_ACDATA1: // Accelerator write (Zelda type) - "UnkZelda"
g_dsp.accelerator->WriteD3(val);
break; break;
default: default:
@ -208,11 +228,30 @@ static u16 _gdsp_ifx_read(u16 addr)
case DSP_DSCR: case DSP_DSCR:
return g_dsp.ifx_regs[addr & 0xFF]; return g_dsp.ifx_regs[addr & 0xFF];
case DSP_ACSAH:
return static_cast<u16>(g_dsp.accelerator->GetStartAddress() >> 16);
case DSP_ACSAL:
return static_cast<u16>(g_dsp.accelerator->GetStartAddress());
case DSP_ACEAH:
return static_cast<u16>(g_dsp.accelerator->GetEndAddress() >> 16);
case DSP_ACEAL:
return static_cast<u16>(g_dsp.accelerator->GetEndAddress());
case DSP_ACCAH:
return static_cast<u16>(g_dsp.accelerator->GetCurrentAddress() >> 16);
case DSP_ACCAL:
return static_cast<u16>(g_dsp.accelerator->GetCurrentAddress());
case DSP_FORMAT:
return g_dsp.accelerator->GetSampleFormat();
case DSP_YN1:
return g_dsp.accelerator->GetYn1();
case DSP_YN2:
return g_dsp.accelerator->GetYn2();
case DSP_PRED_SCALE:
return g_dsp.accelerator->GetPredScale();
case DSP_ACCELERATOR: // ADPCM Accelerator reads case DSP_ACCELERATOR: // ADPCM Accelerator reads
return dsp_read_accelerator(); return g_dsp.accelerator->Read(reinterpret_cast<s16*>(&g_dsp.ifx_regs[DSP_COEF_A1_0]));
case DSP_ACDATA1: // Accelerator reads (Zelda type) - "UnkZelda" case DSP_ACDATA1: // Accelerator reads (Zelda type) - "UnkZelda"
return dsp_read_aram_d3(); return g_dsp.accelerator->ReadD3();
default: default:
if ((addr & 0xff) >= 0xa0) if ((addr & 0xff) >= 0xa0)

View File

@ -22,6 +22,6 @@ u16 gdsp_mbox_read_h(Mailbox mbx);
u16 gdsp_mbox_read_l(Mailbox mbx); u16 gdsp_mbox_read_l(Mailbox mbx);
void gdsp_ifx_init(); void gdsp_ifx_init();
void gdsp_ifx_write(u32 addr, u32 val); void gdsp_ifx_write(u32 addr, u16 val);
u16 gdsp_ifx_read(u16 addr); u16 gdsp_ifx_read(u16 addr);
} // namespace DSP } // namespace DSP

View File

@ -530,9 +530,9 @@ void DSPEmitter::dmem_write(X64Reg value)
FixupBranch end = J(true); FixupBranch end = J(true);
// else if (saddr == 0xf) // else if (saddr == 0xf)
SetJumpTarget(ifx); SetJumpTarget(ifx);
// Does it mean gdsp_ifx_write needs u32 rather than u16?
DSPJitRegCache c(m_gpr); DSPJitRegCache c(m_gpr);
X64Reg abisafereg = m_gpr.MakeABICallSafe(value); X64Reg abisafereg = m_gpr.MakeABICallSafe(value);
MOVZX(32, 16, abisafereg, R(abisafereg));
m_gpr.PushRegs(); m_gpr.PushRegs();
ABI_CallFunctionRR(gdsp_ifx_write, EAX, abisafereg); ABI_CallFunctionRR(gdsp_ifx_write, EAX, abisafereg);
m_gpr.PopRegs(); m_gpr.PopRegs();

View File

@ -13,6 +13,7 @@
#endif #endif
#include <functional> #include <functional>
#include <memory>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
@ -163,21 +164,67 @@ void DumpPB(const PB_TYPE& pb)
#endif #endif
// Simulated accelerator state. // Simulated accelerator state.
static u32 acc_loop_addr, acc_end_addr;
static u32* acc_cur_addr;
static PB_TYPE* acc_pb; static PB_TYPE* acc_pb;
static bool acc_end_reached; static bool acc_end_reached;
// Accelerator backend used by the AX HLE voice code. It reads and writes ARAM
// and reacts to the end-of-range event using the parameter block (acc_pb) that
// is currently being processed.
class HLEAccelerator final : public Accelerator
{
protected:
  // Invoked by the accelerator when a read reaches the end address.
  void OnEndException() override
  {
    if (!acc_pb->audio_addr.looping)
    {
      // A non-looping voice has reached its end, so it stops running.
      acc_pb->running = 0;

#ifdef AX_WII
      // One of the few meaningful differences between AXGC and AXWii:
      // AXGC handles the end of a non-looping voice by relying on the
      // accelerator to stop reads once the loop address is reached, whereas
      // AXWii keeps the 0000 samples internally in DRAM and uses an internal
      // pointer to them (the loop address does not contain 0000 samples on
      // AXWii!).
      acc_end_reached = true;
#endif
      return;
    }

    // Looping voice: load the ADPCM info needed to continue at loop_addr.
    SetPredScale(acc_pb->adpcm_loop_info.pred_scale);

    if (acc_pb->is_stream)
    {
      // Write YN1 and YN2 back with their current values. Writing YN2 is what
      // makes the accelerator resume reads.
      SetYn1(GetYn1());
      SetYn2(GetYn2());

#ifdef AX_GC
      // Streamed voices also count how many times they have looped.
      acc_pb->loop_counter++;
#endif
      return;
    }

    // Non-streamed voice: restore the history samples saved for the loop point.
    SetYn1(acc_pb->adpcm_loop_info.yn1);
    SetYn2(acc_pb->adpcm_loop_info.yn2);
  }

  // Reads one byte from ARAM.
  u8 ReadMemory(u32 address) override { return ReadARAM(address); }

  // Writes one byte to ARAM; WriteARAM takes (value, address).
  void WriteMemory(u32 address, u8 value) override { WriteARAM(value, address); }
};
static std::unique_ptr<Accelerator> s_accelerator = std::make_unique<HLEAccelerator>();
// Sets up the simulated accelerator. // Sets up the simulated accelerator.
void AcceleratorSetup(PB_TYPE* pb, u32* cur_addr) void AcceleratorSetup(PB_TYPE* pb)
{ {
acc_pb = pb; acc_pb = pb;
// Masking occurs for the start and end addresses as soon as the registers are written to. s_accelerator->SetStartAddress(HILO_TO_32(pb->audio_addr.loop_addr));
acc_loop_addr = HILO_TO_32(pb->audio_addr.loop_addr) & 0x3fffffff; s_accelerator->SetEndAddress(HILO_TO_32(pb->audio_addr.end_addr));
acc_end_addr = HILO_TO_32(pb->audio_addr.end_addr) & 0x3fffffff; s_accelerator->SetCurrentAddress(HILO_TO_32(pb->audio_addr.cur_addr));
acc_cur_addr = cur_addr; s_accelerator->SetSampleFormat(pb->audio_addr.sample_format);
// It also happens for the current address, but with a different mask. s_accelerator->SetYn1(pb->adpcm.yn1);
*acc_cur_addr &= 0xbfffffff; s_accelerator->SetYn2(pb->adpcm.yn2);
s_accelerator->SetPredScale(pb->adpcm.pred_scale);
acc_end_reached = false; acc_end_reached = false;
} }
@ -190,50 +237,7 @@ u16 AcceleratorGetSample()
if (acc_end_reached) if (acc_end_reached)
return 0; return 0;
auto end_address_reached = [] { return s_accelerator->Read(acc_pb->adpcm.coefs);
// loop back to loop_addr.
*acc_cur_addr = acc_loop_addr;
if (acc_pb->audio_addr.looping)
{
// Set the ADPCM info to continue processing at loop_addr.
//
// For some reason, yn1 and yn2 aren't set if the voice is not of
// stream type. This is what the AX UCode does and I don't really
// know why.
acc_pb->adpcm.pred_scale = acc_pb->adpcm_loop_info.pred_scale;
if (!acc_pb->is_stream)
{
acc_pb->adpcm.yn1 = acc_pb->adpcm_loop_info.yn1;
acc_pb->adpcm.yn2 = acc_pb->adpcm_loop_info.yn2;
}
#ifdef AX_GC
else
{
// If we're streaming, increment the loop counter.
acc_pb->loop_counter++;
}
#endif
}
else
{
// Non looping voice reached the end -> running = 0.
acc_pb->running = 0;
#ifdef AX_WII
// One of the few meaningful differences between AXGC and AXWii:
// while AXGC handles non looping voices ending by having 0000
// samples at the loop address, AXWii has the 0000 samples
// internally in DRAM and use an internal pointer to it (loop addr
// does not contain 0000 samples on AXWii!).
acc_end_reached = true;
#endif
}
};
return ReadAccelerator(acc_loop_addr, acc_end_addr, acc_cur_addr,
acc_pb->audio_addr.sample_format, &acc_pb->adpcm.yn1, &acc_pb->adpcm.yn2,
&acc_pb->adpcm.pred_scale, acc_pb->adpcm.coefs, end_address_reached);
} }
// Reads samples from the input callback, resamples them to <count> samples at // Reads samples from the input callback, resamples them to <count> samples at
@ -375,8 +379,7 @@ u32 ResampleAudio(std::function<s16(u32)> input_callback, s16* output, u32 count
// if required. // if required.
void GetInputSamples(PB_TYPE& pb, s16* samples, u16 count, const s16* coeffs) void GetInputSamples(PB_TYPE& pb, s16* samples, u16 count, const s16* coeffs)
{ {
u32 cur_addr = HILO_TO_32(pb.audio_addr.cur_addr); AcceleratorSetup(&pb);
AcceleratorSetup(&pb, &cur_addr);
if (coeffs) if (coeffs)
coeffs += pb.coef_select * 0x200; coeffs += pb.coef_select * 0x200;
@ -385,9 +388,12 @@ void GetInputSamples(PB_TYPE& pb, s16* samples, u16 count, const s16* coeffs)
pb.src.cur_addr_frac, HILO_TO_32(pb.src.ratio), pb.src_type, coeffs); pb.src.cur_addr_frac, HILO_TO_32(pb.src.ratio), pb.src_type, coeffs);
pb.src.cur_addr_frac = (curr_pos & 0xFFFF); pb.src.cur_addr_frac = (curr_pos & 0xFFFF);
// Update current position in the PB. // Update current position, YN1, YN2 and pred scale in the PB.
pb.audio_addr.cur_addr_hi = static_cast<u16>(cur_addr >> 16) & 0xbfff; pb.audio_addr.cur_addr_hi = static_cast<u16>(s_accelerator->GetCurrentAddress() >> 16);
pb.audio_addr.cur_addr_lo = static_cast<u16>(cur_addr); pb.audio_addr.cur_addr_lo = static_cast<u16>(s_accelerator->GetCurrentAddress());
pb.adpcm.yn1 = s_accelerator->GetYn1();
pb.adpcm.yn2 = s_accelerator->GetYn2();
pb.adpcm.pred_scale = s_accelerator->GetPredScale();
} }
// Add samples to an output buffer, with optional volume ramping. // Add samples to an output buffer, with optional volume ramping.

View File

@ -17,6 +17,7 @@
#include "Common/Thread.h" #include "Common/Thread.h"
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "Core/Core.h" #include "Core/Core.h"
#include "Core/DSP/DSPAccelerator.h"
#include "Core/DSP/DSPCaptureLogger.h" #include "Core/DSP/DSPCaptureLogger.h"
#include "Core/DSP/DSPCore.h" #include "Core/DSP/DSPCore.h"
#include "Core/DSP/DSPHWInterface.h" #include "Core/DSP/DSPHWInterface.h"
@ -71,6 +72,7 @@ void DSPLLE::DoState(PointerWrap& p)
p.Do(g_dsp.step_counter); p.Do(g_dsp.step_counter);
p.DoArray(g_dsp.ifx_regs); p.DoArray(g_dsp.ifx_regs);
g_dsp.accelerator->DoState(p);
p.Do(g_dsp.mbox[0]); p.Do(g_dsp.mbox[0]);
p.Do(g_dsp.mbox[1]); p.Do(g_dsp.mbox[1]);
Common::UnWriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false); Common::UnWriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false);

View File

@ -74,7 +74,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread; static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system // Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 89; // Last changed in PR 5890 static const u32 STATE_VERSION = 90; // Last changed in PR 6077
// Maps savestate versions to Dolphin versions. // Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list, // Versions after 42 don't need to be added to this list,

View File

@ -390,6 +390,10 @@ void handle_dsp_mail(void)
while (real_dsp.CheckMailTo()) while (real_dsp.CheckMailTo())
; ;
} }
else if (mail == 0x80050000)
{
CON_PrintRow(4, 25, "ACCOV at step %i", dsp_steps);
}
// ROM dumping mails // ROM dumping mails
else if (mail == 0x8888c0de) else if (mail == 0x8888c0de)

View File

@ -0,0 +1,64 @@
; Accelerator register test.
; Programs the accelerator with a small address range (start 0x00000000,
; end 0x00000011) and sample format 0, then performs a series of accelerator
; reads. The accelerator registers are copied into DSP registers and sent back
; once before the first read and once after every read.
incdir "tests"
include "dsp_base.inc"
; Test parameters
lri $AC0.M, #0x0000 ; start
lri $AC0.L, #0x0000 ; start
lri $AC1.M, #0x0000 ; end
lri $AC1.L, #0x0011 ; end
; Reset some registers
; The same value (0xffff) is written to all three registers.
lri $AC0.H, #0xffff
sr @0xffda, $AC0.H ; pred scale
sr @0xffdb, $AC0.H ; yn1
sr @0xffdc, $AC0.H ; yn2
; Set the sample format
; 0xffd1 is the format register (see load_hw_reg_to_regs below); it is set to 0.
lri $AC0.H, #0x0
sr @0xffd1, $AC0.H
; Set the starting and current address
; Both are loaded from AC0, so the current address begins at the start address.
srs @ACSAH, $AC0.M
srs @ACCAH, $AC0.M
srs @ACSAL, $AC0.L
srs @ACCAL, $AC0.L
; Set the ending address
srs @ACEAH, $AC1.M
srs @ACEAL, $AC1.L
call load_hw_reg_to_regs
call send_back ; check the accelerator regs before a read
; Presumably repeats the block up to end_of_loop 40 times - TODO confirm the
; exact bloopi semantics against the DSP documentation.
bloopi #40, end_of_loop
; One read through @ARAM per iteration; the value read lands in $IX3.
lr $IX3, @ARAM
call load_hw_reg_to_regs
call send_back ; after a read
end_of_loop:
nop
jmp end_of_test
; Copies the accelerator hardware registers into DSP registers so that the
; following send_back call can report them.
load_hw_reg_to_regs:
lr $AR0, @0xffd1 ; format
lr $AR1, @0xffd2 ; unknown
lr $AR2, @0xffda ; pred scale
lr $AR3, @0xffdb ; yn1
lr $IX0, @0xffdc ; yn2
lr $IX1, @0xffdf ; unknown accelerator register
; Start address -> AC0, end address -> AC1 (high parts cleared first).
lri $AC0.H, #0
lrs $AC0.M, @ACSAH
lrs $AC0.L, @ACSAL
lri $AC1.H, #0
lrs $AC1.M, @ACEAH
lrs $AC1.L, @ACEAL
; Current address -> AX0, then read twice more into AX1.
; NOTE(review): the repeated reads look like a check that reading the current
; address has no side effect - confirm with the test author.
lrs $AX0.H, @ACCAH
lrs $AX0.L, @ACCAL
lrs $AX1.H, @ACCAH
lrs $AX1.L, @ACCAL
lrs $AX1.H, @ACCAH
lrs $AX1.L, @ACCAL
ret

View File

@ -106,7 +106,7 @@ MEM_LO: equ 0x0f7F
jmp start_of_test jmp start_of_test
; This is where we jump when we're done testing, see above. ; This is where we jump when we're done testing, see above.
; We just fall into a loop, playing dead until someone resets the DSP. ; We just fall into a loop, playing dead until someone resets the DSP.
end_of_test: end_of_test:
nop nop
jmp end_of_test jmp end_of_test
@ -138,8 +138,18 @@ irq4:
lri $ac0.m, #0x0004 lri $ac0.m, #0x0004
jmp irq jmp irq
irq5: irq5:
lri $ac0.m, #0x0005 lrs $ac0.m, @DMBH
jmp irq andcf $ac0.m, #0x8000
jlz irq5
si @DMBH, #0x8005
si @DMBL, #0x0000
si @DIRQ, #0x0001
lri $ac0.m, #0xbbbb
sr @0xffda, $ac0.m ; pred scale
sr @0xffdb, $ac0.m ; yn1
lr $ix2, @ARAM
sr @0xffdc, $ac0.m ; yn2
rti
irq6: irq6:
lri $ac0.m, #0x0006 lri $ac0.m, #0x0006
jmp irq jmp irq
@ -156,7 +166,7 @@ irq:
si @DIRQ, #0x0001 si @DIRQ, #0x0001
halt ; Through some magic this allows us to properly ack the exception in dspspy halt ; Through some magic this allows us to properly ack the exception in dspspy
;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all ;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all
; DMA:s the current state of the registers back to the PowerPC. To do this, ; DMA:s the current state of the registers back to the PowerPC. To do this,
; it must write the contents of all regs to DRAM. ; it must write the contents of all regs to DRAM.
; Unfortunately, this loop uses ar0 so it's best to use AR1 and friends for testing ; Unfortunately, this loop uses ar0 so it's best to use AR1 and friends for testing
@ -216,13 +226,13 @@ send_back:
dma_copy: dma_copy:
mrr $ax0.l, $ac1.m mrr $ax0.l, $ac1.m
; Wait for the CPU to send us a mail. ; Wait for the CPU to send us a mail.
call 0x807e call 0x807e
si @DMBH, #0x8888 si @DMBH, #0x8888
si @DMBL, #0xfeeb si @DMBL, #0xfeeb
si @DIRQ, #0x0001 si @DIRQ, #0x0001
; wait for the CPU to receive our response before we execute the next op ; wait for the CPU to receive our response before we execute the next op
call 0x8078 call 0x8078
andi $ac0.m, #0x7fff andi $ac0.m, #0x7fff
lrs $ac1.m, @CMBL lrs $ac1.m, @CMBL
@ -261,14 +271,14 @@ dma_copy:
lrri $ac0.m, @$ar0 lrri $ac0.m, @$ar0
lrri $ac1.m, @$ar0 lrri $ac1.m, @$ar0
lr $ar0, @REGS_BASE lr $ar0, @REGS_BASE
ret ; from send_back ret ; from send_back
; If you are in set40 mode, use this instead of send_back if you want to stay ; If you are in set40 mode, use this instead of send_back if you want to stay
; in set40 mode. ; in set40 mode.
send_back_40: send_back_40:
set16 set16
call send_back call send_back
set40 set40
ret ret

View File

@ -2,11 +2,12 @@ add_dolphin_test(MMIOTest MMIOTest.cpp)
add_dolphin_test(PageFaultTest PageFaultTest.cpp) add_dolphin_test(PageFaultTest PageFaultTest.cpp)
add_dolphin_test(CoreTimingTest CoreTimingTest.cpp) add_dolphin_test(CoreTimingTest CoreTimingTest.cpp)
add_dolphin_test(DSPAcceleratorTest DSP/DSPAcceleratorTest.cpp)
add_dolphin_test(DSPAssemblyTest add_dolphin_test(DSPAssemblyTest
DSP/DSPAssemblyTest.cpp DSP/DSPAssemblyTest.cpp
DSP/DSPTestBinary.cpp DSP/DSPTestBinary.cpp
DSP/DSPTestText.cpp DSP/DSPTestText.cpp
DSP/HermesBinary.cpp DSP/HermesBinary.cpp
) )
add_dolphin_test(ESFormatsTest IOS/ES/FormatsTest.cpp IOS/ES/TestBinaryData.cpp) add_dolphin_test(ESFormatsTest IOS/ES/FormatsTest.cpp IOS/ES/TestBinaryData.cpp)

View File

@ -0,0 +1,187 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <array>
#include <gtest/gtest.h>
#include "Common/CommonTypes.h"
#include "Core/DSP/DSPAccelerator.h"
// Simulated DSP accelerator.
class TestAccelerator : public DSP::Accelerator
{
public:
// For convenience.
u16 TestRead()
{
std::array<s16, 16> coefs{};
m_accov_raised = false;
return Read(coefs.data());
}
bool EndExceptionRaised() const { return m_accov_raised; }
protected:
void OnEndException() override
{
EXPECT_TRUE(m_reads_stopped);
m_accov_raised = true;
}
u8 ReadMemory(u32 address) override { return 0; }
void WriteMemory(u32 address, u8 value) override {}
bool m_accov_raised = false;
};
// Small start/current/end addresses written through the setters are expected
// to be read back unchanged through the matching getters.
TEST(DSPAccelerator, Initialization)
{
  constexpr u32 start_address = 0x00000000;
  constexpr u32 end_address = 0x00001000;

  TestAccelerator accelerator;
  accelerator.SetCurrentAddress(start_address);
  accelerator.SetStartAddress(start_address);
  accelerator.SetEndAddress(end_address);

  EXPECT_EQ(accelerator.GetStartAddress(), start_address);
  EXPECT_EQ(accelerator.GetCurrentAddress(), start_address);
  EXPECT_EQ(accelerator.GetEndAddress(), end_address);
}
// With the end address far away, each of the first fifteen reads is expected
// to advance the current address by one and not to raise the end exception.
TEST(DSPAccelerator, SimpleReads)
{
  TestAccelerator accelerator;
  accelerator.SetCurrentAddress(0x00000000);
  accelerator.SetStartAddress(0x00000000);
  accelerator.SetEndAddress(0x00001000);

  size_t reads_done = 0;
  while (reads_done < 0xf)
  {
    accelerator.TestRead();
    ++reads_done;

    EXPECT_FALSE(accelerator.EndExceptionRaised());
    EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + reads_done);
  }
}
// Address registers are expected to drop some of their upper bits when read
// back; the current address is expected to keep bit 31 while the start and end
// addresses are not.
TEST(DSPAccelerator, AddressMasking)
{
  TestAccelerator accelerator;

  // Writes the three address registers in the order current, start, end.
  const auto write_addresses = [&accelerator](u32 current, u32 start, u32 end) {
    accelerator.SetCurrentAddress(current);
    accelerator.SetStartAddress(start);
    accelerator.SetEndAddress(end);
  };

  // Bit 30 is set in every written value and expected to be gone on read-back.
  write_addresses(0x48000000, 0x48000000, 0x48001000);
  EXPECT_EQ(accelerator.GetStartAddress(), 0x08000000u);
  EXPECT_EQ(accelerator.GetCurrentAddress(), 0x08000000u);
  EXPECT_EQ(accelerator.GetEndAddress(), 0x08001000u);

  // All bits set: start/end are expected to lose bits 31 and 30, the current
  // address only bit 30.
  write_addresses(0xffffffff, 0xffffffff, 0xffffffff);
  EXPECT_EQ(accelerator.GetStartAddress(), 0x3fffffffu);
  EXPECT_EQ(accelerator.GetCurrentAddress(), 0xbfffffffu);
  EXPECT_EQ(accelerator.GetEndAddress(), 0x3fffffffu);
}
// The pred/scale register is expected to read back a masked value; every
// expected value below equals the written value ANDed with 0x7f.
TEST(DSPAccelerator, PredScaleRegisterMasking)
{
  struct MaskingCase
  {
    u16 written;
    unsigned int expected;
  };
  const MaskingCase cases[] = {
      {0xbbbb, 0x3bu},
      {0xcccc, 0x4cu},
      {0xffff, 0x7fu},
  };

  // The same accelerator instance is reused for all writes.
  TestAccelerator accelerator;
  for (const MaskingCase& masking_case : cases)
  {
    accelerator.SetPredScale(masking_case.written);
    EXPECT_EQ(accelerator.GetPredScale(), masking_case.expected);
  }
}
// Walks the accelerator up to its end address, checks the end exception and
// the wrap back to the start address, then checks that reads stay blocked
// until YN2 is written and work again afterwards.
TEST(DSPAccelerator, OverflowBehaviour)
{
  TestAccelerator accelerator;
  accelerator.SetCurrentAddress(0x00000000);
  accelerator.SetStartAddress(0x00000000);
  accelerator.SetEndAddress(0x0000000f);

  // Fifteen reads in a row; each one is expected to move the current address
  // one step further from the start address without raising the end exception.
  const auto expect_fifteen_plain_reads = [&accelerator] {
    for (size_t offset = 1; offset < 0x10; ++offset)
    {
      accelerator.TestRead();
      EXPECT_FALSE(accelerator.EndExceptionRaised());
      EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + offset);
    }
  };

  expect_fifteen_plain_reads();

  // The sixteenth read is expected to raise the end exception and to reset the
  // current address to the start address.
  accelerator.TestRead();
  EXPECT_TRUE(accelerator.EndExceptionRaised());
  EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress());

  // An ACCOV has fired, so reads are stopped until the YN2 register is reset;
  // this read therefore should not update the current address.
  accelerator.TestRead();
  EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress());

  // Simulate a write to YN2, which internally resets the "reads stopped" flag.
  // Once it has been reset, reads should work again.
  accelerator.SetYn2(0);
  expect_fifteen_plain_reads();
}
// End address 0x10 (a multiple of 16): checks the current address and the end
// exception flag for the reads around the end of the range.
TEST(DSPAccelerator, OverflowFor16ByteAlignedAddresses)
{
  TestAccelerator accelerator;
  accelerator.SetCurrentAddress(0x00000000);
  accelerator.SetStartAddress(0x00000000);
  accelerator.SetEndAddress(0x00000010);

  // Reads 1..15: one address step each, no end exception expected.
  for (size_t offset = 1; offset < 0x10; ++offset)
  {
    accelerator.TestRead();
    EXPECT_FALSE(accelerator.EndExceptionRaised());
    EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + offset);
  }

  // Read 16: no end exception expected; the current address is expected to be
  // one past the start address.
  accelerator.TestRead();
  EXPECT_FALSE(accelerator.EndExceptionRaised());
  EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 1);

  // Read 17: the current address is expected to advance by one more.
  accelerator.TestRead();
  EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 2);
}
// End address 0x11 (low nibble equal to 1): checks the current address and the
// end exception flag for the reads around the end of the range.
TEST(DSPAccelerator, OverflowForXXXXXXX1Addresses)
{
  TestAccelerator accelerator;
  accelerator.SetCurrentAddress(0x00000000);
  accelerator.SetStartAddress(0x00000000);
  accelerator.SetEndAddress(0x00000011);

  // Reads 1..15: one address step each, no end exception expected.
  for (size_t offset = 1; offset < 0x10; ++offset)
  {
    accelerator.TestRead();
    EXPECT_FALSE(accelerator.EndExceptionRaised());
    EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + offset);
  }

  // Read 16: no end exception expected; the current address is expected to be
  // back at the start address.
  accelerator.TestRead();
  EXPECT_FALSE(accelerator.EndExceptionRaised());
  EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress());

  // Read 17: the current address is expected to advance by one again.
  accelerator.TestRead();
  EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 1);
}
// With the end address far away, the read that follows address 0xf is expected
// to land on 0x12 (skipping 0x10 and 0x11), after which stepping by one resumes.
TEST(DSPAccelerator, CurrentAddressSkips)
{
  TestAccelerator accelerator;
  accelerator.SetCurrentAddress(0x00000000);
  accelerator.SetStartAddress(0x00000000);
  accelerator.SetEndAddress(0x00001000);

  // Fifteen reads are expected to bring the current address to 0xf.
  for (int remaining_reads = 0xf; remaining_reads > 0; --remaining_reads)
  {
    accelerator.TestRead();
  }
  EXPECT_EQ(accelerator.GetCurrentAddress(), 0x0000000fu);

  // The next read is expected to jump over two addresses.
  accelerator.TestRead();
  EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000012u);

  // Afterwards the address is expected to advance by one per read again.
  accelerator.TestRead();
  EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000013u);
}