Core audio system work (Watch for regressions please!):
* Restore Audio Throttle to function properly; it was broken by Ayuanx many hundreds of revisions back.
* Simplify the DSPLLE JIT dispatcher in preparation for an asm rewrite.
* Remove the hack that made the DSPLLE JIT seem faster than it was by running fewer cycles, but resulting in bad sound. This shows off how mysteriously slow it is - I don't understand why it's not faster. Use the DSPLLE interpreter for now if you want to use DSPLLE.
* Make "DSPLLE on Thread" work properly with correct-ish timing - although the speed benefit is really small now.

If it seems like this change slows anything non-LLE down, try turning off Audio Throttle and use the frame limiter in options instead.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5541 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
6ea0d50872
commit
1d1b08a091
|
@ -575,25 +575,25 @@ void GenerateDSPInterruptFromPlugin(DSPInterruptType type, bool _bSet)
|
|||
// This happens at 4 kHz, since 32 bytes at 4 kHz = 4 bytes at 32 kHz (16-bit stereo PCM)
|
||||
void UpdateAudioDMA()
|
||||
{
|
||||
if (g_audioDMA.AudioDMAControl.Enable && g_audioDMA.BlocksLeft)
|
||||
if (g_audioDMA.BlocksLeft)
|
||||
{
|
||||
// Read audio at g_audioDMA.ReadAddress in RAM and push onto an
|
||||
// external audio fifo in the emulator, to be mixed with the disc
|
||||
// streaming output. If that audio queue fills up, we delay the
|
||||
// emulator.
|
||||
dsp_plugin->DSP_SendAIBuffer(g_audioDMA.ReadAddress, 8);
|
||||
|
||||
// AyuanX: let's do it in a bundle to speed up
|
||||
if (g_audioDMA.BlocksLeft == g_audioDMA.AudioDMAControl.NumBlocks)
|
||||
dsp_plugin->DSP_SendAIBuffer(g_audioDMA.SourceAddress, g_audioDMA.AudioDMAControl.NumBlocks * 8);
|
||||
|
||||
//g_audioDMA.ReadAddress += 32;
|
||||
g_audioDMA.ReadAddress += 32;
|
||||
g_audioDMA.BlocksLeft--;
|
||||
|
||||
if (g_audioDMA.BlocksLeft == 0)
|
||||
{
|
||||
GenerateDSPInterrupt(DSP::INT_AID);
|
||||
//g_audioDMA.ReadAddress = g_audioDMA.SourceAddress;
|
||||
g_audioDMA.BlocksLeft = g_audioDMA.AudioDMAControl.NumBlocks;
|
||||
if (g_audioDMA.AudioDMAControl.Enable)
|
||||
{
|
||||
g_audioDMA.BlocksLeft = g_audioDMA.AudioDMAControl.NumBlocks;
|
||||
g_audioDMA.ReadAddress = g_audioDMA.SourceAddress;
|
||||
}
|
||||
//DEBUG_LOG(DSPLLE, "ADMA read addresses: %08x", g_audioDMA.ReadAddress);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -187,6 +187,7 @@ void DSPCore_CheckExternalInterrupt()
|
|||
|
||||
void DSPCore_CheckExceptions()
|
||||
{
|
||||
// Early out to skip the loop in the common case.
|
||||
if (g_dsp.exceptions == 0)
|
||||
return;
|
||||
|
||||
|
@ -215,14 +216,19 @@ void DSPCore_CheckExceptions()
|
|||
}
|
||||
}
|
||||
|
||||
// Delegate to JIT or interpreter as appropriate.
|
||||
// Delegate to JIT or interpreter as appropriate.
|
||||
// Handle state changes and stepping.
|
||||
int DSPCore_RunCycles(int cycles)
|
||||
{
|
||||
if(jit) {
|
||||
jit->RunBlock(cycles);
|
||||
static int spare_cycles = 0;
|
||||
if (jit)
|
||||
{
|
||||
// DSPCore_CheckExceptions();
|
||||
// DSPCore_CheckExternalInterrupt();
|
||||
spare_cycles = jit->RunForCycles(cycles + spare_cycles);
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (cycles > 0) {
|
||||
reswitch:
|
||||
switch (core_state)
|
||||
|
|
|
@ -215,53 +215,52 @@ const u8 *DSPEmitter::Compile(int start_addr) {
|
|||
|
||||
void STACKALIGN DSPEmitter::CompileDispatcher()
|
||||
{
|
||||
/*
|
||||
// TODO
|
||||
enterDispatcher = GetCodePtr();
|
||||
AlignCode16();
|
||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||
|
||||
const u8 *outer_loop = GetCodePtr();
|
||||
|
||||
|
||||
//Landing pad for drec space
|
||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
RET();*/
|
||||
}
|
||||
|
||||
// Don't use the % operator in the inner loop. It's slow.
|
||||
void STACKALIGN DSPEmitter::RunBlock(int cycles)
|
||||
int STACKALIGN DSPEmitter::RunForCycles(int cycles)
|
||||
{
|
||||
// How does this variable work?
|
||||
static int idleskip = 0;
|
||||
const int idle_cycles = 1000;
|
||||
|
||||
#define BURST_LENGTH 512 // Must be a power of two
|
||||
u16 block_cycles = BURST_LENGTH + 1;
|
||||
|
||||
// Trigger an external interrupt at the start of the cycle
|
||||
while (!(g_dsp.cr & CR_HALT))
|
||||
{
|
||||
if (block_cycles > BURST_LENGTH)
|
||||
{
|
||||
block_cycles = 0;
|
||||
}
|
||||
|
||||
DSPCore_CheckExternalInterrupt();
|
||||
DSPCore_CheckExceptions();
|
||||
// Compile the block if needed
|
||||
if (!blocks[g_dsp.pc])
|
||||
u16 block_addr = g_dsp.pc;
|
||||
if (!blocks[block_addr])
|
||||
{
|
||||
blockSize[g_dsp.pc] = 0;
|
||||
CompileCurrent();
|
||||
}
|
||||
|
||||
int block_size = blockSize[block_addr];
|
||||
// Execute the block if we have enough cycles
|
||||
if (cycles > blockSize[g_dsp.pc])
|
||||
if (cycles > block_size)
|
||||
{
|
||||
u16 start_addr = g_dsp.pc;
|
||||
|
||||
// 5%. Not sure where the rationale originally came from.
|
||||
if (((idleskip & 127) > 121) &&
|
||||
(DSPAnalyzer::code_flags[g_dsp.pc] & DSPAnalyzer::CODE_IDLE_SKIP)) {
|
||||
block_cycles = 0;
|
||||
blocks[block_addr]();
|
||||
if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP) {
|
||||
if (cycles > idle_cycles)
|
||||
cycles -= idle_cycles;
|
||||
else
|
||||
cycles = 0;
|
||||
} else {
|
||||
blocks[g_dsp.pc]();
|
||||
cycles -= block_size;
|
||||
}
|
||||
idleskip++;
|
||||
if ((idleskip & (BURST_LENGTH - 1)) == 0)
|
||||
idleskip = 0;
|
||||
block_cycles += blockSize[start_addr];
|
||||
cycles -= blockSize[start_addr];
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cycles;
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ public:
|
|||
|
||||
const u8 *Compile(int start_addr);
|
||||
|
||||
void STACKALIGN RunBlock(int cycles);
|
||||
int STACKALIGN RunForCycles(int cycles);
|
||||
|
||||
// Register helpers
|
||||
void setCompileSR(u16 bit);
|
||||
|
@ -101,11 +101,19 @@ private:
|
|||
u16 *blockSize;
|
||||
u16 compileSR;
|
||||
|
||||
// CALL this to start the dispatcher
|
||||
u8 *enterDispatcher;
|
||||
|
||||
// JMP here when a block should be dispatched. Make sure you're in a block
|
||||
// or at the same stack level already.
|
||||
u8 *dispatcher;
|
||||
|
||||
// The index of the last stored ext value (compile time).
|
||||
int storeIndex;
|
||||
|
||||
// Counts down.
|
||||
// int cycles;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(DSPEmitter);
|
||||
|
||||
void ToMask(Gen::X64Reg value_reg = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI);
|
||||
|
|
|
@ -104,8 +104,8 @@ void Run()
|
|||
gdsp_running = true;
|
||||
while (!(g_dsp.cr & CR_HALT) && gdsp_running)
|
||||
{
|
||||
if(jit)
|
||||
jit->RunBlock(1);
|
||||
if (jit)
|
||||
jit->RunForCycles(1);
|
||||
else {
|
||||
// Automatically let the other threads work if we're idle skipping
|
||||
if(DSPAnalyzer::code_flags[g_dsp.pc] & DSPAnalyzer::CODE_IDLE_SKIP)
|
||||
|
@ -180,6 +180,7 @@ int RunCyclesDebug(int cycles)
|
|||
cycles--;
|
||||
if (cycles < 0)
|
||||
return 0;
|
||||
|
||||
// We don't bother directly supporting pause - if the main emu pauses,
|
||||
// it just won't call this function anymore.
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
|
||||
#include "Common.h" // Common
|
||||
#include "Atomic.h"
|
||||
#include "CommonTypes.h"
|
||||
#include "LogManager.h"
|
||||
#include "Thread.h"
|
||||
|
@ -49,7 +50,7 @@ SoundStream *soundStream = NULL;
|
|||
bool g_InitMixer = false;
|
||||
|
||||
bool bIsRunning = false;
|
||||
u32 cycle_count = 0;
|
||||
volatile u32 cycle_count = 0;
|
||||
|
||||
// Standard crap to make wxWidgets happy
|
||||
#ifdef _WIN32
|
||||
|
@ -219,17 +220,19 @@ THREAD_RETURN dsp_thread(void* lpParameter)
|
|||
{
|
||||
while (bIsRunning)
|
||||
{
|
||||
u32 cycles = 0;
|
||||
|
||||
if (jit)
|
||||
{
|
||||
cycles = cycle_count;
|
||||
DSPCore_RunCycles(cycles);
|
||||
int cycles = (int)cycle_count;
|
||||
if (cycles > 0) {
|
||||
if (jit)
|
||||
{
|
||||
cycles -= DSPCore_RunCycles(cycles);
|
||||
}
|
||||
else {
|
||||
cycles -= DSPInterpreter::RunCycles(cycles);
|
||||
}
|
||||
Common::AtomicAdd(cycle_count, -cycles);
|
||||
}
|
||||
else
|
||||
DSPInterpreter::Run();
|
||||
|
||||
cycle_count -= cycles;
|
||||
// yield?
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -374,7 +377,8 @@ void DSP_WriteMailboxLow(bool _CPUMailbox, u16 _uLowMail)
|
|||
|
||||
void DSP_Update(int cycles)
|
||||
{
|
||||
int cyclesRatio = cycles / (jit?20:6);
|
||||
int dsp_cycles = cycles / 6; //(jit?20:6);
|
||||
|
||||
// Sound stream update job has been handled by AudioDMA routine, which is more efficient
|
||||
/*
|
||||
// This gets called VERY OFTEN. The soundstream update might be expensive so only do it 200 times per second or something.
|
||||
|
@ -398,11 +402,14 @@ void DSP_Update(int cycles)
|
|||
if (!g_dspInitialize.bOnThread)
|
||||
{
|
||||
// ~1/6th as many cycles as the period PPC-side.
|
||||
DSPCore_RunCycles(cyclesRatio);;
|
||||
DSPCore_RunCycles(dsp_cycles);
|
||||
}
|
||||
else
|
||||
{
|
||||
cycle_count += (cyclesRatio);
|
||||
// Wait for dsp thread to catch up reasonably. Note: this logic should be thought through.
|
||||
while (cycle_count > dsp_cycles)
|
||||
;
|
||||
Common::AtomicAdd(cycle_count, dsp_cycles);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue