aica: fix race conditions between sh4/arm
Fix sound issue in Test Drive Le Mans and Looney Tunes
This commit is contained in:
parent
2d8bc6d6ee
commit
a920ae5901
|
@ -172,40 +172,35 @@ void WriteAicaReg(u32 reg,u32 data)
|
|||
{
|
||||
case SCIPD_addr:
|
||||
verify(sz!=1);
|
||||
// other bits are read-only
|
||||
if (data & (1<<5))
|
||||
{
|
||||
SCIPD->SCPU=1;
|
||||
update_arm_interrupts();
|
||||
}
|
||||
//Read only
|
||||
return;
|
||||
break;
|
||||
|
||||
case SCIRE_addr:
|
||||
{
|
||||
verify(sz!=1);
|
||||
SCIPD->full&=~(data /*& SCIEB->full*/ ); //is the & SCIEB->full needed ? doesn't seem like it
|
||||
data=0;//Write only
|
||||
update_arm_interrupts();
|
||||
}
|
||||
verify(sz != 1);
|
||||
SCIPD->full &= ~data /*& SCIEB->full)*/; //is the & SCIEB->full needed ? doesn't seem like it
|
||||
update_arm_interrupts();
|
||||
break;
|
||||
|
||||
case MCIPD_addr:
|
||||
if (data & (1<<5))
|
||||
verify(sz != 1);
|
||||
// other bits are read-only
|
||||
if (data & (1 << 5))
|
||||
{
|
||||
verify(sz!=1);
|
||||
MCIPD->SCPU=1;
|
||||
MCIPD->SCPU = 1;
|
||||
UpdateSh4Ints();
|
||||
aicaarm::avoidRaceCondition();
|
||||
}
|
||||
//Read only
|
||||
return;
|
||||
break;
|
||||
|
||||
case MCIRE_addr:
|
||||
{
|
||||
verify(sz!=1);
|
||||
MCIPD->full&=~data;
|
||||
UpdateSh4Ints();
|
||||
//Write only
|
||||
}
|
||||
verify(sz != 1);
|
||||
MCIPD->full &= ~data;
|
||||
UpdateSh4Ints();
|
||||
break;
|
||||
|
||||
case TIMER_A:
|
||||
|
|
|
@ -23,6 +23,7 @@ u32 rtc_EN;
|
|||
int dma_sched_id;
|
||||
u32 RealTimeClock;
|
||||
int rtc_schid = -1;
|
||||
u32 SB_ADST;
|
||||
|
||||
u32 GetRTC_now()
|
||||
{
|
||||
|
@ -330,7 +331,7 @@ static void Write_SB_ADST(u32 addr, u32 data)
|
|||
//0x005F7818 SB_ADST RW AICA:G2-DMA start
|
||||
//0x005F781C SB_ADSUSP RW AICA:G2-DMA suspend
|
||||
|
||||
if (data&1)
|
||||
if ((data & 1) == 1 && (SB_ADST & 1) == 0)
|
||||
{
|
||||
if (SB_ADEN&1)
|
||||
{
|
||||
|
@ -391,6 +392,23 @@ static void Write_SB_ADST(u32 addr, u32 data)
|
|||
}
|
||||
}
|
||||
|
||||
// Read handler for the SB_ADST register (AICA G2-DMA start).
// addr is not used by this handler.
// Returns 1 the first time it is read while the AICA DMA interrupt bit
// (holly_SPU_DMA) is set in SB_ISTNRM; otherwise returns the stored SB_ADST value.
// Bit 1 of the SB_ADST variable is used as an internal "already reported busy"
// marker and is always cleared before the stored value is returned.
u32 Read_SB_ADST(u32 addr)
|
||||
{
|
||||
// Le Mans and Looney Tunes sometimes send the same DMA transfer twice after checking SB_ADST == 0.
|
||||
// To avoid this, we pretend SB_ADST is still set when there is a pending AICA DMA interrupt.
|
||||
// This is only done once.
// NOTE(review): "once" holds per pair of reads. If the interrupt is still pending on a
// third read, the marker has been cleared by the second read and 1 is returned again.
// Confirm this alternation is intended.
|
||||
if ((SB_ISTNRM & (1 << (u8)holly_SPU_DMA)) && !(SB_ADST & 2))
|
||||
{
|
||||
// Remember that the fake "busy" value has been reported.
SB_ADST |= 2;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Drop the internal marker so it never leaks into the value seen by the guest.
SB_ADST &= ~2;
|
||||
return SB_ADST;
|
||||
}
|
||||
}
|
||||
|
||||
template<u32 STAG, HollyInterruptID iainterrupt, const char *LogTag>
|
||||
void Write_SB_STAG(u32 addr, u32 data)
|
||||
{
|
||||
|
@ -433,7 +451,7 @@ void aica_sb_Init()
|
|||
// G2-DMA registers
|
||||
|
||||
// AICA
|
||||
sb_rio_register(SB_ADST_addr, RIO_WF, nullptr, &Write_SB_ADST);
|
||||
sb_rio_register(SB_ADST_addr, RIO_FUNC, &Read_SB_ADST, &Write_SB_ADST);
|
||||
#ifdef STRICT_MODE
|
||||
sb_rio_register(SB_ADSTAR_addr, RIO_WF, nullptr, &Write_SB_STAR<SB_ADSTAR_addr, holly_AICA_ILLADDR, AICA_TAG>);
|
||||
sb_rio_register(SB_ADSTAG_addr, RIO_WF, nullptr, &Write_SB_STAG<SB_ADSTAG_addr, holly_AICA_ILLADDR, AICA_TAG>);
|
||||
|
@ -464,6 +482,8 @@ void aica_sb_Init()
|
|||
|
||||
// Reset hook for the AICA system-bus registers handled in this file.
// hard: when true, the SB_ADST variable is cleared; when false, nothing is changed here.
void aica_sb_Reset(bool hard)
|
||||
{
|
||||
if (hard)
|
||||
// Clears both the DMA start bit and the internal marker bit kept in SB_ADST.
SB_ADST = 0;
|
||||
}
|
||||
|
||||
void aica_sb_Term()
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#include "dsp.h"
|
||||
#include "sgc_if.h"
|
||||
|
||||
u8 aica_reg[0x8000];
|
||||
alignas(4) u8 aica_reg[0x8000];
|
||||
|
||||
//00000000~007FFFFF @DRAM_AREA*
|
||||
//00800000~008027FF @CHANNEL_DATA
|
||||
|
|
|
@ -7,5 +7,5 @@ void libAICA_WriteReg(u32 addr,u32 data,u32 size);
|
|||
void init_mem();
|
||||
void term_mem();
|
||||
|
||||
extern u8 aica_reg[0x8000];
|
||||
alignas(4) extern u8 aica_reg[0x8000];
|
||||
|
||||
|
|
|
@ -63,13 +63,15 @@ static void CPUUndefinedException();
|
|||
//
|
||||
// ARM7 interpreter
|
||||
//
|
||||
static int clockTicks;
|
||||
|
||||
static void runInterpreter(u32 CycleCount)
|
||||
{
|
||||
if (!Arm7Enabled)
|
||||
return;
|
||||
|
||||
u32 clockTicks = 0;
|
||||
while (clockTicks < CycleCount)
|
||||
clockTicks -= CycleCount;
|
||||
while (clockTicks < 0)
|
||||
{
|
||||
if (reg[INTR_PEND].I)
|
||||
CPUFiq();
|
||||
|
@ -79,6 +81,11 @@ static void runInterpreter(u32 CycleCount)
|
|||
}
|
||||
}
|
||||
|
||||
// Interpreter variant: guarantees the ARM7 interpreter still has some work budget left.
// clockTicks is the file-static counter used by runInterpreter, which appears to keep
// executing while the counter is negative. Clamping it to at most -50 therefore leaves
// at least 50 ticks to run before the loop stops.
// NOTE(review): the unit of the 50 is whatever clockTicks counts (presumably ARM7 cycles) - confirm.
void aicaarm::avoidRaceCondition()
|
||||
{
|
||||
// Never increases the remaining budget beyond what is already there; only enforces the minimum.
clockTicks = std::min(clockTicks, -50);
|
||||
}
|
||||
|
||||
void aicaarm::run(u32 samples)
|
||||
{
|
||||
for (u32 i = 0; i < samples; i++)
|
||||
|
@ -353,15 +360,15 @@ void update_armintc()
|
|||
|
||||
//Emulate a single arm op, passed in opcode
|
||||
|
||||
u32 DYNACALL arm_single_op(u32 opcode)
|
||||
void DYNACALL arm_single_op(u32 opcode)
|
||||
{
|
||||
u32 clockTicks=0;
|
||||
u32 clockTicks = 0;
|
||||
|
||||
#define NO_OPCODE_READ
|
||||
|
||||
#include "arm-new.h"
|
||||
|
||||
return clockTicks;
|
||||
reg[CYCL_CNT].I -= clockTicks;
|
||||
}
|
||||
|
||||
template<u32 Pd>
|
||||
|
|
|
@ -7,7 +7,8 @@ void init();
|
|||
void reset();
|
||||
void run(u32 samples);
|
||||
void enable(bool enabled);
|
||||
|
||||
// Called when the arm interrupts the SH4 to make sure it has enough cycles to finish what it's doing.
|
||||
void avoidRaceCondition();
|
||||
}
|
||||
|
||||
enum Arm7Reg
|
||||
|
|
|
@ -408,7 +408,7 @@ static ArmOp decodeArmOp(u32 opcode, u32 arm_pc)
|
|||
//Offset
|
||||
newbits.full |= 4;
|
||||
|
||||
arm_printf("ARM: MEM TFX R %08X -> %08X\n", opcode, newbits.full);
|
||||
arm_printf("ARM: MEM TFX R %08X -> %08X", opcode, newbits.full);
|
||||
|
||||
return decodeArmOp(newbits.full, arm_pc);
|
||||
}
|
||||
|
@ -439,7 +439,7 @@ static ArmOp decodeArmOp(u32 opcode, u32 arm_pc)
|
|||
//Offset
|
||||
newbits.full |= 4;
|
||||
|
||||
arm_printf("ARM: MEM TFX W %08X -> %08X\n", opcode, newbits.full);
|
||||
arm_printf("ARM: MEM TFX W %08X -> %08X", opcode, newbits.full);
|
||||
|
||||
return decodeArmOp(newbits.full, arm_pc);
|
||||
}
|
||||
|
@ -705,7 +705,7 @@ void *arm7rec_getMemOp(bool Load, bool Byte)
|
|||
}
|
||||
|
||||
extern bool Arm7Enabled;
|
||||
extern "C" void DYNACALL arm_mainloop(u32 cycl, void* regs, void* entrypoints);
|
||||
extern "C" void DYNACALL arm_mainloop(void* regs, void* entrypoints);
|
||||
|
||||
// Run a timeslice of arm7
|
||||
|
||||
|
@ -714,9 +714,17 @@ void aicaarm::run(u32 samples)
|
|||
for (u32 i = 0; i < samples; i++)
|
||||
{
|
||||
if (Arm7Enabled)
|
||||
arm_mainloop(ARM_CYCLES_PER_SAMPLE, arm_Reg, EntryPoints);
|
||||
{
|
||||
arm_Reg[CYCL_CNT].I += ARM_CYCLES_PER_SAMPLE;
|
||||
arm_mainloop(arm_Reg, EntryPoints);
|
||||
}
|
||||
libAICA_TimeStep();
|
||||
}
|
||||
}
|
||||
|
||||
// Dynarec variant: guarantees the recompiled ARM7 code still has some cycle budget left.
// arm_Reg[CYCL_CNT] is the remaining-cycle counter that aicaarm::run tops up each sample
// and that compiled blocks decrement. Raising it to at least 50 keeps the ARM7 running
// for a little longer.
// NOTE(review): the dispatcher is expected to exit once the counter is no longer positive - confirm
// against the platform-specific arm_dispatch implementation.
void aicaarm::avoidRaceCondition()
|
||||
{
|
||||
// The cast to int makes the comparison signed, so a negative (overdrawn) counter is raised to 50.
arm_Reg[CYCL_CNT].I = std::max((int)arm_Reg[CYCL_CNT].I, 50);
|
||||
}
|
||||
|
||||
#endif // FEAT_AREC != DYNAREC_NONE
|
||||
|
|
|
@ -423,6 +423,6 @@ void arm7rec_flush();
|
|||
extern "C" void arm7rec_compile();
|
||||
void *arm7rec_getMemOp(bool load, bool byte);
|
||||
template<u32 Pd> void DYNACALL MSR_do(u32 v);
|
||||
u32 DYNACALL arm_single_op(u32 opcode);
|
||||
void DYNACALL arm_single_op(u32 opcode);
|
||||
|
||||
void arm7backend_compile(const std::vector<ArmOp> block_ops, u32 cycles);
|
||||
|
|
|
@ -49,8 +49,8 @@ static void storeReg(eReg host_reg, Arm7Reg guest_reg, ArmOp::Condition cc = Arm
|
|||
STR(host_reg, r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I, ARM::Offset, (ARM::ConditionCode)cc);
|
||||
}
|
||||
|
||||
static const std::array<eReg, 5> alloc_regs{
|
||||
r6, r7, r9, r10, r11
|
||||
static const std::array<eReg, 6> alloc_regs{
|
||||
r5, r6, r7, r9, r10, r11
|
||||
};
|
||||
|
||||
class Arm32ArmRegAlloc : public ArmRegAlloc<alloc_regs.size(), Arm32ArmRegAlloc>
|
||||
|
@ -428,11 +428,25 @@ static void emitFallback(const ArmOp& op)
|
|||
//Call interpreter
|
||||
MOV32(r0, op.arg[0].getImmediate());
|
||||
call((u32)arm_single_op);
|
||||
SUB(r5, r5, r0, false);
|
||||
}
|
||||
|
||||
void arm7backend_compile(const std::vector<ArmOp> block_ops, u32 cycles)
|
||||
{
|
||||
loadReg(r2, CYCL_CNT);
|
||||
if (is_i8r4(cycles))
|
||||
SUB(r2, r2, cycles);
|
||||
else
|
||||
{
|
||||
u32 togo = cycles;
|
||||
while(ARMImmid8r4_enc(togo) == -1)
|
||||
{
|
||||
SUB(r2, r2, 256);
|
||||
togo -= 256;
|
||||
}
|
||||
SUB(r2, r2, togo);
|
||||
}
|
||||
storeReg(r2, CYCL_CNT);
|
||||
|
||||
regalloc = new Arm32ArmRegAlloc(block_ops);
|
||||
void *codestart = icPtr;
|
||||
|
||||
|
@ -474,19 +488,6 @@ void arm7backend_compile(const std::vector<ArmOp> block_ops, u32 cycles)
|
|||
}
|
||||
storeFlags();
|
||||
|
||||
if (is_i8r4(cycles))
|
||||
SUB(r5, r5, cycles, true);
|
||||
else
|
||||
{
|
||||
u32 togo = cycles;
|
||||
while(ARMImmid8r4_enc(togo) == -1)
|
||||
{
|
||||
SUB(r5, r5, 256);
|
||||
togo -= 256;
|
||||
}
|
||||
SUB(r5, r5, togo, true);
|
||||
}
|
||||
JUMP((u32)&arm_exit, CC_MI); //statically predicted as not taken
|
||||
JUMP((u32)&arm_dispatch);
|
||||
|
||||
vmem_platform_flush_cache(codestart, (u8*)icPtr - 1, codestart, (u8*)icPtr - 1);
|
||||
|
|
|
@ -37,7 +37,7 @@ extern const u32 ICacheSize;
|
|||
|
||||
class Arm7Compiler;
|
||||
|
||||
#define MAX_REGS 7
|
||||
#define MAX_REGS 8
|
||||
|
||||
class AArch64ArmRegAlloc : public ArmRegAlloc<MAX_REGS, AArch64ArmRegAlloc>
|
||||
{
|
||||
|
@ -49,7 +49,7 @@ class AArch64ArmRegAlloc : public ArmRegAlloc<MAX_REGS, AArch64ArmRegAlloc>
|
|||
static const WRegister& getReg(int i)
|
||||
{
|
||||
static const WRegister regs[] = {
|
||||
w19, w20, w21, w22, w23, w24, w25
|
||||
w19, w20, w21, w22, w23, w24, w25, w27
|
||||
};
|
||||
static_assert(MAX_REGS == ARRAY_SIZE(regs), "MAX_REGS == ARRAY_SIZE(regs)");
|
||||
verify(i >= 0 && (u32)i < ARRAY_SIZE(regs));
|
||||
|
@ -539,7 +539,6 @@ class Arm7Compiler : public MacroAssembler
|
|||
set_flags = false;
|
||||
Mov(w0, op.arg[0].getImmediate());
|
||||
call((void*)arm_single_op);
|
||||
Subs(w27, w27, w0);
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -547,6 +546,10 @@ public:
|
|||
|
||||
void compile(const std::vector<ArmOp> block_ops, u32 cycles)
|
||||
{
|
||||
Ldr(w1, arm_reg_operand(CYCL_CNT));
|
||||
Sub(w1, w1, cycles);
|
||||
Str(w1, arm_reg_operand(CYCL_CNT));
|
||||
|
||||
regalloc = new AArch64ArmRegAlloc(*this, block_ops);
|
||||
|
||||
for (u32 i = 0; i < block_ops.size(); i++)
|
||||
|
@ -594,14 +597,7 @@ public:
|
|||
endConditional(condLabel);
|
||||
}
|
||||
|
||||
//pop registers & return
|
||||
Subs(w27, w27, cycles);
|
||||
ptrdiff_t offset = reinterpret_cast<uintptr_t>(arm_exit) - GetBuffer()->GetStartAddress<uintptr_t>();
|
||||
Label arm_exit_label;
|
||||
BindToOffset(&arm_exit_label, offset);
|
||||
B(&arm_exit_label, mi);
|
||||
|
||||
offset = reinterpret_cast<uintptr_t>(arm_dispatch) - GetBuffer()->GetStartAddress<uintptr_t>();
|
||||
ptrdiff_t offset = reinterpret_cast<uintptr_t>(arm_dispatch) - GetBuffer()->GetStartAddress<uintptr_t>();
|
||||
Label arm_dispatch_label;
|
||||
BindToOffset(&arm_dispatch_label, offset);
|
||||
B(&arm_dispatch_label);
|
||||
|
@ -666,7 +662,7 @@ __asm__ (
|
|||
|
||||
".globl arm_mainloop \n\t"
|
||||
".hidden arm_mainloop \n"
|
||||
"arm_mainloop: \n\t" // arm_mainloop(cycles, regs, entry points)
|
||||
"arm_mainloop: \n\t" // arm_mainloop(regs, entry points)
|
||||
"stp x25, x26, [sp, #-96]! \n\t"
|
||||
"stp x27, x28, [sp, #16] \n\t"
|
||||
"stp x29, x30, [sp, #32] \n\t"
|
||||
|
@ -674,16 +670,15 @@ __asm__ (
|
|||
"stp x21, x22, [sp, #64] \n\t"
|
||||
"stp x23, x24, [sp, #80] \n\t"
|
||||
|
||||
"mov x28, x1 \n\t" // arm7 registers
|
||||
"mov x26, x2 \n\t" // lookup base
|
||||
|
||||
"ldr w27, [x28, #192] \n\t" // cycle count
|
||||
"add w27, w27, w0 \n\t" // add cycles for this timeslice
|
||||
"mov x28, x0 \n\t" // arm7 registers
|
||||
"mov x26, x1 \n\t" // lookup base
|
||||
|
||||
".globl arm_dispatch \n\t"
|
||||
".hidden arm_dispatch \n"
|
||||
"arm_dispatch: \n\t"
|
||||
"ldr w3, [x28, #192] \n\t" // load cycle counter
|
||||
"ldp w0, w1, [x28, #184] \n\t" // load Next PC, interrupt
|
||||
"tbnz w3, #31, arm_exit \n\t" // exit if cycle counter negative
|
||||
"ubfx w2, w0, #2, #21 \n\t" // w2 = pc >> 2. Note: assuming address space == 8 MB (23 bits)
|
||||
"cbnz w1, arm_dofiq \n\t" // if interrupt pending, handle it
|
||||
|
||||
|
@ -698,7 +693,6 @@ __asm__ (
|
|||
".globl arm_exit \n\t"
|
||||
".hidden arm_exit \n"
|
||||
"arm_exit: \n\t"
|
||||
"str w27, [x28, #192] \n\t" // if timeslice is over, save remaining cycles
|
||||
"ldp x23, x24, [sp, #80] \n\t"
|
||||
"ldp x21, x22, [sp, #64] \n\t"
|
||||
"ldp x19, x20, [sp, #48] \n\t"
|
||||
|
|
|
@ -19,8 +19,6 @@
|
|||
|
||||
#include "build.h"
|
||||
|
||||
#define TAIL_CALLING 1
|
||||
|
||||
#if HOST_CPU == CPU_X64 && FEAT_AREC != DYNAREC_NONE
|
||||
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
|
@ -30,7 +28,6 @@ using namespace Xbyak::util;
|
|||
|
||||
#include "arm7_rec.h"
|
||||
|
||||
extern u32 arm_single_op(u32 opcode);
|
||||
extern "C" void CompileCode();
|
||||
extern "C" void CPUFiq();
|
||||
extern "C" void arm_dispatch();
|
||||
|
@ -44,24 +41,11 @@ static const Xbyak::Reg32 call_regs[] = { ecx, edx, r8d, r9d };
|
|||
#else
|
||||
static const Xbyak::Reg32 call_regs[] = { edi, esi, edx, ecx };
|
||||
#endif
|
||||
#ifdef TAIL_CALLING
|
||||
extern "C" u32 (**entry_points)();
|
||||
#endif
|
||||
u32 (**entry_points)();
|
||||
|
||||
class Arm7Compiler;
|
||||
|
||||
#ifdef TAIL_CALLING
|
||||
#ifdef _WIN32
|
||||
static const std::array<Xbyak::Reg32, 7> alloc_regs {
|
||||
ebx, ebp, edi, esi, r12d, r13d, r15d
|
||||
};
|
||||
#else
|
||||
static const std::array<Xbyak::Reg32, 5> alloc_regs {
|
||||
ebx, ebp, r12d, r13d, r15d
|
||||
};
|
||||
#endif
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
static const std::array<Xbyak::Reg32, 8> alloc_regs {
|
||||
ebx, ebp, edi, esi, r12d, r13d, r14d, r15d
|
||||
|
@ -71,7 +55,6 @@ static const std::array<Xbyak::Reg32, 6> alloc_regs {
|
|||
ebx, ebp, r12d, r13d, r14d, r15d
|
||||
};
|
||||
#endif
|
||||
#endif
|
||||
|
||||
class X64ArmRegAlloc : public ArmRegAlloc<sizeof(alloc_regs) / sizeof(alloc_regs[0]), X64ArmRegAlloc>
|
||||
{
|
||||
|
@ -801,11 +784,6 @@ class Arm7Compiler : public Xbyak::CodeGenerator
|
|||
set_flags = false;
|
||||
mov(call_regs[0], op.arg[0].getImmediate());
|
||||
call(arm_single_op);
|
||||
#ifdef TAIL_CALLING
|
||||
sub(r14d, eax);
|
||||
#else
|
||||
sub(dword[rip + &arm_Reg[CYCL_CNT].I], eax);
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -815,13 +793,8 @@ public:
|
|||
{
|
||||
regalloc = new X64ArmRegAlloc(*this, block_ops);
|
||||
|
||||
#ifndef TAIL_CALLING
|
||||
#ifdef _WIN32
|
||||
sub(rsp, 40); // 16-byte alignment + 32-byte shadow area
|
||||
#else
|
||||
sub(rsp, 8); // 16-byte alignment
|
||||
#endif
|
||||
#endif
|
||||
sub(dword[rip + &arm_Reg[CYCL_CNT]], cycles);
|
||||
|
||||
ArmOp::Condition currentCondition = ArmOp::AL;
|
||||
Xbyak::Label *condLabel = nullptr;
|
||||
|
||||
|
@ -880,21 +853,9 @@ public:
|
|||
}
|
||||
}
|
||||
endConditional(condLabel);
|
||||
#ifdef TAIL_CALLING
|
||||
sub(r14d, cycles);
|
||||
#else
|
||||
mov(eax, cycles);
|
||||
#endif
|
||||
#ifdef TAIL_CALLING
|
||||
|
||||
jmp((void*)&arm_dispatch);
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
add(rsp, 40);
|
||||
#else
|
||||
add(rsp, 8);
|
||||
#endif
|
||||
ret();
|
||||
#endif
|
||||
|
||||
ready();
|
||||
icPtr += getSize();
|
||||
|
||||
|
@ -923,57 +884,6 @@ void arm7backend_compile(const std::vector<ArmOp> block_ops, u32 cycles)
|
|||
|
||||
#ifndef _MSC_VER
|
||||
|
||||
#ifndef TAIL_CALLING
|
||||
extern "C"
|
||||
u32 arm_compilecode()
|
||||
{
|
||||
CompileCode();
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
void arm_mainloop(u32 cycl, void* regs, void* entrypoints)
|
||||
{
|
||||
entry_points = (u32 (**)())entrypoints;
|
||||
arm_Reg[CYCL_CNT].I += cycl;
|
||||
|
||||
__asm__ (
|
||||
"push %rbx \n\t"
|
||||
"push %rbp \n\t"
|
||||
#ifdef _WIN32
|
||||
"push %rdi \n\t"
|
||||
"push %rsi \n\t"
|
||||
#endif
|
||||
"push %r12 \n\t"
|
||||
"push %r13 \n\t"
|
||||
"push %r14 \n\t"
|
||||
"push %r15 \n\t"
|
||||
);
|
||||
|
||||
while ((int)arm_Reg[CYCL_CNT].I > 0)
|
||||
{
|
||||
if (arm_Reg[INTR_PEND].I)
|
||||
CPUFiq();
|
||||
|
||||
arm_Reg[CYCL_CNT].I -= entry_points[(arm_Reg[R15_ARM_NEXT].I & (ARAM_SIZE_MAX - 1)) / 4]();
|
||||
}
|
||||
|
||||
__asm__ (
|
||||
"pop %r15 \n\t"
|
||||
"pop %r14 \n\t"
|
||||
"pop %r13 \n\t"
|
||||
"pop %r12 \n\t"
|
||||
#ifdef _WIN32
|
||||
"pop %rsi \n\t"
|
||||
"pop %rdi \n\t"
|
||||
#endif
|
||||
"pop %rbp \n\t"
|
||||
"pop %rbx \n\t"
|
||||
);
|
||||
}
|
||||
|
||||
#else // !TAIL_CALLING
|
||||
|
||||
#ifdef __MACH__
|
||||
#define _U "_"
|
||||
#else
|
||||
|
@ -986,7 +896,7 @@ __asm__ (
|
|||
"jmp " _U"arm_dispatch \n\t"
|
||||
|
||||
".globl " _U"arm_mainloop \n"
|
||||
_U"arm_mainloop: \n\t" // arm_mainloop(cycles, regs, entry points)
|
||||
_U"arm_mainloop: \n\t" // arm_mainloop(regs, entry points)
|
||||
#ifdef _WIN32
|
||||
"pushq %rdi \n\t"
|
||||
"pushq %rsi \n\t"
|
||||
|
@ -1003,13 +913,10 @@ __asm__ (
|
|||
"subq $8, %rsp \n\t" // 16-byte stack alignment
|
||||
#endif
|
||||
|
||||
"movl " _U"arm_Reg + 192(%rip), %r14d \n\t" // CYCL_CNT
|
||||
#ifdef _WIN32
|
||||
"add %ecx, %r14d \n\t" // add cycles for this timeslice
|
||||
"movq %r8, entry_points(%rip) \n\t"
|
||||
"movq %rdx, entry_points(%rip) \n\t"
|
||||
#else
|
||||
"add %edi, %r14d \n\t" // add cycles for this timeslice
|
||||
"movq %rdx, " _U"entry_points(%rip) \n\t"
|
||||
"movq %rsi, " _U"entry_points(%rip) \n\t"
|
||||
#endif
|
||||
|
||||
".globl " _U"arm_dispatch \n"
|
||||
|
@ -1017,7 +924,7 @@ __asm__ (
|
|||
"movq " _U"entry_points(%rip), %rdx \n\t"
|
||||
"movl " _U"arm_Reg + 184(%rip), %ecx \n\t" // R15_ARM_NEXT
|
||||
"movl " _U"arm_Reg + 188(%rip), %eax \n\t" // INTR_PEND
|
||||
"cmp $0, %r14d \n\t"
|
||||
"cmp $0," _U"arm_Reg + 192(%rip) \n\t"
|
||||
"jle 2f \n\t" // timeslice is over
|
||||
"test %eax, %eax \n\t"
|
||||
"jne 1f \n\t" // if interrupt pending, handle it
|
||||
|
@ -1030,7 +937,6 @@ __asm__ (
|
|||
"jmp " _U"arm_dispatch \n"
|
||||
|
||||
"2: \n\t" // arm_exit:
|
||||
"movl %r14d, " _U"arm_Reg + 192(%rip) \n\t" // CYCL_CNT: save remaining cycles
|
||||
#ifdef _WIN32
|
||||
"addq $40, %rsp \n\t"
|
||||
#else
|
||||
|
@ -1048,6 +954,5 @@ __asm__ (
|
|||
#endif
|
||||
"ret \n"
|
||||
);
|
||||
#endif // !TAIL_CALLING
|
||||
#endif // !_MSC_VER
|
||||
#endif // X64 && DYNAREC_JIT
|
||||
|
|
|
@ -16,7 +16,7 @@ EXTERN arm_Reg: PTR DWORD
|
|||
EXTERN entry_points: QWORD
|
||||
|
||||
PUBLIC arm_mainloop
|
||||
arm_mainloop PROC FRAME ; arm_mainloop(cycles, regs, entry points)
|
||||
arm_mainloop PROC FRAME ; arm_mainloop(regs, entry points)
|
||||
push rdi
|
||||
.pushreg rdi
|
||||
push rsi
|
||||
|
@ -37,16 +37,14 @@ arm_mainloop PROC FRAME ; arm_mainloop(cycles, regs, entry points)
|
|||
.allocstack 40
|
||||
.endprolog
|
||||
|
||||
mov r14d, dword ptr [arm_Reg + 192] ; CYCL_CNT
|
||||
add r14d, ecx ; add cycles for this timeslice
|
||||
mov qword ptr [entry_points], r8
|
||||
mov qword ptr [entry_points], rdx
|
||||
|
||||
PUBLIC arm_dispatch
|
||||
arm_dispatch::
|
||||
mov rdx, qword ptr [entry_points]
|
||||
mov ecx, dword ptr [arm_Reg + 184] ; R15_ARM_NEXT
|
||||
mov eax, dword ptr [arm_Reg + 188] ; INTR_PEND
|
||||
cmp r14d, 0
|
||||
cmp dword ptr [arm_Reg + 192], 0
|
||||
jle arm_exit ; timeslice is over
|
||||
test eax, eax
|
||||
jne arm_dofiq ; if interrupt pending, handle it
|
||||
|
@ -59,7 +57,6 @@ arm_dofiq:
|
|||
jmp arm_dispatch
|
||||
|
||||
arm_exit:
|
||||
mov dword ptr [arm_Reg + 192], r14d ; CYCL_CNT: save remaining cycles
|
||||
add rsp, 40
|
||||
pop rbp
|
||||
pop rbx
|
||||
|
|
|
@ -490,7 +490,7 @@ extern u32 SB_ISTNRM;
|
|||
#define SB_ADEN SB_REG_32(ADEN)
|
||||
|
||||
//0x005F7818 SB_ADST RW AICA:G2-DMA start
|
||||
#define SB_ADST SB_REG_32(ADST)
|
||||
//#define SB_ADST SB_REG_32(ADST)
|
||||
//0x005F781C SB_ADSUSP RW AICA:G2-DMA suspend
|
||||
#define SB_ADSUSP SB_REG_32(ADSUSP)
|
||||
|
||||
|
|
|
@ -216,7 +216,7 @@ XEntryPoints: .word CSYM(EntryPoints)
|
|||
|
||||
.global CSYM(arm_mainloop)
|
||||
HIDDEN(arm_mainloop)
|
||||
CSYM(arm_mainloop): @(cntx,lookup_base,cycles)
|
||||
CSYM(arm_mainloop): @(cntx,lookup_base)
|
||||
|
||||
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
sub sp, #4 @ 8-byte stack alignment
|
||||
|
@ -225,19 +225,19 @@ CSYM(arm_mainloop): @(cntx,lookup_base,cycles)
|
|||
ldr r8,Xarm_Reg @load cntx
|
||||
ldr r4,XEntryPoints @load lookup base
|
||||
#else
|
||||
mov r8,r1 @load cntx
|
||||
mov r4,r2 @load lookup base
|
||||
mov r8,r0 @load cntx
|
||||
mov r4,r1 @load lookup base
|
||||
#endif
|
||||
|
||||
ldr r5,[r8,#192] @load cycle count
|
||||
add r5,r0 @add cycles for this timeslice
|
||||
|
||||
b CSYM(arm_dispatch)
|
||||
|
||||
.global CSYM(arm_dispatch)
|
||||
HIDDEN(arm_dispatch)
|
||||
CSYM(arm_dispatch):
|
||||
ldr r3,[r8,#192]
|
||||
ldrd r0,r1,[r8,#184] @load: Next PC, interrupt
|
||||
cmp r3,#0
|
||||
ble arm_exit
|
||||
ubfx r2,r0,#2,#21 @ assuming 8 MB address space max (23 bits)
|
||||
cmp r1,#0
|
||||
bne arm_dofiq
|
||||
|
@ -251,7 +251,6 @@ arm_dofiq:
|
|||
.global CSYM(arm_exit)
|
||||
HIDDEN(arm_exit)
|
||||
CSYM(arm_exit):
|
||||
str r5,[r8,#192] @if timeslice is over, save remaining cycles
|
||||
add sp, #4
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
|
|
|
@ -2094,14 +2094,6 @@ void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool s
|
|||
//StoreImms(r0,r1,(u32)&last_run_block,(u32)code); //useful when code jumps to random locations ...
|
||||
++blockno;
|
||||
|
||||
if (settings.profile.run_counts)
|
||||
{
|
||||
MOV32(r1,(u32)&block->runs);
|
||||
LDR(r0,r1);
|
||||
ADD(r0,r0,1);
|
||||
STR(r0,r1);
|
||||
}
|
||||
|
||||
//reg alloc
|
||||
reg.DoAlloc(block,alloc_regs,alloc_fpu);
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@ extern u32 ARMRST;//arm reset reg
|
|||
extern u32 rtc_EN;
|
||||
extern int dma_sched_id;
|
||||
extern u32 RealTimeClock;
|
||||
extern u32 SB_ADST;
|
||||
|
||||
//./core/hw/aica/aica_mem.o
|
||||
extern u8 aica_reg[0x8000];
|
||||
|
@ -297,7 +298,7 @@ bool dc_serialize(void **data, unsigned int *total_size)
|
|||
REICAST_S(SB_ISTNRM);
|
||||
REICAST_S(SB_FFST_rc);
|
||||
REICAST_S(SB_FFST);
|
||||
|
||||
REICAST_S(SB_ADST);
|
||||
|
||||
sys_rom->Serialize(data, total_size);
|
||||
sys_nvmem->Serialize(data, total_size);
|
||||
|
@ -528,6 +529,7 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
|
|||
REICAST_US(SB_ISTNRM);
|
||||
REICAST_US(SB_FFST_rc);
|
||||
REICAST_US(SB_FFST);
|
||||
SB_ADST = 0;
|
||||
|
||||
if (settings.platform.system == DC_PLATFORM_NAOMI || settings.platform.system == DC_PLATFORM_ATOMISWAVE)
|
||||
{
|
||||
|
@ -820,6 +822,10 @@ bool dc_unserialize(void **data, unsigned int *total_size)
|
|||
REICAST_US(SB_ISTNRM);
|
||||
REICAST_US(SB_FFST_rc);
|
||||
REICAST_US(SB_FFST);
|
||||
if (version >= V15)
|
||||
REICAST_US(SB_ADST);
|
||||
else
|
||||
SB_ADST = 0;
|
||||
|
||||
if (version < V5)
|
||||
{
|
||||
|
|
|
@ -568,5 +568,6 @@ enum serialize_version_enum {
|
|||
V12 = 807,
|
||||
V13 = 808,
|
||||
V14 = 809,
|
||||
VCUR_FLYCAST = V14,
|
||||
V15 = 810,
|
||||
VCUR_FLYCAST = V15,
|
||||
} ;
|
||||
|
|
Loading…
Reference in New Issue