diff --git a/core/hw/aica/aica.cpp b/core/hw/aica/aica.cpp index 826c0de72..d212cfadc 100644 --- a/core/hw/aica/aica.cpp +++ b/core/hw/aica/aica.cpp @@ -172,40 +172,35 @@ void WriteAicaReg(u32 reg,u32 data) { case SCIPD_addr: verify(sz!=1); + // other bits are read-only if (data & (1<<5)) { SCIPD->SCPU=1; update_arm_interrupts(); } - //Read only - return; + break; case SCIRE_addr: - { - verify(sz!=1); - SCIPD->full&=~(data /*& SCIEB->full*/ ); //is the & SCIEB->full needed ? doesn't seem like it - data=0;//Write only - update_arm_interrupts(); - } + verify(sz != 1); + SCIPD->full &= ~data /*& SCIEB->full)*/; //is the & SCIEB->full needed ? doesn't seem like it + update_arm_interrupts(); break; case MCIPD_addr: - if (data & (1<<5)) + verify(sz != 1); + // other bits are read-only + if (data & (1 << 5)) { - verify(sz!=1); - MCIPD->SCPU=1; + MCIPD->SCPU = 1; UpdateSh4Ints(); + aicaarm::avoidRaceCondition(); } - //Read only - return; + break; case MCIRE_addr: - { - verify(sz!=1); - MCIPD->full&=~data; - UpdateSh4Ints(); - //Write only - } + verify(sz != 1); + MCIPD->full &= ~data; + UpdateSh4Ints(); break; case TIMER_A: diff --git a/core/hw/aica/aica_if.cpp b/core/hw/aica/aica_if.cpp index 5d58e94c9..dd3e0d7bb 100644 --- a/core/hw/aica/aica_if.cpp +++ b/core/hw/aica/aica_if.cpp @@ -23,6 +23,7 @@ u32 rtc_EN; int dma_sched_id; u32 RealTimeClock; int rtc_schid = -1; +u32 SB_ADST; u32 GetRTC_now() { @@ -330,7 +331,7 @@ static void Write_SB_ADST(u32 addr, u32 data) //0x005F7818 SB_ADST RW AICA:G2-DMA start //0x005F781C SB_ADSUSP RW AICA:G2-DMA suspend - if (data&1) + if ((data & 1) == 1 && (SB_ADST & 1) == 0) { if (SB_ADEN&1) { @@ -391,6 +392,23 @@ static void Write_SB_ADST(u32 addr, u32 data) } } +u32 Read_SB_ADST(u32 addr) +{ + // Le Mans and Looney Tunes sometimes send the same dma transfer twice after checking SB_ADST == 0. + // To avoid this, we pretend SB_ADST is still set when there is a pending aica-dma interrupt. + // This is only done once. + if ((SB_ISTNRM & (1 << (u8)holly_SPU_DMA)) && !(SB_ADST & 2)) + { + SB_ADST |= 2; + return 1; + } + else + { + SB_ADST &= ~2; + return SB_ADST; + } +} + template void Write_SB_STAG(u32 addr, u32 data) { @@ -433,7 +451,7 @@ void aica_sb_Init() // G2-DMA registers // AICA - sb_rio_register(SB_ADST_addr, RIO_WF, nullptr, &Write_SB_ADST); + sb_rio_register(SB_ADST_addr, RIO_FUNC, &Read_SB_ADST, &Write_SB_ADST); #ifdef STRICT_MODE sb_rio_register(SB_ADSTAR_addr, RIO_WF, nullptr, &Write_SB_STAR); sb_rio_register(SB_ADSTAG_addr, RIO_WF, nullptr, &Write_SB_STAG); @@ -464,6 +482,8 @@ void aica_sb_Init() void aica_sb_Reset(bool hard) { + if (hard) + SB_ADST = 0; } void aica_sb_Term() diff --git a/core/hw/aica/aica_mem.cpp b/core/hw/aica/aica_mem.cpp index 1fc538b39..060fe500b 100644 --- a/core/hw/aica/aica_mem.cpp +++ b/core/hw/aica/aica_mem.cpp @@ -4,7 +4,7 @@ #include "dsp.h" #include "sgc_if.h" -u8 aica_reg[0x8000]; +alignas(4) u8 aica_reg[0x8000]; //00000000~007FFFFF @DRAM_AREA* //00800000~008027FF @CHANNEL_DATA diff --git a/core/hw/aica/aica_mem.h b/core/hw/aica/aica_mem.h index 30d88eb23..424649715 100644 --- a/core/hw/aica/aica_mem.h +++ b/core/hw/aica/aica_mem.h @@ -7,5 +7,5 @@ void libAICA_WriteReg(u32 addr,u32 data,u32 size); void init_mem(); void term_mem(); -extern u8 aica_reg[0x8000]; +alignas(4) extern u8 aica_reg[0x8000]; diff --git a/core/hw/arm7/arm7.cpp b/core/hw/arm7/arm7.cpp index b47663029..0177644c2 100644 --- a/core/hw/arm7/arm7.cpp +++ b/core/hw/arm7/arm7.cpp @@ -63,13 +63,15 @@ static void CPUUndefinedException(); // // ARM7 interpreter // +static int clockTicks; + static void runInterpreter(u32 CycleCount) { if (!Arm7Enabled) return; - u32 clockTicks = 0; - while (clockTicks < CycleCount) + clockTicks -= CycleCount; + while (clockTicks < 0) { if (reg[INTR_PEND].I) CPUFiq(); @@ -79,6 +81,11 @@ static void runInterpreter(u32 CycleCount) } } +void aicaarm::avoidRaceCondition() +{ + clockTicks = std::min(clockTicks, -50); +} + void aicaarm::run(u32 samples) { for (u32 i = 0; i < samples; i++) @@ -353,15 +360,15 @@ void update_armintc() //Emulate a single arm op, passed in opcode -u32 DYNACALL arm_single_op(u32 opcode) +void DYNACALL arm_single_op(u32 opcode) { - u32 clockTicks=0; + u32 clockTicks = 0; #define NO_OPCODE_READ #include "arm-new.h" - return clockTicks; + reg[CYCL_CNT].I -= clockTicks; } template diff --git a/core/hw/arm7/arm7.h b/core/hw/arm7/arm7.h index 7b7cf5674..1862ef431 100644 --- a/core/hw/arm7/arm7.h +++ b/core/hw/arm7/arm7.h @@ -7,7 +7,8 @@ void init(); void reset(); void run(u32 samples); void enable(bool enabled); - +// Called when the arm interrupts the SH4 to make sure it has enough cycles to finish what it's doing. +void avoidRaceCondition(); } enum Arm7Reg diff --git a/core/hw/arm7/arm7_rec.cpp b/core/hw/arm7/arm7_rec.cpp index 9177a10c4..fcd38651a 100644 --- a/core/hw/arm7/arm7_rec.cpp +++ b/core/hw/arm7/arm7_rec.cpp @@ -408,7 +408,7 @@ static ArmOp decodeArmOp(u32 opcode, u32 arm_pc) //Offset newbits.full |= 4; - arm_printf("ARM: MEM TFX R %08X -> %08X\n", opcode, newbits.full); + arm_printf("ARM: MEM TFX R %08X -> %08X", opcode, newbits.full); return decodeArmOp(newbits.full, arm_pc); } @@ -439,7 +439,7 @@ static ArmOp decodeArmOp(u32 opcode, u32 arm_pc) //Offset newbits.full |= 4; - arm_printf("ARM: MEM TFX W %08X -> %08X\n", opcode, newbits.full); + arm_printf("ARM: MEM TFX W %08X -> %08X", opcode, newbits.full); return decodeArmOp(newbits.full, arm_pc); } @@ -705,7 +705,7 @@ void *arm7rec_getMemOp(bool Load, bool Byte) } extern bool Arm7Enabled; -extern "C" void DYNACALL arm_mainloop(u32 cycl, void* regs, void* entrypoints); +extern "C" void DYNACALL arm_mainloop(void* regs, void* entrypoints); // Run a timeslice of arm7 @@ -714,9 +714,17 @@ void aicaarm::run(u32 samples) for (u32 i = 0; i < samples; i++) { if (Arm7Enabled) - arm_mainloop(ARM_CYCLES_PER_SAMPLE, arm_Reg, EntryPoints); + { + arm_Reg[CYCL_CNT].I += ARM_CYCLES_PER_SAMPLE; + arm_mainloop(arm_Reg, EntryPoints); + } libAICA_TimeStep(); } } +void aicaarm::avoidRaceCondition() +{ + arm_Reg[CYCL_CNT].I = std::max((int)arm_Reg[CYCL_CNT].I, 50); +} + #endif // FEAT_AREC != DYNAREC_NONE diff --git a/core/hw/arm7/arm7_rec.h b/core/hw/arm7/arm7_rec.h index d59d34d1a..9e7c06dae 100644 --- a/core/hw/arm7/arm7_rec.h +++ b/core/hw/arm7/arm7_rec.h @@ -423,6 +423,6 @@ void arm7rec_flush(); extern "C" void arm7rec_compile(); void *arm7rec_getMemOp(bool load, bool byte); template void DYNACALL MSR_do(u32 v); -u32 DYNACALL arm_single_op(u32 opcode); +void DYNACALL arm_single_op(u32 opcode); void arm7backend_compile(const std::vector block_ops, u32 cycles); diff --git a/core/hw/arm7/arm7_rec_arm32.cpp b/core/hw/arm7/arm7_rec_arm32.cpp index 5683d793a..372783c6a 100644 --- a/core/hw/arm7/arm7_rec_arm32.cpp +++ b/core/hw/arm7/arm7_rec_arm32.cpp @@ -49,8 +49,8 @@ static void storeReg(eReg host_reg, Arm7Reg guest_reg, ArmOp::Condition cc = Arm STR(host_reg, r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I, ARM::Offset, (ARM::ConditionCode)cc); } -static const std::array alloc_regs{ - r6, r7, r9, r10, r11 +static const std::array alloc_regs{ + r5, r6, r7, r9, r10, r11 }; class Arm32ArmRegAlloc : public ArmRegAlloc @@ -428,11 +428,25 @@ static void emitFallback(const ArmOp& op) //Call interpreter MOV32(r0, op.arg[0].getImmediate()); call((u32)arm_single_op); - SUB(r5, r5, r0, false); } void arm7backend_compile(const std::vector block_ops, u32 cycles) { + loadReg(r2, CYCL_CNT); + if (is_i8r4(cycles)) + SUB(r2, r2, cycles); + else + { + u32 togo = cycles; + while(ARMImmid8r4_enc(togo) == -1) + { + SUB(r2, r2, 256); + togo -= 256; + } + SUB(r2, r2, togo); + } + storeReg(r2, CYCL_CNT); + regalloc = new Arm32ArmRegAlloc(block_ops); void *codestart = icPtr; @@ -474,19 +488,6 @@ void arm7backend_compile(const std::vector block_ops, u32 cycles) } storeFlags(); - if (is_i8r4(cycles)) - SUB(r5, r5, cycles, true); - else - { - u32 togo = cycles; - while(ARMImmid8r4_enc(togo) == -1) - { - SUB(r5, r5, 256); - togo -= 256; - } - SUB(r5, r5, togo, true); - } - JUMP((u32)&arm_exit, CC_MI); //statically predicted as not taken JUMP((u32)&arm_dispatch); vmem_platform_flush_cache(codestart, (u8*)icPtr - 1, codestart, (u8*)icPtr - 1); diff --git a/core/hw/arm7/arm7_rec_arm64.cpp b/core/hw/arm7/arm7_rec_arm64.cpp index b5deabae6..eeff27c68 100644 --- a/core/hw/arm7/arm7_rec_arm64.cpp +++ b/core/hw/arm7/arm7_rec_arm64.cpp @@ -37,7 +37,7 @@ extern const u32 ICacheSize; class Arm7Compiler; -#define MAX_REGS 7 +#define MAX_REGS 8 class AArch64ArmRegAlloc : public ArmRegAlloc { @@ -49,7 +49,7 @@ class AArch64ArmRegAlloc : public ArmRegAlloc static const WRegister& getReg(int i) { static const WRegister regs[] = { - w19, w20, w21, w22, w23, w24, w25 + w19, w20, w21, w22, w23, w24, w25, w27 }; static_assert(MAX_REGS == ARRAY_SIZE(regs), "MAX_REGS == ARRAY_SIZE(regs)"); verify(i >= 0 && (u32)i < ARRAY_SIZE(regs)); @@ -539,7 +539,6 @@ class Arm7Compiler : public MacroAssembler set_flags = false; Mov(w0, op.arg[0].getImmediate()); call((void*)arm_single_op); - Subs(w27, w27, w0); } public: @@ -547,6 +546,10 @@ public: void compile(const std::vector block_ops, u32 cycles) { + Ldr(w1, arm_reg_operand(CYCL_CNT)); + Sub(w1, w1, cycles); + Str(w1, arm_reg_operand(CYCL_CNT)); + regalloc = new AArch64ArmRegAlloc(*this, block_ops); for (u32 i = 0; i < block_ops.size(); i++) @@ -594,14 +597,7 @@ public: endConditional(condLabel); } - //pop registers & return - Subs(w27, w27, cycles); - ptrdiff_t offset = reinterpret_cast(arm_exit) - GetBuffer()->GetStartAddress(); - Label arm_exit_label; - BindToOffset(&arm_exit_label, offset); - B(&arm_exit_label, mi); - - offset = reinterpret_cast(arm_dispatch) - GetBuffer()->GetStartAddress(); + ptrdiff_t offset = reinterpret_cast(arm_dispatch) - GetBuffer()->GetStartAddress(); Label arm_dispatch_label; BindToOffset(&arm_dispatch_label, offset); B(&arm_dispatch_label); @@ -666,7 +662,7 @@ __asm__ ( ".globl arm_mainloop \n\t" ".hidden arm_mainloop \n" - "arm_mainloop: \n\t" // arm_mainloop(cycles, regs, entry points) + "arm_mainloop: \n\t" // arm_mainloop(regs, entry points) "stp x25, x26, [sp, #-96]! \n\t" "stp x27, x28, [sp, #16] \n\t" "stp x29, x30, [sp, #32] \n\t" @@ -674,16 +670,15 @@ __asm__ ( "stp x21, x22, [sp, #64] \n\t" "stp x23, x24, [sp, #80] \n\t" - "mov x28, x1 \n\t" // arm7 registers - "mov x26, x2 \n\t" // lookup base - - "ldr w27, [x28, #192] \n\t" // cycle count - "add w27, w27, w0 \n\t" // add cycles for this timeslice + "mov x28, x0 \n\t" // arm7 registers + "mov x26, x1 \n\t" // lookup base ".globl arm_dispatch \n\t" ".hidden arm_dispatch \n" "arm_dispatch: \n\t" + "ldr w3, [x28, #192] \n\t" // load cycle counter "ldp w0, w1, [x28, #184] \n\t" // load Next PC, interrupt + "tbnz w3, #31, arm_exit \n\t" // exit if cycle counter negative "ubfx w2, w0, #2, #21 \n\t" // w2 = pc >> 2. Note: assuming address space == 8 MB (23 bits) "cbnz w1, arm_dofiq \n\t" // if interrupt pending, handle it @@ -698,7 +693,6 @@ __asm__ ( ".globl arm_exit \n\t" ".hidden arm_exit \n" "arm_exit: \n\t" - "str w27, [x28, #192] \n\t" // if timeslice is over, save remaining cycles "ldp x23, x24, [sp, #80] \n\t" "ldp x21, x22, [sp, #64] \n\t" "ldp x19, x20, [sp, #48] \n\t" diff --git a/core/hw/arm7/arm7_rec_x64.cpp b/core/hw/arm7/arm7_rec_x64.cpp index e1bd606de..b2d9c5793 100644 --- a/core/hw/arm7/arm7_rec_x64.cpp +++ b/core/hw/arm7/arm7_rec_x64.cpp @@ -19,8 +19,6 @@ #include "build.h" -#define TAIL_CALLING 1 - #if HOST_CPU == CPU_X64 && FEAT_AREC != DYNAREC_NONE #define XBYAK_NO_OP_NAMES @@ -30,7 +28,6 @@ using namespace Xbyak::util; #include "arm7_rec.h" -extern u32 arm_single_op(u32 opcode); extern "C" void CompileCode(); extern "C" void CPUFiq(); extern "C" void arm_dispatch(); @@ -44,24 +41,11 @@ static const Xbyak::Reg32 call_regs[] = { ecx, edx, r8d, r9d }; #else static const Xbyak::Reg32 call_regs[] = { edi, esi, edx, ecx }; #endif -#ifdef TAIL_CALLING extern "C" u32 (**entry_points)(); -#endif u32 (**entry_points)(); class Arm7Compiler; -#ifdef TAIL_CALLING -#ifdef _WIN32 -static const std::array alloc_regs { - ebx, ebp, edi, esi, r12d, r13d, r15d -}; -#else -static const std::array alloc_regs { - ebx, ebp, r12d, r13d, r15d -}; -#endif -#else #ifdef _WIN32 static const std::array alloc_regs { ebx, ebp, edi, esi, r12d, r13d, r14d, r15d @@ -71,7 +55,6 @@ static const std::array alloc_regs { ebx, ebp, r12d, r13d, r14d, r15d }; #endif -#endif class X64ArmRegAlloc : public ArmRegAlloc { @@ -801,11 +784,6 @@ class Arm7Compiler : public Xbyak::CodeGenerator set_flags = false; mov(call_regs[0], op.arg[0].getImmediate()); call(arm_single_op); -#ifdef TAIL_CALLING - sub(r14d, eax); -#else - sub(dword[rip + &arm_Reg[CYCL_CNT].I], eax); -#endif } public: @@ -815,13 +793,8 @@ public: { regalloc = new X64ArmRegAlloc(*this, block_ops); -#ifndef TAIL_CALLING -#ifdef _WIN32 - sub(rsp, 40); // 16-byte alignment + 32-byte shadow area -#else - sub(rsp, 8); // 16-byte alignment -#endif -#endif + sub(dword[rip + &arm_Reg[CYCL_CNT]], cycles); + ArmOp::Condition currentCondition = ArmOp::AL; Xbyak::Label *condLabel = nullptr; @@ -880,21 +853,9 @@ public: } } endConditional(condLabel); -#ifdef TAIL_CALLING - sub(r14d, cycles); -#else - mov(eax, cycles); -#endif -#ifdef TAIL_CALLING + jmp((void*)&arm_dispatch); -#else -#ifdef _WIN32 - add(rsp, 40); -#else - add(rsp, 8); -#endif - ret(); -#endif + ready(); icPtr += getSize(); @@ -923,57 +884,6 @@ void arm7backend_compile(const std::vector block_ops, u32 cycles) #ifndef _MSC_VER -#ifndef TAIL_CALLING -extern "C" -u32 arm_compilecode() -{ - CompileCode(); - return 0; -} - -extern "C" -void arm_mainloop(u32 cycl, void* regs, void* entrypoints) -{ - entry_points = (u32 (**)())entrypoints; - arm_Reg[CYCL_CNT].I += cycl; - - __asm__ ( - "push %rbx \n\t" - "push %rbp \n\t" -#ifdef _WIN32 - "push %rdi \n\t" - "push %rsi \n\t" -#endif - "push %r12 \n\t" - "push %r13 \n\t" - "push %r14 \n\t" - "push %r15 \n\t" - ); - - while ((int)arm_Reg[CYCL_CNT].I > 0) - { - if (arm_Reg[INTR_PEND].I) - CPUFiq(); - - arm_Reg[CYCL_CNT].I -= entry_points[(arm_Reg[R15_ARM_NEXT].I & (ARAM_SIZE_MAX - 1)) / 4](); - } - - __asm__ ( - "pop %r15 \n\t" - "pop %r14 \n\t" - "pop %r13 \n\t" - "pop %r12 \n\t" -#ifdef _WIN32 - "pop %rsi \n\t" - "pop %rdi \n\t" -#endif - "pop %rbp \n\t" - "pop %rbx \n\t" - ); -} - -#else // !TAIL_CALLING - #ifdef __MACH__ #define _U "_" #else @@ -986,7 +896,7 @@ __asm__ ( "jmp " _U"arm_dispatch \n\t" ".globl " _U"arm_mainloop \n" - _U"arm_mainloop: \n\t" // arm_mainloop(cycles, regs, entry points) + _U"arm_mainloop: \n\t" // arm_mainloop(regs, entry points) #ifdef _WIN32 "pushq %rdi \n\t" "pushq %rsi \n\t" @@ -1003,13 +913,10 @@ __asm__ ( "subq $8, %rsp \n\t" // 16-byte stack alignment #endif - "movl " _U"arm_Reg + 192(%rip), %r14d \n\t" // CYCL_CNT #ifdef _WIN32 - "add %ecx, %r14d \n\t" // add cycles for this timeslice - "movq %r8, entry_points(%rip) \n\t" + "movq %rdx, entry_points(%rip) \n\t" #else - "add %edi, %r14d \n\t" // add cycles for this timeslice - "movq %rdx, " _U"entry_points(%rip) \n\t" + "movq %rsi, " _U"entry_points(%rip) \n\t" #endif ".globl " _U"arm_dispatch \n" @@ -1017,7 +924,7 @@ __asm__ ( "movq " _U"entry_points(%rip), %rdx \n\t" "movl " _U"arm_Reg + 184(%rip), %ecx \n\t" // R15_ARM_NEXT "movl " _U"arm_Reg + 188(%rip), %eax \n\t" // INTR_PEND - "cmp $0, %r14d \n\t" + "cmp $0," _U"arm_Reg + 192(%rip) \n\t" "jle 2f \n\t" // timeslice is over "test %eax, %eax \n\t" "jne 1f \n\t" // if interrupt pending, handle it @@ -1030,7 +937,6 @@ __asm__ ( "jmp " _U"arm_dispatch \n" "2: \n\t" // arm_exit: - "movl %r14d, " _U"arm_Reg + 192(%rip) \n\t" // CYCL_CNT: save remaining cycles #ifdef _WIN32 "addq $40, %rsp \n\t" #else @@ -1048,6 +954,5 @@ __asm__ ( #endif "ret \n" ); -#endif // !TAIL_CALLING #endif // !_MSC_VER #endif // X64 && DYNAREC_JIT diff --git a/core/hw/arm7/arm7_x64_msvc.asm b/core/hw/arm7/arm7_x64_msvc.asm index e6a1fe5ff..7a71e3d34 100644 --- a/core/hw/arm7/arm7_x64_msvc.asm +++ b/core/hw/arm7/arm7_x64_msvc.asm @@ -16,7 +16,7 @@ EXTERN arm_Reg: PTR DWORD EXTERN entry_points: QWORD PUBLIC arm_mainloop -arm_mainloop PROC FRAME ; arm_mainloop(cycles, regs, entry points) +arm_mainloop PROC FRAME ; arm_mainloop(regs, entry points) push rdi .pushreg rdi push rsi @@ -37,16 +37,14 @@ arm_mainloop PROC FRAME ; arm_mainloop(cycles, regs, entry points) .allocstack 40 .endprolog - mov r14d, dword ptr [arm_Reg + 192] ; CYCL_CNT - add r14d, ecx ; add cycles for this timeslice - mov qword ptr [entry_points], r8 + mov qword ptr [entry_points], rdx PUBLIC arm_dispatch arm_dispatch:: mov rdx, qword ptr [entry_points] mov ecx, dword ptr [arm_Reg + 184] ; R15_ARM_NEXT mov eax, dword ptr [arm_Reg + 188] ; INTR_PEND - cmp r14d, 0 + cmp dword ptr [arm_Reg + 192], 0 jle arm_exit ; timeslice is over test eax, eax jne arm_dofiq ; if interrupt pending, handle it @@ -59,7 +57,6 @@ arm_dofiq: jmp arm_dispatch arm_exit: - mov dword ptr [arm_Reg + 192], r14d ; CYCL_CNT: save remaining cycles add rsp, 40 pop rbp pop rbx diff --git a/core/hw/holly/sb.h b/core/hw/holly/sb.h index 248dc8fbe..1984044f2 100644 --- a/core/hw/holly/sb.h +++ b/core/hw/holly/sb.h @@ -490,7 +490,7 @@ extern u32 SB_ISTNRM; #define SB_ADEN SB_REG_32(ADEN) //0x005F7818 SB_ADST RW AICA:G2-DMA start -#define SB_ADST SB_REG_32(ADST) +//#define SB_ADST SB_REG_32(ADST) //0x005F781C SB_ADSUSP RW AICA:G2-DMA suspend #define SB_ADSUSP SB_REG_32(ADSUSP) diff --git a/core/rec-ARM/ngen_arm.S b/core/rec-ARM/ngen_arm.S index cc19aed2d..de28cbcc5 100644 --- a/core/rec-ARM/ngen_arm.S +++ b/core/rec-ARM/ngen_arm.S @@ -216,7 +216,7 @@ XEntryPoints: .word CSYM(EntryPoints) .global CSYM(arm_mainloop) HIDDEN(arm_mainloop) -CSYM(arm_mainloop): @(cntx,lookup_base,cycles) +CSYM(arm_mainloop): @(cntx,lookup_base) push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, #4 @ 8-byte stack alignment @@ -225,19 +225,19 @@ CSYM(arm_mainloop): @(cntx,lookup_base,cycles) ldr r8,Xarm_Reg @load cntx ldr r4,XEntryPoints @load lookup base #else - mov r8,r1 @load cntx - mov r4,r2 @load lookup base + mov r8,r0 @load cntx + mov r4,r1 @load lookup base #endif - ldr r5,[r8,#192] @load cycle count - add r5,r0 @add cycles for this timeslice - b CSYM(arm_dispatch) .global CSYM(arm_dispatch) HIDDEN(arm_dispatch) CSYM(arm_dispatch): + ldr r3,[r8,#192] ldrd r0,r1,[r8,#184] @load: Next PC, interrupt + cmp r3,#0 + ble arm_exit ubfx r2,r0,#2,#21 @ assuming 8 MB address space max (23 bits) cmp r1,#0 bne arm_dofiq @@ -251,7 +251,6 @@ arm_dofiq: .global CSYM(arm_exit) HIDDEN(arm_exit) CSYM(arm_exit): - str r5,[r8,#192] @if timeslice is over, save remaining cycles add sp, #4 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} diff --git a/core/rec-ARM/rec_arm.cpp b/core/rec-ARM/rec_arm.cpp index 8d442e1f9..ffac04849 100644 --- a/core/rec-ARM/rec_arm.cpp +++ b/core/rec-ARM/rec_arm.cpp @@ -2094,14 +2094,6 @@ void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool s //StoreImms(r0,r1,(u32)&last_run_block,(u32)code); //useful when code jumps to random locations ... ++blockno; - if (settings.profile.run_counts) - { - MOV32(r1,(u32)&block->runs); - LDR(r0,r1); - ADD(r0,r0,1); - STR(r0,r1); - } - //reg alloc reg.DoAlloc(block,alloc_regs,alloc_fpu); diff --git a/core/serialize.cpp b/core/serialize.cpp index 319088163..1cf9811aa 100644 --- a/core/serialize.cpp +++ b/core/serialize.cpp @@ -49,6 +49,7 @@ extern u32 ARMRST;//arm reset reg extern u32 rtc_EN; extern int dma_sched_id; extern u32 RealTimeClock; +extern u32 SB_ADST; //./core/hw/aica/aica_mem.o extern u8 aica_reg[0x8000]; @@ -297,7 +298,7 @@ bool dc_serialize(void **data, unsigned int *total_size) REICAST_S(SB_ISTNRM); REICAST_S(SB_FFST_rc); REICAST_S(SB_FFST); - + REICAST_S(SB_ADST); sys_rom->Serialize(data, total_size); sys_nvmem->Serialize(data, total_size); @@ -528,6 +529,7 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size) REICAST_US(SB_ISTNRM); REICAST_US(SB_FFST_rc); REICAST_US(SB_FFST); + SB_ADST = 0; if (settings.platform.system == DC_PLATFORM_NAOMI || settings.platform.system == DC_PLATFORM_ATOMISWAVE) { @@ -820,6 +822,10 @@ bool dc_unserialize(void **data, unsigned int *total_size) REICAST_US(SB_ISTNRM); REICAST_US(SB_FFST_rc); REICAST_US(SB_FFST); + if (version >= V15) + REICAST_US(SB_ADST); + else + SB_ADST = 0; if (version < V5) { diff --git a/core/types.h b/core/types.h index 569810389..2e9512939 100644 --- a/core/types.h +++ b/core/types.h @@ -568,5 +568,6 @@ enum serialize_version_enum { V12 = 807, V13 = 808, V14 = 809, - VCUR_FLYCAST = V14, + V15 = 810, + VCUR_FLYCAST = V15, } ;