arm32: fix sh4 dynarec infinite loop on reset. aica rec: fix icache flush

Fixes hang when exiting the awave service menu (Issue #208).
2021-03-20 15:06:16 +01:00 · 2021-03-20 15:06:16 +01:00 · 0b6420d90a
parent 9bc832c4c6
commit 0b6420d90a
3 changed files with 16 additions and 73 deletions
--- a/core/hw/arm7/arm7_rec_arm32.cpp
+++ b/core/hw/arm7/arm7_rec_arm32.cpp
@@ -425,6 +425,8 @@ static void emitFallback(const ArmOp& op)

 void arm7backend_compile(const std::vector<ArmOp>& block_ops, u32 cycles)
 {
+	void *codestart = recompiler::currentCode();
+
 	loadReg(r2, CYCL_CNT);
 	while (!is_i8r4(cycles))
 	{
@@ -435,7 +437,6 @@ void arm7backend_compile(const std::vector<ArmOp>& block_ops, u32 cycles)
 	storeReg(r2, CYCL_CNT);

 	regalloc = new Arm32ArmRegAlloc(block_ops);
-	void *codestart = recompiler::currentCode();

 	loadFlags();

--- a/core/hw/sh4/dyna/shil_canonical.h
+++ b/core/hw/sh4/dyna/shil_canonical.h
@@ -1,27 +1,12 @@
 /*

 	This is a header file that can create 
-	a) Shil opcode enums
-	b) Shil opcode classes/portable C implementation ("canonical" implementation)
-	c) The routing table for canonical implementations
-	d) Cookies (if you're really lucky)
-
+	SHIL_MODE == 0) Shil opcode enums
+	SHIL_MODE == 1) Shil opcode classes/portable C implementation ("canonical" implementation)
+	SHIL_MODE == 2) Shil opcode classes declaration
+	SHIL_MODE == 3) The routing table for canonical implementations
+	SHIL_MODE == 4) opcode name list (for logging/disass)
 */
-#if HOST_CPU == CPU_ARM && !defined(__ANDROID__) && 0
-//FIXME: Fix extern function support on shil, or remove these
-extern "C" void ftrv_asm(float* fd,float* fn, float* fm);
-extern "C" f32 fipr_asm(float* fn, float* fm);
-#define ftrv_impl ftrv_asm
-#define fipr_impl fipr_asm
-#endif
-
-#ifndef ftrv_impl
-#define ftrv_impl f1
-#endif
-
-#ifndef fipr_impl
-#define fipr_impl f1
-#endif

 #define fsca_impl fsca_table

@@ -928,7 +913,7 @@ shil_compile
 (
 	shil_cf_arg_ptr(rs2);
 	shil_cf_arg_ptr(rs1);
-	shil_cf(fipr_impl);
+	shil_cf(f1);
 	shil_cf_rv_f32(rd);
 )

@@ -1004,7 +989,7 @@ shil_compile
 	shil_cf_arg_ptr(rs2);
 	shil_cf_arg_ptr(rs1);
 	shil_cf_arg_ptr(rd);
-	shil_cf(ftrv_impl);
+	shil_cf(f1);
 )
 shil_opc_end()

--- a/core/rec-ARM/ngen_arm.S
+++ b/core/rec-ARM/ngen_arm.S
@@ -6,7 +6,6 @@
 .align 8

 .equ SH4_TIMESLICE, 448
-.equ BM_BLOCKLIST_MASK, 65532 @FFFC

 #if defined(__APPLE__)
 #define CSYM(n) _##n
@@ -162,9 +161,14 @@ CSYM(intc_sched):        @ next_pc _MUST_ be on ram
    bl CSYM(UpdateSystem)
 	mov lr,r4
 	cmp r0,#0
-	bxeq lr			@faster than bxeq r4 (as it should, call stack cache)
+	bne CSYM(do_iter)
+	ldr r0,[r8,#-156]         @load CpuRunning
+	cmp r0,#0
+	beq CSYM(cleanup)
+	bx lr

-do_iter:
+HIDDEN(do_iter)
+CSYM(do_iter):
 	mov r0,r4
 	bl CSYM(rdv_DoInterrupts)
 	mov r4,r0
@@ -201,51 +205,4 @@ bx lr
 end_ngen_mainloop:
@@@@@@@@@@ ngen_mainloop @@@@@@@@@@

-@@@@@@
-@matrix mul
-#ifndef __ANDROID__
-.global CSYM(ftrv_asm)
-HIDDEN(ftrv_asm)
-CSYM(ftrv_asm):
-
-@r0=dst,r1=vec,r2=mtx
-
-@3x vld1.32 might be faster
-vldm r2,{d16-d24}
-vldm r1, {d0-d1} 
-
-VMUL.F32 Q2,Q8,d0[0]
-VMLA.F32 Q2,Q9,d0[1]
-VMLA.F32 Q2,Q10,d1[0]
-VMLA.F32 Q2,Q11,d1[1]
-
-vstm r0,{d4,d5}
-
-bx lr
-
-.global CSYM(fipr_asm)
-HIDDEN(fipr_asm)
-CSYM(fipr_asm):
-
-@ vdot
-@		idp=fr[n+0]*fr[m+0];
-@		idp+=fr[n+1]*fr[m+1];
-@		idp+=fr[n+2]*fr[m+2];
-@		idp+=fr[n+3]*fr[m+3];
-
-
-vldm r0, {d0,d1}
-vldm r1, {d2,d3}
-
-vmul.f32 q0,q1
-@NEON is quite nice actually ! if only its performance was good enough ...
-vpadd.f32 d0,d0,d1 @d0={d0[0]+d0[1], d1[0]+d1[1]}
-vpadd.f32 d0,d0,d0 @d0={d0[0]+d0[1]+d1[0]+d1[1], d0[0]+d0[1]+d1[0]+d1[1]}
-
-@store to ret ..
-vmov r0,s0
-bx lr
-
-#endif
-
 #endif