diff --git a/CMakeLists.txt b/CMakeLists.txt index ef0ea6e72..14a86cfe0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,8 @@ endif() option(ENABLE_CTEST "Enables unit tests" OFF) option(ENABLE_OPROFILE "Enable OProfile" OFF) option(TEST_AUTOMATION "Enable test automation" OFF) +option(ENABLE_LOG "Enable full logging" OFF) +option(ASAN "Enable address sanitizer" OFF) project(flycast) @@ -52,6 +54,12 @@ endif() target_compile_features(${PROJECT_NAME} PRIVATE c_std_11 cxx_std_11) set_target_properties(${PROJECT_NAME} PROPERTIES CXX_EXTENSIONS OFF) +if(NOT MSVC) + target_compile_options(${PROJECT_NAME} PRIVATE + $<$:-fno-rtti> + -fno-strict-aliasing + -ffast-math) +endif() target_compile_definitions(${PROJECT_NAME} PRIVATE $<$:GL_SILENCE_DEPRECATION> @@ -60,6 +68,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE $<$:_USE_MATH_DEFINES> $<$:NOMINMAX> $<$:TEST_AUTOMATION> + $<$:DEBUGFAST> ENABLE_MODEM) target_include_directories(${PROJECT_NAME} PRIVATE core core/deps core/deps/stb core/deps/xbyak core/khronos) @@ -183,6 +192,12 @@ if(UNIX AND NOT APPLE AND NOT ANDROID) find_package(Threads REQUIRED) target_link_libraries(${PROJECT_NAME} PRIVATE Threads::Threads) + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") + set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE False) + if(${CMAKE_VERSION} VERSION_LESS "3.14.0") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") + endif() + endif() if(NOT SDL2_FOUND) find_package(X11 REQUIRED) @@ -196,6 +211,11 @@ if(UNIX AND NOT APPLE AND NOT ANDROID) target_link_libraries(${PROJECT_NAME} PRIVATE ${CMAKE_DL_LIBS} rt) endif() +if(ASAN) + target_compile_options(${PROJECT_NAME} PRIVATE -fsanitize=address -static-libasan) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -static-libasan") +endif() + target_sources(${PROJECT_NAME} PRIVATE core/deps/chdpsr/cdipsr.cpp core/deps/chdpsr/cdipsr.h) @@ -566,6 +586,7 @@ target_sources(${PROJECT_NAME} PRIVATE core/hw/sh4/modules/tmu.cpp core/hw/sh4/modules/ubc.cpp core/hw/sh4/modules/wince.h + core/hw/sh4/sh4_cache.h core/hw/sh4/sh4_core.h core/hw/sh4/sh4_core_regs.cpp core/hw/sh4/sh4_if.h @@ -732,6 +753,7 @@ target_sources(${PROJECT_NAME} PRIVATE core/rend/osd.h core/rend/sorter.cpp core/rend/sorter.h + core/rend/tileclip.h core/rend/TexCache.cpp core/rend/TexCache.h) @@ -872,6 +894,7 @@ if(BUILD_TESTING) core/deps/gtest/src/gtest_main.cc) target_sources(${PROJECT_NAME} PRIVATE + tests/src/div32_test.cpp tests/src/test_stubs.cpp tests/src/serialize_test.cpp) endif() diff --git a/core/hw/aica/aica.cpp b/core/hw/aica/aica.cpp index a3f166e86..b7106265d 100644 --- a/core/hw/aica/aica.cpp +++ b/core/hw/aica/aica.cpp @@ -4,8 +4,10 @@ #include "sgc_if.h" #include "hw/holly/holly_intc.h" #include "hw/holly/sb.h" +#include "hw/sh4/sh4_sched.h" +#include "hw/arm7/arm7.h" -#define SH4_IRQ_BIT (1<<(holly_SPU_IRQ&255)) +#define SH4_IRQ_BIT (1 << (holly_SPU_IRQ & 31)) CommonData_struct* CommonData; DSPData_struct* DSPData; @@ -77,17 +79,23 @@ static void UpdateSh4Ints() if ((SB_ISTEXT & SH4_IRQ_BIT) != 0) asic_CancelInterrupt(holly_SPU_IRQ); } - } - AicaTimer timers[3]; -//Mainloop -void libAICA_Update(u32 samples) +int aica_schid = -1; +const int AICA_TICK = 145125; // 44.1 KHz / 32 + +static int AicaUpdate(int tag, int c, int j) { - AICA_Sample32(); + arm_Run(32); + if (!settings.aica.NoBatch && !settings.aica.DSPEnabled) + AICA_Sample32(); + + return AICA_TICK; } +//Mainloop + void libAICA_TimeStep() { for (int i=0;i<3;i++) @@ -195,6 +203,11 @@ s32 libAICA_Init() MCIRE=(InterruptInfo*)&aica_reg[0x28B4+8]; sgc_Init(); + if (aica_schid == -1) + { + aica_schid = sh4_sched_register(0, &AicaUpdate); + sh4_sched_request(aica_schid, AICA_TICK); + } return 0; } @@ -202,8 +215,10 @@ s32 libAICA_Init() void libAICA_Reset(bool hard) { if (hard) + { init_mem(); - sgc_Init(); + sgc_Init(); + } for (u32 i = 0; i < 3; i++) timers[i].Init(aica_reg, i); aica_Reset(hard); diff --git a/core/hw/aica/aica_if.cpp b/core/hw/aica/aica_if.cpp index 4d6f76cf2..b693db999 100644 --- a/core/hw/aica/aica_if.cpp +++ b/core/hw/aica/aica_if.cpp @@ -5,10 +5,14 @@ */ #include "aica_if.h" +#include "aica_mem.h" #include "hw/holly/sb.h" #include "hw/holly/holly_intc.h" #include "hw/sh4/sh4_mem.h" #include "hw/sh4/sh4_sched.h" +#include "profiler/profiler.h" +#include "hw/sh4/dyna/blockmanager.h" +#include "hw/arm7/arm7.h" #include @@ -18,6 +22,7 @@ u32 ARMRST;//arm reset reg u32 rtc_EN=0; int dma_sched_id; u32 RealTimeClock; +int rtc_schid = -1; u32 GetRTC_now() { @@ -107,11 +112,12 @@ u32 ReadMem_aica_reg(u32 addr,u32 sz) } } -void ArmSetRST() +static void ArmSetRST() { - ARMRST&=1; - libARM_SetResetState(ARMRST); + ARMRST &= 1; + arm_SetEnabled(ARMRST == 0); } + void WriteMem_aica_reg(u32 addr,u32 data,u32 sz) { addr&=0x7FFF; @@ -121,12 +127,12 @@ void WriteMem_aica_reg(u32 addr,u32 data,u32 sz) if (addr==0x2C01) { VREG=data; - INFO_LOG(AICA, "VREG = %02X", VREG); + INFO_LOG(AICA_ARM, "VREG = %02X", VREG); } else if (addr==0x2C00) { ARMRST=data; - INFO_LOG(AICA, "ARMRST = %02X", ARMRST); + INFO_LOG(AICA_ARM, "ARMRST = %02X", ARMRST); ArmSetRST(); } else @@ -140,7 +146,7 @@ void WriteMem_aica_reg(u32 addr,u32 data,u32 sz) { VREG=(data>>8)&0xFF; ARMRST=data&0xFF; - INFO_LOG(AICA, "VREG = %02X ARMRST %02X", VREG, ARMRST); + INFO_LOG(AICA_ARM, "VREG = %02X ARMRST %02X", VREG, ARMRST); ArmSetRST(); } else @@ -149,15 +155,35 @@ void WriteMem_aica_reg(u32 addr,u32 data,u32 sz) } } } + +static int DreamcastSecond(int tag, int c, int j) +{ + RealTimeClock++; + + prof_periodical(); + +#if FEAT_SHREC != DYNAREC_NONE + bm_Periodical_1s(); +#endif + + return SH4_MAIN_CLOCK; +} + //Init/res/term void aica_Init() { RealTimeClock = GetRTC_now(); + if (rtc_schid == -1) + { + rtc_schid = sh4_sched_register(0, &DreamcastSecond); + sh4_sched_request(rtc_schid, SH4_MAIN_CLOCK); + } } void aica_Reset(bool hard) { - aica_Init(); + if (hard) + aica_Init(); VREG = 0; ARMRST = 0; } @@ -167,7 +193,7 @@ void aica_Term() } -int dma_end_sched(int tag, int cycl, int jitt) +static int dma_end_sched(int tag, int cycl, int jitt) { u32 len=SB_ADLEN & 0x7FFFFFFF; @@ -189,7 +215,7 @@ int dma_end_sched(int tag, int cycl, int jitt) return 0; } -void Write_SB_ADST(u32 addr, u32 data) +static void Write_SB_ADST(u32 addr, u32 data) { //0x005F7800 SB_ADSTAG RW AICA:G2-DMA G2 start address //0x005F7804 SB_ADSTAR RW AICA:G2-DMA system memory start address @@ -214,7 +240,10 @@ void Write_SB_ADST(u32 addr, u32 data) u32 tmp=src; src=dst; dst=tmp; + DEBUG_LOG(AICA, "AICA-DMA : SB_ADDIR==1 DMA Read to 0x%X from 0x%X %d bytes", dst, src, len); } + else + DEBUG_LOG(AICA, "AICA-DMA : SB_ADDIR==0:DMA Write to 0x%X from 0x%X %d bytes", dst, src, len); WriteMemBlock_nommu_dma(dst,src,len); @@ -231,7 +260,7 @@ void Write_SB_ADST(u32 addr, u32 data) } } -void Write_SB_E1ST(u32 addr, u32 data) +static void Write_SB_E1ST(u32 addr, u32 data) { //0x005F7800 SB_ADSTAG RW AICA:G2-DMA G2 start address //0x005F7804 SB_ADSTAR RW AICA:G2-DMA system memory start address @@ -285,7 +314,7 @@ void Write_SB_E1ST(u32 addr, u32 data) } } -void Write_SB_E2ST(u32 addr, u32 data) +static void Write_SB_E2ST(u32 addr, u32 data) { if ((data & 1) && (SB_E2EN & 1)) { @@ -321,7 +350,7 @@ void Write_SB_E2ST(u32 addr, u32 data) } -void Write_SB_DDST(u32 addr, u32 data) +static void Write_SB_DDST(u32 addr, u32 data) { if ((data & 1) && (SB_DDEN & 1)) { diff --git a/core/hw/aica/aica_if.h b/core/hw/aica/aica_if.h index 984b1b3b7..a58888d5a 100644 --- a/core/hw/aica/aica_if.h +++ b/core/hw/aica/aica_if.h @@ -4,7 +4,6 @@ extern u32 VREG; extern VArray2 aica_ram; -extern u32 RealTimeClock; u32 GetRTC_now(); u32 ReadMem_aica_rtc(u32 addr,u32 sz); void WriteMem_aica_rtc(u32 addr,u32 data,u32 sz); @@ -15,9 +14,10 @@ void aica_Init(); void aica_Reset(bool hard); void aica_Term(); -#define UpdateAica(clc) libAICA_Update(clc) -#define UpdateArm(clc) libARM_Update(clc) - void aica_sb_Init(); void aica_sb_Reset(bool hard); void aica_sb_Term(); + +s32 libAICA_Init(); +void libAICA_Reset(bool hard); +void libAICA_Term(); diff --git a/core/hw/aica/sgc_if.cpp b/core/hw/aica/sgc_if.cpp index 56fdc2084..b09462708 100755 --- a/core/hw/aica/sgc_if.cpp +++ b/core/hw/aica/sgc_if.cpp @@ -1368,11 +1368,6 @@ u32 cdda_index=CDDA_SIZE<<1; //no DSP for now in this version void AICA_Sample32() { - if (settings.aica.NoBatch || settings.aica.DSPEnabled) - { - return; - } - SampleType mxlr[64]; memset(mxlr,0,sizeof(mxlr)); diff --git a/core/hw/arm7/arm7.cpp b/core/hw/arm7/arm7.cpp index 90a4515dc..fe8c19161 100644 --- a/core/hw/arm7/arm7.cpp +++ b/core/hw/arm7/arm7.cpp @@ -13,21 +13,16 @@ #define CPUWriteHalfWord arm_WriteMem16 #define CPUWriteByte arm_WriteMem8 - #define reg arm_Reg #define armNextPC reg[R15_ARM_NEXT].I - #define CPUUpdateTicksAccesint(a) 1 #define CPUUpdateTicksAccessSeq32(a) 1 #define CPUUpdateTicksAccesshort(a) 1 #define CPUUpdateTicksAccess32(a) 1 #define CPUUpdateTicksAccess16(a) 1 - - -//bool arm_FiqPending; -- not used , i use the input directly :) -//bool arm_IrqPending; +#define ARM_CYCLES_PER_SAMPLE 256 alignas(8) reg_pair arm_Reg[RN_ARM_REG_COUNT]; @@ -82,10 +77,11 @@ void arm_Run_(u32 CycleCount) } } -void arm_Run(u32 CycleCount) { - for (int i=0;i<32;i++) +void arm_Run(u32 samples) +{ + for (u32 i = 0; i < samples; i++) { - arm_Run_(CycleCount/32); + arm_Run_(ARM_CYCLES_PER_SAMPLE); libAICA_TimeStep(); } } @@ -1585,13 +1581,12 @@ void armv_MOV32(eReg regn, u32 imm) #endif // HOST_CPU == CPU_ARM //Run a timeslice for ARMREC -//CycleCount is pretty much fixed to (512*32) for now (might change to a diff constant, but will be constant) -void arm_Run(u32 CycleCount) +void arm_Run(u32 samples) { - for (int i = 0; i < 32; i++) + for (int i = 0; i < samples; i++) { if (Arm7Enabled) - arm_mainloop(CycleCount / 32, arm_Reg, EntryPoints); + arm_mainloop(ARM_CYCLES_PER_SAMPLE, arm_Reg, EntryPoints); libAICA_TimeStep(); } } diff --git a/core/hw/arm7/arm7.h b/core/hw/arm7/arm7.h index 67e7d7024..726bd0536 100644 --- a/core/hw/arm7/arm7.h +++ b/core/hw/arm7/arm7.h @@ -3,11 +3,9 @@ void arm_Init(); void arm_Reset(); -void arm_Run(u32 uNumCycles); +void arm_Run(u32 samples); void arm_SetEnabled(bool enabled); -#define arm_sh4_bias (2) - enum { RN_CPSR = 16, diff --git a/core/hw/arm7/arm_mem.cpp b/core/hw/arm7/arm_mem.cpp index 8be027eda..53c3a4572 100644 --- a/core/hw/arm7/arm_mem.cpp +++ b/core/hw/arm7/arm_mem.cpp @@ -1,4 +1,5 @@ #include "arm_mem.h" +#include "hw/aica/aica_mem.h" #define REG_L (0x2D00) #define REG_M (0x2D04) @@ -82,4 +83,4 @@ template u32 arm_ReadReg<4,u32>(u32 adr); template void arm_WriteReg<1>(u32 adr,u8 data); template void arm_WriteReg<2>(u32 adr,u16 data); -template void arm_WriteReg<4>(u32 adr,u32 data); \ No newline at end of file +template void arm_WriteReg<4>(u32 adr,u32 data); diff --git a/core/hw/arm7/vbaARM.cpp b/core/hw/arm7/vbaARM.cpp index 532a3b400..b225f4c0e 100644 --- a/core/hw/arm7/vbaARM.cpp +++ b/core/hw/arm7/vbaARM.cpp @@ -23,14 +23,3 @@ void libARM_Reset(bool hard) arm_Reset(); arm_SetEnabled(false); } - -void libARM_SetResetState(u32 state) -{ - arm_SetEnabled(state==0); -} - -//Mainloop -void libARM_Update(u32 Cycles) -{ - arm_Run(Cycles/arm_sh4_bias); -} diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h index 373683305..4a2d93711 100644 --- a/core/hw/mem/_vmem.h +++ b/core/hw/mem/_vmem.h @@ -64,14 +64,6 @@ _vmem_handler _vmem_register_handler(_vmem_ReadMem8FP* read8,_vmem_ReadMem16FP* (read<1,u8>,read<2,u16>,read<4,u32>, \ write<1,u8>,write<2,u16>,write<4,u32>) -#define _vmem_register_handler_Template1(read,write,extra_Tparam) _vmem_register_handler \ - (read<1,u8,extra_Tparam>,read<2,u16,extra_Tparam>,read<4,u32,extra_Tparam>, \ - write<1,u8,extra_Tparam>,write<2,u16,extra_Tparam>,write<4,u32,extra_Tparam>) - -#define _vmem_register_handler_Template2(read,write,etp1,etp2) _vmem_register_handler \ - (read<1,u8,etp1,etp2>,read<2,u16,etp1,etp2>,read<4,u32,etp1,etp2>, \ - write<1,u8,etp1,etp2>,write<2,u16,etp1,etp2>,write<4,u32,etp1,etp2>) - void _vmem_map_handler(_vmem_handler Handler,u32 start,u32 end); void _vmem_map_block(void* base,u32 start,u32 end,u32 mask); void _vmem_mirror_mapping(u32 new_region,u32 start,u32 size); diff --git a/core/hw/sh4/interpr/sh4_interpreter.cpp b/core/hw/sh4/interpr/sh4_interpreter.cpp index 14f183f82..6fe00b7b3 100644 --- a/core/hw/sh4/interpr/sh4_interpreter.cpp +++ b/core/hw/sh4/interpr/sh4_interpreter.cpp @@ -7,15 +7,16 @@ #include "../sh4_interpreter.h" #include "../sh4_opcode_list.h" #include "../sh4_core.h" -#include "hw/aica/aica_if.h" #include "../sh4_interrupts.h" #include "hw/sh4/sh4_mem.h" -#include "profiler/profiler.h" -#include "../dyna/blockmanager.h" #include "../sh4_sched.h" +#include "hw/holly/sb.h" +#include "../sh4_cache.h" #define CPU_RATIO (8) +sh4_icache icache; + static s32 l; static void ExecuteOpcode(u16 op) @@ -26,6 +27,14 @@ static void ExecuteOpcode(u16 op) l -= CPU_RATIO; } +static u16 ReadNexOp() +{ + u32 addr = next_pc; + next_pc += 2; + + return icache.ReadMem(addr); +} + void Sh4_int_Run() { sh4_int_bCpuRun=true; @@ -39,9 +48,7 @@ void Sh4_int_Run() #endif do { - u32 addr = next_pc; - next_pc += 2; - u32 op = IReadMem16(addr); + u32 op = ReadNexOp(); ExecuteOpcode(op); } while (l > 0); @@ -79,8 +86,7 @@ void Sh4_int_Step() } else { - u32 op=ReadMem16(next_pc); - next_pc+=2; + u32 op = ReadNexOp(); ExecuteOpcode(op); } } @@ -118,6 +124,7 @@ void Sh4_int_Reset(bool hard) fpscr.full = 0x0004001; old_fpscr=fpscr; UpdateFPSCR(); + icache.Reset(hard); //Any more registers have default value ? INFO_LOG(INTERPRETER, "Sh4 Reset"); @@ -135,9 +142,7 @@ void ExecuteDelayslot() #if !defined(NO_MMU) try { #endif - u32 addr = next_pc; - next_pc += 2; - u32 op = IReadMem16(addr); + u32 op = ReadNexOp(); if (op != 0) // Looney Tunes: Space Race hack ExecuteOpcode(op); @@ -165,40 +170,6 @@ void ExecuteDelayslot_RTE() #endif } -//General update - -//3584 Cycles -#define AICA_SAMPLE_GCM 441 -#define AICA_SAMPLE_CYCLES (SH4_MAIN_CLOCK/(44100/AICA_SAMPLE_GCM)*32) - -int aica_schid = -1; -int rtc_schid = -1; - -//14336 Cycles - -const int AICA_TICK=145124; - -static int AicaUpdate(int tag, int c, int j) -{ - UpdateArm(512*32); - UpdateAica(1*32); - - return AICA_TICK; -} - -static int DreamcastSecond(int tag, int c, int j) -{ - RealTimeClock++; - - prof_periodical(); - -#if FEAT_SHREC != DYNAREC_NONE - bm_Periodical_1s(); -#endif - - return SH4_MAIN_CLOCK; -} - // every SH4_TIMESLICE cycles int UpdateSystem() { @@ -240,16 +211,8 @@ void Get_Sh4Interpreter(sh4_if* rv) void Sh4_int_Init() { - verify(sizeof(Sh4cntx)==448); + static_assert(sizeof(Sh4cntx) == 448, "Invalid Sh4Cntx size"); - if (aica_schid == -1) - { - aica_schid=sh4_sched_register(0,&AicaUpdate); - sh4_sched_request(aica_schid,AICA_TICK); - - rtc_schid=sh4_sched_register(0,&DreamcastSecond); - sh4_sched_request(rtc_schid,SH4_MAIN_CLOCK); - } memset(&p_sh4rcb->cntx, 0, sizeof(p_sh4rcb->cntx)); } diff --git a/core/hw/sh4/interpr/sh4_opcodes.cpp b/core/hw/sh4/interpr/sh4_opcodes.cpp index d9e5791ad..7ca9d9d7f 100644 --- a/core/hw/sh4/interpr/sh4_opcodes.cpp +++ b/core/hw/sh4/interpr/sh4_opcodes.cpp @@ -1185,21 +1185,18 @@ sh4op(i0000_0000_0011_1000) //ocbi @ sh4op(i0000_nnnn_1001_0011) { - u32 n = GetN(op); //printf("ocbi @0x%08X \n",r[n]); } //ocbp @ sh4op(i0000_nnnn_1010_0011) { - u32 n = GetN(op); //printf("ocbp @0x%08X \n",r[n]); } //ocbwb @ sh4op(i0000_nnnn_1011_0011) { - u32 n = GetN(op); //printf("ocbwb @0x%08X \n",r[n]); } @@ -1550,86 +1547,59 @@ sh4op(i0000_0000_0001_1001) } //div0s , sh4op(i0010_nnnn_mmmm_0111) -{//ToDo : Check This [26/4/05] +{ u32 n = GetN(op); u32 m = GetM(op); - //new implementation - sr.Q=r[n]>>31; - sr.M=r[m]>>31; - sr.T=sr.M^sr.Q; - return; - /* - if ((r[n] & 0x80000000)!=0) - sr.Q = 1; - else - sr.Q = 0; - - if ((r[m] & 0x80000000)!=0) - sr.M = 1; - else - sr.M = 0; - - if (sr.Q == sr.M) - sr.T = 0; - else - sr.T = 1; - */ + sr.Q = r[n] >> 31; + sr.M = r[m] >> 31; + sr.T = sr.M ^ sr.Q; } //div1 , sh4op(i0011_nnnn_mmmm_0100) { - u32 n=GetN(op); - u32 m=GetM(op); + u32 n = GetN(op); + u32 m = GetM(op); - u32 tmp0, tmp2; - unsigned char old_q, tmp1; - - old_q = sr.Q; - sr.Q = (u8)((0x80000000 & r[n]) !=0); + const u8 old_q = sr.Q; + sr.Q = (u8)((0x80000000 & r[n]) != 0); r[n] <<= 1; - r[n] |= (unsigned long)sr.T; + r[n] |= sr.T; - tmp0 = r[n]; // this need only be done once here .. - // Old implementation -// tmp2 = r[m]; -// -// if( 0 == old_q ) -// { -// if( 0 == sr.M ) -// { -// r[n] -= tmp2; -// tmp1 = (r[n]>tmp0); -// sr.Q = (sr.Q==0) ? tmp1 : (u8)(tmp1==0) ; -// } -// else -// { -// r[n] += tmp2; -// tmp1 =(r[n]tmp0); -// sr.Q = (sr.Q==0) ? (u8)(tmp1==0) : tmp1 ; -// } -// } - - r[n] += (2 * (old_q ^ sr.M) - 1) * r[m]; - sr.Q ^= old_q ^ (sr.M ? r[n] > tmp0 : r[n] >= tmp0); + const u32 old_rn = r[n]; + if (old_q == 0) + { + if (sr.M == 0) + { + r[n] -= r[m]; + bool tmp1 = r[n] > old_rn; + sr.Q = sr.Q ^ tmp1; + } + else + { + r[n] += r[m]; + bool tmp1 = r[n] < old_rn; + sr.Q = !sr.Q ^ tmp1; + } + } + else + { + if (sr.M == 0) + { + r[n] += r[m]; + bool tmp1 = r[n] < old_rn; + sr.Q = sr.Q ^ tmp1; + } + else + { + r[n] -= r[m]; + bool tmp1 = r[n] > old_rn; + sr.Q = !sr.Q ^ tmp1; + } + } sr.T = (sr.Q == sr.M); } diff --git a/core/hw/sh4/modules/ccn.cpp b/core/hw/sh4/modules/ccn.cpp index a71f595c5..3c3359f95 100644 --- a/core/hw/sh4/modules/ccn.cpp +++ b/core/hw/sh4/modules/ccn.cpp @@ -9,6 +9,7 @@ #include "hw/sh4/sh4_if.h" #include "hw/sh4/sh4_mmr.h" #include "hw/sh4/sh4_core.h" +#include "hw/sh4/sh4_cache.h" //Types @@ -84,6 +85,7 @@ void CCN_CCR_write(u32 addr, u32 value) if (temp.ICI) { DEBUG_LOG(SH4, "Sh4: i-cache invalidation %08X", curr_pc); //Shikigami No Shiro II uses ICI frequently + icache.Invalidate(); } temp.ICI=0; @@ -155,10 +157,10 @@ void ccn_init() } -void ccn_reset() +void ccn_reset(bool hard) { CCN_TRA = 0x0; - CCN_EXPEVT = 0x0; + CCN_EXPEVT = hard ? 0 : 0x20; CCN_MMUCR.reg_data = 0x0; CCN_CCR.reg_data = 0x0; } diff --git a/core/hw/sh4/modules/mmu.h b/core/hw/sh4/modules/mmu.h index c9419321a..49653e000 100644 --- a/core/hw/sh4/modules/mmu.h +++ b/core/hw/sh4/modules/mmu.h @@ -46,6 +46,7 @@ void ITLB_Sync(u32 entry); bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data); void mmu_set_state(); void mmu_flush_table(); +void mmu_raise_exception(u32 mmu_error, u32 address, u32 am); static INLINE bool mmu_enabled() { diff --git a/core/hw/sh4/modules/modules.h b/core/hw/sh4/modules/modules.h index 59f5c4393..4077e9965 100644 --- a/core/hw/sh4/modules/modules.h +++ b/core/hw/sh4/modules/modules.h @@ -35,7 +35,7 @@ void tmu_reset(bool hard); void tmu_term(); void ccn_init(); -void ccn_reset(); +void ccn_reset(bool hard); void ccn_term(); void MMU_init(); diff --git a/core/hw/sh4/sh4_cache.h b/core/hw/sh4/sh4_cache.h new file mode 100644 index 000000000..79650a6a6 --- /dev/null +++ b/core/hw/sh4/sh4_cache.h @@ -0,0 +1,105 @@ +/* + Copyright 2020 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include +#include "types.h" +#include "sh4_mem.h" +#include "modules/mmu.h" + +// +// SH4 instruction cache implementation +// +class sh4_icache +{ +public: + u16 ReadMem(u32 address) + { + if ((address & 0xE0000000) == 0xA0000000 // P2, P4: non-cacheable + || (address & 0xE0000000) == 0xE0000000 + || !CCN_CCR.ICE) // Instruction cache disabled + return IReadMem16(address); + + u32 index = CCN_CCR.IIX ? + ((address >> 5) & 0x7f) | ((address >> (25 - 7)) & 0x80) + : (address >> 5) & 0xff; + +#ifndef NO_MMU + if (mmu_enabled()) + { + u32 paddr; + u32 rv = mmu_instruction_translation(address, paddr); + if (rv != MMU_ERROR_NONE) + mmu_raise_exception(rv, address, MMU_TT_IREAD); + address = paddr; + } +#endif + + cache_line& line = lines[index]; + const u32 tag = (address >> 10) & 0x7ffff; + if (!line.valid || tag != line.address) + { + // miss + line.valid = true; + line.address = tag; + const u32 line_addr = address & ~0x1f; + u32 *p = (u32 *)line.data; + for (int i = 0; i < 32; i += 4) + *p++ = _vmem_ReadMem32(line_addr + i); + } + + return *(u16*)&line.data[address & 0x1f]; + } + + void Invalidate() + { + for (auto& line : lines) + line.valid = false; + } + + void Reset(bool hard) + { + if (hard) + memset(&lines[0], 0, sizeof(lines)); + } + + bool Serialize(void **data, unsigned int *total_size) + { + REICAST_S(lines); + + return true; + } + + bool Unserialize(void **data, unsigned int *total_size) + { + REICAST_US(lines); + + return true; + } + +private: + struct cache_line { + bool valid; + u32 address; + u8 data[32]; + }; + + std::array lines; +}; + +extern sh4_icache icache; diff --git a/core/hw/sh4/sh4_mmr.cpp b/core/hw/sh4/sh4_mmr.cpp index ab0d403b8..1797076c9 100644 --- a/core/hw/sh4/sh4_mmr.cpp +++ b/core/hw/sh4/sh4_mmr.cpp @@ -868,7 +868,7 @@ void sh4_mmr_reset(bool hard) OnChipRAM = {}; //Reset register values bsc_reset(hard); - ccn_reset(); + ccn_reset(hard); cpg_reset(); dmac_reset(); intc_reset(); diff --git a/core/hw/sh4/sh4_mmr.h b/core/hw/sh4/sh4_mmr.h index 29458a2a4..534b03c79 100644 --- a/core/hw/sh4/sh4_mmr.h +++ b/core/hw/sh4/sh4_mmr.h @@ -24,17 +24,6 @@ extern std::array TMU; extern std::array SCI; extern std::array SCIF; -/* -//Region P4 -u32 ReadMem_P4(u32 addr,u32 sz); -void WriteMem_P4(u32 addr,u32 data,u32 sz); - -//Area7 -u32 ReadMem_area7(u32 addr,u32 sz); -void WriteMem_area7(u32 addr,u32 data,u32 sz); -void DYNACALL WriteMem_sq_32(u32 address,u32 data);*/ - -//Init/Res/Term void sh4_mmr_init(); void sh4_mmr_reset(bool hard); void sh4_mmr_term(); @@ -42,14 +31,14 @@ void sh4_mmr_term(); template void sh4_rio_reg(T& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rp=0, RegWriteAddrFP* wp=0); -#define A7_REG_HASH(addr) ((addr>>16)&0x1FFF) +#define A7_REG_HASH(addr) (((addr) >> 16) & 0x1FFF) -#define SH4IO_REGN(mod,addr,size) (mod[(addr&255)/4].data##size) -#define SH4IO_REG(mod,name,size) SH4IO_REGN(mod,mod##_##name##_addr,size) -#define SH4IO_REG_T(mod,name,size) ((mod##_##name##_type&)SH4IO_REG(mod,name,size)) +#define SH4IO_REGN(mod, addr, size) ((mod)[((addr) & 255) / 4].data##size) +#define SH4IO_REG(mod, name, size) SH4IO_REGN(mod, mod##_##name##_addr, size) +#define SH4IO_REG_T(mod, name, size) ((mod##_##name##_type&)SH4IO_REG(mod, name, size)) -#define SH4IO_REG_OFS(mod,name,o,s,size) SH4IO_REGN(mod,mod##_##name##0_addr+o*s,size) -#define SH4IO_REG_T_OFS(mod,name,o,s,size) ((mod##_##name##_type&)SH4IO_REG_OFS(mod,name,o,s,size)) +#define SH4IO_REG_OFS(mod, name, o, s, size) SH4IO_REGN(mod, mod##_##name##0_addr + (o) * (s), size) +#define SH4IO_REG_T_OFS(mod, name, o, s, size) ((mod##_##name##_type&)SH4IO_REG_OFS(mod, name, o, s, size)) //CCN module registers base #define CCN_BASE_addr 0x1F000000 diff --git a/core/reios/reios.cpp b/core/reios/reios.cpp index ee61a4ade..4564d6973 100644 --- a/core/reios/reios.cpp +++ b/core/reios/reios.cpp @@ -22,6 +22,7 @@ #include "iso9660.h" #include "font.h" #include "hw/aica/aica.h" +#include "hw/aica/aica_mem.h" #include "oslib/oslib.h" #include diff --git a/core/serialize.cpp b/core/serialize.cpp index 9feb7315a..4363dfd09 100644 --- a/core/serialize.cpp +++ b/core/serialize.cpp @@ -15,6 +15,7 @@ #include "reios/gdrom_hle.h" #include "hw/sh4/dyna/blockmanager.h" #include "hw/naomi/naomi_cart.h" +#include "hw/sh4/sh4_cache.h" #define REICAST_SKIP(size) do { if (*data) *(u8**)data += (size); *total_size += (size); } while (false) @@ -45,6 +46,7 @@ extern u32 VREG;//video reg =P extern u32 ARMRST;//arm reset reg extern u32 rtc_EN; extern int dma_sched_id; +extern u32 RealTimeClock; //./core/hw/aica/aica_mem.o extern u8 aica_reg[0x8000]; @@ -275,7 +277,7 @@ bool dc_serialize(void **data, unsigned int *total_size) { int i = 0; - serialize_version_enum version = V8; + serialize_version_enum version = V9; *total_size = 0 ; @@ -308,6 +310,7 @@ bool dc_serialize(void **data, unsigned int *total_size) REICAST_S(VREG); REICAST_S(ARMRST); REICAST_S(rtc_EN); + REICAST_S(RealTimeClock); REICAST_SA(aica_reg,0x8000); @@ -392,6 +395,7 @@ bool dc_serialize(void **data, unsigned int *total_size) register_serialize(TMU, data, total_size) ; register_serialize(SCI, data, total_size) ; register_serialize(SCIF, data, total_size) ; + icache.Serialize(data, total_size); REICAST_SA(mem_b.data, mem_b.size); @@ -664,6 +668,7 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size) register_unserialize(TMU, data, total_size, V9_LIBRETRO) ; register_unserialize(SCI, data, total_size, V9_LIBRETRO) ; register_unserialize(SCIF, data, total_size, V9_LIBRETRO) ; + icache.Reset(true); REICAST_USA(mem_b.data, mem_b.size); REICAST_USA(InterruptEnvId,32); @@ -852,6 +857,8 @@ bool dc_unserialize(void **data, unsigned int *total_size) REICAST_US(VREG); REICAST_US(ARMRST); REICAST_US(rtc_EN); + if (version >= V9) + REICAST_US(RealTimeClock); REICAST_USA(aica_reg,0x8000); @@ -987,6 +994,10 @@ bool dc_unserialize(void **data, unsigned int *total_size) register_unserialize(TMU, data, total_size, version) ; register_unserialize(SCI, data, total_size, version) ; register_unserialize(SCIF, data, total_size, version) ; + if (version >= V9) + icache.Unserialize(data, total_size); + else + icache.Reset(true); REICAST_USA(mem_b.data, mem_b.size); diff --git a/core/types.h b/core/types.h index 8dc5bba87..18a7c87c1 100644 --- a/core/types.h +++ b/core/types.h @@ -523,17 +523,6 @@ void libPvr_LockedBlockWrite(vram_block* block,u32 addr); //set to 0 if not used void* libPvr_GetRenderTarget(); -//AICA -s32 libAICA_Init(); -void libAICA_Reset(bool hard); -void libAICA_Term(); - -u32 libAICA_ReadReg(u32 addr,u32 size); -void libAICA_WriteReg(u32 addr,u32 data,u32 size); - -void libAICA_Update(u32 cycles); //called every ~1800 cycles, set to 0 if not used - - //GDR s32 libGDR_Init(); void libGDR_Reset(bool hard); @@ -564,13 +553,9 @@ static inline void libExtDevice_WriteMem_A5(u32 addr,u32 data,u32 size) { } //ARM s32 libARM_Init(); -void libARM_Reset(bool M); +void libARM_Reset(bool hard); void libARM_Term(); -void libARM_SetResetState(u32 State); -void libARM_Update(u32 cycles); - - #define ReadMemArrRet(arr,addr,sz) \ {if (sz==1) \ return arr[addr]; \ @@ -620,4 +605,5 @@ enum serialize_version_enum { V6 = 801, V7 = 802, V8 = 803, + V9 = 804, } ; diff --git a/tests/src/div32_test.cpp b/tests/src/div32_test.cpp index 2c5e99e60..9a33f222b 100644 --- a/tests/src/div32_test.cpp +++ b/tests/src/div32_test.cpp @@ -8,7 +8,50 @@ #define SHIL_MODE 2 #include "hw/sh4/dyna/shil_canonical.h" -void div32s_slow(u32& r1, u32 r2, u32& r3) +static void div1(u32& r1, u32 r2) +{ + const u8 old_q = sr.Q; + sr.Q = (u8)((0x80000000 & r1) != 0); + + r1 <<= 1; + r1 |= sr.T; + + const u32 old_rn = r1; + + if (old_q == 0) + { + if (sr.M == 0) + { + r1 -= r2; + bool tmp1 = r1 > old_rn; + sr.Q = sr.Q ^ tmp1; + } + else + { + r1 += r2; + bool tmp1 = r1 < old_rn; + sr.Q = !sr.Q ^ tmp1; + } + } + else + { + if (sr.M == 0) + { + r1 += r2; + bool tmp1 = r1 < old_rn; + sr.Q = sr.Q ^ tmp1; + } + else + { + r1 -= r2; + bool tmp1 = r1 > old_rn; + sr.Q = !sr.Q ^ tmp1; + } + } + sr.T = (sr.Q == sr.M); +} + +static void div32s_slow(u32& r1, u32 r2, u32& r3) { sr.Q = r3 >> 31; sr.M = r2 >> 31; @@ -19,23 +62,11 @@ void div32s_slow(u32& r1, u32 r2, u32& r3) r1 = (u32)rv; sr.T = rv >> 32; - // DIV1 - unsigned char old_q = sr.Q; - sr.Q = (u8)((0x80000000 & r3) !=0); - - r3 <<= 1; - r3 |= (unsigned long)sr.T; - - u32 tmp0 = r3; - - r3 += (2 * (old_q ^ sr.M) - 1) * r2; - sr.Q ^= old_q ^ (sr.M ? r3 > tmp0 : r3 >= tmp0); - - sr.T = (sr.Q == sr.M); + div1(r3, r2); } } -void div32s_fast(u32& r1, u32 r2, u32& r3) +static void div32s_fast(u32& r1, u32 r2, u32& r3) { sr.T = (r3 ^ r2) & 0x80000000; u64 rv = shil_opcl_div32s::f1::impl(r1, r2, r3); @@ -47,7 +78,7 @@ void div32s_fast(u32& r1, u32 r2, u32& r3) sr.T &= 1; } -void div32u_fast(u32& r1, u32 r2, u32& r3) +static void div32u_fast(u32& r1, u32 r2, u32& r3) { u64 rv = shil_opcl_div32u::f1::impl(r1, r2, r3); r1 = (u32)rv; @@ -57,7 +88,7 @@ void div32u_fast(u32& r1, u32 r2, u32& r3) r3 = shil_opcl_div32p2::f1::impl(r3, r2, sr.T); } -void div32u_slow(u32& r1, u32 r2, u32& r3) +static void div32u_slow(u32& r1, u32 r2, u32& r3) { sr.Q = 0; sr.M = 0; @@ -69,19 +100,7 @@ void div32u_slow(u32& r1, u32 r2, u32& r3) r1 = (u32)rv; sr.T = rv >> 32; - // DIV1 - unsigned char old_q = sr.Q; - sr.Q = (u8)((0x80000000 & r3) !=0); - - r3 <<= 1; - r3 |= (unsigned long)sr.T; - - u32 tmp0 = r3; - - r3 += (2 * (old_q ^ sr.M) - 1) * r2; - sr.Q ^= old_q ^ (sr.M ? r3 > tmp0 : r3 >= tmp0); - - sr.T = (sr.Q == sr.M); + div1(r3, r2); } } diff --git a/tests/src/serialize_test.cpp b/tests/src/serialize_test.cpp index ebbe949e3..ec6be3790 100644 --- a/tests/src/serialize_test.cpp +++ b/tests/src/serialize_test.cpp @@ -28,7 +28,7 @@ TEST_F(SerializeTest, SizeTest) unsigned int total_size = 0; void *data = nullptr; ASSERT_TRUE(dc_serialize(&data, &total_size)); - ASSERT_EQ(28114471u, total_size); + ASSERT_EQ(28124715u, total_size); }