sh4 icache implementation. move aica out of sh4/interp.

revert to original div1 impl
serialize rtc clock value
cmake fixes: asan and logging options, -no-pie on x64/linux
This commit is contained in:
Flyinghead 2020-06-09 12:02:01 +02:00
parent 18790901b1
commit 333df13fce
23 changed files with 341 additions and 257 deletions

View File

@ -9,6 +9,8 @@ endif()
option(ENABLE_CTEST "Enables unit tests" OFF)
option(ENABLE_OPROFILE "Enable OProfile" OFF)
option(TEST_AUTOMATION "Enable test automation" OFF)
# When ON, the DEBUGFAST compile definition is added to the target.
option(ENABLE_LOG "Enable full logging" OFF)
# When ON, the target is compiled and linked with -fsanitize=address.
option(ASAN "Enable address sanitizer" OFF)
project(flycast)
@ -52,6 +54,12 @@ endif()
target_compile_features(${PROJECT_NAME} PRIVATE c_std_11 cxx_std_11)
set_target_properties(${PROJECT_NAME} PROPERTIES CXX_EXTENSIONS OFF)
# Flags for non-MSVC toolchains. -fno-rtti is restricted to C++ translation
# units through the COMPILE_LANGUAGE generator expression.
if(NOT MSVC)
    target_compile_options(${PROJECT_NAME}
        PRIVATE
            $<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>
            -fno-strict-aliasing
            -ffast-math
    )
endif()
target_compile_definitions(${PROJECT_NAME} PRIVATE
$<$<BOOL:APPLE>:GL_SILENCE_DEPRECATION>
@ -60,6 +68,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE
$<$<BOOL:MSVC>:_USE_MATH_DEFINES>
$<$<BOOL:MSVC>:NOMINMAX>
$<$<BOOL:${TEST_AUTOMATION}>:TEST_AUTOMATION>
$<$<BOOL:${ENABLE_LOG}>:DEBUGFAST>
ENABLE_MODEM)
target_include_directories(${PROJECT_NAME} PRIVATE core core/deps core/deps/stb core/deps/xbyak core/khronos)
@ -183,6 +192,12 @@ if(UNIX AND NOT APPLE AND NOT ANDROID)
find_package(Threads REQUIRED)
target_link_libraries(${PROJECT_NAME} PRIVATE Threads::Threads)
# GCC on x86-64: build the executable as non position-independent.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
    set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE OFF)
    # CMake older than 3.14 additionally gets -no-pie appended to the
    # executable linker flags.
    # NOTE(review): presumably because the property only influences the link
    # step from 3.14 on (policy CMP0083) - confirm.
    if(CMAKE_VERSION VERSION_LESS "3.14.0")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie")
    endif()
endif()
if(NOT SDL2_FOUND)
find_package(X11 REQUIRED)
@ -196,6 +211,11 @@ if(UNIX AND NOT APPLE AND NOT ANDROID)
target_link_libraries(${PROJECT_NAME} PRIVATE ${CMAKE_DL_LIBS} rt)
endif()
# AddressSanitizer build (enabled with -DASAN=ON).
# The instrumentation flag is needed both when compiling and when linking.
# -static-libasan only selects how the runtime is linked, so it is passed to
# the link step only.
if(ASAN)
    target_compile_options(${PROJECT_NAME} PRIVATE -fsanitize=address)
    if(CMAKE_VERSION VERSION_LESS "3.13.0")
        # target_link_options() is not available before CMake 3.13: fall back
        # to the directory-wide linker flags.
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -static-libasan")
    else()
        # Keep the sanitizer link flags scoped to this target instead of
        # mutating the global CMAKE_EXE_LINKER_FLAGS.
        target_link_options(${PROJECT_NAME} PRIVATE -fsanitize=address -static-libasan)
    endif()
endif()
target_sources(${PROJECT_NAME} PRIVATE
core/deps/chdpsr/cdipsr.cpp
core/deps/chdpsr/cdipsr.h)
@ -566,6 +586,7 @@ target_sources(${PROJECT_NAME} PRIVATE
core/hw/sh4/modules/tmu.cpp
core/hw/sh4/modules/ubc.cpp
core/hw/sh4/modules/wince.h
core/hw/sh4/sh4_cache.h
core/hw/sh4/sh4_core.h
core/hw/sh4/sh4_core_regs.cpp
core/hw/sh4/sh4_if.h
@ -732,6 +753,7 @@ target_sources(${PROJECT_NAME} PRIVATE
core/rend/osd.h
core/rend/sorter.cpp
core/rend/sorter.h
core/rend/tileclip.h
core/rend/TexCache.cpp
core/rend/TexCache.h)
@ -872,6 +894,7 @@ if(BUILD_TESTING)
core/deps/gtest/src/gtest_main.cc)
target_sources(${PROJECT_NAME} PRIVATE
tests/src/div32_test.cpp
tests/src/test_stubs.cpp
tests/src/serialize_test.cpp)
endif()

View File

@ -4,8 +4,10 @@
#include "sgc_if.h"
#include "hw/holly/holly_intc.h"
#include "hw/holly/sb.h"
#include "hw/sh4/sh4_sched.h"
#include "hw/arm7/arm7.h"
#define SH4_IRQ_BIT (1<<(holly_SPU_IRQ&255))
#define SH4_IRQ_BIT (1 << (holly_SPU_IRQ & 31))
CommonData_struct* CommonData;
DSPData_struct* DSPData;
@ -77,17 +79,23 @@ static void UpdateSh4Ints()
if ((SB_ISTEXT & SH4_IRQ_BIT) != 0)
asic_CancelInterrupt(holly_SPU_IRQ);
}
}
AicaTimer timers[3];
//Mainloop
void libAICA_Update(u32 samples)
int aica_schid = -1;
const int AICA_TICK = 145125; // 44.1 KHz / 32
static int AicaUpdate(int tag, int c, int j)
{
AICA_Sample32();
arm_Run(32);
if (!settings.aica.NoBatch && !settings.aica.DSPEnabled)
AICA_Sample32();
return AICA_TICK;
}
//Mainloop
void libAICA_TimeStep()
{
for (int i=0;i<3;i++)
@ -195,6 +203,11 @@ s32 libAICA_Init()
MCIRE=(InterruptInfo*)&aica_reg[0x28B4+8];
sgc_Init();
if (aica_schid == -1)
{
aica_schid = sh4_sched_register(0, &AicaUpdate);
sh4_sched_request(aica_schid, AICA_TICK);
}
return 0;
}
@ -202,8 +215,10 @@ s32 libAICA_Init()
void libAICA_Reset(bool hard)
{
if (hard)
{
init_mem();
sgc_Init();
sgc_Init();
}
for (u32 i = 0; i < 3; i++)
timers[i].Init(aica_reg, i);
aica_Reset(hard);

View File

@ -5,10 +5,14 @@
*/
#include "aica_if.h"
#include "aica_mem.h"
#include "hw/holly/sb.h"
#include "hw/holly/holly_intc.h"
#include "hw/sh4/sh4_mem.h"
#include "hw/sh4/sh4_sched.h"
#include "profiler/profiler.h"
#include "hw/sh4/dyna/blockmanager.h"
#include "hw/arm7/arm7.h"
#include <ctime>
@ -18,6 +22,7 @@ u32 ARMRST;//arm reset reg
u32 rtc_EN=0;
int dma_sched_id;
u32 RealTimeClock;
int rtc_schid = -1;
u32 GetRTC_now()
{
@ -107,11 +112,12 @@ u32 ReadMem_aica_reg(u32 addr,u32 sz)
}
}
void ArmSetRST()
static void ArmSetRST()
{
ARMRST&=1;
libARM_SetResetState(ARMRST);
ARMRST &= 1;
arm_SetEnabled(ARMRST == 0);
}
void WriteMem_aica_reg(u32 addr,u32 data,u32 sz)
{
addr&=0x7FFF;
@ -121,12 +127,12 @@ void WriteMem_aica_reg(u32 addr,u32 data,u32 sz)
if (addr==0x2C01)
{
VREG=data;
INFO_LOG(AICA, "VREG = %02X", VREG);
INFO_LOG(AICA_ARM, "VREG = %02X", VREG);
}
else if (addr==0x2C00)
{
ARMRST=data;
INFO_LOG(AICA, "ARMRST = %02X", ARMRST);
INFO_LOG(AICA_ARM, "ARMRST = %02X", ARMRST);
ArmSetRST();
}
else
@ -140,7 +146,7 @@ void WriteMem_aica_reg(u32 addr,u32 data,u32 sz)
{
VREG=(data>>8)&0xFF;
ARMRST=data&0xFF;
INFO_LOG(AICA, "VREG = %02X ARMRST %02X", VREG, ARMRST);
INFO_LOG(AICA_ARM, "VREG = %02X ARMRST %02X", VREG, ARMRST);
ArmSetRST();
}
else
@ -149,15 +155,35 @@ void WriteMem_aica_reg(u32 addr,u32 data,u32 sz)
}
}
}
// Scheduler callback registered by aica_Init().
// Advances the RTC counter by one and runs the periodic profiler and
// (when a dynarec is compiled in) block manager housekeeping.
// Returns SH4_MAIN_CLOCK - presumably the number of SH4 cycles until the
// scheduler should call it again (TODO confirm against sh4_sched).
static int DreamcastSecond(int tag, int c, int j)
{
	RealTimeClock += 1;

	prof_periodical();
#if FEAT_SHREC != DYNAREC_NONE
	bm_Periodical_1s();
#endif

	return SH4_MAIN_CLOCK;
}
//Init/res/term
// Loads the RTC counter from GetRTC_now() and, the first time it is called,
// registers the DreamcastSecond callback with the SH4 scheduler.
void aica_Init()
{
	RealTimeClock = GetRTC_now();

	// Already registered: nothing else to do
	if (rtc_schid != -1)
		return;

	rtc_schid = sh4_sched_register(0, &DreamcastSecond);
	sh4_sched_request(rtc_schid, SH4_MAIN_CLOCK);
}
void aica_Reset(bool hard)
{
aica_Init();
if (hard)
aica_Init();
VREG = 0;
ARMRST = 0;
}
@ -167,7 +193,7 @@ void aica_Term()
}
int dma_end_sched(int tag, int cycl, int jitt)
static int dma_end_sched(int tag, int cycl, int jitt)
{
u32 len=SB_ADLEN & 0x7FFFFFFF;
@ -189,7 +215,7 @@ int dma_end_sched(int tag, int cycl, int jitt)
return 0;
}
void Write_SB_ADST(u32 addr, u32 data)
static void Write_SB_ADST(u32 addr, u32 data)
{
//0x005F7800 SB_ADSTAG RW AICA:G2-DMA G2 start address
//0x005F7804 SB_ADSTAR RW AICA:G2-DMA system memory start address
@ -214,7 +240,10 @@ void Write_SB_ADST(u32 addr, u32 data)
u32 tmp=src;
src=dst;
dst=tmp;
DEBUG_LOG(AICA, "AICA-DMA : SB_ADDIR==1 DMA Read to 0x%X from 0x%X %d bytes", dst, src, len);
}
else
DEBUG_LOG(AICA, "AICA-DMA : SB_ADDIR==0:DMA Write to 0x%X from 0x%X %d bytes", dst, src, len);
WriteMemBlock_nommu_dma(dst,src,len);
@ -231,7 +260,7 @@ void Write_SB_ADST(u32 addr, u32 data)
}
}
void Write_SB_E1ST(u32 addr, u32 data)
static void Write_SB_E1ST(u32 addr, u32 data)
{
//0x005F7800 SB_ADSTAG RW AICA:G2-DMA G2 start address
//0x005F7804 SB_ADSTAR RW AICA:G2-DMA system memory start address
@ -285,7 +314,7 @@ void Write_SB_E1ST(u32 addr, u32 data)
}
}
void Write_SB_E2ST(u32 addr, u32 data)
static void Write_SB_E2ST(u32 addr, u32 data)
{
if ((data & 1) && (SB_E2EN & 1))
{
@ -321,7 +350,7 @@ void Write_SB_E2ST(u32 addr, u32 data)
}
void Write_SB_DDST(u32 addr, u32 data)
static void Write_SB_DDST(u32 addr, u32 data)
{
if ((data & 1) && (SB_DDEN & 1))
{

View File

@ -4,7 +4,6 @@
extern u32 VREG;
extern VArray2 aica_ram;
extern u32 RealTimeClock;
u32 GetRTC_now();
u32 ReadMem_aica_rtc(u32 addr,u32 sz);
void WriteMem_aica_rtc(u32 addr,u32 data,u32 sz);
@ -15,9 +14,10 @@ void aica_Init();
void aica_Reset(bool hard);
void aica_Term();
#define UpdateAica(clc) libAICA_Update(clc)
#define UpdateArm(clc) libARM_Update(clc)
void aica_sb_Init();
void aica_sb_Reset(bool hard);
void aica_sb_Term();
s32 libAICA_Init();
void libAICA_Reset(bool hard);
void libAICA_Term();

View File

@ -1368,11 +1368,6 @@ u32 cdda_index=CDDA_SIZE<<1;
//no DSP for now in this version
void AICA_Sample32()
{
if (settings.aica.NoBatch || settings.aica.DSPEnabled)
{
return;
}
SampleType mxlr[64];
memset(mxlr,0,sizeof(mxlr));

View File

@ -13,21 +13,16 @@
#define CPUWriteHalfWord arm_WriteMem16
#define CPUWriteByte arm_WriteMem8
#define reg arm_Reg
#define armNextPC reg[R15_ARM_NEXT].I
#define CPUUpdateTicksAccesint(a) 1
#define CPUUpdateTicksAccessSeq32(a) 1
#define CPUUpdateTicksAccesshort(a) 1
#define CPUUpdateTicksAccess32(a) 1
#define CPUUpdateTicksAccess16(a) 1
//bool arm_FiqPending; -- not used , i use the input directly :)
//bool arm_IrqPending;
#define ARM_CYCLES_PER_SAMPLE 256
alignas(8) reg_pair arm_Reg[RN_ARM_REG_COUNT];
@ -82,10 +77,11 @@ void arm_Run_(u32 CycleCount)
}
}
void arm_Run(u32 CycleCount) {
for (int i=0;i<32;i++)
void arm_Run(u32 samples)
{
for (u32 i = 0; i < samples; i++)
{
arm_Run_(CycleCount/32);
arm_Run_(ARM_CYCLES_PER_SAMPLE);
libAICA_TimeStep();
}
}
@ -1585,13 +1581,12 @@ void armv_MOV32(eReg regn, u32 imm)
#endif // HOST_CPU == CPU_ARM
//Run a timeslice for ARMREC
//CycleCount is pretty much fixed to (512*32) for now (might change to a diff constant, but will be constant)
void arm_Run(u32 CycleCount)
void arm_Run(u32 samples)
{
for (int i = 0; i < 32; i++)
for (int i = 0; i < samples; i++)
{
if (Arm7Enabled)
arm_mainloop(CycleCount / 32, arm_Reg, EntryPoints);
arm_mainloop(ARM_CYCLES_PER_SAMPLE, arm_Reg, EntryPoints);
libAICA_TimeStep();
}
}

View File

@ -3,11 +3,9 @@
void arm_Init();
void arm_Reset();
void arm_Run(u32 uNumCycles);
void arm_Run(u32 samples);
void arm_SetEnabled(bool enabled);
#define arm_sh4_bias (2)
enum
{
RN_CPSR = 16,

View File

@ -1,4 +1,5 @@
#include "arm_mem.h"
#include "hw/aica/aica_mem.h"
#define REG_L (0x2D00)
#define REG_M (0x2D04)
@ -82,4 +83,4 @@ template u32 arm_ReadReg<4,u32>(u32 adr);
template void arm_WriteReg<1>(u32 adr,u8 data);
template void arm_WriteReg<2>(u32 adr,u16 data);
template void arm_WriteReg<4>(u32 adr,u32 data);
template void arm_WriteReg<4>(u32 adr,u32 data);

View File

@ -23,14 +23,3 @@ void libARM_Reset(bool hard)
arm_Reset();
arm_SetEnabled(false);
}
void libARM_SetResetState(u32 state)
{
arm_SetEnabled(state==0);
}
//Mainloop
void libARM_Update(u32 Cycles)
{
arm_Run(Cycles/arm_sh4_bias);
}

View File

@ -64,14 +64,6 @@ _vmem_handler _vmem_register_handler(_vmem_ReadMem8FP* read8,_vmem_ReadMem16FP*
(read<1,u8>,read<2,u16>,read<4,u32>, \
write<1,u8>,write<2,u16>,write<4,u32>)
#define _vmem_register_handler_Template1(read,write,extra_Tparam) _vmem_register_handler \
(read<1,u8,extra_Tparam>,read<2,u16,extra_Tparam>,read<4,u32,extra_Tparam>, \
write<1,u8,extra_Tparam>,write<2,u16,extra_Tparam>,write<4,u32,extra_Tparam>)
#define _vmem_register_handler_Template2(read,write,etp1,etp2) _vmem_register_handler \
(read<1,u8,etp1,etp2>,read<2,u16,etp1,etp2>,read<4,u32,etp1,etp2>, \
write<1,u8,etp1,etp2>,write<2,u16,etp1,etp2>,write<4,u32,etp1,etp2>)
void _vmem_map_handler(_vmem_handler Handler,u32 start,u32 end);
void _vmem_map_block(void* base,u32 start,u32 end,u32 mask);
void _vmem_mirror_mapping(u32 new_region,u32 start,u32 size);

View File

@ -7,15 +7,16 @@
#include "../sh4_interpreter.h"
#include "../sh4_opcode_list.h"
#include "../sh4_core.h"
#include "hw/aica/aica_if.h"
#include "../sh4_interrupts.h"
#include "hw/sh4/sh4_mem.h"
#include "profiler/profiler.h"
#include "../dyna/blockmanager.h"
#include "../sh4_sched.h"
#include "hw/holly/sb.h"
#include "../sh4_cache.h"
#define CPU_RATIO (8)
sh4_icache icache;
static s32 l;
static void ExecuteOpcode(u16 op)
@ -26,6 +27,14 @@ static void ExecuteOpcode(u16 op)
l -= CPU_RATIO;
}
// Fetches the opcode at next_pc through the instruction cache.
// next_pc is advanced by 2 before the read is performed.
static u16 ReadNexOp()
{
	const u32 fetch_addr = next_pc;
	next_pc = fetch_addr + 2;

	return icache.ReadMem(fetch_addr);
}
void Sh4_int_Run()
{
sh4_int_bCpuRun=true;
@ -39,9 +48,7 @@ void Sh4_int_Run()
#endif
do
{
u32 addr = next_pc;
next_pc += 2;
u32 op = IReadMem16(addr);
u32 op = ReadNexOp();
ExecuteOpcode(op);
} while (l > 0);
@ -79,8 +86,7 @@ void Sh4_int_Step()
}
else
{
u32 op=ReadMem16(next_pc);
next_pc+=2;
u32 op = ReadNexOp();
ExecuteOpcode(op);
}
}
@ -118,6 +124,7 @@ void Sh4_int_Reset(bool hard)
fpscr.full = 0x0004001;
old_fpscr=fpscr;
UpdateFPSCR();
icache.Reset(hard);
//Any more registers have default value ?
INFO_LOG(INTERPRETER, "Sh4 Reset");
@ -135,9 +142,7 @@ void ExecuteDelayslot()
#if !defined(NO_MMU)
try {
#endif
u32 addr = next_pc;
next_pc += 2;
u32 op = IReadMem16(addr);
u32 op = ReadNexOp();
if (op != 0) // Looney Tunes: Space Race hack
ExecuteOpcode(op);
@ -165,40 +170,6 @@ void ExecuteDelayslot_RTE()
#endif
}
//General update
//3584 Cycles
#define AICA_SAMPLE_GCM 441
#define AICA_SAMPLE_CYCLES (SH4_MAIN_CLOCK/(44100/AICA_SAMPLE_GCM)*32)
int aica_schid = -1;
int rtc_schid = -1;
//14336 Cycles
const int AICA_TICK=145124;
static int AicaUpdate(int tag, int c, int j)
{
UpdateArm(512*32);
UpdateAica(1*32);
return AICA_TICK;
}
static int DreamcastSecond(int tag, int c, int j)
{
RealTimeClock++;
prof_periodical();
#if FEAT_SHREC != DYNAREC_NONE
bm_Periodical_1s();
#endif
return SH4_MAIN_CLOCK;
}
// every SH4_TIMESLICE cycles
int UpdateSystem()
{
@ -240,16 +211,8 @@ void Get_Sh4Interpreter(sh4_if* rv)
void Sh4_int_Init()
{
verify(sizeof(Sh4cntx)==448);
static_assert(sizeof(Sh4cntx) == 448, "Invalid Sh4Cntx size");
if (aica_schid == -1)
{
aica_schid=sh4_sched_register(0,&AicaUpdate);
sh4_sched_request(aica_schid,AICA_TICK);
rtc_schid=sh4_sched_register(0,&DreamcastSecond);
sh4_sched_request(rtc_schid,SH4_MAIN_CLOCK);
}
memset(&p_sh4rcb->cntx, 0, sizeof(p_sh4rcb->cntx));
}

View File

@ -1185,21 +1185,18 @@ sh4op(i0000_0000_0011_1000)
//ocbi @<REG_N>
sh4op(i0000_nnnn_1001_0011)
{
u32 n = GetN(op);
//printf("ocbi @0x%08X \n",r[n]);
}
//ocbp @<REG_N>
sh4op(i0000_nnnn_1010_0011)
{
u32 n = GetN(op);
//printf("ocbp @0x%08X \n",r[n]);
}
//ocbwb @<REG_N>
sh4op(i0000_nnnn_1011_0011)
{
u32 n = GetN(op);
//printf("ocbwb @0x%08X \n",r[n]);
}
@ -1550,86 +1547,59 @@ sh4op(i0000_0000_0001_1001)
}
//div0s <REG_M>,<REG_N>
sh4op(i0010_nnnn_mmmm_0111)
{//ToDo : Check This [26/4/05]
{
u32 n = GetN(op);
u32 m = GetM(op);
//new implementation
sr.Q=r[n]>>31;
sr.M=r[m]>>31;
sr.T=sr.M^sr.Q;
return;
/*
if ((r[n] & 0x80000000)!=0)
sr.Q = 1;
else
sr.Q = 0;
if ((r[m] & 0x80000000)!=0)
sr.M = 1;
else
sr.M = 0;
if (sr.Q == sr.M)
sr.T = 0;
else
sr.T = 1;
*/
sr.Q = r[n] >> 31;
sr.M = r[m] >> 31;
sr.T = sr.M ^ sr.Q;
}
//div1 <REG_M>,<REG_N>
sh4op(i0011_nnnn_mmmm_0100)
{
u32 n=GetN(op);
u32 m=GetM(op);
u32 n = GetN(op);
u32 m = GetM(op);
u32 tmp0, tmp2;
unsigned char old_q, tmp1;
old_q = sr.Q;
sr.Q = (u8)((0x80000000 & r[n]) !=0);
const u8 old_q = sr.Q;
sr.Q = (u8)((0x80000000 & r[n]) != 0);
r[n] <<= 1;
r[n] |= (unsigned long)sr.T;
r[n] |= sr.T;
tmp0 = r[n]; // this need only be done once here ..
// Old implementation
// tmp2 = r[m];
//
// if( 0 == old_q )
// {
// if( 0 == sr.M )
// {
// r[n] -= tmp2;
// tmp1 = (r[n]>tmp0);
// sr.Q = (sr.Q==0) ? tmp1 : (u8)(tmp1==0) ;
// }
// else
// {
// r[n] += tmp2;
// tmp1 =(r[n]<tmp0);
// sr.Q = (sr.Q==0) ? (u8)(tmp1==0) : tmp1 ;
// }
// }
// else
// {
// if( 0 == sr.M )
// {
// r[n] += tmp2;
// tmp1 =(r[n]<tmp0);
// sr.Q = (sr.Q==0) ? tmp1 : (u8)(tmp1==0) ;
// }
// else
// {
// r[n] -= tmp2;
// tmp1 =(r[n]>tmp0);
// sr.Q = (sr.Q==0) ? (u8)(tmp1==0) : tmp1 ;
// }
// }
r[n] += (2 * (old_q ^ sr.M) - 1) * r[m];
sr.Q ^= old_q ^ (sr.M ? r[n] > tmp0 : r[n] >= tmp0);
const u32 old_rn = r[n];
if (old_q == 0)
{
if (sr.M == 0)
{
r[n] -= r[m];
bool tmp1 = r[n] > old_rn;
sr.Q = sr.Q ^ tmp1;
}
else
{
r[n] += r[m];
bool tmp1 = r[n] < old_rn;
sr.Q = !sr.Q ^ tmp1;
}
}
else
{
if (sr.M == 0)
{
r[n] += r[m];
bool tmp1 = r[n] < old_rn;
sr.Q = sr.Q ^ tmp1;
}
else
{
r[n] -= r[m];
bool tmp1 = r[n] > old_rn;
sr.Q = !sr.Q ^ tmp1;
}
}
sr.T = (sr.Q == sr.M);
}

View File

@ -9,6 +9,7 @@
#include "hw/sh4/sh4_if.h"
#include "hw/sh4/sh4_mmr.h"
#include "hw/sh4/sh4_core.h"
#include "hw/sh4/sh4_cache.h"
//Types
@ -84,6 +85,7 @@ void CCN_CCR_write(u32 addr, u32 value)
if (temp.ICI) {
DEBUG_LOG(SH4, "Sh4: i-cache invalidation %08X", curr_pc);
//Shikigami No Shiro II uses ICI frequently
icache.Invalidate();
}
temp.ICI=0;
@ -155,10 +157,10 @@ void ccn_init()
}
void ccn_reset()
void ccn_reset(bool hard)
{
CCN_TRA = 0x0;
CCN_EXPEVT = 0x0;
CCN_EXPEVT = hard ? 0 : 0x20;
CCN_MMUCR.reg_data = 0x0;
CCN_CCR.reg_data = 0x0;
}

View File

@ -46,6 +46,7 @@ void ITLB_Sync(u32 entry);
bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data);
void mmu_set_state();
void mmu_flush_table();
void mmu_raise_exception(u32 mmu_error, u32 address, u32 am);
static INLINE bool mmu_enabled()
{

View File

@ -35,7 +35,7 @@ void tmu_reset(bool hard);
void tmu_term();
void ccn_init();
void ccn_reset();
void ccn_reset(bool hard);
void ccn_term();
void MMU_init();

105
core/hw/sh4/sh4_cache.h Normal file
View File

@ -0,0 +1,105 @@
/*
Copyright 2020 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include "types.h"
#include "sh4_mem.h"
#include "modules/mmu.h"
//
// SH4 instruction cache implementation
//
class sh4_icache
{
public:
u16 ReadMem(u32 address)
{
if ((address & 0xE0000000) == 0xA0000000 // P2, P4: non-cacheable
|| (address & 0xE0000000) == 0xE0000000
|| !CCN_CCR.ICE) // Instruction cache disabled
return IReadMem16(address);
u32 index = CCN_CCR.IIX ?
((address >> 5) & 0x7f) | ((address >> (25 - 7)) & 0x80)
: (address >> 5) & 0xff;
#ifndef NO_MMU
if (mmu_enabled())
{
u32 paddr;
u32 rv = mmu_instruction_translation(address, paddr);
if (rv != MMU_ERROR_NONE)
mmu_raise_exception(rv, address, MMU_TT_IREAD);
address = paddr;
}
#endif
cache_line& line = lines[index];
const u32 tag = (address >> 10) & 0x7ffff;
if (!line.valid || tag != line.address)
{
// miss
line.valid = true;
line.address = tag;
const u32 line_addr = address & ~0x1f;
u32 *p = (u32 *)line.data;
for (int i = 0; i < 32; i += 4)
*p++ = _vmem_ReadMem32(line_addr + i);
}
return *(u16*)&line.data[address & 0x1f];
}
void Invalidate()
{
for (auto& line : lines)
line.valid = false;
}
void Reset(bool hard)
{
if (hard)
memset(&lines[0], 0, sizeof(lines));
}
bool Serialize(void **data, unsigned int *total_size)
{
REICAST_S(lines);
return true;
}
bool Unserialize(void **data, unsigned int *total_size)
{
REICAST_US(lines);
return true;
}
private:
struct cache_line {
bool valid;
u32 address;
u8 data[32];
};
std::array<cache_line, 256> lines;
};
extern sh4_icache icache;

View File

@ -868,7 +868,7 @@ void sh4_mmr_reset(bool hard)
OnChipRAM = {};
//Reset register values
bsc_reset(hard);
ccn_reset();
ccn_reset(hard);
cpg_reset();
dmac_reset();
intc_reset();

View File

@ -24,17 +24,6 @@ extern std::array<RegisterStruct, 12> TMU;
extern std::array<RegisterStruct, 8> SCI;
extern std::array<RegisterStruct, 10> SCIF;
/*
//Region P4
u32 ReadMem_P4(u32 addr,u32 sz);
void WriteMem_P4(u32 addr,u32 data,u32 sz);
//Area7
u32 ReadMem_area7(u32 addr,u32 sz);
void WriteMem_area7(u32 addr,u32 data,u32 sz);
void DYNACALL WriteMem_sq_32(u32 address,u32 data);*/
//Init/Res/Term
void sh4_mmr_init();
void sh4_mmr_reset(bool hard);
void sh4_mmr_term();
@ -42,14 +31,14 @@ void sh4_mmr_term();
template<typename T>
void sh4_rio_reg(T& arr, u32 addr, RegIO flags, u32 sz, RegReadAddrFP* rp=0, RegWriteAddrFP* wp=0);
#define A7_REG_HASH(addr) ((addr>>16)&0x1FFF)
#define A7_REG_HASH(addr) (((addr) >> 16) & 0x1FFF)
#define SH4IO_REGN(mod,addr,size) (mod[(addr&255)/4].data##size)
#define SH4IO_REG(mod,name,size) SH4IO_REGN(mod,mod##_##name##_addr,size)
#define SH4IO_REG_T(mod,name,size) ((mod##_##name##_type&)SH4IO_REG(mod,name,size))
#define SH4IO_REGN(mod, addr, size) ((mod)[((addr) & 255) / 4].data##size)
#define SH4IO_REG(mod, name, size) SH4IO_REGN(mod, mod##_##name##_addr, size)
#define SH4IO_REG_T(mod, name, size) ((mod##_##name##_type&)SH4IO_REG(mod, name, size))
#define SH4IO_REG_OFS(mod,name,o,s,size) SH4IO_REGN(mod,mod##_##name##0_addr+o*s,size)
#define SH4IO_REG_T_OFS(mod,name,o,s,size) ((mod##_##name##_type&)SH4IO_REG_OFS(mod,name,o,s,size))
#define SH4IO_REG_OFS(mod, name, o, s, size) SH4IO_REGN(mod, mod##_##name##0_addr + (o) * (s), size)
#define SH4IO_REG_T_OFS(mod, name, o, s, size) ((mod##_##name##_type&)SH4IO_REG_OFS(mod, name, o, s, size))
//CCN module registers base
#define CCN_BASE_addr 0x1F000000

View File

@ -22,6 +22,7 @@
#include "iso9660.h"
#include "font.h"
#include "hw/aica/aica.h"
#include "hw/aica/aica_mem.h"
#include "oslib/oslib.h"
#include <map>

View File

@ -15,6 +15,7 @@
#include "reios/gdrom_hle.h"
#include "hw/sh4/dyna/blockmanager.h"
#include "hw/naomi/naomi_cart.h"
#include "hw/sh4/sh4_cache.h"
#define REICAST_SKIP(size) do { if (*data) *(u8**)data += (size); *total_size += (size); } while (false)
@ -45,6 +46,7 @@ extern u32 VREG;//video reg =P
extern u32 ARMRST;//arm reset reg
extern u32 rtc_EN;
extern int dma_sched_id;
extern u32 RealTimeClock;
//./core/hw/aica/aica_mem.o
extern u8 aica_reg[0x8000];
@ -275,7 +277,7 @@ bool dc_serialize(void **data, unsigned int *total_size)
{
int i = 0;
serialize_version_enum version = V8;
serialize_version_enum version = V9;
*total_size = 0 ;
@ -308,6 +310,7 @@ bool dc_serialize(void **data, unsigned int *total_size)
REICAST_S(VREG);
REICAST_S(ARMRST);
REICAST_S(rtc_EN);
REICAST_S(RealTimeClock);
REICAST_SA(aica_reg,0x8000);
@ -392,6 +395,7 @@ bool dc_serialize(void **data, unsigned int *total_size)
register_serialize(TMU, data, total_size) ;
register_serialize(SCI, data, total_size) ;
register_serialize(SCIF, data, total_size) ;
icache.Serialize(data, total_size);
REICAST_SA(mem_b.data, mem_b.size);
@ -664,6 +668,7 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
register_unserialize(TMU, data, total_size, V9_LIBRETRO) ;
register_unserialize(SCI, data, total_size, V9_LIBRETRO) ;
register_unserialize(SCIF, data, total_size, V9_LIBRETRO) ;
icache.Reset(true);
REICAST_USA(mem_b.data, mem_b.size);
REICAST_USA(InterruptEnvId,32);
@ -852,6 +857,8 @@ bool dc_unserialize(void **data, unsigned int *total_size)
REICAST_US(VREG);
REICAST_US(ARMRST);
REICAST_US(rtc_EN);
if (version >= V9)
REICAST_US(RealTimeClock);
REICAST_USA(aica_reg,0x8000);
@ -987,6 +994,10 @@ bool dc_unserialize(void **data, unsigned int *total_size)
register_unserialize(TMU, data, total_size, version) ;
register_unserialize(SCI, data, total_size, version) ;
register_unserialize(SCIF, data, total_size, version) ;
if (version >= V9)
icache.Unserialize(data, total_size);
else
icache.Reset(true);
REICAST_USA(mem_b.data, mem_b.size);

View File

@ -523,17 +523,6 @@ void libPvr_LockedBlockWrite(vram_block* block,u32 addr); //set to 0 if not used
void* libPvr_GetRenderTarget();
//AICA
s32 libAICA_Init();
void libAICA_Reset(bool hard);
void libAICA_Term();
u32 libAICA_ReadReg(u32 addr,u32 size);
void libAICA_WriteReg(u32 addr,u32 data,u32 size);
void libAICA_Update(u32 cycles); //called every ~1800 cycles, set to 0 if not used
//GDR
s32 libGDR_Init();
void libGDR_Reset(bool hard);
@ -564,13 +553,9 @@ static inline void libExtDevice_WriteMem_A5(u32 addr,u32 data,u32 size) { }
//ARM
s32 libARM_Init();
void libARM_Reset(bool M);
void libARM_Reset(bool hard);
void libARM_Term();
void libARM_SetResetState(u32 State);
void libARM_Update(u32 cycles);
#define ReadMemArrRet(arr,addr,sz) \
{if (sz==1) \
return arr[addr]; \
@ -620,4 +605,5 @@ enum serialize_version_enum {
V6 = 801,
V7 = 802,
V8 = 803,
V9 = 804,
} ;

View File

@ -8,7 +8,50 @@
#define SHIL_MODE 2
#include "hw/sh4/dyna/shil_canonical.h"
void div32s_slow(u32& r1, u32 r2, u32& r3)
// Performs one DIV1 step for the "slow" division helpers of this test.
// r1 is updated in place (shifted left by one with sr.T shifted in, then
// r2 is added or subtracted). Reads sr.Q, sr.M and sr.T; writes sr.Q and sr.T.
// NOTE(review): appears to mirror the interpreter's div1 opcode handler -
// keep both in sync if either changes.
static void div1(u32& r1, u32 r2)
{
// Q as it was on entry: selects add vs. subtract below
const u8 old_q = sr.Q;
// Q temporarily holds the bit about to be shifted out of r1
sr.Q = (u8)((0x80000000 & r1) != 0);
r1 <<= 1;
r1 |= sr.T;
// Value before the add/subtract, used to detect unsigned wrap-around
const u32 old_rn = r1;
if (old_q == 0)
{
if (sr.M == 0)
{
r1 -= r2;
// true when the subtraction wrapped (borrow)
bool tmp1 = r1 > old_rn;
sr.Q = sr.Q ^ tmp1;
}
else
{
r1 += r2;
// true when the addition wrapped (carry)
bool tmp1 = r1 < old_rn;
sr.Q = !sr.Q ^ tmp1;
}
}
else
{
if (sr.M == 0)
{
r1 += r2;
// true when the addition wrapped (carry)
bool tmp1 = r1 < old_rn;
sr.Q = sr.Q ^ tmp1;
}
else
{
r1 -= r2;
// true when the subtraction wrapped (borrow)
bool tmp1 = r1 > old_rn;
sr.Q = !sr.Q ^ tmp1;
}
}
// T is set when the resulting Q equals M
sr.T = (sr.Q == sr.M);
}
static void div32s_slow(u32& r1, u32 r2, u32& r3)
{
sr.Q = r3 >> 31;
sr.M = r2 >> 31;
@ -19,23 +62,11 @@ void div32s_slow(u32& r1, u32 r2, u32& r3)
r1 = (u32)rv;
sr.T = rv >> 32;
// DIV1
unsigned char old_q = sr.Q;
sr.Q = (u8)((0x80000000 & r3) !=0);
r3 <<= 1;
r3 |= (unsigned long)sr.T;
u32 tmp0 = r3;
r3 += (2 * (old_q ^ sr.M) - 1) * r2;
sr.Q ^= old_q ^ (sr.M ? r3 > tmp0 : r3 >= tmp0);
sr.T = (sr.Q == sr.M);
div1(r3, r2);
}
}
void div32s_fast(u32& r1, u32 r2, u32& r3)
static void div32s_fast(u32& r1, u32 r2, u32& r3)
{
sr.T = (r3 ^ r2) & 0x80000000;
u64 rv = shil_opcl_div32s::f1::impl(r1, r2, r3);
@ -47,7 +78,7 @@ void div32s_fast(u32& r1, u32 r2, u32& r3)
sr.T &= 1;
}
void div32u_fast(u32& r1, u32 r2, u32& r3)
static void div32u_fast(u32& r1, u32 r2, u32& r3)
{
u64 rv = shil_opcl_div32u::f1::impl(r1, r2, r3);
r1 = (u32)rv;
@ -57,7 +88,7 @@ void div32u_fast(u32& r1, u32 r2, u32& r3)
r3 = shil_opcl_div32p2::f1::impl(r3, r2, sr.T);
}
void div32u_slow(u32& r1, u32 r2, u32& r3)
static void div32u_slow(u32& r1, u32 r2, u32& r3)
{
sr.Q = 0;
sr.M = 0;
@ -69,19 +100,7 @@ void div32u_slow(u32& r1, u32 r2, u32& r3)
r1 = (u32)rv;
sr.T = rv >> 32;
// DIV1
unsigned char old_q = sr.Q;
sr.Q = (u8)((0x80000000 & r3) !=0);
r3 <<= 1;
r3 |= (unsigned long)sr.T;
u32 tmp0 = r3;
r3 += (2 * (old_q ^ sr.M) - 1) * r2;
sr.Q ^= old_q ^ (sr.M ? r3 > tmp0 : r3 >= tmp0);
sr.T = (sr.Q == sr.M);
div1(r3, r2);
}
}

View File

@ -28,7 +28,7 @@ TEST_F(SerializeTest, SizeTest)
unsigned int total_size = 0;
void *data = nullptr;
ASSERT_TRUE(dc_serialize(&data, &total_size));
ASSERT_EQ(28114471u, total_size);
ASSERT_EQ(28124715u, total_size);
}