sh4 ocache implementation. IC and OC address/data read/write in P4

ignore SR.RB in user mode instead of forcing it 0
add STRICT_MODE to enable ocache in interpreter
don't flush mmu table when enabling it
fix fixNaN64()
This commit is contained in:
Flyinghead 2020-06-12 17:35:14 +02:00
parent d29797988e
commit 22dcb1ec99
14 changed files with 571 additions and 128 deletions

View File

@ -116,8 +116,11 @@
*/
//#define NO_MMU
//#define STRICT_MODE
#ifndef STRICT_MODE
#define FAST_MMU
#define USE_WINCE_HACK
#endif
#define DC_PLATFORM_MASK 7
#define DC_PLATFORM_DREAMCAST 0 /* Works, for the most part */

View File

@ -260,9 +260,10 @@ static void WriteBios(u32 addr,u32 data,u32 sz)
//use unified size handler for registers
//it really makes no sense to use different size handlers on em -> especially when we can use templates :p
template<u32 sz, class T>
template<class T>
T DYNACALL ReadMem_area0(u32 addr)
{
constexpr u32 sz = (u32)sizeof(T);
addr &= 0x01FFFFFF;//to get rid of non needed bits
const u32 base=(addr>>16);
//map 0x0000 to 0x01FF to Default handler
@ -327,7 +328,7 @@ T DYNACALL ReadMem_area0(u32 addr)
//map 0x0070 to 0x0070
else if ((base ==0x0070) /*&& (addr>= 0x00700000)*/ && (addr<=0x00707FFF)) // :AICA- Sound Cntr. Reg.
{
return (T) ReadMem_aica_reg(addr,sz);//libAICA_ReadReg(addr,sz);
return (T)ReadMem_aica_reg(addr, sz);
}
//map 0x0071 to 0x0071
else if ((base ==0x0071) /*&& (addr>= 0x00710000)*/ && (addr<= 0x0071000B)) // :AICA- RTC Cntr. Reg.
@ -340,17 +341,24 @@ T DYNACALL ReadMem_area0(u32 addr)
ReadMemArrRet(aica_ram.data,addr&ARAM_MASK,sz);
}
//map 0x0100 to 0x01FF
else if ((base >=0x0100) && (base <=0x01FF) /*&& (addr>= 0x01000000) && (addr<= 0x01FFFFFF)*/) // :Ext. Device
else if (base >= 0x0100 && base <= 0x01FF) // G2 Ext. Device #1
{
return (T)libExtDevice_ReadMem_A0_010(addr,sz);
if (settings.platform.system == DC_PLATFORM_NAOMI)
return (T)libExtDevice_ReadMem_A0_010(addr, sz);
else
{
INFO_LOG(MEMORY, "Read from BBA not implemented, addr=%x", addr);
return 0;
}
}
INFO_LOG(MEMORY, "Read from area0<%d> not implemented [Unassigned], addr=%x", sz, addr);
return 0;
}
template<u32 sz, class T>
template<class T>
void DYNACALL WriteMem_area0(u32 addr,T data)
{
constexpr u32 sz = (u32)sizeof(T);
addr &= 0x01FFFFFF;//to get rid of non needed bits
const u32 base=(addr>>16);
@ -424,9 +432,12 @@ void DYNACALL WriteMem_area0(u32 addr,T data)
WriteMemArr(aica_ram.data, addr & ARAM_MASK, data, sz);
}
//map 0x0100 to 0x01FF
else if ((base >=0x0100) && (base <=0x01FF) /*&& (addr>= 0x01000000) && (addr<= 0x01FFFFFF)*/) // Ext. Device
else if (base >= 0x0100 && base <= 0x01FF) // G2 Ext. Device #1
{
libExtDevice_WriteMem_A0_010(addr,data,sz);
if (settings.platform.system == DC_PLATFORM_NAOMI)
libExtDevice_WriteMem_A0_010(addr, data, sz);
else
INFO_LOG(COMMON, "Write to BBA not implemented, addr=%x, data=%x, size=%d", addr, data, sz);
}
else
INFO_LOG(COMMON, "Write to area0_32 not implemented [Unassigned], addr=%x,data=%x,size=%d", addr, data, sz);

View File

@ -61,8 +61,8 @@ void _vmem_init_mappings();
_vmem_handler _vmem_register_handler(_vmem_ReadMem8FP* read8,_vmem_ReadMem16FP* read16,_vmem_ReadMem32FP* read32, _vmem_WriteMem8FP* write8,_vmem_WriteMem16FP* write16,_vmem_WriteMem32FP* write32);
#define _vmem_register_handler_Template(read,write) _vmem_register_handler \
(read<1,u8>,read<2,u16>,read<4,u32>, \
write<1,u8>,write<2,u16>,write<4,u32>)
(read<u8>,read<u16>,read<u32>, \
write<u8>,write<u16>,write<u32>)
void _vmem_map_handler(_vmem_handler Handler,u32 start,u32 end);
void _vmem_map_block(void* base,u32 start,u32 end,u32 mask);
@ -70,8 +70,6 @@ void _vmem_mirror_mapping(u32 new_region,u32 start,u32 size);
#define _vmem_map_block_mirror(base, start, end, blck_size) { \
u32 block_size = (blck_size) >> 24; \
u32 map_sz = (end) - (start) + 1; \
/* verify((map_sz % block_size) == 0); */ \
for (u32 _maip = (start); _maip <= (end); _maip += block_size) \
_vmem_map_block((base), _maip, _maip + block_size - 1, blck_size - 1); \
}

View File

@ -6,6 +6,7 @@
#include "sh4_opcodes.h"
#include "../sh4_core.h"
#include "../sh4_rom.h"
#include "../sh4_cache.h"
#include "hw/sh4/sh4_mem.h"
@ -26,6 +27,17 @@ void iNimp(const char*str);
#define IS_DENORMAL(f) (((*(f))&0x7f800000) == 0)
// When STRICT_MODE is defined, the ReadMemN/WriteMemN names used by the
// opcode handlers in this file expand to accesses through the operand cache
// object (ocache), for 8, 16, 32 and 64-bit sizes.
#ifdef STRICT_MODE
#define ReadMem8(addr) (ocache.ReadMem<u8>(addr))
#define ReadMem16(addr) (ocache.ReadMem<u16>(addr))
#define ReadMem32(addr) (ocache.ReadMem<u32>(addr))
#define ReadMem64(addr) (ocache.ReadMem<u64>(addr))
#define WriteMem8(addr, data) (ocache.WriteMem<u8>(addr, data))
#define WriteMem16(addr, data) (ocache.WriteMem<u16>(addr, data))
#define WriteMem32(addr, data) (ocache.WriteMem<u32>(addr, data))
#define WriteMem64(addr, data) (ocache.WriteMem<u64>(addr, data))
#endif
#define ReadMemU64(to,addr) to=ReadMem64(addr)
#define ReadMemU32(to,addr) to=ReadMem32(addr)
#define ReadMemS32(to,addr) to=(s32)ReadMem32(addr)
@ -73,7 +85,7 @@ INLINE void Denorm32(float &value)
#define CHECK_FPU_32(v) v = fixNaN(v)
#define CHECK_FPU_64(v)
#define CHECK_FPU_64(v) v = fixNaN64(v)
//fadd <FREG_M>,<FREG_N>
@ -626,7 +638,7 @@ sh4op(i1111_nnnn_0110_1101)
//Operation _can_ be done on sh4
u32 n = GetN(op)>>1;
SetDR(n,sqrt(GetDR(n)));
SetDR(n, fixNaN64(sqrt(GetDR(n))));
}
}

View File

@ -16,6 +16,7 @@
#define CPU_RATIO (8)
sh4_icache icache;
sh4_ocache ocache;
static s32 l;

View File

@ -16,14 +16,10 @@
#include "../modules/ccn.h"
#include "../sh4_interrupts.h"
#include "hw/gdrom/gdrom_if.h"
#include "../sh4_cache.h"
#include "hw/sh4/sh4_opcode.h"
void dofoo(sh4_opcode op)
{
r[op.n()]=gbr;
}
#define GetN(str) ((str>>8) & 0xf)
#define GetM(str) ((str>>4) & 0xf)
#define GetImm4(str) ((str>>0) & 0xf)
@ -36,6 +32,14 @@ void dofoo(sh4_opcode op)
#define iWarn cpu_iWarn
//Read Mem macros
// When STRICT_MODE is defined, the 8/16/32-bit ReadMemN/WriteMemN names used
// by the opcode handlers in this file expand to accesses through the operand
// cache object (ocache). No 64-bit variants are redefined here.
#ifdef STRICT_MODE
#define ReadMem8(addr) (ocache.ReadMem<u8>(addr))
#define ReadMem16(addr) (ocache.ReadMem<u16>(addr))
#define ReadMem32(addr) (ocache.ReadMem<u32>(addr))
#define WriteMem8(addr, data) (ocache.WriteMem<u8>(addr, data))
#define WriteMem16(addr, data) (ocache.WriteMem<u16>(addr, data))
#define WriteMem32(addr, data) (ocache.WriteMem<u32>(addr, data))
#endif
#define ReadMemU32(to,addr) to=ReadMem32(addr)
#define ReadMemS32(to,addr) to=(s32)ReadMem32(addr)
@ -165,7 +169,6 @@ sh4op(i0000_nnnn_mmmm_1100)
u32 n = GetN(op);
u32 m = GetM(op);
ReadMemBOS8(r[n],r[0],r[m]);
//r[n]= (u32)(s8)ReadMem8(r[0]+r[m]);
}
@ -175,7 +178,6 @@ sh4op(i0000_nnnn_mmmm_1101)
u32 n = GetN(op);
u32 m = GetM(op);
ReadMemBOS16(r[n],r[0],r[m]);
//r[n] = (u32)(s16)ReadMem16(r[0] + r[m]);
}
@ -195,7 +197,6 @@ sh4op(i0000_nnnn_mmmm_0100)
u32 m = GetM(op);
WriteMemBOU8(r[0],r[n], r[m]);
//WriteMem8(r[0] + r[n], (u8)r[m]);
}
@ -641,7 +642,6 @@ sh4op(i0110_nnnn_mmmm_0001)
{
u32 n = GetN(op);
u32 m = GetM(op);
//r[n] = (u32)(s32)(s16)ReadMem16(r[m]);
ReadMemS16(r[n] ,r[m]);
}
@ -670,7 +670,6 @@ sh4op(i0110_nnnn_mmmm_0100)
{
u32 n = GetN(op);
u32 m = GetM(op);
//r[n] = (u32)(s32)(s8)ReadMem8(r[m]);
ReadMemS8(r[n],r[m]);
if (n != m)
r[m] += 1;
@ -682,7 +681,6 @@ sh4op(i0110_nnnn_mmmm_0101)
{
u32 n = GetN(op);
u32 m = GetM(op);
//r[n] = (u32)(s16)(u16)ReadMem16(r[m]);
ReadMemS16(r[n],r[m]);
if (n != m)
r[m] += 2;
@ -726,7 +724,6 @@ sh4op(i1000_0100_mmmm_iiii)
{
u32 disp = GetImm4(op);
u32 m = GetM(op);
//r[0] = (u32)(s8)ReadMem8(r[m] + disp);
ReadMemBOS8(r[0] ,r[m] , disp);
}
@ -736,7 +733,6 @@ sh4op(i1000_0101_mmmm_iiii)
{
u32 disp = GetImm4(op);
u32 m = GetM(op);
//r[0] = (u32)(s16)ReadMem16(r[m] + (disp << 1));
ReadMemBOS16(r[0],r[m] , (disp << 1));
}
@ -748,7 +744,6 @@ sh4op(i1001_nnnn_iiii_iiii)
{
u32 n = GetN(op);
u32 disp = (GetImm8(op));
//r[n]=(u32)(s32)(s16)ReadMem16((disp<<1) + pc + 4);
ReadMemS16(r[n],(disp<<1) + next_pc + 2);
}
@ -785,7 +780,6 @@ sh4op(i1100_0010_iiii_iiii)
sh4op(i1100_0100_iiii_iiii)
{
u32 disp = GetImm8(op);
//r[0] = (u32)(s8)ReadMem8(gbr+disp);
ReadMemBOS8(r[0],gbr,disp);
}
@ -794,7 +788,6 @@ sh4op(i1100_0100_iiii_iiii)
sh4op(i1100_0101_iiii_iiii)
{
u32 disp = GetImm8(op);
//r[0] = (u32)(s16)ReadMem16(gbr+(disp<<1) );
ReadMemBOS16(r[0],gbr,(disp<<1));
}
@ -811,7 +804,6 @@ sh4op(i1100_0110_iiii_iiii)
// mova @(<disp>,PC),R0
sh4op(i1100_0111_iiii_iiii)
{
//u32 disp = (() << 2) + ((pc + 4) & 0xFFFFFFFC);
r[0] = ((next_pc+2)&0xFFFFFFFC)+(GetImm8(op)<<2);
}
@ -843,6 +835,7 @@ sh4op(i0000_nnnn_1100_0011)
u32 n = GetN(op);
WriteMemU32(r[n],r[0]);//at r[n],r[0]
//iWarn(op, "movca.l R0, @<REG_N>");
// TODO ocache
}
//clrmac
@ -1185,19 +1178,22 @@ sh4op(i0000_0000_0011_1000)
//ocbi @<REG_N>
sh4op(i0000_nnnn_1001_0011)
{
	// OCBI: invalidate the operand cache line matching @Rn without
	// writing it back (write_back = false, invalidate = true).
	const u32 n = GetN(op);
	ocache.WriteBack(r[n], false, true);
}
//ocbp @<REG_N>
sh4op(i0000_nnnn_1010_0011)
{
	// OCBP: write back the operand cache line matching @Rn if it is dirty,
	// then invalidate it (write_back = true, invalidate = true).
	const u32 n = GetN(op);
	ocache.WriteBack(r[n], true, true);
}
//ocbwb @<REG_N>
sh4op(i0000_nnnn_1011_0011)
{
	// OCBWB: write back the operand cache line matching @Rn if it is dirty
	// and keep it valid (write_back = true, invalidate = false).
	const u32 n = GetN(op);
	ocache.WriteBack(r[n], true, false);
}
//pref @<REG_N>
@ -1275,6 +1271,11 @@ sh4op(i0000_nnnn_1000_0011)
else
do_sqw<false>(Dest);
}
else
{
//printf("PREF @R%d (0x%08x)\n", n, Dest);
ocache.Prefetch(Dest);
}
}

View File

@ -81,15 +81,19 @@ void CCN_CCR_write(u32 addr, u32 value)
CCN_CCR_type temp;
temp.reg_data=value;
if (temp.ICI) {
DEBUG_LOG(SH4, "Sh4: i-cache invalidation %08X", curr_pc);
//Shikigami No Shiro II uses ICI frequently
icache.Invalidate();
if (!settings.dynarec.Enable)
icache.Invalidate();
temp.ICI = 0;
}
if (temp.OCI) {
DEBUG_LOG(SH4, "Sh4: o-cache invalidation %08X", curr_pc);
if (!settings.dynarec.Enable)
ocache.Invalidate();
temp.OCI = 0;
}
temp.ICI=0;
temp.OCI=0;
CCN_CCR=temp;
}

View File

@ -205,10 +205,8 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
RaiseException(0xC0, 0x100);
else if (am == MMU_TT_DREAD) //READPROT - Data TLB Protection Violation Exception
RaiseException(0xA0, 0x100);
else
{
verify(false);
}
else //READPROT - Instr TLB Protection Violation Exception
RaiseException(0xA0, 0x100);
return;
//Mem is write protected , firstwrite
@ -499,14 +497,14 @@ u32 mmu_data_translation(u32 va, u32& rv)
if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
{
// 7C000000 to 7FFFFFFF in P0 not translated in supervisor mode
rv = va;
return MMU_ERROR_NONE;
}
// Not called if CCN_MMUCR.AT == 0
//if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
if (fast_reg_lut[va >> 29] != 0)
{
// P1, P2 and P4 aren't translated
rv = va;
return MMU_ERROR_NONE;
}
@ -665,7 +663,6 @@ void mmu_set_state()
WriteMem32 = &mmu_WriteMem<u32>;
WriteMem64 = &mmu_WriteMem<u64>;
_vmem_enable_mmu(true);
mmu_flush_table();
}
else
{
@ -708,6 +705,7 @@ void MMU_reset()
memset(UTLB, 0, sizeof(UTLB));
memset(ITLB, 0, sizeof(ITLB));
mmu_set_state();
mmu_flush_table();
}
void MMU_term()

View File

@ -135,8 +135,6 @@ void turn_on_off_ch(u32 ch, bool on)
tmu_mask[ch]=on?0xFFFFFFFF:0x00000000;
tmu_mask64[ch] = on ? 0xFFFFFFFFFFFFFFFF : 0x0000000000000000;
write_TMU_TCNTch(ch,TCNT);
sched_chan_tick(ch);
}
//Update internal counter registers
@ -187,7 +185,6 @@ void UpdateTMUCounts(u32 reg)
}
tmu_shift[reg]+=2;
write_TMU_TCNTch(reg,TCNT);
sched_chan_tick(reg);
}
//Write to status registers
@ -227,10 +224,6 @@ int sched_tmu_cb(int ch, int sch_cycl, int jitter)
s64 tcnt64 = (s64)read_TMU_TCNTch64(ch);
u32 tcor = TMU_TCOR(ch);
u32 cycles = tcor << tmu_shift[ch];
//64 bit maths to differentiate big values from overflows
if (tcnt64 <= jitter) {
//raise interrupt, timer counted down
@ -240,6 +233,7 @@ int sched_tmu_cb(int ch, int sch_cycl, int jitter)
//printf("Interrupt for %d, %d cycles\n", ch, sch_cycl);
//schedule next trigger by writing the TCNT register
u32 tcor = TMU_TCOR(ch);
write_TMU_TCNTch(ch, tcor + tcnt);
}
else {

View File

@ -23,16 +23,16 @@
#include "modules/mmu.h"
//
// SH4 instruction cache implementation
// SH4 instruction cache
//
class sh4_icache
{
public:
u16 ReadMem(u32 address)
{
if ((address & 0xE0000000) == 0xA0000000 // P2, P4: non-cacheable
|| (address & 0xE0000000) == 0xE0000000
|| !CCN_CCR.ICE) // Instruction cache disabled
const u32 area = address >> 29;
if (area == 5 || area == 7 // P2, P4: non-cacheable
|| !CCN_CCR.ICE) // Instruction cache disabled
return IReadMem16(address);
u32 index = CCN_CCR.IIX ?
@ -58,9 +58,15 @@ public:
line.valid = true;
line.address = tag;
const u32 line_addr = address & ~0x1f;
u32 *p = (u32 *)line.data;
for (int i = 0; i < 32; i += 4)
*p++ = _vmem_ReadMem32(line_addr + i);
u8* memPtr = GetMemPtr(line_addr, sizeof(line.data));
if (memPtr != nullptr)
memcpy(line.data, memPtr, sizeof(line.data));
else
{
u32 *p = (u32 *)line.data;
for (int i = 0; i < 32; i += 4)
*p++ = _vmem_ReadMem32(line_addr + i);
}
}
return *(u16*)&line.data[address & 0x1f];
@ -92,6 +98,64 @@ public:
return true;
}
u32 ReadAddressArray(u32 addr)
{
	// Entry number is taken from address bits [12:5].
	const cache_line& entry = lines[(addr >> 5) & 0xFF];
	// Result layout: bit 0 = valid flag, bits 10 and up = stored tag.
	return entry.valid | (entry.address << 10);
}
// Memory-mapped write to the instruction cache address array.
//   addr: bits [12:5] select the entry, bit 3 selects associative mode.
//   data: bit 0 = new valid flag, bits [28:10] = tag (or, in associative
//         mode with the MMU on, the virtual address to translate first).
void WriteAddressArray(u32 addr, u32 data)
{
	cache_line& entry = lines[(addr >> 5) & 0xFF];
	const bool newValid = data & 1;

	if ((addr & 8) == 0)
	{
		// Non-associative: overwrite valid flag and tag unconditionally.
		entry.valid = newValid;
		entry.address = (data >> 10) & 0x7ffff;
		return;
	}

	// Associative: only touch the entry when its tag matches.
	u32 tag = (data >> 10) & 0x7ffff;
#ifndef NO_MMU
	if (mmu_enabled())
	{
		const u32 vaddr = data & ~0x3ff;
		u32 paddr;
		const u32 rv = mmu_instruction_translation(vaddr, paddr);
		if (rv == MMU_ERROR_TLB_MISS)
			// A TLB miss silently cancels the write
			return;
		if (rv != MMU_ERROR_NONE)
			mmu_raise_exception(rv, vaddr, MMU_TT_IREAD);
		tag = (paddr >> 10) & 0x7ffff;
	}
#endif
	if (entry.valid && tag == entry.address)
		entry.valid = newValid;
	// otherwise the write is ignored
}
u32 ReadDataArray(u32 addr)
{
	// Bits [12:5] pick the entry, bits [4:2] pick the longword inside
	// the 32-byte line.
	const u32 entry = (addr >> 5) & 0xFF;
	const u32 offset = addr & 0x1C;
	return *(u32 *)&lines[entry].data[offset];
}
void WriteDataArray(u32 addr, u32 data)
{
	// Bits [12:5] pick the entry, bits [4:2] pick the longword inside
	// the 32-byte line.
	const u32 entry = (addr >> 5) & 0xFF;
	const u32 offset = addr & 0x1C;
	*(u32 *)&lines[entry].data[offset] = data;
}
private:
struct cache_line {
bool valid;
@ -103,3 +167,350 @@ private:
};
extern sh4_icache icache;
//
// SH4 operand cache
//
class sh4_ocache
{
public:
// Reads a value of type T through the operand cache.
// Accesses to P2/P4, or any access while CCR.OCE is clear, bypass the cache.
// On a miss the line is (re)filled, writing back the previous contents first
// if they were valid and dirty.
template<class T>
T ReadMem(u32 address)
{
	const u32 region = address >> 29;
	const bool bypass = region == 5 || region == 7 // P2, P4: non-cacheable
			|| !CCN_CCR.OCE;                       // Operand cache disabled
	if (bypass)
		return readMem<T>(address);

	// The entry is selected before translation, the tag is compared after.
	const u32 entry = lineIndex(address);
#ifndef NO_MMU
	if (mmu_enabled())
	{
		u32 physical;
		const u32 err = mmu_data_translation<MMU_TT_DREAD, T>(address, physical);
		if (err != MMU_ERROR_NONE)
			mmu_raise_exception(err, address, MMU_TT_DREAD);
		address = physical;
	}
#endif
	cache_line& cl = lines[entry];
	const u32 tag = (address >> 10) & 0x7ffff;
	const bool hit = cl.valid && cl.address == tag;
	if (!hit)
	{
		// Evict the current occupant if it holds unwritten data
		if (cl.valid && cl.dirty)
			doWriteBack(entry, cl);
		cl.address = tag;
		readCacheLine(address, cl);
	}

	return *(T*)&cl.data[address & 0x1f];
}
// Writes a value of type T through the operand cache.
// Accesses to P2/P4, or any access while CCR.OCE is clear, bypass the cache.
// Copy-back: a miss allocates the line (evicting a dirty occupant first),
//            then the data is stored in the cache and the line marked dirty.
// Write-through: the cache is only updated on a hit; memory is always written.
template<class T>
void WriteMem(u32 address, T data)
{
	const u32 region = address >> 29;
	const bool bypass = region == 5 || region == 7 // P2, P4: non-cacheable
			|| !CCN_CCR.OCE;                       // Operand cache disabled
	if (bypass)
	{
		writeMem(address, data);
		return;
	}

	const u32 entry = lineIndex(address);
	// P1 uses CCR.CB, every other cacheable region uses !CCR.WT
	const bool copyBack = region == 4 ? CCN_CCR.CB : !CCN_CCR.WT;
#ifndef NO_MMU
	if (mmu_enabled())
	{
		u32 physical;
		const u32 err = mmu_data_translation<MMU_TT_DWRITE, T>(address, physical);
		if (err != MMU_ERROR_NONE)
			mmu_raise_exception(err, address, MMU_TT_DWRITE);
		address = physical;
		// FIXME need WT bit of page
	}
#endif
	cache_line& cl = lines[entry];
	const u32 tag = (address >> 10) & 0x7ffff;
	const bool hit = cl.valid && cl.address == tag;
	T* const slot = (T*)&cl.data[address & 0x1f];

	if (copyBack)
	{
		if (!hit)
		{
			// Allocate on miss: evict dirty data, then fill the line
			if (cl.valid && cl.dirty)
				doWriteBack(entry, cl);
			cl.address = tag;
			readCacheLine(address, cl);
		}
		cl.dirty = true;
		*slot = data;
	}
	else
	{
		// Write-through never allocates; keep a cached copy coherent on hit
		if (hit)
			*slot = data;
		writeMem(address, data);
	}
}
// Cache block operation on the line containing `address`.
//   write_back: flush the line to memory if it is dirty.
//   invalidate: mark the line invalid afterwards.
// Does nothing for P2/P4 addresses or when the line does not hold `address`.
void WriteBack(u32 address, bool write_back, bool invalidate)
{
	switch (address >> 29)
	{
	case 5:
	case 7:
		// P2, P4: non-cacheable
		return;
	default:
		break;
	}
	address &= ~0x1F;
	const u32 entry = lineIndex(address);
#ifndef NO_MMU
	if (mmu_enabled())
	{
		u32 physical;
		const u32 err = mmu_data_translation<MMU_TT_DWRITE, u32>(address, physical);
		if (err != MMU_ERROR_NONE)
			mmu_raise_exception(err, address, MMU_TT_DWRITE);
		address = physical;
	}
#endif
	cache_line& cl = lines[entry];
	const u32 tag = (address >> 10) & 0x7ffff;
	if (cl.valid && cl.address == tag)
	{
		if (write_back && cl.dirty)
			doWriteBack(entry, cl);
		cl.dirty = false;
		cl.valid = !invalidate;
	}
}
// Loads the line containing `address` into the cache if it is not already
// there, writing back a valid dirty occupant first. P2/P4 addresses and
// address translation failures are silently ignored.
void Prefetch(u32 address)
{
	switch (address >> 29)
	{
	case 5:
	case 7:
		// P2, P4: non-cacheable
		return;
	default:
		break;
	}
	address &= ~0x1F;
	const u32 entry = lineIndex(address);
#ifndef NO_MMU
	if (mmu_enabled())
	{
		u32 physical;
		if (mmu_data_translation<MMU_TT_DREAD, u8>(address, physical) != MMU_ERROR_NONE)
			// ignore address translation errors
			return;
		address = physical;
	}
#endif
	cache_line& cl = lines[entry];
	const u32 tag = (address >> 10) & 0x7ffff;
	const bool hit = cl.valid && cl.address == tag;
	if (hit)
		return;

	if (cl.valid && cl.dirty)
		doWriteBack(entry, cl);
	cl.address = tag;
	readCacheLine(address, cl);
}
void Invalidate()
{
for (auto& line : lines)
{
line.dirty = false;
line.valid = false;
}
}
// Zeroes the whole cache on a hard reset; a soft reset leaves it untouched.
void Reset(bool hard)
{
	if (!hard)
		return;
	memset(lines.data(), 0, sizeof(lines));
}
// Saves the complete `lines` array with REICAST_S. Always returns true.
// NOTE(review): REICAST_S is defined elsewhere and presumably relies on the
// `data` / `total_size` parameter names - confirm before renaming them.
bool Serialize(void **data, unsigned int *total_size)
{
REICAST_S(lines);
return true;
}
// Restores the complete `lines` array with REICAST_US. Always returns true.
// NOTE(review): REICAST_US is defined elsewhere and presumably relies on the
// `data` / `total_size` parameter names - confirm before renaming them.
bool Unserialize(void **data, unsigned int *total_size)
{
REICAST_US(lines);
return true;
}
u32 ReadAddressArray(u32 addr)
{
	// Entry number is taken from address bits [13:5].
	const cache_line& entry = lines[(addr >> 5) & 0x1FF];
	// Result layout: bit 0 = valid, bit 1 = dirty, bits 10 and up = tag.
	return entry.valid | (entry.dirty << 1) | (entry.address << 10);
}
// Memory-mapped write to the operand cache address array.
//   addr: bits [13:5] select the entry, bit 3 (A) selects associative mode.
//   data: bit 0 = V (valid), bit 1 = U (dirty), bits [28:10] = tag (or, in
//         associative mode with the MMU on, the virtual address to translate).
//
// Non-associative: a valid dirty entry is written back, then tag, U and V
// are overwritten.
// Associative: only an entry that is valid and whose tag matches is updated
// (U and V only). A dirty entry is written back when 0 is written to V or U,
// so that clearing either bit never loses modified data. A TLB miss or a tag
// mismatch cancels the write without raising an exception.
void WriteAddressArray(u32 addr, u32 data)
{
	u32 index = (addr >> 5) & 0x1FF;
	cache_line& line = lines[index];
	bool associative = (addr & 8) != 0;
	if (!associative)
	{
		if (line.valid && line.dirty)
			doWriteBack(index, line);
		line.address = (data >> 10) & 0x7ffff;
	}
	else
	{
		u32 tag;
#ifndef NO_MMU
		if (mmu_enabled())
		{
			u32 vaddr = data & ~0x3ff;
			u32 paddr;
			u32 rv = mmu_data_translation<MMU_TT_DREAD, u8>(vaddr, paddr);
			if (rv == MMU_ERROR_TLB_MISS)
				// Ignore the write
				return;
			if (rv != MMU_ERROR_NONE)
				mmu_raise_exception(rv, vaddr, MMU_TT_DREAD);
			tag = (paddr >> 10) & 0x7ffff;
		}
		else
#endif
		{
			tag = (data >> 10) & 0x7ffff;
		}
		if (!line.valid || tag != line.address)
			// Ignore the write
			return;
		// Write back when V or U is being cleared (i.e. not both written
		// as 1). The previous test `!= 0` was inverted and dropped dirty
		// data on the usual invalidate write (data bits [1:0] == 0).
		if ((data & 3) != 3 && line.dirty)
			doWriteBack(index, line);
	}
	line.valid = data & 1;
	line.dirty = (data >> 1) & 1;
}
u32 ReadDataArray(u32 addr)
{
	// Bits [13:5] pick the entry, bits [4:2] pick the longword inside
	// the 32-byte line.
	const u32 entry = (addr >> 5) & 0x1FF;
	const u32 offset = addr & 0x1C;
	return *(u32 *)&lines[entry].data[offset];
}
void WriteDataArray(u32 addr, u32 data)
{
	// Bits [13:5] pick the entry, bits [4:2] pick the longword inside
	// the 32-byte line. The dirty flag is not modified.
	const u32 entry = (addr >> 5) & 0x1FF;
	const u32 offset = addr & 0x1C;
	*(u32 *)&lines[entry].data[offset] = data;
}
private:
// One operand cache entry. The whole array of entries is saved and restored
// as raw data by Serialize/Unserialize, so the field layout should stay stable.
struct cache_line {
// Entry currently holds the line identified by `address`.
bool valid;
// Entry was modified by a copy-back write and not yet written to memory.
bool dirty;
// Tag: bits [28:10] of the line's address, i.e. (addr >> 10) & 0x7ffff.
u32 address;
// The 32 bytes of line contents.
u8 data[32];
};
// Computes the cache entry number for an address from the current CCR
// settings:
//   OIX: the top index bit comes from address bit 25 instead of bit 13.
//   ORA: index bit 7 is masked out of the address and forced to 1 only for
//        addresses whose top three bits are 011.
u32 lineIndex(u32 address)
{
	const bool ramMode = CCN_CCR.ORA;
	u32 index;
	if (CCN_CCR.OIX)
	{
		const u32 high = (address >> (25 - 8)) & 0x100;
		const u32 low = (address >> 5) & (ramMode ? 0x7f : 0xff);
		index = high | low;
	}
	else
	{
		index = (address >> 5) & (ramMode ? 0x17f : 0x1ff);
	}
	if (ramMode && (address >> 29) == 3)
		index |= 0x80;

	return index;
}
// Fills `line` with the 32 bytes surrounding `address` and marks it valid
// and clean. Uses a direct copy when GetMemPtr returns a pointer for the
// range, otherwise eight 32-bit reads through _vmem_ReadMem32.
// The caller is responsible for setting line.address.
void readCacheLine(u32 address, cache_line& line)
{
	const u32 base = address & ~0x1f;
	line.valid = true;
	line.dirty = false;

	const u8* src = GetMemPtr(base, sizeof(line.data));
	if (src == nullptr)
	{
		u32 *words = (u32 *)line.data;
		for (u32 w = 0; w < 8; w++)
			words[w] = _vmem_ReadMem32(base + w * 4);
		return;
	}
	memcpy(line.data, src, sizeof(line.data));
}
// Writes the 32 bytes of `line` back to memory. The destination address is
// rebuilt from the stored tag (bits 10 and up) and the low five index bits
// (address bits [9:5]). Entries with index bit 7 set are never written back
// while CCR.ORA is set. Does not touch the valid/dirty flags.
void doWriteBack(u32 index, cache_line& line)
{
	if ((index & 0x80) && CCN_CCR.ORA)
		return;

	const u32 base = (line.address << 10) | ((index & 0x1F) << 5);
	u8* dst = GetMemPtr(base, sizeof(line.data));
	if (dst == nullptr)
	{
		const u32 *words = (u32 *)line.data;
		for (u32 w = 0; w < 8; w++)
			_vmem_WriteMem32(base + w * 4, words[w]);
		return;
	}
	memcpy(dst, line.data, sizeof(line.data));
}
// Uncached read: dispatches on sizeof(T) to ReadMem8/16/32/64.
// Any other size aborts through die().
template<class T>
T readMem(u32 address)
{
	if (sizeof(T) == 1)
		return ReadMem8(address);
	if (sizeof(T) == 2)
		return ReadMem16(address);
	if (sizeof(T) == 4)
		return ReadMem32(address);
	if (sizeof(T) == 8)
		return ReadMem64(address);

	die("Invalid data size");
	return 0;
}
// Uncached write: dispatches on sizeof(T) to WriteMem8/16/32/64.
// Any other size aborts through die().
template<class T>
void writeMem(u32 address, T data)
{
	if (sizeof(T) == 1)
		WriteMem8(address, data);
	else if (sizeof(T) == 2)
		WriteMem16(address, data);
	else if (sizeof(T) == 4)
		WriteMem32(address, data);
	else if (sizeof(T) == 8)
		WriteMem64(address, data);
	else
		die("Invalid data size");
}
std::array<cache_line, 512> lines;
};
extern sh4_ocache ocache;

View File

@ -129,22 +129,28 @@ static INLINE void AdjustDelaySlotException(SH4ThrownException& ex)
// The SH4 sets the signaling bit to 0 for qNaN (unlike all recent CPUs). Some games relies on this.
static INLINE f32 fixNaN(f32 f)
{
#ifdef STRICT_MODE
	// Replace any NaN with the bit pattern 0x7fbfffff.
	// The self-comparison is only true for NaN and requires a build
	// without fast-math.
	if (f != f)
		*(u32 *)&f = 0x7fbfffff;
#endif
	// Outside STRICT_MODE the value is returned unchanged.
	return f;
}
static INLINE f64 fixNaN64(f64 f)
{
#ifdef STRICT_MODE
	// Replace any NaN with the bit pattern 0x7ff7ffffffffffff.
	// The self-comparison is only true for NaN and requires a build
	// without fast-math.
	if (f != f)
		*(u64 *)&f = 0x7ff7ffffffffffffll;
#endif
	// Outside STRICT_MODE the value is returned unchanged.
	return f;
}

View File

@ -33,31 +33,28 @@ static INLINE void ChangeFP()
}
}
//called when sr is changed and we must check for reg banks etc.. , returns true if interrupts got
//called when sr is changed and we must check for reg banks etc.
//returns true if interrupt pending
bool UpdateSR()
{
if (sr.MD)
{
if (old_sr.RB !=sr.RB)
if (old_sr.RB != sr.RB)
ChangeGPR();//bank change
}
else
{
if (sr.RB)
{
WARN_LOG(SH4, "UpdateSR MD=0;RB=1 , this must not happen");
sr.RB =0;//error - must always be 0
}
if (old_sr.RB)
ChangeGPR();//switch
}
old_sr.status=sr.status;
old_sr.status = sr.status;
old_sr.RB &= sr.MD;
return SRdecode();
}
//make x86 and sh4 float status registers match ;)
//make host and sh4 float status registers match ;)
u32 old_rm=0xFF;
u32 old_dn=0xFF;
@ -132,7 +129,7 @@ static void SetFloatStatusReg()
: "r"(off_mask), "r"(on_mask)
);
#else
printf("SetFloatStatusReg: Unsupported platform\n");
#error "SetFloatStatusReg: Unsupported platform"
#endif
#endif

View File

@ -84,17 +84,17 @@ static void map_area4(u32 base)
//AREA 5 -- Ext. Device
//Read Ext.Device
template <u32 sz,class T>
template <class T>
T DYNACALL ReadMem_extdev_T(u32 addr)
{
return (T)libExtDevice_ReadMem_A5(addr,sz);
return (T)libExtDevice_ReadMem_A5(addr, sizeof(T));
}
//Write Ext.Device
template <u32 sz,class T>
template <class T>
void DYNACALL WriteMem_extdev_T(u32 addr,T data)
{
libExtDevice_WriteMem_A5(addr,data,sz);
libExtDevice_WriteMem_A5(addr, data, sizeof(T));
}
_vmem_handler area5_handler;

View File

@ -9,6 +9,7 @@
#include "modules/mmu.h"
#include "modules/ccn.h"
#include "modules/modules.h"
#include "sh4_cache.h"
//64bytes of sq // now on context ~
@ -164,9 +165,10 @@ offset>>=2;
//Region P4
//Read P4
template <u32 sz,class T>
template <class T>
T DYNACALL ReadMem_P4(u32 addr)
{
constexpr size_t sz = sizeof(T);
switch((addr>>24)&0xFF)
{
@ -178,10 +180,18 @@ T DYNACALL ReadMem_P4(u32 addr)
return 0;
case 0xF0:
return 0;
DEBUG_LOG(SH4, "IC Address read %08x", addr);
if (sz == 4)
return icache.ReadAddressArray(addr);
else
return 0;
case 0xF1:
return 0;
DEBUG_LOG(SH4, "IC Data read %08x", addr);
if (sz == 4)
return icache.ReadDataArray(addr);
else
return 0;
case 0xF2:
{
@ -196,17 +206,18 @@ T DYNACALL ReadMem_P4(u32 addr)
}
case 0xF4:
{
//int W,Set,A;
//W=(addr>>14)&1;
//A=(addr>>3)&1;
//Set=(addr>>5)&0xFF;
//printf("Unhandled p4 read [Operand cache address array] %d:%d,%d 0x%x\n",Set,W,A,addr);
DEBUG_LOG(SH4, "OC Address read %08x", addr);
if (sz == 4)
return ocache.ReadAddressArray(addr);
else
return 0;
}
case 0xF5:
return 0;
DEBUG_LOG(SH4, "OC Data read %08x", addr);
if (sz == 4)
return ocache.ReadDataArray(addr);
else
return 0;
case 0xF6:
{
@ -237,15 +248,10 @@ T DYNACALL ReadMem_P4(u32 addr)
}
//Write P4
template <u32 sz,class T>
template <class T>
void DYNACALL WriteMem_P4(u32 addr,T data)
{
/*if (((addr>>26)&0x7)==7)
{
WriteMem_area7(addr,data,sz);
return;
}*/
constexpr size_t sz = sizeof(T);
switch((addr>>24)&0xFF)
{
@ -257,9 +263,15 @@ void DYNACALL WriteMem_P4(u32 addr,T data)
break;
case 0xF0:
DEBUG_LOG(SH4, "IC Address write %08x = %x", addr, data);
if (sz == 4)
icache.WriteAddressArray(addr, data);
return;
case 0xF1:
DEBUG_LOG(SH4, "IC Data write %08x = %x", addr, data);
if (sz == 4)
icache.WriteDataArray(addr, data);
return;
case 0xF2:
@ -288,17 +300,15 @@ void DYNACALL WriteMem_P4(u32 addr,T data)
}
case 0xF4:
{
//int W,Set,A;
//W=(addr>>14)&1;
//A=(addr>>3)&1;
//Set=(addr>>5)&0xFF;
//printf("Unhandled p4 Write [Operand cache address array] %d:%d,%d 0x%x = %x\n",Set,W,A,addr,data);
return;
}
DEBUG_LOG(SH4, "OC Address write %08x = %x", addr, data);
if (sz == 4)
ocache.WriteAddressArray(addr, data);
return;
case 0xF5:
//printf("Unhandled p4 Write [Operand cache data array] 0x%x = %x\n",addr,data);
DEBUG_LOG(SH4, "OC Data write %08x = %x", addr, data);
if (sz == 4)
ocache.WriteDataArray(addr, data);
return;
case 0xF6:
@ -380,10 +390,10 @@ void DYNACALL WriteMem_P4(u32 addr,T data)
//***********
//TODO : replace w/ mem mapped array
//Read SQ
template <u32 sz,class T>
template <class T>
T DYNACALL ReadMem_sq(u32 addr)
{
if (sz!=4)
if (sizeof(T) != 4)
{
INFO_LOG(SH4, "Store Queue Error - only 4 byte read are possible[x%X]", addr);
return 0xDE;
@ -396,10 +406,10 @@ T DYNACALL ReadMem_sq(u32 addr)
//Write SQ
template <u32 sz,class T>
template <class T>
void DYNACALL WriteMem_sq(u32 addr,T data)
{
if (sz!=4)
if (sizeof(T) != 4)
INFO_LOG(SH4, "Store Queue Error - only 4 byte writes are possible[x%X=0x%X]", addr, data);
u32 united_offset=addr & 0x3C;
@ -415,9 +425,10 @@ void DYNACALL WriteMem_sq(u32 addr,T data)
#define OUT_OF_RANGE(reg) INFO_LOG(SH4, "Out of range on register %s index %x", reg, addr)
//Read Area7
template <u32 sz,class T>
template <class T>
T DYNACALL ReadMem_area7(u32 addr)
{
constexpr size_t sz = sizeof(T);
/*
if (likely(addr==0xffd80024))
{
@ -435,9 +446,7 @@ T DYNACALL ReadMem_area7(u32 addr)
{
return DMAC_CHCR(2).full;
}
//else if (addr==)
//printf("%08X\n",addr);
addr&=0x1FFFFFFF;
u32 map_base=addr>>16;
switch (map_base & 0x1FFF)
@ -588,15 +597,15 @@ T DYNACALL ReadMem_area7(u32 addr)
break;
}
INFO_LOG(SH4, "Unknown Read from Area7 - addr=%x", addr);
return 0;
}
//Write Area7
template <u32 sz,class T>
template <class T>
void DYNACALL WriteMem_area7(u32 addr,T data)
{
constexpr size_t sz = sizeof(T);
if (likely(addr==0xFF000038))
{
CCN_QACR_write<0>(addr,data);
@ -608,8 +617,6 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
return;
}
//printf("%08X\n",addr);
addr&=0x1FFFFFFF;
u32 map_base=addr>>16;
switch (map_base & 0x1FFF)
@ -756,50 +763,50 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
//On Chip Ram
//***********
//Read OCR
template <u32 sz,class T>
template <class T>
T DYNACALL ReadMem_area7_OCR_T(u32 addr)
{
if (CCN_CCR.ORA)
{
if (sz==1)
if (sizeof(T) == 1)
return (T)OnChipRAM[addr&OnChipRAM_MASK];
else if (sz==2)
else if (sizeof(T) == 2)
return (T)*(u16*)&OnChipRAM[addr&OnChipRAM_MASK];
else if (sz==4)
else if (sizeof(T) == 4)
return (T)*(u32*)&OnChipRAM[addr&OnChipRAM_MASK];
else
{
ERROR_LOG(SH4, "ReadMem_area7_OCR_T: template SZ is wrong = %d", sz);
ERROR_LOG(SH4, "ReadMem_area7_OCR_T: template SZ is wrong = %zd", sizeof(T));
return 0xDE;
}
}
else
{
INFO_LOG(SH4, "On Chip Ram Read, but OCR is disabled");
INFO_LOG(SH4, "On Chip Ram Read, but OCR is disabled. addr %x", addr);
return 0xDE;
}
}
//Write OCR
template <u32 sz,class T>
template <class T>
void DYNACALL WriteMem_area7_OCR_T(u32 addr,T data)
{
if (CCN_CCR.ORA)
{
if (sz==1)
if (sizeof(T) == 1)
OnChipRAM[addr&OnChipRAM_MASK]=(u8)data;
else if (sz==2)
else if (sizeof(T) == 2)
*(u16*)&OnChipRAM[addr&OnChipRAM_MASK]=(u16)data;
else if (sz==4)
else if (sizeof(T) == 4)
*(u32*)&OnChipRAM[addr&OnChipRAM_MASK]=data;
else
{
ERROR_LOG(SH4, "WriteMem_area7_OCR_T: template SZ is wrong = %d", sz);
ERROR_LOG(SH4, "WriteMem_area7_OCR_T: template SZ is wrong = %zd", sizeof(T));
}
}
else
{
INFO_LOG(SH4, "On Chip Ram Write, but OCR is disabled");
INFO_LOG(SH4, "On Chip Ram Write, but OCR is disabled. addr %x", addr);
}
}