use sse for set lookups
This commit is contained in:
parent
a8722d8c56
commit
1afefdce1d
91
src/CP15.cpp
91
src/CP15.cpp
|
@ -18,6 +18,9 @@
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
#include "NDS.h"
|
#include "NDS.h"
|
||||||
#include "DSi.h"
|
#include "DSi.h"
|
||||||
#include "ARM.h"
|
#include "ARM.h"
|
||||||
|
@ -374,10 +377,24 @@ u32 ARMv5::ICacheLookup(const u32 addr)
|
||||||
{
|
{
|
||||||
const u32 tag = (addr & ~(ICACHE_LINELENGTH - 1));
|
const u32 tag = (addr & ~(ICACHE_LINELENGTH - 1));
|
||||||
const u32 id = ((addr >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1)) << ICACHE_SETS_LOG2;
|
const u32 id = ((addr >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1)) << ICACHE_SETS_LOG2;
|
||||||
|
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
__m128i tags; memcpy(&tags, &ICacheTags[id], 16);
|
||||||
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK));
|
||||||
|
__m128i cmp = _mm_set1_epi32(tag | CACHE_FLAG_VALID);
|
||||||
|
tags = _mm_and_si128(tags, mask);
|
||||||
|
cmp = _mm_cmpeq_epi32(tags, cmp);
|
||||||
|
u32 set = _mm_movemask_epi8(cmp);
|
||||||
|
|
||||||
|
if (!set) goto miss;
|
||||||
|
else set = (__builtin_ctz(set) >> 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
#else
|
||||||
for (int set = 0; set < ICACHE_SETS; set++)
|
for (int set = 0; set < ICACHE_SETS; set++)
|
||||||
{
|
{
|
||||||
if ((ICacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == (tag | CACHE_FLAG_VALID))
|
if ((ICacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == (tag | CACHE_FLAG_VALID))
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
u32 *cacheLine = (u32 *)&ICache[(id+set) << ICACHE_LINELENGTH_LOG2];
|
u32 *cacheLine = (u32 *)&ICache[(id+set) << ICACHE_LINELENGTH_LOG2];
|
||||||
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_STREAMING) [[unlikely]]
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_STREAMING) [[unlikely]]
|
||||||
|
@ -403,7 +420,7 @@ u32 ARMv5::ICacheLookup(const u32 addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// cache miss
|
// cache miss
|
||||||
|
miss:
|
||||||
// We do not fill the cacheline if it is disabled in the
|
// We do not fill the cacheline if it is disabled in the
|
||||||
// BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
|
// BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
|
||||||
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_LINEFILL) [[unlikely]]
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_LINEFILL) [[unlikely]]
|
||||||
|
@ -528,9 +545,23 @@ u32 ARMv5::DCacheLookup(const u32 addr)
|
||||||
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1));
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1));
|
||||||
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
||||||
|
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16);
|
||||||
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK));
|
||||||
|
__m128i cmp = _mm_set1_epi32(tag | CACHE_FLAG_VALID);
|
||||||
|
tags = _mm_and_si128(tags, mask);
|
||||||
|
cmp = _mm_cmpeq_epi32(tags, cmp);
|
||||||
|
u32 set = _mm_movemask_epi8(cmp);
|
||||||
|
|
||||||
|
if (!set) goto miss;
|
||||||
|
else set = (__builtin_ctz(set) >> 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
#else
|
||||||
for (int set = 0; set < DCACHE_SETS; set++)
|
for (int set = 0; set < DCACHE_SETS; set++)
|
||||||
{
|
{
|
||||||
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == (tag | CACHE_FLAG_VALID))
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == (tag | CACHE_FLAG_VALID))
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
u32 *cacheLine = (u32 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
u32 *cacheLine = (u32 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
||||||
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_STREAMING) [[unlikely]]
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_STREAMING) [[unlikely]]
|
||||||
|
@ -550,7 +581,7 @@ u32 ARMv5::DCacheLookup(const u32 addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// cache miss
|
// cache miss
|
||||||
|
miss:
|
||||||
// We do not fill the cacheline if it is disabled in the
|
// We do not fill the cacheline if it is disabled in the
|
||||||
// BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
|
// BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
|
||||||
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_LINEFILL) [[unlikely]]
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_LINEFILL) [[unlikely]]
|
||||||
|
@ -632,10 +663,24 @@ bool ARMv5::DCacheWrite32(const u32 addr, const u32 val)
|
||||||
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
||||||
|
|
||||||
//Log(LogLevel::Debug, "Cache write 32: %08lx <= %08lx\n", addr, val);
|
//Log(LogLevel::Debug, "Cache write 32: %08lx <= %08lx\n", addr, val);
|
||||||
|
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16);
|
||||||
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK));
|
||||||
|
__m128i cmp = _mm_set1_epi32(tag);
|
||||||
|
tags = _mm_and_si128(tags, mask);
|
||||||
|
cmp = _mm_cmpeq_epi32(tags, cmp);
|
||||||
|
u32 set = _mm_movemask_epi8(cmp);
|
||||||
|
|
||||||
|
if (!set) return false;
|
||||||
|
else set = (__builtin_ctz(set) >> 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
#else
|
||||||
for (int set = 0; set < DCACHE_SETS; set++)
|
for (int set = 0; set < DCACHE_SETS; set++)
|
||||||
{
|
{
|
||||||
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
u32 *cacheLine = (u32 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
u32 *cacheLine = (u32 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
||||||
cacheLine[(addr & (DCACHE_LINELENGTH-1)) >> 2] = val;
|
cacheLine[(addr & (DCACHE_LINELENGTH-1)) >> 2] = val;
|
||||||
|
@ -667,10 +712,24 @@ bool ARMv5::DCacheWrite16(const u32 addr, const u16 val)
|
||||||
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
||||||
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
||||||
//Log(LogLevel::Debug, "Cache write 16: %08lx <= %04x\n", addr, val);
|
//Log(LogLevel::Debug, "Cache write 16: %08lx <= %04x\n", addr, val);
|
||||||
|
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16);
|
||||||
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK));
|
||||||
|
__m128i cmp = _mm_set1_epi32(tag);
|
||||||
|
tags = _mm_and_si128(tags, mask);
|
||||||
|
cmp = _mm_cmpeq_epi32(tags, cmp);
|
||||||
|
u32 set = _mm_movemask_epi8(cmp);
|
||||||
|
|
||||||
|
if (!set) return false;
|
||||||
|
else set = (__builtin_ctz(set) >> 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
#else
|
||||||
for (int set = 0; set < DCACHE_SETS; set++)
|
for (int set = 0; set < DCACHE_SETS; set++)
|
||||||
{
|
{
|
||||||
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
u16 *cacheLine = (u16 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
u16 *cacheLine = (u16 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
||||||
cacheLine[(addr & (DCACHE_LINELENGTH-1)) >> 1] = val;
|
cacheLine[(addr & (DCACHE_LINELENGTH-1)) >> 1] = val;
|
||||||
|
@ -703,10 +762,24 @@ bool ARMv5::DCacheWrite8(const u32 addr, const u8 val)
|
||||||
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
||||||
|
|
||||||
//Log(LogLevel::Debug, "Cache write 8: %08lx <= %02x\n", addr, val);
|
//Log(LogLevel::Debug, "Cache write 8: %08lx <= %02x\n", addr, val);
|
||||||
|
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16);
|
||||||
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK));
|
||||||
|
__m128i cmp = _mm_set1_epi32(tag);
|
||||||
|
tags = _mm_and_si128(tags, mask);
|
||||||
|
cmp = _mm_cmpeq_epi32(tags, cmp);
|
||||||
|
u32 set = _mm_movemask_epi8(cmp);
|
||||||
|
|
||||||
|
if (!set) return false;
|
||||||
|
else set = (__builtin_ctz(set) >> 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
#else
|
||||||
for (int set = 0; set < DCACHE_SETS; set++)
|
for (int set = 0; set < DCACHE_SETS; set++)
|
||||||
{
|
{
|
||||||
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
u8 *cacheLine = &DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
u8 *cacheLine = &DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
||||||
cacheLine[addr & (DCACHE_LINELENGTH-1)] = val;
|
cacheLine[addr & (DCACHE_LINELENGTH-1)] = val;
|
||||||
|
@ -738,10 +811,24 @@ void ARMv5::DCacheInvalidateByAddr(const u32 addr)
|
||||||
{
|
{
|
||||||
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
||||||
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
||||||
|
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16);
|
||||||
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK));
|
||||||
|
__m128i cmp = _mm_set1_epi32(tag);
|
||||||
|
tags = _mm_and_si128(tags, mask);
|
||||||
|
cmp = _mm_cmpeq_epi32(tags, cmp);
|
||||||
|
u32 set = _mm_movemask_epi8(cmp);
|
||||||
|
|
||||||
|
if (!set) return;
|
||||||
|
else set = (__builtin_ctz(set) >> 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
#else
|
||||||
for (int set = 0; set < DCACHE_SETS; set++)
|
for (int set = 0; set < DCACHE_SETS; set++)
|
||||||
{
|
{
|
||||||
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
//Log(LogLevel::Debug,"DCache invalidated %08lx\n", addr & ~(ICACHE_LINELENGTH-1));
|
//Log(LogLevel::Debug,"DCache invalidated %08lx\n", addr & ~(ICACHE_LINELENGTH-1));
|
||||||
DCacheTags[id+set] &= ~CACHE_FLAG_VALID;
|
DCacheTags[id+set] &= ~CACHE_FLAG_VALID;
|
||||||
|
|
Loading…
Reference in New Issue