parent
9f2b097e96
commit
26a6e887ad
104
src/CP15.cpp
104
src/CP15.cpp
|
@ -20,6 +20,8 @@
|
|||
#include <string.h>
|
||||
#if defined(__x86_64__)
|
||||
#include <emmintrin.h>
|
||||
#elif defined(__ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "NDS.h"
|
||||
#include "DSi.h"
|
||||
|
@ -365,6 +367,25 @@ u32 ARMv5::ICacheLookup(const u32 addr)
|
|||
if (!set) goto miss; // check if none of them were a match
|
||||
else set = (__builtin_ctz(set) >> 2); // count trailing zeros and right shift to figure out which set had a match
|
||||
|
||||
{
|
||||
#elif defined(__ARM_NEON)
|
||||
uint32x4_t tags = { ICacheTags[id+0], ICacheTags[id+1], ICacheTags[id+2], ICacheTags[id+3] }; // load tags
|
||||
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
||||
uint32x4_t cmp = { tag | CACHE_FLAG_VALID,
|
||||
tag | CACHE_FLAG_VALID,
|
||||
tag | CACHE_FLAG_VALID,
|
||||
tag | CACHE_FLAG_VALID }; // load tag and flag we're checking for
|
||||
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
||||
cmp = vceqq_u32(tags, cmp);
|
||||
uint16x4_t res = vmovn_u32(cmp);
|
||||
u64 set; memcpy(&set, &res, 4);
|
||||
|
||||
if (!set) goto miss;
|
||||
else set = __builtin_ctz(set) >> 3;
|
||||
|
||||
{
|
||||
#else
|
||||
// fallback for loop; slow
|
||||
|
@ -535,6 +556,25 @@ u32 ARMv5::DCacheLookup(const u32 addr)
|
|||
if (!set) goto miss; // check if none of them were a match
|
||||
else set = (__builtin_ctz(set) >> 2); // count trailing zeros and right shift to figure out which set had a match
|
||||
|
||||
{
|
||||
#elif defined(__ARM_NEON)
|
||||
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
||||
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
||||
uint32x4_t cmp = { tag | CACHE_FLAG_VALID,
|
||||
tag | CACHE_FLAG_VALID,
|
||||
tag | CACHE_FLAG_VALID,
|
||||
tag | CACHE_FLAG_VALID }; // load tag and flag we're checking for
|
||||
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
||||
cmp = vceqq_u32(tags, cmp);
|
||||
uint16x4_t res = vmovn_u32(cmp);
|
||||
u64 set; memcpy(&set, &res, 4);
|
||||
|
||||
if (!set) goto miss;
|
||||
else set = __builtin_ctz(set) >> 3;
|
||||
|
||||
{
|
||||
#else
|
||||
// fallback for loop; slow
|
||||
|
@ -656,6 +696,22 @@ bool ARMv5::DCacheWrite32(const u32 addr, const u32 val)
|
|||
if (!set) return false; // check if none of them were a match
|
||||
else set = (__builtin_ctz(set) >> 2); // count trailing zeros and right shift to figure out which set had a match
|
||||
|
||||
{
|
||||
#elif defined(__ARM_NEON)
|
||||
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
||||
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
||||
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
||||
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
||||
cmp = vceqq_u32(tags, cmp);
|
||||
uint16x4_t res = vmovn_u32(cmp);
|
||||
u64 set; memcpy(&set, &res, 4);
|
||||
|
||||
if (!set) return false;
|
||||
else set = __builtin_ctz(set) >> 3;
|
||||
|
||||
{
|
||||
#else
|
||||
// fallback for loop; slow
|
||||
|
@ -708,6 +764,22 @@ bool ARMv5::DCacheWrite16(const u32 addr, const u16 val)
|
|||
if (!set) return false; // check if none of them were a match
|
||||
else set = (__builtin_ctz(set) >> 2); // count trailing zeros and right shift to figure out which set had a match
|
||||
|
||||
{
|
||||
#elif defined(__ARM_NEON)
|
||||
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
||||
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
||||
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
||||
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
||||
cmp = vceqq_u32(tags, cmp);
|
||||
uint16x4_t res = vmovn_u32(cmp);
|
||||
u64 set; memcpy(&set, &res, 4);
|
||||
|
||||
if (!set) return false;
|
||||
else set = __builtin_ctz(set) >> 3;
|
||||
|
||||
{
|
||||
#else
|
||||
// fallback for loop; slow
|
||||
|
@ -761,6 +833,22 @@ bool ARMv5::DCacheWrite8(const u32 addr, const u8 val)
|
|||
if (!set) return false; // check if none of them were a match
|
||||
else set = (__builtin_ctz(set) >> 2); // count trailing zeros and right shift to figure out which set had a match
|
||||
|
||||
{
|
||||
#elif defined(__ARM_NEON)
|
||||
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
||||
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
||||
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
||||
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
||||
cmp = vceqq_u32(tags, cmp);
|
||||
uint16x4_t res = vmovn_u32(cmp);
|
||||
u64 set; memcpy(&set, &res, 4);
|
||||
|
||||
if (!set) return false;
|
||||
else set = __builtin_ctz(set) >> 3;
|
||||
|
||||
{
|
||||
#else
|
||||
// fallback for loop; slow
|
||||
|
@ -813,6 +901,22 @@ void ARMv5::DCacheInvalidateByAddr(const u32 addr)
|
|||
if (!set) return; // check if none of them were a match
|
||||
else set = (__builtin_ctz(set) >> 2); // count trailing zeros and right shift to figure out which set had a match
|
||||
|
||||
{
|
||||
#elif defined(__ARM_NEON)
|
||||
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
||||
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
||||
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
||||
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
||||
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
||||
cmp = vceqq_u32(tags, cmp);
|
||||
uint16x4_t res = vmovn_u32(cmp);
|
||||
u64 set; memcpy(&set, &res, 4);
|
||||
|
||||
if (!set) return;
|
||||
else set = __builtin_ctz(set) >> 3;
|
||||
|
||||
{
|
||||
#else
|
||||
// fallback for loop; slow
|
||||
|
|
Loading…
Reference in New Issue