Merge vram dirty tracking

Squashed commit of the following:

commit b463a05d4b909372f0cd1ad91caa0c77a25e5901
Author: RSDuck <rsduck@users.noreply.github.com>
Date:   Mon Nov 30 01:55:35 2020 +0100

    minor fix

commit ce73cebbdf5da243d7ebade82d8799ded9cd6b28
Author: RSDuck <rsduck@users.noreply.github.com>
Date:   Mon Nov 30 00:43:08 2020 +0100

    fix dirty flags of BG/OBJ mappings not being reset

commit fc5d73a6178e3adc444398bdd23de8314b5ca8f8
Author: RSDuck <rsduck@users.noreply.github.com>
Date:   Mon Nov 30 00:11:13 2020 +0100

    use flat vram for gpu2d everywhere

commit 34ee9fe2bf04fcfa2a5a1c8d78d70007e606f1a2
Author: RSDuck <rsduck@users.noreply.github.com>
Date:   Sat Nov 28 19:10:34 2020 +0100

    mark VRAM dirty for display capture

commit e8778fa2f429c6df0eece19d6a5ee83ae23a0cf4
Author: RSDuck <rsduck@users.noreply.github.com>
Date:   Sat Nov 28 18:59:31 2020 +0100

    use flat VRAM for textures and texpals
    also skip rendering if nothing changed and a bunch of fixes

commit 53f2041e2e1a28b35702a2ed51de885c36689f71
Author: RSDuck <rsduck@users.noreply.github.com>
Date:   Fri Nov 27 18:29:56 2020 +0100

    use vram dirty tracking for extpals
    also preparations to take this further

commit 4cdfa329e95aed26d3b21319c8fd86a04abf20f7
Author: RSDuck <rsduck@users.noreply.github.com>
Date:   Mon Nov 16 23:32:22 2020 +0100

    VRAM dirty tracking
This commit is contained in:
RSDuck 2020-11-30 16:58:52 +01:00
parent acb272ed78
commit 6e8bac3909
10 changed files with 765 additions and 215 deletions

View File

@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024];
u8 VRAM_G[ 16*1024]; u8 VRAM_G[ 16*1024];
u8 VRAM_H[ 32*1024]; u8 VRAM_H[ 32*1024];
u8 VRAM_I[ 16*1024]; u8 VRAM_I[ 16*1024];
u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
u8 VRAMCNT[9]; u8 VRAMCNT[9];
u8 VRAMSTAT; u8 VRAMSTAT;
@ -85,6 +85,62 @@ bool Accelerated;
GPU2D* GPU2D_A; GPU2D* GPU2D_A;
GPU2D* GPU2D_B; GPU2D* GPU2D_B;
/*
VRAM invalidation tracking
- we want to know when a VRAM region used for graphics changed
- for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and
we don't want to completely invalidate them every time they're unmapped and remapped
For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank
with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions
like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags.
This is more or less a description of VRAMTrackingSet::DeriveState
Each time before the memory is read two things could have happened
to each 16kb piece (16kb is the smallest unit in which mappings can
be made thus also the size VRAMMap_* use):
- this piece was remapped compared to last time we checked,
which means this location in memory is invalid.
- this piece wasn't remapped, which means we need to check whether
it was changed. This can be achieved by checking VRAMDirty.
VRAMDirty needs to be reset for the respective VRAM bank.
*/
// One tracking set per graphics mapping region. Each set remembers the bank
// mapping it last saw (template arguments: region size in bytes, size of one
// mapping piece in bytes) so DeriveState can tell remapped pieces apart from
// pieces that merely had their contents written.
VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
// "Written" flags, one bit per VRAMDirtyGranularity bytes of a CPU-visible
// mapping region. They are set by the matching WriteVRAM_* function and moved
// into VRAMDirty by SyncDirtyFlags.
NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
// Dirty flags per VRAM bank (index = bank number, A..I), one bit per
// VRAMDirtyGranularity bytes. Every entry is sized for a 128KB bank.
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
// Linear ("flat") copies of each graphics mapping region. They are filled by
// the MakeVRAMFlat_*Coherent functions.
u8 VRAMFlat_ABG[512*1024];
u8 VRAMFlat_BBG[128*1024];
u8 VRAMFlat_AOBJ[256*1024];
u8 VRAMFlat_BOBJ[128*1024];
u8 VRAMFlat_ABGExtPal[32*1024];
u8 VRAMFlat_BBGExtPal[32*1024];
u8 VRAMFlat_AOBJExtPal[8*1024];
u8 VRAMFlat_BOBJExtPal[8*1024];
u8 VRAMFlat_Texture[512*1024];
u8 VRAMFlat_TexPal[128*1024];
bool Init() bool Init()
{ {
@ -113,6 +169,30 @@ void DeInit()
if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; if (Framebuffer[1][1]) delete[] Framebuffer[1][1];
} }
void ResetVRAMCache()
{
for (int i = 0; i < 9; i++)
VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>();
VRAMDirty_ABG.Reset();
VRAMDirty_BBG.Reset();
VRAMDirty_AOBJ.Reset();
VRAMDirty_BOBJ.Reset();
VRAMDirty_ABGExtPal.Reset();
VRAMDirty_BBGExtPal.Reset();
VRAMDirty_AOBJExtPal.Reset();
VRAMDirty_BOBJExtPal.Reset();
memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG));
memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG));
memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ));
memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ));
memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal));
memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal));
memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal));
memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal));
}
void Reset() void Reset()
{ {
VCount = 0; VCount = 0;
@ -186,6 +266,8 @@ void Reset()
GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]);
ResetRenderer(); ResetRenderer();
ResetVRAMCache();
} }
void Stop() void Stop()
@ -261,6 +343,8 @@ void DoSavestate(Savestate* file)
GPU2D_A->DoSavestate(file); GPU2D_A->DoSavestate(file);
GPU2D_B->DoSavestate(file); GPU2D_B->DoSavestate(file);
GPU3D::DoSavestate(file); GPU3D::DoSavestate(file);
ResetVRAMCache();
} }
void AssignFramebuffers() void AssignFramebuffers()
@ -411,18 +495,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
u8* GetUniqueBankPtr(u32 mask, u32 offset) u8* GetUniqueBankPtr(u32 mask, u32 offset)
{ {
if (!mask) return NULL; if (!mask || (mask & (mask - 1)) != 0) return NULL;
int num = __builtin_ctz(mask);
int num = 0;
if (!(mask & 0xFF)) { mask >>= 8; num += 8; }
else
{
if (!(mask & 0xF)) { mask >>= 4; num += 4; }
if (!(mask & 0x3)) { mask >>= 2; num += 2; }
if (!(mask & 0x1)) { mask >>= 1; num += 1; }
}
if (mask != 1) return NULL;
return &VRAM[num][offset & VRAMMask[num]]; return &VRAM[num][offset & VRAMMask[num]];
} }
@ -606,8 +680,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
UNMAP_RANGE(ABGExtPal, 0, 4); UNMAP_RANGE(ABGExtPal, 0, 4);
GPU2D_A->BGExtPalDirty(0);
GPU2D_A->BGExtPalDirty(2);
break; break;
} }
} }
@ -634,8 +706,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
MAP_RANGE(ABGExtPal, 0, 4); MAP_RANGE(ABGExtPal, 0, 4);
GPU2D_A->BGExtPalDirty(0);
GPU2D_A->BGExtPalDirty(2);
break; break;
} }
} }
@ -687,12 +757,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask; VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask;
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask; VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask;
GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1);
break; break;
case 5: // AOBJ ext palette case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal &= ~bankmask; VRAMMap_AOBJExtPal &= ~bankmask;
GPU2D_A->OBJExtPalDirty();
break; break;
} }
} }
@ -732,12 +800,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette case 4: // ABG ext palette
VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask; VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask;
VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask; VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask;
GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1);
break; break;
case 5: // AOBJ ext palette case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal |= bankmask; VRAMMap_AOBJExtPal |= bankmask;
GPU2D_A->OBJExtPalDirty();
break; break;
} }
} }
@ -773,8 +839,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette case 2: // BBG ext palette
UNMAP_RANGE(BBGExtPal, 0, 4); UNMAP_RANGE(BBGExtPal, 0, 4);
GPU2D_B->BGExtPalDirty(0);
GPU2D_B->BGExtPalDirty(2);
break; break;
} }
} }
@ -800,8 +864,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette case 2: // BBG ext palette
MAP_RANGE(BBGExtPal, 0, 4); MAP_RANGE(BBGExtPal, 0, 4);
GPU2D_B->BGExtPalDirty(0);
GPU2D_B->BGExtPalDirty(2);
break; break;
} }
} }
@ -841,7 +903,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal &= ~bankmask; VRAMMap_BOBJExtPal &= ~bankmask;
GPU2D_B->OBJExtPalDirty();
break; break;
} }
} }
@ -871,7 +932,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal |= bankmask; VRAMMap_BOBJExtPal |= bankmask;
GPU2D_B->OBJExtPalDirty();
break; break;
} }
} }
@ -937,6 +997,8 @@ void StartHBlank(u32 line)
DispStat[0] |= (1<<1); DispStat[0] |= (1<<1);
DispStat[1] |= (1<<1); DispStat[1] |= (1<<1);
SyncDirtyFlags();
if (VCount < 192) if (VCount < 192)
{ {
// draw // draw
@ -1096,4 +1158,224 @@ void SetVCount(u16 val)
NextVCount = val; NextVCount = val;
} }
// Works out which VRAMDirtyGranularity-sized chunks of this mapping region
// have to be refreshed.
//
// currentMappings: bank bitmask for every MappingGranularity-sized piece of
//     the region (Size / MappingGranularity entries are read).
// Returns a bitfield with one bit per chunk of the region.
//
// Side effects: Mapping[] is updated to currentMappings, and the VRAMDirty
// bitfield of every bank encountered (in a changed or unchanged piece) is
// cleared completely before returning.
template <u32 Size, u32 MappingGranularity>
NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings)
{
NonStupidBitField<Size/VRAMDirtyGranularity> result;
// bitmask of banks whose dirty flags get cleared at the end
u16 banksToBeZeroed = 0;
for (u32 i = 0; i < Size / MappingGranularity; i++)
{
if (currentMappings[i] != Mapping[i])
{
// piece was remapped since the last call: flag the whole piece
// (presumably the two-argument constructor sets a run of bits given
// start and length -- confirm against NonStupidBitfield.h)
result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
banksToBeZeroed |= currentMappings[i];
Mapping[i] = currentMappings[i];
}
else
{
// mapping unchanged: OR together the dirty flags of every bank mapped here
u32 mapping = Mapping[i];
banksToBeZeroed |= mapping;
while (mapping != 0)
{
// take the lowest set bank bit and remove it from the mask
u32 num = __builtin_ctz(mapping);
mapping &= ~(1 << num);
// hack for **speed**
// this could probably be done less ugly but then we would rely
// on the compiler for vectorisation
static_assert(VRAMDirtyGranularity == 512);
if (MappingGranularity == 16*1024)
{
// 16KB / 512 bytes = 32 flags, i.e. exactly one u32 word per piece.
// (VRAMMask[num] >> 14) wraps the piece index to the size of the bank.
u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)];
((u32*)result.Data)[i] |= dirty;
}
else if (MappingGranularity == 8*1024)
{
// 8KB / 512 bytes = 16 flags, i.e. exactly one u16 word per piece
u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
((u16*)result.Data)[i] |= dirty;
}
else if (MappingGranularity == 128*1024)
{
// 128KB / 512 bytes = 256 flags = four u64 words: the complete
// bitfield of the bank is merged into piece i of the result
((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0];
((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
}
else
{
// no word-sized shortcut exists for any other mapping granularity
abort();
}
}
}
}
// the flags have been consumed: clear them for every bank seen above
while (banksToBeZeroed != 0)
{
u32 num = __builtin_ctz(banksToBeZeroed);
banksToBeZeroed &= ~(1 << num);
memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data));
}
return result;
}
// Explicit instantiations for the <Size, MappingGranularity> combinations used
// by the VRAMDirty_* tracking sets; the definition above lives in this file
// rather than in the header.
template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*);
template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*);
template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*);
template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*);
template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*);
template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*);
template <u32 Size>
void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags)
{
const u32 VRAMWrittenBitsPer16KB = 16*1024/VRAMDirtyGranularity;
for (typename NonStupidBitField<Size>::Iterator it = writtenFlags.Begin(); it != writtenFlags.End(); it++)
{
u32 mapping = mappings[*it / VRAMWrittenBitsPer16KB];
while (mapping != 0)
{
u32 num = __builtin_ctz(mapping);
VRAMDirty[num][*it & (VRAMMask[num] / VRAMDirtyGranularity)] = true;
mapping &= ~(1 << num);
}
}
memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
}
void SyncDirtyFlags()
{
SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG);
SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ);
SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG);
SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ);
SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7);
}
// Refreshes the chunks of a flat VRAM copy that are flagged in `dirty`.
//
// flat: destination buffer covering the whole mapping region.
// mappings: bank bitmask for every MappingGranularity-sized piece of the region.
// dirty: one bit per VRAMDirtyGranularity bytes; only flagged chunks are copied.
// slowAccess: reader used when GetUniqueBankPtr yields no direct pointer for a
//     chunk; it is called once per 8 bytes with the offset inside the region.
// Returns true if at least one chunk was copied.
template <u32 MappingGranularity, u32 Size>
inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr))
{
    const u32 bitsPerMapping = MappingGranularity / VRAMDirtyGranularity;

    bool copiedAnything = false;
    for (typename NonStupidBitField<Size>::Iterator cur = dirty.Begin(); cur != dirty.End(); cur++)
    {
        const u32 byteOffset = *cur * VRAMDirtyGranularity;
        u8* const target = flat + byteOffset;

        if (u8* direct = GetUniqueBankPtr(mappings[*cur / bitsPerMapping], byteOffset))
        {
            // direct pointer into a single bank: copy the chunk in one go
            memcpy(target, direct, VRAMDirtyGranularity);
        }
        else
        {
            // no direct pointer: assemble the chunk 64 bits at a time
            for (u32 pos = 0; pos < VRAMDirtyGranularity; pos += 8)
                *(u64*)&target[pos] = slowAccess(byteOffset + pos);
        }

        copiedAnything = true;
    }

    return copiedAnything;
}
// Each MakeVRAMFlat_*Coherent function below refreshes the chunks flagged in
// `dirty` of one flat VRAM copy through CopyLinearVRAM and returns true if at
// least one chunk was copied. The template argument is the size in bytes of
// one entry of the mapping table that is passed along.

// Texture memory: mapped in 128KB pieces.
bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture<u64>);
}
// Texture palettes: mapped in 16KB pieces.
bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal<u64>);
}
// Engine A backgrounds.
bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG<u64>);
}
// Engine B backgrounds.
bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG<u64>);
}
// Engine A objects.
bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ<u64>);
}
// Engine B objects.
bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ<u64>);
}
// Reads a value of type T from the engine A BG extended palette region.
// The 8KB slot is selected by bits 13-14 of addr; the contents of every bank
// mapped to that slot (E, F, G = mask bits 4, 5, 6) are ORed together, and 0
// is returned when no bank is mapped.
template<typename T>
T ReadVRAM_ABGExtPal(u32 addr)
{
    const u32 banks = VRAMMap_ABGExtPal[(addr >> 13) & 0x3];

    T value = 0;

    if (banks & (1<<4))
        value |= *(T*)&VRAM_E[addr & 0x7FFF];
    if (banks & (1<<5))
        value |= *(T*)&VRAM_F[addr & 0x3FFF];
    if (banks & (1<<6))
        value |= *(T*)&VRAM_G[addr & 0x3FFF];

    return value;
}
// Reads a value of type T from the engine B BG extended palette region.
// The 8KB slot is selected by bits 13-14 of addr; the value comes from bank H
// (mask bit 7) when it is mapped to that slot, otherwise 0 is returned.
template<typename T>
T ReadVRAM_BBGExtPal(u32 addr)
{
    const u32 banks = VRAMMap_BBGExtPal[(addr >> 13) & 0x3];

    T value = 0;

    if (banks & (1<<7))
        value |= *(T*)&VRAM_H[addr & 0x7FFF];

    return value;
}
// Reads a value of type T from the engine A OBJ extended palette region (8KB).
// The contents of every bank mapped there are ORed together; 0 is returned
// when no bank is mapped.
//
// Mask bits are bank numbers in VRAM[] order, so bank F is bit 5 and bank G is
// bit 6, the same bits ReadVRAM_ABGExtPal tests for these two banks.
template<typename T>
T ReadVRAM_AOBJExtPal(u32 addr)
{
    u32 mask = VRAMMap_AOBJExtPal;

    T ret = 0;
    if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x1FFF];
    if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x1FFF];

    return ret;
}
// Reads a value of type T from the engine B OBJ extended palette region (8KB).
// The value comes from bank I (mask bit 8) when it is mapped, otherwise 0 is
// returned.
template<typename T>
T ReadVRAM_BOBJExtPal(u32 addr)
{
    const u32 banks = VRAMMap_BOBJExtPal;

    T value = 0;

    if (banks & (1<<8))
        value |= *(T*)&VRAM_I[addr & 0x1FFF];

    return value;
}
// Extended palette variants of MakeVRAMFlat_*Coherent: each refreshes the
// chunks flagged in `dirty` of one flat extended palette copy through
// CopyLinearVRAM, using 8KB mapping pieces, and returns true if at least one
// chunk was copied.

// Engine A BG extended palettes (four 8KB slots).
bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal<u64>);
}
// Engine B BG extended palettes (four 8KB slots).
bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal<u64>);
}
// Engine A OBJ extended palette: the mapping is a single value, so its address
// is passed as a one-entry mapping table.
bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal<u64>);
}
// Engine B OBJ extended palette: the mapping is a single value, so its address
// is passed as a one-entry mapping table.
bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
{
return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal<u64>);
}
} }

View File

@ -20,6 +20,7 @@
#define GPU_H #define GPU_H
#include "GPU2D.h" #include "GPU2D.h"
#include "NonStupidBitfield.h"
namespace GPU namespace GPU
{ {
@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024];
extern u8 VRAM_H[ 32*1024]; extern u8 VRAM_H[ 32*1024];
extern u8 VRAM_I[ 16*1024]; extern u8 VRAM_I[ 16*1024];
extern u8* VRAM[9]; extern u8* const VRAM[9];
extern u32 VRAMMap_LCDC; extern u32 VRAMMap_LCDC;
extern u32 VRAMMap_ABG[0x20]; extern u32 VRAMMap_ABG[0x20];
@ -73,6 +74,73 @@ extern GPU2D* GPU2D_B;
extern int Renderer; extern int Renderer;
// Number of bytes of VRAM covered by one dirty/written flag.
const u32 VRAMDirtyGranularity = 512;
// "Written" flags per CPU-visible mapping region; they are set by the
// WriteVRAM_* functions in this header and flushed by SyncDirtyFlags.
extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
// Dirty flags per VRAM bank (index = bank number), each sized for 128KB.
extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
// Remembers the bank mapping last seen for one graphics mapping region so
// that DeriveState can tell which parts of the region changed.
//
// Size: size of the mapping region in bytes.
// MappingGranularity: number of bytes covered by one mapping entry.
template <u32 Size, u32 MappingGranularity>
struct VRAMTrackingSet
{
    // bank bitmask per mapping piece, as of the last DeriveState call
    u16 Mapping[Size / MappingGranularity];

    // Number of dirty flags that make up one mapping piece. This is a
    // compile-time constant, so it is shared by all instances instead of being
    // stored in every object (a const data member would also delete the
    // implicit copy assignment operator).
    static constexpr u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;

    // Forgets the recorded mapping: every piece is recorded as "no bank mapped".
    void Reset()
    {
        memset(Mapping, 0, sizeof(Mapping));
    }

    // Returns the dirty chunks of the region for the given current mapping
    // (one entry per mapping piece) and records that mapping. Defined in
    // GPU.cpp.
    NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings);
};
// Tracking sets, one per graphics mapping region
// (template arguments: region size, size of one mapping piece).
extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
// Linear copies of each graphics mapping region, brought up to date by the
// MakeVRAMFlat_*Coherent functions declared below.
extern u8 VRAMFlat_ABG[512*1024];
extern u8 VRAMFlat_BBG[128*1024];
extern u8 VRAMFlat_AOBJ[256*1024];
extern u8 VRAMFlat_BOBJ[128*1024];
extern u8 VRAMFlat_ABGExtPal[32*1024];
extern u8 VRAMFlat_BBGExtPal[32*1024];
extern u8 VRAMFlat_AOBJExtPal[8*1024];
extern u8 VRAMFlat_BOBJExtPal[8*1024];
extern u8 VRAMFlat_Texture[512*1024];
extern u8 VRAMFlat_TexPal[128*1024];
// Each function copies the chunks flagged in `dirty` (one bit per
// VRAMDirtyGranularity bytes) into the matching VRAMFlat_* buffer and returns
// true if anything was copied. `dirty` is typically the result of DeriveState
// on the matching tracking set.
bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
// Moves the VRAMWritten_* flags into the per-bank VRAMDirty flags.
void SyncDirtyFlags();
typedef struct typedef struct
{ {
@ -233,7 +301,11 @@ void WriteVRAM_LCDC(u32 addr, T val)
default: return; default: return;
} }
if (VRAMMap_LCDC & (1<<bank)) *(T*)&VRAM[bank][addr] = val; if (VRAMMap_LCDC & (1<<bank))
{
*(T*)&VRAM[bank][addr] = val;
VRAMDirty[bank][addr / VRAMDirtyGranularity] = true;
}
} }
@ -262,6 +334,8 @@ void WriteVRAM_ABG(u32 addr, T val)
{ {
u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F];
VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
@ -295,6 +369,8 @@ void WriteVRAM_AOBJ(u32 addr, T val)
{ {
u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF];
VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val; if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val;
@ -324,6 +400,8 @@ void WriteVRAM_BBG(u32 addr, T val)
{ {
u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7];
VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val; if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
@ -350,11 +428,12 @@ void WriteVRAM_BOBJ(u32 addr, T val)
{ {
u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7];
VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
} }
template<typename T> template<typename T>
T ReadVRAM_ARM7(u32 addr) T ReadVRAM_ARM7(u32 addr)
{ {
@ -372,6 +451,8 @@ void WriteVRAM_ARM7(u32 addr, T val)
{ {
u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1];
VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
} }

View File

@ -148,12 +148,6 @@ void GPU2D::Reset()
CaptureCnt = 0; CaptureCnt = 0;
MasterBrightness = 0; MasterBrightness = 0;
BGExtPalStatus[0] = 0;
BGExtPalStatus[1] = 0;
BGExtPalStatus[2] = 0;
BGExtPalStatus[3] = 0;
OBJExtPalStatus = 0;
} }
void GPU2D::DoSavestate(Savestate* file) void GPU2D::DoSavestate(Savestate* file)
@ -208,13 +202,6 @@ void GPU2D::DoSavestate(Savestate* file)
if (!file->Saving) if (!file->Saving)
{ {
// refresh those
BGExtPalStatus[0] = 0;
BGExtPalStatus[1] = 0;
BGExtPalStatus[2] = 0;
BGExtPalStatus[3] = 0;
OBJExtPalStatus = 0;
CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]]; CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
} }
@ -758,6 +745,25 @@ void GPU2D::DrawScanline(u32 line)
int n3dline = line; int n3dline = line;
line = GPU::VCount; line = GPU::VCount;
if (Num == 0)
{
auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG);
GPU::MakeVRAMFlat_ABGCoherent(bgDirty);
auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal);
GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty);
auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal);
GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty);
}
else
{
auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG);
GPU::MakeVRAMFlat_BBGCoherent(bgDirty);
auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal);
GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty);
auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal);
GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty);
}
bool forceblank = false; bool forceblank = false;
// scanlines that end up outside of the GPU drawing range // scanlines that end up outside of the GPU drawing range
@ -970,6 +976,9 @@ void GPU2D::DoCapture(u32 line, u32 width)
u16* dst = (u16*)GPU::VRAM[dstvram]; u16* dst = (u16*)GPU::VRAM[dstvram];
u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
static_assert(GPU::VRAMDirtyGranularity == 512);
// dstaddr is computed in 16-bit units (dst is a u16*), while the dirty flags
// cover 512 bytes each: convert to a byte offset inside the 128KB bank before
// picking the flag, otherwise the wrong chunk is marked.
GPU::VRAMDirty[dstvram][((dstaddr * 2) & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true;
// TODO: handle 3D in accelerated mode!! // TODO: handle 3D in accelerated mode!!
u32* srcA; u32* srcA;
@ -1188,85 +1197,20 @@ void GPU2D::SampleFIFO(u32 offset, u32 num)
} }
} }
void GPU2D::BGExtPalDirty(u32 base)
{
BGExtPalStatus[base] = 0;
BGExtPalStatus[base+1] = 0;
}
void GPU2D::OBJExtPalDirty()
{
OBJExtPalStatus = 0;
}
u16* GPU2D::GetBGExtPal(u32 slot, u32 pal) u16* GPU2D::GetBGExtPal(u32 slot, u32 pal)
{ {
u16* dst = &BGExtPalCache[slot][pal << 8]; const u32 PaletteSize = 256 * 2;
const u32 SlotSize = PaletteSize * 16;
if (!(BGExtPalStatus[slot] & (1<<pal))) return (u16*)&(Num == 0
{ ? GPU::VRAMFlat_ABGExtPal
if (Num) : GPU::VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize];
{
if (GPU::VRAMMap_BBGExtPal[slot] & (1<<7))
memcpy(dst, &GPU::VRAM_H[(slot << 13) + (pal << 9)], 256*2);
else
memset(dst, 0, 256*2);
}
else
{
memset(dst, 0, 256*2);
if (GPU::VRAMMap_ABGExtPal[slot] & (1<<4))
for (int i = 0; i < 256; i+=2)
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_E[(slot << 13) + (pal << 9) + (i << 1)];
if (GPU::VRAMMap_ABGExtPal[slot] & (1<<5))
for (int i = 0; i < 256; i+=2)
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[((slot&1) << 13) + (pal << 9) + (i << 1)];
if (GPU::VRAMMap_ABGExtPal[slot] & (1<<6))
for (int i = 0; i < 256; i+=2)
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[((slot&1) << 13) + (pal << 9) + (i << 1)];
}
BGExtPalStatus[slot] |= (1<<pal);
}
return dst;
} }
u16* GPU2D::GetOBJExtPal() u16* GPU2D::GetOBJExtPal()
{ {
u16* dst = OBJExtPalCache; return Num == 0
? (u16*)GPU::VRAMFlat_AOBJExtPal
if (!OBJExtPalStatus) : (u16*)GPU::VRAMFlat_BOBJExtPal;
{
if (Num)
{
if (GPU::VRAMMap_BOBJExtPal & (1<<8))
memcpy(dst, &GPU::VRAM_I[0], 16*256*2);
else
memset(dst, 0, 16*256*2);
}
else
{
memset(dst, 0, 16*256*2);
if (GPU::VRAMMap_AOBJExtPal & (1<<5))
for (int i = 0; i < 16*256; i+=2)
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[i << 1];
if (GPU::VRAMMap_AOBJExtPal & (1<<6))
for (int i = 0; i < 16*256; i+=2)
*(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[i << 1];
}
OBJExtPalStatus = 1;
}
return dst;
} }
@ -1697,6 +1641,20 @@ void GPU2D::DrawBG_3D()
} }
} }
// Selects the flat BG VRAM copy for a 2D engine.
// num: engine number (0 = engine A, anything else = engine B).
// data: receives the base pointer of the flat copy.
// mask: receives the offset mask for that copy (size of the copy minus one).
void GetBGVRAM(u32 num, u8*& data, u32& mask)
{
    const bool engineA = (num == 0);

    data = engineA ? GPU::VRAMFlat_ABG : GPU::VRAMFlat_BBG;
    mask = engineA ? 0x7FFFF : 0x1FFFF;
}
template<bool mosaic, GPU2D::DrawPixel drawPixel> template<bool mosaic, GPU2D::DrawPixel drawPixel>
void GPU2D::DrawBG_Text(u32 line, u32 bgnum) void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
{ {
@ -1720,17 +1678,20 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
extpal = (DispCnt & 0x40000000); extpal = (DispCnt & 0x40000000);
if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum; if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum;
u8* bgvram;
u32 bgvrammask;
GetBGVRAM(Num, bgvram, bgvrammask);
if (Num) if (Num)
{ {
tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); tilesetaddr = ((bgcnt & 0x003C) << 12);
tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400]; pal = (u16*)&GPU::Palette[0x400];
} }
else else
{ {
tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0]; pal = (u16*)&GPU::Palette[0];
} }
@ -1758,7 +1719,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
// preload shit as needed // preload shit as needed
if ((xoff & 0x7) || mosaic) if ((xoff & 0x7) || mosaic)
{ {
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
else curpal = pal; else curpal = pal;
@ -1779,7 +1740,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
(mosaic && ((xpos >> 3) != (lastxpos >> 3)))) (mosaic && ((xpos >> 3) != (lastxpos >> 3))))
{ {
// load a new tile // load a new tile
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
else curpal = pal; else curpal = pal;
@ -1794,7 +1755,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
if (WindowMask[i] & (1<<bgnum)) if (WindowMask[i] & (1<<bgnum))
{ {
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7); u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff); color = bgvram[(pixelsaddr + tilexoff) & bgvrammask];
if (color) if (color)
drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
@ -1810,7 +1771,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
// preload shit as needed // preload shit as needed
if ((xoff & 0x7) || mosaic) if ((xoff & 0x7) || mosaic)
{ {
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); curtile = *(u16*)&bgvram[((tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3))) & bgvrammask];
curpal = pal + ((curtile & 0xF000) >> 8); curpal = pal + ((curtile & 0xF000) >> 8);
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
@ -1828,7 +1789,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
(mosaic && ((xpos >> 3) != (lastxpos >> 3)))) (mosaic && ((xpos >> 3) != (lastxpos >> 3))))
{ {
// load a new tile // load a new tile
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
curpal = pal + ((curtile & 0xF000) >> 8); curpal = pal + ((curtile & 0xF000) >> 8);
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
@ -1842,11 +1803,11 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7); u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
if (tilexoff & 0x1) if (tilexoff & 0x1)
{ {
color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4; color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] >> 4;
} }
else else
{ {
color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F; color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F;
} }
if (color) if (color)
@ -1895,17 +1856,20 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
rotY -= (BGMosaicY * rotD); rotY -= (BGMosaicY * rotD);
} }
u8* bgvram;
u32 bgvrammask;
if (Num) if (Num)
{ {
tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); tilesetaddr = ((bgcnt & 0x003C) << 12);
tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400]; pal = (u16*)&GPU::Palette[0x400];
} }
else else
{ {
tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0]; pal = (u16*)&GPU::Palette[0];
} }
@ -1934,13 +1898,13 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
if ((!((finalX|finalY) & overflowmask))) if ((!((finalX|finalY) & overflowmask)))
{ {
curtile = GPU::ReadVRAM_BG<u8>(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))); curtile = bgvram[(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask];
// draw pixel // draw pixel
u32 tilexoff = (finalX >> 8) & 0x7; u32 tilexoff = (finalX >> 8) & 0x7;
u32 tileyoff = (finalY >> 8) & 0x7; u32 tileyoff = (finalY >> 8) & 0x7;
color = GPU::ReadVRAM_BG<u8>(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff); color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
if (color) if (color)
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
@ -1964,6 +1928,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
u16* pal; u16* pal;
u32 extpal; u32 extpal;
u8* bgvram;
u32 bgvrammask;
GetBGVRAM(Num, bgvram, bgvrammask);
extpal = (DispCnt & 0x40000000); extpal = (DispCnt & 0x40000000);
s16 rotA = BGRotA[bgnum-2]; s16 rotA = BGRotA[bgnum-2];
@ -2007,8 +1975,8 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
ofymask = ~ymask; ofymask = ~ymask;
} }
if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6); if (Num) tilemapaddr = ((bgcnt & 0x1F00) << 6);
else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6); else tilemapaddr = ((bgcnt & 0x1F00) << 6);
if (bgcnt & 0x0004) if (bgcnt & 0x0004)
{ {
@ -2035,7 +2003,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (!(finalX & ofxmask) && !(finalY & ofymask)) if (!(finalX & ofxmask) && !(finalY & ofymask))
{ {
color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)); color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask];
if (color & 0x8000) if (color & 0x8000)
drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum); drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum);
@ -2074,7 +2042,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (!(finalX & ofxmask) && !(finalY & ofymask)) if (!(finalX & ofxmask) && !(finalY & ofymask))
{ {
color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
if (color) if (color)
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
@ -2106,15 +2074,15 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (Num) if (Num)
{ {
tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); tilesetaddr = ((bgcnt & 0x003C) << 12);
tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400]; pal = (u16*)&GPU::Palette[0x400];
} }
else else
{ {
tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0]; pal = (u16*)&GPU::Palette[0];
} }
@ -2144,7 +2112,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if ((!((finalX|finalY) & overflowmask))) if ((!((finalX|finalY) & overflowmask)))
{ {
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)); curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12); if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12);
else curpal = pal; else curpal = pal;
@ -2156,7 +2124,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (curtile & 0x0400) tilexoff = 7-tilexoff; if (curtile & 0x0400) tilexoff = 7-tilexoff;
if (curtile & 0x0800) tileyoff = 7-tileyoff; if (curtile & 0x0800) tileyoff = 7-tileyoff;
color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
if (color) if (color)
drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
@ -2222,8 +2190,9 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
rotY -= (BGMosaicY * rotD); rotY -= (BGMosaicY * rotD);
} }
if (Num) tilemapaddr = 0x06200000; u8* bgvram;
else tilemapaddr = 0x06000000; u32 bgvrammask;
GetBGVRAM(Num, bgvram, bgvrammask);
// 256-color bitmap // 256-color bitmap
@ -2251,7 +2220,7 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
if (!(finalX & ofxmask) && !(finalY & ofymask)) if (!(finalX & ofxmask) && !(finalY & ofymask))
{ {
color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
if (color) if (color)
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
@ -2346,6 +2315,20 @@ void GPU2D::InterleaveSprites(u32 prio)
} }
} }
// Hands back the flat OBJ VRAM view selected by `num`.
//   num  - 0 selects GPU::VRAMFlat_AOBJ, any other value GPU::VRAMFlat_BOBJ
//   data - receives the base pointer of the selected buffer
//   mask - receives the offset mask callers AND into an address before
//          indexing `data` (0x3FFFF for AOBJ, 0x1FFFF for BOBJ)
void GetOBJVRAM(u32 num, u8*& data, u32& mask)
{
    if (num != 0)
    {
        data = GPU::VRAMFlat_BOBJ;
        mask = 0x1FFFF;
        return;
    }

    data = GPU::VRAMFlat_AOBJ;
    mask = 0x3FFFF;
}
#define DoDrawSprite(type, ...) \ #define DoDrawSprite(type, ...) \
if (iswin) \ if (iswin) \
{ \ { \
@ -2370,6 +2353,17 @@ void GPU2D::DrawSprites(u32 line)
OBJMosaicYCount = 0; OBJMosaicYCount = 0;
} }
if (Num == 0)
{
auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ);
GPU::MakeVRAMFlat_AOBJCoherent(objDirty);
}
else
{
auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ);
GPU::MakeVRAMFlat_BOBJCoherent(objDirty);
}
NumSprites = 0; NumSprites = 0;
memset(OBJLine, 0, 256*4); memset(OBJLine, 0, 256*4);
memset(OBJWindow, 0, 256); memset(OBJWindow, 0, 256);
@ -2482,6 +2476,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
u32 ytilefactor; u32 ytilefactor;
u8* objvram;
u32 objvrammask;
GetOBJVRAM(Num, objvram, objvrammask);
s32 centerX = boundwidth >> 1; s32 centerX = boundwidth >> 1;
s32 centerY = boundheight >> 1; s32 centerY = boundheight >> 1;
@ -2525,6 +2523,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
pixelattr |= (0xC0000000 | (alpha << 24)); pixelattr |= (0xC0000000 | (alpha << 24));
u32 pixelsaddr;
if (DispCnt & 0x40) if (DispCnt & 0x40)
{ {
if (DispCnt & 0x20) if (DispCnt & 0x20)
@ -2536,7 +2535,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
} }
else else
{ {
tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1));
ytilefactor = ((width >> 8) * 2); ytilefactor = ((width >> 8) * 2);
} }
} }
@ -2544,23 +2543,21 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{ {
if (DispCnt & 0x20) if (DispCnt & 0x20)
{ {
tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
ytilefactor = (256 * 2); ytilefactor = (256 * 2);
} }
else else
{ {
tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
ytilefactor = (128 * 2); ytilefactor = (128 * 2);
} }
} }
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
for (; xoff < boundwidth;) for (; xoff < boundwidth;)
{ {
if ((u32)rotX < width && (u32)rotY < height) if ((u32)rotX < width && (u32)rotY < height)
{ {
color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)); color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask];
if (color & 0x8000) if (color & 0x8000)
{ {
@ -2585,9 +2582,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
} }
else else
{ {
u32 pixelsaddr = tilenum;
if (DispCnt & 0x10) if (DispCnt & 0x10)
{ {
tilenum <<= ((DispCnt >> 20) & 0x3); pixelsaddr <<= ((DispCnt >> 20) & 0x3);
ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0); ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0);
} }
else else
@ -2601,9 +2599,8 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
if (attrib[0] & 0x2000) if (attrib[0] & 0x2000)
{ {
// 256-color // 256-color
tilenum <<= 5;
ytilefactor <<= 5; ytilefactor <<= 5;
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; pixelsaddr <<= 5;
if (!window) if (!window)
{ {
@ -2617,7 +2614,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{ {
if ((u32)rotX < width && (u32)rotY < height) if ((u32)rotX < width && (u32)rotY < height)
{ {
color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)); color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask];
if (color) if (color)
{ {
@ -2657,7 +2654,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{ {
if ((u32)rotX < width && (u32)rotY < height) if ((u32)rotX < width && (u32)rotY < height)
{ {
color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)); color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask];
if (rotX & 0x100) if (rotX & 0x100)
color >>= 4; color >>= 4;
else else
@ -2705,6 +2702,10 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
pixelattr |= 0x100000; pixelattr |= 0x100000;
} }
u8* objvram;
u32 objvrammask;
GetOBJVRAM(Num, objvram, objvrammask);
// yflip // yflip
if (attrib[1] & 0x2000) if (attrib[1] & 0x2000)
ypos = height-1 - ypos; ypos = height-1 - ypos;
@ -2735,6 +2736,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
pixelattr |= (0xC0000000 | (alpha << 24)); pixelattr |= (0xC0000000 | (alpha << 24));
u32 pixelsaddr = tilenum;
if (DispCnt & 0x40) if (DispCnt & 0x40)
{ {
if (DispCnt & 0x20) if (DispCnt & 0x20)
@ -2746,25 +2748,24 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
} }
else else
{ {
tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1));
tilenum += (ypos * width * 2); pixelsaddr += (ypos * width * 2);
} }
} }
else else
{ {
if (DispCnt & 0x20) if (DispCnt & 0x20)
{ {
tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
tilenum += (ypos * 256 * 2); pixelsaddr += (ypos * 256 * 2);
} }
else else
{ {
tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
tilenum += (ypos * 128 * 2); pixelsaddr += (ypos * 128 * 2);
} }
} }
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
s32 pixelstride; s32 pixelstride;
if (attrib[1] & 0x1000) // xflip if (attrib[1] & 0x1000) // xflip
@ -2781,7 +2782,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
for (; xoff < xend;) for (; xoff < xend;)
{ {
color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr); color = *(u16*)&objvram[pixelsaddr & objvrammask];
pixelsaddr += pixelstride; pixelsaddr += pixelstride;
@ -2805,14 +2806,15 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
} }
else else
{ {
u32 pixelsaddr = tilenum;
if (DispCnt & 0x10) if (DispCnt & 0x10)
{ {
tilenum <<= ((DispCnt >> 20) & 0x3); pixelsaddr <<= ((DispCnt >> 20) & 0x3);
tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
} }
else else
{ {
tilenum += ((ypos >> 3) * 0x20); pixelsaddr += ((ypos >> 3) * 0x20);
} }
if (spritemode == 1) pixelattr |= 0x80000000; if (spritemode == 1) pixelattr |= 0x80000000;
@ -2821,8 +2823,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
if (attrib[0] & 0x2000) if (attrib[0] & 0x2000)
{ {
// 256-color // 256-color
tilenum <<= 5; pixelsaddr <<= 5;
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
pixelsaddr += ((ypos & 0x7) << 3); pixelsaddr += ((ypos & 0x7) << 3);
s32 pixelstride; s32 pixelstride;
@ -2851,7 +2852,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
for (; xoff < xend;) for (; xoff < xend;)
{ {
color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr); color = objvram[pixelsaddr];
pixelsaddr += pixelstride; pixelsaddr += pixelstride;
@ -2877,8 +2878,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
else else
{ {
// 16-color // 16-color
tilenum <<= 5; pixelsaddr <<= 5;
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
pixelsaddr += ((ypos & 0x7) << 2); pixelsaddr += ((ypos & 0x7) << 2);
s32 pixelstride; s32 pixelstride;
@ -2910,13 +2910,13 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
{ {
if (attrib[1] & 0x1000) if (attrib[1] & 0x1000)
{ {
if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; pixelsaddr--; } if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; }
else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; else color = objvram[pixelsaddr & objvrammask] >> 4;
} }
else else
{ {
if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; pixelsaddr++; } if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; }
else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; else color = objvram[pixelsaddr & objvrammask] & 0x0F;
} }
if (color) if (color)

View File

@ -59,9 +59,6 @@ public:
void CheckWindows(u32 line); void CheckWindows(u32 line);
void BGExtPalDirty(u32 base);
void OBJExtPalDirty();
u16* GetBGExtPal(u32 slot, u32 pal); u16* GetBGExtPal(u32 slot, u32 pal);
u16* GetOBJExtPal(); u16* GetOBJExtPal();
@ -128,9 +125,6 @@ private:
u16 MasterBrightness; u16 MasterBrightness;
u16 BGExtPalCache[4][16*256]; u16 BGExtPalCache[4][16*256];
u16 OBJExtPalCache[16*256];
u32 BGExtPalStatus[4];
u32 OBJExtPalStatus;
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
u32 ColorBlend5(u32 val1, u32 val2); u32 ColorBlend5(u32 val1, u32 val2);

View File

@ -179,6 +179,8 @@ u8 RenderFogDensityTable[34];
u32 RenderClearAttr1, RenderClearAttr2; u32 RenderClearAttr1, RenderClearAttr2;
bool RenderFrameIdentical;
u32 ZeroDotWLimit; u32 ZeroDotWLimit;
u32 GXStat; u32 GXStat;
@ -2491,6 +2493,19 @@ void VBlank()
} }
RenderNumPolygons = NumPolygons; RenderNumPolygons = NumPolygons;
RenderFrameIdentical = false;
}
else
{
RenderFrameIdentical = RenderDispCnt == DispCnt
&& RenderAlphaRef == AlphaRef
&& RenderClearAttr1 == ClearAttr1
&& RenderClearAttr2 == ClearAttr2
&& RenderFogColor == FogColor
&& RenderFogOffset == FogOffset * 0x200
&& memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0
&& memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0
&& memcmp(RenderToonTable, ToonTable, 32*2) == 0;
} }
RenderDispCnt = DispCnt; RenderDispCnt = DispCnt;

View File

@ -87,6 +87,8 @@ extern u8 RenderFogDensityTable[34];
extern u32 RenderClearAttr1, RenderClearAttr2; extern u32 RenderClearAttr1, RenderClearAttr2;
extern bool RenderFrameIdentical;
extern std::array<Polygon*,2048> RenderPolygonRAM; extern std::array<Polygon*,2048> RenderPolygonRAM;
extern u32 RenderNumPolygons; extern u32 RenderNumPolygons;

View File

@ -58,6 +58,8 @@ bool PrevIsShadowMask;
bool Enabled; bool Enabled;
bool FrameIdentical;
// threading // threading
bool Threaded; bool Threaded;
@ -550,6 +552,16 @@ typedef struct
RendererPolygon PolygonList[2048]; RendererPolygon PolygonList[2048];
// Reads one value of type T from the flat texture VRAM buffer.
// Only the bits of `addr` selected by the 0x7FFFF mask are used as the byte
// offset, so larger addresses wrap around.
template <typename T>
inline T ReadVRAM_Texture(u32 addr)
{
    const u32 offset = addr & 0x7FFFF;
    const u8* src = &GPU::VRAMFlat_Texture[offset];
    return *reinterpret_cast<const T*>(src);
}
// Reads one value of type T from the flat texture palette buffer.
// Only the bits of `addr` selected by the 0x1FFFF mask are used as the byte
// offset, so larger addresses wrap around.
template <typename T>
inline T ReadVRAM_TexPal(u32 addr)
{
    const u32 offset = addr & 0x1FFFF;
    const u8* src = &GPU::VRAMFlat_TexPal[offset];
    return *reinterpret_cast<const T*>(src);
}
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{ {
@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 1: // A3I5 case 1: // A3I5
{ {
vramaddr += ((t * width) + s); vramaddr += ((t * width) + s);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1)); *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
} }
break; break;
@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 2: // 4-color case 2: // 4-color
{ {
vramaddr += (((t * width) + s) >> 2); vramaddr += (((t * width) + s) >> 2);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
pixel >>= ((s & 0x3) << 1); pixel >>= ((s & 0x3) << 1);
pixel &= 0x3; pixel &= 0x3;
texpal <<= 3; texpal <<= 3;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31; *alpha = (pixel==0) ? alpha0 : 31;
} }
break; break;
@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 3: // 16-color case 3: // 16-color
{ {
vramaddr += (((t * width) + s) >> 1); vramaddr += (((t * width) + s) >> 1);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
if (s & 0x1) pixel >>= 4; if (s & 0x1) pixel >>= 4;
else pixel &= 0xF; else pixel &= 0xF;
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31; *alpha = (pixel==0) ? alpha0 : 31;
} }
break; break;
@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 4: // 256-color case 4: // 256-color
{ {
vramaddr += ((t * width) + s); vramaddr += ((t * width) + s);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31; *alpha = (pixel==0) ? alpha0 : 31;
} }
break; break;
@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
if (vramaddr >= 0x40000) if (vramaddr >= 0x40000)
slot1addr += 0x10000; slot1addr += 0x10000;
u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 val = ReadVRAM_Texture<u8>(vramaddr);
val >>= (2 * (s & 0x3)); val >>= (2 * (s & 0x3));
u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr); u16 palinfo = ReadVRAM_Texture<u16>(slot1addr);
u32 paloffset = (palinfo & 0x3FFF) << 2; u32 paloffset = (palinfo & 0x3FFF) << 2;
texpal <<= 4; texpal <<= 4;
switch (val & 0x3) switch (val & 0x3)
{ {
case 0: case 0:
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); *color = ReadVRAM_TexPal<u16>(texpal + paloffset);
*alpha = 31; *alpha = 31;
break; break;
case 1: case 1:
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
*alpha = 31; *alpha = 31;
break; break;
case 2: case 2:
if ((palinfo >> 14) == 1) if ((palinfo >> 14) == 1)
{ {
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F; u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0; u32 g0 = color0 & 0x03E0;
@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
} }
else if ((palinfo >> 14) == 3) else if ((palinfo >> 14) == 3)
{ {
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F; u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0; u32 g0 = color0 & 0x03E0;
@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
*color = r | g | b; *color = r | g | b;
} }
else else
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 4); *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
*alpha = 31; *alpha = 31;
break; break;
case 3: case 3:
if ((palinfo >> 14) == 2) if ((palinfo >> 14) == 2)
{ {
*color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 6); *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
*alpha = 31; *alpha = 31;
} }
else if ((palinfo >> 14) == 3) else if ((palinfo >> 14) == 3)
{ {
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F; u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0; u32 g0 = color0 & 0x03E0;
@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 6: // A5I3 case 6: // A5I3
{ {
vramaddr += ((t * width) + s); vramaddr += ((t * width) + s);
u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4; texpal <<= 4;
*color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1)); *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
*alpha = (pixel >> 3); *alpha = (pixel >> 3);
} }
break; break;
@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 7: // direct color case 7: // direct color
{ {
vramaddr += (((t * width) + s) << 1); vramaddr += (((t * width) + s) << 1);
*color = GPU::ReadVRAM_Texture<u16>(vramaddr); *color = ReadVRAM_Texture<u16>(vramaddr);
*alpha = (*color & 0x8000) ? 31 : 0; *alpha = (*color & 0x8000) ? 31 : 0;
} }
break; break;
@ -2007,8 +2019,8 @@ void ClearBuffers()
{ {
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1)); u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1)); u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
// TODO: confirm color conversion // TODO: confirm color conversion
u32 r = (val2 << 1) & 0x3E; if (r) r++; u32 r = (val2 << 1) & 0x3E; if (r) r++;
@ -2088,11 +2100,19 @@ void VCount144()
void RenderFrame() void RenderFrame()
{ {
auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty);
bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty);
FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical;
if (RenderThreadRunning) if (RenderThreadRunning)
{ {
Platform::Semaphore_Post(Sema_RenderStart); Platform::Semaphore_Post(Sema_RenderStart);
} }
else else if (!FrameIdentical)
{ {
ClearBuffers(); ClearBuffers();
RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons); RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons);
@ -2107,8 +2127,15 @@ void RenderThreadFunc()
if (!RenderThreadRunning) return; if (!RenderThreadRunning) return;
RenderThreadRendering = true; RenderThreadRendering = true;
if (FrameIdentical)
{
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
}
else
{
ClearBuffers(); ClearBuffers();
RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
}
Platform::Semaphore_Post(Sema_RenderDone); Platform::Semaphore_Post(Sema_RenderDone);
RenderThreadRendering = false; RenderThreadRendering = false;

149
src/NonStupidBitfield.h Normal file
View File

@ -0,0 +1,149 @@
#ifndef NONSTUPIDBITFIELD_H
#define NONSTUPIDBITFIELD_H
#include "types.h"
#include <memory.h>
#include <initializer_list>
#include <algorithm>
// like std::bitset but less stupid and optimised for
// our use case (keeping track of memory invalidations)
//
// Size is the number of bits and must be a multiple of 8. Bit i lives in
// Data[i >> 3] at bit position (i & 7). Besides per-bit access the type offers
// construction from a bit range and an iterator that visits only the set bits,
// scanning the storage a whole word at a time.
template <u32 Size>
struct NonStupidBitField
{
    static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");

    // Number of bytes of storage.
    static const u32 DataLength = Size / 8;
    u8 Data[DataLength];

    // Assembles sizeof(T) bytes starting at src into an integer, lowest byte
    // first. Building the word byte by byte avoids the misaligned, type-punned
    // loads a pointer cast would perform, and keeps the bit numbering used by
    // the iterator independent of host endianness.
    template <typename T>
    static u64 LoadWord(const u8* src)
    {
        u64 word = 0;
        for (u32 i = 0; i < sizeof(T); i++)
            word |= (u64)src[i] << (i * 8);
        return word;
    }

    // Proxy returned by operator[]; reads and writes a single bit.
    struct Ref
    {
        NonStupidBitField<Size>& BitField;
        u32 Idx;

        operator bool() const
        {
            return (BitField.Data[Idx >> 3] & (1 << (Idx & 0x7))) != 0;
        }

        Ref& operator=(bool set)
        {
            // clear the bit first, then OR in the new value
            BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
            BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
            return *this;
        }
    };

    // Forward iterator over the indices of the set bits, in ascending order.
    // Two iterators compare equal when they point into the same storage word,
    // which is sufficient for the usual `it != End()` loop condition.
    struct Iterator
    {
        NonStupidBitField<Size>& BitField;
        u32 DataIdx;       // byte offset of the word currently being scanned
        u32 BitIdx;        // position inside that word of the bit last yielded
        u64 RemainingBits; // bits of the current word which were not yielded yet

        u32 operator*() const { return DataIdx * 8 + BitIdx; }

        bool operator==(const Iterator& other) const { return other.DataIdx == DataIdx; }
        bool operator!=(const Iterator& other) const { return other.DataIdx != DataIdx; }

        // Advances to the next set bit, using T as the scan word type.
        // When no set bit is left the iterator becomes equal to End().
        template <typename T>
        void Next()
        {
            while (RemainingBits == 0)
            {
                DataIdx += sizeof(T);
                // check the bound BEFORE loading, so nothing past the end of
                // Data is ever read and ctz is never applied to zero
                if (DataIdx >= DataLength)
                {
                    DataIdx = DataLength;
                    BitIdx = 0;
                    return;
                }
                RemainingBits = LoadWord<T>(&BitField.Data[DataIdx]);
            }

            BitIdx = __builtin_ctzll(RemainingBits);
            RemainingBits &= ~(1ULL << BitIdx);
        }

        Iterator operator++(int)
        {
            Iterator prev(*this);
            ++*this;
            return prev;
        }

        Iterator& operator++()
        {
            // use the widest word which evenly divides the storage
            if ((DataLength % 8) == 0)
                Next<u64>();
            else if ((DataLength % 4) == 0)
                Next<u32>();
            else if ((DataLength % 2) == 0)
                Next<u16>();
            else
                Next<u8>();

            return *this;
        }
    };

    // Creates a bitfield with the bits [start, start + size) set and all
    // others cleared. A range reaching past the end of the bitfield is
    // clamped to it.
    NonStupidBitField(u32 start, u32 size)
    {
        memset(Data, 0, sizeof(Data));

        if (size == 0 || start >= Size)
            return;

        // exclusive end of the range, clamped without overflowing u32
        u32 end = (size > Size - start) ? Size : start + size;

        u32 firstByte = start >> 3;
        u32 lastByte = (end - 1) >> 3;
        // bits from (start & 7) upwards / bits up to and including ((end - 1) & 7)
        u8 headMask = (u8)(0xFF << (start & 0x7));
        u8 tailMask = (u8)(0xFF >> (7 - ((end - 1) & 0x7)));

        if (firstByte == lastByte)
        {
            // the whole range sits inside a single byte
            Data[firstByte] = headMask & tailMask;
            return;
        }

        Data[firstByte] = headMask;
        Data[lastByte] = tailMask;
        if (lastByte - firstByte > 1)
            memset(Data + firstByte + 1, 0xFF, lastByte - firstByte - 1);
    }

    // Creates a bitfield with every bit cleared.
    NonStupidBitField()
    {
        memset(Data, 0, sizeof(Data));
    }

    // Past-the-end iterator.
    Iterator End()
    {
        return Iterator{*this, DataLength, 0, 0};
    }

    // Iterator positioned on the lowest set bit, or End() if no bit is set.
    Iterator Begin()
    {
        u64 firstWord;
        if ((DataLength % 8) == 0)
            firstWord = LoadWord<u64>(Data);
        else if ((DataLength % 4) == 0)
            firstWord = LoadWord<u32>(Data);
        else if ((DataLength % 2) == 0)
            firstWord = LoadWord<u16>(Data);
        else
            firstWord = LoadWord<u8>(Data);

        // seed with the first word, then step onto the first set bit
        Iterator it{*this, 0, 0, firstWord};
        ++it;
        return it;
    }

    Ref operator[](u32 idx)
    {
        return Ref{*this, idx};
    }

    // Bitwise OR with another bitfield of the same size.
    NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
    {
        for (u32 i = 0; i < DataLength; i++)
        {
            Data[i] |= other.Data[i];
        }
        return *this;
    }

    // Bitwise AND with another bitfield of the same size.
    NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
    {
        for (u32 i = 0; i < DataLength; i++)
        {
            Data[i] &= other.Data[i];
        }
        return *this;
    }
};
#endif

View File

@ -77,7 +77,7 @@ Semaphore* Semaphore_Create();
void Semaphore_Free(Semaphore* sema); void Semaphore_Free(Semaphore* sema);
void Semaphore_Reset(Semaphore* sema); void Semaphore_Reset(Semaphore* sema);
void Semaphore_Wait(Semaphore* sema); void Semaphore_Wait(Semaphore* sema);
void Semaphore_Post(Semaphore* sema); void Semaphore_Post(Semaphore* sema, int count = 1);
struct Mutex; struct Mutex;
Mutex* Mutex_Create(); Mutex* Mutex_Create();

View File

@ -230,9 +230,9 @@ void Semaphore_Wait(Semaphore* sema)
((QSemaphore*) sema)->acquire(); ((QSemaphore*) sema)->acquire();
} }
void Semaphore_Post(Semaphore* sema) void Semaphore_Post(Semaphore* sema, int count)
{ {
((QSemaphore*) sema)->release(); ((QSemaphore*) sema)->release(count);
} }
Mutex* Mutex_Create() Mutex* Mutex_Create()