A handful of tiny optimizations and important bugfixes in gfx3d savestates. Also add an MSC_FORCEINLINE macro for people to use instead when they don't like my FORCEINLINEs and are thinking about removing them; at least then we know which ones didn't work on gcc.
This commit is contained in:
parent
712b23d512
commit
4832522315
|
@ -529,7 +529,7 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
|
|||
u8 en = VRAMBankCnt & 0x80;
|
||||
if(!en) return;
|
||||
|
||||
int mst,ofs;
|
||||
int mst,ofs=0;
|
||||
switch(bank) {
|
||||
case VRAM_BANK_A:
|
||||
case VRAM_BANK_B:
|
||||
|
|
|
@ -274,10 +274,10 @@ FORCEINLINE u16 _MMU_read16(const int PROCNUM, const MMU_ACCESS_TYPE AT, const u
|
|||
if(PROCNUM==ARMCPU_ARM9 && AT == MMU_AT_CODE)
|
||||
{
|
||||
if ((addr & 0x0F000000) == 0x02000000)
|
||||
return T1ReadWord( ARM9Mem.MAIN_MEM, addr & _MMU_MAIN_MEM_MASK);
|
||||
return T1ReadWord_guaranteedAligned( ARM9Mem.MAIN_MEM, addr & _MMU_MAIN_MEM_MASK);
|
||||
|
||||
if(addr<0x02000000)
|
||||
return T1ReadWord(ARM9Mem.ARM9_ITCM, addr&0x7FFF);
|
||||
return T1ReadWord_guaranteedAligned(ARM9Mem.ARM9_ITCM, addr&0x7FFF);
|
||||
|
||||
goto dunno;
|
||||
}
|
||||
|
@ -297,16 +297,16 @@ dunno:
|
|||
else return _MMU_ARM7_read16(addr);
|
||||
}
|
||||
|
||||
FORCEINLINE u32 _MMU_read32(int PROCNUM, const MMU_ACCESS_TYPE AT, const u32 addr) {
|
||||
FORCEINLINE u32 _MMU_read32(const int PROCNUM, const MMU_ACCESS_TYPE AT, const u32 addr) {
|
||||
|
||||
//special handling for execution from arm9, since we spend so much time in there
|
||||
if(PROCNUM==ARMCPU_ARM9 && AT == MMU_AT_CODE)
|
||||
{
|
||||
if ( (addr & 0x0F000000) == 0x02000000)
|
||||
return T1ReadLong( ARM9Mem.MAIN_MEM, addr & _MMU_MAIN_MEM_MASK);
|
||||
return T1ReadLong_guaranteedAligned( ARM9Mem.MAIN_MEM, addr & _MMU_MAIN_MEM_MASK);
|
||||
|
||||
if(addr<0x02000000)
|
||||
return T1ReadLong(ARM9Mem.ARM9_ITCM, addr&0x7FFF);
|
||||
return T1ReadLong_guaranteedAligned(ARM9Mem.ARM9_ITCM, addr&0x7FFF);
|
||||
|
||||
goto dunno;
|
||||
}
|
||||
|
|
|
@ -1643,7 +1643,7 @@ u32 NDS_exec(s32 nb)
|
|||
}
|
||||
#ifdef _WIN32
|
||||
#ifdef DEVELOPER
|
||||
DisassemblerTools_Refresh(ARMCPU_ARM9);
|
||||
DisassemblerTools_Refresh<ARMCPU_ARM9>();
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@ -1691,7 +1691,7 @@ u32 NDS_exec(s32 nb)
|
|||
}
|
||||
#ifdef _WIN32
|
||||
#ifdef DEVELOPER
|
||||
DisassemblerTools_Refresh(ARMCPU_ARM7);
|
||||
DisassemblerTools_Refresh<ARMCPU_ARM7>();
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -365,7 +365,7 @@ template<u32 PROCNUM>
|
|||
static u32
|
||||
armcpu_prefetch()
|
||||
{
|
||||
armcpu_t* armcpu = &ARMPROC;
|
||||
armcpu_t* const armcpu = &ARMPROC;
|
||||
#ifdef GDB_STUB
|
||||
u32 temp_instruction;
|
||||
#endif
|
||||
|
|
|
@ -124,7 +124,9 @@ static float normalTable[1024];
|
|||
#define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9))
|
||||
|
||||
CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
|
||||
CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192];
|
||||
|
||||
//this extra *2 is a HACK to salvage some savestates. remove me when the savestate format changes.
|
||||
CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192*2];
|
||||
|
||||
// Matrix stack handling
|
||||
static CACHE_ALIGN MatrixStack mtxStack[4] = {
|
||||
|
@ -1376,7 +1378,7 @@ void gfx3d_glFlush(u32 v)
|
|||
gfx3d.wbuffer = BIT1(v);
|
||||
}
|
||||
|
||||
static int _CDECL_ gfx3d_ysort_compare(const void * elem1, const void * elem2)
|
||||
static int _CDECL_ gfx3d_ysort_compare_old_qsort(const void * elem1, const void * elem2)
|
||||
{
|
||||
int num1 = *(int*)elem1;
|
||||
int num2 = *(int*)elem2;
|
||||
|
@ -1396,6 +1398,23 @@ static int _CDECL_ gfx3d_ysort_compare(const void * elem1, const void * elem2)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool gfx3d_ysort_compare(int num1, int num2)
|
||||
{
|
||||
const POLY &poly1 = polylist->list[num1];
|
||||
const POLY &poly2 = polylist->list[num2];
|
||||
|
||||
if(poly1.maxy > poly2.maxy)
|
||||
return true;
|
||||
else if(poly1.maxy < poly2.maxy)
|
||||
return false;
|
||||
else if(poly1.miny < poly2.miny)
|
||||
return true;
|
||||
else if(poly1.miny > poly2.miny)
|
||||
return false;
|
||||
else
|
||||
return false; //equal should always return false "strict weak ordering"
|
||||
}
|
||||
|
||||
|
||||
void gfx3d_VBlankSignal()
|
||||
{
|
||||
|
@ -1448,16 +1467,21 @@ void gfx3d_VBlankSignal()
|
|||
gfx3d.indexlist[ctr++] = i;
|
||||
}
|
||||
|
||||
//========NOT SURE YET WHETHER I NEED A STABLE SORT========
|
||||
|
||||
//now we have to sort the opaque polys by y-value.
|
||||
//should this be done after clipping??
|
||||
//does this need to be a stable sort???
|
||||
//test case: harvest moon island of happiness character creator UI
|
||||
qsort(gfx3d.indexlist, opaqueCount, 4, gfx3d_ysort_compare);
|
||||
//std::stable_sort(gfx3d.indexlist, gfx3d.indexlist + opaqueCount, gfx3d_ysort_compare);
|
||||
qsort(gfx3d.indexlist, opaqueCount, 4, gfx3d_ysort_compare_old_qsort);
|
||||
|
||||
if(!gfx3d.sortmode)
|
||||
{
|
||||
//if we are autosorting translucent polys, we need to do this also
|
||||
//TODO - this is unverified behavior. need a test case
|
||||
qsort(gfx3d.indexlist + opaqueCount, polycount - opaqueCount, 4, gfx3d_ysort_compare);
|
||||
//std::stable_sort(gfx3d.indexlist + opaqueCount, gfx3d.indexlist + polycount, gfx3d_ysort_compare);
|
||||
qsort(gfx3d.indexlist + opaqueCount, polycount - opaqueCount, 4, gfx3d_ysort_compare_old_qsort);
|
||||
}
|
||||
|
||||
//switch to the new lists
|
||||
|
@ -2256,14 +2280,6 @@ SFORMAT SF_GFX3D[]={
|
|||
{ "GMOD", 4, 1, &mode},
|
||||
{ "GMTM", 4,16, mtxTemporal},
|
||||
{ "GMCU", 4,64, mtxCurrent},
|
||||
{ "GM0P", 4, 1, &mtxStack[0].position},
|
||||
{ "GM0M", 4,16, mtxStack[0].matrix},
|
||||
{ "GM1P", 4, 1, &mtxStack[1].position},
|
||||
{ "GM1M", 4,496,mtxStack[1].matrix},
|
||||
{ "GM2P", 4, 1, &mtxStack[2].position},
|
||||
{ "GM2M", 4,496,mtxStack[2].matrix},
|
||||
{ "GM3P", 4, 1, &mtxStack[3].position},
|
||||
{ "GM3M", 4,16, mtxStack[3].matrix},
|
||||
{ "ML4I", 1, 1, &ML4x4ind},
|
||||
{ "ML3I", 1, 1, &ML4x3ind},
|
||||
{ "MM4I", 1, 1, &MM4x4ind},
|
||||
|
@ -2289,8 +2305,8 @@ SFORMAT SF_GFX3D[]={
|
|||
{ "GLPT", 4, 1, &PTind},
|
||||
{ "GLPC", 4, 4, PTcoords},
|
||||
{ "GLF9", 4, 1, &gxFIFO.tail},
|
||||
{ "GLF9", 1, 261, &gxFIFO.cmd},
|
||||
{ "GLF9", 4, 261, &gxFIFO.param},
|
||||
{ "GLF9", 1, 261, &gxFIFO.cmd[0]},
|
||||
{ "GLF9", 4, 261, &gxFIFO.param[0]},
|
||||
{ "GCOL", 1, 4, colorRGB},
|
||||
{ "GLCO", 4, 4, lightColor},
|
||||
{ "GLDI", 4, 4, lightDirection},
|
||||
|
@ -2323,7 +2339,7 @@ SFORMAT SF_GFX3D[]={
|
|||
{ "GTVM", 4, 4, tempVertInfo.map},
|
||||
{ "GTVF", 4, 1, &tempVertInfo.first},
|
||||
{ "G3CS", 2, 256*192, gfx3d_convertedScreen},
|
||||
{ "G3CA", 2, 256*192, gfx3d_convertedAlpha},
|
||||
{ "G3CA", 2, 256*192, gfx3d_convertedAlpha},
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
|
@ -2331,7 +2347,7 @@ SFORMAT SF_GFX3D[]={
|
|||
void gfx3d_savestate(std::ostream* os)
|
||||
{
|
||||
//version
|
||||
write32le(1,os);
|
||||
write32le(2,os);
|
||||
|
||||
//dump the render lists
|
||||
OSWRITE(vertlist->count);
|
||||
|
@ -2340,6 +2356,13 @@ void gfx3d_savestate(std::ostream* os)
|
|||
OSWRITE(polylist->count);
|
||||
for(int i=0;i<polylist->count;i++)
|
||||
polylist->list[i].save(os);
|
||||
|
||||
for(int i=0;i<4;i++)
|
||||
{
|
||||
OSWRITE(mtxStack[i].position);
|
||||
for(int j=0;j<mtxStack[i].size*16+16;j++)
|
||||
OSWRITE(mtxStack[i].matrix[j]);
|
||||
}
|
||||
}
|
||||
|
||||
bool gfx3d_loadstate(std::istream* is, int size)
|
||||
|
@ -2362,7 +2385,7 @@ bool gfx3d_loadstate(std::istream* is, int size)
|
|||
polylist = &polylists[listTwiddle];
|
||||
vertlist = &vertlists[listTwiddle];
|
||||
|
||||
if(version==1)
|
||||
if(version>=1)
|
||||
{
|
||||
OSREAD(vertlist->count);
|
||||
for(int i=0;i<vertlist->count;i++)
|
||||
|
@ -2372,6 +2395,16 @@ bool gfx3d_loadstate(std::istream* is, int size)
|
|||
polylist->list[i].load(is);
|
||||
}
|
||||
|
||||
if(version>=2)
|
||||
{
|
||||
for(int i=0;i<4;i++)
|
||||
{
|
||||
OSREAD(mtxStack[i].position);
|
||||
for(int j=0;j<mtxStack[i].size*16+16;j++)
|
||||
OSREAD(mtxStack[i].matrix[j]);
|
||||
}
|
||||
}
|
||||
|
||||
gfx3d.polylist = &polylists[listTwiddle^1];
|
||||
gfx3d.vertlist = &vertlists[listTwiddle^1];
|
||||
gfx3d.polylist->count=0;
|
||||
|
|
|
@ -232,7 +232,7 @@ extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32];
|
|||
//these contain the 3d framebuffer converted into the most useful format
|
||||
//they are stored here instead of in the renderers in order to consolidate the buffers
|
||||
extern CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
|
||||
extern CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192];
|
||||
extern CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192*2]; //see cpp for explanation of illogical *2
|
||||
|
||||
//GE commands:
|
||||
void gfx3d_glViewPort(u32 v);
|
||||
|
|
|
@ -22,16 +22,27 @@
|
|||
#define MEM_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "types.h"
|
||||
|
||||
/* Type 1 Memory, faster for byte (8 bits) accesses */
|
||||
|
||||
static INLINE u8 T1ReadByte(u8 * mem, u32 addr)
|
||||
static INLINE u8 T1ReadByte(u8* const mem, const u32 addr)
|
||||
{
|
||||
return mem[addr];
|
||||
}
|
||||
|
||||
static INLINE u16 T1ReadWord(void * mem, u32 addr)
|
||||
static INLINE u16 T1ReadWord_guaranteedAligned(void* const mem, const u32 addr)
|
||||
{
|
||||
assert((addr&1)==0);
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return (((u8*)mem)[addr + 1] << 8) | ((u8*)mem)[addr];
|
||||
#else
|
||||
return *(u16*)((u8*)mem + addr);
|
||||
#endif
|
||||
}
|
||||
|
||||
static INLINE u16 T1ReadWord(void* const mem, const u32 addr)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return (((u8*)mem)[addr + 1] << 8) | ((u8*)mem)[addr];
|
||||
|
@ -40,7 +51,19 @@ static INLINE u16 T1ReadWord(void * mem, u32 addr)
|
|||
#endif
|
||||
}
|
||||
|
||||
static INLINE u32 T1ReadLong(u8 * mem, u32 addr)
|
||||
static INLINE u32 T1ReadLong_guaranteedAligned(u8* const mem, const u32 addr)
|
||||
{
|
||||
assert((addr&3)==0);
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return (mem[addr + 3] << 24 | mem[addr + 2] << 16 |
|
||||
mem[addr + 1] << 8 | mem[addr]);
|
||||
#else
|
||||
return *(u32*)(mem + addr);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static INLINE u32 T1ReadLong(u8* const mem, const u32 addr)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return (mem[addr + 3] << 24 | mem[addr + 2] << 16 |
|
||||
|
@ -50,7 +73,7 @@ static INLINE u32 T1ReadLong(u8 * mem, u32 addr)
|
|||
#endif
|
||||
}
|
||||
|
||||
static INLINE u64 T1ReadQuad(u8 * mem, u32 addr)
|
||||
static INLINE u64 T1ReadQuad(u8* const mem, const u32 addr)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return (u64(mem[addr + 7]) << 56 | u64(mem[addr + 6]) << 48 |
|
||||
|
@ -62,12 +85,12 @@ static INLINE u64 T1ReadQuad(u8 * mem, u32 addr)
|
|||
#endif
|
||||
}
|
||||
|
||||
static INLINE void T1WriteByte(u8 * mem, u32 addr, u8 val)
|
||||
static INLINE void T1WriteByte(u8* const mem, const u32 addr, const u8 val)
|
||||
{
|
||||
mem[addr] = val;
|
||||
}
|
||||
|
||||
static INLINE void T1WriteWord(u8 * mem, u32 addr, u16 val)
|
||||
static INLINE void T1WriteWord(u8* const mem, const u32 addr, const u16 val)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
mem[addr + 1] = val >> 8;
|
||||
|
@ -77,7 +100,7 @@ static INLINE void T1WriteWord(u8 * mem, u32 addr, u16 val)
|
|||
#endif
|
||||
}
|
||||
|
||||
static INLINE void T1WriteLong(u8 * mem, u32 addr, u32 val)
|
||||
static INLINE void T1WriteLong(u8* const mem, const u32 addr, const u32 val)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
mem[addr + 3] = val >> 24;
|
||||
|
@ -91,7 +114,7 @@ static INLINE void T1WriteLong(u8 * mem, u32 addr, u32 val)
|
|||
|
||||
/* Type 2 Memory, faster for word (16 bits) accesses */
|
||||
|
||||
static INLINE u8 T2ReadByte(u8 * mem, u32 addr)
|
||||
static INLINE u8 T2ReadByte(u8* const mem, const u32 addr)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return mem[addr ^ 1];
|
||||
|
@ -100,12 +123,12 @@ static INLINE u8 T2ReadByte(u8 * mem, u32 addr)
|
|||
#endif
|
||||
}
|
||||
|
||||
static INLINE u16 T2ReadWord(u8 * mem, u32 addr)
|
||||
static INLINE u16 T2ReadWord(u8* const mem, const u32 addr)
|
||||
{
|
||||
return *((u16 *) (mem + addr));
|
||||
}
|
||||
|
||||
static INLINE u32 T2ReadLong(u8 * mem, u32 addr)
|
||||
static INLINE u32 T2ReadLong(u8* const mem, const u32 addr)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return *((u16 *) (mem + addr + 2)) << 16 | *((u16 *) (mem + addr));
|
||||
|
@ -114,7 +137,7 @@ static INLINE u32 T2ReadLong(u8 * mem, u32 addr)
|
|||
#endif
|
||||
}
|
||||
|
||||
static INLINE void T2WriteByte(u8 * mem, u32 addr, u8 val)
|
||||
static INLINE void T2WriteByte(u8* const mem, const u32 addr, const u8 val)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
mem[addr ^ 1] = val;
|
||||
|
@ -123,12 +146,12 @@ static INLINE void T2WriteByte(u8 * mem, u32 addr, u8 val)
|
|||
#endif
|
||||
}
|
||||
|
||||
static INLINE void T2WriteWord(u8 * mem, u32 addr, u16 val)
|
||||
static INLINE void T2WriteWord(u8* const mem, const u32 addr, const u16 val)
|
||||
{
|
||||
*((u16 *) (mem + addr)) = val;
|
||||
}
|
||||
|
||||
static INLINE void T2WriteLong(u8 * mem, u32 addr, u32 val)
|
||||
static INLINE void T2WriteLong(u8* const mem, const u32 addr, const u32 val)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
*((u16 *) (mem + addr + 2)) = val >> 16;
|
||||
|
|
|
@ -131,11 +131,27 @@
|
|||
#ifndef FORCEINLINE
|
||||
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
|
||||
#define FORCEINLINE __forceinline
|
||||
#define MSC_FORCEINLINE __forceinline
|
||||
#else
|
||||
#define FORCEINLINE INLINE
|
||||
#define MSC_FORCEINLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef _PREFETCH
|
||||
#if (defined(_MSC_VER) || defined(__INTEL_COMPILER)) && !defined(NOSSE2)
|
||||
#include <xmmintrin.h>
|
||||
#include <intrin.h>
|
||||
#define _PREFETCH(X) _mm_prefetch((char*)(X),_MM_HINT_T0);
|
||||
#define _PREFETCHNTA(X) _mm_prefetch((char*)(X),_MM_HINT_NTA);
|
||||
#else
|
||||
#define _PREFETCH(X) {}
|
||||
#define _PREFETCHNTA(X) {}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__LP64__)
|
||||
typedef unsigned char u8;
|
||||
typedef unsigned short u16;
|
||||
|
|
|
@ -710,7 +710,8 @@ BOOL CALLBACK ViewDisasm_ARM9Proc (HWND hwnd, UINT message, WPARAM wParam, LPARA
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
void DisassemblerTools_Refresh(u8 proc)
|
||||
template<int proc>
|
||||
FORCEINLINE void DisassemblerTools_Refresh()
|
||||
{
|
||||
if (DisViewWnd[proc] == NULL) return;
|
||||
if (proc == 0)
|
||||
|
@ -725,4 +726,8 @@ void DisassemblerTools_Refresh(u8 proc)
|
|||
InvalidateRect(DisViewWnd[proc], NULL, FALSE);
|
||||
DisView7->autogo=false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//these templates needed to be instantiated manually
|
||||
template void DisassemblerTools_Refresh<0>();
|
||||
template void DisassemblerTools_Refresh<1>();
|
|
@ -30,6 +30,6 @@ extern LRESULT CALLBACK ViewDisasm_ARM7BoxProc(HWND hwnd, UINT msg, WPARAM wPara
|
|||
extern BOOL CALLBACK ViewDisasm_ARM9Proc (HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam);
|
||||
extern LRESULT CALLBACK ViewDisasm_ARM9BoxProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
extern void DisassemblerTools_Refresh(u8 proc);
|
||||
template<int proc> void FORCEINLINE DisassemblerTools_Refresh();
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue