diff --git a/desmume/ChangeLog b/desmume/ChangeLog index df7598cae..69b0cda53 100644 --- a/desmume/ChangeLog +++ b/desmume/ChangeLog @@ -5,6 +5,9 @@ - Really minor memory access speed up (mainly added for clarity) [shash] - Added transparency and fixed material alpha support and alpha testing on the 3D core [shash] - Changed how depth initial values are calculated (fixes SM64DS skybox) [shash] + - Added SSE2 version for matrix [CrazyMax] + - Some fixes in core (New SMB don't freeze now) [CrazyMax] + - Some optimizations in code [CrazyMax] Mac OS X port: - Fixed: Filenames and paths with unicode characters now work. [Jeff] - Fixed: Load state from file button works again. [Jeff] @@ -17,6 +20,8 @@ Windows port: - Removed the bug report link with a define, to avoid reports from betas/external builds [shash] - Added the version on window bar to recognize versions from screenshots [shash] + - Changed graphics render core to DirectDraw (work fastest) [CrazyMax] + - Some fixes in 3D core OGL (fixed textures) [CrazyMax] 0.7.3 -> 0.8 Cocoa: diff --git a/desmume/src/FIFO.c b/desmume/src/FIFO.c index c86017f71..5307ff47a 100644 --- a/desmume/src/FIFO.c +++ b/desmume/src/FIFO.c @@ -25,15 +25,8 @@ void FIFOInit(FIFO * fifo) { - u32 i; - - fifo->begin = 0; - fifo->end = 0; - for(i = 0; i<0x8000; ++i) - fifo->data[i] = 0; - fifo->full = FALSE; - fifo->empty = TRUE; - fifo->error = FALSE; + memset(fifo,0,sizeof(FIFO)); + fifo->empty = TRUE; } void FIFOAdd(FIFO * fifo, u32 v) diff --git a/desmume/src/FIFO.h b/desmume/src/FIFO.h index 86e282902..361ee2e1f 100644 --- a/desmume/src/FIFO.h +++ b/desmume/src/FIFO.h @@ -44,6 +44,18 @@ void FIFOInit(FIFO * fifo); void FIFOAdd(FIFO * fifo, u32 v); u32 FIFOValue(FIFO * fifo); +//================== 3D GFX FIFO +typedef struct{ + u32 hits[640]; + u32 hits_count; + u32 empty; + u32 half; + u32 full; + u32 begin; + u32 end; + u32 irq; +} GFXFIFO; + #ifdef __cplusplus } #endif diff --git a/desmume/src/MMU.c b/desmume/src/MMU.c index 32c704b32..d59785478 100644 --- a/desmume/src/MMU.c +++ b/desmume/src/MMU.c @@ -174,6 +174,7 @@ void MMU_Init(void) { int i; LOG("MMU init\n"); + printlog("MMU init\n"); memset(&MMU, 0, sizeof(MMU_struct)); @@ -191,6 +192,7 @@ void MMU_Init(void) { MMU.MMU_MASK[1] = MMU_ARM7_MEM_MASK; MMU.ITCMRegion = 0x00800000; + //MMU.ITCMRegion = 0x00000000; MMU.MMU_WAIT16[0] = MMU_ARM9_WAIT16; MMU.MMU_WAIT16[1] = MMU_ARM7_WAIT16; @@ -199,6 +201,8 @@ void MMU_Init(void) { for(i = 0;i < 16;i++) FIFOInit(MMU.fifos + i); + memset(&MMU.gfxfifo, 0, sizeof(GFXFIFO)); + MMU.gfxfifo.empty=TRUE; MMU.gfxfifo.half=TRUE; mc_init(&MMU.fw, MC_TYPE_FLASH); /* init fw device */ mc_alloc(&MMU.fw, NDS_FW_SIZE_V1); @@ -260,6 +264,7 @@ void MMU_clearMem() MMU.DTCMRegion = 0; MMU.ITCMRegion = 0x00800000; + //MMU.ITCMRegion = 0x00000000; memset(MMU.timer, 0, sizeof(u16) * 2 * 4); memset(MMU.timerMODE, 0, sizeof(s32) * 2 * 4); @@ -566,7 +571,13 @@ u8 FASTCALL MMU_read8(u32 proc, u32 adr) return ARM9Mem.ARM9_DTCM[adr&0x3FFF]; } #endif - + + if(proc==ARMCPU_ARM9 && adr<0x02000000) + { + //printlog("MMU ITCM (08) Read %08X: %08X\n", adr, T1ReadByte(ARM9Mem.ARM9_ITCM, adr&0x7FFF)); + return T1ReadByte(ARM9Mem.ARM9_ITCM, adr&0x7FFF); + } + // CFlash reading, Mic if ((adr>=0x9000000)&&(adr<0x9900000)) return (unsigned char)cflash_read(adr); @@ -596,6 +607,11 @@ u16 FASTCALL MMU_read16(u32 proc, u32 adr) return T1ReadWord(ARM9Mem.ARM9_DTCM, adr & 0x3FFF); } #endif + if(proc==ARMCPU_ARM9 && adr<0x02000000) + { + //printlog("MMU ITCM (16) Read %08X: %08X\n", adr, T1ReadWord(ARM9Mem.ARM9_ITCM, adr&0x7FFF)); + return T1ReadWord(ARM9Mem.ARM9_ITCM, adr&0x7FFF); + } // CFlash reading, Mic if ((adr>=0x08800000)&&(adr<0x09900000)) @@ -666,6 +682,12 @@ u32 FASTCALL MMU_read32(u32 proc, u32 adr) return T1ReadLong(ARM9Mem.ARM9_DTCM, adr & 0x3FFF); } #endif + + if(proc==ARMCPU_ARM9 && adr<0x02000000) + { + //printlog("MMU ITCM (32) Read %08X: %08X\n", adr, T1ReadLong(ARM9Mem.ARM9_ITCM, adr&0x7FFF)); + return T1ReadLong(ARM9Mem.ARM9_ITCM, adr&0x7FFF); + } // CFlash reading, Mic if ((adr>=0x9000000)&&(adr<0x9900000)) @@ -681,15 +703,21 @@ u32 FASTCALL MMU_read32(u32 proc, u32 adr) // This is hacked due to the only current 3D core case 0x04000600: { + /* u32 fifonum = IPCFIFO+proc; u32 gxstat = (MMU.fifos[fifonum].empty<<26) | (1<<25) | (MMU.fifos[fifonum].full<<24) | /*((NDS_nbpush[0]&1)<<13) | ((NDS_nbpush[2]&0x1F)<<8) |*/ - 2; + // 2; + u32 gxstat =(2|(MMU.gfxfifo.hits_count<<16)| + (MMU.gfxfifo.full<<24)| + (MMU.gfxfifo.empty<<25)| + (MMU.gfxfifo.half<<26)| + (MMU.gfxfifo.irq<<30)); - LOG ("GXSTAT: 0x%X", gxstat); + //printlog("GXSTAT: 0x%X\n", gxstat); return gxstat; } @@ -818,6 +846,8 @@ u32 FASTCALL MMU_read32(u32 proc, u32 adr) /* Returns data from memory */ return T1ReadLong(MMU.MMU_MEM[proc][(adr >> 20) & 0xFF], adr & MMU.MMU_MASK[proc][(adr >> 20) & 0xFF]); } + +#define OFS(i) ((i>>3)&3) void FASTCALL MMU_write8(u32 proc, u32 adr, u8 val) { @@ -829,7 +859,11 @@ void FASTCALL MMU_write8(u32 proc, u32 adr, u8 val) return ; } #endif - + if(proc==ARMCPU_ARM9 && adr<0x02000000) + { + //printlog("MMU ITCM (08) Write %08X: %08X\n", adr, val); + return T1WriteByte(ARM9Mem.ARM9_ITCM, adr&0x7FFF, val); + } // CFlash writing, Mic if ((adr>=0x9000000)&&(adr<0x9900000)) { cflash_write(adr,val); @@ -979,40 +1013,66 @@ void FASTCALL MMU_write8(u32 proc, u32 adr, u8 val) case REG_VRAMCNTD: if(proc == ARMCPU_ARM9) { + + MMU_VRAMWriteBackToLCD(0) ; MMU_VRAMWriteBackToLCD(1) ; MMU_VRAMWriteBackToLCD(2) ; MMU_VRAMWriteBackToLCD(3) ; + if (!(val&0x80)) + { + u8 tmp=T1ReadByte(ARM9Mem.ARM9_REG, 0x240); + switch(tmp & 7) + { + case 0: + memset(ARM9Mem.ARM9_LCD,0,0x20000); + break; + case 1: + memset(ARM9Mem.ARM9_ABG+(0x20000*OFS(tmp)),0,0x20000); + break; + case 2: + memset(ARM9Mem.ARM9_AOBJ+(0x20000*(OFS(tmp)&1)),0,0x20000); + //memset(ARM9Mem.ARM9_ABG+0x40000,0,0x20000); + break; + case 3: + memset(ARM9Mem.textureSlotAddr[OFS(tmp)], 0, 0x20000); + break; + } + } else switch(val & 0x1F) { case 1 : MMU.vram_mode[adr-REG_VRAMCNTA] = 0; // BG-VRAM + memset(ARM9Mem.ARM9_ABG,0,0x20000); //MMU.vram_offset[0] = ARM9Mem.ARM9_ABG+(0x20000*0); // BG-VRAM break; case 1 | (1 << 3) : MMU.vram_mode[adr-REG_VRAMCNTA] = 1; // BG-VRAM + memset(ARM9Mem.ARM9_ABG+0x20000,0,0x20000); //MMU.vram_offset[0] = ARM9Mem.ARM9_ABG+(0x20000*1); // BG-VRAM break; case 1 | (2 << 3) : MMU.vram_mode[adr-REG_VRAMCNTA] = 2; // BG-VRAM + memset(ARM9Mem.ARM9_ABG+0x40000,0,0x20000); //MMU.vram_offset[0] = ARM9Mem.ARM9_ABG+(0x20000*2); // BG-VRAM break; case 1 | (3 << 3) : MMU.vram_mode[adr-REG_VRAMCNTA] = 3; // BG-VRAM + memset(ARM9Mem.ARM9_ABG+0x60000,0,0x20000); //MMU.vram_offset[0] = ARM9Mem.ARM9_ABG+(0x20000*3); // BG-VRAM break; - case 0: /* mapped to lcd */ + case 0: // mapped to lcd MMU.vram_mode[adr-REG_VRAMCNTA] = 4 | (adr-REG_VRAMCNTA) ; break ; } - /* - * FIXME: simply texture slot handling - * This is a first stab and is not correct. It does - * not handle a VRAM texture slot becoming - * unconfigured. - * Revisit all of VRAM control handling for future - * release? - */ + // + // FIXME: simply texture slot handling + // This is a first stab and is not correct. It does + // not handle a VRAM texture slot becoming + // unconfigured. + // Revisit all of VRAM control handling for future + // release? + // if ( val & 0x80) { if ( (val & 0x7) == 3) { int slot_index = (val >> 3) & 0x3; @@ -1024,6 +1084,7 @@ void FASTCALL MMU_write8(u32 proc, u32 adr, u8 val) MMU_VRAMReloadFromLCD(adr-REG_VRAMCNTA,val) ; } break; + case REG_VRAMCNTE : if(proc == ARMCPU_ARM9) { @@ -1204,7 +1265,12 @@ void FASTCALL MMU_write16(u32 proc, u32 adr, u16 val) return; } #endif - + if(proc==ARMCPU_ARM9 && adr<0x02000000) + { + //printlog("MMU ITCM (16) Write %08X: %08X\n", adr, val); + return T1WriteWord(ARM9Mem.ARM9_ITCM, adr&0x7FFF, val); + } + // CFlash writing, Mic if ((adr>=0x08800000)&&(adr<0x09900000)) { @@ -1837,6 +1903,11 @@ void FASTCALL MMU_write32(u32 proc, u32 adr, u32 val) return ; } #endif + if(proc==ARMCPU_ARM9 && adr<0x02000000) + { + //printlog("MMU ITCM (32) Write %08X: %08X\n", adr, val); + return T1WriteLong(ARM9Mem.ARM9_ITCM, adr&0x7FFF, val); + } // CFlash writing, Mic if ((adr>=0x9000000)&&(adr<0x9900000)) { @@ -2267,6 +2338,11 @@ void FASTCALL MMU_write32(u32 proc, u32 adr, u32 val) return; } + case 0x04000600: + { + MMU.gfxfifo.irq=(val>>30)&3; + return; + } case REG_DISPA_WININ: { if(proc == ARMCPU_ARM9) diff --git a/desmume/src/MMU.h b/desmume/src/MMU.h index 8cdbacc43..0a957d3cc 100644 --- a/desmume/src/MMU.h +++ b/desmume/src/MMU.h @@ -71,7 +71,8 @@ typedef struct { u8 ARM9_RW_MODE; - FIFO fifos[16]; + FIFO fifos[16]; + GFXFIFO gfxfifo; u32 * MMU_WAIT16[2]; u32 * MMU_WAIT32[2]; diff --git a/desmume/src/NDSSystem.c b/desmume/src/NDSSystem.c index d6c712ad2..021ec6f99 100644 --- a/desmume/src/NDSSystem.c +++ b/desmume/src/NDSSystem.c @@ -1467,6 +1467,14 @@ NDS_exec(s32 nb, BOOL force) MMU.CheckDMAs &= ~(1<<(3+(1<<2))); } } + + if(MMU.reg_IE[0]&(1<<21)) + { + if(MMU.gfxfifo.irq==0) return nds.cycles; + if(MMU.gfxfifo.irq==3) return nds.cycles; + if(MMU.gfxfifo.irq==1 && MMU.gfxfifo.half) NDS_makeARM9Int(21); + if(MMU.gfxfifo.irq==2 && MMU.gfxfifo.empty) NDS_makeARM9Int(21); + } if((MMU.reg_IF[0]&MMU.reg_IE[0]) && (MMU.reg_IME[0])) { diff --git a/desmume/src/debug.h b/desmume/src/debug.h index 2508f0f0a..f6a432a0f 100644 --- a/desmume/src/debug.h +++ b/desmume/src/debug.h @@ -53,8 +53,12 @@ void LogStop(void); #ifdef DEBUG #define LOG(...) DebugPrintf(MainLog, __FILE__, __LINE__, __VA_ARGS__) #else +#if defined(WIN32) && defined(BETAVERSION) && defined(OLD_LOG) +#define LOG(...) printlog(__VA_ARGS__) +#else #define LOG(...) #endif +#endif #ifdef GPUDEBUG #define GPULOG(...) DebugPrintf(MainLog, __FILE__, __LINE__, __VA_ARGS__) diff --git a/desmume/src/matrix.c b/desmume/src/matrix.c index 555d62b15..5f56e0bcd 100644 --- a/desmume/src/matrix.c +++ b/desmume/src/matrix.c @@ -23,13 +23,37 @@ #include #include "matrix.h" -void MatrixInit (float *matrix) +void __fastcall MatrixInit (float *matrix) { memset (matrix, 0, sizeof(float)*16); - matrix[0] = matrix[5] = matrix[10] = matrix[15] = 1.f; } +#ifdef SSE2 +void __fastcall MatrixIdentity (float *matrix) //============== TODO +{ + memset (matrix, 0, sizeof(float)*16); + matrix[0] = matrix[5] = matrix[10] = matrix[15] = 1.f; +} + +float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix) +{ + int iMod = index%4, iDiv = (index>>2)<<2; + + return (matrix[iMod ]*rightMatrix[iDiv ])+(matrix[iMod+ 4]*rightMatrix[iDiv+1])+ + (matrix[iMod+8]*rightMatrix[iDiv+2])+(matrix[iMod+12]*rightMatrix[iDiv+3]); +} + +void __fastcall MatrixSet (float *matrix, int x, int y, float value) // TODO +{ + matrix [x+(y<<2)] = value; +} + +void __fastcall MatrixCopy (float *matrixDST, float *matrixSRC) +{ + memcpy (matrixDST, matrixSRC, sizeof(float)*16); +} +#else void MatrixMultVec4x4 (float *matrix, float *vecPtr) { float x = vecPtr[0]; @@ -85,45 +109,6 @@ void MatrixMultiply (float *matrix, float *rightMatrix) memcpy (matrix, tmpMatrix, sizeof(float)*16); } -/* -void MatrixMulti (float* right) -{ - float tmpMatrix[16]; - - tmpMatrix[0] = (matrix[0]*right[0])+(matrix[4]*right[1])+(matrix[8]*right[2])+(matrix[12]*right[3]); - tmpMatrix[1] = (matrix[1]*right[0])+(matrix[5]*right[1])+(matrix[9]*right[2])+(matrix[13]*right[3]); - tmpMatrix[2] = (matrix[2]*right[0])+(matrix[6]*right[1])+(matrix[10]*right[2])+(matrix[14]*right[3]); - tmpMatrix[3] = (matrix[3]*right[0])+(matrix[7]*right[1])+(matrix[11]*right[2])+(matrix[15]*right[3]); - - tmpMatrix[4] = (matrix[0]*right[4])+(matrix[4]*right[5])+(matrix[8]*right[6])+(matrix[12]*right[7]); - tmpMatrix[5] = (matrix[1]*right[4])+(matrix[5]*right[5])+(matrix[9]*right[6])+(matrix[13]*right[7]); - tmpMatrix[6] = (matrix[2]*right[4])+(matrix[6]*right[5])+(matrix[10]*right[6])+(matrix[14]*right[7]); - tmpMatrix[7] = (matrix[3]*right[4])+(matrix[7]*right[5])+(matrix[11]*right[6])+(matrix[15]*right[7]); - - tmpMatrix[8] = (matrix[0]*right[8])+(matrix[4]*right[9])+(matrix[8]*right[10])+(matrix[12]*right[11]); - tmpMatrix[9] = (matrix[1]*right[8])+(matrix[5]*right[9])+(matrix[9]*right[10])+(matrix[13]*right[11]); - tmpMatrix[10] = (matrix[2]*right[8])+(matrix[6]*right[9])+(matrix[10]*right[10])+(matrix[14]*right[11]); - tmpMatrix[11] = (matrix[3]*right[8])+(matrix[7]*right[9])+(matrix[11]*right[10])+(matrix[15]*right[11]); - - tmpMatrix[12] = (matrix[0]*right[12])+(matrix[4]*right[13])+(matrix[8]*right[14])+(matrix[12]*right[15]); - tmpMatrix[13] = (matrix[1]*right[12])+(matrix[5]*right[13])+(matrix[9]*right[14])+(matrix[13]*right[15]); - tmpMatrix[14] = (matrix[2]*right[12])+(matrix[6]*right[13])+(matrix[10]*right[14])+(matrix[14]*right[15]); - tmpMatrix[15] = (matrix[3]*right[12])+(matrix[7]*right[13])+(matrix[11]*right[14])+(matrix[15]*right[15]); - - memcpy (matrix, tmpMatrix, sizeof(float)*16); -} - - -float* Matrix::Get (void) -{ - return matrix; -} - -float MatrixGet (float *matrix, int index) -{ - return matrix[index]; -} -*/ float MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix) { @@ -137,12 +122,7 @@ void MatrixSet (float *matrix, int x, int y, float value) { matrix [x+(y<<2)] = value; } -/* -void Matrix::Set (int pos, float value) -{ - matrix [pos] = value; -} -*/ + void MatrixCopy (float *matrixDST, float *matrixSRC) { memcpy (matrixDST, matrixSRC, sizeof(float)*16); @@ -173,16 +153,7 @@ void MatrixScale (float *matrix, float *ptr) matrix[10] *= ptr[2]; matrix[11] *= ptr[2]; } -/* -void Matrix::Set (float a11, float a21, float a31, float a41, - float a12, float a22, float a32, float a42, - float a13, float a23, float a33, float a43, - float a14, float a24, float a34, float a44) -{ -} -*/ - - +#endif //----------------------------------------- void MatrixStackInit (MatrixStack *stack) diff --git a/desmume/src/matrix.h b/desmume/src/matrix.h index 05c0786e2..d7b426ad6 100644 --- a/desmume/src/matrix.h +++ b/desmume/src/matrix.h @@ -21,14 +21,40 @@ #ifndef MATRIX_H #define MATRIX_H +#ifdef SSE2 + #include + #include + //typedef __declspec(align(16)) float gMatrix[4][4]; + //typedef float gMatrix[4][4]; + typedef float gMatrix[16]; +#endif + typedef struct MatrixStack { +#ifdef SSE2 + //gMatrix *matrix; float *matrix; +#else + float *matrix; +#endif int position; int size; } MatrixStack; -void MatrixInit (float *matrix); +#ifdef SSE2 +void __fastcall MatrixInit (float *matrix); +extern void __fastcall MatrixMultVec3x3 (const gMatrix matrix, const gMatrix vecPtr); +extern void __fastcall MatrixMultVec4x4 (const gMatrix matrix, const gMatrix vecPtr); +void __fastcall MatrixIdentity (float *matrix); +extern void __fastcall MatrixMultiply (const gMatrix matrix, const gMatrix rightMatrix); +float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix); +void __fastcall MatrixSet (float *matrix, int x, int y, float value); +void __fastcall MatrixCopy (const gMatrix matrixDST, const gMatrix matrixSRC); +extern void __fastcall MatrixTranslate (const gMatrix matrix, const gMatrix ptr); +extern void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr); +void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr); +#else +void __fastcall MatrixInit (float *matrix); void MatrixMultVec3x3 (float *matrix, float *vecPtr); void MatrixMultVec4x4 (float *matrix, float *vecPtr); void MatrixIdentity (float *matrix); @@ -38,6 +64,7 @@ void MatrixSet (float *matrix, int x, int y, float value); void MatrixCopy (float *matrixDST, float *matrixSRC); void MatrixTranslate (float *matrix, float *ptr); void MatrixScale (float *matrix, float *ptr); +#endif void MatrixStackInit (MatrixStack *stack); void MatrixStackSetMaxSize (MatrixStack *stack, int size); diff --git a/desmume/src/windows/OGLRender.c b/desmume/src/windows/OGLRender.c index d667a90e8..6c1213d10 100644 --- a/desmume/src/windows/OGLRender.c +++ b/desmume/src/windows/OGLRender.c @@ -27,6 +27,7 @@ #define WIN32_LEAN_AND_MEAN #include #include + #include #else #include #include @@ -53,9 +54,9 @@ static float* normalTable = NULL; static int numVertex = 0; // Matrix stack handling -static MatrixStack mtxStack[4]; -static float mtxCurrent [4][16]; -static float mtxTemporal[16]; +static __declspec(align(16)) MatrixStack mtxStack[4]; +static __declspec(align(16)) float mtxCurrent [4][16]; +static __declspec(align(16)) float mtxTemporal[16]; // Indexes for matrix loading/multiplication static char ML4x4ind = 0; @@ -65,14 +66,14 @@ static char MM4x3_c = 0, MM4x3_l = 0; static char MM3x3_c = 0, MM3x3_l = 0; // Data for vertex submission -static float coord[3] = {0.0, 0.0, 0.0}; +static __declspec(align(16)) float coord[4] = {0.0, 0.0, 0.0, 0.0}; static char coordind = 0; // Data for basic transforms -static float trans[3] = {0.0, 0.0, 0.0}; +static __declspec(align(16)) float trans[4] = {0.0, 0.0, 0.0, 0.0}; static char transind = 0; -static float scale[3] = {0.0, 0.0, 0.0}; +static __declspec(align(16)) float scale[4] = {0.0, 0.0, 0.0, 0.0}; static char scaleind = 0; static const unsigned short polyType[4] = {GL_TRIANGLES, GL_QUADS, GL_TRIANGLE_STRIP, GL_QUAD_STRIP}; @@ -108,6 +109,9 @@ static char beginCalled = 0; static unsigned int vtxFormat; static unsigned int textureFormat=0, texturePalette=0; static unsigned int lastTextureFormat=0, lastTexturePalette=0; +static u32 sizeX = 1; +static u32 sizeY = 1; +static u8 currentIDtexture=0; static int diffuse[4] = {0}, ambient[4] = {0}, @@ -124,6 +128,23 @@ LightInformation g_lightInfo[4] = { 0 }; #ifndef DESMUME_COCOA extern HWND hwnd; + +int CheckHardwareSupport(HDC hdc) +{ + int PixelFormat = GetPixelFormat(hdc); + PIXELFORMATDESCRIPTOR pfd; + + DescribePixelFormat(hdc,PixelFormat,sizeof(PIXELFORMATDESCRIPTOR),&pfd); + if ((pfd.dwFlags & PFD_GENERIC_FORMAT) && !(pfd.dwFlags & PFD_GENERIC_ACCELERATED)) + return 0; // Software acceleration OpenGL + + else if ((pfd.dwFlags & PFD_GENERIC_FORMAT) && (pfd.dwFlags & PFD_GENERIC_ACCELERATED)) + return 1; // Half hardware acceleration OpenGL (MCD driver) + + else if ( !(pfd.dwFlags & PFD_GENERIC_FORMAT) && !(pfd.dwFlags & PFD_GENERIC_ACCELERATED)) + return 2; // Full hardware acceleration OpenGL + return -1; // check error +} #endif char NDS_glInit(void) @@ -135,6 +156,8 @@ char NDS_glInit(void) HGLRC hRC = NULL; int pixelFormat; PIXELFORMATDESCRIPTOR pfd; + int res; + char *opengl_modes[3]={"software","half hardware (MCD driver)","hardware"}; oglDC = GetDC (hwnd); @@ -161,8 +184,15 @@ char NDS_glInit(void) if(!wglMakeCurrent(oglDC, hRC)) return 0; + res=CheckHardwareSupport(oglDC); + if (res>=0&&res<=2) + printlog("OpenGL mode: %s\n",opengl_modes[res]); + else + printlog("OpenGL mode: uknown\n"); #endif + currentIDtexture=0; + glClearColor (0.f, 0.f, 0.f, 1.f); glColor3f (1.f, 1.f, 1.f); @@ -488,178 +518,181 @@ void NDS_glMultMatrix4x4(signed long v) MatrixIdentity (mtxTemporal); } -static __inline void SetupTexture (unsigned int format, unsigned int palette) +#define CHECKSLOT txt_slot_current_size--;\ + if (txt_slot_current_size<=0)\ + {\ + txt_slot_current++;\ + adr=ARM9Mem.textureSlotAddr[txt_slot_current];\ + adr-=txt_slot_size;\ + txt_slot_size=(txt_slot_current_size=0x020000);\ + } + +#define RGB16TO32(col,alpha) (((alpha)<<24) | ((((col) & 0x7C00)>>7)<<16) | ((((col) & 0x3E0)>>2)<<8) | (((col) & 0x1F)<<3)) + +static __inline u32 *SetupTexture (unsigned int format, unsigned int palette) { - if(format == 0) - return; - else + unsigned short *pal = NULL; + unsigned int mode = (unsigned short)((format>>26)&0x7); + unsigned int imageSize = sizeX*sizeY; + unsigned int paletteSize = 0; + unsigned int palZeroTransparent = (1-((format>>29)&1))*255; // shash: CONVERT THIS TO A TABLE :) + unsigned int x=0, y=0; + unsigned char * dst = texMAP; + u64 txt_slot_current_size=0x020000-((format & 0x3FFF)<<3); + u64 txt_slot_size=txt_slot_size; + u64 txt_slot_current=(format>>14)&3; + unsigned char * adr = (unsigned char *)(ARM9Mem.textureSlotAddr[txt_slot_current]+((format&0x3FFF)<<3)); + //printlog("Format: %04X\n",(format >> 14 & 3)); + //printlog("Texture %08X: width=%d, height=%d\n", format, sizeX, sizeY); + + switch(mode) { - unsigned short *pal = NULL; - unsigned int sizeX = (1<<(((format>>20)&0x7)+3)); - unsigned int sizeY = (1<<(((format>>23)&0x7)+3)); - unsigned int mode = (unsigned short)((format>>26)&0x7); - unsigned char * adr = (unsigned char *)(ARM9Mem.ARM9_LCD + ((format&0xFFFF)<<3)); - //unsigned short param = (unsigned short)((format>>30)&0xF); - //unsigned short param2 = (unsigned short)((format>>16)&0xF); - unsigned int imageSize = sizeX*sizeY; - unsigned int paletteSize = 0; - unsigned int palZeroTransparent = (1-((format>>29)&1))*255; // shash: CONVERT THIS TO A TABLE :) - unsigned int x=0, y=0; - unsigned char * dst = texMAP; - //unsigned char *src = NULL; - - if (mode == 0) - glDisable (GL_TEXTURE_2D); - else - glEnable (GL_TEXTURE_2D); - - switch(mode) + case 1: { - case 1: + paletteSize = 256; + pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); + break; + } + case 2: + { + paletteSize = 4; + pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<3)); + imageSize >>= 2; + break; + } + case 3: + { + paletteSize = 16; + pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); + imageSize >>= 1; + break; + } + case 4: + { + paletteSize = 256; + pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); + break; + } + case 5: + { + paletteSize = 0; + pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); + break; + } + case 6: + { + paletteSize = 256; + pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); + break; + } + case 7: + { + paletteSize = 0; + break; + } + } + + //printlog("Texture mode %02i: x=%04d, colors=%04X, index=%04d\n",mode,x,pal[adr[x]],adr[x]); + switch(mode) + { + case 1: + { + for(x = 0; x < imageSize; x++, dst += 4) { - paletteSize = 256; - pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); - break; + unsigned short c = pal[adr[x]&31], alpha = (adr[x]>>5); + dst[0] = (unsigned char)((c & 0x1F)<<3); + dst[1] = (unsigned char)((c & 0x3E0)>>2); + dst[2] = (unsigned char)((c & 0x7C00)>>7); + dst[3] = ((alpha<<2)+(alpha>>1))<<3; + CHECKSLOT; } - case 2: + break; + } + case 2: + { + for(x = 0; x < imageSize; ++x) { - paletteSize = 4; - pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<3)); - imageSize >>= 2; - break; - } - case 3: - { - paletteSize = 16; - pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); - imageSize >>= 1; - break; - } - case 4: - { - paletteSize = 256; - pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); - break; - } - case 5: - { - paletteSize = 0; - pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); - break; - } - case 6: - { - paletteSize = 256; - pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); - break; - } - case 7: - { - paletteSize = 0; - break; + unsigned short c = pal[(adr[x])&0x3]; + dst[0] = ((c & 0x1F)<<3); + dst[1] = ((c & 0x3E0)>>2); + dst[2] = ((c & 0x7C00)>>7); + dst[3] = ((adr[x]&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; + dst += 4; + + c = pal[((adr[x])>>2)&0x3]; + dst[0] = ((c & 0x1F)<<3); + dst[1] = ((c & 0x3E0)>>2); + dst[2] = ((c & 0x7C00)>>7); + dst[3] = (((adr[x]>>2)&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; + dst += 4; + + c = pal[((adr[x])>>4)&0x3]; + dst[0] = ((c & 0x1F)<<3); + dst[1] = ((c & 0x3E0)>>2); + dst[2] = ((c & 0x7C00)>>7); + dst[3] = (((adr[x]>>4)&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; + dst += 4; + + c = pal[(adr[x])>>6]; + dst[0] = ((c & 0x1F)<<3); + dst[1] = ((c & 0x3E0)>>2); + dst[2] = ((c & 0x7C00)>>7); + dst[3] = (((adr[x]>>6)&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; + dst += 4; + CHECKSLOT; } } - - switch(mode) + break; + case 3: { - case 1: + for(x = 0; x < imageSize; x++) { - for(x = 0; x < imageSize; x++, dst += 4) - { - unsigned short c = pal[adr[x]&31], alpha = (adr[x]>>5); - dst[0] = (unsigned char)((c & 0x1F)<<3); - dst[1] = (unsigned char)((c & 0x3E0)>>2); - dst[2] = (unsigned char)((c & 0x7C00)>>7); - dst[3] = ((alpha<<2)+(alpha>>1))<<3; - } + unsigned short c = pal[adr[x]&0xF]; + dst[0] = ((c & 0x1F)<<3); + dst[1] = ((c & 0x3E0)>>2); + dst[2] = ((c & 0x7C00)>>7); + dst[3] = (((adr[x])&0xF) == 0) ? palZeroTransparent : 255;//(c>>15)*255; + dst += 4; - break; + c = pal[((adr[x])>>4)]; + dst[0] = ((c & 0x1F)<<3); + dst[1] = ((c & 0x3E0)>>2); + dst[2] = ((c & 0x7C00)>>7); + dst[3] = (((adr[x]>>4)&0xF) == 0) ? palZeroTransparent : 255;//(c>>15)*255; + dst += 4; + CHECKSLOT; } - case 2: + } + break; + + case 4: //===================== ? + { + //printlog("texture mode 4"); + for(x = 0; x < imageSize; ++x, dst += 4) { - for(x = 0; x < imageSize; ++x) - { - unsigned short c = pal[(adr[x])&0x3]; - dst[0] = ((c & 0x1F)<<3); - dst[1] = ((c & 0x3E0)>>2); - dst[2] = ((c & 0x7C00)>>7); - dst[3] = ((adr[x]&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; - dst += 4; - - c = pal[((adr[x])>>2)&0x3]; - dst[0] = ((c & 0x1F)<<3); - dst[1] = ((c & 0x3E0)>>2); - dst[2] = ((c & 0x7C00)>>7); - dst[3] = (((adr[x]>>2)&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; - dst += 4; - - c = pal[((adr[x])>>4)&0x3]; - dst[0] = ((c & 0x1F)<<3); - dst[1] = ((c & 0x3E0)>>2); - dst[2] = ((c & 0x7C00)>>7); - dst[3] = (((adr[x]>>4)&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; - dst += 4; - - c = pal[(adr[x])>>6]; - dst[0] = ((c & 0x1F)<<3); - dst[1] = ((c & 0x3E0)>>2); - dst[2] = ((c & 0x7C00)>>7); - dst[3] = (((adr[x]>>6)&3) == 0) ? palZeroTransparent : 255;//(c>>15)*255; - dst += 4; - } + unsigned short c = pal[adr[x]]; + dst[0] = (unsigned char)((c & 0x1F)<<3); + dst[1] = (unsigned char)((c & 0x3E0)>>2); + dst[2] = (unsigned char)((c & 0x7C00)>>7); + dst[3] = (adr[x] == 0) ? palZeroTransparent : 255; + CHECKSLOT; } - break; - case 3: - { - for(x = 0; x < imageSize; x++) - { - unsigned short c = pal[adr[x]&0xF]; - dst[0] = ((c & 0x1F)<<3); - dst[1] = ((c & 0x3E0)>>2); - dst[2] = ((c & 0x7C00)>>7); - dst[3] = (((adr[x])&0xF) == 0) ? palZeroTransparent : 255;//(c>>15)*255; - dst += 4; + } + break; - c = pal[((adr[x])>>4)]; - dst[0] = ((c & 0x1F)<<3); - dst[1] = ((c & 0x3E0)>>2); - dst[2] = ((c & 0x7C00)>>7); - dst[3] = (((adr[x]>>4)&0xF) == 0) ? palZeroTransparent : 255;//(c>>15)*255; - dst += 4; - } - } - break; + case 5: + { + unsigned short * pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); + unsigned short * slot1; + unsigned int * map = (unsigned int *)adr; + unsigned i = 0; + unsigned int * dst = (unsigned int *)texMAP; - case 4: - { - for(x = 0; x < imageSize; ++x) - { - unsigned short c = pal[adr[x]]; - dst[0] = (unsigned char)((c & 0x1F)<<3); - dst[1] = (unsigned char)((c & 0x3E0)>>2); - dst[2] = (unsigned char)((c & 0x7C00)>>7); - dst[3] = (adr[x] == 0) ? palZeroTransparent : 255;//(c>>15)*255; - dst += 4; - } - } - break; - - case 5: - { - // UNOPTIMIZED - unsigned short * pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); - unsigned short * slot1 = (unsigned short*)((unsigned char *)(ARM9Mem.ARM9_LCD + ((format&0xFFFF)<<3)/2 + 0x20000)); - unsigned int * map = ((unsigned int *)adr), i = 0; - unsigned int * dst = (unsigned int *)texMAP; - - /* FIXME: the texture slots do not have to follow the VRAM bank layout */ - if ( (format & 0xc000) == 0x8000) { - /* texel are in slot 2 */ - slot1 = (unsigned short*)((unsigned char *)(ARM9Mem.ARM9_LCD + ((format&0x3FFF)<<2) + 0x30000)); - } - else { - slot1 = (unsigned short*)((unsigned char *)(ARM9Mem.ARM9_LCD +((format&0x3FFF)<<2) + 0x20000)); - } + if ( (format & 0xc000) == 0x8000) + // texel are in slot 2 + slot1=(const unsigned short*)&ARM9Mem.textureSlotAddr[1][((format&0x3FFF)<<2)+0x010000]; + else + slot1=(const unsigned short*)&ARM9Mem.textureSlotAddr[1][(format&0x3FFF)<<2]; for (y = 0; y < (sizeY/4); y ++) { @@ -669,250 +702,115 @@ static __inline void SetupTexture (unsigned int format, unsigned int palette) u16 pal1 = slot1[i]; u16 pal1offset = (pal1 & 0x3FFF)<<1; u8 mode = pal1>>14; + u32 tmp_col[4]; - for (sy = 0; sy < 4; sy++) + tmp_col[0]=RGB16TO32(pal[pal1offset],255); + tmp_col[1]=RGB16TO32(pal[pal1offset+1],255); + + switch (mode) + { + case 0: + tmp_col[2]=RGB16TO32(pal[pal1offset+2],255); + tmp_col[3]=RGB16TO32(0x7FFF,0); + break; + case 1: + tmp_col[2]=(((tmp_col[0]&0xFF)+(tmp_col[1]&0xff))>>1)| + (((tmp_col[0]&(0xFF<<8))+(tmp_col[1]&(0xFF<<8)))>>1)| + (((tmp_col[0]&(0xFF<<16))+(tmp_col[1]&(0xFF<<16)))>>1)| + (0xff<<24); + tmp_col[3]=RGB16TO32(0x7FFF,0); + break; + case 2: + tmp_col[2]=RGB16TO32(pal[pal1offset+2],255); + tmp_col[3]=RGB16TO32(pal[pal1offset+3],255); + break; + case 3: { - // Texture offset - u32 xAbs = (x<<2); - u32 yAbs = ((y<<2) + sy); - u32 currentPos = xAbs + yAbs*sizeX; + u32 red1, red2; + u32 green1, green2; + u32 blue1, blue2; + u16 tmp1, tmp2; - // Palette - u8 currRow = (u8)((currBlock >> (sy*8)) & 0xFF); -#define RGB16TO32(col,alpha) (((alpha)<<24) | ((((col) & 0x7C00)>>7)<<16) | ((((col) & 0x3E0)>>2)<<8) | (((col) & 0x1F)<<3)) -#define RGB32(r,g,b,a) (((a)<<24) | ((r)<<16) | ((g)<<8) | (b)) + red1=tmp_col[0]&0xff; + green1=(tmp_col[0]>>8)&0xff; + blue1=(tmp_col[0]>>16)&0xff; + red2=tmp_col[1]&0xff; + green2=(tmp_col[1]>>8)&0xff; + blue2=(tmp_col[1]>>16)&0xff; - switch (mode) - { - case 0: - { - int i; + tmp1=((red1*5+red2*3)>>6)| + (((green1*5+green2*3)>>6)<<5)| + (((blue1*5+blue2*3)>>6)<<10); + tmp2=((red2*5+red1*3)>>6)| + (((green2*5+green1*3)>>6)<<5)| + (((blue2*5+blue1*3)>>6)<<10); - for ( i = 0; i < 4; i++) { - int texel = (currRow >> (2 * i)) & 3; + tmp_col[2]=RGB16TO32(tmp1,255); + tmp_col[3]=RGB16TO32(tmp2,255); + break; + } + } + for (sy = 0; sy < 4; sy++) + { + // Texture offset + u32 currentPos = (x<<2) + ((y<<2) + sy)*sizeX; + u8 currRow = (u8)((currBlock>>(sy*8))&0xFF); - if ( texel == 3) { - dst[currentPos+i] = RGB16TO32(0x7fff, 0); - } - else { - u16 colour = pal[pal1offset+texel]; - dst[currentPos+i] = RGB16TO32( colour, 255); - } - } - break; - } - case 1: - { - u16 colours[3]; - int i; - - colours[0] = pal[pal1offset + 0]; - colours[1] = pal[pal1offset + 1]; - colours[2] = - /* RED */ - (((colours[0] & 0x1f) + - (colours[1] & 0x1f)) >> 1) | - /* GREEN */ - (((colours[0] & (0x1f << 5)) + - (colours[1] & (0x1f << 5))) >> 1) | - /* BLUE */ - (((colours[0] & (0x1f << 10)) + - (colours[1] & (0x1f << 10))) >> 1); - - for ( i = 0; i < 4; i++) { - int texel = (currRow >> (2 * i)) & 3; - - if ( texel == 3) { - dst[currentPos+i] = RGB16TO32(0, 0); - } - else { - dst[currentPos+i] = RGB16TO32( colours[texel], 255); - } - } - break; - } - case 2: - { - u16 col0 = pal[pal1offset+((currRow>>0)&3)]; - u16 col1 = pal[pal1offset+((currRow>>2)&3)]; - u16 col2 = pal[pal1offset+((currRow>>4)&3)]; - u16 col3 = pal[pal1offset+((currRow>>6)&3)]; - - dst[currentPos+0] = RGB16TO32(col0, 255); - dst[currentPos+1] = RGB16TO32(col1, 255); - dst[currentPos+2] = RGB16TO32(col2, 255); - dst[currentPos+3] = RGB16TO32(col3, 255); - - break; - } - case 3: - { - u16 colours[4]; - int i; - u32 red0, red1; - u32 green0, green1; - u32 blue0, blue1; - - colours[0] = pal[pal1offset + 0]; - colours[1] = pal[pal1offset + 1]; - - red0 = colours[0] & 0x1f; - green0 = (colours[0] & (0x1f << 5)) >> 5; - blue0 = (colours[0] & (0x1f << 10)) >> 10; - - red1 = colours[1] & 0x1f; - green1 = (colours[1] & (0x1f << 5)) >> 5; - blue1 = (colours[1] & (0x1f << 10)) >> 10; - - /* (colour0 * 5 + colour1 * 3) / 8 */ - colours[2] = - /* red */ - ((red0 * 5 + red1 * 3) >> 3) | - /* green */ - (((green0 * 5 + green1 * 3) >> 3) << 5) | - /* blue */ - (((blue0 * 5 + blue1 * 3) >> 3) << 10); - - /* (colour0 * 3 + colour1 * 5) / 8 */ - colours[3] = - /* red */ - ((red0 * 3 + red1 * 5) >> 3) | - /* green */ - (((green0 * 3 + green1 * 5) >> 3) << 5) | - /* blue */ - (((blue0 * 3 + blue1 * 5) >> 3) << 10); - - - for ( i = 0; i < 4; i++) { - int texel = (currRow >> (2 * i)) & 3; - - dst[currentPos+i] = RGB16TO32(colours[texel], 255); - } - break; - } - } + dst[currentPos+0] = tmp_col[(currRow>>0)&3]; + dst[currentPos+1] = tmp_col[(currRow>>2)&3]; + dst[currentPos+2] = tmp_col[(currRow>>4)&3]; + dst[currentPos+3] = tmp_col[(currRow>>6)&3]; } } } - - break; - } - case 6: + txt_slot_current_size-=4;; + if (txt_slot_current_size<=0) { - for(x = 0; x < imageSize; x++) - { - unsigned short c = pal[adr[x]&7]; - dst[0] = (unsigned char)((c & 0x1F)<<3); - dst[1] = (unsigned char)((c & 0x3E0)>>2); - dst[2] = (unsigned char)((c & 0x7C00)>>7); - dst[3] = (adr[x]&0xF8); - dst += 4; - } - break; - } - case 7: - { - unsigned short * map = ((unsigned short *)adr); - for(x = 0; x < imageSize; ++x) - { - unsigned short c = map[x]; - dst[0] = ((c & 0x1F)<<3); - dst[1] = ((c & 0x3E0)>>2); - dst[2] = ((c & 0x7C00)>>7); - dst[3] = (c>>15)*255; - dst += 4; - } + txt_slot_current++; + map=ARM9Mem.textureSlotAddr[txt_slot_current]; + map-=txt_slot_size>>2; + txt_slot_size=(txt_slot_current_size=0x020000); } break; } - - glBindTexture(GL_TEXTURE_2D, oglTextureID); - - - switch ((format>>18)&3) + case 6: { - case 0: + for(x = 0; x < imageSize; x++, dst += 4) { - glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, sizeX, sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, texMAP); - break; + unsigned short c = pal[adr[x]&7]; + dst[0] = (unsigned char)((c & 0x1F)<<3); + dst[1] = (unsigned char)((c & 0x3E0)>>2); + dst[2] = (unsigned char)((c & 0x7C00)>>7); + dst[3] = (adr[x]&0xF8); + if (dst[3]!=0) dst[3]|=0x07; + CHECKSLOT; } - - case 1: + break; + } + case 7: + { + unsigned short * map = ((unsigned short *)adr); + for(x = 0; x < imageSize; ++x, dst += 4) { - u32 *src = (u32*)texMAP, *dst = (u32*)texMAP2; - - for (y = 0; y < sizeY; y++) - { - for (x = 0; x < sizeX; x++) - { - dst[y*sizeX*2 + x] = dst[y*sizeX*2 + (sizeX*2-x-1)] = src[y*sizeX + x]; - } - } - - sizeX <<= 1; - glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, sizeX, sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, texMAP2); - break; + unsigned short c = map[x]; + dst[0] = ((c & 0x1F)<<3); + dst[1] = ((c & 0x3E0)>>2); + dst[2] = ((c & 0x7C00)>>7); + dst[3] = (c>>15)*255; + } - - case 2: + txt_slot_current_size-=2;; + if (txt_slot_current_size<=0) { - u32 *src = (u32*)texMAP;//, *dst = (u32*)texMAP2; - - for (y = 0; y < sizeY; y++) - { - memcpy (&src[(sizeY*2-y-1)*sizeX], &src[y*sizeX], sizeX*4); - } - - sizeY <<= 1; - glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, sizeX, sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, texMAP); - break; - } - - case 3: - { - u32 *src = (u32*)texMAP, *dst = (u32*)texMAP2; - - for (y = 0; y < sizeY; y++) - { - for (x = 0; x < sizeX; x++) - { - dst[y*sizeX*2 + x] = dst[y*sizeX*2 + (sizeX*2-x-1)] = src[y*sizeX + x]; - } - } - - sizeX <<= 1; - - for (y = 0; y < sizeY; y++) - { - memcpy (&dst[(sizeY*2-y-1)*sizeX], &dst[y*sizeX], sizeX*4); - } - - sizeY <<= 1; - glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, sizeX, sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, texMAP2); - break; + txt_slot_current++; + map=ARM9Mem.textureSlotAddr[txt_slot_current]; + map-=txt_slot_size>>1; + txt_slot_size=(txt_slot_current_size=0x020000); } } - - invTexWidth = 1.f/((float)sizeX*(1<<4)); - invTexHeight = 1.f/((float)sizeY*(1<<4)); - - glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_NEAREST); - - // S Coordinate options - if (!BIT16(format)) - glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_CLAMP); - else - glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT); - - // T Coordinate options - if (!BIT17(format)) - glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_CLAMP); - else - glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT); - - texCoordinateTransform = (format>>30); + break; } + return texMAP; } void NDS_glBegin(unsigned long v) @@ -944,9 +842,6 @@ void NDS_glBegin(unsigned long v) glDisable (GL_LIGHTING); } - // texture environment - glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, envMode); - glDepthFunc (depthFuncMode); // Cull face @@ -982,18 +877,41 @@ void NDS_glBegin(unsigned long v) glPolygonMode (GL_BACK, GL_LINE); } - if (textureFormat != lastTextureFormat || - texturePalette != lastTexturePalette) + // texture environment + if (textureFormat!=0) { - SetupTexture (textureFormat, texturePalette); + glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, texEnv[envMode]); + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, oglTextureID); + if (textureFormat != lastTextureFormat || texturePalette != lastTexturePalette) + { + u32 *tmp; - lastTextureFormat = textureFormat; - lastTexturePalette = texturePalette; + sizeX = (1<<(((textureFormat>>20)&0x7)+3)); + sizeY = (1<<(((textureFormat>>23)&0x7)+3)); + tmp=SetupTexture (textureFormat, texturePalette); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, sizeX, sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, tmp); + glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_NEAREST); + // S Coordinate options + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (BIT16(textureFormat) ? GL_REPEAT : GL_CLAMP)); + // T Coordinate options + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (BIT17(textureFormat) ? GL_REPEAT : GL_CLAMP)); + + lastTextureFormat = textureFormat; + lastTexturePalette = texturePalette; + + texCoordinateTransform = (textureFormat>>30); + invTexWidth = 1.f/((float)sizeX*(1<<4)); + invTexHeight = 1.f/((float)sizeY*(1<<4)); + } + + glMatrixMode (GL_TEXTURE); + glLoadIdentity (); + glScaled (invTexWidth, invTexHeight, 1.f); } - - glMatrixMode (GL_TEXTURE); - glLoadIdentity (); - glScaled (invTexWidth, invTexHeight, 1.f); + else + glDisable(GL_TEXTURE_2D); glMatrixMode (GL_PROJECTION); glLoadMatrixf(mtxCurrent[0]); @@ -1025,7 +943,7 @@ void NDS_glColor3b(unsigned long v) static __inline void SetVertex() { - float coordTransformed[3] = { coord[0], coord[1], coord[2] }; + __declspec(align(16)) float coordTransformed[3] = { coord[0], coord[1], coord[2] }; if (texCoordinateTransform == 3) { @@ -1109,18 +1027,20 @@ int NDS_glGetNumVertex (void) void NDS_glGetLine (int line, unsigned short * dst) { int i; - u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*256*3]; + u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*768]; float *screen3Ddepth = &GPU_screen3Ddepth [(192-(line%192))*256]; + u32 r,g,b; - for(i = 0; i < 256; i++) + for(i = 0, t=0; i < 256; i++) { if (screen3Ddepth[i] < 1.f) { - u32 r = screen3D[i*3+0], - g = screen3D[i*3+1], - b = screen3D[i*3+2]; + t=i*3; + r = screen3D[t]; + g = screen3D[t+1]; + b = screen3D[t+2]; - dst[i] = (((r>>3)<<10) | ((g>>3)<<5) | (b>>3)); + dst[i] = ((r>>3)<<10) | ((g>>3)<<5) | (b>>3); } } } @@ -1260,10 +1180,11 @@ void NDS_glTexCoord(unsigned long val) if (texCoordinateTransform == 1) { - float s2 =( s* mtxCurrent[3][0] + t* mtxCurrent[3][4] + - (1.f/16.f)* mtxCurrent[3][8] + (1.f/16.f)* mtxCurrent[3][12]); - float t2 =( s* mtxCurrent[3][1] + t* mtxCurrent[3][5] + - (1.f/16.f)* mtxCurrent[3][9] + (1.f/16.f)* mtxCurrent[3][13]); + float s2, t2; + s2 =s*mtxCurrent[3][0] + t*mtxCurrent[3][4] + + 0.0625f*mtxCurrent[3][8] + 0.0625f*mtxCurrent[3][12]; + t2 =s*mtxCurrent[3][1] + t* mtxCurrent[3][5] + + 0.0625f*mtxCurrent[3][9] + 0.0625f*mtxCurrent[3][13]; glTexCoord2f (s2, t2); } @@ -1394,7 +1315,8 @@ void NDS_glControl(unsigned long v) void NDS_glNormal(unsigned long v) { - float normal[3] = { normalTable[v&1023], + + __declspec(align(16)) float normal[3] = { normalTable[v&1023], normalTable[(v>>10)&1023], normalTable[(v>>20)&1023]}; diff --git a/desmume/src/windows/main.c b/desmume/src/windows/main.c index 80bfa7cb0..441d8296c 100644 --- a/desmume/src/windows/main.c +++ b/desmume/src/windows/main.c @@ -57,6 +57,15 @@ #include "snddx.h" +#include +//===================== Init DirectDraw +LPDIRECTDRAW7 lpDDraw; +LPDIRECTDRAWSURFACE7 lpPrimarySurface; +LPDIRECTDRAWSURFACE7 lpBackSurface; +DDSURFACEDESC2 ddsd; +LPDIRECTDRAWCLIPPER lpDDClipPrimary; +LPDIRECTDRAWCLIPPER lpDDClipBack; + /* The compact flash disk image file */ static const char *bad_glob_cflash_disk_image_file; static char cflash_filename_buffer[512]; @@ -75,6 +84,7 @@ DWORD threadID; HWND hwnd; HDC hdc; HINSTANCE hAppInst; +RECT MainWindowRect; volatile BOOL execute = FALSE; volatile BOOL paused = TRUE; @@ -202,65 +212,12 @@ fill_configured_features( struct configured_features *config, LPSTR lpszArgument } // Rotation definitions -u8 GPU_screenrotated[4*256*192]; short GPU_rotation = 0; DWORD GPU_width = 256; DWORD GPU_height = 192*2; DWORD rotationstartscan = 192; DWORD rotationscanlines = 192*2; -void GPU_rotate(BITMAPV4HEADER *bmi) -{ - u16 *src, *dst; - int i,j, spos, dpos, desp; - src = (u16*)GPU_screen; - dst = (u16*)GPU_screenrotated; - - switch(GPU_rotation) - { - case 90: - desp=0; - for(i=0;i<256;i++) - { - dpos = 192*2*i; - spos = 256*(192*2-1) + desp; - while(spos > 0) - { - dst[dpos++] = src[spos]; - spos-=256; - } - desp++; - } - bmi->bV4Width = 192*2; - bmi->bV4Height = -256; - break; - case 270: - desp=255; - for(i=0;i<256;i++) - { - dpos = 192*2*i; - spos = desp; - while(spos < 256*192*2) - { - dst[dpos++] = src[spos]; - spos+=256; - } - desp--; - } - bmi->bV4Width = 192*2; - bmi->bV4Height = -256; - break; - case 180: - for(i=0; i < 256*192*2; i++) - dst[(256*192*2)-i] = src[i]; - bmi->bV4Width = 256; - bmi->bV4Height = -2*192; - break; - default: - memcpy(&GPU_screenrotated[0], &GPU_screen[0], sizeof(u8)*4*256*192); - } -} - void SetWindowClientSize(HWND hwnd, int cx, int cy) //found at: http://blogs.msdn.com/oldnewthing/archive/2003/09/11/54885.aspx { HMENU hmenu = GetMenu(hwnd); @@ -292,6 +249,22 @@ void SetWindowClientSize(HWND hwnd, int cx, int cy) //found at: http://blogs.msd SetWindowPos(hwnd, NULL, 0, 0, rcWindow.right - rcWindow.left, rcWindow.bottom - rcWindow.top, SWP_NOMOVE | SWP_NOZORDER); + if (lpBackSurface!=NULL) + { + IDirectDrawSurface7_Release(lpBackSurface); + memset(&ddsd, 0, sizeof(ddsd)); + ddsd.dwSize = sizeof(ddsd); + ddsd.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH; + ddsd.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN; + ddsd.dwWidth = cx; + ddsd.dwHeight = cy; + + if (IDirectDraw7_CreateSurface(lpDDraw, &ddsd, &lpBackSurface, NULL) != DD_OK) + { + MessageBox(hwnd,"Unable to change DirectDraw surface (back)","Error",MB_OK); + return -1; + } + } } void ScaleScreen(float factor) @@ -372,8 +345,6 @@ void Input_Post() DWORD WINAPI run( LPVOID lpParameter) { char txt[80]; - BITMAPV4HEADER bmi; - BITMAPV4HEADER rotationbmi; u32 cycles = 0; int wait=0; u64 freq; @@ -390,31 +361,64 @@ DWORD WINAPI run( LPVOID lpParameter) int fpsframecount=0; u64 fpsticks=0; - //CreateBitmapIndirect(&bmi); - memset(&bmi, 0, sizeof(bmi)); - bmi.bV4Size = sizeof(bmi); - bmi.bV4Planes = 1; - bmi.bV4BitCount = 16; - bmi.bV4V4Compression = BI_RGB|BI_BITFIELDS; - bmi.bV4RedMask = 0x001F; - bmi.bV4GreenMask = 0x03E0; - bmi.bV4BlueMask = 0x7C00; - bmi.bV4Width = 256; - bmi.bV4Height = -192*2; - - memset(&rotationbmi, 0, sizeof(rotationbmi)); - rotationbmi.bV4Size = sizeof(rotationbmi); - rotationbmi.bV4Planes = 1; - rotationbmi.bV4BitCount = 16; - rotationbmi.bV4V4Compression = BI_RGB|BI_BITFIELDS; - rotationbmi.bV4RedMask = 0x001F; - rotationbmi.bV4GreenMask = 0x03E0; - rotationbmi.bV4BlueMask = 0x7C00; - rotationbmi.bV4Width = 256; - rotationbmi.bV4Height = -192; + DDCAPS hw_caps, sw_caps; - NDS_3D_SetDriver (GPU3D_OPENGL); + if (DirectDrawCreateEx(NULL, &lpDDraw, &IID_IDirectDraw7, NULL) != DD_OK) + { + MessageBox(hwnd,"Unable to initialize DirectDraw","Error",MB_OK); + return -1; + } + if (IDirectDraw7_SetCooperativeLevel(lpDDraw,hwnd, DDSCL_NORMAL) != DD_OK) + { + MessageBox(hwnd,"Unable to set DirectDraw Cooperative Level","Error",MB_OK); + return -1; + } + + memset(&ddsd, 0, sizeof(ddsd)); + ddsd.dwSize = sizeof(ddsd); + ddsd.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE; + ddsd.dwFlags = DDSD_CAPS; + + if (IDirectDraw7_CreateSurface(lpDDraw, &ddsd, &lpPrimarySurface, NULL) != DD_OK) + { + MessageBox(hwnd,"Unable to create DirectDraw surface (primary)","Error",MB_OK); + return -1; + } + + memset(&ddsd, 0, sizeof(ddsd)); + ddsd.dwSize = sizeof(ddsd); + ddsd.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH; + ddsd.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN; + ddsd.dwWidth = 256; + ddsd.dwHeight = 384; + + if (IDirectDraw7_CreateSurface(lpDDraw, &ddsd, &lpBackSurface, NULL) != DD_OK) + { + MessageBox(hwnd,"Unable to create DirectDraw surface (back)","Error",MB_OK); + return -1; + } + + if (IDirectDraw7_CreateClipper(lpDDraw, 0, &lpDDClipPrimary, NULL) != DD_OK) + { + MessageBox(hwnd,"Unable to create DirectDraw clipper (Primary)","Error",MB_OK); + return -1; + } + + if (IDirectDrawClipper_SetHWnd(lpDDClipPrimary, 0, hwnd) != DD_OK) + { + MessageBox(hwnd,"Unable to set clipper for main window (Primary)","Error",MB_OK); + return -1; + } + + if (IDirectDrawSurface7_SetClipper(lpPrimarySurface, lpDDClipPrimary) != DD_OK) + { + MessageBox(hwnd,"Unable to set clipper (Primary)","Error",MB_OK); + return -1; + } + + NDS_3D_SetDriver (GPU3D_OPENGL); + if (!gpu3D->NDS_3D_Init ()) { MessageBox(hwnd,"Unable to initialize openGL","Error",MB_OK); @@ -436,18 +440,87 @@ DWORD WINAPI run( LPVOID lpParameter) if (!skipnextframe) { - if (GPU_rotation == 0) - { - RECT r ; - GetClientRect(hwnd,&r) ; - StretchDIBits (hdc, 0, 0, r.right-r.left, r.bottom-r.top, 0, 0, 256, 192*2, GPU_screen, (BITMAPINFO*)&bmi, DIB_RGB_COLORS,SRCCOPY); - } else - { - RECT r ; - GPU_rotate(&rotationbmi); - GetClientRect(hwnd,&r) ; - StretchDIBits(hdc, 0, 0, r.right-r.left, r.bottom-r.top, 0, 0, GPU_width, rotationscanlines, GPU_screenrotated, (BITMAPINFO*)&rotationbmi, DIB_RGB_COLORS,SRCCOPY); - } + memset(&ddsd, 0, sizeof(ddsd)); + ddsd.dwSize = sizeof(ddsd); + ddsd.dwFlags=DDSD_ALL; + + if (IDirectDrawSurface7_Lock(lpBackSurface,NULL,&ddsd,DDLOCK_WAIT, NULL) == DD_OK) + { + char* buffer = (char*)ddsd.lpSurface; + + int i, j, sz=256*sizeof(u32); + if (ddsd.ddpfPixelFormat.dwRGBBitCount>16) + { + u16 *tmpGPU_Screen_src=(u16*)GPU_screen; + u32 tmpGPU_screen[98304]; + for(i=0; i<98304; i++) + tmpGPU_screen[i]= (((tmpGPU_Screen_src[i]>>10)&0x1F)<<3)| + (((tmpGPU_Screen_src[i]>>5)&0x1F)<<11)| + (((tmpGPU_Screen_src[i])&0x1F)<<19); + switch (GPU_rotation) + { + case 0: + { + for (i = 0; i < 98304; i+=256) //384*256 + { + memcpy(buffer,tmpGPU_screen+i,sz); + buffer += ddsd.lPitch; + } + break; + } + case 90: + { + u32 start; + memset(buffer,0,384*ddsd.lPitch); + for (j=0; j<256; j++) + { + start=98304+j; + for (i=0; i<384; i++) + { + start-=256; + ((u32*)buffer)[i]=((u32 *)tmpGPU_screen)[start]; + } + buffer += ddsd.lPitch; + } + break; + } + case 180: + { + u32 start=98300; + for (j=0; j<384; j++) + { + for (i=0; i<256; i++, --start) + ((u32*)buffer)[i]=((u32 *)tmpGPU_screen)[start]; + buffer += ddsd.lPitch; + } + break; + } + case 270: + { + u32 start; + memset(buffer,0,384*ddsd.lPitch); + for (j=0; j<256; j++) + { + start=256-j; + for (i=0; i<384; i++) + { + ((u32*)buffer)[i]=((u32 *)tmpGPU_screen)[start]; + start+=256; + } + buffer += ddsd.lPitch; + } + break; + } + } + } + else + printlog("16bit depth color not supported"); + + + IDirectDrawSurface7_Unlock(lpBackSurface,ddsd.lpSurface); + IDirectDrawSurface7_Blt(lpPrimarySurface,&MainWindowRect,lpBackSurface,0, DDBLT_WAIT,0); + } + fpsframecount++; QueryPerformanceCounter((LARGE_INTEGER *)&curticks); if(curticks >= fpsticks + freq) @@ -525,6 +598,9 @@ DWORD WINAPI run( LPVOID lpParameter) paused = TRUE; Sleep(500); } + if (lpDDClipPrimary!=NULL) IDirectDraw7_Release(lpDDClipPrimary); + if (lpPrimarySurface != NULL) IDirectDraw7_Release(lpPrimarySurface); + if (lpDDraw != NULL) IDirectDraw7_Release(lpDDraw); return 1; } @@ -887,10 +963,44 @@ int WINAPI WinMain (HINSTANCE hThisInstance, return messages.wParam; } +void GetRect(HWND hwnd) +{ + POINT ptClient; + RECT rc; + + GetClientRect(hwnd,&rc); + ptClient.x=rc.left; + ptClient.y=rc.top; + ClientToScreen(hwnd,&ptClient); + MainWindowRect.left=ptClient.x; + MainWindowRect.top=ptClient.y; + ptClient.x=rc.right; + ptClient.y=rc.bottom; + ClientToScreen(hwnd,&ptClient); + MainWindowRect.right=ptClient.x; + MainWindowRect.bottom=ptClient.y; +} + LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam) { + static int tmp_execute; switch (message) // handle the messages { + /*case WM_ENTERMENULOOP: + { + if (execute) + { + NDS_Pause(); + tmp_execute=2; + } else tmp_execute=-1; + return 0; + } + case WM_EXITMENULOOP: + { + if (tmp_execute==2) NDS_UnPause(); + return 0; + }*/ + case WM_CREATE: { RECT clientSize, fullSize; @@ -927,13 +1037,17 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM PostQuitMessage (0); // send a WM_QUIT to the message queue return 0; } - case WM_SIZE: - if (ForceRatio) { - RECT fullSize; - GetWindowRect(hwnd, &fullSize); - ScaleScreen((fullSize.bottom - fullSize.top - heightTradeOff) / DefaultHeight); - } - return 0; + case WM_MOVE: + GetRect(hwnd); + return 0; + case WM_SIZE: + if (ForceRatio) { + RECT fullSize; + GetWindowRect(hwnd, &fullSize); + ScaleScreen((fullSize.bottom - fullSize.top - heightTradeOff) / DefaultHeight); + } + GetRect(hwnd); + return 0; case WM_CLOSE: { NDS_Pause();