diff --git a/Source/Core/Common/Src/MathUtil.cpp b/Source/Core/Common/Src/MathUtil.cpp index 8e15cef701..c30d3b6306 100644 --- a/Source/Core/Common/Src/MathUtil.cpp +++ b/Source/Core/Common/Src/MathUtil.cpp @@ -145,12 +145,15 @@ void SaveSSEState() saved_sse_state = _mm_getcsr(); } -void MatrixMul(int n, const float *a, const float *b, float *result) +inline void MatrixMul(int n, const float *a, const float *b, float *result) { - for(int i = 0; i < n; ++i) { - for(int j= 0; j < n; ++j) { + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < n; ++j) + { float temp = 0; - for(int k = 0; k < n; ++k) { + for (int k = 0; k < n; ++k) + { temp += a[i * n + k] * b[k * n + j]; } result[i * n + j] = temp; @@ -207,9 +210,9 @@ void Matrix33::Multiply(const Matrix33 &a, const Matrix33 &b, Matrix33 &result) void Matrix33::Multiply(const Matrix33 &a, const float vec[3], float result[3]) { - for(int i = 0; i < 3; ++i) { + for (int i = 0; i < 3; ++i) { result[i] = 0; - for(int k = 0; k < 3; ++k) { + for (int k = 0; k < 3; ++k) { result[i] += a.data[i * 3 + k] * vec[k]; } } @@ -226,13 +229,16 @@ void Matrix44::LoadIdentity(Matrix44 &mtx) void Matrix44::LoadMatrix33(Matrix44 &mtx, const Matrix33 &m33) { - for(int i = 0; i < 3; ++i) { - for(int j = 0; j < 3; ++j) { + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { mtx.data[i * 4 + j] = m33.data[i * 3 + j]; } } - for(int i = 0; i < 3; ++i) { + for (int i = 0; i < 3; ++i) + { mtx.data[i * 4 + 3] = 0; mtx.data[i + 12] = 0; } diff --git a/Source/Core/Common/Src/MathUtil.h b/Source/Core/Common/Src/MathUtil.h index 45f7317a50..3b9967f05f 100644 --- a/Source/Core/Common/Src/MathUtil.h +++ b/Source/Core/Common/Src/MathUtil.h @@ -157,4 +157,5 @@ public: float data[16]; }; + #endif // _MATH_UTIL_H_ diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp index 0c7738f1e0..2da395cd19 100644 --- a/Source/Core/Core/Src/Core.cpp +++ b/Source/Core/Core/Src/Core.cpp @@ -387,6 +387,10 @@ THREAD_RETURN 
EmuThread(void *pArg) VideoInitialize.pKeyPress = Callback_KeyPress; VideoInitialize.bWii = _CoreParameter.bWii; VideoInitialize.bUseDualCore = _CoreParameter.bUseDualCore; + VideoInitialize.pBBox = &PixelEngine::bbox[0]; + VideoInitialize.pBBoxActive = &PixelEngine::bbox_active; + + // May be needed for Stop and Start #ifdef SETUP_FREE_VIDEO_PLUGIN_ON_BOOT Plugins.FreeVideo(); diff --git a/Source/Core/Core/Src/HW/PixelEngine.cpp b/Source/Core/Core/Src/HW/PixelEngine.cpp index 7d4b3b63d2..480e9653b2 100644 --- a/Source/Core/Core/Src/HW/PixelEngine.cpp +++ b/Source/Core/Core/Src/HW/PixelEngine.cpp @@ -128,6 +128,9 @@ static bool g_bSignalFinishInterrupt; static int et_SetTokenOnMainThread; static int et_SetFinishOnMainThread; +u16 bbox[4]; +bool bbox_active; + void DoState(PointerWrap &p) { p.Do(m_ZConf); @@ -140,6 +143,9 @@ void DoState(PointerWrap &p) p.Do(g_bSignalTokenInterrupt); p.Do(g_bSignalFinishInterrupt); + + p.Do(bbox); + p.Do(bbox_active); } void UpdateInterrupts(); @@ -153,6 +159,13 @@ void Init() et_SetTokenOnMainThread = CoreTiming::RegisterEvent("SetToken", SetToken_OnMainThread); et_SetFinishOnMainThread = CoreTiming::RegisterEvent("SetFinish", SetFinish_OnMainThread); + + bbox[0] = 0x80; + bbox[1] = 0xA0; + bbox[2] = 0x80; + bbox[3] = 0xA0; + + bbox_active = false; } void Read16(u16& _uReturnValue, const u32 _iAddress) @@ -196,18 +209,12 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) // The return values for these BBOX registers need to be gotten from the bounding box of the object. // See http://code.google.com/p/dolphin-emu/issues/detail?id=360#c74 for more details. - case PE_BBOX_LEFT: - _uReturnValue = 0x80; - break; - case PE_BBOX_RIGHT: - _uReturnValue = 0xA0; - break; - case PE_BBOX_TOP: - _uReturnValue = 0x80; - break; - case PE_BBOX_BOTTOM: - _uReturnValue = 0xA0; - break; + + // 0x80, 0xa0, 0x80, 0xa0 makes Paper Mario happy. 
+ case PE_BBOX_LEFT: _uReturnValue = bbox[0]; INFO_LOG(PIXELENGINE, "R: BBOX_LEFT = %i", bbox[0]); bbox_active = false; break; + case PE_BBOX_RIGHT: _uReturnValue = bbox[1]; INFO_LOG(PIXELENGINE, "R: BBOX_RIGHT = %i", bbox[1]); bbox_active = false; break; + case PE_BBOX_TOP: _uReturnValue = bbox[2]; INFO_LOG(PIXELENGINE, "R: BBOX_TOP = %i", bbox[2]); bbox_active = false; break; + case PE_BBOX_BOTTOM: _uReturnValue = bbox[3]; INFO_LOG(PIXELENGINE, "R: BBOX_BOTTOM = %i", bbox[3]); bbox_active = false; break; default: WARN_LOG(PIXELENGINE, "(r16) unknown @ %08x", _iAddress); diff --git a/Source/Core/Core/Src/HW/PixelEngine.h b/Source/Core/Core/Src/HW/PixelEngine.h index 8785897dbc..ed8e7a164d 100644 --- a/Source/Core/Core/Src/HW/PixelEngine.h +++ b/Source/Core/Core/Src/HW/PixelEngine.h @@ -55,6 +55,10 @@ void SetToken(const u16 _token, const int _bSetTokenAcknowledge); void SetFinish(void); bool AllowIdleSkipping(); +// Bounding box functionality. Paper Mario (both) are a couple of the few games that use it. +extern u16 bbox[4]; +extern bool bbox_active; + } // end of namespace PixelEngine #endif diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index 8c6b2e6953..f58962f1d2 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -62,6 +62,7 @@ #define BPMEM_COPYFILTER1 0x54 #define BPMEM_CLEARBBOX1 0x55 #define BPMEM_CLEARBBOX2 0x56 +// what about 0x57? #define BPMEM_UNKNOWN 0x58 #define BPMEM_SCISSOROFFSET 0x59 #define BPMEM_UNKNOWN1 0x60 diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index 74c53906af..7773773439 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -211,7 +211,12 @@ void BPWritten(const Bypass& bp) } else { + // We should be able to get away with deactivating the current bbox tracking + // here. Not sure if there's a better spot to put this. 
// the number of lines copied is determined by the y scale * source efb height +#ifdef BBOX_SUPPORT + *g_VideoInitialize.pBBoxActive = false; +#endif const float yScale = bpmem.dispcopyyscale / 256.0f; const float xfbLines = ((bpmem.copyTexSrcWH.y + 1.0f) * yScale); RenderToXFB(bp, multirc, yScale, xfbLines, @@ -324,13 +329,35 @@ void BPWritten(const Bypass& bp) case BPMEM_CLEAR_Z: // Z Components (24-bit Zbuffer) break; // ------------------------- - // Culling Occulsion, we don't support this - // let's hope not many games use bboxes.. - // TODO(ector): add something that watches bboxes + // Bounding Box support // ------------------------- case BPMEM_CLEARBBOX1: - case BPMEM_CLEARBBOX2: + case BPMEM_CLEARBBOX2: { + +#ifdef BBOX_SUPPORT + // Which is which? These are GUESSES: low 10 bits = left/top, bits 10 and up = right/bottom. NOTE(review): the upper field is taken as newvalue >> 10 with no 0x3ff mask, so any bits above bit 19 would leak into it - confirm newvalue is limited to 20 bits here. + if (bp.address == BPMEM_CLEARBBOX1) { + int right = bp.newvalue >> 10; + int left = bp.newvalue & 0x3ff; + + // We should only set these if bbox is calculated properly. + g_VideoInitialize.pBBox[0] = left; + g_VideoInitialize.pBBox[1] = right; + *g_VideoInitialize.pBBoxActive = true; + // WARN_LOG(VIDEO, "ClearBBox LR: %i, %08x - %i, %i", bp.address, bp.newvalue, left, right); + } else { + int bottom = bp.newvalue >> 10; + int top = bp.newvalue & 0x3ff; + + // We should only set these if bbox is calculated properly. + g_VideoInitialize.pBBox[2] = top; + g_VideoInitialize.pBBox[3] = bottom; + *g_VideoInitialize.pBBoxActive = true; + // WARN_LOG(VIDEO, "ClearBBox TB: %i, %08x - %i, %i", bp.address, bp.newvalue, top, bottom); + } +#endif break; + } case BPMEM_ZCOMPARE: // Set the Z-Compare case BPMEM_TEXINVALIDATE: // Used, if game has manual control the Texture Cache, which we don't allow case BPMEM_MIPMAP_STRIDE: // MipMap Stride Channel @@ -363,14 +390,14 @@ void BPWritten(const Bypass& bp) PanicAlert("Unknown is not 0xF! val = 0x%08x", bp.newvalue); break; - // Cases added due to: http://code.google.com/p/dolphin-emu/issues/detail?id=360#c90 - // Are these related to BBox? 
case BPMEM_UNKNOWN1: case BPMEM_UNKNOWN2: case BPMEM_UNKNOWN3: case BPMEM_UNKNOWN4: - + // Cases added due to: http://code.google.com/p/dolphin-emu/issues/detail?id=360#c90 + // Are these related to BBox? break; + // ------------------------------------------------ // On Default, we try to look for other things // before we give up and say its an unknown opcode diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index ab1b42dee4..d02e0afcfd 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -38,6 +38,10 @@ #include "VertexLoader_Color.h" #include "VertexLoader_TextCoord.h" +//BBox +#include "XFMemory.h" +extern float GC_ALIGNED16(g_fProjectionMatrix[16]); + #define USE_JIT #define COMPILED_CODE_SIZE 4096 @@ -82,6 +86,50 @@ void LOADERDECL PosMtx_Write() *VertexManager::s_pCurBufferPointer++ = 0; } +void LOADERDECL UpdateBoundingBox() +{ + if (!*g_VideoInitialize.pBBoxActive) + return; + + // Truly evil hack, reading backwards from the write pointer to re-read the position that was just written. If we were writing to write-only + // memory like we might have been with a D3D vertex buffer, this would have been a bad idea. NOTE(review): assumes s_pCurBufferPointer is a byte pointer, so that -12 steps back over three floats - confirm. + float *data = (float *)(VertexManager::s_pCurBufferPointer - 12); + // We must transform the just loaded point by the current world and projection matrix - in software. + // Then convert to screen space and update the bounding box (pBBox[0..3] are used as min x, max x, min y, max y). 
+ float p[3] = {data[0], data[1], data[2]}; + + const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; + const float *proj_matrix = &g_fProjectionMatrix[0]; + + float t[3]; + t[0] = p[0] * world_matrix[0] + p[1] * world_matrix[1] + p[2] * world_matrix[2] + world_matrix[3]; + t[1] = p[0] * world_matrix[4] + p[1] * world_matrix[5] + p[2] * world_matrix[6] + world_matrix[7]; + t[2] = p[0] * world_matrix[8] + p[1] * world_matrix[9] + p[2] * world_matrix[10] + world_matrix[11]; + + float o[4]; + o[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3]; + o[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7]; + o[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11]; + o[3] = t[0] * proj_matrix[12] + t[1] * proj_matrix[13] + t[2] * proj_matrix[14] + proj_matrix[15]; + + o[0] /= o[3]; + o[1] /= o[3]; + + // Map the post-divide x/y from [-1, 1] onto a hard-coded 640x480 screen; should possibly adjust for viewport? NOTE(review): o[3] is divided by above without a zero/negative check, and o[2] is computed but never read. + o[0] = (o[0] + 1.0f) * 320.0f; + o[1] = (o[1] + 1.0f) * 240.0f; + + if (o[0] < g_VideoInitialize.pBBox[0]) g_VideoInitialize.pBBox[0] = std::max(0.0f, o[0]); + if (o[0] > g_VideoInitialize.pBBox[1]) g_VideoInitialize.pBBox[1] = std::min(640.0f, o[0]); + if (o[1] < g_VideoInitialize.pBBox[2]) g_VideoInitialize.pBBox[2] = std::max(0.0f, o[1]); + if (o[1] > g_VideoInitialize.pBBox[3]) g_VideoInitialize.pBBox[3] = std::min(480.0f, o[1]); + /* + if (GetAsyncKeyState(VK_LSHIFT)) { + ERROR_LOG(VIDEO, "XForm: %f %f %f to %f %f", p[0], p[1], p[2], o[0], o[1]); + ERROR_LOG(VIDEO, "%i %i %i %i", g_VideoInitialize.pBBox[0], g_VideoInitialize.pBBox[1], g_VideoInitialize.pBBox[2], g_VideoInitialize.pBBox[3]); + }*/ +} + void LOADERDECL TexMtx_ReadDirect_UByte() { s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f; @@ -247,6 +295,12 @@ void VertexLoader::CompileVertexTranslator() break; } + // OK, so we just got a point. Let's go back and read it for the bounding box. 
+ +#ifdef BBOX_SUPPORT + WriteCall(UpdateBoundingBox); +#endif + // Normals vtx_decl.num_normals = 0; if (m_VtxDesc.Normal != NOT_PRESENT) { diff --git a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp index 73927c7265..fb46200404 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp @@ -36,7 +36,8 @@ namespace VertexManager void Flush(); } -static float s_fMaterials[16]; +static float GC_ALIGNED16(s_fMaterials[16]); +float GC_ALIGNED16(g_fProjectionMatrix[16]); // track changes static bool bTexMatricesChanged[2], bPosNormalMatrixChanged, bProjectionChanged, bViewportChanged; @@ -204,8 +205,6 @@ void VertexShaderManager::SetConstants(bool proj_hax_1,bool Hack_hack1 ,float Ha if (bProjectionChanged) { bProjectionChanged = false; - static float GC_ALIGNED16(g_fProjectionMatrix[16]); - if (xfregs.rawProjection[6] == 0) { // Perspective g_fProjectionMatrix[0] = xfregs.rawProjection[0]; diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index c5a32bf06c..be393a60f2 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -45,7 +45,15 @@ enum // TODO: figure out what to do with PAL }; +// If this is enabled, bounding boxes will be computed for everything drawn. +// This can theoretically have a big speed hit in some geom heavy games. Needs more work. +// Helps some effects in Paper Mario (but they aren't quite right yet). +// May help Super Mario Galaxy? +// Do testing to figure out if the speed hit is bad? +// #define BBOX_SUPPORT + extern SVideoInitialize g_VideoInitialize; + // (mb2) for XFB update hack. 
TODO: find a static better place extern volatile u32 g_XFBUpdateRequested; diff --git a/Source/PluginSpecs/pluginspecs_video.h b/Source/PluginSpecs/pluginspecs_video.h index 7d205f6a1e..f25659e7cc 100644 --- a/Source/PluginSpecs/pluginspecs_video.h +++ b/Source/PluginSpecs/pluginspecs_video.h @@ -72,6 +72,11 @@ typedef struct void *pMemoryBase; bool bWii; bool bUseDualCore; + + unsigned short *pBBox; // points to four shorts: left, right, top, bottom (indices 0..3, the order the PE_BBOX_* reads and the BPMEM_CLEARBBOX writes use) + // TODO: + bool *pBBoxActive; // set when the bbox is reset via BPMEM_CLEARBBOX1/2, cleared when a PE_BBOX_* register is read: we guess that after a bbox reset, we only need to track bbox size until the corresponding read. + } SVideoInitialize; /////////////////////////////////////////////////////////////////////////////////////////////////////