diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index dc7f069a3..cf1e2d04c 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -18,14 +18,6 @@ along with the this software. If not, see . */ -#ifdef FASTBUILD - #undef FORCEINLINE - #define FORCEINLINE - //compilation speed hack (cuts time exactly in half by cutting out permutations) - #define DISABLE_MOSAIC - #define DISABLE_COLOREFFECTDISABLEHINT -#endif - #include "GPU.h" #include @@ -47,7 +39,13 @@ #include "matrix.h" #include "emufile.h" -u32 Render3DFramesPerSecond; +#ifdef FASTBUILD + #undef FORCEINLINE + #define FORCEINLINE + //compilation speed hack (cuts time exactly in half by cutting out permutations) + #define DISABLE_MOSAIC + #define DISABLE_COLOREFFECTDISABLEHINT +#endif //instantiate static instance u16 GPUEngineBase::_brightnessUpTable555[17][0x8000]; @@ -6759,6 +6757,11 @@ GPUSubsystem::GPUSubsystem() _displayTouch = new NDSDisplay(NDSDisplayID_Touch); _displayTouch->SetEngine(_engineSub); + _videoFrameCount = 0; + _render3DFrameCount = 0; + _frameNeedsFinish = false; + _willAutoApplyMasterBrightness = true; + _willAutoConvertRGB666ToRGB888 = true; _willAutoResolveToCustomBuffer = true; //TODO OSD @@ -6853,6 +6856,9 @@ void GPUSubsystem::Reset() this->SetCustomFramebufferSize(this->_displayInfo.customWidth, this->_displayInfo.customHeight); } + this->_videoFrameCount = 0; + this->_render3DFrameCount = 0; + this->ClearWithColor(0xFFFF); this->_displayInfo.didPerformCustomRender[NDSDisplayID_Main] = false; @@ -6883,19 +6889,30 @@ void GPUSubsystem::Reset() } void GPUSubsystem::ForceRender3DFinishAndFlush(bool willFlush) +{ + bool need3DDisplayFramebuffer; + bool need3DCaptureFramebuffer; + CurrentRenderer->GetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer); + + CurrentRenderer->SetFramebufferFlushStates(willFlush, willFlush); + CurrentRenderer->RenderFinish(); + CurrentRenderer->SetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer); 
+} + +void GPUSubsystem::ForceFrameStop() { if (CurrentRenderer->GetRenderNeedsFinish()) { - bool need3DDisplayFramebuffer; - bool need3DCaptureFramebuffer; - CurrentRenderer->GetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer); - - CurrentRenderer->SetFramebufferFlushStates(willFlush, willFlush); - CurrentRenderer->RenderFinish(); - CurrentRenderer->SetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer); + this->ForceRender3DFinishAndFlush(true); CurrentRenderer->SetRenderNeedsFinish(false); this->_event->DidRender3DEnd(); } + + if (this->_frameNeedsFinish) + { + this->_frameNeedsFinish = false; + this->_event->DidFrameEnd(false); + } } void GPUSubsystem::UpdateRenderProperties() @@ -6981,6 +6998,11 @@ const NDSDisplayInfo& GPUSubsystem::GetDisplayInfo() return this->_displayInfo; } +u32 GPUSubsystem::GetFPSRender3D() const +{ + return this->_render3DFrameCount; +} + void GPUSubsystem::SetDisplayDidCustomRender(NDSDisplayID displayID, bool theState) { this->_displayInfo.didPerformCustomRender[displayID] = theState; @@ -7023,8 +7045,6 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h, void *clientNati return; } - GPU->ForceRender3DFinishAndFlush(false); - const float customWidthScale = (float)w / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; const float customHeightScale = (float)h / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT; const float newGpuLargestDstLineCount = (size_t)ceilf(customHeightScale); @@ -7165,8 +7185,6 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h) void GPUSubsystem::SetColorFormat(const NDSColorFormat outputFormat, void *clientNativeBuffer, void *clientCustomBuffer) { - GPU->ForceRender3DFinishAndFlush(false); - this->_displayInfo.colorFormat = outputFormat; this->_displayInfo.pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? 
sizeof(u16) : sizeof(FragmentColor); @@ -7291,6 +7309,26 @@ u16* GPUSubsystem::GetCustomVRAMAddressUsingMappedAddress(const u32 mappedAddr) return (this->GetEngineMain()->GetCustomVRAMBlockPtr(blockID) + (_gpuCaptureLineIndex[blockLine] * this->_displayInfo.customWidth) + _gpuDstPitchIndex[linePixel]); } +bool GPUSubsystem::GetWillAutoApplyMasterBrightness() const +{ + return this->_willAutoApplyMasterBrightness; +} + +void GPUSubsystem::SetWillAutoApplyMasterBrightness(const bool willAutoApply) +{ + this->_willAutoApplyMasterBrightness = willAutoApply; +} + +bool GPUSubsystem::GetWillAutoConvertRGB666ToRGB888() const +{ + return this->_willAutoConvertRGB666ToRGB888; +} + +void GPUSubsystem::SetWillAutoConvertRGB666ToRGB888(const bool willAutoConvert) +{ + this->_willAutoConvertRGB666ToRGB888 = willAutoConvert; +} + bool GPUSubsystem::GetWillAutoResolveToCustomBuffer() const { return this->_willAutoResolveToCustomBuffer; @@ -7305,45 +7343,25 @@ template void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested) { const bool isDisplayCaptureNeeded = this->_engineMain->WillDisplayCapture(l); - const bool isFramebufferRenderNeeded[2] = {(CommonSettings.showGpu.main && !this->_engineMain->GetIsMasterBrightFullIntensity()) || isDisplayCaptureNeeded, + const bool isFramebufferRenderNeeded[2] = { CommonSettings.showGpu.main && !this->_engineMain->GetIsMasterBrightFullIntensity(), CommonSettings.showGpu.sub && !this->_engineSub->GetIsMasterBrightFullIntensity() }; + if (!this->_frameNeedsFinish) + { + this->_event->DidFrameBegin(isFrameSkipRequested); + this->_frameNeedsFinish = true; + } + if (l == 0) { - this->_event->DidFrameBegin(isFrameSkipRequested); - // Clear displays to black if they are turned off by the user. 
if (!isFrameSkipRequested) { this->UpdateRenderProperties(); - - if (!isFramebufferRenderNeeded[GPUEngineID_Main]) - { - if (!CommonSettings.showGpu.main) - { - memset(this->_engineMain->renderedBuffer, 0, this->_engineMain->renderedWidth * this->_engineMain->renderedHeight * this->_displayInfo.pixelBytes); - } - else if (this->_engineMain->GetIsMasterBrightFullIntensity()) - { - this->_engineMain->ApplyMasterBrightness(); - } - } - - if (!isFramebufferRenderNeeded[GPUEngineID_Sub]) - { - if (!CommonSettings.showGpu.sub) - { - memset(this->_engineSub->renderedBuffer, 0, this->_engineSub->renderedWidth * this->_engineSub->renderedHeight * this->_displayInfo.pixelBytes); - } - else if (this->_engineSub->GetIsMasterBrightFullIntensity()) - { - this->_engineSub->ApplyMasterBrightness(); - } - } } } - if (isFramebufferRenderNeeded[GPUEngineID_Main] && !isFrameSkipRequested) + if ( (isFramebufferRenderNeeded[GPUEngineID_Main] || isDisplayCaptureNeeded) && !isFrameSkipRequested ) { // GPUEngineA:WillRender3DLayer() and GPUEngineA:WillCapture3DLayerDirect() both rely on register // states that might change on a per-line basis. 
Therefore, we need to check these states on a @@ -7387,6 +7405,17 @@ void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested) if (l == 191) { + this->_engineMain->FramebufferPostprocess(); + this->_engineSub->FramebufferPostprocess(); + + this->_videoFrameCount++; + if (this->_videoFrameCount == 60) + { + this->_render3DFrameCount = gfx3d.render3DFrameCount; + gfx3d.render3DFrameCount = 0; + this->_videoFrameCount = 0; + } + if (!isFrameSkipRequested) { if (this->_displayInfo.isCustomSizeRequested) @@ -7405,17 +7434,42 @@ void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested) this->_displayInfo.renderedWidth[NDSDisplayID_Touch] = this->_displayTouch->GetEngine()->renderedWidth; this->_displayInfo.renderedHeight[NDSDisplayID_Touch] = this->_displayTouch->GetEngine()->renderedHeight; - if (isFramebufferRenderNeeded[GPUEngineID_Main]) + if (this->_willAutoApplyMasterBrightness) { - this->_engineMain->ApplyMasterBrightness(); + if (CommonSettings.showGpu.main) + { + if (this->_engineMain->GetIsMasterBrightFullIntensity()) + { + this->_engineMain->ApplyMasterBrightness(); + } + else + { + this->_engineMain->ApplyMasterBrightness(); + } + } + else + { + memset(this->_engineMain->renderedBuffer, 0, this->_engineMain->renderedWidth * this->_engineMain->renderedHeight * this->_displayInfo.pixelBytes); + } + + if (CommonSettings.showGpu.sub) + { + if (this->_engineSub->GetIsMasterBrightFullIntensity()) + { + this->_engineSub->ApplyMasterBrightness(); + } + else + { + this->_engineSub->ApplyMasterBrightness(); + } + } + else + { + memset(this->_engineSub->renderedBuffer, 0, this->_engineSub->renderedWidth * this->_engineSub->renderedHeight * this->_displayInfo.pixelBytes); + } } - if (isFramebufferRenderNeeded[GPUEngineID_Sub]) - { - this->_engineSub->ApplyMasterBrightness(); - } - - if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) + if ( (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) && this->_willAutoConvertRGB666ToRGB888 ) { 
this->_engineMain->ResolveRGB666ToRGB888(); this->_engineSub->ResolveRGB666ToRGB888(); @@ -7428,18 +7482,11 @@ void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested) } } - this->_engineMain->FramebufferPostprocess(); - this->_engineSub->FramebufferPostprocess(); - - gfx3d._videoFrameCount++; - if (gfx3d._videoFrameCount == 60) + if (this->_frameNeedsFinish) { - Render3DFramesPerSecond = gfx3d.render3DFrameCount; - gfx3d.render3DFrameCount = 0; - gfx3d._videoFrameCount = 0; + this->_frameNeedsFinish = false; + this->_event->DidFrameEnd(isFrameSkipRequested); } - - this->_event->DidFrameEnd(isFrameSkipRequested); } } diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 12e6cbc9d..7d194481b 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -1608,6 +1608,11 @@ private: NDSDisplay *_displayMain; NDSDisplay *_displayTouch; + u32 _videoFrameCount; // Internal variable that increments when a video frame is completed. Resets every 60 video frames. + u32 _render3DFrameCount; // The current 3D rendering frame count, saved to this variable once every 60 video frames. + bool _frameNeedsFinish; + bool _willAutoApplyMasterBrightness; + bool _willAutoConvertRGB666ToRGB888; bool _willAutoResolveToCustomBuffer; u16 *_customVRAM; u16 *_customVRAMBlank; @@ -1628,7 +1633,11 @@ public: void Reset(); void ForceRender3DFinishAndFlush(bool willFlush); + void ForceFrameStop(); + const NDSDisplayInfo& GetDisplayInfo(); // Frontends need to call this whenever they need to read the video buffers from the emulator core + u32 GetFPSRender3D() const; + void SetDisplayDidCustomRender(NDSDisplayID displayID, bool theState); GPUEngineA* GetEngineMain(); @@ -1649,6 +1658,26 @@ public: void UpdateRenderProperties(); + // By default, the output framebuffer will have the master brightness applied before + // the DidFrameEnd event. The master brightness is applied using the CPU. 
+ // + // To turn off this behavior, call SetWillAutoApplyMasterBrightness() and pass a value + // of "false". This can be useful if the client wants to apply the master brightness + // itself, for example, if a client applies it on the GPU. + bool GetWillAutoApplyMasterBrightness() const; + void SetWillAutoApplyMasterBrightness(const bool willAutoApply); + + // By default, if the output framebuffer is in RGB666 format, then the framebuffers will + // automatically be converted to the much more common RGB888 format. This conversion is + // performed on the CPU. + // + // To turn off this behavior, call SetWillAutoConvertRGB666ToRGB888() and pass a value + // of "false". This can be useful if the client wants to do its own post-processing + // while the color format is still RGB666, or if the client wants to do its own custom + // conversion (such as converting the framebuffer later on the GPU). + bool GetWillAutoConvertRGB666ToRGB888() const; + void SetWillAutoConvertRGB666ToRGB888(const bool willAutoConvert); + // Normally, the GPUs will automatically resolve their native buffers to the master // custom framebuffer at the end of V-blank so that all rendered graphics are contained // within a single common buffer. 
This is necessary for when someone wants to read diff --git a/desmume/src/Makefile.am b/desmume/src/Makefile.am index 6bbce87be..bcf5d2996 100644 --- a/desmume/src/Makefile.am +++ b/desmume/src/Makefile.am @@ -88,15 +88,45 @@ libdesmume_a_SOURCES = \ utils/tinyxml/tinyxmlerror.cpp \ utils/tinyxml/tinyxmlparser.cpp \ utils/glcorearb.h \ - addons/slot2_auto.cpp addons/slot2_mpcf.cpp addons/slot2_paddle.cpp addons/slot2_gbagame.cpp addons/slot2_none.cpp addons/slot2_rumblepak.cpp addons/slot2_guitarGrip.cpp addons/slot2_expMemory.cpp addons/slot2_piano.cpp addons/slot2_passme.cpp addons/slot1_none.cpp addons/slot1_r4.cpp addons/slot1_retail_nand.cpp addons/slot1_retail_auto.cpp addons/slot1_retail_mcrom.cpp addons/slot1_retail_mcrom_debug.cpp addons/slot1comp_mc.cpp addons/slot1comp_mc.h addons/slot1comp_rom.h addons/slot1comp_rom.cpp addons/slot1comp_protocol.h addons/slot1comp_protocol.cpp \ + addons/slot2_auto.cpp \ + addons/slot2_mpcf.cpp \ + addons/slot2_paddle.cpp \ + addons/slot2_gbagame.cpp \ + addons/slot2_none.cpp \ + addons/slot2_rumblepak.cpp \ + addons/slot2_guitarGrip.cpp \ + addons/slot2_expMemory.cpp \ + addons/slot2_piano.cpp \ + addons/slot2_passme.cpp \ + addons/slot1_none.cpp \ + addons/slot1_r4.cpp \ + addons/slot1_retail_nand.cpp \ + addons/slot1_retail_auto.cpp \ + addons/slot1_retail_mcrom.cpp \ + addons/slot1_retail_mcrom_debug.cpp \ + addons/slot1comp_mc.cpp \ + addons/slot1comp_mc.h \ + addons/slot1comp_rom.h \ + addons/slot1comp_rom.cpp \ + addons/slot1comp_protocol.h \ + addons/slot1comp_protocol.cpp \ cheatSystem.cpp cheatSystem.h \ texcache.cpp texcache.h rasterize.cpp rasterize.h \ metaspu/metaspu.cpp metaspu/metaspu.h \ - filter/2xsai.cpp filter/bilinear.cpp filter/epx.cpp filter/filter.h \ - filter/hq2x.cpp filter/hq2x.h \ - filter/hq3x.cpp filter/hq3x.dat \ - filter/hq4x.cpp filter/hq4x.dat \ - filter/interp.h filter/lq2x.cpp filter/lq2x.h filter/scanline.cpp \ + filter/2xsai.cpp \ + filter/bilinear.cpp \ + filter/deposterize.cpp \ + 
filter/epx.cpp \ + filter/filter.h \ + filter/hq2x.cpp \ + filter/hq2x.h \ + filter/hq3x.cpp \ + filter/hq3x.dat \ + filter/hq4x.cpp \ + filter/hq4x.dat \ + filter/interp.h \ + filter/lq2x.cpp filter/lq2x.h \ + filter/scanline.cpp \ filter/videofilter.cpp filter/videofilter.h \ filter/xbrz.cpp filter/xbrz.h \ version.cpp version.h \ @@ -110,18 +140,18 @@ libdesmume_a_SOURCES = \ libretro-common/rthreads/async_job.c \ libretro-common/rthreads/rsemaphore.c \ libretro-common/rthreads/rthreads.c - -if SUPPORT_SSE2 += \ + +if SUPPORT_SSE2 libdesmume_a_SOURCES += \ utils/colorspacehandler/colorspacehandler_SSE2.cpp endif -if SUPPORT_AVX2 += \ +if SUPPORT_AVX2 libdesmume_a_SOURCES += \ utils/colorspacehandler/colorspacehandler_AVX2.cpp endif -if SUPPORT_ALTIVEC += \ +if SUPPORT_ALTIVEC libdesmume_a_SOURCES += \ utils/colorspacehandler/colorspacehandler_AltiVec.cpp endif diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp index 63e68a262..318481d8d 100644 --- a/desmume/src/NDSSystem.cpp +++ b/desmume/src/NDSSystem.cpp @@ -1391,8 +1391,8 @@ static void execHardware_hstart_vblankStart() triggerDma(EDMAMode_VBlank); //tracking for arm9 load average - nds.runCycleCollector[0][nds.idleFrameCounter] = 1120380-nds.idleCycles[0]; - nds.runCycleCollector[1][nds.idleFrameCounter] = 1120380-nds.idleCycles[1]; + nds.runCycleCollector[ARMCPU_ARM9][nds.idleFrameCounter] = 1120380-nds.idleCycles[0]; + nds.runCycleCollector[ARMCPU_ARM7][nds.idleFrameCounter] = 1120380-nds.idleCycles[1]; nds.idleFrameCounter++; nds.idleFrameCounter &= 15; nds.idleCycles[0] = 0; @@ -2953,9 +2953,12 @@ void NDS_swapScreen() } -void emu_halt() { +void emu_halt() +{ //printf("halting emu: ARM9 PC=%08X/%08X, ARM7 PC=%08X/%08X\n", NDS_ARM9.R[15], NDS_ARM9.instruct_adr, NDS_ARM7.R[15], NDS_ARM7.instruct_adr); execute = false; + GPU->ForceFrameStop(); + #ifdef LOG_ARM9 if (fp_dis9) { diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index c2f64fedb..bf703e5da 100644 --- 
a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -636,10 +636,42 @@ static void OGLGetDriverVersion(const char *oglVersionString, } } -void texDeleteCallback(TexCacheItem *texItem, void *param1, void *param2) +OpenGLTexture::OpenGLTexture() { - OpenGLRenderer *oglRenderer = (OpenGLRenderer *)param1; - oglRenderer->DeleteTexture(texItem); + _cacheSize = GetUnpackSizeUsingFormat(TexFormat_32bpp); + _invSizeS = 0.0f; + _invSizeT = 0.0f; + + glGenTextures(1, &_texID); +} + +OpenGLTexture::OpenGLTexture(u32 texAttributes, u32 palAttributes) : TextureStore(texAttributes, palAttributes) +{ + _cacheSize = GetUnpackSizeUsingFormat(TexFormat_32bpp); + _invSizeS = 1.0f / (float)_sizeS; + _invSizeT = 1.0f / (float)_sizeT; + + glGenTextures(1, &_texID); +} + +OpenGLTexture::~OpenGLTexture() +{ + glDeleteTextures(1, &this->_texID); +} + +GLuint OpenGLTexture::GetID() const +{ + return this->_texID; +} + +GLfloat OpenGLTexture::GetInvWidth() const +{ + return this->_invSizeS; +} + +GLfloat OpenGLTexture::GetInvHeight() const +{ + return this->_invSizeT; } template @@ -871,8 +903,8 @@ OpenGLRenderer::OpenGLRenderer() ref->fboPostprocessID = 0; ref->selectedRenderingFBO = 0; - currTexture = NULL; _mappedFramebuffer = NULL; + _workingTextureUnpackBuffer = (FragmentColor *)malloc_alignedCacheLine(1024 * 1024 * sizeof(FragmentColor)); _pixelReadNeedsFinish = false; _currentPolyIndex = 0; _shadowPolyID.reserve(POLYLIST_SIZE); @@ -881,6 +913,7 @@ OpenGLRenderer::OpenGLRenderer() OpenGLRenderer::~OpenGLRenderer() { free_aligned(_framebufferColor); + free_aligned(_workingTextureUnpackBuffer); // Destroy OpenGL rendering states delete ref; @@ -1196,14 +1229,7 @@ OpenGLRenderer_1_2::~OpenGLRenderer_1_2() DestroyMultisampledFBO(); // Kill the texture cache now before all of our texture IDs disappear. 
- TexCache_Reset(); - - while(!ref->freeTextureIDs.empty()) - { - GLuint temp = ref->freeTextureIDs.front(); - ref->freeTextureIDs.pop(); - glDeleteTextures(1, &temp); - } + texCache.Reset(); glFinish(); } @@ -1351,7 +1377,6 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() INFO("OpenGL: Multisampled FBOs are unsupported. Multisample antialiasing will be disabled.\n"); } - this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last return OGLERROR_NOERR; @@ -2062,13 +2087,6 @@ Render3DError OpenGLRenderer_1_2::InitFinalRenderStates(const std::setExpandFreeTextures(); - - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_1_2::InitTables() { static bool needTableInit = true; @@ -2227,20 +2245,6 @@ void OpenGLRenderer_1_2::GetExtensionSet(std::set *oglExtensionSet) } } -Render3DError OpenGLRenderer_1_2::ExpandFreeTextures() -{ - static const GLsizei kInitTextures = 128; - GLuint oglTempTextureID[kInitTextures]; - glGenTextures(kInitTextures, oglTempTextureID); - - for(GLsizei i = 0; i < kInitTextures; i++) - { - this->ref->freeTextureIDs.push(oglTempTextureID[i]); - } - - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_1_2::EnableVertexAttributes() { OGLRenderRef &OGLRef = *this->ref; @@ -2415,17 +2419,6 @@ Render3DError OpenGLRenderer_1_2::ReadBackPixels() return OGLERROR_NOERR; } -Render3DError OpenGLRenderer_1_2::DeleteTexture(const TexCacheItem *item) -{ - this->ref->freeTextureIDs.push((GLuint)item->texid); - if(this->currTexture == item) - { - this->currTexture = NULL; - } - - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine) { OGLRenderRef &OGLRef = *this->ref; @@ -2694,7 +2687,7 @@ Render3DError OpenGLRenderer_1_2::RenderGeometry(const GFX3D_State &renderState, Render3DError OpenGLRenderer_1_2::EndRender(const u64 frameCount) { //needs to happen before endgl because it could free some textureids for expired cache items - TexCache_EvictFrame(); + 
texCache.Evict(); this->ReadBackPixels(); @@ -2955,122 +2948,142 @@ Render3DError OpenGLRenderer_1_2::SetupTexture(const POLY &thePoly, bool enableT return OGLERROR_NOERR; } + + OpenGLTexture *theTexture = (OpenGLTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + const bool isNewTexture = (theTexture == NULL); + + if (isNewTexture) + { + theTexture = new OpenGLTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(theTexture); + } + + const NDSTextureFormat packFormat = theTexture->GetPackFormat(); // Enable textures if they weren't already enabled if (this->isShaderSupported) { glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); - glUniform1i(OGLRef.uniformTexSingleBitAlpha, (params.texFormat != TEXMODE_A3I5 && params.texFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); + glUniform1i(OGLRef.uniformTexSingleBitAlpha, (packFormat != TEXMODE_A3I5 && packFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); + glUniform2f(OGLRef.uniformPolyTexScale, theTexture->GetInvWidth(), theTexture->GetInvHeight()); } else { glEnable(GL_TEXTURE_2D); + glMatrixMode(GL_TEXTURE); + glLoadIdentity(); + glScalef(theTexture->GetInvWidth(), theTexture->GetInvHeight(), 1.0f); } - TexCacheItem *newTexture = TexCache_SetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); - if(newTexture != this->currTexture) + glBindTexture(GL_TEXTURE_2D, theTexture->GetID()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + if (theTexture->IsLoadNeeded()) { - this->currTexture = newTexture; - //has the ogl renderer initialized the texture? 
- if(this->currTexture->GetDeleteCallback() == NULL) + theTexture->Unpack((u32 *)this->_workingTextureUnpackBuffer); + + const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; + size_t texWidth = theTexture->GetWidth(); + size_t texHeight = theTexture->GetHeight(); + + if (this->_textureDeposterizeDstSurface.Surface != NULL) { - this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); - - if(OGLRef.freeTextureIDs.empty()) - { - this->ExpandFreeTextures(); - } - - this->currTexture->texid = (u64)OGLRef.freeTextureIDs.front(); - OGLRef.freeTextureIDs.pop(); - - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = (u32 *)this->currTexture->decoded; - size_t texWidth = this->currTexture->sizeX; - size_t texHeight = this->currTexture->sizeY; - - if (this->_textureDeposterizeBuffer != NULL) - { - this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = this->_textureDeposterizeBuffer; - } - - switch (this->_textureScalingFactor) - { - case 1: - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; - - case 2: - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); - - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, 
this->_textureUpscaleBuffer); - - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; - } - - case 4: - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); - - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - texWidth = this->currTexture->sizeX; - texHeight = this->currTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; - } - - default: - break; - } - - if (this->_textureSmooth) - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); - } - else - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); - } - } - else - { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); + this->TextureDeposterize(textureSrc, texWidth, texHeight); + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; } - if (this->isShaderSupported) + switch (this->_textureScalingFactor) { - glUniform2f(OGLRef.uniformPolyTexScale, this->currTexture->invSizeX, this->currTexture->invSizeY); + case 1: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + case 2: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->GetWidth(), 
theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + case 4: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + + this->TextureUpscale<4>(packFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + default: + break; + } + + if (this->_textureSmooth) + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); } else { - glMatrixMode(GL_TEXTURE); - glLoadIdentity(); - glScalef(this->currTexture->invSizeX, this->currTexture->invSizeY, 1.0f); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } } + theTexture->ResetCacheAge(); + theTexture->IncreaseCacheUsageCount(1); + return OGLERROR_NOERR; } @@ -3121,7 +3134,6 @@ Render3DError OpenGLRenderer_1_2::Reset() memset(OGLRef.vertIndexBuffer, 0, OGLRENDER_VERT_INDEX_BUFFER_COUNT * sizeof(GLushort)); } - this->currTexture = NULL; this->_currentPolyIndex = 0; OGLRef.vtxPtrPosition = (GLvoid *)offsetof(VERT, coord); @@ -3133,7 +3145,7 @@ Render3DError OpenGLRenderer_1_2::Reset() memset(this->clearImagePolyIDBuffer, 0, sizeof(this->clearImagePolyIDBuffer)); memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer)); - TexCache_Reset(); + texCache.Reset(); return OGLERROR_NOERR; } @@ -3838,7 +3850,6 @@ Render3DError OpenGLRenderer_2_0::InitExtensions() INFO("OpenGL: Multisampled FBOs are unsupported. Multisample antialiasing will be disabled.\n"); } - this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last return OGLERROR_NOERR; @@ -4615,104 +4626,130 @@ Render3DError OpenGLRenderer_2_0::SetupTexture(const POLY &thePoly, bool enableT return OGLERROR_NOERR; } - glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); - glUniform1i(OGLRef.uniformTexSingleBitAlpha, (params.texFormat != TEXMODE_A3I5 && params.texFormat != TEXMODE_A5I3) ? 
GL_TRUE : GL_FALSE); + OpenGLTexture *theTexture = (OpenGLTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + const bool isNewTexture = (theTexture == NULL); - TexCacheItem *newTexture = TexCache_SetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); - if(newTexture != this->currTexture) + if (isNewTexture) { - this->currTexture = newTexture; - //has the ogl renderer initialized the texture? - if(this->currTexture->GetDeleteCallback() == NULL) + theTexture = new OpenGLTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(theTexture); + } + + const NDSTextureFormat packFormat = theTexture->GetPackFormat(); + + glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); + glUniform1i(OGLRef.uniformTexSingleBitAlpha, (packFormat != TEXMODE_A3I5 && packFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); + glUniform2f(OGLRef.uniformPolyTexScale, theTexture->GetInvWidth(), theTexture->GetInvHeight()); + + glBindTexture(GL_TEXTURE_2D, theTexture->GetID()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + if (theTexture->IsLoadNeeded()) + { + theTexture->Unpack((u32 *)this->_workingTextureUnpackBuffer); + + const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; + size_t texWidth = theTexture->GetWidth(); + size_t texHeight = theTexture->GetHeight(); + + if (this->_textureDeposterizeDstSurface.Surface != NULL) { - this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); - - if(OGLRef.freeTextureIDs.empty()) + this->TextureDeposterize(textureSrc, texWidth, texHeight); + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + } + + switch (this->_textureScalingFactor) + { + case 1: { - this->ExpandFreeTextures(); - } - - this->currTexture->texid = (u64)OGLRef.freeTextureIDs.front(); - OGLRef.freeTextureIDs.pop(); - - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = (u32 *)this->currTexture->decoded; - size_t texWidth = this->currTexture->sizeX; - size_t texHeight = this->currTexture->sizeY; - - if (this->_textureDeposterizeBuffer != NULL) - { - this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = this->_textureDeposterizeBuffer; - } - - switch (this->_textureScalingFactor) - { - case 1: - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; - - case 2: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + + if (isNewTexture) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); - - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } - - case 4: + else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); - - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - texWidth = this->currTexture->sizeX; - texHeight = this->currTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 1, 
GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } + break; + } + + case 2: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + case 4: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + + this->TextureUpscale<4>(packFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - default: - break; - } - - if (this->_textureSmooth) - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); - } - else - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; } + + default: + break; + } + + if (this->_textureSmooth) + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); } else { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } - - glUniform2f(OGLRef.uniformPolyTexScale, this->currTexture->invSizeX, this->currTexture->invSizeY); } + theTexture->ResetCacheAge(); + theTexture->IncreaseCacheUsageCount(1); + return OGLERROR_NOERR; } diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index a5cbf305d..976a811d0 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -24,6 +24,7 @@ #include #include #include "render3D.h" +#include "texcache.h" #include "types.h" #ifndef OGLRENDER_3_2_H @@ -491,9 +492,6 @@ struct OGLRenderRef GLuint vaoGeometryStatesID; GLuint vaoPostprocessStatesID; - // Textures - std::queue freeTextureIDs; - // Client-side Buffers GLfloat *color4fBuffer; GLushort *vertIndexBuffer; @@ -526,8 +524,6 @@ extern CACHE_ALIGN const GLfloat divide6bitBy63_LUT[64]; extern const GLfloat PostprocessVtxBuffer[16]; extern const GLubyte PostprocessElementBuffer[6]; -extern void texDeleteCallback(TexCacheItem *texItem, void *param1, void *param2); - //This is called by OGLRender whenever it initializes. //Platforms, please be sure to set this up. //return true if you successfully init. 
@@ -559,6 +555,23 @@ extern void (*OGLCreateRenderer_3_2_Func)(OpenGLRenderer **rendererPtr); bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision); +class OpenGLTexture : public TextureStore +{ +private: + GLuint _texID; + GLfloat _invSizeS; + GLfloat _invSizeT; + +public: + OpenGLTexture(); + OpenGLTexture(u32 texAttributes, u32 palAttributes); + virtual ~OpenGLTexture(); + + GLuint GetID() const; + GLfloat GetInvWidth() const; + GLfloat GetInvHeight() const; +}; + #if defined(ENABLE_SSE2) class OpenGLRenderer : public Render3D_SSE2 #else @@ -587,11 +600,9 @@ protected: bool isVAOSupported; bool willFlipFramebufferOnGPU; bool willConvertFramebufferOnGPU; - - // Textures - TexCacheItem *currTexture; - + FragmentColor *_mappedFramebuffer; + FragmentColor *_workingTextureUnpackBuffer; bool _pixelReadNeedsFinish; size_t _currentPolyIndex; std::vector _shadowPolyID; @@ -611,7 +622,6 @@ protected: virtual void DestroyGeometryProgram() = 0; virtual Render3DError CreateVAOs() = 0; virtual void DestroyVAOs() = 0; - virtual Render3DError InitTextures() = 0; virtual Render3DError InitFinalRenderStates(const std::set *oglExtensionSet) = 0; virtual Render3DError InitTables() = 0; virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, @@ -637,7 +647,6 @@ protected: virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer) = 0; virtual void GetExtensionSet(std::set *oglExtensionSet) = 0; - virtual Render3DError ExpandFreeTextures() = 0; virtual Render3DError EnableVertexAttributes() = 0; virtual Render3DError DisableVertexAttributes() = 0; virtual Render3DError DownsampleFBO() = 0; @@ -650,7 +659,6 @@ public: virtual ~OpenGLRenderer(); virtual Render3DError InitExtensions() = 0; - virtual Render3DError DeleteTexture(const TexCacheItem *item) = 0; bool 
IsExtensionPresent(const std::set *oglExtensionSet, const std::string extensionName) const; bool ValidateShaderCompile(GLuint theShader) const; @@ -675,7 +683,6 @@ protected: virtual void DestroyMultisampledFBO(); virtual Render3DError CreateVAOs(); virtual void DestroyVAOs(); - virtual Render3DError InitTextures(); virtual Render3DError InitFinalRenderStates(const std::set *oglExtensionSet); virtual Render3DError InitTables(); @@ -704,7 +711,6 @@ protected: virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer); virtual void GetExtensionSet(std::set *oglExtensionSet); - virtual Render3DError ExpandFreeTextures(); virtual Render3DError EnableVertexAttributes(); virtual Render3DError DisableVertexAttributes(); virtual Render3DError DownsampleFBO(); @@ -731,8 +737,6 @@ public: virtual Render3DError Reset(); virtual Render3DError RenderFinish(); virtual Render3DError SetFramebufferSize(size_t w, size_t h); - - virtual Render3DError DeleteTexture(const TexCacheItem *item); }; class OpenGLRenderer_1_3 : public OpenGLRenderer_1_2 diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index 7c48721b9..5626b677a 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -609,7 +609,6 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() } } - this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last return OGLERROR_NOERR; @@ -1686,7 +1685,6 @@ Render3DError OpenGLRenderer_3_2::SetupPolygon(const POLY &thePoly) Render3DError OpenGLRenderer_3_2::SetupTexture(const POLY &thePoly, bool enableTexturing) { - OGLRenderRef &OGLRef = *this->ref; const PolygonTexParams params = thePoly.getTexParams(); // Check if we need to use textures @@ -1695,99 +1693,125 @@ Render3DError OpenGLRenderer_3_2::SetupTexture(const POLY &thePoly, bool enableT return OGLERROR_NOERR; } - TexCacheItem 
*newTexture = TexCache_SetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); - if(newTexture != this->currTexture) + OpenGLTexture *theTexture = (OpenGLTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + const bool isNewTexture = (theTexture == NULL); + + if (isNewTexture) { - this->currTexture = newTexture; - //has the ogl renderer initialized the texture? - if(this->currTexture->GetDeleteCallback() == NULL) + theTexture = new OpenGLTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(theTexture); + } + + glBindTexture(GL_TEXTURE_2D, theTexture->GetID()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + if (theTexture->IsLoadNeeded()) + { + theTexture->Unpack((u32 *)this->_workingTextureUnpackBuffer); + + const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; + const NDSTextureFormat packFormat = theTexture->GetPackFormat(); + size_t texWidth = theTexture->GetWidth(); + size_t texHeight = theTexture->GetHeight(); + + if (this->_textureDeposterizeDstSurface.Surface != NULL) { - this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); - - if(OGLRef.freeTextureIDs.empty()) + this->TextureDeposterize(textureSrc, texWidth, texHeight); + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + } + + switch (this->_textureScalingFactor) + { + case 1: { - this->ExpandFreeTextures(); - } - - this->currTexture->texid = (u64)OGLRef.freeTextureIDs.front(); - OGLRef.freeTextureIDs.pop(); - - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = (u32 *)this->currTexture->decoded; - size_t texWidth = this->currTexture->sizeX; - size_t texHeight = this->currTexture->sizeY; - - if (this->_textureDeposterizeBuffer != NULL) - { - this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = this->_textureDeposterizeBuffer; - } - - switch (this->_textureScalingFactor) - { - case 1: - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; - - case 2: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + + if (isNewTexture) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); - - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } - - case 4: + else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); - - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - texWidth = 
this->currTexture->sizeX; - texHeight = this->currTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } + break; + } + + case 2: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + case 4: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + + this->TextureUpscale<4>(packFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - default: - break; - } - - if (this->_textureSmooth) - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); - } - else - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; } + + default: + break; + } + + if (this->_textureSmooth) + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); } else { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } } + theTexture->ResetCacheAge(); + theTexture->IncreaseCacheUsageCount(1); + return OGLERROR_NOERR; } diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index a938df977..3b11b7e21 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -796,8 +796,11 @@ void SPU_struct::WriteByte(u32 addr, u8 val) regs.ctl_ch1bypass = (val >> 4) & 1; regs.ctl_ch3bypass = (val >> 5) & 1; regs.masteren = (val >> 7) & 1; - for(u8 i=0; i<16; i++) - KeyProbe(i); + //from r4925 - after changing 'masteren', we retrigger any sounds? doubtful. + //maybe we STOP sounds here, but we don't enable them (this would retrigger any previous sounds that had finished; glitched AC:WW) + //(probably broken in r3299) + //after commenting this out, I checked bug #1356. seems unrelated. 
+ //for(int i=0; i<16; i++) KeyProbe(i); break; //SOUNDBIAS diff --git a/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj b/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj index d64c3c5f6..d4582bba9 100644 --- a/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj +++ b/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj @@ -112,6 +112,10 @@ AB2EE13117D57F5000F68622 /* fsnitro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB2EE13017D57F5000F68622 /* fsnitro.cpp */; }; AB2EE13217D57F5000F68622 /* fsnitro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB2EE13017D57F5000F68622 /* fsnitro.cpp */; }; AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB2EE13017D57F5000F68622 /* fsnitro.cpp */; }; + AB301BDF1D9C8BAC00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; + AB301BE01D9C8BCD00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; + AB301BE11D9C8BCE00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; + AB301BE21D9C8BCF00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; AB350BA51478AC96007165AC /* IOKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB350BA41478AC96007165AC /* IOKit.framework */; }; AB350D3B147A1D93007165AC /* HID_usage_strings.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB350D3A147A1D93007165AC /* HID_usage_strings.plist */; }; AB3701E5173A3FBF006E573E /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB74EC891738499C0026C41E /* Carbon.framework */; }; @@ -1323,6 +1327,7 @@ AB2EE12B17D57ED500F68622 /* slot1_retail_mcrom_debug.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = 
slot1_retail_mcrom_debug.cpp; sourceTree = ""; }; AB2EE12F17D57F5000F68622 /* fsnitro.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fsnitro.h; sourceTree = ""; }; AB2EE13017D57F5000F68622 /* fsnitro.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fsnitro.cpp; sourceTree = ""; }; + AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deposterize.cpp; sourceTree = ""; }; AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; }; AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = ""; }; AB3A655C16CC5416001F5D4A /* EmuControllerDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = EmuControllerDelegate.h; sourceTree = ""; }; @@ -2924,6 +2929,7 @@ children = ( ABFE14FA14C92FF5005D6699 /* 2xsai.cpp */, ABFE14FB14C92FF5005D6699 /* bilinear.cpp */, + AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */, ABFE14FC14C92FF5005D6699 /* epx.cpp */, ABFE14FE14C92FF5005D6699 /* hq2x.cpp */, AB4C81E31B21676C00ACECD5 /* hq3x.cpp */, @@ -3845,6 +3851,7 @@ ABD1041C1346652500AF11D1 /* cocoa_input.mm in Sources */, AB3E34C9134AF4500056477A /* cocoa_output.mm in Sources */, ABFEA8CB1BB4EC1100B08C25 /* smooth.c in Sources */, + AB301BE11D9C8BCE00246A93 /* deposterize.cpp in Sources */, ABD1041E1346652500AF11D1 /* cocoa_rom.mm in Sources */, AB80E04D142BC4A800A52038 /* cocoa_util.mm in Sources */, ABE5DFE5143FB1DA00835AD8 /* cocoa_videofilter.mm in Sources */, @@ -3949,6 +3956,7 @@ AB796CF815CDCBA200C59155 /* cp15.cpp in Sources */, AB796CF915CDCBA200C59155 /* 
cpu_detect_x86_gcc.cpp in Sources */, AB796CFA15CDCBA200C59155 /* crc.cpp in Sources */, + AB301BDF1D9C8BAC00246A93 /* deposterize.cpp in Sources */, AB796CFB15CDCBA200C59155 /* datetime.cpp in Sources */, AB796CFC15CDCBA200C59155 /* debug.cpp in Sources */, ABFEA82E1BB4EC1100B08C25 /* ftlcdfil.c in Sources */, @@ -4155,6 +4163,7 @@ ABFEA8361BB4EC1100B08C25 /* ftmm.c in Sources */, ABFEA81E1BB4EC1000B08C25 /* ftfstype.c in Sources */, ABA731601BB51E7000B26147 /* pshinter.c in Sources */, + AB301BE01D9C8BCD00246A93 /* deposterize.cpp in Sources */, ABFEA8211BB4EC1000B08C25 /* ftgasp.c in Sources */, ABFEA83C1BB4EC1100B08C25 /* ftotval.c in Sources */, ABFEA8181BB4EC1000B08C25 /* ftdebug.c in Sources */, @@ -4446,6 +4455,7 @@ ABB3C6B81501C04F00E0C22E /* common.cpp in Sources */, ABB3C6B91501C04F00E0C22E /* cp15.cpp in Sources */, AB407F371A6206FB00313213 /* xbrz.cpp in Sources */, + AB301BE21D9C8BCF00246A93 /* deposterize.cpp in Sources */, ABB3C6BA1501C04F00E0C22E /* debug.cpp in Sources */, ABB3C6BB1501C04F00E0C22E /* Disassembler.cpp in Sources */, ABB3C6BC1501C04F00E0C22E /* driver.cpp in Sources */, @@ -4575,13 +4585,13 @@ buildSettings = { GCC_OPTIMIZATION_LEVEL = fast; GCC_UNROLL_LOOPS = YES; + LLVM_LTO = YES_THIN; }; name = Debug; }; AB796D6F15CDCBA200C59155 /* Release */ = { isa = XCBuildConfiguration; buildSettings = { - GCC_OPTIMIZATION_LEVEL = fast; GCC_UNROLL_LOOPS = YES; LLVM_LTO = YES; }; diff --git a/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj b/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj index 5ebd32aa8..e44ca960f 100644 --- a/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj +++ b/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj @@ -1418,6 +1418,11 @@ ABB9212317CEB4110049D4C5 /* slot1comp_protocol.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABB9212017CEB4110049D4C5 /* slot1comp_protocol.cpp */; }; ABB9212417CEB4110049D4C5 /* slot1comp_protocol.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = ABB9212017CEB4110049D4C5 /* slot1comp_protocol.cpp */; }; ABB9212517CEB4110049D4C5 /* slot1comp_protocol.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABB9212017CEB4110049D4C5 /* slot1comp_protocol.cpp */; }; + ABBB4ACD1D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4ACE1D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4ACF1D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4AD01D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4AD11D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; ABBCE29715ACB1FF00A2C965 /* arm_jit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBCE29515ACB1FF00A2C965 /* arm_jit.cpp */; }; ABBCE29815ACB1FF00A2C965 /* arm_jit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBCE29515ACB1FF00A2C965 /* arm_jit.cpp */; }; ABBF04A614B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; }; @@ -1953,6 +1958,7 @@ ABB97873144E89CC00793FA3 /* Icon_ActionReplay_32x32.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_ActionReplay_32x32.png; path = Images/Icon_ActionReplay_32x32.png; sourceTree = ""; }; ABB97874144E89CC00793FA3 /* Icon_CodeBreaker_32x32.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_CodeBreaker_32x32.png; path = Images/Icon_CodeBreaker_32x32.png; sourceTree = ""; }; ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_DeSmuME_32x32.png; path = 
Images/Icon_DeSmuME_32x32.png; sourceTree = ""; }; + ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deposterize.cpp; sourceTree = ""; }; ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = ""; }; ABBCE29415ACB1E600A2C965 /* arm_jit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = arm_jit.h; path = ../arm_jit.h; sourceTree = SOURCE_ROOT; }; ABBCE29515ACB1FF00A2C965 /* arm_jit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arm_jit.cpp; path = ../arm_jit.cpp; sourceTree = SOURCE_ROOT; }; @@ -3451,6 +3457,7 @@ children = ( ABFE14FA14C92FF5005D6699 /* 2xsai.cpp */, ABFE14FB14C92FF5005D6699 /* bilinear.cpp */, + ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */, ABFE14FC14C92FF5005D6699 /* epx.cpp */, ABFE14FE14C92FF5005D6699 /* hq2x.cpp */, ABAAEFFE1B22361800E1269D /* hq3x.cpp */, @@ -4538,6 +4545,7 @@ AB50200C1D09E712002FA150 /* retro_stat.c in Sources */, AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */, AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */, + ABBB4AD11D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -4719,6 +4727,7 @@ AB5020181D09E712002FA150 /* retro_stat.c in Sources */, AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */, + ABBB4AD01D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -4930,6 +4939,7 @@ AB50200F1D09E712002FA150 /* retro_stat.c in Sources */, AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */, + ABBB4ACD1D9C927C00794E08 /* deposterize.cpp in Sources */, 
); runOnlyForDeploymentPostprocessing = 0; }; @@ -5141,6 +5151,7 @@ AB5020121D09E712002FA150 /* retro_stat.c in Sources */, AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */, + ABBB4ACE1D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -5322,6 +5333,7 @@ AB5020151D09E712002FA150 /* retro_stat.c in Sources */, AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */, + ABBB4ACF1D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/desmume/src/cocoa/cocoa_output.mm b/desmume/src/cocoa/cocoa_output.mm index 384c61c9c..328812c7a 100644 --- a/desmume/src/cocoa/cocoa_output.mm +++ b/desmume/src/cocoa/cocoa_output.mm @@ -869,7 +869,7 @@ [super handleEmuFrameProcessed]; NDSFrameInfo frameInfo; - frameInfo.render3DFPS = Render3DFramesPerSecond; + frameInfo.render3DFPS = GPU->GetFPSRender3D(); frameInfo.frameIndex = currFrameCounter; frameInfo.lagFrameCount = TotalLagFrames; diff --git a/desmume/src/commandline.cpp b/desmume/src/commandline.cpp index 049fa7887..8551db797 100644 --- a/desmume/src/commandline.cpp +++ b/desmume/src/commandline.cpp @@ -67,6 +67,7 @@ CommandLine::CommandLine() , start_paused(FALSE) , autodetect_method(-1) , render3d(COMMANDLINE_RENDER3D_DEFAULT) +, language(1) //english by default { #ifndef HOST_WINDOWS disable_sound = 0; @@ -117,6 +118,9 @@ ENDL " --bios-arm9 BIN_FILE Uses the ARM9 BIOS provided at the specified path" ENDL " --bios-arm7 BIN_FILE Uses the ARM7 BIOS provided at the specified path" ENDL " --bios-swi Uses SWI from the provided bios files (else HLE)" ENDL +" --lang N Firmware language (can affect game translations)" ENDL +" 0 = Japanese, 1 = English (default), 2 = French" ENDL +" 3 = German, 4 = Italian, 5 = Spanish" ENDL ENDL "Arguments affecting contents of SLOT-1:" 
ENDL " --slot1 [RETAIL|RETAILAUTO|R4|RETAILNAND|RETAILMCDROM|RETAILDEBUG]" ENDL @@ -163,6 +167,7 @@ ENDL #define OPT_CONSOLE_TYPE 200 #define OPT_ARM9 201 #define OPT_ARM7 202 +#define OPT_LANGUAGE 203 #define OPT_SLOT1 300 #define OPT_SLOT1_FAT_DIR 301 @@ -224,7 +229,8 @@ bool CommandLine::parse(int argc,char **argv) { "console-type", required_argument, NULL, OPT_CONSOLE_TYPE }, { "bios-arm9", required_argument, NULL, OPT_ARM9}, { "bios-arm7", required_argument, NULL, OPT_ARM7}, - { "bios-swi", required_argument, &_bios_swi, 1}, + { "bios-swi", no_argument, &_bios_swi, 1}, + { "lang", required_argument, NULL, OPT_LANGUAGE}, //slot-1 contents { "slot1", required_argument, NULL, OPT_SLOT1}, @@ -308,6 +314,7 @@ bool CommandLine::parse(int argc,char **argv) //utilities case OPT_ADVANSCENE: CommonSettings.run_advanscene_import = optarg; break; + case OPT_LANGUAGE: language = atoi(optarg); break; } } //arg parsing loop @@ -354,10 +361,10 @@ bool CommandLine::parse(int argc,char **argv) //process 3d renderer _render3d = strtoupper(_render3d); if(_render3d == "NONE") render3d = COMMANDLINE_RENDER3D_NONE; - if(_render3d == "SW") render3d = COMMANDLINE_RENDER3D_SW; - if(_render3d == "OLDGL") render3d = COMMANDLINE_RENDER3D_OLDGL; - if(_render3d == "AUTOGL") render3d = COMMANDLINE_RENDER3D_AUTOGL; - if(_render3d == "GL") render3d = COMMANDLINE_RENDER3D_GL; + else if(_render3d == "SW") render3d = COMMANDLINE_RENDER3D_SW; + else if(_render3d == "OLDGL") render3d = COMMANDLINE_RENDER3D_OLDGL; + else if(_render3d == "AUTOGL") render3d = COMMANDLINE_RENDER3D_AUTOGL; + else if(_render3d == "GL") render3d = COMMANDLINE_RENDER3D_GL; if (autodetect_method != -1) CommonSettings.autodetectBackupMethod = autodetect_method; diff --git a/desmume/src/commandline.h b/desmume/src/commandline.h index b77239400..3e095bc0e 100644 --- a/desmume/src/commandline.h +++ b/desmume/src/commandline.h @@ -47,6 +47,7 @@ public: int depth_threshold; int autodetect_method; int render3d; + int language; 
std::string nds_file; std::string play_movie_file; std::string record_movie_file; diff --git a/desmume/src/emufile.h b/desmume/src/emufile.h index 95bbe7aaf..133151524 100644 --- a/desmume/src/emufile.h +++ b/desmume/src/emufile.h @@ -280,6 +280,7 @@ private: { mPositionCacheEnabled = false; mCondition = eCondition_Clean; + mFilePosition = 0; fp = fopen(fname,mode); if(!fp) failbit = true; diff --git a/desmume/src/filter/deposterize.cpp b/desmume/src/filter/deposterize.cpp new file mode 100644 index 000000000..84ebebb34 --- /dev/null +++ b/desmume/src/filter/deposterize.cpp @@ -0,0 +1,188 @@ +/* + Copyright (C) 2016 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "../types.h" +#include "filter.h" + +#define DEPOSTERIZE_THRESHOLD 23 // Possible values are [0-255], where a lower value prevents blending and a higher value allows for more blending + + +static u32 Deposterize_InterpLTE(const u32 pixA, const u32 pixB) +{ + const u32 aB = (pixB & 0xFF000000) >> 24; + if (aB == 0) + { + return pixA; + } + + const u32 rA = (pixA & 0x000000FF); + const u32 gA = (pixA & 0x0000FF00) >> 8; + const u32 bA = (pixA & 0x00FF0000) >> 16; + const u32 aA = (pixA & 0xFF000000) >> 24; + + const u32 rB = (pixB & 0x000000FF); + const u32 gB = (pixB & 0x0000FF00) >> 8; + const u32 bB = (pixB & 0x00FF0000) >> 16; + + const u32 rC = ( (rB - rA <= DEPOSTERIZE_THRESHOLD) || (rA - rB <= DEPOSTERIZE_THRESHOLD) ) ? 
( ((rA+rB)>>1) ) : rA; + const u32 gC = ( (gB - gA <= DEPOSTERIZE_THRESHOLD) || (gA - gB <= DEPOSTERIZE_THRESHOLD) ) ? ( ((gA+gB)>>1) ) : gA; + const u32 bC = ( (bB - bA <= DEPOSTERIZE_THRESHOLD) || (bA - bB <= DEPOSTERIZE_THRESHOLD) ) ? ( ((bA+bB)>>1) ) : bA; + const u32 aC = ( (aB - aA <= DEPOSTERIZE_THRESHOLD) || (aA - aB <= DEPOSTERIZE_THRESHOLD) ) ? ( ((aA+aB)>>1) ) : aA; + + return (rC | (gC << 8) | (bC << 16) | (aC << 24)); +} + +static u32 Deposterize_Blend(const u32 pixA, const u32 pixB, const u32 weightA, const u32 weightB) +{ + const u32 aB = (pixB & 0xFF000000) >> 24; + if (aB == 0) + { + return pixA; + } + + const u32 weightSum = weightA + weightB; + + const u32 rbA = pixA & 0x00FF00FF; + const u32 gA = pixA & 0x0000FF00; + const u32 aA = (pixA & 0xFF000000) >> 24; + + const u32 rbB = pixB & 0x00FF00FF; + const u32 gB = pixB & 0x0000FF00; + + const u32 rbC = ( ((rbA * weightA) + (rbB * weightB)) / weightSum ) & 0x00FF00FF; + const u32 gC = ( (( gA * weightA) + ( gB * weightB)) / weightSum ) & 0x0000FF00; + const u32 aC = ( (( aA * weightA) + ( aB * weightB)) / weightSum ) << 24; + + return (rbC | gC | aC); +} + +void RenderDeposterize(SSurface Src, SSurface Dst) +{ + //--------------------------------------- + // Input Pixel Mapping: 06|07|08 + // 05|00|01 + // 04|03|02 + // + // Output Pixel Mapping: 00 + + const int w = Src.Width; + const int h = Src.Height; + + u32 color[9]; + u32 blend[9]; + u32 *src = (u32 *)Src.Surface; + u32 *workingDst = (u32 *)Dst.workingSurface[0]; + u32 *finalDst = (u32 *)Dst.Surface; + + int i = 0; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++, i++) + { + if ((src[i] & 0xFF000000) == 0) + { + workingDst[i] = src[i]; + continue; + } + + color[0] = src[i]; + color[1] = (x < w-1) ? src[i+1] : src[i]; + color[2] = ((x < w-1) && (y < h-1)) ? src[i+w+1] : src[i]; + color[3] = (y < h-1) ? src[i+w] : src[i]; + color[4] = ((x > 0) && (y < h-1)) ? src[i+w-1] : src[i]; + color[5] = (x > 0) ? 
src[i-1] : src[i]; + color[6] = ((x > 0) && (y > 0)) ? src[i-w-1] : src[i]; + color[7] = (y > 0) ? src[i-w] : src[i]; + color[8] = ((x < w-1) && (y > 0)) ? src[i-w+1] : src[i]; + + blend[0] = color[0]; + blend[1] = Deposterize_InterpLTE(color[0], color[1]); + blend[2] = Deposterize_InterpLTE(color[0], color[2]); + blend[3] = Deposterize_InterpLTE(color[0], color[3]); + blend[4] = Deposterize_InterpLTE(color[0], color[4]); + blend[5] = Deposterize_InterpLTE(color[0], color[5]); + blend[6] = Deposterize_InterpLTE(color[0], color[6]); + blend[7] = Deposterize_InterpLTE(color[0], color[7]); + blend[8] = Deposterize_InterpLTE(color[0], color[8]); + + workingDst[i] = Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[5], 1, 7), + Deposterize_Blend(blend[0], blend[1], 1, 7), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[7], 1, 7), + Deposterize_Blend(blend[0], blend[3], 1, 7), + 1, 1), + 1, 1), + Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[6], 7, 9), + Deposterize_Blend(blend[0], blend[2], 7, 9), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[8], 7, 9), + Deposterize_Blend(blend[0], blend[4], 7, 9), + 1, 1), + 1, 1), + 3, 1); + } + } + + i = 0; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++, i++) + { + if ((src[i] & 0xFF000000) == 0) + { + finalDst[i] = src[i]; + continue; + } + + color[0] = workingDst[i]; + color[1] = (x < w-1) ? workingDst[i+1] : workingDst[i]; + color[2] = ((x < w-1) && (y < h-1)) ? workingDst[i+w+1] : workingDst[i]; + color[3] = (y < h-1) ? workingDst[i+w] : workingDst[i]; + color[4] = ((x > 0) && (y < h-1)) ? workingDst[i+w-1] : workingDst[i]; + color[5] = (x > 0) ? workingDst[i-1] : workingDst[i]; + color[6] = ((x > 0) && (y > 0)) ? workingDst[i-w-1] : workingDst[i]; + color[7] = (y > 0) ? workingDst[i-w] : workingDst[i]; + color[8] = ((x < w-1) && (y > 0)) ? 
workingDst[i-w+1] : workingDst[i]; + + blend[0] = color[0]; + blend[1] = Deposterize_InterpLTE(color[0], color[1]); + blend[2] = Deposterize_InterpLTE(color[0], color[2]); + blend[3] = Deposterize_InterpLTE(color[0], color[3]); + blend[4] = Deposterize_InterpLTE(color[0], color[4]); + blend[5] = Deposterize_InterpLTE(color[0], color[5]); + blend[6] = Deposterize_InterpLTE(color[0], color[6]); + blend[7] = Deposterize_InterpLTE(color[0], color[7]); + blend[8] = Deposterize_InterpLTE(color[0], color[8]); + + finalDst[i] = Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[5], 1, 7), + Deposterize_Blend(blend[0], blend[1], 1, 7), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[7], 1, 7), + Deposterize_Blend(blend[0], blend[3], 1, 7), + 1, 1), + 1, 1), + Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[6], 7, 9), + Deposterize_Blend(blend[0], blend[2], 7, 9), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[8], 7, 9), + Deposterize_Blend(blend[0], blend[4], 7, 9), + 1, 1), + 1, 1), + 3, 1); + } + } +} diff --git a/desmume/src/filter/filter.h b/desmume/src/filter/filter.h index 03be9e93e..f21e08ec2 100644 --- a/desmume/src/filter/filter.h +++ b/desmume/src/filter/filter.h @@ -1,19 +1,22 @@ /* -Copyright (C) 2009-2014 DeSmuME team + Copyright (C) 2009-2016 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . 
+ */ -This file is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 2 of the License, or -(at your option) any later version. - -This file is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with the this software. If not, see . -*/ +#ifndef _IMAGE_FILTER_ +#define _IMAGE_FILTER_ #define FILTER_MAX_WORKING_SURFACE_COUNT 8 @@ -27,6 +30,8 @@ typedef struct { void *userData; } SSurface; +void RenderDeposterize(SSurface Src, SSurface Dst); + void RenderNearest2X (SSurface Src, SSurface Dst); void RenderLQ2X (SSurface Src, SSurface Dst); void RenderLQ2XS (SSurface Src, SSurface Dst); @@ -52,3 +57,5 @@ void Render3xBRZ(SSurface Src, SSurface Dst); void Render4xBRZ(SSurface Src, SSurface Dst); void Render5xBRZ(SSurface Src, SSurface Dst); void Render6xBRZ(SSurface Src, SSurface Dst); + +#endif // _IMAGE_FILTER_ diff --git a/desmume/src/frontend/posix/configure.ac b/desmume/src/frontend/posix/configure.ac index b84611963..7977523b9 100644 --- a/desmume/src/frontend/posix/configure.ac +++ b/desmume/src/frontend/posix/configure.ac @@ -299,6 +299,15 @@ AS_CASE([$host_cpu], [AC_DEFINE(HOST_32)] ) +AC_CHECK_DECL([__SSE2__]) +AM_CONDITIONAL([SUPPORT_SSE2], [test "x$ac_cv_have_decl___SSE2__" = xyes]) + +AC_CHECK_DECL([__AVX2__]) +AM_CONDITIONAL([SUPPORT_AVX2], [test "x$ac_cv_have_decl___AVX2__" = xyes]) + +AC_CHECK_DECL([__ALTIVEC__]) +AM_CONDITIONAL([SUPPORT_ALTIVEC], [test "x$ac_cv_have_decl___ALTIVEC__" = xyes]) + AC_SUBST(UI_DIR) AC_SUBST(PO_DIR) diff --git a/desmume/src/frontend/posix/gtk/main.cpp b/desmume/src/frontend/posix/gtk/main.cpp index 0d0bae2e4..4a607540f 100644 --- 
a/desmume/src/frontend/posix/gtk/main.cpp +++ b/desmume/src/frontend/posix/gtk/main.cpp @@ -1,6 +1,6 @@ /* main.cpp - this file is part of DeSmuME * - * Copyright (C) 2006-2015 DeSmuME Team + * Copyright (C) 2006-2016 DeSmuME Team * Copyright (C) 2007 Pascal Giard (evilynux) * * This file is free software; you can redistribute it and/or modify @@ -649,24 +649,30 @@ public: }; static void -init_configured_features( class configured_features *config) +init_configured_features( class configured_features *config ) { - config->engine_3d = 1; + if(config->render3d == COMMANDLINE_RENDER3D_GL || config->render3d == COMMANDLINE_RENDER3D_OLDGL || config->render3d == COMMANDLINE_RENDER3D_AUTOGL) + config->engine_3d = 2; + else + config->engine_3d = 1; config->savetype = 0; config->timeout = 0; /* use the default language */ - config->firmware_language = -1; + config->firmware_language = -1; + + /* If specified by the --lang option, the language is changed to the chosen one */ + config->firmware_language = config->language; } static int fill_configured_features( class configured_features *config, - int argc, char ** argv) + char ** argv) { GOptionEntry options[] = { - { "3d-engine", 0, 0, G_OPTION_ARG_INT, &config->engine_3d, "Select 3d rendering engine. Available engines:\n" + { "3d-render", 0, 0, G_OPTION_ARG_INT, &config->engine_3d, "Select 3D rendering engine. 
Available engines:\n" "\t\t\t\t 0 = 3d disabled\n" "\t\t\t\t 1 = internal rasterizer (default)\n" #if defined(HAVE_LIBOSMESA) || defined(HAVE_GL_GLX) @@ -696,7 +702,6 @@ fill_configured_features( class configured_features *config, //g_option_context_add_main_entries (config->ctx, options, "options"); //g_option_context_add_group (config->ctx, gtk_get_option_group (TRUE)); - config->parse(argc,argv); if(!config->validate()) goto error; @@ -2413,7 +2418,7 @@ gboolean EmuLoop(gpointer data) // HUD display things (copied from Windows main.cpp) #ifdef HAVE_LIBAGG - Hud.fps3d = Render3DFramesPerSecond; + Hud.fps3d = GPU->GetFPSRender3D(); if(nds.idleFrameCounter==0 || oneSecond) { @@ -2446,7 +2451,7 @@ gboolean EmuLoop(gpointer data) for (i = 0; i < Frameskip; i++) { NDS_SkipNextFrame(); #ifdef HAVE_LIBAGG - Hud.fps3d = Render3DFramesPerSecond; + Hud.fps3d = GPU->GetFPSRender3D(); #endif desmume_cycle(); skipped_frames++; @@ -2459,7 +2464,7 @@ gboolean EmuLoop(gpointer data) for (i = 0; i < Frameskip; i++) { NDS_SkipNextFrame(); #ifdef HAVE_LIBAGG - Hud.fps3d = Render3DFramesPerSecond; + Hud.fps3d = GPU->GetFPSRender3D(); #endif desmume_cycle(); skipped_frames++; @@ -2481,7 +2486,7 @@ gboolean EmuLoop(gpointer data) // Aggressively skip frames to avoid delay NDS_SkipNextFrame(); #ifdef HAVE_LIBAGG - Hud.fps3d = Render3DFramesPerSecond; + Hud.fps3d = GPU->GetFPSRender3D(); #endif desmume_cycle(); skipped_frames++; @@ -3252,6 +3257,7 @@ int main (int argc, char *argv[]) // The global menu screws up the window size... 
unsetenv("UBUNTU_MENUPROXY"); + my_config.parse(argc, argv); init_configured_features( &my_config); if (!g_thread_supported()) @@ -3259,7 +3265,7 @@ int main (int argc, char *argv[]) gtk_init(&argc, &argv); - if ( !fill_configured_features( &my_config, argc, argv)) { + if ( !fill_configured_features( &my_config, argv)) { exit(0); } diff --git a/desmume/src/frontend/windows/CWindow.cpp b/desmume/src/frontend/windows/CWindow.cpp index 0e3a9d9dc..cfb7ef2e2 100644 --- a/desmume/src/frontend/windows/CWindow.cpp +++ b/desmume/src/frontend/windows/CWindow.cpp @@ -621,8 +621,10 @@ static void MyAdjustWindowRectEx(RECT* rect, HWND hwnd) ZeroMemory(&mbi, sizeof(mbi)); mbi.cbSize = sizeof(mbi); GetMenuBarInfo(hwnd, OBJID_MENU, 0, &mbi); - //int menuHeight = (mbi.rcBar.bottom - mbi.rcBar.top + 1); //zero 07-aug-2016 - why did I do this? it isn't normal in windows and in the case of no menu bar it was making a 1 instead of a 0 (r3184 in 2009) + + //if the menubar exists, its height is off by 1 (frame between bar and client area?) 
int menuHeight = (mbi.rcBar.bottom - mbi.rcBar.top); + if(menuHeight != 0) menuHeight++; rect->bottom -= cymenu; rect->bottom += menuHeight; diff --git a/desmume/src/frontend/windows/DeSmuME.vcxproj b/desmume/src/frontend/windows/DeSmuME.vcxproj index 53d2c00d8..27264e586 100644 --- a/desmume/src/frontend/windows/DeSmuME.vcxproj +++ b/desmume/src/frontend/windows/DeSmuME.vcxproj @@ -87,6 +87,7 @@ + diff --git a/desmume/src/frontend/windows/DeSmuME.vcxproj.filters b/desmume/src/frontend/windows/DeSmuME.vcxproj.filters index 69c4d424c..6d951c804 100644 --- a/desmume/src/frontend/windows/DeSmuME.vcxproj.filters +++ b/desmume/src/frontend/windows/DeSmuME.vcxproj.filters @@ -975,23 +975,8 @@ Core\utils\colorspacehandler - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler + + Core\filter @@ -1772,24 +1757,6 @@ Core\utils\colorspacehandler - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - - - Core\utils\colorspacehandler - diff --git a/desmume/src/frontend/windows/main.cpp b/desmume/src/frontend/windows/main.cpp index d54414c3f..4f80a27b8 100644 --- a/desmume/src/frontend/windows/main.cpp +++ b/desmume/src/frontend/windows/main.cpp @@ -2184,7 +2184,7 @@ static void StepRunLoop_User() const int kFramesPerToolUpdate = 1; Hud.fps = mainLoopData.fps; - Hud.fps3d = Render3DFramesPerSecond; + Hud.fps3d = GPU->GetFPSRender3D(); Display(); @@ -3386,6 +3386,9 @@ int _main() } } + //not supported; use the GUI + //if(cmdline.language != -1) CommonSettings.fw_config.language = cmdline.language; + cmdline.process_movieCommands(); if(cmdline.load_slot != -1) diff --git a/desmume/src/frontend/windows/main.h b/desmume/src/frontend/windows/main.h index d59f9cc62..35fcd2b2d 100644 --- 
a/desmume/src/frontend/windows/main.h +++ b/desmume/src/frontend/windows/main.h @@ -57,15 +57,6 @@ extern bool ShowLagFrameCounter; #define GPU3D_SWRAST 2 #define GPU3D_OPENGL_OLD 3 -static const int LANGUAGE_ENGLISH = 0; -static const int LANGUAGE_FRENCH = 1; -static const int LANGUAGE_CHINESE = 3; -static const int LANGUAGE_ITALIAN = 4; -static const int LANGUAGE_JAPANESE = 5; -static const int LANGUAGE_SPANISH = 6; -static const int LANGUAGE_KOREAN = 7; -static const int LANGUAGE_BRAZILIAN = 8; - extern void Change3DCoreWithFallbackAndSave(int newCore); extern int backupmemorytype; diff --git a/desmume/src/frontend/windows/pathsettings.cpp b/desmume/src/frontend/windows/pathsettings.cpp index c4eac4402..172f6b6fe 100644 --- a/desmume/src/frontend/windows/pathsettings.cpp +++ b/desmume/src/frontend/windows/pathsettings.cpp @@ -184,14 +184,17 @@ BOOL PathSettings_OnInitDialog(HWND hDlg, HWND hwndFocus, LPARAM lParam) ti.uFlags = TTF_SUBCLASS | TTF_IDISHWND; ti.uId = (UINT_PTR)hwnd; ti.lpszText = - "The format a screenshot should be saved in.\r\n" + "The string format a screenshot should be saved with (google strftime).\r\n" "%f\t\tFilename\r\n" "%r\t\tRandom: 0 ~ RAND_MAX\r\n" "%t\t\tTick: Reset on startup\r\n" "%Y\t\tYear:Four Digit\r\n" + "%y\t\tYear:Two Digit\r\n" "%m\t\tMonth:Two Digit\r\n" - "%D\t\tDay:Two Digit\r\n" - "%H\t\tHour:Two Digit\r\n" + "%d\t\tDay:Two Digit\r\n" + "%H\t\tHour (24):Two Digit\r\n" + "%I\t\tHour (12):Two Digit\r\n" + "%p\t\tAM/PM\r\n" "%M\t\tMinute: Two Digit\r\n" "%S\t\tSecond: Two Digit\r\n"; GetClientRect(hwnd, &ti.rect); diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index a1afe371d..0d6093f83 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -503,9 +503,7 @@ void gfx3d_init() gfx3d.state.fogDensityTable = MMU.ARM9_REG+0x0360; gfx3d.state.edgeMarkColorTable = (u16 *)(MMU.ARM9_REG+0x0330); - gfx3d._videoFrameCount = 0; gfx3d.render3DFrameCount = 0; - Render3DFramesPerSecond = 0; makeTables(); 
Render3D_Init(); @@ -526,7 +524,10 @@ void gfx3d_deinit() void gfx3d_reset() { - GPU->ForceRender3DFinishAndFlush(false); + if (CurrentRenderer->GetRenderNeedsFinish()) + { + GPU->ForceRender3DFinishAndFlush(false); + } #ifdef _SHOW_VTX_COUNTERS max_polys = max_verts = 0; @@ -603,9 +604,7 @@ void gfx3d_reset() GFX_PIPEclear(); GFX_FIFOclear(); - gfx3d._videoFrameCount = 0; gfx3d.render3DFrameCount = 0; - Render3DFramesPerSecond = 0; CurrentRenderer->Reset(); } @@ -2299,7 +2298,12 @@ void gfx3d_VBlankSignal() void gfx3d_VBlankEndSignal(bool skipFrame) { - GPU->ForceRender3DFinishAndFlush(false); + if (CurrentRenderer->GetRenderNeedsFinish()) + { + GPU->ForceRender3DFinishAndFlush(false); + CurrentRenderer->SetRenderNeedsFinish(false); + GPU->GetEventHandler()->DidRender3DEnd(); + } if (!drawPending) return; if (skipFrame) return; @@ -2307,10 +2311,10 @@ void gfx3d_VBlankEndSignal(bool skipFrame) drawPending = FALSE; GPU->GetEventHandler()->DidRender3DBegin(); + CurrentRenderer->SetRenderNeedsFinish(true); if (CommonSettings.showGpu.main) { - CurrentRenderer->SetRenderNeedsFinish(true); CurrentRenderer->SetTextureProcessingProperties(CommonSettings.GFX3D_Renderer_TextureScalingFactor, CommonSettings.GFX3D_Renderer_TextureDeposterize, CommonSettings.GFX3D_Renderer_TextureSmoothing); @@ -2522,7 +2526,10 @@ void gfx3d_Update3DFramebuffers(FragmentColor *framebufferRGBA6665, u16 *framebu //-------------savestate void gfx3d_savestate(EMUFILE* os) { - GPU->ForceRender3DFinishAndFlush(true); + if (CurrentRenderer->GetRenderNeedsFinish()) + { + GPU->ForceRender3DFinishAndFlush(true); + } //version write32le(4,os); @@ -2555,6 +2562,10 @@ bool gfx3d_loadstate(EMUFILE* is, int size) if (read32le(&version,is) != 1) return false; if (size == 8) version = 0; + if (CurrentRenderer->GetRenderNeedsFinish()) + { + GPU->ForceRender3DFinishAndFlush(false); + } gfx3d_glPolygonAttrib_cache(); gfx3d_glTexImage_cache(); diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index 
b62b72b2c..ace1f7b3a 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -667,7 +667,6 @@ struct GFX3D GFX3D() : polylist(0) , vertlist(0) - , _videoFrameCount(0) , render3DFrameCount(0) { } @@ -681,11 +680,9 @@ struct GFX3D VERTLIST* vertlist; INDEXLIST indexlist; - u32 _videoFrameCount; // Internal variable that increments when a video frame is completed. Resets every 60 video frames. u32 render3DFrameCount; // Increments when gfx3d_doFlush() is called. Resets every 60 video frames. }; extern GFX3D gfx3d; -extern u32 Render3DFramesPerSecond; // save the current 3D rendering frame count to here every 60 video frames //--------------------- diff --git a/desmume/src/libretro-common/rthreads/rthreads.c b/desmume/src/libretro-common/rthreads/rthreads.c index deef2f8d2..31892c372 100644 --- a/desmume/src/libretro-common/rthreads/rthreads.c +++ b/desmume/src/libretro-common/rthreads/rthreads.c @@ -1,471 +1,572 @@ -/* Copyright (C) 2010-2016 The RetroArch team - * - * --------------------------------------------------------------------------------------- - * The following license statement only applies to this file (rthreads.c). - * --------------------------------------------------------------------------------------- - * - * Permission is hereby granted, free of charge, - * to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifdef __unix__ -#define _POSIX_C_SOURCE 199309 -#endif - -#include - -#include -#include - -/* with RETRO_WIN32_USE_PTHREADS, pthreads can be used even on win32. Maybe only supported in MSVC>=2005 */ - -#if defined(_WIN32) && !defined(RETRO_WIN32_USE_PTHREADS) -#define USE_WIN32_THREADS -#ifdef _XBOX -#include -#else -#define WIN32_LEAN_AND_MEAN -#include -#endif -#elif defined(GEKKO) -#include "gx_pthread.h" -#elif defined(PSP) -#include "psp_pthread.h" -#elif defined(__CELLOS_LV2__) -#include -#include -#else -#include -#include -#endif - - -#ifdef __MACH__ -#include -#include -#endif - -struct thread_data -{ - void (*func)(void*); - void *userdata; -}; - -struct sthread -{ -#ifdef USE_WIN32_THREADS - HANDLE thread; -#else - pthread_t id; -#endif -}; - -struct slock -{ -#ifdef USE_WIN32_THREADS - HANDLE lock; -#else - pthread_mutex_t lock; -#endif -}; - -struct scond -{ -#ifdef USE_WIN32_THREADS - HANDLE event; -#else - pthread_cond_t cond; -#endif -}; - -#ifdef USE_WIN32_THREADS -static DWORD CALLBACK thread_wrap(void *data_) -#else -static void *thread_wrap(void *data_) -#endif -{ - struct thread_data *data = (struct thread_data*)data_; - if (!data) - return 0; - data->func(data->userdata); - free(data); - return 0; -} - -/** - * sthread_create: - * @start_routine : thread entry callback function - * @userdata : pointer to userdata that will be made - * available in thread entry callback function - * - * Create a new thread. 
- * - * Returns: pointer to new thread if successful, otherwise NULL. - */ -sthread_t *sthread_create(void (*thread_func)(void*), void *userdata) -{ - bool thread_created = false; - struct thread_data *data = NULL; - sthread_t *thread = (sthread_t*)calloc(1, sizeof(*thread)); - - if (!thread) - return NULL; - - data = (struct thread_data*)calloc(1, sizeof(*data)); - if (!data) - goto error; - - data->func = thread_func; - data->userdata = userdata; - -#ifdef USE_WIN32_THREADS - thread->thread = CreateThread(NULL, 0, thread_wrap, data, 0, NULL); - thread_created = !!thread->thread; -#else - thread_created = pthread_create(&thread->id, NULL, thread_wrap, data) == 0; -#endif - - if (!thread_created) - goto error; - - return thread; - -error: - if (data) - free(data); - free(thread); - return NULL; -} - -/** - * sthread_detach: - * @thread : pointer to thread object - * - * Detach a thread. When a detached thread terminates, its - * resource sare automatically released back to the system - * without the need for another thread to join with the - * terminated thread. - * - * Returns: 0 on success, otherwise it returns a non-zero error number. - */ -int sthread_detach(sthread_t *thread) -{ -#ifdef USE_WIN32_THREADS - CloseHandle(thread->thread); - free(thread); - return 0; -#else - return pthread_detach(thread->id); -#endif -} - -/** - * sthread_join: - * @thread : pointer to thread object - * - * Join with a terminated thread. Waits for the thread specified by - * @thread to terminate. If that thread has already terminated, then - * it will return immediately. The thread specified by @thread must - * be joinable. - * - * Returns: 0 on success, otherwise it returns a non-zero error number. 
- */ -void sthread_join(sthread_t *thread) -{ -#ifdef USE_WIN32_THREADS - WaitForSingleObject(thread->thread, INFINITE); - CloseHandle(thread->thread); -#else - pthread_join(thread->id, NULL); -#endif - free(thread); -} - -/** - * sthread_isself: - * @thread : pointer to thread object - * - * Join with a terminated thread. Waits for the thread specified by - * @thread to terminate. If that thread has already terminated, then - * it will return immediately. The thread specified by @thread must - * be joinable. - * - * Returns: true (1) if calling thread is the specified thread - */ -bool sthread_isself(sthread_t *thread) -{ -#ifdef USE_WIN32_THREADS - return GetCurrentThread() == thread->thread; -#else - return pthread_equal(pthread_self(),thread->id); -#endif -} - -/** - * slock_new: - * - * Create and initialize a new mutex. Must be manually - * freed. - * - * Returns: pointer to a new mutex if successful, otherwise NULL. - **/ -slock_t *slock_new(void) -{ - slock_t *lock = (slock_t*)calloc(1, sizeof(*lock)); - if (!lock) - return NULL; - -#ifdef USE_WIN32_THREADS - lock->lock = CreateMutex(NULL, FALSE, NULL); - if (!lock->lock) - goto error; -#else - if ((pthread_mutex_init(&lock->lock, NULL) < 0)) - goto error; -#endif - - return lock; - -error: - slock_free(lock); - return NULL; -} - -/** - * slock_free: - * @lock : pointer to mutex object - * - * Frees a mutex. - **/ -void slock_free(slock_t *lock) -{ - if (!lock) - return; - -#ifdef USE_WIN32_THREADS - CloseHandle(lock->lock); -#else - pthread_mutex_destroy(&lock->lock); -#endif - free(lock); -} - -/** - * slock_lock: - * @lock : pointer to mutex object - * - * Locks a mutex. If a mutex is already locked by - * another thread, the calling thread shall block until - * the mutex becomes available. 
-**/ -void slock_lock(slock_t *lock) -{ -#ifdef USE_WIN32_THREADS - WaitForSingleObject(lock->lock, INFINITE); -#else - pthread_mutex_lock(&lock->lock); -#endif -} - -/** - * slock_unlock: - * @lock : pointer to mutex object - * - * Unlocks a mutex. - **/ -void slock_unlock(slock_t *lock) -{ -#ifdef USE_WIN32_THREADS - ReleaseMutex(lock->lock); -#else - pthread_mutex_unlock(&lock->lock); -#endif -} - -/** - * scond_new: - * - * Creates and initializes a condition variable. Must - * be manually freed. - * - * Returns: pointer to new condition variable on success, - * otherwise NULL. - **/ -scond_t *scond_new(void) -{ - bool event_created = false; - scond_t *cond = (scond_t*)calloc(1, sizeof(*cond)); - - if (!cond) - return NULL; - -#ifdef USE_WIN32_THREADS - cond->event = CreateEvent(NULL, FALSE, FALSE, NULL); - event_created = !!cond->event; -#else - event_created = (pthread_cond_init(&cond->cond, NULL) == 0); -#endif - - if (!event_created) - goto error; - - return cond; - -error: - free(cond); - return NULL; -} - -/** - * scond_free: - * @cond : pointer to condition variable object - * - * Frees a condition variable. -**/ -void scond_free(scond_t *cond) -{ - if (!cond) - return; - -#ifdef USE_WIN32_THREADS - CloseHandle(cond->event); -#else - pthread_cond_destroy(&cond->cond); -#endif - free(cond); -} - -/** - * scond_wait: - * @cond : pointer to condition variable object - * @lock : pointer to mutex object - * - * Block on a condition variable (i.e. wait on a condition). - **/ -void scond_wait(scond_t *cond, slock_t *lock) -{ -#ifdef USE_WIN32_THREADS - WaitForSingleObject(cond->event, 0); - - SignalObjectAndWait(lock->lock, cond->event, INFINITE, FALSE); - slock_lock(lock); -#else - pthread_cond_wait(&cond->cond, &lock->lock); -#endif -} - -/** - * scond_broadcast: - * @cond : pointer to condition variable object - * - * Broadcast a condition. Unblocks all threads currently blocked - * on the specified condition variable @cond. 
- **/ -int scond_broadcast(scond_t *cond) -{ -#ifdef USE_WIN32_THREADS - /* FIXME _- check how this function should differ - * from scond_signal implementation. */ - SetEvent(cond->event); - return 0; -#else - return pthread_cond_broadcast(&cond->cond); -#endif -} - -/** - * scond_signal: - * @cond : pointer to condition variable object - * - * Signal a condition. Unblocks at least one of the threads currently blocked - * on the specified condition variable @cond. - **/ -void scond_signal(scond_t *cond) -{ -#ifdef USE_WIN32_THREADS - SetEvent(cond->event); -#else - pthread_cond_signal(&cond->cond); -#endif -} - -/** - * scond_wait_timeout: - * @cond : pointer to condition variable object - * @lock : pointer to mutex object - * @timeout_us : timeout (in microseconds) - * - * Try to block on a condition variable (i.e. wait on a condition) until - * @timeout_us elapses. - * - * Returns: false (0) if timeout elapses before condition variable is - * signaled or broadcast, otherwise true (1). - **/ -bool scond_wait_timeout(scond_t *cond, slock_t *lock, int64_t timeout_us) -{ -#ifdef USE_WIN32_THREADS - DWORD ret; - - WaitForSingleObject(cond->event, 0); - ret = SignalObjectAndWait(lock->lock, cond->event, - (DWORD)(timeout_us) / 1000, FALSE); - - slock_lock(lock); - return ret == WAIT_OBJECT_0; -#else - int ret; - int64_t seconds, remainder; - struct timespec now = {0}; - -#ifdef __MACH__ - /* OSX doesn't have clock_gettime. 
*/ - clock_serv_t cclock; - mach_timespec_t mts; - - host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); - clock_get_time(cclock, &mts); - mach_port_deallocate(mach_task_self(), cclock); - now.tv_sec = mts.tv_sec; - now.tv_nsec = mts.tv_nsec; -#elif defined(__CELLOS_LV2__) - sys_time_sec_t s; - sys_time_nsec_t n; - - sys_time_get_current_time(&s, &n); - now.tv_sec = s; - now.tv_nsec = n; -#elif defined(__mips__) - struct timeval tm; - - gettimeofday(&tm, NULL); - now.tv_sec = tm.tv_sec; - now.tv_nsec = tm.tv_usec * 1000; -#elif defined(RETRO_WIN32_USE_PTHREADS) - _ftime64_s(&now); -#elif !defined(GEKKO) - /* timeout on libogc is duration, not end time. */ - clock_gettime(CLOCK_REALTIME, &now); -#endif - - seconds = timeout_us / INT64_C(1000000); - remainder = timeout_us % INT64_C(1000000); - - now.tv_sec += seconds; - now.tv_nsec += remainder * INT64_C(1000); - - ret = pthread_cond_timedwait(&cond->cond, &lock->lock, &now); - return (ret == 0); -#endif -} +/* Copyright (C) 2010-2016 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (rthreads.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifdef __unix__ +#define _POSIX_C_SOURCE 199309 +#endif + +#include + +#include +#include + +/* with RETRO_WIN32_USE_PTHREADS, pthreads can be used even on win32. Maybe only supported in MSVC>=2005 */ + +#if defined(_WIN32) && !defined(RETRO_WIN32_USE_PTHREADS) +#define USE_WIN32_THREADS +#ifdef _XBOX +#include +#else +#define WIN32_LEAN_AND_MEAN +#include +#endif +#elif defined(GEKKO) +#include "gx_pthread.h" +#elif defined(PSP) +#include "psp_pthread.h" +#else +#include +#include +#endif + +#ifdef __MACH__ +#include +#include +#endif + +struct thread_data +{ + void (*func)(void*); + void *userdata; +}; + +struct sthread +{ +#ifdef USE_WIN32_THREADS + HANDLE thread; +#else + pthread_t id; +#endif +}; + +struct slock +{ +#ifdef USE_WIN32_THREADS + HANDLE lock; +#else + pthread_mutex_t lock; +#endif +}; + +struct scond +{ +#ifdef USE_WIN32_THREADS + + /* The syntax we'll use is mind-bending unless we use a struct. Plus, we might want to store more info later */ + /* This will be used as a linked list immplementing a queue of waiting threads */ + struct QueueEntry + { + struct QueueEntry* next; + }; + + /* With this implementation of scond, we don't have any way of waking (or even identifying) specific threads */ + /* But we need to wake them in the order indicated by the queue. */ + /* This potato token will get get passed around every waiter. The bearer can test whether he's next, and hold onto the potato if he is. 
*/ + /* When he's done he can then put it back into play to progress the queue further */ + HANDLE hot_potato; + + /* The primary signalled event. Hot potatoes are passed until this is set. */ + HANDLE event; + + /* the head of the queue; NULL if queue is empty */ + struct QueueEntry* head; + + /* equivalent to the queue length */ + int waiters; + + /* how many waiters in the queue have been conceptually wakened by signals (even if we haven't managed to actually wake them yet */ + int wakens; + +#else + pthread_cond_t cond; +#endif +}; + +#ifdef USE_WIN32_THREADS +static DWORD CALLBACK thread_wrap(void *data_) +#else +static void *thread_wrap(void *data_) +#endif +{ + struct thread_data *data = (struct thread_data*)data_; + if (!data) + return 0; + data->func(data->userdata); + free(data); + return 0; +} + +/** + * sthread_create: + * @start_routine : thread entry callback function + * @userdata : pointer to userdata that will be made + * available in thread entry callback function + * + * Create a new thread. + * + * Returns: pointer to new thread if successful, otherwise NULL. + */ +sthread_t *sthread_create(void (*thread_func)(void*), void *userdata) +{ + bool thread_created = false; + struct thread_data *data = NULL; + sthread_t *thread = (sthread_t*)calloc(1, sizeof(*thread)); + + if (!thread) + return NULL; + + data = (struct thread_data*)calloc(1, sizeof(*data)); + if (!data) + goto error; + + data->func = thread_func; + data->userdata = userdata; + +#ifdef USE_WIN32_THREADS + thread->thread = CreateThread(NULL, 0, thread_wrap, data, 0, NULL); + thread_created = !!thread->thread; +#else + thread_created = pthread_create(&thread->id, NULL, thread_wrap, data) == 0; +#endif + + if (!thread_created) + goto error; + + return thread; + +error: + if (data) + free(data); + free(thread); + return NULL; +} + +/** + * sthread_detach: + * @thread : pointer to thread object + * + * Detach a thread. 
When a detached thread terminates, its + * resource sare automatically released back to the system + * without the need for another thread to join with the + * terminated thread. + * + * Returns: 0 on success, otherwise it returns a non-zero error number. + */ +int sthread_detach(sthread_t *thread) +{ +#ifdef USE_WIN32_THREADS + CloseHandle(thread->thread); + free(thread); + return 0; +#else + return pthread_detach(thread->id); +#endif +} + +/** + * sthread_join: + * @thread : pointer to thread object + * + * Join with a terminated thread. Waits for the thread specified by + * @thread to terminate. If that thread has already terminated, then + * it will return immediately. The thread specified by @thread must + * be joinable. + * + * Returns: 0 on success, otherwise it returns a non-zero error number. + */ +void sthread_join(sthread_t *thread) +{ +#ifdef USE_WIN32_THREADS + WaitForSingleObject(thread->thread, INFINITE); + CloseHandle(thread->thread); +#else + pthread_join(thread->id, NULL); +#endif + free(thread); +} + +/** + * sthread_isself: + * @thread : pointer to thread object + * + * Join with a terminated thread. Waits for the thread specified by + * @thread to terminate. If that thread has already terminated, then + * it will return immediately. The thread specified by @thread must + * be joinable. + * + * Returns: true (1) if calling thread is the specified thread + */ +bool sthread_isself(sthread_t *thread) +{ +#ifdef USE_WIN32_THREADS + return GetCurrentThread() == thread->thread; +#else + return pthread_equal(pthread_self(),thread->id); +#endif +} + +/** + * slock_new: + * + * Create and initialize a new mutex. Must be manually + * freed. + * + * Returns: pointer to a new mutex if successful, otherwise NULL. 
+ **/ +slock_t *slock_new(void) +{ + bool mutex_created = false; + slock_t *lock = (slock_t*)calloc(1, sizeof(*lock)); + if (!lock) + return NULL; + +#ifdef USE_WIN32_THREADS + lock->lock = CreateMutex(NULL, FALSE, NULL); + mutex_created = !!lock->lock; +#else + mutex_created = (pthread_mutex_init(&lock->lock, NULL) == 0); +#endif + + if (!mutex_created) + goto error; + + return lock; + +error: + free(lock); + return NULL; +} + +/** + * slock_free: + * @lock : pointer to mutex object + * + * Frees a mutex. + **/ +void slock_free(slock_t *lock) +{ + if (!lock) + return; + +#ifdef USE_WIN32_THREADS + CloseHandle(lock->lock); +#else + pthread_mutex_destroy(&lock->lock); +#endif + free(lock); +} + +/** + * slock_lock: + * @lock : pointer to mutex object + * + * Locks a mutex. If a mutex is already locked by + * another thread, the calling thread shall block until + * the mutex becomes available. +**/ +void slock_lock(slock_t *lock) +{ +#ifdef USE_WIN32_THREADS + WaitForSingleObject(lock->lock, INFINITE); +#else + pthread_mutex_lock(&lock->lock); +#endif +} + +/** + * slock_unlock: + * @lock : pointer to mutex object + * + * Unlocks a mutex. + **/ +void slock_unlock(slock_t *lock) +{ +#ifdef USE_WIN32_THREADS + ReleaseMutex(lock->lock); +#else + pthread_mutex_unlock(&lock->lock); +#endif +} + +/** + * scond_new: + * + * Creates and initializes a condition variable. Must + * be manually freed. + * + * Returns: pointer to new condition variable on success, + * otherwise NULL. 
+ **/ +scond_t *scond_new(void) +{ + scond_t *cond = (scond_t*)calloc(1, sizeof(*cond)); + + if (!cond) + return NULL; + +#ifdef USE_WIN32_THREADS + /* This is very complex because recreating condition variable semantics with win32 parts is not easy */ + /* The main problem is that a condition variable can be used to wake up a thread, but only if the thread is already waiting; */ + /* whereas a win32 event will 'wake up' a thread in advance (the event will be set in advance, so a 'waiter' wont even have to wait on it) */ + /* So at the very least, we need to do something clever. But there's bigger problems. */ + /* We don't even have a straightforward way in win32 to satisfy pthread_cond_wait's atomicity requirement. The bulk of this algorithm is solving that. */ + /* Note: We might could simplify this using vista+ condition variables, but we wanted an XP compatible solution. */ + cond->event = CreateEvent(NULL, FALSE, FALSE, NULL); + if(!cond->event) goto error; + cond->hot_potato = CreateEvent(NULL, FALSE, FALSE, NULL); + if(!cond->hot_potato) + { + CloseHandle(cond->event); + goto error; + } + cond->waiters = cond->wakens = 0; + cond->head = NULL; + +#else + if(pthread_cond_init(&cond->cond, NULL) != 0) + goto error; +#endif + + return cond; + +error: + free(cond); + return NULL; +} + +/** + * scond_free: + * @cond : pointer to condition variable object + * + * Frees a condition variable. +**/ +void scond_free(scond_t *cond) +{ + if (!cond) + return; + +#ifdef USE_WIN32_THREADS + CloseHandle(cond->event); + CloseHandle(cond->hot_potato); +#else + pthread_cond_destroy(&cond->cond); +#endif + free(cond); +} + +/** + * scond_wait: + * @cond : pointer to condition variable object + * @lock : pointer to mutex object + * + * Block on a condition variable (i.e. wait on a condition). 
+ **/ +void scond_wait(scond_t *cond, slock_t *lock) +{ +#ifdef USE_WIN32_THREADS + + /* add ourselves to a queue of waiting threads */ + struct QueueEntry myentry; + struct QueueEntry** ptr = &cond->head; + while(*ptr) /* walk to the end of the linked list */ + ptr = &((*ptr)->next); + *ptr = &myentry; + myentry.next = NULL; + + cond->waiters++; + + /* now the conceptual lock release and condition block are supposed to be atomic. */ + /* we can't do that in windows, but we can simulate the effects by using the queue, by the following analysis: */ + /* What happens if they aren't atomic? */ + /* 1. a signaller can rush in and signal, expecting a waiter to get it; but the waiter wouldn't, because he isn't blocked yet */ + /* solution: win32 events make this easy. the event will sit there enabled */ + /* 2. a signaller can rush in and signal, and then turn right around and wait */ + /* solution: the signaller will get queued behind the waiter, who's enqueued before he releases the mutex */ + + /* It's my turn if I'm the head of the queue. Check to see if it's my turn. */ + while (cond->head != &myentry) + { + /* As long as someone is even going to be able to wake up when they receive the potato, keep it going round */ + if (cond->wakens > 0) + SetEvent(cond->hot_potato); + + /* Wait to catch the hot potato before checking for my turn again */ + SignalObjectAndWait(lock->lock, cond->hot_potato, INFINITE, FALSE); + slock_lock(lock); + } + + /* It's my turn now -- I hold the potato */ + SignalObjectAndWait(lock->lock, cond->event, INFINITE, FALSE); + slock_lock(lock); + + /* Remove ourselves from the queue */ + cond->head = myentry.next; + cond->waiters--; + + /* If any other wakenings are pending, go ahead and set it up */ + /* There may actually be no waiters. That's OK. 
The first waiter will come in, find it's his turn, and immediately get the signaled event */ + cond->wakens--; + if(cond->wakens>0) + { + SetEvent(cond->event); + + /* Progress the queue: Put the hot potato back into play. It'll be tossed around until next in line gets it */ + SetEvent(cond->hot_potato); + } + +#else + pthread_cond_wait(&cond->cond, &lock->lock); +#endif +} + +/** + * scond_broadcast: + * @cond : pointer to condition variable object + * + * Broadcast a condition. Unblocks all threads currently blocked + * on the specified condition variable @cond. + **/ +int scond_broadcast(scond_t *cond) +{ +#ifdef USE_WIN32_THREADS + + /* remember: we currently have mutex */ + if(cond->waiters == 0) return 0; + + /* awaken everything which is currently queued up */ + if(cond->wakens == 0) SetEvent(cond->event); + cond->wakens = cond->waiters; + + /* Since there is now at least one pending waken, the potato must be in play */ + SetEvent(cond->hot_potato); + + return 0; +#else + return pthread_cond_broadcast(&cond->cond); +#endif +} + +/** + * scond_signal: + * @cond : pointer to condition variable object + * + * Signal a condition. Unblocks at least one of the threads currently blocked + * on the specified condition variable @cond. + **/ +void scond_signal(scond_t *cond) +{ +#ifdef USE_WIN32_THREADS + + /* remember: we currently have mutex */ + if(cond->waiters == 0) return; + + /* wake up the next thing in the queue */ + if(cond->wakens == 0) SetEvent(cond->event); + cond->wakens++; + + /* Since there is now at least one pending waken, the potato must be in play */ + SetEvent(cond->hot_potato); + +#else + pthread_cond_signal(&cond->cond); +#endif +} + +/** + * scond_wait_timeout: + * @cond : pointer to condition variable object + * @lock : pointer to mutex object + * @timeout_us : timeout (in microseconds) + * + * Try to block on a condition variable (i.e. wait on a condition) until + * @timeout_us elapses. 
+ * + * Returns: false (0) if timeout elapses before condition variable is + * signaled or broadcast, otherwise true (1). + **/ +bool scond_wait_timeout(scond_t *cond, slock_t *lock, int64_t timeout_us) +{ +#ifdef USE_WIN32_THREADS + DWORD ret; + + /* TODO: this is woefully inadequate. It needs to be solved with the newer approach used above */ + WaitForSingleObject(cond->event, 0); + ret = SignalObjectAndWait(lock->lock, cond->event, + (DWORD)(timeout_us) / 1000, FALSE); + + slock_lock(lock); + return ret == WAIT_OBJECT_0; +#else + int ret; + int64_t seconds, remainder; + struct timespec now = {0}; + +#ifdef __MACH__ + /* OSX doesn't have clock_gettime. */ + clock_serv_t cclock; + mach_timespec_t mts; + + host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); + clock_get_time(cclock, &mts); + mach_port_deallocate(mach_task_self(), cclock); + now.tv_sec = mts.tv_sec; + now.tv_nsec = mts.tv_nsec; +#elif defined(__CELLOS_LV2__) + sys_time_sec_t s; + sys_time_nsec_t n; + + sys_time_get_current_time(&s, &n); + now.tv_sec = s; + now.tv_nsec = n; +#elif defined(__mips__) + struct timeval tm; + + gettimeofday(&tm, NULL); + now.tv_sec = tm.tv_sec; + now.tv_nsec = tm.tv_usec * 1000; +#elif defined(RETRO_WIN32_USE_PTHREADS) + _ftime64_s(&now); +#elif !defined(GEKKO) + /* timeout on libogc is duration, not end time. */ + clock_gettime(CLOCK_REALTIME, &now); +#endif + + seconds = timeout_us / INT64_C(1000000); + remainder = timeout_us % INT64_C(1000000); + + now.tv_sec += seconds; + now.tv_nsec += remainder * INT64_C(1000); + + ret = pthread_cond_timedwait(&cond->cond, &lock->lock, &now); + return (ret == 0); +#endif +} \ No newline at end of file diff --git a/desmume/src/mc.cpp b/desmume/src/mc.cpp index 874c3ddc2..dc28c0281 100644 --- a/desmume/src/mc.cpp +++ b/desmume/src/mc.cpp @@ -294,7 +294,9 @@ BackupDevice::BackupDevice() else { printf("BackupDevice: Converting old raw .sav file.\n"); - sz = trim(buf, sz); + //dont TRIM this! 
it will wreck the searchFileSaveType below. + //was this intended for egregiously over-sized save files? too bad. + //sz = trim(buf, sz); } if (fpOut->fwrite(buf, sz) == sz) @@ -305,6 +307,7 @@ BackupDevice::BackupDevice() info.type = (res + 1); addr_size = info.addr_size = save_types[info.type].addr_size; info.size = fsize = sz; + fpMC = fpOut; //so ensure() works ensure(sz, fpOut); fsize = 0; } @@ -629,6 +632,7 @@ void BackupDevice::reset() else if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge else if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts! else if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3 + else if(!memcmp(gameInfo.header.gameCode,"BPV", 3)) addr_size = 2; //puzzler world (should be eeprom 64KBits) //if we found a whitelist match, we dont need to run detection if(addr_size) state = RUNNING; @@ -1069,8 +1073,10 @@ bool BackupDevice::importData(const char *filename, u32 force_size) bool res = false; if (strlen(filename) < 4) return res; - if ((memcmp(filename + strlen(filename) - 4, ".duc", 4) == 0) || - (memcmp(filename + strlen(filename) - 4, ".dss", 4) == 0)) + std::string ext = strright(filename,4); + bool isDuc = strncasecmp(ext.c_str(), ".duc", 4) == 0; + bool isDss = strncasecmp(ext.c_str(), ".dss", 4) == 0; + if(isDuc || isDss) res = import_duc(filename, force_size); else if (import_no_gba(filename, force_size)) @@ -1499,26 +1505,49 @@ u32 BackupDevice::get_save_duc_size(const char* fname) bool BackupDevice::import_duc(const char* filename, u32 force_size) { u32 size; - char id[16]; + u8 id16[16] = {0}, id4[4] = {0}, id3[3] = {0}; FILE* file = fopen(filename, "rb"); if(!file) return false; - fseek(file, 0, SEEK_END); - size = (u32)ftell(file) - 500; - fseek(file, 0, SEEK_SET); + int version = 0; - // Make sure we really have the right file - fread((void *)id, sizeof(char), 16, file); + //ID version 1 + fread(id16, 1, 16, file); + 
if(!memcmp(id16, "ARDS000000000001", 16)) version = 1; - if (memcmp(id, "ARDS000000000001", 16) != 0) + //ID version 2 + fseek(file,0xA1,SEEK_SET); + fread(id3,1,3,file); + if(!memcmp(id16,"\0\0\0\0",4) && id3[2] == 0xC0) version = 2; + + if(version == 0) { + INVALID_DUC: printf("Not recognized as a valid DUC file\n"); fclose(file); return false; } - // Skip the rest of the header since we don't need it - fseek(file, 500, SEEK_SET); + + fseek(file, 0, SEEK_END); + size = (u32)ftell(file); + + //skip to raw data + if(version == 1) + { + size -= 500; + fseek(file, 500, SEEK_SET); + } + if(version == 2) + { + size -= 0xA4; + fseek(file, 0xA4, SEEK_SET); + + //validate size + int specifiedSize = (id3[0]<<8)+(id3[1]<<16); + if(specifiedSize != size) + goto INVALID_DUC; + } u32 left = 0; if (force_size > 0) diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 0c4b3d1d0..faa3eca82 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -52,7 +52,6 @@ #include "matrix.h" #include "render3D.h" #include "gfx3d.h" -#include "texcache.h" #include "MMU.h" #include "NDSSystem.h" #include "utils/task.h" @@ -329,7 +328,7 @@ class RasterizerUnit { protected: SoftRasterizerRenderer *_softRender; - TexCacheItem *lastTexKey; + SoftRasterizerTexture *lastTexKey; VERT* verts[MAX_CLIPPED_VERTS]; int polynum; @@ -349,19 +348,16 @@ public: int width, height; s32 wmask, hmask; int wrap; - int wshift; - int texFormat; - void setup(u32 texParam) + void setup(SoftRasterizerTexture *theTexture, u32 texParam) { - texFormat = (texParam>>26)&7; - wshift = ((texParam>>20)&0x07) + 3; - width=(1 << wshift); - height=(8 << ((texParam>>23)&0x07)); - wmask = width-1; - hmask = height-1; + width = theTexture->GetRenderWidth(); + height = theTexture->GetRenderHeight(); + wmask = theTexture->GetRenderWidthMask(); + hmask = theTexture->GetRenderHeightMask(); + wrap = (texParam>>16)&0xF; - enabled = gfx3d.renderState.enableTexturing && (texFormat!=0); + enabled = 
gfx3d.renderState.enableTexturing && (theTexture->GetPackFormat() != TEXMODE_NONE); } FORCEINLINE void clamp(s32 &val, const int size, const s32 sizemask) @@ -459,7 +455,10 @@ public: sampler.dowrap(iu, iv); FragmentColor color; - color.color = ((u32*)lastTexKey->decoded)[(iv<GetUnpackData(); + + color.color = textureData[( iv << lastTexKey->GetRenderWidthShift() ) + iu]; + return color; } @@ -1004,15 +1003,15 @@ public: const size_t dstWidth = this->_softRender->GetFramebufferWidth(); const size_t dstHeight = this->_softRender->GetFramebufferHeight(); - lastTexKey = NULL; - const GFX3D_Clipper::TClippedPoly &firstClippedPoly = this->_softRender->clippedPolys[0]; const POLY &firstPoly = *firstClippedPoly.poly; PolygonAttributes polyAttr = firstPoly.getAttributes(); u32 lastPolyAttr = firstPoly.polyAttr; u32 lastTexParams = firstPoly.texParam; u32 lastTexPalette = firstPoly.texPalette; - sampler.setup(firstPoly.texParam); + + lastTexKey = this->_softRender->polyTexKeys[0]; + sampler.setup(lastTexKey, firstPoly.texParam); //iterate over polys for (size_t i = 0; i < polyCount; i++) @@ -1033,13 +1032,15 @@ public: if (lastTexParams != thePoly.texParam || lastTexPalette != thePoly.texPalette) { - sampler.setup(thePoly.texParam); lastTexParams = thePoly.texParam; lastTexPalette = thePoly.texPalette; + + lastTexKey = this->_softRender->polyTexKeys[i]; + sampler.setup(lastTexKey, thePoly.texParam); + lastTexKey->ResetCacheAge(); + lastTexKey->IncreaseCacheUsageCount(1); } - lastTexKey = this->_softRender->polyTexKeys[i]; - for (int j = 0; j < type; j++) this->verts[j] = &clippedPoly.clipVerts[j]; for (int j = type; j < MAX_CLIPPED_VERTS; j++) @@ -1145,6 +1146,60 @@ static void SoftRasterizerRendererDestroy() } } +SoftRasterizerTexture::SoftRasterizerTexture(u32 texAttributes, u32 palAttributes) : TextureStore(texAttributes, palAttributes) +{ + _cacheSize = GetUnpackSizeUsingFormat(TexFormat_15bpp); + _unpackData = (u32 *)malloc_alignedCacheLine(_cacheSize); + _renderWidth 
= _sizeS; + _renderHeight = _sizeT; + _renderWidthMask = _renderWidth - 1; + _renderHeightMask = _renderHeight - 1; + + _renderWidthShift = 0; + + u32 tempWidth = _renderWidth; + while ( (tempWidth & 1) == 0) + { + tempWidth >>= 1; + _renderWidthShift++; + } +} + +SoftRasterizerTexture::~SoftRasterizerTexture() +{ + free_aligned(this->_unpackData); +} + +u32* SoftRasterizerTexture::GetUnpackData() +{ + return this->_unpackData; +} + +u32 SoftRasterizerTexture::GetRenderWidth() const +{ + return this->_renderWidth; +} + +u32 SoftRasterizerTexture::GetRenderHeight() const +{ + return this->_renderHeight; +} + +u32 SoftRasterizerTexture::GetRenderWidthMask() const +{ + return this->_renderWidthMask; +} + +u32 SoftRasterizerTexture::GetRenderHeightMask() const +{ + return this->_renderHeightMask; +} + +u32 SoftRasterizerTexture::GetRenderWidthShift() const +{ + return this->_renderWidthShift; +} + GPU3DInterface gpu3DRasterize = { "SoftRasterizer", SoftRasterizerRendererCreate, @@ -1371,7 +1426,18 @@ void SoftRasterizerRenderer::setupTextures() const POLY &firstPoly = *firstClippedPoly.poly; u32 lastTexParams = firstPoly.texParam; u32 lastTexPalette = firstPoly.texPalette; - TexCacheItem *lastTexKey = TexCache_SetTexture(TexFormat_15bpp, firstPoly.texParam, firstPoly.texPalette); + + SoftRasterizerTexture *lastTexItem = (SoftRasterizerTexture *)texCache.GetTexture(firstPoly.texParam, firstPoly.texPalette); + if (lastTexItem == NULL) + { + lastTexItem = new SoftRasterizerTexture(firstPoly.texParam, firstPoly.texPalette); + texCache.Add(lastTexItem); + } + + if (lastTexItem->IsLoadNeeded()) + { + lastTexItem->Unpack(lastTexItem->GetUnpackData()); + } for (size_t i = 0; i < this->_clippedPolyCount; i++) { @@ -1384,13 +1450,24 @@ void SoftRasterizerRenderer::setupTextures() //and then it won't be safe. 
if (lastTexParams != thePoly.texParam || lastTexPalette != thePoly.texPalette) { - lastTexKey = TexCache_SetTexture(TexFormat_15bpp, thePoly.texParam, thePoly.texPalette); + lastTexItem = (SoftRasterizerTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (lastTexItem == NULL) + { + lastTexItem = new SoftRasterizerTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(lastTexItem); + } + + if (lastTexItem->IsLoadNeeded()) + { + lastTexItem->Unpack(lastTexItem->GetUnpackData()); + } + lastTexParams = thePoly.texParam; lastTexPalette = thePoly.texPalette; } //printf("%08X %d\n",poly->texParam,rasterizerUnit[0].textures.currentNum); - polyTexKeys[i] = lastTexKey; + polyTexKeys[i] = lastTexItem; } } @@ -1535,7 +1612,7 @@ Render3DError SoftRasterizerRenderer::RenderGeometry(const GFX3D_State &renderSt { rasterizerUnit[0].mainLoop(); this->_renderGeometryNeedsFinish = false; - TexCache_EvictFrame(); // Since we're finishing geometry rendering here and now, also check the texture cache now. + texCache.Evict(); // Since we're finishing geometry rendering here and now, also check the texture cache now. } // printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count); @@ -1886,7 +1963,7 @@ Render3DError SoftRasterizerRenderer::Reset() memset(this->clearImagePolyIDBuffer, 0, sizeof(this->clearImagePolyIDBuffer)); memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer)); - TexCache_Reset(); + texCache.Reset(); return RENDER3DERROR_NOERR; } @@ -1945,7 +2022,7 @@ Render3DError SoftRasterizerRenderer::RenderFinish() } // Now that geometry rendering is finished on all threads, check the texture cache. - TexCache_EvictFrame(); + texCache.Evict(); // Do multithreaded post-processing. 
if (this->currentRenderState->enableEdgeMarking || this->currentRenderState->enableFog) diff --git a/desmume/src/rasterize.h b/desmume/src/rasterize.h index c56129626..1582c2a27 100644 --- a/desmume/src/rasterize.h +++ b/desmume/src/rasterize.h @@ -20,6 +20,7 @@ #include "render3D.h" #include "gfx3d.h" +#include "texcache.h" #define SOFTRASTERIZER_DEPTH_EQUAL_TEST_TOLERANCE 0x200 @@ -39,6 +40,28 @@ struct SoftRasterizerPostProcessParams bool fogAlphaOnly; }; +class SoftRasterizerTexture : public TextureStore +{ +protected: + u32 *_unpackData; + u32 _renderWidth; + u32 _renderHeight; + u32 _renderWidthMask; + u32 _renderHeightMask; + u32 _renderWidthShift; + +public: + SoftRasterizerTexture(u32 texAttributes, u32 palAttributes); + virtual ~SoftRasterizerTexture(); + + u32* GetUnpackData(); + u32 GetRenderWidth() const; + u32 GetRenderHeight() const; + u32 GetRenderWidthMask() const; + u32 GetRenderHeightMask() const; + u32 GetRenderWidthShift() const; +}; + #if defined(ENABLE_SSE2) class SoftRasterizerRenderer : public Render3D_SSE2 #else @@ -75,7 +98,7 @@ public: FragmentColor toonColor32LUT[32]; GFX3D_Clipper::TClippedPoly *clippedPolys; FragmentAttributesBuffer *_framebufferAttributes; - TexCacheItem *polyTexKeys[POLYLIST_SIZE]; + SoftRasterizerTexture *polyTexKeys[POLYLIST_SIZE]; bool polyVisible[POLYLIST_SIZE]; bool polyBackfacing[POLYLIST_SIZE]; GFX3D_State *currentRenderState; diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index a730530c6..8d63b1edd 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -28,9 +28,9 @@ #include "gfx3d.h" #include "MMU.h" #include "texcache.h" +#include "./filter/filter.h" #include "./filter/xbrz.h" -#define TEXTURE_DEPOSTERIZE_THRESHOLD 21 // Possible values are [0-255], where lower a value prevents blending and a higher value allows for more blending int cur3DCore = GPU3D_NULL; @@ -126,55 +126,6 @@ void Render3DBaseDestroy() } } -static u32 TextureDeposterize_InterpLTE(const u32 pixA, const 
u32 pixB, const u32 threshold) -{ - const u32 aB = (pixB & 0xFF000000) >> 24; - if (aB == 0) - { - return pixA; - } - - const u32 rA = (pixA & 0x000000FF); - const u32 gA = (pixA & 0x0000FF00) >> 8; - const u32 bA = (pixA & 0x00FF0000) >> 16; - const u32 aA = (pixA & 0xFF000000) >> 24; - - const u32 rB = (pixB & 0x000000FF); - const u32 gB = (pixB & 0x0000FF00) >> 8; - const u32 bB = (pixB & 0x00FF0000) >> 16; - - const u32 rC = ( (rB - rA <= threshold) || (rA - rB <= threshold) ) ? ( ((rA+rB)>>1) ) : rA; - const u32 gC = ( (gB - gA <= threshold) || (gA - gB <= threshold) ) ? ( ((gA+gB)>>1) ) : gA; - const u32 bC = ( (bB - bA <= threshold) || (bA - bB <= threshold) ) ? ( ((bA+bB)>>1) ) : bA; - const u32 aC = ( (bB - aA <= threshold) || (aA - aB <= threshold) ) ? ( ((aA+aB)>>1) ) : aA; - - return (rC | (gC << 8) | (bC << 16) | (aC << 24)); -} - -static u32 TextureDeposterize_Blend(const u32 pixA, const u32 pixB, const u32 weightA, const u32 weightB) -{ - const u32 aB = (pixB & 0xFF000000) >> 24; - if (aB == 0) - { - return pixA; - } - - const u32 weightSum = weightA + weightB; - - const u32 rbA = pixA & 0x00FF00FF; - const u32 gA = pixA & 0x0000FF00; - const u32 aA = (pixA & 0xFF000000) >> 24; - - const u32 rbB = pixB & 0x00FF00FF; - const u32 gB = pixB & 0x0000FF00; - - const u32 rbC = ( ((rbA * weightA) + (rbB * weightB)) / weightSum ) & 0x00FF00FF; - const u32 gC = ( (( gA * weightA) + ( gB * weightB)) / weightSum ) & 0x0000FF00; - const u32 aC = ( (( aA * weightA) + ( aB * weightB)) / weightSum ) << 24; - - return (rbC | gC | aC); -} - FragmentAttributesBuffer::FragmentAttributesBuffer(size_t newCount) { count = newCount; @@ -284,15 +235,26 @@ Render3D::Render3D() _textureScalingFactor = 1; _textureSmooth = false; - _textureDeposterizeBuffer = NULL; _textureUpscaleBuffer = NULL; + memset(&_textureDeposterizeSrcSurface, 0, sizeof(_textureDeposterizeSrcSurface)); + memset(&_textureDeposterizeDstSurface, 0, sizeof(_textureDeposterizeDstSurface)); + + 
_textureDeposterizeSrcSurface.Width = _textureDeposterizeDstSurface.Width = 1; + _textureDeposterizeSrcSurface.Height = _textureDeposterizeDstSurface.Height = 1; + _textureDeposterizeSrcSurface.Pitch = _textureDeposterizeDstSurface.Pitch = 1; + Reset(); } Render3D::~Render3D() { - // Do nothing. + if (this->_textureDeposterizeDstSurface.Surface != NULL) + { + free_aligned(this->_textureDeposterizeDstSurface.Surface); + this->_textureDeposterizeDstSurface.Surface = NULL; + this->_textureDeposterizeDstSurface.workingSurface[0] = NULL; + } } const Render3DDeviceInfo& Render3D::GetDeviceInfo() @@ -384,20 +346,24 @@ void Render3D::SetTextureProcessingProperties(size_t scalingFactor, bool willDep const size_t newScalingFactor = (isScaleValid) ? scalingFactor : 1; bool needTexCacheReset = false; - if ( willDeposterize && (this->_textureDeposterizeBuffer == NULL) ) + if ( willDeposterize && (this->_textureDeposterizeDstSurface.Surface == NULL) ) { // 1024x1024 texels is the largest possible texture size. // We need two buffers, one for each deposterize stage. 
const size_t bufferSize = 1024 * 1024 * 2 * sizeof(u32); - this->_textureDeposterizeBuffer = (u32 *)malloc_alignedCacheLine(bufferSize); - memset(this->_textureDeposterizeBuffer, 0, bufferSize); + + this->_textureDeposterizeDstSurface.Surface = (unsigned char *)malloc_alignedCacheLine(bufferSize); + this->_textureDeposterizeDstSurface.workingSurface[0] = (unsigned char *)((u32 *)this->_textureDeposterizeDstSurface.Surface + (1024 * 1024)); + + memset(this->_textureDeposterizeDstSurface.Surface, 0, bufferSize); needTexCacheReset = true; } - else if ( !willDeposterize && (this->_textureDeposterizeBuffer != NULL) ) + else if ( !willDeposterize && (this->_textureDeposterizeDstSurface.Surface != NULL) ) { - free_aligned(this->_textureDeposterizeBuffer); - this->_textureDeposterizeBuffer = NULL; + free_aligned(this->_textureDeposterizeDstSurface.Surface); + this->_textureDeposterizeDstSurface.Surface = NULL; + this->_textureDeposterizeDstSurface.workingSurface[0] = NULL; needTexCacheReset = true; } @@ -422,124 +388,17 @@ void Render3D::SetTextureProcessingProperties(size_t scalingFactor, bool willDep if (needTexCacheReset) { - TexCache_Reset(); + texCache.Reset(); } } Render3DError Render3D::TextureDeposterize(const u32 *src, const size_t srcTexWidth, const size_t srcTexHeight) { - //---------------------------------------\n\ - // Input Pixel Mapping: 06|07|08 - // 05|00|01 - // 04|03|02 - // - // Output Pixel Mapping: 00 + this->_textureDeposterizeSrcSurface.Width = this->_textureDeposterizeDstSurface.Width = srcTexWidth; + this->_textureDeposterizeSrcSurface.Height = this->_textureDeposterizeDstSurface.Height = srcTexHeight; + this->_textureDeposterizeSrcSurface.Surface = (unsigned char *)src; - const int w = srcTexWidth; - const int h = srcTexHeight; - - u32 color[9]; - u32 blend[9]; - u32 *dst = this->_textureDeposterizeBuffer + (1024 * 1024); - u32 *finalDst = this->_textureDeposterizeBuffer; - - size_t i = 0; - for (int y = 0; y < h; y++) - { - for (int x = 0; x < 
w; x++, i++) - { - if ((src[i] & 0xFF000000) == 0) - { - dst[i] = src[i]; - continue; - } - - color[0] = src[i]; - color[1] = (x < w-1) ? src[i+1] : src[i]; - color[2] = ((x < w-1) && (y < h-1)) ? src[i+w+1] : src[i]; - color[3] = (y < h-1) ? src[i+w] : src[i]; - color[4] = ((x > 0) && (y < h-1)) ? src[i+w-1] : src[i]; - color[5] = (x > 0) ? src[i-1] : src[i]; - color[6] = ((x > 0) && (y > 0)) ? src[i-w-1] : src[i]; - color[7] = (y > 0) ? src[i-w] : src[i]; - color[8] = ((x < w-1) && (y > 0)) ? src[i-w+1] : src[i]; - - blend[0] = color[0]; - blend[1] = TextureDeposterize_InterpLTE(color[0], color[1], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[2] = TextureDeposterize_InterpLTE(color[0], color[2], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[3] = TextureDeposterize_InterpLTE(color[0], color[3], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[4] = TextureDeposterize_InterpLTE(color[0], color[4], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[5] = TextureDeposterize_InterpLTE(color[0], color[5], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[6] = TextureDeposterize_InterpLTE(color[0], color[6], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[7] = TextureDeposterize_InterpLTE(color[0], color[7], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[8] = TextureDeposterize_InterpLTE(color[0], color[8], TEXTURE_DEPOSTERIZE_THRESHOLD); - - dst[i] = TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[5], 1, 7), - TextureDeposterize_Blend(blend[0], blend[1], 1, 7), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[7], 1, 7), - TextureDeposterize_Blend(blend[0], blend[3], 1, 7), - 1, 1), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[6], 7, 9), - TextureDeposterize_Blend(blend[0], blend[2], 7, 9), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[8], 7, 9), - TextureDeposterize_Blend(blend[0], blend[4], 7, 9), - 1, 1), - 1, 1), - 3, 1); - } - } - - i = 0; - 
for (int y = 0; y < h; y++) - { - for (int x = 0; x < w; x++, i++) - { - if ((src[i] & 0xFF000000) == 0) - { - finalDst[i] = src[i]; - continue; - } - - color[0] = dst[i]; - color[1] = (x < w-1) ? dst[i+1] : dst[i]; - color[2] = ((x < w-1) && (y < h-1)) ? dst[i+w+1] : dst[i]; - color[3] = (y < h-1) ? dst[i+w] : dst[i]; - color[4] = ((x > 0) && (y < h-1)) ? dst[i+w-1] : dst[i]; - color[5] = (x > 0) ? dst[i-1] : dst[i]; - color[6] = ((x > 0) && (y > 0)) ? dst[i-w-1] : dst[i]; - color[7] = (y > 0) ? dst[i-w] : dst[i]; - color[8] = ((x < w-1) && (y > 0)) ? dst[i-w+1] : dst[i]; - - blend[0] = color[0]; - blend[1] = TextureDeposterize_InterpLTE(color[0], color[1], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[2] = TextureDeposterize_InterpLTE(color[0], color[2], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[3] = TextureDeposterize_InterpLTE(color[0], color[3], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[4] = TextureDeposterize_InterpLTE(color[0], color[4], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[5] = TextureDeposterize_InterpLTE(color[0], color[5], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[6] = TextureDeposterize_InterpLTE(color[0], color[6], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[7] = TextureDeposterize_InterpLTE(color[0], color[7], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[8] = TextureDeposterize_InterpLTE(color[0], color[8], TEXTURE_DEPOSTERIZE_THRESHOLD); - - finalDst[i] = TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[5], 1, 7), - TextureDeposterize_Blend(blend[0], blend[1], 1, 7), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[7], 1, 7), - TextureDeposterize_Blend(blend[0], blend[3], 1, 7), - 1, 1), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[6], 7, 9), - TextureDeposterize_Blend(blend[0], blend[2], 7, 9), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[8], 7, 9), - TextureDeposterize_Blend(blend[0], 
blend[4], 7, 9), - 1, 1), - 1, 1), - 3, 1); - } - } + RenderDeposterize(this->_textureDeposterizeSrcSurface, this->_textureDeposterizeDstSurface); return RENDER3DERROR_NOERR; } @@ -756,7 +615,7 @@ Render3DError Render3D::Reset() this->_willFlushFramebufferRGBA6665 = true; this->_willFlushFramebufferRGBA5551 = true; - TexCache_Reset(); + texCache.Reset(); return RENDER3DERROR_NOERR; } @@ -798,7 +657,7 @@ Render3DError Render3D::RenderFinish() Render3DError Render3D::VramReconfigureSignal() { - TexCache_Invalidate(); + texCache.Invalidate(); return RENDER3DERROR_NOERR; } diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h index dd64ff99a..3ffd0df61 100644 --- a/desmume/src/render3D.h +++ b/desmume/src/render3D.h @@ -21,6 +21,7 @@ #include "gfx3d.h" #include "types.h" +#include "./filter/filter.h" #define kUnsetTranslucentPolyID 255 @@ -130,7 +131,10 @@ protected: size_t _textureScalingFactor; bool _textureSmooth; - u32 *_textureDeposterizeBuffer; + + SSurface _textureDeposterizeSrcSurface; + SSurface _textureDeposterizeDstSurface; + u32 *_textureUpscaleBuffer; CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index f7931e23b..30e76cbcb 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -20,10 +20,10 @@ #include #include #include -#include #include "texcache.h" +#include "bits.h" #include "common.h" #include "debug.h" #include "gfx3d.h" @@ -40,7 +40,11 @@ using std::max; //only dump this from ogl renderer. 
for now, softrasterizer creates things in an incompatible pixel format //#define DEBUG_DUMP_TEXTURE -#define CONVERT(color) ((TEXFORMAT == TexFormat_32bpp)?(COLOR555TO8888_OPAQUE(color)):COLOR555TO6665_OPAQUE(color)) +#if defined(DEBUG_DUMP_TEXTURE) && defined(WIN32) + #define DO_DEBUG_DUMP_TEXTURE +#endif + +#define CONVERT(color) ((TEXCACHEFORMAT == TexFormat_32bpp)?(COLOR555TO8888_OPAQUE(color)):COLOR555TO6665_OPAQUE(color)) //This class represents a number of regions of memory which should be viewed as contiguous class MemSpan @@ -85,7 +89,7 @@ public: //dumps the memspan to the specified buffer //you may set size to limit the size to be copied - int dump(void* buf, int size=-1) + int dump(void* buf, int size=-1) const { if(size==-1) size = this->size; size = min(this->size,size); @@ -107,7 +111,7 @@ public: // this function does the same than dump // but works for both little and big endian // when buf is an u16 array - int dump16(void* buf, int size=-1) + int dump16(void* buf, int size=-1) const { if(size==-1) size = this->size; size = min(this->size,size); @@ -191,842 +195,1128 @@ static MemSpan MemSpan_TexPalette(u32 ofs, u32 len, bool silent) return ret; } -#if defined (DEBUG_DUMP_TEXTURE) && defined (WIN32) -#define DO_DEBUG_DUMP_TEXTURE -static void DebugDumpTexture(TexCacheItem* item) +static bool TextureLRUCompare(TextureStore *tex1, TextureStore *tex2) +{ + const size_t cacheAge1 = tex1->GetCacheAge(); + const size_t cacheAge2 = tex2->GetCacheAge(); + + if (cacheAge1 == cacheAge2) + { + return ( tex1->GetCacheUseCount() > tex2->GetCacheUseCount() ); + } + + return (cacheAge1 < cacheAge2); +} + +TextureCache texCache; + +TextureCache::TextureCache() +{ + _texCacheMap.clear(); + _texCacheList.reserve(4096); + _actualCacheSize = 0; + _cacheSizeThreshold = TEXCACHE_DEFAULT_THRESHOLD; + memset(_paletteDump, 0, sizeof(_paletteDump)); +} + +size_t TextureCache::GetActualCacheSize() const +{ + return this->_actualCacheSize; +} + +size_t 
TextureCache::GetCacheSizeThreshold() const +{ + return this->_cacheSizeThreshold; +} + +void TextureCache::SetCacheSizeThreshold(size_t newThreshold) +{ + this->_cacheSizeThreshold = newThreshold; +} + +void TextureCache::Invalidate() +{ + //check whether the palette memory changed + //TODO - we should handle this instead by setting dirty flags in the vram memory mapping and noting whether palette memory was dirty. + //but this will work for now + MemSpan mspal = MemSpan_TexPalette(0, PALETTE_DUMP_SIZE, true); + const bool paletteDirty = mspal.memcmp(this->_paletteDump); + if (paletteDirty) + { + mspal.dump(this->_paletteDump); + } + + for (TextureCacheMap::iterator it(this->_texCacheMap.begin()); it != this->_texCacheMap.end(); ++it) + { + it->second->SetSuspectedInvalid(); + + //when the palette changes, we assume all 4x4 textures are dirty. + //this is because each 4x4 item doesnt carry along with it a copy of the entire palette, for verification + //instead, we just use the one paletteDump for verifying of all 4x4 textures; and if paletteDirty is set, verification has failed + if( (it->second->GetPackFormat() == TEXMODE_4X4) && paletteDirty ) + { + it->second->SetAssumedInvalid(); + } + } +} + +void TextureCache::Evict() +{ + //debug print + //printf("%d %d/%d\n",index.size(),cache_size/1024,target/1024); + + //dont do anything unless we're over the target + if (this->_actualCacheSize <= this->_cacheSizeThreshold) + { + for (size_t i = 0; i < this->_texCacheList.size(); i++) + { + this->_texCacheList[i]->IncreaseCacheAge(1); + } + + return; + } + + //aim at cutting the cache to half of the max size + size_t targetCacheSize = this->_cacheSizeThreshold / 2; + + // Sort the textures in cache by age and usage count. Textures that we want to keep in + // cache are placed in the front of the list, while textures we want to evict are sorted + // to the back of the list. 
+ std::sort(this->_texCacheList.begin(), this->_texCacheList.end(), &TextureLRUCompare); + + while (this->_actualCacheSize > targetCacheSize) + { + if (this->_texCacheMap.size() == 0) break; //just in case.. doesnt seem possible, cache_size wouldve been 0 + + TextureStore *item = this->_texCacheList.back(); + this->Remove(item); + this->_texCacheList.pop_back(); + + //printf("evicting! totalsize:%d\n",cache_size); + delete item; + } + + for (size_t i = 0; i < this->_texCacheList.size(); i++) + { + this->_texCacheList[i]->IncreaseCacheAge(1); + } +} + +void TextureCache::Reset() +{ + for (size_t i = 0; i < this->_texCacheList.size(); i++) + { + delete this->_texCacheList[i]; + } + + this->_texCacheMap.clear(); + this->_texCacheList.clear(); + this->_actualCacheSize = 0; + memset(this->_paletteDump, 0, sizeof(this->_paletteDump)); +} + +TextureStore* TextureCache::GetTexture(u32 texAttributes, u32 palAttributes) +{ + TextureStore *theTexture = NULL; + const TextureCacheKey key = TextureCache::GenerateKey(texAttributes, palAttributes); + const TextureCacheMap::iterator cachedTexture = this->_texCacheMap.find(key); + + if (cachedTexture == this->_texCacheMap.end()) + { + return theTexture; + } + else + { + theTexture = cachedTexture->second; + + if (theTexture->IsAssumedInvalid()) + { + theTexture->Update(); + } + else if (theTexture->IsSuspectedInvalid()) + { + theTexture->VRAMCompareAndUpdate(); + } + } + + return theTexture; +} + +void TextureCache::Add(TextureStore *texItem) +{ + const TextureCacheKey key = texItem->GetCacheKey(); + this->_texCacheMap[key] = texItem; + this->_texCacheList.push_back(texItem); + this->_actualCacheSize += texItem->GetCacheSize(); + //printf("allocating: up to %d with %d items\n", this->cache_size, this->cacheTable.size()); +} + +void TextureCache::Remove(TextureStore *texItem) +{ + const TextureCacheKey key = texItem->GetCacheKey(); + this->_texCacheMap.erase(key); + this->_actualCacheSize -= texItem->GetCacheSize(); +} + 
+TextureCacheKey TextureCache::GenerateKey(const u32 texAttributes, const u32 palAttributes) +{ + // Since the repeat, flip, and coordinate transformation modes are render settings + // and not data settings, we can mask out those bits to help reduce duplicate entries. + return (TextureCacheKey)( ((u64)palAttributes << 32) | (u64)(texAttributes & 0x3FF0FFFF) ); +} + +TextureStore::TextureStore() +{ + _textureAttributes = 0; + _paletteAttributes = 0; + _cacheKey = 0; + + _sizeS = 0; + _sizeT = 0; + _isPalZeroTransparent = false; + + _packFormat = TEXMODE_NONE; + _packAddress = 0; + _packSize = 0; + _packData = NULL; + + _paletteAddress = 0; + _paletteSize = 0; + _paletteColorTable = NULL; + + _packIndexAddress = 0; + _packIndexSize = 0; + _packIndexData = NULL; + _packSizeFirstSlot = 0; + + _suspectedInvalid = false; + _assumedInvalid = false; + _isLoadNeeded = false; + + _cacheSize = 0; + _cacheAge = 0; + _cacheUsageCount = 0; +} + +TextureStore::TextureStore(const u32 texAttributes, const u32 palAttributes) +{ + //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) + static const u32 texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; + + //for each texformat, number of palette entries + static const u32 paletteSizeList[] = {0, 32, 4, 16, 256, 0, 8, 0}; + + _textureAttributes = texAttributes; + _paletteAttributes = palAttributes; + _cacheKey = TextureCache::GenerateKey(texAttributes, palAttributes); + + _sizeS = (8 << ((texAttributes >> 20) & 0x07)); + _sizeT = (8 << ((texAttributes >> 23) & 0x07)); + + _packFormat = (NDSTextureFormat)((texAttributes >> 26) & 0x07); + _packAddress = (texAttributes & 0xFFFF) << 3; + _packSize = (_sizeS * _sizeT * texSizes[_packFormat]) >> 2; //shifted because the texSizes multiplier is fixed point + + if ( (_packFormat == TEXMODE_I2) || (_packFormat == TEXMODE_I4) || (_packFormat == TEXMODE_I8) ) + { + _isPalZeroTransparent = ( ((texAttributes >> 29) & 1) != 0 ); + } + else + { + _isPalZeroTransparent = false; + } + + 
_paletteAddress = (_packFormat == TEXMODE_I2) ? palAttributes << 3 : palAttributes << 4; + _paletteSize = paletteSizeList[_packFormat] * sizeof(u16); + + if (_packFormat == TEXMODE_4X4) + { + const u32 indexBase = ((texAttributes & 0xC000) == 0x8000) ? 0x30000 : 0x20000; + const u32 indexOffset = (texAttributes & 0x3FFF) << 2; + _packIndexAddress = indexBase + indexOffset; + _packIndexSize = (_sizeS * _sizeT) >> 3; + + _packData = (u8 *)malloc_alignedCacheLine(_packSize + _packIndexSize + _paletteSize); + _packIndexData = _packData + _packSize; + _paletteColorTable = (u16 *)(_packData + _packSize + _packIndexSize); + + MemSpan currentPackedTexIndexMS = MemSpan_TexMem(_packIndexAddress, _packIndexSize); + currentPackedTexIndexMS.dump(_packIndexData, _packIndexSize); + } + else + { + _packIndexAddress = 0; + _packIndexSize = 0; + _packIndexData = NULL; + + _packData = (u8 *)malloc_alignedCacheLine(_packSize + _paletteSize); + _packIndexData = NULL; + _paletteColorTable = (u16 *)(_packData + _packSize); + } + + if (_paletteSize > 0) + { + MemSpan currentPaletteMS = MemSpan_TexPalette(_paletteAddress, _paletteSize, false); + +#ifdef WORDS_BIGENDIAN + currentPaletteMS.dump16(_paletteColorTable); +#else + currentPaletteMS.dump(_paletteColorTable); +#endif + } + else + { + _paletteColorTable = NULL; + } + + MemSpan currentPackedTexDataMS = MemSpan_TexMem(_packAddress, _packSize); + currentPackedTexDataMS.dump(_packData); + _packSizeFirstSlot = currentPackedTexDataMS.items[0].len; + + _suspectedInvalid = false; + _assumedInvalid = false; + _isLoadNeeded = true; + + _cacheSize = _packSize + _paletteSize + _packIndexSize; + _cacheAge = 0; + _cacheUsageCount = 0; +} + +TextureStore::~TextureStore() +{ + free_aligned(this->_packData); +} + +u32 TextureStore::GetTextureAttributes() const +{ + return this->_textureAttributes; +} + +u32 TextureStore::GetPaletteAttributes() const +{ + return this->_paletteAttributes; +} + +u32 TextureStore::GetWidth() const +{ + return 
this->_sizeS; +} + +u32 TextureStore::GetHeight() const +{ + return this->_sizeT; +} + +bool TextureStore::IsPalZeroTransparent() const +{ + return this->_isPalZeroTransparent; +} + +NDSTextureFormat TextureStore::GetPackFormat() const +{ + return this->_packFormat; +} + +u32 TextureStore::GetPackAddress() const +{ + return this->_packAddress; +} + +u32 TextureStore::GetPackSize() const +{ + return this->_packSize; +} + +u8* TextureStore::GetPackData() +{ + return this->_packData; +} + +u32 TextureStore::GetPaletteAddress() const +{ + return this->_paletteAddress; +} + +u32 TextureStore::GetPaletteSize() const +{ + return this->_paletteSize; +} + +u16* TextureStore::GetPaletteColorTable() const +{ + return this->_paletteColorTable; +} + +u32 TextureStore::GetPackIndexAddress() const +{ + return this->_packIndexAddress; +} + +u32 TextureStore::GetPackIndexSize() const +{ + return this->_packIndexSize; +} + +u8* TextureStore::GetPackIndexData() +{ + return this->_packIndexData; +} + +void TextureStore::SetTextureData(const MemSpan &packedData, const MemSpan &packedIndexData) +{ + //dump texture and 4x4 index data for cache keying + this->_packSizeFirstSlot = packedData.items[0].len; + + packedData.dump(this->_packData); + + if (this->_packFormat == TEXMODE_4X4) + { + packedIndexData.dump(this->_packIndexData, this->_packIndexSize); + } +} + +void TextureStore::SetTexturePalette(const MemSpan &packedPalette) +{ + if (this->_paletteSize > 0) + { +#ifdef WORDS_BIGENDIAN + packedPalette.dump16(this->_paletteColorTable); +#else + packedPalette.dump(this->_paletteColorTable); +#endif + } +} + +void TextureStore::SetTexturePalette(const u16 *paletteBuffer) +{ + if (this->_paletteSize > 0) + { + memcpy(this->_paletteColorTable, paletteBuffer, this->_paletteSize); + } +} + +size_t TextureStore::GetUnpackSizeUsingFormat(const TextureStoreUnpackFormat texCacheFormat) const +{ + return (this->_sizeS * this->_sizeT * sizeof(u32)); +} + +template +void TextureStore::Unpack(u32 
*unpackBuffer) +{ + // Whenever a 1-bit alpha or no-alpha texture is unpacked (this means any texture + // format that is not A3I5 or A5I3), set all transparent pixels to 0 so that 3D + // renderers can assume that the transparent color is 0 during texture sampling. + + switch (this->_packFormat) + { + case TEXMODE_A3I5: + NDSTextureUnpackA3I5(this->_packSize, this->_packData, this->_paletteColorTable, unpackBuffer); + break; + + case TEXMODE_I2: + NDSTextureUnpackI2(this->_packSize, this->_packData, this->_paletteColorTable, this->_isPalZeroTransparent, unpackBuffer); + break; + + case TEXMODE_I4: + NDSTextureUnpackI4(this->_packSize, this->_packData, this->_paletteColorTable, this->_isPalZeroTransparent, unpackBuffer); + break; + + case TEXMODE_I8: + NDSTextureUnpackI8(this->_packSize, this->_packData, this->_paletteColorTable, this->_isPalZeroTransparent, unpackBuffer); + break; + + case TEXMODE_4X4: + { + if (this->_packSize > this->_packSizeFirstSlot) + { + PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); + } + + NDSTextureUnpack4x4(this->_packSizeFirstSlot, (u32 *)this->_packData, (u16 *)this->_packIndexData, this->_paletteAddress, this->_textureAttributes, this->_sizeS, this->_sizeT, unpackBuffer); + break; + } + + case TEXMODE_A5I3: + NDSTextureUnpackA5I3(this->_packSize, this->_packData, this->_paletteColorTable, unpackBuffer); + break; + + case TEXMODE_16BPP: + NDSTextureUnpackDirect16Bit(this->_packSize, (u16 *)this->_packData, unpackBuffer); + break; + + default: + break; + } + +#ifdef DO_DEBUG_DUMP_TEXTURE + this->DebugDump(); +#endif + + this->_isLoadNeeded = false; +} + +bool TextureStore::IsSuspectedInvalid() const +{ + return this->_suspectedInvalid; +} + +void TextureStore::SetSuspectedInvalid() +{ + this->_suspectedInvalid = true; +} + +bool TextureStore::IsAssumedInvalid() const +{ + return this->_assumedInvalid; +} + +void TextureStore::SetAssumedInvalid() +{ + this->_assumedInvalid = true; +} + +void TextureStore::SetLoadNeeded() 
+{ + this->_isLoadNeeded = true; +} + +bool TextureStore::IsLoadNeeded() const +{ + return this->_isLoadNeeded; +} + +TextureCacheKey TextureStore::GetCacheKey() const +{ + return this->_cacheKey; +} + +size_t TextureStore::GetCacheSize() const +{ + return this->_cacheSize; +} + +void TextureStore::SetCacheSize(size_t cacheSize) +{ + this->_cacheSize = cacheSize; +} + +size_t TextureStore::GetCacheAge() const +{ + return this->_cacheAge; +} + +void TextureStore::IncreaseCacheAge(const size_t ageAmount) +{ + this->_cacheAge += ageAmount; +} + +void TextureStore::ResetCacheAge() +{ + this->_cacheAge = 0; +} + +size_t TextureStore::GetCacheUseCount() const +{ + return this->_cacheUsageCount; +} + +void TextureStore::IncreaseCacheUsageCount(const size_t usageCount) +{ + this->_cacheUsageCount += usageCount; +} + +void TextureStore::ResetCacheUsageCount() +{ + this->_cacheUsageCount = 0; +} + +void TextureStore::Update() +{ + MemSpan currentPaletteMS = MemSpan_TexPalette(this->_paletteAddress, this->_paletteSize, false); + MemSpan currentPackedTexDataMS = MemSpan_TexMem(this->_packAddress, this->_packSize); + + MemSpan currentPackedTexIndexMS; + if (this->_packFormat == TEXMODE_4X4) + { + //determine the location for 4x4 index data + currentPackedTexIndexMS = MemSpan_TexMem(this->_packIndexAddress, this->_packIndexSize); + } + + this->SetTextureData(currentPackedTexDataMS, currentPackedTexIndexMS); + this->SetTexturePalette(currentPaletteMS); + + this->_assumedInvalid = false; + this->_suspectedInvalid = false; + this->_isLoadNeeded = true; +} + +void TextureStore::VRAMCompareAndUpdate() +{ + bool needUpdateTexData = false; + bool needUpdatePalette = false; + + //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. + //this isnt such a problem with texture memory, because we read sequentially from it. + //however, we read randomly from palette memory, so the mapping is more costly. 
+ MemSpan currentPaletteMS = MemSpan_TexPalette(this->_paletteAddress, this->_paletteSize, false); + + CACHE_ALIGN u16 currentPalette[256]; +#ifdef WORDS_BIGENDIAN + currentPaletteMS.dump16(currentPalette); +#else + currentPaletteMS.dump(currentPalette); +#endif + + //when the palettes dont match: + //note that we are considering 4x4 textures to have a palette size of 0. + //they really have a potentially HUGE palette, too big for us to handle like a normal palette, + //so they go through a different system + if ( (this->_paletteSize > 0) && memcmp(this->_paletteColorTable, currentPalette, this->_paletteSize) ) + { + needUpdatePalette = true; + } + + //analyze the texture memory mapping and the specifications of this texture + MemSpan currentPackedTexDataMS = MemSpan_TexMem(this->_packAddress, this->_packSize); + + //when the texture data doesn't match + if ( (this->_packSize > 0) && currentPackedTexDataMS.memcmp(this->_packData, this->_packSize) ) + { + needUpdateTexData = true; + } + + //if the texture is 4x4 then the index data must match + MemSpan currentPackedTexIndexMS; + if (this->GetPackFormat() == TEXMODE_4X4) + { + //determine the location for 4x4 index data + currentPackedTexIndexMS = MemSpan_TexMem(this->_packIndexAddress, this->_packIndexSize); + + if ( (this->_packIndexSize > 0) && currentPackedTexIndexMS.memcmp(this->_packIndexData, this->_packIndexSize) ) + { + needUpdateTexData = true; + needUpdatePalette = true; + } + } + + if (needUpdateTexData) + { + this->SetTextureData(currentPackedTexDataMS, currentPackedTexIndexMS); + this->_isLoadNeeded = true; + } + + if (needUpdatePalette) + { + this->SetTexturePalette(currentPalette); + this->_isLoadNeeded = true; + } + + this->_assumedInvalid = false; + this->_suspectedInvalid = false; +} + +#ifdef DO_DEBUG_DUMP_TEXTURE +void TextureStore::DebugDump() { static int ctr=0; char fname[100]; sprintf(fname,"c:\\dump\\%d.bmp", ctr); ctr++; - - 
NDS_WriteBMP_32bppBuffer(item->sizeX,item->sizeY,item->decoded,fname); + + NDS_WriteBMP_32bppBuffer(this->sizeX, this->sizeY, this->unpackData, fname); } #endif -class TexCache +template +void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer) { -public: - TexCache() - : cache_size(0) +#ifdef ENABLE_SSSE3 + const __m128i pal_vec128 = _mm_loadl_epi64((__m128i *)srcPal); +#endif + if (isPalZeroTransparent) { - memset(paletteDump,0,sizeof(paletteDump)); - } - - TTexCacheItemMultimap index; - - //this ought to be enough for anyone - //static const u32 kMaxCacheSize = 64*1024*1024; - //changed by zeromus on 15-dec. I couldnt find any games that were getting anywhere NEAR 64 - static const u32 kMaxCacheSize = 16*1024*1024; - //metal slug burns through sprites so fast, it can test it pretty quickly though - - //this is not really precise, it is off by a constant factor - u32 cache_size; - - void list_remove(TexCacheItem* item) - { - index.erase(item->iterator); - cache_size -= item->decode_len; - } - - void list_push_front(TexCacheItem* item) - { - item->iterator = index.insert(std::make_pair(item->texformat,item)); - cache_size += item->decode_len; - } - - template - TexCacheItem* scan(u32 format, u32 texpal) - { - //for each texformat, number of palette entries - static const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0}; - - //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) - static const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; - - //used to hold a copy of the palette specified for this texture - CACHE_ALIGN u16 pal[256]; - - NDSTextureFormat textureMode = (NDSTextureFormat)((format>>26)&0x07); - u32 sizeX=(8 << ((format>>20)&0x07)); - u32 sizeY=(8 << ((format>>23)&0x07)); - u32 imageSize = sizeX*sizeY; - - u8 *adr; - - u32 paletteAddress; - - switch (textureMode) +#ifdef ENABLE_SSSE3 + for (size_t i = 0; i < srcSize; i+=4, 
srcData+=4, dstBuffer+=16) { - case TEXMODE_I2: - paletteAddress = texpal<<3; - break; - - case TEXMODE_A3I5: - case TEXMODE_I4: - case TEXMODE_I8: - case TEXMODE_A5I3: - case TEXMODE_16BPP: - case TEXMODE_4X4: - default: - paletteAddress = texpal<<4; - break; - } - - //analyze the texture memory mapping and the specifications of this texture - int palSize = palSizes[textureMode]; - int texSize = (imageSize*texSizes[textureMode])>>2; //shifted because the texSizes multiplier is fixed point - MemSpan ms = MemSpan_TexMem((format&0xFFFF)<<3,texSize); - MemSpan mspal = MemSpan_TexPalette(paletteAddress,palSize*2,false); - - //determine the location for 4x4 index data - u32 indexBase; - if((format & 0xc000) == 0x8000) indexBase = 0x30000; - else indexBase = 0x20000; - - u32 indexOffset = (format&0x3FFF)<<2; - - int indexSize = 0; - MemSpan msIndex; - if(textureMode == TEXMODE_4X4) - { - indexSize = imageSize>>3; - msIndex = MemSpan_TexMem(indexOffset+indexBase,indexSize); - } - - - //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. - //this isnt such a problem with texture memory, because we read sequentially from it. - //however, we read randomly from palette memory, so the mapping is more costly. 
- #ifdef MSB_FIRST - mspal.dump16(pal); - #else - mspal.dump(pal); - #endif - - //TODO - as a special optimization, keep the last item returned and check it first - - for(std::pair - iters = index.equal_range(format); - iters.first != iters.second; - ++iters.first) - { - TexCacheItem* curr = iters.first->second; + __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)srcData); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); + idx = _mm_slli_epi16(idx, 1); - //conditions where we reject matches: - //when the teximage or texpal params dont match - //(this is our key for identifying textures in the cache) - //NEW: due to using format as a key we dont need to check this anymore - //if(curr->texformat != format) continue; - if(curr->texpal != texpal) continue; - - //we're being asked for a different format than what we had cached. - //TODO - this could be done at the entire cache level instead of checking repeatedly - if(curr->cacheFormat != TEXFORMAT) goto REJECT; - - //if the texture is assumed invalid, reject it - if(curr->assumedInvalid) goto REJECT; - - //the texture matches params, but isnt suspected invalid. accept it. - if(!curr->suspectedInvalid) return curr; - - //we suspect the texture may be invalid. we need to do a byte-for-byte comparison to re-establish that it is valid: - - //when the palettes dont match: - //note that we are considering 4x4 textures to have a palette size of 0. 
- //they really have a potentially HUGE palette, too big for us to handle like a normal palette, - //so they go through a different system - if(mspal.size != 0 && memcmp(curr->dump.palette,pal,mspal.size)) goto REJECT; - - //when the texture data doesn't match - if(ms.memcmp(&curr->dump.texture[0],curr->dump.textureSize)) goto REJECT; - - //if the texture is 4x4 then the index data must match - if(textureMode == TEXMODE_4X4) - { - if(msIndex.memcmp(curr->dump.texture + curr->dump.textureSize,curr->dump.indexSize)) goto REJECT; - } - - //we found a match. just return it - //REMINDER to make it primary/newest when we have smarter code - //list_remove(curr); - //list_push_front(curr); - curr->suspectedInvalid = false; - return curr; - - REJECT: - //we found a cached item for the current address, but the data is stale. - //for a variety of complicated reasons, we need to throw it out right this instant. - list_remove(curr); - delete curr; - break; - } - - //item was not found. recruit an existing one (the oldest), or create a new one - //evict(); //reduce the size of the cache if necessary - //TODO - as a peculiarity of the texcache, eviction must happen after the entire 3d frame runs - //to support separate cache and read passes - TexCacheItem* newitem = new TexCacheItem(); - newitem->suspectedInvalid = false; - newitem->texformat = format; - newitem->cacheFormat = TEXFORMAT; - newitem->texpal = texpal; - newitem->sizeX=sizeX; - newitem->sizeY=sizeY; - newitem->invSizeX=1.0f/((float)(sizeX)); - newitem->invSizeY=1.0f/((float)(sizeY)); - newitem->decode_len = sizeX*sizeY*4; - newitem->format = textureMode; - newitem->decoded = (u8 *)malloc_alignedCacheLine(newitem->decode_len); - list_push_front(newitem); - //printf("allocating: up to %d with %d items\n",cache_size,index.size()); - - u32 *dwdst = (u32*)newitem->decoded; - - //dump palette data for cache keying - if(palSize) - { - memcpy(newitem->dump.palette, pal, palSize*2); - } - - //dump texture and 4x4 index data 
for cache keying - const int texsize = newitem->dump.textureSize = ms.size; - const int indexsize = newitem->dump.indexSize = msIndex.size; - newitem->dump.texture = new u8[texsize+indexsize]; - ms.dump(&newitem->dump.texture[0],newitem->dump.maxTextureSize); //dump texture - if(textureMode == TEXMODE_4X4) - msIndex.dump(newitem->dump.texture+newitem->dump.textureSize,newitem->dump.indexSize); //dump 4x4 - - - //============================================================================ - //Texture conversion - //============================================================================ - - // Whenever a 1-bit alpha or no-alpha texture is unpacked (this means any texture - // format that is not A3I5 or A5I3), set all transparent pixels to 0 so that 3D - // renderers can assume that the transparent color is 0 during texture sampling. - - const bool isPalZeroTransparent = ( ((format >> 29) & 1) != 0 ); - - switch (newitem->format) - { - case TEXMODE_A3I5: - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - const u16 c = pal[*adr & 31] & 0x7FFF; - const u8 alpha = *adr >> 5; - *dwdst++ = (TEXFORMAT == TexFormat_15bpp) ? 
COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]); - } - } - break; - } - - case TEXMODE_I2: - { -#ifdef ENABLE_SSSE3 - const __m128i pal_vec128 = _mm_loadl_epi64((__m128i *)pal); -#endif - if (isPalZeroTransparent) - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dwdst+=16) - { - __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); - idx = _mm_slli_epi16(idx, 1); - - __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - - // Set converted colors to 0 if the palette index is 0. 
- idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); - idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); - convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); - convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); - convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); - convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - u8 idx; - - idx = *adr & 0x03; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 2) & 0x03; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 4) & 0x03; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 6) & 0x03; - *dwdst++ = (idx == 0) ? 
0 : CONVERT(pal[idx] & 0x7FFF); - } -#endif - } - } - else - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dwdst+=16) - { - __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); - idx = _mm_slli_epi16(idx, 1); - - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dwdst++ = CONVERT(pal[ *adr & 0x03] & 0x7FFF); - *dwdst++ = CONVERT(pal[(*adr >> 2) & 0x03] & 0x7FFF); - *dwdst++ = CONVERT(pal[(*adr >> 4) & 0x03] & 0x7FFF); - *dwdst++ = CONVERT(pal[(*adr >> 6) & 0x03] & 0x7FFF); - 
} -#endif - } - } - break; - } - - case TEXMODE_I4: - { -#ifdef ENABLE_SSSE3 - const __m128i palLo = _mm_load_si128((__m128i *)pal + 0); - const __m128i palHi = _mm_load_si128((__m128i *)pal + 1); -#endif - if (isPalZeroTransparent) - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dwdst+=16) - { - __m128i idx = _mm_loadl_epi64((__m128i *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); - idx = _mm_slli_epi16(idx, 1); - - __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); - const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); - const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); - const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); - const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); - - const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); - const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); - - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - - // Set converted colors to 0 if the palette index is 0. 
- idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); - idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); - convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); - convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); - convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); - convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - u8 idx; - - idx = *adr & 0xF; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = *adr >> 4; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - } -#endif - } - - } - else - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dwdst+=16) - { - __m128i idx = _mm_loadl_epi64((__m128i *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); - idx = _mm_slli_epi16(idx, 1); - - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); - const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); - const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); - const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); - const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); - - const __m128i palColor0 = _mm_blendv_epi8( 
palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); - const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); - - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dwdst++ = CONVERT(pal[*adr & 0x0F] & 0x7FFF); - *dwdst++ = CONVERT(pal[*adr >> 4] & 0x7FFF); - } -#endif - } - } - break; - } - - case TEXMODE_I8: - { - if (isPalZeroTransparent) - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dwdst++ = (*adr == 0) ? 
0 : CONVERT(pal[*adr] & 0x7FFF); - } - } - } - else - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dwdst++ = CONVERT(pal[*adr] & 0x7FFF); - } - } - } - break; - } - - case TEXMODE_4X4: - { - if (ms.numItems != 1) - { - PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); - } - //this check isnt necessary since the addressing is tied to the texture data which will also run out: - //if(msIndex.numItems != 1) PROGINFO("Your 4x4 texture index has overrun its slot.\n"); - - #define PAL4X4(offset) ( LE_TO_LOCAL_16( *(u16*)( MMU.texInfo.texPalSlot[((paletteAddress + (offset)*2)>>14)&0x7] + ((paletteAddress + (offset)*2)&0x3FFF) ) ) & 0x7FFF ) - - u16* slot1; - u32* map = (u32*)ms.items[0].ptr; - u32 limit = ms.items[0].len<<2; - u32 d = 0; - if ( (format & 0xc000) == 0x8000) - // texel are in slot 2 - slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][((format & 0x3FFF)<<2)+0x010000]; - else - slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][(format & 0x3FFF)<<2]; - - u16 yTmpSize = (sizeY>>2); - u16 xTmpSize = (sizeX>>2); - - //this is flagged whenever a 4x4 overruns its slot. 
- //i am guessing we just generate black in that case - bool dead = false; - - for (size_t y = 0; y < yTmpSize; y++) - { - u32 tmpPos[4]={(y<<2)*sizeX,((y<<2)+1)*sizeX, - ((y<<2)+2)*sizeX,((y<<2)+3)*sizeX}; - for (size_t x = 0; x < xTmpSize; x++, d++) - { - if (d >= limit) - dead = true; - - if (dead) - { - for (int sy = 0; sy < 4; sy++) - { - const u32 currentPos = (x<<2) + tmpPos[sy]; - dwdst[currentPos] = dwdst[currentPos+1] = dwdst[currentPos+2] = dwdst[currentPos+3] = 0; - } - continue; - } - - const u32 currBlock = LE_TO_LOCAL_32(map[d]); - const u16 pal1 = LE_TO_LOCAL_16(slot1[d]); - const u16 pal1offset = (pal1 & 0x3FFF)<<1; - const u8 mode = pal1>>14; - u32 tmp_col[4]; - - tmp_col[0] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset) ); - tmp_col[1] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+1) ); - - switch (mode) - { - case 0: - tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) ); - tmp_col[3] = 0x00000000; - break; - - case 1: -#ifdef MSB_FIRST - tmp_col[2] = ( (((tmp_col[0] & 0xFF000000) >> 1)+((tmp_col[1] & 0xFF000000) >> 1)) & 0xFF000000 ) | - ( (((tmp_col[0] & 0x00FF0000) + (tmp_col[1] & 0x00FF0000)) >> 1) & 0x00FF0000 ) | - ( (((tmp_col[0] & 0x0000FF00) + (tmp_col[1] & 0x0000FF00)) >> 1) & 0x0000FF00 ) | - 0x000000FF; - tmp_col[3] = 0x00000000; -#else - tmp_col[2] = ( (((tmp_col[0] & 0x00FF00FF) + (tmp_col[1] & 0x00FF00FF)) >> 1) & 0x00FF00FF ) | - ( (((tmp_col[0] & 0x0000FF00) + (tmp_col[1] & 0x0000FF00)) >> 1) & 0x0000FF00 ) | - 0xFF000000; - tmp_col[3] = 0x00000000; -#endif - break; - - case 2: - tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) ); - tmp_col[3] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+3) ); - break; - - case 3: - { -#ifdef MSB_FIRST - const u32 r0 = (tmp_col[0]>>24) & 0x000000FF; - const u32 r1 = (tmp_col[1]>>24) & 0x000000FF; - const u32 g0 = (tmp_col[0]>>16) & 0x000000FF; - const u32 g1 = (tmp_col[1]>>16) & 0x000000FF; - const u32 b0 = (tmp_col[0]>> 8) & 0x000000FF; - const u32 b1 = (tmp_col[1]>> 8) & 0x000000FF; 
-#else - const u32 r0 = tmp_col[0] & 0x000000FF; - const u32 r1 = tmp_col[1] & 0x000000FF; - const u32 g0 = (tmp_col[0]>> 8) & 0x000000FF; - const u32 g1 = (tmp_col[1]>> 8) & 0x000000FF; - const u32 b0 = (tmp_col[0]>>16) & 0x000000FF; - const u32 b1 = (tmp_col[1]>>16) & 0x000000FF; -#endif - - const u16 tmp1 = ( (r0*5 + r1*3)>>6) | - ( ((g0*5 + g1*3)>>6) << 5 ) | - ( ((b0*5 + b1*3)>>6) << 10 ); - const u16 tmp2 = ( (r0*3 + r1*5)>>6) | - ( ((g0*3 + g1*5)>>6) << 5 ) | - ( ((b0*3 + b1*5)>>6) << 10 ); - - tmp_col[2] = COLOR555TO8888_OPAQUE(tmp1); - tmp_col[3] = COLOR555TO8888_OPAQUE(tmp2); - break; - } - } - - if (TEXFORMAT==TexFormat_15bpp) - { - for (size_t i = 0; i < 4; i++) - { -#ifdef MSB_FIRST - const u32 a = (tmp_col[i] >> 3) & 0x0000001F; - tmp_col[i] >>= 2; - tmp_col[i] &= 0x3F3F3F00; - tmp_col[i] |= a; -#else - const u32 a = (tmp_col[i] >> 3) & 0x1F000000; - tmp_col[i] >>= 2; - tmp_col[i] &= 0x003F3F3F; - tmp_col[i] |= a; -#endif - } - } - - //TODO - this could be more precise for 32bpp mode (run it through the color separation table) - - //set all 16 texels - for (size_t sy = 0; sy < 4; sy++) - { - // Texture offset - const u32 currentPos = (x<<2) + tmpPos[sy]; - const u8 currRow = (u8)((currBlock>>(sy<<3))&0xFF); - - dwdst[currentPos ] = tmp_col[ currRow &3]; - dwdst[currentPos+1] = tmp_col[(currRow>>2)&3]; - dwdst[currentPos+2] = tmp_col[(currRow>>4)&3]; - dwdst[currentPos+3] = tmp_col[(currRow>>6)&3]; - } - } - } - break; - } - - case TEXMODE_A5I3: - { -#ifdef ENABLE_SSSE3 - const __m128i pal_vec128 = _mm_load_si128((__m128i *)pal); -#endif - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=16, adr+=16, dwdst+=16) - { - const __m128i bits = _mm_loadu_si128((__m128i *)adr); - - const __m128i idx = _mm_slli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0x07)), 1 ); - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 
= _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i tmpAlpha[2]; - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - const __m128i alpha = _mm_srli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 ); - const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); - const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); - ColorspaceConvert555To6665_SSE2(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); - ColorspaceConvert555To6665_SSE2(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); - } - else - { - const __m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) ); - const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); - const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); - ColorspaceConvert555To8888_SSE2(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); - ColorspaceConvert555To8888_SSE2(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), 
convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - const u16 c = pal[*adr&0x07] & 0x7FFF; - const u8 alpha = (*adr>>3); - *dwdst++ = (TEXFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]); - } -#endif - } - break; - } - - case TEXMODE_16BPP: - { - for (size_t j = 0; j < ms.numItems; j++) - { - const u16 *map = (u16*)ms.items[j].ptr; - const size_t len = ms.items[j].len >> 1; - - for (size_t x = 0; x < len; x++) - { - const u16 c = LOCAL_TO_LE_16(map[x]); - *dwdst++ = (c & 0x8000) ? CONVERT(c & 0x7FFF) : 0; - } - } - break; - } - - default: - break; - } //switch(texture format) - -#ifdef DO_DEBUG_DUMP_TEXTURE - DebugDumpTexture(newitem); -#endif - - return newitem; - } //scan() - - static const int PALETTE_DUMP_SIZE = (64+16+16)*1024; - u8 paletteDump[PALETTE_DUMP_SIZE]; - - void invalidate() - { - //check whether the palette memory changed - //TODO - we should handle this instead by setting dirty flags in the vram memory mapping and noting whether palette memory was dirty. - //but this will work for now - MemSpan mspal = MemSpan_TexPalette(0,PALETTE_DUMP_SIZE,true); - bool paletteDirty = mspal.memcmp(paletteDump); - if (paletteDirty) - { - mspal.dump(paletteDump); - } - - for (TTexCacheItemMultimap::iterator it(index.begin()); it != index.end(); ++it) - { - it->second->suspectedInvalid = true; + __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - //when the palette changes, we assume all 4x4 textures are dirty. 
- //this is because each 4x4 item doesnt carry along with it a copy of the entire palette, for verification - //instead, we just use the one paletteDump for verifying of all 4x4 textures; and if paletteDirty is set, verification has failed - if( (it->second->GetTextureFormat() == TEXMODE_4X4) && paletteDirty ) + const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); + const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) { - it->second->assumedInvalid = true; + ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + else + { + ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + + // Set converted colors to 0 if the palette index is 0. + idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); + idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); + convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); + convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); + convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); + convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); + } +#else + for (size_t i = 0; i < srcSize; i++, srcData++) + { + u8 idx; + + idx = *srcData & 0x03; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + + idx = (*srcData >> 2) & 0x03; + *dstBuffer++ = (idx == 0) ? 
0 : CONVERT(srcPal[idx] & 0x7FFF); + + idx = (*srcData >> 4) & 0x03; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + + idx = (*srcData >> 6) & 0x03; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + } +#endif + } + else + { +#ifdef ENABLE_SSSE3 + for (size_t i = 0; i < srcSize; i+=4, srcData+=4, dstBuffer+=16) + { + __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)srcData); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); + idx = _mm_slli_epi16(idx, 1); + + const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); + const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) + { + ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + else + { + ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); + } +#else + for (size_t i = 0; i < srcSize; i++, srcData++) + { + *dstBuffer++ = CONVERT(srcPal[ *srcData & 0x03] & 0x7FFF); + 
*dstBuffer++ = CONVERT(srcPal[(*srcData >> 2) & 0x03] & 0x7FFF); + *dstBuffer++ = CONVERT(srcPal[(*srcData >> 4) & 0x03] & 0x7FFF); + *dstBuffer++ = CONVERT(srcPal[(*srcData >> 6) & 0x03] & 0x7FFF); + } +#endif + } +} + +template +void NDSTextureUnpackI4(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer) +{ +#ifdef ENABLE_SSSE3 + const __m128i palLo = _mm_load_si128((__m128i *)srcPal + 0); + const __m128i palHi = _mm_load_si128((__m128i *)srcPal + 1); +#endif + if (isPalZeroTransparent) + { +#ifdef ENABLE_SSSE3 + for (size_t i = 0; i < srcSize; i+=8, srcData+=8, dstBuffer+=16) + { + __m128i idx = _mm_loadl_epi64((__m128i *)srcData); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); + idx = _mm_slli_epi16(idx, 1); + + __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); + const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); + const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); + const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); + const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); + + const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); + const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) + { + ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + else + { + ColorspaceConvert555To8888Opaque_SSE2(palColor0, 
convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + + // Set converted colors to 0 if the palette index is 0. + idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); + idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); + convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); + convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); + convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); + convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); + } +#else + for (size_t i = 0; i < srcSize; i++, srcData++) + { + u8 idx; + + idx = *srcData & 0x0F; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + + idx = *srcData >> 4; + *dstBuffer++ = (idx == 0) ? 
0 : CONVERT(srcPal[idx] & 0x7FFF); + } +#endif + } + else + { +#ifdef ENABLE_SSSE3 + for (size_t i = 0; i < srcSize; i+=8, srcData+=8, dstBuffer+=16) + { + __m128i idx = _mm_loadl_epi64((__m128i *)srcData); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); + idx = _mm_slli_epi16(idx, 1); + + const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); + const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); + const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); + const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); + const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); + + const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); + const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) + { + ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + else + { + ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + } + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); + } +#else + for (size_t i = 0; i < srcSize; i++, srcData++) + { + *dstBuffer++ = CONVERT(srcPal[*srcData & 0x0F] & 
0x7FFF); + *dstBuffer++ = CONVERT(srcPal[*srcData >> 4] & 0x7FFF); + } +#endif + } +} + +template +void NDSTextureUnpackI8(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer) +{ + if (isPalZeroTransparent) + { + for (size_t i = 0; i < srcSize; i++, srcData++) + { + const u8 idx = *srcData; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx] & 0x7FFF); + } + } + else + { + for (size_t i = 0; i < srcSize; i++, srcData++) + { + *dstBuffer++ = CONVERT(srcPal[*srcData] & 0x7FFF); + } + } +} + +template +void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer) +{ + for (size_t i = 0; i < srcSize; i++, srcData++) + { + const u16 c = srcPal[*srcData & 0x1F] & 0x7FFF; + const u8 alpha = *srcData >> 5; + *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]); + } +} + +template +void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer) +{ +#ifdef ENABLE_SSSE3 + const __m128i pal_vec128 = _mm_load_si128((__m128i *)srcPal); + + for (size_t i = 0; i < srcSize; i+=16, srcData+=16, dstBuffer+=16) + { + const __m128i bits = _mm_loadu_si128((__m128i *)srcData); + + const __m128i idx = _mm_slli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0x07)), 1 ); + const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); + const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); + + __m128i tmpAlpha[2]; + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) + { + const __m128i alpha = _mm_srli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 ); + 
const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); + const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); + ColorspaceConvert555To6665_SSE2(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); + ColorspaceConvert555To6665_SSE2(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); + } + else + { + const __m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) ); + const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); + const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); + ColorspaceConvert555To8888_SSE2(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); + ColorspaceConvert555To8888_SSE2(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); + } + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); + } +#else + for (size_t i = 0; i < srcSize; i++, srcData++) + { + const u16 c = srcPal[*srcData & 0x07] & 0x7FFF; + const u8 alpha = (*srcData >> 3); + *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? 
COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]); + } +#endif +} + +#define PAL4X4(offset) ( LE_TO_LOCAL_16( *(u16*)( MMU.texInfo.texPalSlot[((palAddress + (offset)*2)>>14)&0x7] + ((palAddress + (offset)*2)&0x3FFF) ) ) & 0x7FFF ) + +template +void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, const u16 *__restrict srcIndex, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *__restrict dstBuffer) +{ + const u32 limit = srcSize * sizeof(u32); + const u16 xTmpSize = sizeX >> 2; + const u16 yTmpSize = sizeY >> 2; + + //this is flagged whenever a 4x4 overruns its slot. + //i am guessing we just generate black in that case + bool dead = false; + + for (size_t y = 0, d = 0; y < yTmpSize; y++) + { + u32 tmpPos[4]={(y<<2)*sizeX,((y<<2)+1)*sizeX, + ((y<<2)+2)*sizeX,((y<<2)+3)*sizeX}; + for (size_t x = 0; x < xTmpSize; x++, d++) + { + if (d >= limit) + dead = true; + + if (dead) + { + for (int sy = 0; sy < 4; sy++) + { + const u32 currentPos = (x<<2) + tmpPos[sy]; + dstBuffer[currentPos] = dstBuffer[currentPos+1] = dstBuffer[currentPos+2] = dstBuffer[currentPos+3] = 0; + } + continue; + } + + const u32 currBlock = LE_TO_LOCAL_32(srcData[d]); + const u16 pal1 = LE_TO_LOCAL_16(srcIndex[d]); + const u16 pal1offset = (pal1 & 0x3FFF)<<1; + const u8 mode = pal1>>14; + CACHE_ALIGN u32 tmp_col[4]; + + tmp_col[0] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset) ); + tmp_col[1] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+1) ); + + switch (mode) + { + case 0: + tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) ); + tmp_col[3] = 0x00000000; + break; + + case 1: +#ifdef LOCAL_BE + tmp_col[2] = ( (((tmp_col[0] & 0xFF000000) >> 1)+((tmp_col[1] & 0xFF000000) >> 1)) & 0xFF000000 ) | + ( (((tmp_col[0] & 0x00FF0000) + (tmp_col[1] & 0x00FF0000)) >> 1) & 0x00FF0000 ) | + ( (((tmp_col[0] & 0x0000FF00) + (tmp_col[1] & 0x0000FF00)) >> 1) & 0x0000FF00 ) | + 0x000000FF; + tmp_col[3] = 0x00000000; +#else + 
tmp_col[2] = ( (((tmp_col[0] & 0x00FF00FF) + (tmp_col[1] & 0x00FF00FF)) >> 1) & 0x00FF00FF ) | + ( (((tmp_col[0] & 0x0000FF00) + (tmp_col[1] & 0x0000FF00)) >> 1) & 0x0000FF00 ) | + 0xFF000000; + tmp_col[3] = 0x00000000; +#endif + break; + + case 2: + tmp_col[2] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+2) ); + tmp_col[3] = COLOR555TO8888_OPAQUE( PAL4X4(pal1offset+3) ); + break; + + case 3: + { +#ifdef LOCAL_BE + const u32 r0 = (tmp_col[0]>>24) & 0x000000FF; + const u32 r1 = (tmp_col[1]>>24) & 0x000000FF; + const u32 g0 = (tmp_col[0]>>16) & 0x000000FF; + const u32 g1 = (tmp_col[1]>>16) & 0x000000FF; + const u32 b0 = (tmp_col[0]>> 8) & 0x000000FF; + const u32 b1 = (tmp_col[1]>> 8) & 0x000000FF; +#else + const u32 r0 = tmp_col[0] & 0x000000FF; + const u32 r1 = tmp_col[1] & 0x000000FF; + const u32 g0 = (tmp_col[0]>> 8) & 0x000000FF; + const u32 g1 = (tmp_col[1]>> 8) & 0x000000FF; + const u32 b0 = (tmp_col[0]>>16) & 0x000000FF; + const u32 b1 = (tmp_col[1]>>16) & 0x000000FF; +#endif + + const u16 tmp1 = ( (r0*5 + r1*3)>>6) | + ( ((g0*5 + g1*3)>>6) << 5 ) | + ( ((b0*5 + b1*3)>>6) << 10 ); + const u16 tmp2 = ( (r0*3 + r1*5)>>6) | + ( ((g0*3 + g1*5)>>6) << 5 ) | + ( ((b0*3 + b1*5)>>6) << 10 ); + + tmp_col[2] = COLOR555TO8888_OPAQUE(tmp1); + tmp_col[3] = COLOR555TO8888_OPAQUE(tmp2); + break; + } + } + + if (TEXCACHEFORMAT == TexFormat_15bpp) + { + tmp_col[0] = ColorspaceConvert8888To6665(tmp_col[0]); + tmp_col[1] = ColorspaceConvert8888To6665(tmp_col[1]); + tmp_col[2] = ColorspaceConvert8888To6665(tmp_col[2]); + tmp_col[3] = ColorspaceConvert8888To6665(tmp_col[3]); + } + + //TODO - this could be more precise for 32bpp mode (run it through the color separation table) + + //set all 16 texels + for (size_t sy = 0; sy < 4; sy++) + { + // Texture offset + const u32 currentPos = (x<<2) + tmpPos[sy]; + const u8 currRow = (u8)((currBlock>>(sy<<3))&0xFF); + + dstBuffer[currentPos ] = tmp_col[ currRow &3]; + dstBuffer[currentPos+1] = tmp_col[(currRow>>2)&3]; + 
dstBuffer[currentPos+2] = tmp_col[(currRow>>4)&3]; + dstBuffer[currentPos+3] = tmp_col[(currRow>>6)&3]; } } } +} - void evict(u32 target = kMaxCacheSize) +template +void NDSTextureUnpackDirect16Bit(const size_t srcSize, const u16 *__restrict srcData, u32 *__restrict dstBuffer) +{ + const size_t pixCount = srcSize >> 1; + size_t i = 0; + +#ifdef ENABLE_SSE2 + const size_t pixCountVec128 = pixCount - (pixCount % 8); + for (; i < pixCountVec128; i+=8, srcData+=8, dstBuffer+=8) { - //debug print - //printf("%d %d/%d\n",index.size(),cache_size/1024,target/1024); - - //dont do anything unless we're over the target - if(cache_size target) + const v128u16 c = _mm_load_si128((v128u16 *)srcData); + const v128u16 alpha = _mm_cmpeq_epi16(_mm_srli_epi16(c, 15), _mm_set1_epi16(1)); + v128u32 convertedColor[2]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) { - if(index.size()==0) break; //just in case.. doesnt seem possible, cache_size wouldve been 0 - - TexCacheItem* item = index.begin()->second; - list_remove(item); - //printf("evicting! totalsize:%d\n",cache_size); - delete item; + ColorspaceConvert555To6665Opaque_SSE2(c, convertedColor[0], convertedColor[1]); } + else + { + ColorspaceConvert555To8888Opaque_SSE2(c, convertedColor[0], convertedColor[1]); + } + + convertedColor[0] = _mm_blendv_epi8(_mm_setzero_si128(), convertedColor[0], _mm_unpacklo_epi16(alpha, alpha)); + convertedColor[1] = _mm_blendv_epi8(_mm_setzero_si128(), convertedColor[1], _mm_unpackhi_epi16(alpha, alpha)); + + _mm_store_si128((v128u32 *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((v128u32 *)(dstBuffer + 4), convertedColor[1]); } -} texCache; - -void TexCache_Reset() -{ - texCache.evict(0); -} - -void TexCache_Invalidate() -{ - //note that this gets called whether texdata or texpalette gets reconfigured. 
- texCache.invalidate(); -} - -TexCacheItem* TexCache_SetTexture(TexCache_TexFormat TEXFORMAT, u32 format, u32 texpal) -{ - switch(TEXFORMAT) +#endif + + for (; i < pixCount; i++, srcData++) { - case TexFormat_32bpp: return texCache.scan(format,texpal); - case TexFormat_15bpp: return texCache.scan(format,texpal); - default: assert(false); return NULL; + const u16 c = LOCAL_TO_LE_16(*srcData); + *dstBuffer++ = (c & 0x8000) ? CONVERT(c & 0x7FFF) : 0; } } -//call this periodically to keep the tex cache clean -void TexCache_EvictFrame() -{ - texCache.evict(); -} +template void TextureStore::Unpack(u32 *unpackBuffer); +template void TextureStore::Unpack(u32 *unpackBuffer); diff --git a/desmume/src/texcache.h b/desmume/src/texcache.h index 62a8e7eb0..255558eef 100644 --- a/desmume/src/texcache.h +++ b/desmume/src/texcache.h @@ -21,90 +21,169 @@ #define _TEXCACHE_H_ #include +#include #include "types.h" #include "common.h" #include "gfx3d.h" -enum TexCache_TexFormat +//this ought to be enough for anyone +//#define TEXCACHE_DEFAULT_THRESHOLD (64*1024*1024) + +//changed by zeromus on 15-dec. I couldnt find any games that were getting anywhere NEAR 64 +//metal slug burns through sprites so fast, it can test it pretty quickly though +//#define TEXCACHE_DEFAULT_THRESHOLD (16*1024*1024) + +// rogerman, 2016-11-02: Increase this to 32MB for games that use many large textures, such +// as Umihara Kawase Shun, which can cache over 20MB in the first level. 
+#define TEXCACHE_DEFAULT_THRESHOLD (32*1024*1024) + +#define PALETTE_DUMP_SIZE ((64+16+16)*1024) + +enum TextureStoreUnpackFormat { TexFormat_None = 0, //used when nothing yet is cached TexFormat_32bpp, //used by ogl renderer TexFormat_15bpp //used by rasterizer }; -class TexCacheItem; +class MemSpan; +class TextureStore; -typedef std::multimap TTexCacheItemMultimap; -typedef void (*TexCacheItemDeleteCallback)(TexCacheItem *texItem, void *param1, void *param2); +typedef u64 TextureCacheKey; +typedef std::map TextureCacheMap; // Key = A TextureCacheKey that includes a combination of the texture's NDS texture attributes and palette attributes; Value = Pointer to the texture item +typedef std::vector TextureCacheList; +//typedef u32 TextureFingerprint; -class TexCacheItem +class TextureCache { -private: - TexCacheItemDeleteCallback _deleteCallback; - void *_deleteCallbackParam1; - void *_deleteCallbackParam2; +protected: + TextureCacheMap _texCacheMap; // Used to quickly find a texture item by using a key of type TextureCacheKey + TextureCacheList _texCacheList; // Used to sort existing texture items for various operations + size_t _actualCacheSize; + size_t _cacheSizeThreshold; + u8 _paletteDump[PALETTE_DUMP_SIZE]; public: - TexCacheItem() - : decode_len(0) - , decoded(NULL) - , suspectedInvalid(false) - , assumedInvalid(false) - , _deleteCallback(NULL) - , _deleteCallbackParam1(NULL) - , _deleteCallbackParam2(NULL) - , cacheFormat(TexFormat_None) - {} + TextureCache(); - ~TexCacheItem() - { - free_aligned(this->decoded); - if (this->_deleteCallback != NULL) this->_deleteCallback(this, this->_deleteCallbackParam1, this->_deleteCallbackParam2); - } - u32 decode_len; - NDSTextureFormat format; - u8* decoded; //decoded texture data - bool suspectedInvalid; - bool assumedInvalid; - TTexCacheItemMultimap::iterator iterator; - - NDSTextureFormat GetTextureFormat() const { return this->format; } - - u32 texformat, texpal; - u32 sizeX, sizeY; - float invSizeX, invSizeY; - - 
u32 texid; //used by ogl renderer for the texid - TexCache_TexFormat cacheFormat; - - struct Dump { - ~Dump() { - delete[] texture; - } - int textureSize, indexSize; - static const int maxTextureSize=128*1024; - u8* texture; - u8 palette[256*2]; - } dump; + size_t GetActualCacheSize() const; + size_t GetCacheSizeThreshold() const; + void SetCacheSizeThreshold(size_t newThreshold); - TexCacheItemDeleteCallback GetDeleteCallback() - { - return this->_deleteCallback; - } + void Invalidate(); + void Evict(); + void Reset(); - void SetDeleteCallback(TexCacheItemDeleteCallback callbackFunc, void *inParam1, void *inParam2) - { - this->_deleteCallback = callbackFunc; - this->_deleteCallbackParam1 = inParam1; - this->_deleteCallbackParam2 = inParam2; - } + TextureStore* GetTexture(u32 texAttributes, u32 palAttributes); + + void Add(TextureStore *texItem); + void Remove(TextureStore *texItem); + + static TextureCacheKey GenerateKey(const u32 texAttributes, const u32 palAttributes); }; -void TexCache_Invalidate(); -void TexCache_Reset(); -void TexCache_EvictFrame(); +class TextureStore +{ +protected: + u32 _textureAttributes; + u32 _paletteAttributes; + + u32 _sizeS; + u32 _sizeT; + bool _isPalZeroTransparent; + + NDSTextureFormat _packFormat; + u32 _packAddress; + u32 _packSize; + u8 *_packData; + + u32 _paletteAddress; + u32 _paletteSize; + u16 *_paletteColorTable; + + // Only used by 4x4 formatted textures + u32 _packIndexAddress; + u32 _packIndexSize; + u8 *_packIndexData; + u32 _packSizeFirstSlot; + + bool _suspectedInvalid; + bool _assumedInvalid; + bool _isLoadNeeded; + + TextureCacheKey _cacheKey; + size_t _cacheSize; + size_t _cacheAge; // A value of 0 means the texture was just used. The higher this value, the older the texture. 
+ size_t _cacheUsageCount; + +public: + TextureStore(); + TextureStore(const u32 texAttributes, const u32 palAttributes); + virtual ~TextureStore(); + + u32 GetTextureAttributes() const; + u32 GetPaletteAttributes() const; + + u32 GetWidth() const; + u32 GetHeight() const; + bool IsPalZeroTransparent() const; + + NDSTextureFormat GetPackFormat() const; + u32 GetPackAddress() const; + u32 GetPackSize() const; + u8* GetPackData(); + + u32 GetPaletteAddress() const; + u32 GetPaletteSize() const; + u16* GetPaletteColorTable() const; + + u32 GetPackIndexAddress() const; + u32 GetPackIndexSize() const; + u8* GetPackIndexData(); + + void SetTextureData(const MemSpan &packedData, const MemSpan &packedIndexData); + void SetTexturePalette(const MemSpan &packedPalette); + void SetTexturePalette(const u16 *paletteBuffer); + + size_t GetUnpackSizeUsingFormat(const TextureStoreUnpackFormat texCacheFormat) const; + template void Unpack(u32 *unpackBuffer); + + bool IsSuspectedInvalid() const; + void SetSuspectedInvalid(); + + bool IsAssumedInvalid() const; + void SetAssumedInvalid(); + + void SetLoadNeeded(); + bool IsLoadNeeded() const; + + TextureCacheKey GetCacheKey() const; + + size_t GetCacheSize() const; + void SetCacheSize(size_t cacheSize); + + size_t GetCacheAge() const; + void IncreaseCacheAge(const size_t ageAmount); + void ResetCacheAge(); + + size_t GetCacheUseCount() const; + void IncreaseCacheUsageCount(const size_t usageCount); + void ResetCacheUsageCount(); + + void Update(); + void VRAMCompareAndUpdate(); + void DebugDump(); +}; -TexCacheItem* TexCache_SetTexture(TexCache_TexFormat TEXFORMAT, u32 format, u32 texpal); +template void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer); +template void NDSTextureUnpackI4(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict 
dstBuffer); +template void NDSTextureUnpackI8(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer); +template void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer); +template void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer); +template void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, const u16 *__restrict srcIndex, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *__restrict dstBuffer); +template void NDSTextureUnpackDirect16Bit(const size_t srcSize, const u16 *__restrict srcData, u32 *__restrict dstBuffer); + +extern TextureCache texCache; #endif diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp index 88d3bcf5f..a0b10ae78 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp @@ -142,90 +142,6 @@ void ColorspaceHandlerInit() } } -template -FORCEINLINE u32 ColorspaceConvert555To8888Opaque(const u16 src) -{ - return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF); -} - -template -FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src) -{ - return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF); -} - -template -FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor) -{ - FragmentColor outColor; - outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2; - outColor.g = srcColor.g >> 2; - outColor.b = ((SWAP_RB) ? 
srcColor.r : srcColor.b) >> 2; - outColor.a = srcColor.a >> 3; - - return outColor.color; -} - -template -FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor) -{ - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; - - return ColorspaceConvert8888To6665(srcColorComponent); -} - -template -FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor) -{ - FragmentColor outColor; - outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)]; - outColor.g = material_6bit_to_8bit[srcColor.g]; - outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)]; - outColor.a = material_5bit_to_8bit[srcColor.a]; - - return outColor.color; -} - -template -FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor) -{ - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; - - return ColorspaceConvert6665To8888(srcColorComponent); -} - -template -FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor) -{ - return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 ); -} - -template -FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor) -{ - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; - - return ColorspaceConvert8888To5551(srcColorComponent); -} - -template -FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor) -{ - return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 
0x0000 : 0x8000); -} - -template -FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor) -{ - FragmentColor srcColorComponent; - srcColorComponent.color = srcColor; - - return ColorspaceConvert6665To5551(srcColorComponent); -} - template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) { @@ -718,36 +634,6 @@ size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 * return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount); } -template u32 ColorspaceConvert555To8888Opaque(const u16 src); -template u32 ColorspaceConvert555To8888Opaque(const u16 src); - -template u32 ColorspaceConvert555To6665Opaque(const u16 src); -template u32 ColorspaceConvert555To6665Opaque(const u16 src); - -template u32 ColorspaceConvert8888To6665(FragmentColor srcColor); -template u32 ColorspaceConvert8888To6665(FragmentColor srcColor); - -template u32 ColorspaceConvert8888To6665(u32 srcColor); -template u32 ColorspaceConvert8888To6665(u32 srcColor); - -template u32 ColorspaceConvert6665To8888(FragmentColor srcColor); -template u32 ColorspaceConvert6665To8888(FragmentColor srcColor); - -template u32 ColorspaceConvert6665To8888(u32 srcColor); -template u32 ColorspaceConvert6665To8888(u32 srcColor); - -template u16 ColorspaceConvert8888To5551(FragmentColor srcColor); -template u16 ColorspaceConvert8888To5551(FragmentColor srcColor); - -template u16 ColorspaceConvert8888To5551(u32 srcColor); -template u16 ColorspaceConvert8888To5551(u32 srcColor); - -template u16 ColorspaceConvert6665To5551(FragmentColor srcColor); -template u16 ColorspaceConvert6665To5551(FragmentColor srcColor); - -template u16 ColorspaceConvert6665To5551(u32 srcColor); -template u16 ColorspaceConvert6665To5551(u32 srcColor); - template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict 
src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.h b/desmume/src/utils/colorspacehandler/colorspacehandler.h index 9573a9e33..3cecbee9a 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.h @@ -130,16 +130,89 @@ extern CACHE_ALIGN u32 color_555_to_888[32768]; void ColorspaceHandlerInit(); -template u32 ColorspaceConvert555To8888Opaque(const u16 src); -template u32 ColorspaceConvert555To6665Opaque(const u16 src); -template u32 ColorspaceConvert8888To6665(FragmentColor srcColor); -template u32 ColorspaceConvert8888To6665(u32 srcColor); -template u32 ColorspaceConvert6665To8888(FragmentColor srcColor); -template u32 ColorspaceConvert6665To8888(u32 srcColor); -template u16 ColorspaceConvert8888To5551(FragmentColor srcColor); -template u16 ColorspaceConvert8888To5551(u32 srcColor); -template u16 ColorspaceConvert6665To5551(FragmentColor srcColor); -template u16 ColorspaceConvert6665To5551(u32 srcColor); +template +FORCEINLINE u32 ColorspaceConvert555To8888Opaque(const u16 src) +{ + return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF); +} + +template +FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src) +{ + return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF); +} + +template +FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor) +{ + FragmentColor outColor; + outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2; + outColor.g = srcColor.g >> 2; + outColor.b = ((SWAP_RB) ? 
srcColor.r : srcColor.b) >> 2; + outColor.a = srcColor.a >> 3; + + return outColor.color; +} + +template +FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor) +{ + FragmentColor srcColorComponent; + srcColorComponent.color = srcColor; + + return ColorspaceConvert8888To6665(srcColorComponent); +} + +template +FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor) +{ + FragmentColor outColor; + outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)]; + outColor.g = material_6bit_to_8bit[srcColor.g]; + outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)]; + outColor.a = material_5bit_to_8bit[srcColor.a]; + + return outColor.color; +} + +template +FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor) +{ + FragmentColor srcColorComponent; + srcColorComponent.color = srcColor; + + return ColorspaceConvert6665To8888(srcColorComponent); +} + +template +FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor) +{ + return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 ); +} + +template +FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor) +{ + FragmentColor srcColorComponent; + srcColorComponent.color = srcColor; + + return ColorspaceConvert8888To5551(srcColorComponent); +} + +template +FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor) +{ + return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 
0x0000 : 0x8000); +} + +template +FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor) +{ + FragmentColor srcColorComponent; + srcColorComponent.color = srcColor; + + return ColorspaceConvert6665To5551(srcColorComponent); +} template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); diff --git a/desmume/src/utils/task.cpp b/desmume/src/utils/task.cpp index b6f22b46e..57e0cd109 100644 --- a/desmume/src/utils/task.cpp +++ b/desmume/src/utils/task.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2009-2015 DeSmuME team + Copyright (C) 2009-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -110,7 +110,8 @@ void Task::Impl::execute(const TWork &work, void *param) { slock_lock(this->mutex); - if (work == NULL || !this->_isThreadRunning) { + if ((work == NULL) || (this->workFunc != NULL) || !this->_isThreadRunning) + { slock_unlock(this->mutex); return; } @@ -128,12 +129,13 @@ void* Task::Impl::finish() slock_lock(this->mutex); - if (!this->_isThreadRunning) { + if ((this->workFunc == NULL) || !this->_isThreadRunning) { slock_unlock(this->mutex); return returnValue; } - while (this->workFunc != NULL) { + while (this->workFunc != NULL) + { scond_wait(this->condWork, this->mutex); } diff --git a/desmume/src/utils/vfat.cpp b/desmume/src/utils/vfat.cpp index e1256f08d..cd0e8cd6b 100644 --- a/desmume/src/utils/vfat.cpp +++ b/desmume/src/utils/vfat.cpp @@ -27,82 +27,79 @@ #include "../types.h" #include "../debug.h" #include "../emufile.h" +#include "retro_dirent.h" +#include "retro_stat.h" +#include "file/file_path.h" #include "emufat.h" #include "vfat.h" #include "libfat/libfat_public_api.h" -#include -#include -#include - -static char retro_dir[PATH_MAX_LENGTH]; enum EListCallbackArg { 
EListCallbackArg_Item, EListCallbackArg_Pop }; -typedef void (*ListCallback)(RDIR *rdir, EListCallbackArg); +typedef void (*ListCallback)(RDIR* rdir, EListCallbackArg); // List all files and subdirectories recursively +//TODO: clunky architecture. we've combined the callbacks into one handler. +//we could merge the callback and list_files function, or refactor the callback into one for each enum which receives a unit of work after +//the more detailed recursing logic (caused by libretro-common integration) is handled in the lister static void list_files(const char *filepath, ListCallback list_callback) { void * hFind; char *fname; u32 dwError; - RDIR *rdir = retro_opendir(filepath); - if (!rdir) - return; - strcpy(retro_dir, filepath); - if (retro_dirent_error(rdir)) - goto end; + RDIR* rdir = retro_opendir(filepath); + if(!rdir) return; + if(retro_dirent_error(rdir)) + { + retro_closedir(rdir); + return; + } - for (;;) - { - const char *name = NULL; - if (!retro_readdir(rdir)) - break; + for(;;) + { + if(!retro_readdir(rdir)) + break; - const char *fname = retro_dirent_get_name(rdir); - list_callback(rdir,EListCallbackArg_Item); + const char* fname = retro_dirent_get_name(rdir); + list_callback(rdir,EListCallbackArg_Item); + printf("cflash added %s\n",fname); - if (retro_dirent_is_dir(rdir, filepath) && (strcmp(fname, ".")) && strcmp(fname, "..")) - { - std::string subdir = (std::string)filepath + path_default_slash() + fname; - list_files(subdir.c_str(), list_callback); - list_callback(rdir, EListCallbackArg_Pop); + if(retro_dirent_is_dir(rdir) && (strcmp(fname, ".")) && (strcmp(fname, ".."))) + { + std::string subdir = (std::string)filepath + path_default_slash() + fname; + list_files(subdir.c_str(), list_callback); + list_callback(rdir, EListCallbackArg_Pop); } } -end: - retro_closedir(rdir); + retro_closedir(rdir); } -static unsigned long dataSectors = 0; -void count_ListCallback(RDIR *rdir, EListCallbackArg arg) +enum eCallbackType { - if(arg == 
EListCallbackArg_Pop) - return; - u32 sectors = 1; - if (!retro_dirent_is_dir(rdir, retro_dir)) - { - const char *path = retro_dirent_get_name(rdir); - /* allocate sectors for file */ - int32_t fileSize = path_get_size(path); - sectors += (fileSize+511)/512 + 1; - } + eCallbackType_Count, eCallbackType_Build +}; - dataSectors += sectors; -} +static eCallbackType callbackType; +//for eCallbackType_Count: +static bool count_failed = false; +static u64 dataSectors = 0; + +//recursing related.. really ought to be merged with list_files functionality static std::string currPath; static std::stack pathStack; static std::stack virtPathStack; static std::string currVirtPath; -void build_ListCallback(RDIR *rdir, EListCallbackArg arg) + +static void DirectoryListCallback(RDIR* rdir, EListCallbackArg arg) { - const char *fname = retro_dirent_get_name(rdir); + const char* fname = retro_dirent_get_name(rdir); if(arg == EListCallbackArg_Pop) { @@ -113,7 +110,7 @@ void build_ListCallback(RDIR *rdir, EListCallbackArg arg) return; } - if (retro_dirent_is_dir(rdir, retro_dir)) + if(retro_dirent_is_dir(rdir)) { if(!strcmp(fname,".")) return; if(!strcmp(fname,"..")) return; @@ -122,10 +119,18 @@ void build_ListCallback(RDIR *rdir, EListCallbackArg arg) virtPathStack.push(currVirtPath); currVirtPath = currVirtPath + "/" + fname; - bool ok = LIBFAT::MkDir(currVirtPath.c_str()); - if(!ok) - printf("ERROR adding dir %s via libfat\n",currVirtPath.c_str()); + if(callbackType == eCallbackType_Build) + { + bool ok = LIBFAT::MkDir(currVirtPath.c_str()); + + if(!ok) + printf("ERROR adding dir %s via libfat\n",currVirtPath.c_str()); + } + else + { + dataSectors++; //directories take one sector + } currPath = currPath + path_default_slash() + fname; return; @@ -134,47 +139,56 @@ void build_ListCallback(RDIR *rdir, EListCallbackArg arg) { std::string path = currPath + path_default_slash() + fname; - FILE* inf = fopen(path.c_str(),"rb"); - if(inf) + if(callbackType == eCallbackType_Build) { - u8 * 
buf; - size_t elements_read; - long len; + FILE* inf = fopen(path.c_str(),"rb"); + if(inf) + { + fseek(inf,0,SEEK_END); + long len = ftell(inf); + fseek(inf,0,SEEK_SET); + u8 *buf = new u8[len]; + fread(buf,1,len,inf); + fclose(inf); - fseek(inf, 0, SEEK_END); - len = ftell(inf); - fseek(inf, 0, SEEK_SET); - buf = new u8[len]; - elements_read = fread(buf, 1, len, inf); - if (elements_read != len) - printf( - "libfat: %lu bytes read instead of %l.\n", - elements_read, - len - ); - fclose(inf); - - std::string path = currVirtPath + "/" + fname; - printf("FAT + (%10.2f KB) %s \n",len/1024.f,path.c_str()); - bool ok = LIBFAT::WriteFile(path.c_str(),buf,len); - if(!ok) - printf("ERROR adding file to fat\n"); - delete[] buf; - } else printf("ERROR opening file for fat\n"); + std::string path = currVirtPath + "/" + fname; + printf("FAT + (%10.2f KB) %s \n",len/1024.f,path.c_str()); + bool ok = LIBFAT::WriteFile(path.c_str(),buf,len); + if(!ok) + printf("ERROR adding file to fat\n"); + delete[] buf; + } else printf("ERROR opening file for fat\n"); + } + else + { + //allocate sectors for file + int32_t fileSize = path_get_size(path.c_str()); + if(fileSize == -1) { count_failed = true; dataSectors = 0; } + else dataSectors += (fileSize+511)/512 + 1; + } } + } - - bool VFAT::build(const char* path, int extra_MB) { dataSectors = 0; currVirtPath = ""; currPath = path; - list_files(path, count_ListCallback); + + count_failed = false; + callbackType = eCallbackType_Count; + list_files(path, DirectoryListCallback); + + if(count_failed) + { + printf("FAILED enumerating files for fat\n"); + return false; + } dataSectors += 8; //a few for reserved sectors, etc. + dataSectors += extra_MB*1024*1024/512; //add extra write space //dataSectors += 16*1024*1024/512; //add 16MB worth of write space. this is probably enough for anyone, but maybe it should be configurable. 
//we could always suggest to users to add a big file to their directory to overwrite (that would cause the image to get padded) @@ -185,10 +199,7 @@ bool VFAT::build(const char* path, int extra_MB) if(dataSectors>=(0x80000000>>9)) { - printf( - "error allocating memory for fat (%lu KBytes)\n", - (dataSectors*512) / 1024 - ); + printf("error allocating memory for fat (%d KBytes)\n",(dataSectors*512)/1024); printf("total fat sizes > 2GB are never going to work\n"); } @@ -199,10 +210,7 @@ bool VFAT::build(const char* path, int extra_MB) } catch(std::bad_alloc) { - printf( - "error allocating memory for fat (%lu KBytes)\n", - (dataSectors*512) / 1024 - ); + printf("error allocating memory for fat (%d KBytes)\n",(dataSectors*512)/1024); printf("(out of memory)\n"); return false; } @@ -225,7 +233,8 @@ bool VFAT::build(const char* path, int extra_MB) //setup libfat and write all the files through it LIBFAT::Init(memf->buf(),memf->size()); - list_files(path, build_ListCallback); + callbackType = eCallbackType_Build; + list_files(path, DirectoryListCallback); LIBFAT::Shutdown(); return true; @@ -246,4 +255,4 @@ EMUFILE* VFAT::detach() EMUFILE* ret = file; file = NULL; return ret; -} +} \ No newline at end of file