diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index 86e180a55..1285ff496 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -2,7 +2,7 @@ Copyright (C) 2006 yopyop Copyright (C) 2006-2007 Theo Berkau Copyright (C) 2007 shash - Copyright (C) 2008-2017 DeSmuME team + Copyright (C) 2008-2018 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -7578,7 +7578,7 @@ void GPUSubsystem::ForceFrameStop() if (this->_frameNeedsFinish) { this->_frameNeedsFinish = false; - this->_event->DidFrameEnd(false, this->_displayInfo); + this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo); } } @@ -7616,8 +7616,6 @@ void GPUSubsystem::UpdateRenderProperties() this->_engineSub->isLineOutputNative[l] = true; } - this->_displayInfo.bufferIndex = (this->_displayInfo.bufferIndex + 1) % this->_displayInfo.framebufferPageCount; - const size_t nativeFramebufferSize = GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * this->_displayInfo.pixelBytes; const size_t customFramebufferSize = this->_displayInfo.customWidth * this->_displayInfo.customHeight * this->_displayInfo.pixelBytes; @@ -8176,14 +8174,7 @@ void GPUSubsystem::RenderLine(const size_t l) this->_engineSub->ApplySettings(); this->_event->DidApplyGPUSettingsEnd(); - u8 targetBufferIndex = this->_displayInfo.bufferIndex; - - if ( (l == 0) && !this->_willFrameSkip ) - { - targetBufferIndex = (targetBufferIndex + 1) % this->_displayInfo.framebufferPageCount; - } - - this->_event->DidFrameBegin(this->_willFrameSkip, targetBufferIndex, l); + this->_event->DidFrameBegin(l, this->_willFrameSkip, this->_displayInfo.framebufferPageCount, this->_displayInfo.bufferIndex); this->_frameNeedsFinish = true; } @@ -8375,6 +8366,14 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551) } } +void GPUEventHandlerDefault::DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, 
u8 &selectedBufferIndexInOut) +{ + if ( (pageCount > 1) && (line == 0) && !isFrameSkipRequested ) + { + selectedBufferIndexInOut = ((selectedBufferIndexInOut + 1) % pageCount); + } +} + GPUClientFetchObject::GPUClientFetchObject() { for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++) diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index cb779e4cc..6ab8e0820 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -2,7 +2,7 @@ Copyright (C) 2006 yopyop Copyright (C) 2006-2007 Theo Berkau Copyright (C) 2007 shash - Copyright (C) 2009-2017 DeSmuME team + Copyright (C) 2009-2018 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1722,7 +1722,7 @@ public: class GPUEventHandler { public: - virtual void DidFrameBegin(bool isFrameSkipRequested, const u8 targetBufferIndex, const size_t line) = 0; + virtual void DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut) = 0; virtual void DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &latestDisplayInfo) = 0; virtual void DidRender3DBegin() = 0; virtual void DidRender3DEnd() = 0; @@ -1738,7 +1738,7 @@ public: class GPUEventHandlerDefault : public GPUEventHandler { public: - virtual void DidFrameBegin(bool isFrameSkipRequested, const u8 targetBufferIndex, const size_t line) {}; + virtual void DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut); virtual void DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &latestDisplayInfo) {}; virtual void DidRender3DBegin() {}; virtual void DidRender3DEnd() {}; diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp index 0377ca1b6..a5b0c357a 100644 --- a/desmume/src/NDSSystem.cpp +++ b/desmume/src/NDSSystem.cpp @@ -1,6 +1,6 @@ /* Copyright (C) 2006 yopyop - Copyright (C) 2008-2017 DeSmuME team + Copyright (C) 2008-2018 DeSmuME team This 
file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -570,6 +570,8 @@ bool GameInfo::loadROM(std::string fname, u32 type) void GameInfo::closeROM() { + GPU->ForceFrameStop(); + if (reader) reader->DeInit(fROM); diff --git a/desmume/src/frontend/cocoa/OGLDisplayOutput.h b/desmume/src/frontend/cocoa/OGLDisplayOutput.h index 69832c29c..76e12e923 100644 --- a/desmume/src/frontend/cocoa/OGLDisplayOutput.h +++ b/desmume/src/frontend/cocoa/OGLDisplayOutput.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2017 DeSmuME team + Copyright (C) 2014-2018 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,7 +35,7 @@ #include "ClientDisplayView.h" -#define OPENGL_FETCH_BUFFER_COUNT 3 +#define OPENGL_FETCH_BUFFER_COUNT 2 class OGLVideoOutput; diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.h b/desmume/src/frontend/cocoa/cocoa_GPU.h index e0ab05a4f..d420863fe 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.h +++ b/desmume/src/frontend/cocoa/cocoa_GPU.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2013-2017 DeSmuME team + Copyright (C) 2013-2018 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,6 +47,15 @@ #define VIDEO_FLUSH_TIME_LIMIT_OFFSET 8 // The amount of time, in seconds, to wait for a flush to occur on a given CVDisplayLink before stopping it. +enum ClientDisplayBufferState +{ + ClientDisplayBufferState_Idle = 0, // The buffer has already been read and is currently idle. It is a candidate for a read or write operation. + ClientDisplayBufferState_Writing = 1, // The buffer is currently being written. It cannot be accessed. + ClientDisplayBufferState_Ready = 2, // The buffer was just written to, but has not been read yet. It is a candidate for a read or write operation. 
+ ClientDisplayBufferState_PendingRead = 3, // The buffer has been marked that it will be read. It must not be accessed. + ClientDisplayBufferState_Reading = 4 // The buffer is currently being read. It cannot be accessed. +}; + class GPUEventHandlerOSX; #ifdef ENABLE_SHARED_FETCH_OBJECT @@ -58,7 +67,11 @@ typedef std::map DisplayLinkFlushTimeLimitMap; { GPUClientFetchObject *GPUFetchObject; task_t _taskEmulationLoop; - semaphore_t _semFramebuffer[2]; + + OSSpinLock _spinlockFramebufferStates[MAX_FRAMEBUFFER_PAGES]; + semaphore_t _semFramebuffer[MAX_FRAMEBUFFER_PAGES]; + volatile ClientDisplayBufferState _framebufferState[MAX_FRAMEBUFFER_PAGES]; + pthread_rwlock_t *_rwlockOutputList; pthread_mutex_t _mutexDisplayLinkLists; NSMutableArray *_cdsOutputList; @@ -80,7 +93,10 @@ typedef std::map DisplayLinkFlushTimeLimitMap; - (void) semaphoreFramebufferCreate; - (void) semaphoreFramebufferDestroy; -- (semaphore_t) semaphoreFramebufferAtIndex:(const u8)bufferIndex; +- (u8) selectBufferIndex:(const u8)currentIndex pageCount:(size_t)pageCount; +- (semaphore_t) semaphoreFramebufferPageAtIndex:(const u8)bufferIndex; +- (ClientDisplayBufferState) framebufferStateAtIndex:(uint8_t)index; +- (void) setFramebufferState:(ClientDisplayBufferState)bufferState index:(uint8_t)index; - (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock; - (void) incrementViewsUsingDirectToCPUFiltering; diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.mm b/desmume/src/frontend/cocoa/cocoa_GPU.mm index 7a0182027..8770eea64 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.mm +++ b/desmume/src/frontend/cocoa/cocoa_GPU.mm @@ -1,5 +1,5 @@ /* - Copyright (C) 2013-2017 DeSmuME team + Copyright (C) 2013-2018 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -76,7 +76,7 @@ public: bool GetRender3DNeedsFinish(); - virtual void DidFrameBegin(bool isFrameSkipRequested, const 
u8 targetBufferIndex, const size_t line); + virtual void DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut); virtual void DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &latestDisplayInfo); virtual void DidRender3DBegin(); virtual void DidRender3DEnd(); @@ -183,12 +183,12 @@ public: if (fetchObject == NULL) { fetchObject = new MacOGLClientFetchObject; + GPU->SetFramebufferPageCount(OPENGL_FETCH_BUFFER_COUNT); } fetchObject->Init(); gpuEvent->SetFetchObject(fetchObject); - GPU->SetFramebufferPageCount(OPENGL_FETCH_BUFFER_COUNT); GPU->SetWillAutoResolveToCustomBuffer(false); #endif @@ -254,16 +254,22 @@ public: gpuEvent->FramebufferLock(); #ifdef ENABLE_SHARED_FETCH_OBJECT - semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:0]); - semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:1]); + const size_t maxPages = GPU->GetDisplayInfo().framebufferPageCount; + for (size_t i = 0; i < maxPages; i++) + { + semaphore_wait([[self sharedData] semaphoreFramebufferPageAtIndex:i]); + } #endif GPU->SetCustomFramebufferSize(w, h); #ifdef ENABLE_SHARED_FETCH_OBJECT fetchObject->SetFetchBuffers(GPU->GetDisplayInfo()); - semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:1]); - semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:0]); + + for (size_t i = maxPages - 1; i < maxPages; i--) + { + semaphore_signal([[self sharedData] semaphoreFramebufferPageAtIndex:i]); + } #endif gpuEvent->FramebufferUnlock(); @@ -315,16 +321,22 @@ public: if (colorFormat != dispInfo.colorFormat) { #ifdef ENABLE_SHARED_FETCH_OBJECT - semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:0]); - semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:1]); + const size_t maxPages = GPU->GetDisplayInfo().framebufferPageCount; + for (size_t i = 0; i < maxPages; i++) + { + semaphore_wait([[self sharedData] semaphoreFramebufferPageAtIndex:i]); + } #endif 
GPU->SetColorFormat((NDSColorFormat)colorFormat); #ifdef ENABLE_SHARED_FETCH_OBJECT fetchObject->SetFetchBuffers(GPU->GetDisplayInfo()); - semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:1]); - semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:0]); + + for (size_t i = maxPages - 1; i < maxPages; i--) + { + semaphore_signal([[self sharedData] semaphoreFramebufferPageAtIndex:i]); + } #endif } @@ -866,15 +878,20 @@ public: gpuEvent->FramebufferLock(); #ifdef ENABLE_SHARED_FETCH_OBJECT - semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:0]); - semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:1]); + const size_t maxPages = GPU->GetDisplayInfo().framebufferPageCount; + for (size_t i = 0; i < maxPages; i++) + { + semaphore_wait([[self sharedData] semaphoreFramebufferPageAtIndex:i]); + } #endif GPU->ClearWithColor(colorBGRA5551); #ifdef ENABLE_SHARED_FETCH_OBJECT - semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:1]); - semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:0]); + for (size_t i = maxPages - 1; i < maxPages; i--) + { + semaphore_signal([[self sharedData] semaphoreFramebufferPageAtIndex:i]); + } #endif gpuEvent->FramebufferUnlock(); @@ -941,9 +958,14 @@ public: pthread_mutex_init(&_mutexFetchExecute, NULL); _taskEmulationLoop = 0; - _semFramebuffer[0] = 0; - _semFramebuffer[1] = 0; - + + for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++) + { + _semFramebuffer[i] = 0; + _framebufferState[i] = ClientDisplayBufferState_Idle; + _spinlockFramebufferStates[i] = OS_SPINLOCK_INIT; + } + [[NSNotificationCenter defaultCenter] addObserver:self selector:@selector(respondToScreenChange:) name:@"NSApplicationDidChangeScreenParametersNotification" @@ -1003,30 +1025,136 @@ public: - (void) semaphoreFramebufferCreate { _taskEmulationLoop = mach_task_self(); - semaphore_create(_taskEmulationLoop, &_semFramebuffer[0], SYNC_POLICY_FIFO, 1); - semaphore_create(_taskEmulationLoop, &_semFramebuffer[1], 
SYNC_POLICY_FIFO, 1); + + for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++) + { + semaphore_create(_taskEmulationLoop, &_semFramebuffer[i], SYNC_POLICY_FIFO, 1); + } } - (void) semaphoreFramebufferDestroy { - if (_semFramebuffer[0] != 0) + for (size_t i = MAX_FRAMEBUFFER_PAGES - 1; i < MAX_FRAMEBUFFER_PAGES; i--) { - semaphore_destroy(_taskEmulationLoop, _semFramebuffer[0]); - _semFramebuffer[0] = 0; - } - - if (_semFramebuffer[1] != 0) - { - semaphore_destroy(_taskEmulationLoop, _semFramebuffer[1]); - _semFramebuffer[1] = 0; + if (_semFramebuffer[i] != 0) + { + semaphore_destroy(_taskEmulationLoop, _semFramebuffer[i]); + _semFramebuffer[i] = 0; + } } } -- (semaphore_t) semaphoreFramebufferAtIndex:(const u8)bufferIndex +- (u8) selectBufferIndex:(const u8)currentIndex pageCount:(size_t)pageCount { + u8 selectedIndex = currentIndex; + bool stillSearching = true; + + // First, search for an idle buffer along with its corresponding semaphore. + if (stillSearching) + { + selectedIndex = (selectedIndex + 1) % pageCount; + for (; selectedIndex != currentIndex; selectedIndex = (selectedIndex + 1) % pageCount) + { + if ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) + { + stillSearching = false; + break; + } + } + } + + // Next, search for either an idle or a ready buffer along with its corresponding semaphore. + if (stillSearching) + { + selectedIndex = (selectedIndex + 1) % pageCount; + for (size_t spin = 0; spin < 100ULL * pageCount; selectedIndex = (selectedIndex + 1) % pageCount, spin++) + { + if ( ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) || + (([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) && (selectedIndex != currentIndex)) ) + { + stillSearching = false; + break; + } + } + } + + // Since the most available buffers couldn't be taken, we're going to spin for some finite + // period of time until an idle buffer emerges. 
If that happens, then force wait on the + // buffer's corresponding semaphore. + if (stillSearching) + { + selectedIndex = (selectedIndex + 1) % pageCount; + for (size_t spin = 0; spin < 10000ULL * pageCount; selectedIndex = (selectedIndex + 1) % pageCount, spin++) + { + if ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) + { + stillSearching = false; + break; + } + } + } + + // In an effort to find something that is likely to be available shortly in the future, + // search for any idle, ready or reading buffer, and then force wait on its corresponding + // semaphore. + if (stillSearching) + { + selectedIndex = (selectedIndex + 1) % pageCount; + for (; selectedIndex != currentIndex; selectedIndex = (selectedIndex + 1) % pageCount) + { + if ( ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) || + ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) || + ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Reading) ) + { + stillSearching = false; + break; + } + } + } + + // As a last resort, search for any buffer that is not currently writing, and then force wait + // on its corresponding semaphore. 
+ if (stillSearching) + { + selectedIndex = (selectedIndex + 1) % pageCount; + for (; selectedIndex != currentIndex; selectedIndex = (selectedIndex + 1) % pageCount) + { + if ( ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) || + ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) || + ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Reading) || + ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_PendingRead) ) + { + stillSearching = false; + break; + } + } + } + + return selectedIndex; +} + +- (semaphore_t) semaphoreFramebufferPageAtIndex:(const u8)bufferIndex +{ + assert(bufferIndex < MAX_FRAMEBUFFER_PAGES); return _semFramebuffer[bufferIndex]; } +- (ClientDisplayBufferState) framebufferStateAtIndex:(uint8_t)index +{ + OSSpinLockLock(&_spinlockFramebufferStates[index]); + const ClientDisplayBufferState bufferState = _framebufferState[index]; + OSSpinLockUnlock(&_spinlockFramebufferStates[index]); + + return bufferState; +} + +- (void) setFramebufferState:(ClientDisplayBufferState)bufferState index:(uint8_t)index +{ + OSSpinLockLock(&_spinlockFramebufferStates[index]); + _framebufferState[index] = bufferState; + OSSpinLockUnlock(&_spinlockFramebufferStates[index]); +} + - (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock { pthread_rwlock_t *currentRWLock = _rwlockOutputList; @@ -1290,7 +1418,7 @@ void GPUEventHandlerOSX::SetFetchObject(GPUClientFetchObject *fetchObject) this->_fetchObject = fetchObject; } -void GPUEventHandlerOSX::DidFrameBegin(bool isFrameSkipRequested, const u8 targetBufferIndex, const size_t line) +void GPUEventHandlerOSX::DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut) { this->FramebufferLock(); @@ -1298,7 +1426,14 @@ void GPUEventHandlerOSX::DidFrameBegin(bool isFrameSkipRequested, const u8 targe if (!isFrameSkipRequested) { 
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_fetchObject->GetClientData(); - semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:targetBufferIndex]); + + if ( (pageCount > 1) && (line == 0) ) + { + selectedBufferIndexInOut = [sharedViewObject selectBufferIndex:selectedBufferIndexInOut pageCount:pageCount]; + } + + semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:selectedBufferIndexInOut]); + [sharedViewObject setFramebufferState:ClientDisplayBufferState_Writing index:selectedBufferIndexInOut]; } #endif } @@ -1310,7 +1445,8 @@ void GPUEventHandlerOSX::DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo & if (!isFrameSkipped) { this->_fetchObject->SetFetchDisplayInfo(latestDisplayInfo); - semaphore_signal([sharedViewObject semaphoreFramebufferAtIndex:latestDisplayInfo.bufferIndex]); + [sharedViewObject setFramebufferState:ClientDisplayBufferState_Ready index:latestDisplayInfo.bufferIndex]; + semaphore_signal([sharedViewObject semaphoreFramebufferPageAtIndex:latestDisplayInfo.bufferIndex]); } #endif diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h index 43ed4a192..bf5a64fe0 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2017 DeSmuME team + Copyright (C) 2017-2018 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,16 +36,7 @@ #endif #define METAL_FETCH_BUFFER_COUNT 3 -#define RENDER_BUFFER_COUNT 4 - -enum ClientDisplayBufferState -{ - ClientDisplayBufferState_Idle = 0, // The buffer has already been read and is currently idle. It is a candidate for a read or write operation. - ClientDisplayBufferState_Writing = 1, // The buffer is currently being written. It cannot be accessed. 
- ClientDisplayBufferState_Ready = 2, // The buffer was just written to, but has not been read yet. It is a candidate for a read or write operation. - ClientDisplayBufferState_PendingRead = 3, // The buffer has been marked that it will be read. It must not be accessed. - ClientDisplayBufferState_Reading = 4 // The buffer is currently being read. It cannot be accessed. -}; +#define RENDER_BUFFER_COUNT 6 class MacMetalFetchObject; class MacMetalDisplayPresenter; @@ -89,6 +80,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @interface MetalDisplayViewSharedData : MacClientSharedObject { id device; + id _fetchCommandQueue; id commandQueue; id defaultLibrary; @@ -124,6 +116,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; MetalTexturePair texPairFetch; id bceFetch; + BOOL willFetchImmediate; id texLQ2xLUT; id texHQ2xLUT; @@ -156,6 +149,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @property (assign) MetalTexturePair texPairFetch; @property (assign) id bceFetch; +@property (assign) BOOL willFetchImmediate; @property (readonly, nonatomic) id texLQ2xLUT; @property (readonly, nonatomic) id texHQ2xLUT; @@ -215,7 +209,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; OSSpinLock _spinlockRenderBufferStates[RENDER_BUFFER_COUNT]; dispatch_semaphore_t _semRenderBuffers[RENDER_BUFFER_COUNT]; - ClientDisplayBufferState _renderBufferState[RENDER_BUFFER_COUNT]; + volatile ClientDisplayBufferState _renderBufferState[RENDER_BUFFER_COUNT]; MetalTexturePair texPairProcess; MetalRenderFrameInfo renderFrameInfo; diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index 274647fed..c858aef59 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -1,5 +1,5 @@ /* - Copyright (C) 2017 DeSmuME team + Copyright (C) 2017-2018 
DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,6 +37,7 @@ @synthesize texPairFetch; @synthesize bceFetch; +@synthesize willFetchImmediate; @synthesize texLQ2xLUT; @synthesize texHQ2xLUT; @@ -66,6 +67,7 @@ [device retain]; commandQueue = [device newCommandQueue]; + _fetchCommandQueue = [device newCommandQueue]; defaultLibrary = [device newDefaultLibrary]; _fetch555Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555"] error:nil] retain]; _fetch666Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666"] error:nil] retain]; @@ -148,7 +150,7 @@ idxBufferPtr[j+5] = k+0; } - id cb = [commandQueue commandBufferWithUnretainedReferences];; + id cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];; id bce = [cb blitCommandEncoder]; [bce copyFromBuffer:tempHUDIndexBuffer @@ -235,9 +237,10 @@ texPairFetch.main = [_texDisplayPostprocessNative[NDSDisplayID_Main][0] retain]; texPairFetch.touch = [_texDisplayPostprocessNative[NDSDisplayID_Touch][0] retain]; bceFetch = nil; + willFetchImmediate = YES; // Set up the HQnx LUT textures. 
- SetupHQnxLUTs_Metal(device, commandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT); + SetupHQnxLUTs_Metal(device, _fetchCommandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT); texCurrentHQnxLUT = nil; return self; @@ -248,6 +251,7 @@ [device release]; [commandQueue release]; + [_fetchCommandQueue release]; [defaultLibrary release]; [_fetch555Pipeline release]; [_fetch666Pipeline release]; @@ -426,10 +430,9 @@ const size_t th = _fetchThreadsPerGroup.height; _fetchThreadGroupsPerGridCustom = MTLSizeMake((w + tw - 1) / tw, (h + th - 1) / th, 1); - id cb = [commandQueue commandBufferWithUnretainedReferences]; + id cb = [_fetchCommandQueue commandBufferWithUnretainedReferences]; MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:dispInfo.bufferIndex commandBuffer:cb]; [cb commit]; - [cb waitUntilCompleted]; const MetalTexturePair oldTexPair = [self texPairFetch]; @@ -625,108 +628,107 @@ - (void) fetchFromBufferIndex:(const u8)index { - id cb = [commandQueue commandBufferWithUnretainedReferences]; + id cb = [_fetchCommandQueue commandBufferWithUnretainedReferences]; + [cb enqueue]; + + [self setWillFetchImmediate:YES]; if (!_isSharedBufferTextureSupported) { - /* + semaphore_wait([self semaphoreFramebufferPageAtIndex:index]); + [self setFramebufferState:ClientDisplayBufferState_Reading index:index]; + id bce = [cb blitCommandEncoder]; [self setBceFetch:bce]; - */ - semaphore_wait([self semaphoreFramebufferAtIndex:index]); GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index); - semaphore_signal([self semaphoreFramebufferAtIndex:index]); - /* [self setBceFetch:nil]; [bce endEncoding]; - [cb addCompletedHandler:^(id block) { - semaphore_signal([self semaphoreFramebufferAtIndex:index]); - }]; - [cb commit]; - - cb = [commandQueue commandBufferWithUnretainedReferences]; - */ - } - else - { - GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index); + if ([self willFetchImmediate]) + { + [self 
setFramebufferState:ClientDisplayBufferState_Idle index:index]; + semaphore_signal([self semaphoreFramebufferPageAtIndex:index]); + } + else + { + [cb addCompletedHandler:^(id block) { + [self setFramebufferState:ClientDisplayBufferState_Idle index:index]; + semaphore_signal([self semaphoreFramebufferPageAtIndex:index]); + }]; + + [cb commit]; + + cb = [_fetchCommandQueue commandBufferWithUnretainedReferences]; + [cb enqueue]; + } } const MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:index commandBuffer:cb]; + [newTexPair.main retain]; + [newTexPair.touch retain]; - //[cb addCompletedHandler:^(id block) { + [cb addCompletedHandler:^(id block) { const MetalTexturePair oldTexPair = [self texPairFetch]; - - [newTexPair.main retain]; - [newTexPair.touch retain]; [self setTexPairFetch:newTexPair]; - [oldTexPair.main release]; [oldTexPair.touch release]; - //}]; + }]; + [cb commit]; } - (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id)bce { - if (_isSharedBufferTextureSupported) - { - return; - } - id targetDestination = _texDisplayFetchNative[displayID][bufferIndex]; - const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex); - [targetDestination replaceRegion:MTLRegionMake2D(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT) - mipmapLevel:0 - withBytes:currentDisplayInfo.nativeBuffer[displayID] - bytesPerRow:_nativeLineSize]; - - /* - const id targetSource = _bufDisplayFetchNative[displayID][bufferIndex]; - - [bce copyFromBuffer:targetSource - sourceOffset:0 - sourceBytesPerRow:_nativeLineSize - sourceBytesPerImage:_nativeBufferSize - sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1) - toTexture:targetDestination - destinationSlice:0 - destinationLevel:0 - destinationOrigin:MTLOriginMake(0, 0, 0)]; - */ + if ([self willFetchImmediate]) + { + [targetDestination 
replaceRegion:MTLRegionMake2D(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT) + mipmapLevel:0 + withBytes:currentDisplayInfo.nativeBuffer[displayID] + bytesPerRow:_nativeLineSize]; + } + else + { + [bce copyFromBuffer:_bufDisplayFetchNative[displayID][bufferIndex] + sourceOffset:0 + sourceBytesPerRow:_nativeLineSize + sourceBytesPerImage:_nativeBufferSize + sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1) + toTexture:targetDestination + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + } } - (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id)bce { - if (_isSharedBufferTextureSupported) - { - return; - } - const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex); id targetDestination = _texDisplayFetchCustom[displayID][bufferIndex]; - [targetDestination replaceRegion:MTLRegionMake2D(0, 0, currentDisplayInfo.customWidth, currentDisplayInfo.customHeight) - mipmapLevel:0 - withBytes:currentDisplayInfo.customBuffer[displayID] - bytesPerRow:_customLineSize]; - - /* - const id targetSource = _bufDisplayFetchCustom[displayID][bufferIndex]; - - [bce copyFromBuffer:targetSource - sourceOffset:0 - sourceBytesPerRow:_customLineSize - sourceBytesPerImage:_customBufferSize - sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1) - toTexture:targetDestination - destinationSlice:0 - destinationLevel:0 - destinationOrigin:MTLOriginMake(0, 0, 0)]; - */ + if ( (currentDisplayInfo.customWidth < GPU_FRAMEBUFFER_NATIVE_WIDTH * 5) && (currentDisplayInfo.customHeight < GPU_FRAMEBUFFER_NATIVE_HEIGHT * 5) ) + { + [targetDestination replaceRegion:MTLRegionMake2D(0, 0, currentDisplayInfo.customWidth, currentDisplayInfo.customHeight) + mipmapLevel:0 + withBytes:currentDisplayInfo.customBuffer[displayID] + bytesPerRow:_customLineSize]; + } + else + { + [self 
setWillFetchImmediate:NO]; + + [bce copyFromBuffer:_bufDisplayFetchCustom[displayID][bufferIndex] + sourceOffset:0 + sourceBytesPerRow:_customLineSize + sourceBytesPerImage:_customBufferSize + sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1) + toTexture:targetDestination + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + } } @end @@ -1385,10 +1387,10 @@ [cce endEncoding]; + [newTexProcess.main retain]; + [newTexProcess.touch retain]; + [cb addCompletedHandler:^(id block) { - [newTexProcess.main retain]; - [newTexProcess.touch retain]; - const MetalTexturePair oldTexPair = [self texPairProcess]; [self setTexPairProcess:newTexProcess]; [oldTexPair.main release]; @@ -1547,15 +1549,16 @@ } } + [newTexProcess.main retain]; + [newTexProcess.touch retain]; + [cb addCompletedHandler:^(id block) { - [newTexProcess.main retain]; - [newTexProcess.touch retain]; - const MetalTexturePair oldTexPair = [self texPairProcess]; [self setTexPairProcess:newTexProcess]; [oldTexPair.main release]; [oldTexPair.touch release]; }]; + [cb commit]; if ([self needsProcessFrameWait]) @@ -1626,10 +1629,10 @@ newTexProcess.touch = _texDisplayPixelScaler[NDSDisplayID_Touch]; } + [newTexProcess.main retain]; + [newTexProcess.touch retain]; + [cb addCompletedHandler:^(id block) { - [newTexProcess.main retain]; - [newTexProcess.touch retain]; - const MetalTexturePair oldTexPair = [self texPairProcess]; [self setTexPairProcess:newTexProcess]; [oldTexPair.main release]; @@ -1650,12 +1653,12 @@ } else { + [newTexProcess.main retain]; + [newTexProcess.touch retain]; + id cb = [self newCommandBuffer]; [cb addCompletedHandler:^(id block) { - [newTexProcess.main retain]; - [newTexProcess.touch retain]; - const MetalTexturePair oldTexPair = [self texPairProcess]; [self setTexPairProcess:newTexProcess]; [oldTexPair.main release]; @@ -1781,7 +1784,7 @@ if (stillSearching) { selectedIndex = (selectedIndex + 1) % 
RENDER_BUFFER_COUNT; - for (size_t spin = 0; spin < 10ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++) + for (size_t spin = 0; spin < 100ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++) { if ( ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) || (([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) && (selectedIndex != mrfi.renderIndex)) ) @@ -1802,7 +1805,7 @@ if (stillSearching) { selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT; - for (size_t spin = 0; spin < 100000ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++) + for (size_t spin = 0; spin < 10000ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++) { if ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) { diff --git a/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm index 1d3a389b7..e43a78786 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm @@ -1,5 +1,5 @@ /* - Copyright (C) 2017 DeSmuME team + Copyright (C) 2017-2018 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -195,14 +195,16 @@ void MacOGLClientFetchObject::FetchFromBufferIndex(const u8 index) MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_clientData; this->_useDirectToCPUFilterPipeline = ([sharedViewObject numberViewsUsingDirectToCPUFiltering] > 0); - semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:index]); + semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:index]); + [sharedViewObject setFramebufferState:ClientDisplayBufferState_Reading index:index]; 
CGLLockContext(this->_context); CGLSetCurrentContext(this->_context); this->OGLClientFetchObject::FetchFromBufferIndex(index); CGLUnlockContext(this->_context); - semaphore_signal([sharedViewObject semaphoreFramebufferAtIndex:index]); + [sharedViewObject setFramebufferState:ClientDisplayBufferState_Idle index:index]; + semaphore_signal([sharedViewObject semaphoreFramebufferPageAtIndex:index]); } GLuint MacOGLClientFetchObject::GetFetchTexture(const NDSDisplayID displayID) @@ -436,7 +438,7 @@ void MacOGLDisplayPresenter::WriteLockEmuFramebuffer(const uint8_t bufferIndex) const GPUClientFetchObject &fetchObj = this->GetFetchObject(); MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData(); - semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]); + semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:bufferIndex]); } void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex) @@ -444,7 +446,7 @@ void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex) const GPUClientFetchObject &fetchObj = this->GetFetchObject(); MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData(); - semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]); + semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:bufferIndex]); } void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex) @@ -452,7 +454,7 @@ void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex) const GPUClientFetchObject &fetchObj = this->GetFetchObject(); MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData(); - semaphore_signal([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]); + semaphore_signal([sharedViewObject semaphoreFramebufferPageAtIndex:bufferIndex]); } #pragma mark -