From 26ac91edd0bec08733f1d28f185e43c222916a2b Mon Sep 17 00:00:00 2001 From: rogerman Date: Tue, 5 Dec 2017 13:43:30 -0800 Subject: [PATCH] Cocoa Port: For Metal display views, replace all locks with semaphores, which are the correct synchronization primitive to use here. - Also change the CocoaDSOutput list lock from a mutex to a rwlock, since testing has shown that there is more thread contention here than I previously thought. --- desmume/src/frontend/cocoa/cocoa_GPU.h | 15 +- desmume/src/frontend/cocoa/cocoa_GPU.mm | 124 ++--- desmume/src/frontend/cocoa/cocoa_core.h | 2 +- desmume/src/frontend/cocoa/cocoa_core.mm | 30 +- desmume/src/frontend/cocoa/cocoa_globals.h | 2 + .../cocoa/userinterface/MacMetalDisplayView.h | 16 +- .../userinterface/MacMetalDisplayView.mm | 433 ++++++++---------- .../MacMetalDisplayViewShaders.metal | 13 + .../cocoa/userinterface/MacOGLDisplayView.mm | 10 +- 9 files changed, 308 insertions(+), 337 deletions(-) diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.h b/desmume/src/frontend/cocoa/cocoa_GPU.h index fa3f7ad03..3e906a80e 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.h +++ b/desmume/src/frontend/cocoa/cocoa_GPU.h @@ -19,6 +19,7 @@ #import #include #include +#include #include #import "cocoa_util.h" @@ -48,8 +49,8 @@ typedef std::map DisplayLinkFlushTimeLimitMap; @interface MacClientSharedObject : NSObject { GPUClientFetchObject *GPUFetchObject; - pthread_rwlock_t *_rwlockFramebuffer[2]; - pthread_mutex_t *_mutexOutputList; + sem_t *_semFramebuffer[2]; + pthread_rwlock_t *_rwlockOutputList; pthread_mutex_t _mutexDisplayLinkLists; NSMutableArray *_cdsOutputList; volatile int32_t numberViewsUsingDirectToCPUFiltering; @@ -58,7 +59,7 @@ typedef std::map DisplayLinkFlushTimeLimitMap; DisplayLinkFlushTimeLimitMap _displayLinkFlushTimeList; OSSpinLock spinlockFetchSignal; - BOOL _isFetchSignalled; + uint32_t _threadMessageID; uint8_t _fetchIndex; pthread_t _threadFetch; pthread_cond_t _condSignalFetch; @@ -68,8 +69,8 @@ typedef std::map DisplayLinkFlushTimeLimitMap; @property (assign, nonatomic) GPUClientFetchObject *GPUFetchObject; @property (readonly, nonatomic) volatile int32_t numberViewsUsingDirectToCPUFiltering; -- (pthread_rwlock_t *) rwlockFramebufferAtIndex:(const u8)bufferIndex; -- (void) setOutputList:(NSMutableArray *)theOutputList mutex:(pthread_mutex_t *)theMutex; +- (sem_t *) semaphoreFramebufferAtIndex:(const u8)bufferIndex; +- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock; - (void) incrementViewsUsingDirectToCPUFiltering; - (void) decrementViewsUsingDirectToCPUFiltering; - (void) pushVideoDataToAllDisplayViews; @@ -79,7 +80,7 @@ typedef std::map DisplayLinkFlushTimeLimitMap; - (void) displayLinkListUpdate; - (void) fetchSynchronousAtIndex:(uint8_t)index; -- (void) signalFetchAtIndex:(uint8_t)index; +- (void) signalFetchAtIndex:(uint8_t)index message:(int32_t)messageID; - (void) runFetchLoop; @end @@ -135,7 +136,7 @@ typedef std::map DisplayLinkFlushTimeLimitMap; @property (readonly, nonatomic) GPUClientFetchObject *fetchObject; @property (readonly, nonatomic) MacClientSharedObject *sharedData; -- (void) setOutputList:(NSMutableArray *)theOutputList mutexPtr:(pthread_mutex_t *)theMutex; +- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock; #endif - (BOOL) gpuStateByBit:(const UInt32)stateBit; diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.mm b/desmume/src/frontend/cocoa/cocoa_GPU.mm index 9bfb49ec0..ca70eab20 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.mm +++ b/desmume/src/frontend/cocoa/cocoa_GPU.mm @@ -253,16 +253,16 @@ public: gpuEvent->FramebufferLock(); #ifdef ENABLE_SHARED_FETCH_OBJECT - pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:0]); - pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:1]); + sem_wait([[self sharedData] semaphoreFramebufferAtIndex:0]); + sem_wait([[self sharedData] semaphoreFramebufferAtIndex:1]); #endif GPU->SetCustomFramebufferSize(w, h); #ifdef ENABLE_SHARED_FETCH_OBJECT fetchObject->SetFetchBuffers(GPU->GetDisplayInfo()); - pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:1]); - pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:0]); + sem_post([[self sharedData] semaphoreFramebufferAtIndex:1]); + sem_post([[self sharedData] semaphoreFramebufferAtIndex:0]); #endif gpuEvent->FramebufferUnlock(); @@ -314,16 +314,16 @@ public: if (colorFormat != dispInfo.colorFormat) { #ifdef ENABLE_SHARED_FETCH_OBJECT - pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:0]); - pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:1]); + sem_wait([[self sharedData] semaphoreFramebufferAtIndex:0]); + sem_wait([[self sharedData] semaphoreFramebufferAtIndex:1]); #endif GPU->SetColorFormat((NDSColorFormat)colorFormat); #ifdef ENABLE_SHARED_FETCH_OBJECT fetchObject->SetFetchBuffers(GPU->GetDisplayInfo()); - pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:1]); - pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:0]); + sem_post([[self sharedData] semaphoreFramebufferAtIndex:1]); + sem_post([[self sharedData] semaphoreFramebufferAtIndex:0]); #endif } @@ -343,9 +343,9 @@ public: } #ifdef ENABLE_SHARED_FETCH_OBJECT -- (void) setOutputList:(NSMutableArray *)theOutputList mutexPtr:(pthread_mutex_t *)theMutex +- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock { - [(MacClientSharedObject *)fetchObject->GetClientData() setOutputList:theOutputList mutex:theMutex]; + [(MacClientSharedObject *)fetchObject->GetClientData() setOutputList:theOutputList rwlock:theRWLock]; } #endif @@ -866,18 +866,18 @@ public: #ifdef ENABLE_SHARED_FETCH_OBJECT const u8 bufferIndex = GPU->GetDisplayInfo().bufferIndex; - pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:bufferIndex]); + sem_wait([[self sharedData] semaphoreFramebufferAtIndex:bufferIndex]); #endif GPU->ClearWithColor(colorBGRA5551); #ifdef ENABLE_SHARED_FETCH_OBJECT - pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:bufferIndex]); + sem_post([[self sharedData] semaphoreFramebufferAtIndex:bufferIndex]); #endif gpuEvent->FramebufferUnlock(); #ifdef ENABLE_SHARED_FETCH_OBJECT - [[self sharedData] signalFetchAtIndex:bufferIndex]; + [[self sharedData] signalFetchAtIndex:bufferIndex message:MESSAGE_FETCH_AND_PUSH_VIDEO]; #endif } @@ -918,15 +918,32 @@ public: return self; } - _rwlockFramebuffer[0] = (pthread_rwlock_t *)malloc(sizeof(pthread_rwlock_t)); - _rwlockFramebuffer[1] = (pthread_rwlock_t *)malloc(sizeof(pthread_rwlock_t)); + _semFramebuffer[0] = sem_open("desmume_semFramebuffer0", O_CREAT | O_EXCL, 0777, 1); + if (_semFramebuffer[0] == SEM_FAILED) + { + sem_unlink("desmume_semFramebuffer0"); + _semFramebuffer[0] = sem_open("desmume_semFramebuffer0", O_CREAT | O_EXCL, 0777, 1); + if (_semFramebuffer[0] == SEM_FAILED) + { + puts("desmume_semFramebuffer0 failed!"); + } + } + + _semFramebuffer[1] = sem_open("desmume_semFramebuffer1", O_CREAT | O_EXCL, 0777, 1); + if (_semFramebuffer[1] == SEM_FAILED) + { + sem_unlink("desmume_semFramebuffer1"); + _semFramebuffer[1] = sem_open("desmume_semFramebuffer1", O_CREAT | O_EXCL, 0777, 1); + if (_semFramebuffer[1] == SEM_FAILED) + { + puts("desmume_semFramebuffer1 failed!"); + } + } - pthread_rwlock_init(_rwlockFramebuffer[0], NULL); - pthread_rwlock_init(_rwlockFramebuffer[1], NULL); pthread_mutex_init(&_mutexDisplayLinkLists, NULL); GPUFetchObject = nil; - _mutexOutputList = NULL; + _rwlockOutputList = NULL; _cdsOutputList = nil; numberViewsUsingDirectToCPUFiltering = 0; @@ -935,7 +952,7 @@ public: [self displayLinkListUpdate]; spinlockFetchSignal = OS_SPINLOCK_INIT; - _isFetchSignalled = NO; + _threadMessageID = MESSAGE_NONE; _fetchIndex = 0; pthread_cond_init(&_condSignalFetch, NULL); pthread_create(&_threadFetch, NULL, &RunFetchThread, self); @@ -980,50 +997,52 @@ public: pthread_mutex_unlock(&_mutexDisplayLinkLists); pthread_mutex_destroy(&_mutexDisplayLinkLists); - pthread_mutex_t *currentMutex = _mutexOutputList; + pthread_rwlock_t *currentRWLock = _rwlockOutputList; - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_lock(currentMutex); + pthread_rwlock_wrlock(currentRWLock); } [_cdsOutputList release]; - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_unlock(currentMutex); + pthread_rwlock_unlock(currentRWLock); } - pthread_rwlock_destroy(_rwlockFramebuffer[0]); - pthread_rwlock_destroy(_rwlockFramebuffer[1]); + sem_close(_semFramebuffer[0]); + sem_close(_semFramebuffer[1]); + sem_unlink("desmume_semFramebuffer0"); + sem_unlink("desmume_semFramebuffer1"); [super dealloc]; } -- (pthread_rwlock_t *) rwlockFramebufferAtIndex:(const u8)bufferIndex +- (sem_t *) semaphoreFramebufferAtIndex:(const u8)bufferIndex { - return _rwlockFramebuffer[bufferIndex]; + return _semFramebuffer[bufferIndex]; } -- (void) setOutputList:(NSMutableArray *)theOutputList mutex:(pthread_mutex_t *)theMutex +- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock { - pthread_mutex_t *currentMutex = _mutexOutputList; + pthread_rwlock_t *currentRWLock = _rwlockOutputList; - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_lock(currentMutex); + pthread_rwlock_wrlock(currentRWLock); } [_cdsOutputList release]; _cdsOutputList = theOutputList; [_cdsOutputList retain]; - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_unlock(currentMutex); + pthread_rwlock_unlock(currentRWLock); } - _mutexOutputList = theMutex; + _rwlockOutputList = theRWLock; } - (void) incrementViewsUsingDirectToCPUFiltering @@ -1038,11 +1057,11 @@ public: - (void) pushVideoDataToAllDisplayViews { - pthread_mutex_t *currentMutex = _mutexOutputList; + pthread_rwlock_t *currentRWLock = _rwlockOutputList; - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_lock(currentMutex); + pthread_rwlock_rdlock(currentRWLock); } for (CocoaDSOutput *cdsOutput in _cdsOutputList) @@ -1053,21 +1072,21 @@ public: } } - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_unlock(currentMutex); + pthread_rwlock_unlock(currentRWLock); } } - (void) flushAllDisplaysOnDisplayLink:(CVDisplayLinkRef)displayLink timeStamp:(const CVTimeStamp *)timeStamp { - pthread_mutex_t *currentMutex = _mutexOutputList; + pthread_rwlock_t *currentRWLock = _rwlockOutputList; CGDirectDisplayID displayID = CVDisplayLinkGetCurrentCGDisplay(displayLink); bool didFlushOccur = false; - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_lock(currentMutex); + pthread_rwlock_rdlock(currentRWLock); } for (CocoaDSOutput *cdsOutput in _cdsOutputList) @@ -1087,9 +1106,9 @@ public: } } - if (currentMutex != NULL) + if (currentRWLock != NULL) { - pthread_mutex_unlock(currentMutex); + pthread_rwlock_unlock(currentRWLock); } if (didFlushOccur) @@ -1195,12 +1214,12 @@ public: GPUFetchObject->FetchFromBufferIndex(index); } -- (void) signalFetchAtIndex:(uint8_t)index +- (void) signalFetchAtIndex:(uint8_t)index message:(int32_t)messageID { pthread_mutex_lock(&_mutexFetchExecute); _fetchIndex = index; - _isFetchSignalled = YES; + _threadMessageID = messageID; pthread_cond_signal(&_condSignalFetch); pthread_mutex_unlock(&_mutexFetchExecute); @@ -1212,14 +1231,15 @@ public: do { - while (!_isFetchSignalled) + while (_threadMessageID == MESSAGE_NONE) { pthread_cond_wait(&_condSignalFetch, &_mutexFetchExecute); } - _isFetchSignalled = NO; GPUFetchObject->FetchFromBufferIndex(_fetchIndex); [self pushVideoDataToAllDisplayViews]; + _threadMessageID = MESSAGE_NONE; + } while(true); } @@ -1275,7 +1295,7 @@ void GPUEventHandlerOSX::DidFrameBegin(bool isFrameSkipRequested, const u8 targe if (!isFrameSkipRequested) { MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_fetchObject->GetClientData(); - pthread_rwlock_wrlock([sharedViewObject rwlockFramebufferAtIndex:targetBufferIndex]); + sem_wait([sharedViewObject semaphoreFramebufferAtIndex:targetBufferIndex]); } #endif } @@ -1287,7 +1307,7 @@ void GPUEventHandlerOSX::DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo & if (!isFrameSkipped) { this->_fetchObject->SetFetchDisplayInfo(latestDisplayInfo); - pthread_rwlock_unlock([sharedViewObject rwlockFramebufferAtIndex:latestDisplayInfo.bufferIndex]); + sem_post([sharedViewObject semaphoreFramebufferAtIndex:latestDisplayInfo.bufferIndex]); } #endif @@ -1296,7 +1316,7 @@ void GPUEventHandlerOSX::DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo & #ifdef ENABLE_SHARED_FETCH_OBJECT if (!isFrameSkipped) { - [sharedViewObject signalFetchAtIndex:latestDisplayInfo.bufferIndex]; + [sharedViewObject signalFetchAtIndex:latestDisplayInfo.bufferIndex message:MESSAGE_FETCH_AND_PUSH_VIDEO]; } #endif } diff --git a/desmume/src/frontend/cocoa/cocoa_core.h b/desmume/src/frontend/cocoa/cocoa_core.h index 8e4841390..b2836244f 100644 --- a/desmume/src/frontend/cocoa/cocoa_core.h +++ b/desmume/src/frontend/cocoa/cocoa_core.h @@ -38,7 +38,7 @@ typedef void *gdbstub_handle_t; typedef struct { CocoaDSCore *cdsCore; - pthread_mutex_t mutexOutputList; + pthread_rwlock_t rwlockOutputList; pthread_mutex_t mutexThreadExecute; pthread_cond_t condThreadExecute; pthread_rwlock_t rwlockCoreExecute; diff --git a/desmume/src/frontend/cocoa/cocoa_core.mm b/desmume/src/frontend/cocoa/cocoa_core.mm index f98657db5..36a7a3c0e 100644 --- a/desmume/src/frontend/cocoa/cocoa_core.mm +++ b/desmume/src/frontend/cocoa/cocoa_core.mm @@ -173,7 +173,7 @@ volatile bool execute = true; threadParam.cdsCore = self; - pthread_mutex_init(&threadParam.mutexOutputList, NULL); + pthread_rwlock_init(&threadParam.rwlockOutputList, NULL); pthread_mutex_init(&threadParam.mutexThreadExecute, NULL); pthread_cond_init(&threadParam.condThreadExecute, NULL); pthread_rwlock_init(&threadParam.rwlockCoreExecute, NULL); @@ -198,7 +198,7 @@ volatile bool execute = true; sp.sched_priority = sched_get_priority_max(thePolicy); pthread_setschedparam(coreThread, thePolicy, &sp); - [cdsGPU setOutputList:cdsOutputList mutexPtr:&threadParam.mutexOutputList]; + [cdsGPU setOutputList:cdsOutputList rwlock:&threadParam.rwlockOutputList]; OSXDriver *newDriver = new OSXDriver; newDriver->SetCoreThreadMutexLock(&threadParam.mutexThreadExecute); @@ -231,7 +231,7 @@ volatile bool execute = true; pthread_mutex_destroy(&threadParam.mutexThreadExecute); pthread_cond_destroy(&threadParam.condThreadExecute); - pthread_mutex_destroy(&threadParam.mutexOutputList); + pthread_rwlock_destroy(&threadParam.rwlockOutputList); pthread_rwlock_destroy(&threadParam.rwlockCoreExecute); [self setIsGdbStubStarted:NO]; @@ -644,7 +644,7 @@ volatile bool execute = true; execControl->SetExecutionBehavior((ExecutionBehavior)coreState); - pthread_mutex_lock(&threadParam.mutexOutputList); + pthread_rwlock_rdlock(&threadParam.rwlockOutputList); switch ((ExecutionBehavior)coreState) { @@ -718,7 +718,7 @@ volatile bool execute = true; break; } - pthread_mutex_unlock(&threadParam.mutexOutputList); + pthread_rwlock_unlock(&threadParam.rwlockOutputList); pthread_cond_signal(&threadParam.condThreadExecute); pthread_mutex_unlock(&threadParam.mutexThreadExecute); @@ -878,6 +878,8 @@ volatile bool execute = true; // count every other instance the timer fires. _isTimerAtSecond = !_isTimerAtSecond; + pthread_rwlock_rdlock(&threadParam.rwlockOutputList); + for (CocoaDSOutput *cdsOutput in cdsOutputList) { if ([cdsOutput isKindOfClass:[CocoaDSDisplay class]]) @@ -888,6 +890,8 @@ volatile bool execute = true; } } } + + pthread_rwlock_unlock(&threadParam.rwlockOutputList); } - (NSUInteger) frameNumber @@ -897,7 +901,7 @@ volatile bool execute = true; - (void) addOutput:(CocoaDSOutput *)theOutput { - pthread_mutex_lock(&threadParam.mutexOutputList); + pthread_rwlock_wrlock(&threadParam.rwlockOutputList); if ([theOutput isKindOfClass:[CocoaDSDisplay class]]) { @@ -909,21 +913,21 @@ volatile bool execute = true; } [[self cdsOutputList] addObject:theOutput]; - pthread_mutex_unlock(&threadParam.mutexOutputList); + pthread_rwlock_unlock(&threadParam.rwlockOutputList); } - (void) removeOutput:(CocoaDSOutput *)theOutput { - pthread_mutex_lock(&threadParam.mutexOutputList); + pthread_rwlock_wrlock(&threadParam.rwlockOutputList); [[self cdsOutputList] removeObject:theOutput]; - pthread_mutex_unlock(&threadParam.mutexOutputList); + pthread_rwlock_unlock(&threadParam.rwlockOutputList); } - (void) removeAllOutputs { - pthread_mutex_lock(&threadParam.mutexOutputList); + pthread_rwlock_wrlock(&threadParam.rwlockOutputList); [[self cdsOutputList] removeAllObjects]; - pthread_mutex_unlock(&threadParam.mutexOutputList); + pthread_rwlock_unlock(&threadParam.rwlockOutputList); } - (NSString *) cpuEmulationEngineString @@ -1200,7 +1204,7 @@ static void* RunCoreThread(void *arg) executionSpeedAverageFramesCollected = 0.0; } - pthread_mutex_lock(¶m->mutexOutputList); + pthread_rwlock_rdlock(¶m->rwlockOutputList); switch (behavior) { @@ -1227,7 +1231,7 @@ static void* RunCoreThread(void *arg) break; } - pthread_mutex_unlock(¶m->mutexOutputList); + pthread_rwlock_unlock(¶m->rwlockOutputList); switch (behavior) { diff --git a/desmume/src/frontend/cocoa/cocoa_globals.h b/desmume/src/frontend/cocoa/cocoa_globals.h index 742caa065..ceea51afd 100644 --- a/desmume/src/frontend/cocoa/cocoa_globals.h +++ b/desmume/src/frontend/cocoa/cocoa_globals.h @@ -334,6 +334,8 @@ enum */ enum { + MESSAGE_NONE = 0, + MESSAGE_CHECK_FOR_RESPONSE = 100, // Message to check if a port is responding. Usually sent to make sure that a thread is alive. MESSAGE_CHECK_RESPONSE_ECHO, // Response message when another port sends MESSAGE_CHECK_FOR_RESPONSE. Sent to confirm that a thread is indeed alive. MESSAGE_EXIT_THREAD, // Sent whenever there is a need to stop a thread. diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h index 2ddb4129b..6a4942594 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h @@ -21,6 +21,7 @@ #import #import #include +#include #import "DisplayViewCALayer.h" #import "../cocoa_GPU.h" @@ -39,8 +40,6 @@ struct MetalProcessedFrameInfo { uint8_t bufferIndex; id tex[2]; - bool isMainDisplayProcessed; - bool isTouchDisplayProcessed; }; typedef struct MetalProcessedFrameInfo MetalProcessedFrameInfo; @@ -65,6 +64,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; id _fetch888Pipeline; id _fetch555ConvertOnlyPipeline; id _fetch666ConvertOnlyPipeline; + id _fetch888PassthroughOnlyPipeline; id deposterizePipeline; id hudPipeline; id hudRGBAPipeline; @@ -185,8 +185,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; BOOL needsScreenVerticesUpdate; BOOL needsHUDVerticesUpdate; - pthread_mutex_t _mutexTexProcessUpdate; - pthread_mutex_t _mutexBufferUpdate; + sem_t *_semTexProcessUpdate; bool _needEncodeViewport; MTLViewport _newViewport; bool _willDrawHUD; @@ -200,8 +199,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @property (readonly, nonatomic) ClientDisplay3DPresenter *cdp; @property (assign, nonatomic) MetalDisplayViewSharedData *sharedData; @property (readonly, nonatomic) MTLRenderPassColorAttachmentDescriptor *colorAttachment0Desc; -@property (readonly, nonatomic) pthread_mutex_t *mutexTexProcessUpdate; -@property (readonly, nonatomic) pthread_mutex_t *mutexBufferUpdate; +@property (readonly, nonatomic) sem_t *semTexProcessUpdate; @property (retain) id pixelScalePipeline; @property (retain) id outputRGBAPipeline; @property (retain) id outputDrawablePipeline; @@ -225,6 +223,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; - (void) resizeCPUPixelScalerUsingFilterID:(const VideoFilterTypeID)filterID; - (void) copyHUDFontUsingFace:(const FT_Face &)fontFace size:(const size_t)glyphSize tileSize:(const size_t)glyphTileSize info:(GlyphInfo *)glyphInfo; - (void) processDisplays; +- (void) updateTexCoordBuffer; - (void) updateRenderBuffers; - (void) renderForCommandBuffer:(id)cb outputPipelineState:(id)outputPipelineState @@ -282,7 +281,7 @@ private: protected: MacMetalDisplayPresenterObject *_presenterObject; pthread_mutex_t _mutexProcessPtr; - pthread_rwlock_t _cpuFilterRWLock[2][2]; + sem_t *_semCPUFilter[2][2]; virtual void _UpdateNormalSize(); virtual void _UpdateOrder(); @@ -299,7 +298,7 @@ public: MacMetalDisplayPresenterObject* GetPresenterObject() const; pthread_mutex_t* GetMutexProcessPtr(); - pthread_rwlock_t* GetCPUFilterRWLock(const NDSDisplayID displayID, const uint8_t bufferIndex); + sem_t* GetCPUFilterSemaphore(const NDSDisplayID displayID, const uint8_t bufferIndex); virtual void Init(); virtual void SetSharedData(MacClientSharedObject *sharedObject); @@ -312,7 +311,6 @@ public: // Client view interface virtual void ProcessDisplays(); - virtual void UpdateLayout(); virtual void CopyFrameToBuffer(uint32_t *dstBuffer); }; diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index fdb7707a5..0f791ae29 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -17,6 +17,13 @@ #include "MacMetalDisplayView.h" +#include +#include +#include +#include + +#include "../cocoa_globals.h" + #include "../../../common.h" @implementation MetalDisplayViewSharedData @@ -71,6 +78,7 @@ _fetch888Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch888"] error:nil] retain]; _fetch555ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555ConvertOnly"] error:nil] retain]; _fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain]; + _fetch888PassthroughOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch888PassthroughOnly"] error:nil] retain]; deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain]; if ( IsOSXVersion(10, 13, 0) || IsOSXVersion(10, 13, 1) || IsOSXVersion(10, 13, 2) || IsOSXVersion(10, 13, 3) || IsOSXVersion(10, 13, 4) ) @@ -249,6 +257,7 @@ [_fetch888Pipeline release]; [_fetch555ConvertOnlyPipeline release]; [_fetch666ConvertOnlyPipeline release]; + [_fetch888PassthroughOnlyPipeline release]; [deposterizePipeline release]; [hudPipeline release]; [hudRGBAPipeline release]; @@ -585,72 +594,69 @@ isUsingFramebufferDirectlyTouch = false; } } - else if (currentDisplayInfo.colorFormat != NDSColorFormat_BGR888_Rev) + else { - bool isPipelineStateSet = false; - if (currentDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev) { // 16-bit textures aren't handled natively in Metal for macOS, so we need to explicitly convert to 32-bit here. [cce setComputePipelineState:_fetch555ConvertOnlyPipeline]; - isPipelineStateSet = true; } else if ( (currentDisplayInfo.colorFormat == NDSColorFormat_BGR666_Rev) && (currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Main] || currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Touch]) ) { [cce setComputePipelineState:_fetch666ConvertOnlyPipeline]; - isPipelineStateSet = true; + } + else + { + [cce setComputePipelineState:_fetch888PassthroughOnlyPipeline]; } - if (isPipelineStateSet) + if (isMainEnabled) { - if (isMainEnabled) + if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Main]) { - if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Main]) - { - [cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0]; - [cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1]; - [cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative - threadsPerThreadgroup:_fetchThreadsPerGroup]; - - texFetchTargetMain = _texDisplayPostprocessNative[NDSDisplayID_Main][index]; - } - else - { - [cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0]; - [cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1]; - [cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom - threadsPerThreadgroup:_fetchThreadsPerGroup]; - - texFetchTargetMain = _texDisplayPostprocessCustom[NDSDisplayID_Main][index]; - } + [cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0]; + [cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1]; + [cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative + threadsPerThreadgroup:_fetchThreadsPerGroup]; - isUsingFramebufferDirectlyMain = false; + texFetchTargetMain = _texDisplayPostprocessNative[NDSDisplayID_Main][index]; + } + else + { + [cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0]; + [cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1]; + [cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom + threadsPerThreadgroup:_fetchThreadsPerGroup]; + + texFetchTargetMain = _texDisplayPostprocessCustom[NDSDisplayID_Main][index]; } - if (isTouchEnabled) + isUsingFramebufferDirectlyMain = false; + } + + if (isTouchEnabled) + { + if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Touch]) { - if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Touch]) - { - [cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0]; - [cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1]; - [cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative - threadsPerThreadgroup:_fetchThreadsPerGroup]; - - texFetchTargetTouch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index]; - } - else - { - [cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0]; - [cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1]; - [cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom - threadsPerThreadgroup:_fetchThreadsPerGroup]; - - texFetchTargetTouch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index]; - } + [cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0]; + [cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1]; + [cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative + threadsPerThreadgroup:_fetchThreadsPerGroup]; - isUsingFramebufferDirectlyTouch = false; + texFetchTargetTouch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index]; } + else + { + [cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0]; + [cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1]; + [cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom + threadsPerThreadgroup:_fetchThreadsPerGroup]; + + texFetchTargetTouch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index]; + } + + isUsingFramebufferDirectlyTouch = false; } } @@ -666,7 +672,7 @@ - (void) fetchFromBufferIndex:(const u8)index { - pthread_rwlock_rdlock([self rwlockFramebufferAtIndex:index]); + sem_wait([self semaphoreFramebufferAtIndex:index]); id cb = [commandQueue commandBufferWithUnretainedReferences]; _fetchEncoder = [cb blitCommandEncoder]; @@ -680,18 +686,17 @@ if (index == 0) { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock([self rwlockFramebufferAtIndex:0]); + sem_post([self semaphoreFramebufferAtIndex:0]); }]; } else { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock([self rwlockFramebufferAtIndex:1]); + sem_post([self semaphoreFramebufferAtIndex:1]); }]; } [cb commit]; - [cb waitUntilScheduled]; } - (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex @@ -745,8 +750,7 @@ @synthesize cdp; @synthesize sharedData; @synthesize colorAttachment0Desc; -@dynamic mutexTexProcessUpdate; -@dynamic mutexBufferUpdate; +@dynamic semTexProcessUpdate; @synthesize pixelScalePipeline; @synthesize outputRGBAPipeline; @synthesize outputDrawablePipeline; @@ -825,11 +829,17 @@ _processedFrameInfo.bufferIndex = 0; _processedFrameInfo.tex[NDSDisplayID_Main] = nil; _processedFrameInfo.tex[NDSDisplayID_Touch] = nil; - _processedFrameInfo.isMainDisplayProcessed = false; - _processedFrameInfo.isTouchDisplayProcessed = false; - pthread_mutex_init(&_mutexTexProcessUpdate, NULL); - pthread_mutex_init(&_mutexBufferUpdate, NULL); + _semTexProcessUpdate = sem_open("desmume_semTexProcessUpdate", O_CREAT | O_EXCL, 0777, 1); + if (_semTexProcessUpdate == SEM_FAILED) + { + sem_unlink("desmume_semTexProcessUpdate"); + _semTexProcessUpdate = sem_open("desmume_semTexProcessUpdate", O_CREAT | O_EXCL, 0777, 1); + if (_semTexProcessUpdate == SEM_FAILED) + { + puts("desmume_semTexProcessUpdate failed!"); + } + } return self; } @@ -868,20 +878,15 @@ [self setSharedData:nil]; - pthread_mutex_destroy(&_mutexTexProcessUpdate); - pthread_mutex_destroy(&_mutexBufferUpdate); + sem_close(_semTexProcessUpdate); + sem_unlink("desmume_semTexProcessUpdate"); [super dealloc]; } -- (pthread_mutex_t *) mutexTexProcessUpdate +- (sem_t *) semTexProcessUpdate { - return &_mutexTexProcessUpdate; -} - -- (pthread_mutex_t *) mutexBufferUpdate -{ - return &_mutexBufferUpdate; + return _semTexProcessUpdate; } - (VideoFilterTypeID) pixelScaler @@ -1314,27 +1319,15 @@ id texMain = (selectedDisplaySource[NDSDisplayID_Main] == NDSDisplayID_Main) ? [sharedData texFetchMain] : [sharedData texFetchTouch]; id texTouch = (selectedDisplaySource[NDSDisplayID_Touch] == NDSDisplayID_Touch) ? [sharedData texFetchTouch] : [sharedData texFetchMain]; - bool isDisplayProcessedMain = ![sharedData isUsingFramebufferDirectlyAtIndex:bufferIndex displayID:selectedDisplaySource[NDSDisplayID_Main]]; - bool isDisplayProcessedTouch = ![sharedData isUsingFramebufferDirectlyAtIndex:bufferIndex displayID:selectedDisplaySource[NDSDisplayID_Touch]]; - if ( (fetchDisplayInfo.pixelBytes != 0) && (useDeposterize || (cdp->GetPixelScaler() != VideoFilterTypeID_None)) ) { const bool willFilterOnGPU = cdp->WillFilterOnGPU(); const bool shouldProcessDisplay[2] = { (!fetchDisplayInfo.didPerformCustomRender[selectedDisplaySource[NDSDisplayID_Main]] || !fetchDisplayInfo.isCustomSizeRequested) && cdp->IsSelectedDisplayEnabled(NDSDisplayID_Main) && (mode == ClientDisplayMode_Main || mode == ClientDisplayMode_Dual), (!fetchDisplayInfo.didPerformCustomRender[selectedDisplaySource[NDSDisplayID_Touch]] || !fetchDisplayInfo.isCustomSizeRequested) && cdp->IsSelectedDisplayEnabled(NDSDisplayID_Touch) && (mode == ClientDisplayMode_Touch || mode == ClientDisplayMode_Dual) && (selectedDisplaySource[NDSDisplayID_Main] != selectedDisplaySource[NDSDisplayID_Touch]) }; - bool texFetchMainNeedsLock = (useDeposterize || ((cdp->GetPixelScaler() != VideoFilterTypeID_None) && willFilterOnGPU)) && shouldProcessDisplay[NDSDisplayID_Main]; - bool texFetchTouchNeedsLock = (useDeposterize || ((cdp->GetPixelScaler() != VideoFilterTypeID_None) && willFilterOnGPU)) && shouldProcessDisplay[NDSDisplayID_Touch]; - bool needsFetchBuffersLock = texFetchMainNeedsLock || texFetchTouchNeedsLock; - id cb = [self newCommandBuffer]; id cce = [cb computeCommandEncoder]; - if (needsFetchBuffersLock) - { - pthread_rwlock_rdlock([sharedData rwlockFramebufferAtIndex:bufferIndex]); - } - // Run the video source filters and the pixel scalers if (useDeposterize) { @@ -1353,12 +1346,10 @@ threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]]; texMain = _texDisplaySrcDeposterize[NDSDisplayID_Main][1]; - isDisplayProcessedMain = true; if (selectedDisplaySource[NDSDisplayID_Main] == selectedDisplaySource[NDSDisplayID_Touch]) { texTouch = texMain; - isDisplayProcessedTouch = true; } } @@ -1375,35 +1366,6 @@ threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]]; texTouch = _texDisplaySrcDeposterize[NDSDisplayID_Touch][1]; - isDisplayProcessedTouch = true; - } - - if (needsFetchBuffersLock) - { - needsFetchBuffersLock = !isDisplayProcessedMain || !isDisplayProcessedTouch; - - [cce endEncoding]; - - if (!needsFetchBuffersLock) - { - if (bufferIndex == 0) - { - [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:0]); - }]; - } - else - { - [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:1]); - }]; - } - } - - [cb commit]; - - cb = [self newCommandBuffer]; - cce = [cb computeCommandEncoder]; } } @@ -1424,14 +1386,12 @@ texMain = [self texDisplayPixelScaleMain]; width[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] width]; height[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] height]; - isDisplayProcessedMain = true; if (selectedDisplaySource[NDSDisplayID_Main] == selectedDisplaySource[NDSDisplayID_Touch]) { texTouch = texMain; width[NDSDisplayID_Touch] = width[NDSDisplayID_Main]; height[NDSDisplayID_Touch] = height[NDSDisplayID_Main]; - isDisplayProcessedTouch = true; } } @@ -1445,32 +1405,6 @@ texTouch = [self texDisplayPixelScaleTouch]; width[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] width]; height[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] height]; - isDisplayProcessedTouch = true; - } - - if (needsFetchBuffersLock) - { - needsFetchBuffersLock = false; - - [cce endEncoding]; - - if (bufferIndex == 0) - { - [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:0]); - }]; - } - else - { - [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:1]); - }]; - } - - [cb commit]; - - cb = [self newCommandBuffer]; - cce = [cb computeCommandEncoder]; } } @@ -1529,7 +1463,7 @@ if (shouldProcessDisplay[NDSDisplayID_Main] && ([self texDisplayPixelScaleMain] != nil)) { - pthread_rwlock_rdlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, bufferIndex)); + sem_wait(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, bufferIndex)); needsCPUFilterUnlockMain = true; vfMain->RunFilter(); @@ -1537,7 +1471,7 @@ { [[self bufCPUFilterDstMain] didModifyRange:NSMakeRange(0, vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t))]; needsCPUFilterUnlockMain = false; - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, bufferIndex)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, bufferIndex)); } [bce copyFromBuffer:[self bufCPUFilterDstMain] @@ -1553,20 +1487,18 @@ texMain = [self texDisplayPixelScaleMain]; width[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] width]; height[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] height]; - isDisplayProcessedMain = true; if (selectedDisplaySource[NDSDisplayID_Main] == selectedDisplaySource[NDSDisplayID_Touch]) { texTouch = texMain; width[NDSDisplayID_Touch] = width[NDSDisplayID_Main]; height[NDSDisplayID_Touch] = height[NDSDisplayID_Main]; - isDisplayProcessedTouch = true; } } if (shouldProcessDisplay[NDSDisplayID_Touch] && ([self texDisplayPixelScaleTouch] != nil)) { - pthread_rwlock_rdlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, bufferIndex)); + sem_wait(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, bufferIndex)); needsCPUFilterUnlockTouch = true; vfTouch->RunFilter(); @@ -1574,7 +1506,7 @@ { [[self bufCPUFilterDstTouch] didModifyRange:NSMakeRange(0, vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t))]; needsCPUFilterUnlockTouch = false; - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, bufferIndex)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, bufferIndex)); } [bce copyFromBuffer:[self bufCPUFilterDstTouch] @@ -1590,7 +1522,6 @@ texTouch = [self texDisplayPixelScaleTouch]; width[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] width]; height[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] height]; - isDisplayProcessedTouch = true; } [bce endEncoding]; @@ -1605,15 +1536,15 @@ if (bufferIndex == 0) { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 0)); - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 0)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 0)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 0)); }]; } else { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 1)); - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 1)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 1)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 1)); }]; } } @@ -1622,13 +1553,13 @@ if (bufferIndex == 0) { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 0)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 0)); }]; } else { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 1)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 1)); }]; } } @@ -1638,13 +1569,13 @@ if (bufferIndex == 0) { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 0)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 0)); }]; } else { [cb addCompletedHandler:^(id block) { - pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 1)); + sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 1)); }]; } } @@ -1653,27 +1584,30 @@ } // Update the texture coordinates - pthread_mutex_lock(&_mutexTexProcessUpdate); - - cdp->SetScreenTextureCoordinates((float)width[NDSDisplayID_Main], (float)height[NDSDisplayID_Main], - (float)width[NDSDisplayID_Touch], (float)height[NDSDisplayID_Touch], - (float *)[_displayTexCoordBuffer contents]); - [_displayTexCoordBuffer didModifyRange:NSMakeRange(0, sizeof(float) * (4 * 8))]; + sem_wait(_semTexProcessUpdate); // Update the frame info id oldDisplayProcessedMain = _processedFrameInfo.tex[NDSDisplayID_Main]; id oldDisplayProcessedTouch = _processedFrameInfo.tex[NDSDisplayID_Touch]; _processedFrameInfo.bufferIndex = bufferIndex; - _processedFrameInfo.isMainDisplayProcessed = isDisplayProcessedMain; - _processedFrameInfo.isTouchDisplayProcessed = isDisplayProcessedTouch; _processedFrameInfo.tex[NDSDisplayID_Main] = [texMain retain]; _processedFrameInfo.tex[NDSDisplayID_Touch] = [texTouch retain]; + [self updateTexCoordBuffer]; + [oldDisplayProcessedMain release]; [oldDisplayProcessedTouch release]; - pthread_mutex_unlock(&_mutexTexProcessUpdate); + sem_post(_semTexProcessUpdate); +} + +- (void) updateTexCoordBuffer +{ + cdp->SetScreenTextureCoordinates((float)[_processedFrameInfo.tex[NDSDisplayID_Main] width], (float)[_processedFrameInfo.tex[NDSDisplayID_Main] height], + (float)[_processedFrameInfo.tex[NDSDisplayID_Touch] width], (float)[_processedFrameInfo.tex[NDSDisplayID_Touch] height], + (float *)[_displayTexCoordBuffer contents]); + [_displayTexCoordBuffer didModifyRange:NSMakeRange(0, sizeof(float) * (4 * 8))]; } - (void) updateRenderBuffers @@ -1690,8 +1624,6 @@ newViewport.znear = 0.0; newViewport.zfar = 1.0; - pthread_mutex_lock(&_mutexBufferUpdate); - if ([self needsViewportUpdate]) { needEncodeViewport = true; @@ -1790,8 +1722,6 @@ _willDrawHUDInput = cdp->GetHUDShowInput(); _hudStringLength = cdp->GetHUDString().length(); _hudTouchLineLength = hudTouchLineLength; - - pthread_mutex_unlock(&_mutexBufferUpdate); } - (void) renderForCommandBuffer:(id)cb @@ -1943,6 +1873,17 @@ const size_t clientWidth = cdp->GetPresenterProperties().clientWidth; const size_t clientHeight = cdp->GetPresenterProperties().clientHeight; + // Create a unique semaphore name based on mach_absolute_time(). + char semaphoreName[64]; + memset(semaphoreName, '\0', sizeof(semaphoreName)); + snprintf(semaphoreName, sizeof(semaphoreName), "desmume_semRenderToBuffer_0x%016llX", (unsigned long long)mach_absolute_time()); + + sem_t *semRenderToBuffer = sem_open(semaphoreName, O_CREAT, 0777, 1); + if (semRenderToBuffer == SEM_FAILED) + { + puts("desmume_semRenderToBuffer failed!"); + } + @autoreleasepool { MTLTextureDescriptor *texRenderDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm @@ -1956,19 +1897,13 @@ id texRender = [[sharedData device] newTextureWithDescriptor:texRenderDesc]; id dstMTLBuffer = [[sharedData device] newBufferWithLength:clientWidth * clientHeight * sizeof(uint32_t) options:MTLResourceStorageModeManaged]; - pthread_mutex_lock(&_mutexTexProcessUpdate); - - const bool needsFetchBuffersLock = !_processedFrameInfo.isMainDisplayProcessed || !_processedFrameInfo.isTouchDisplayProcessed; - if (needsFetchBuffersLock) - { - pthread_rwlock_rdlock([sharedData rwlockFramebufferAtIndex:_processedFrameInfo.bufferIndex]); - } + sem_wait(_semTexProcessUpdate); // Now that everything is set up, go ahead and draw everything. [colorAttachment0Desc setTexture:texRender]; id cb = [self newCommandBuffer]; - pthread_mutex_lock(&_mutexBufferUpdate); + [self updateRenderBuffers]; [self renderForCommandBuffer:cb outputPipelineState:[self outputRGBAPipeline] @@ -1976,35 +1911,13 @@ texDisplayMain:_processedFrameInfo.tex[NDSDisplayID_Main] texDisplayTouch:_processedFrameInfo.tex[NDSDisplayID_Touch]]; - if (needsFetchBuffersLock) - { - if (_processedFrameInfo.bufferIndex == 0) - { - [cb addCompletedHandler:^(id block) { - pthread_mutex_unlock(&_mutexBufferUpdate); - pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:0]); - pthread_mutex_unlock(&_mutexTexProcessUpdate); - }]; - } - else - { - [cb addCompletedHandler:^(id block) { - pthread_mutex_unlock(&_mutexBufferUpdate); - pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:1]); - pthread_mutex_unlock(&_mutexTexProcessUpdate); - }]; - } - } - else - { - [cb addCompletedHandler:^(id block) { - pthread_mutex_unlock(&_mutexBufferUpdate); - pthread_mutex_unlock(&_mutexTexProcessUpdate); - }]; - } - + [cb addCompletedHandler:^(id block) { + sem_post(_semTexProcessUpdate); + }]; [cb commit]; + sem_wait(semRenderToBuffer); + cb = [self newCommandBuffer]; id bce = [cb blitCommandEncoder]; @@ -2021,14 +1934,23 @@ [bce synchronizeResource:dstMTLBuffer]; [bce endEncoding]; + [cb addCompletedHandler:^(id block) { + sem_post(semRenderToBuffer); + }]; [cb commit]; - [cb waitUntilCompleted]; + + // Wait on this thread until the GPU completes its task, then continue execution on this thread. + sem_wait(semRenderToBuffer); + sem_post(semRenderToBuffer); memcpy(dstBuffer, [dstMTLBuffer contents], clientWidth * clientHeight * sizeof(uint32_t)); [texRender release]; [dstMTLBuffer release]; } + + sem_close(semRenderToBuffer); + sem_unlink(semaphoreName); } @end @@ -2073,20 +1995,16 @@ { @autoreleasepool { - pthread_mutex_lock([presenterObject mutexTexProcessUpdate]); + sem_wait([presenterObject semTexProcessUpdate]); const MetalProcessedFrameInfo &processedInfo = [presenterObject processedFrameInfo]; - const bool needsFetchBuffersLock = !processedInfo.isMainDisplayProcessed || !processedInfo.isTouchDisplayProcessed; - if (needsFetchBuffersLock) - { - pthread_rwlock_rdlock([[presenterObject sharedData] rwlockFramebufferAtIndex:processedInfo.bufferIndex]); - } // Now that everything is set up, go ahead and draw everything. id layerDrawable = [self nextDrawable]; [[presenterObject colorAttachment0Desc] setTexture:[layerDrawable texture]]; id cb = [presenterObject newCommandBuffer]; - pthread_mutex_lock([presenterObject mutexBufferUpdate]); + + [presenterObject updateRenderBuffers]; [presenterObject renderForCommandBuffer:cb outputPipelineState:[presenterObject outputDrawablePipeline] @@ -2096,32 +2014,9 @@ [cb presentDrawable:layerDrawable]; - if (needsFetchBuffersLock) - { - if (processedInfo.bufferIndex == 0) - { - [cb addCompletedHandler:^(id block) { - pthread_mutex_unlock([presenterObject mutexBufferUpdate]); - pthread_rwlock_unlock([[presenterObject sharedData] rwlockFramebufferAtIndex:0]); - pthread_mutex_unlock([presenterObject mutexTexProcessUpdate]); - }]; - } - else - { - [cb addCompletedHandler:^(id block) { - pthread_mutex_unlock([presenterObject mutexBufferUpdate]); - pthread_rwlock_unlock([[presenterObject sharedData] rwlockFramebufferAtIndex:1]); - pthread_mutex_unlock([presenterObject mutexTexProcessUpdate]); - }]; - } - } - else - { - [cb addCompletedHandler:^(id block) { - pthread_mutex_unlock([presenterObject mutexBufferUpdate]); - pthread_mutex_unlock([presenterObject mutexTexProcessUpdate]); - }]; - } + [cb addCompletedHandler:^(id block) { + sem_post([presenterObject semTexProcessUpdate]); + }]; [cb commit]; } @@ -2265,10 +2160,14 @@ MacMetalDisplayPresenter::~MacMetalDisplayPresenter() pthread_mutex_destroy(&this->_mutexProcessPtr); - pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Main][0]); - pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Touch][0]); - pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Main][1]); - pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Touch][1]); + sem_close(this->_semCPUFilter[NDSDisplayID_Main][0]); + sem_close(this->_semCPUFilter[NDSDisplayID_Main][1]); + sem_close(this->_semCPUFilter[NDSDisplayID_Touch][0]); + sem_close(this->_semCPUFilter[NDSDisplayID_Touch][1]); + sem_unlink("desmume_semCPUFilterMain0"); + sem_unlink("desmume_semCPUFilterMain1"); + sem_unlink("desmume_semCPUFilterTouch0"); + sem_unlink("desmume_semCPUFilterTouch1"); } void MacMetalDisplayPresenter::__InstanceInit(MacClientSharedObject *sharedObject) @@ -2286,10 +2185,49 @@ void MacMetalDisplayPresenter::__InstanceInit(MacClientSharedObject *sharedObjec pthread_mutex_init(&_mutexProcessPtr, NULL); - pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Main][0], NULL); - pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Touch][0], NULL); - pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Main][1], NULL); - pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Touch][1], NULL); + _semCPUFilter[NDSDisplayID_Main][0] = sem_open("desmume_semCPUFilterMain0", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Main][0] == SEM_FAILED) + { + sem_unlink("desmume_semCPUFilterMain0"); + _semCPUFilter[NDSDisplayID_Main][0] = sem_open("desmume_semCPUFilterMain0", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Main][0] == SEM_FAILED) + { + puts("desmume_semCPUFilterMain0 failed!"); + } + } + + _semCPUFilter[NDSDisplayID_Main][1] = sem_open("desmume_semCPUFilterMain1", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Main][1] == SEM_FAILED) + { + sem_unlink("desmume_semCPUFilterMain1"); + _semCPUFilter[NDSDisplayID_Main][1] = sem_open("desmume_semCPUFilterMain1", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Main][1] == SEM_FAILED) + { + puts("desmume_semCPUFilterMain1 failed!"); + } + } + + _semCPUFilter[NDSDisplayID_Touch][0] = sem_open("desmume_semCPUFilterTouch0", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Touch][0] == SEM_FAILED) + { + sem_unlink("desmume_semCPUFilterTouch0"); + _semCPUFilter[NDSDisplayID_Touch][0] = sem_open("desmume_semCPUFilterTouch0", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Touch][0] == SEM_FAILED) + { + puts("desmume_semCPUFilterTouch0 failed!"); + } + } + + _semCPUFilter[NDSDisplayID_Touch][1] = sem_open("desmume_semCPUFilterTouch1", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Touch][1] == SEM_FAILED) + { + sem_unlink("desmume_semCPUFilterTouch1"); + _semCPUFilter[NDSDisplayID_Touch][1] = sem_open("desmume_semCPUFilterTouch1", O_CREAT | O_EXCL, 0777, 1); + if (_semCPUFilter[NDSDisplayID_Touch][1] == SEM_FAILED) + { + puts("desmume_semCPUFilterTouch1 failed!"); + } + } } void MacMetalDisplayPresenter::_UpdateNormalSize() @@ -2329,9 +2267,9 @@ void MacMetalDisplayPresenter::_LoadNativeDisplayByID(const NDSDisplayID display const uint8_t bufferIndex = fetchObjMutable.GetLastFetchIndex(); - pthread_rwlock_wrlock(&this->_cpuFilterRWLock[displayID][bufferIndex]); + sem_wait(this->_semCPUFilter[displayID][bufferIndex]); fetchObjMutable.CopyFromSrcClone(vf->GetSrcBufferPtr(), displayID, bufferIndex); - pthread_rwlock_unlock(&this->_cpuFilterRWLock[displayID][bufferIndex]); + sem_post(this->_semCPUFilter[displayID][bufferIndex]); } } @@ -2351,9 +2289,9 @@ pthread_mutex_t* MacMetalDisplayPresenter::GetMutexProcessPtr() return &this->_mutexProcessPtr; } -pthread_rwlock_t* MacMetalDisplayPresenter::GetCPUFilterRWLock(const NDSDisplayID displayID, const uint8_t bufferIndex) +sem_t* MacMetalDisplayPresenter::GetCPUFilterSemaphore(const NDSDisplayID displayID, const uint8_t bufferIndex) { - return &this->_cpuFilterRWLock[displayID][bufferIndex]; + return this->_semCPUFilter[displayID][bufferIndex]; } void MacMetalDisplayPresenter::Init() @@ -2406,11 +2344,6 @@ void MacMetalDisplayPresenter::ProcessDisplays() [this->_presenterObject processDisplays]; } -void MacMetalDisplayPresenter::UpdateLayout() -{ - [this->_presenterObject updateRenderBuffers]; -} - void MacMetalDisplayPresenter::CopyFrameToBuffer(uint32_t *dstBuffer) { [this->_presenterObject renderToBuffer:dstBuffer]; diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal index dcdc8bada..c36d46f3d 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal @@ -513,6 +513,19 @@ kernel void nds_fetch666ConvertOnly(const uint2 position [[thread_position_in_gr outTexture.write(float4(outColor, 1.0f), position); } +kernel void nds_fetch888PassthroughOnly(const uint2 position [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) ) + { + return; + } + + const float3 outColor = inTexture.read(position).rgb; + outTexture.write(float4(outColor, 1.0f), position); +} + float3 nds_apply_master_brightness(const float3 inColor, const uchar mode, const float intensity) { switch (mode) diff --git a/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm index f49fa1706..7e02d0ee9 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacOGLDisplayView.mm @@ -194,14 +194,14 @@ void MacOGLClientFetchObject::FetchFromBufferIndex(const u8 index) MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_clientData; this->_useDirectToCPUFilterPipeline = ([sharedViewObject numberViewsUsingDirectToCPUFiltering] > 0); - pthread_rwlock_rdlock([sharedViewObject rwlockFramebufferAtIndex:index]); + sem_wait([sharedViewObject semaphoreFramebufferAtIndex:index]); CGLLockContext(this->_context); CGLSetCurrentContext(this->_context); this->OGLClientFetchObject::FetchFromBufferIndex(index); CGLUnlockContext(this->_context); - pthread_rwlock_unlock([sharedViewObject rwlockFramebufferAtIndex:index]); + sem_post([sharedViewObject semaphoreFramebufferAtIndex:index]); } GLuint MacOGLClientFetchObject::GetFetchTexture(const NDSDisplayID displayID) @@ -435,7 +435,7 @@ void MacOGLDisplayPresenter::WriteLockEmuFramebuffer(const uint8_t bufferIndex) const GPUClientFetchObject &fetchObj = this->GetFetchObject(); MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData(); - pthread_rwlock_wrlock([sharedViewObject rwlockFramebufferAtIndex:bufferIndex]); + sem_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]); } void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex) @@ -443,7 +443,7 @@ void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex) const GPUClientFetchObject &fetchObj = this->GetFetchObject(); MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData(); - pthread_rwlock_rdlock([sharedViewObject rwlockFramebufferAtIndex:bufferIndex]); + sem_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]); } void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex) @@ -451,7 +451,7 @@ void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex) const GPUClientFetchObject &fetchObj = this->GetFetchObject(); MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData(); - pthread_rwlock_unlock([sharedViewObject rwlockFramebufferAtIndex:bufferIndex]); + sem_post([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]); } #pragma mark -