Cocoa Port: For Metal display views, replace all locks with semaphores, which are the correct synchronization primitive to use here.

- Also change the CocoaDSOutput list lock from a mutex to a rwlock, since testing has shown that there is more thread contention here than I previously thought.
This commit is contained in:
rogerman 2017-12-05 13:43:30 -08:00
parent f9109568b8
commit 26ac91edd0
9 changed files with 308 additions and 337 deletions

View File

@ -19,6 +19,7 @@
#import <CoreVideo/CoreVideo.h>
#include <pthread.h>
#include <libkern/OSAtomic.h>
#include <semaphore.h>
#include <map>
#import "cocoa_util.h"
@ -48,8 +49,8 @@ typedef std::map<CGDirectDisplayID, int64_t> DisplayLinkFlushTimeLimitMap;
@interface MacClientSharedObject : NSObject
{
GPUClientFetchObject *GPUFetchObject;
pthread_rwlock_t *_rwlockFramebuffer[2];
pthread_mutex_t *_mutexOutputList;
sem_t *_semFramebuffer[2];
pthread_rwlock_t *_rwlockOutputList;
pthread_mutex_t _mutexDisplayLinkLists;
NSMutableArray *_cdsOutputList;
volatile int32_t numberViewsUsingDirectToCPUFiltering;
@ -58,7 +59,7 @@ typedef std::map<CGDirectDisplayID, int64_t> DisplayLinkFlushTimeLimitMap;
DisplayLinkFlushTimeLimitMap _displayLinkFlushTimeList;
OSSpinLock spinlockFetchSignal;
BOOL _isFetchSignalled;
uint32_t _threadMessageID;
uint8_t _fetchIndex;
pthread_t _threadFetch;
pthread_cond_t _condSignalFetch;
@ -68,8 +69,8 @@ typedef std::map<CGDirectDisplayID, int64_t> DisplayLinkFlushTimeLimitMap;
@property (assign, nonatomic) GPUClientFetchObject *GPUFetchObject;
@property (readonly, nonatomic) volatile int32_t numberViewsUsingDirectToCPUFiltering;
- (pthread_rwlock_t *) rwlockFramebufferAtIndex:(const u8)bufferIndex;
- (void) setOutputList:(NSMutableArray *)theOutputList mutex:(pthread_mutex_t *)theMutex;
- (sem_t *) semaphoreFramebufferAtIndex:(const u8)bufferIndex;
- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock;
- (void) incrementViewsUsingDirectToCPUFiltering;
- (void) decrementViewsUsingDirectToCPUFiltering;
- (void) pushVideoDataToAllDisplayViews;
@ -79,7 +80,7 @@ typedef std::map<CGDirectDisplayID, int64_t> DisplayLinkFlushTimeLimitMap;
- (void) displayLinkListUpdate;
- (void) fetchSynchronousAtIndex:(uint8_t)index;
- (void) signalFetchAtIndex:(uint8_t)index;
- (void) signalFetchAtIndex:(uint8_t)index message:(int32_t)messageID;
- (void) runFetchLoop;
@end
@ -135,7 +136,7 @@ typedef std::map<CGDirectDisplayID, int64_t> DisplayLinkFlushTimeLimitMap;
@property (readonly, nonatomic) GPUClientFetchObject *fetchObject;
@property (readonly, nonatomic) MacClientSharedObject *sharedData;
- (void) setOutputList:(NSMutableArray *)theOutputList mutexPtr:(pthread_mutex_t *)theMutex;
- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock;
#endif
- (BOOL) gpuStateByBit:(const UInt32)stateBit;

View File

@ -253,16 +253,16 @@ public:
gpuEvent->FramebufferLock();
#ifdef ENABLE_SHARED_FETCH_OBJECT
pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:0]);
pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:1]);
sem_wait([[self sharedData] semaphoreFramebufferAtIndex:0]);
sem_wait([[self sharedData] semaphoreFramebufferAtIndex:1]);
#endif
GPU->SetCustomFramebufferSize(w, h);
#ifdef ENABLE_SHARED_FETCH_OBJECT
fetchObject->SetFetchBuffers(GPU->GetDisplayInfo());
pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:1]);
pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:0]);
sem_post([[self sharedData] semaphoreFramebufferAtIndex:1]);
sem_post([[self sharedData] semaphoreFramebufferAtIndex:0]);
#endif
gpuEvent->FramebufferUnlock();
@ -314,16 +314,16 @@ public:
if (colorFormat != dispInfo.colorFormat)
{
#ifdef ENABLE_SHARED_FETCH_OBJECT
pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:0]);
pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:1]);
sem_wait([[self sharedData] semaphoreFramebufferAtIndex:0]);
sem_wait([[self sharedData] semaphoreFramebufferAtIndex:1]);
#endif
GPU->SetColorFormat((NDSColorFormat)colorFormat);
#ifdef ENABLE_SHARED_FETCH_OBJECT
fetchObject->SetFetchBuffers(GPU->GetDisplayInfo());
pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:1]);
pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:0]);
sem_post([[self sharedData] semaphoreFramebufferAtIndex:1]);
sem_post([[self sharedData] semaphoreFramebufferAtIndex:0]);
#endif
}
@ -343,9 +343,9 @@ public:
}
#ifdef ENABLE_SHARED_FETCH_OBJECT
- (void) setOutputList:(NSMutableArray *)theOutputList mutexPtr:(pthread_mutex_t *)theMutex
- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock
{
[(MacClientSharedObject *)fetchObject->GetClientData() setOutputList:theOutputList mutex:theMutex];
[(MacClientSharedObject *)fetchObject->GetClientData() setOutputList:theOutputList rwlock:theRWLock];
}
#endif
@ -866,18 +866,18 @@ public:
#ifdef ENABLE_SHARED_FETCH_OBJECT
const u8 bufferIndex = GPU->GetDisplayInfo().bufferIndex;
pthread_rwlock_wrlock([[self sharedData] rwlockFramebufferAtIndex:bufferIndex]);
sem_wait([[self sharedData] semaphoreFramebufferAtIndex:bufferIndex]);
#endif
GPU->ClearWithColor(colorBGRA5551);
#ifdef ENABLE_SHARED_FETCH_OBJECT
pthread_rwlock_unlock([[self sharedData] rwlockFramebufferAtIndex:bufferIndex]);
sem_post([[self sharedData] semaphoreFramebufferAtIndex:bufferIndex]);
#endif
gpuEvent->FramebufferUnlock();
#ifdef ENABLE_SHARED_FETCH_OBJECT
[[self sharedData] signalFetchAtIndex:bufferIndex];
[[self sharedData] signalFetchAtIndex:bufferIndex message:MESSAGE_FETCH_AND_PUSH_VIDEO];
#endif
}
@ -918,15 +918,32 @@ public:
return self;
}
_rwlockFramebuffer[0] = (pthread_rwlock_t *)malloc(sizeof(pthread_rwlock_t));
_rwlockFramebuffer[1] = (pthread_rwlock_t *)malloc(sizeof(pthread_rwlock_t));
_semFramebuffer[0] = sem_open("desmume_semFramebuffer0", O_CREAT | O_EXCL, 0777, 1);
if (_semFramebuffer[0] == SEM_FAILED)
{
sem_unlink("desmume_semFramebuffer0");
_semFramebuffer[0] = sem_open("desmume_semFramebuffer0", O_CREAT | O_EXCL, 0777, 1);
if (_semFramebuffer[0] == SEM_FAILED)
{
puts("desmume_semFramebuffer0 failed!");
}
}
_semFramebuffer[1] = sem_open("desmume_semFramebuffer1", O_CREAT | O_EXCL, 0777, 1);
if (_semFramebuffer[1] == SEM_FAILED)
{
sem_unlink("desmume_semFramebuffer1");
_semFramebuffer[1] = sem_open("desmume_semFramebuffer1", O_CREAT | O_EXCL, 0777, 1);
if (_semFramebuffer[1] == SEM_FAILED)
{
puts("desmume_semFramebuffer1 failed!");
}
}
pthread_rwlock_init(_rwlockFramebuffer[0], NULL);
pthread_rwlock_init(_rwlockFramebuffer[1], NULL);
pthread_mutex_init(&_mutexDisplayLinkLists, NULL);
GPUFetchObject = nil;
_mutexOutputList = NULL;
_rwlockOutputList = NULL;
_cdsOutputList = nil;
numberViewsUsingDirectToCPUFiltering = 0;
@ -935,7 +952,7 @@ public:
[self displayLinkListUpdate];
spinlockFetchSignal = OS_SPINLOCK_INIT;
_isFetchSignalled = NO;
_threadMessageID = MESSAGE_NONE;
_fetchIndex = 0;
pthread_cond_init(&_condSignalFetch, NULL);
pthread_create(&_threadFetch, NULL, &RunFetchThread, self);
@ -980,50 +997,52 @@ public:
pthread_mutex_unlock(&_mutexDisplayLinkLists);
pthread_mutex_destroy(&_mutexDisplayLinkLists);
pthread_mutex_t *currentMutex = _mutexOutputList;
pthread_rwlock_t *currentRWLock = _rwlockOutputList;
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_lock(currentMutex);
pthread_rwlock_wrlock(currentRWLock);
}
[_cdsOutputList release];
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_unlock(currentMutex);
pthread_rwlock_unlock(currentRWLock);
}
pthread_rwlock_destroy(_rwlockFramebuffer[0]);
pthread_rwlock_destroy(_rwlockFramebuffer[1]);
sem_close(_semFramebuffer[0]);
sem_close(_semFramebuffer[1]);
sem_unlink("desmume_semFramebuffer0");
sem_unlink("desmume_semFramebuffer1");
[super dealloc];
}
- (pthread_rwlock_t *) rwlockFramebufferAtIndex:(const u8)bufferIndex
- (sem_t *) semaphoreFramebufferAtIndex:(const u8)bufferIndex
{
return _rwlockFramebuffer[bufferIndex];
return _semFramebuffer[bufferIndex];
}
- (void) setOutputList:(NSMutableArray *)theOutputList mutex:(pthread_mutex_t *)theMutex
- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock
{
pthread_mutex_t *currentMutex = _mutexOutputList;
pthread_rwlock_t *currentRWLock = _rwlockOutputList;
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_lock(currentMutex);
pthread_rwlock_wrlock(currentRWLock);
}
[_cdsOutputList release];
_cdsOutputList = theOutputList;
[_cdsOutputList retain];
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_unlock(currentMutex);
pthread_rwlock_unlock(currentRWLock);
}
_mutexOutputList = theMutex;
_rwlockOutputList = theRWLock;
}
- (void) incrementViewsUsingDirectToCPUFiltering
@ -1038,11 +1057,11 @@ public:
- (void) pushVideoDataToAllDisplayViews
{
pthread_mutex_t *currentMutex = _mutexOutputList;
pthread_rwlock_t *currentRWLock = _rwlockOutputList;
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_lock(currentMutex);
pthread_rwlock_rdlock(currentRWLock);
}
for (CocoaDSOutput *cdsOutput in _cdsOutputList)
@ -1053,21 +1072,21 @@ public:
}
}
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_unlock(currentMutex);
pthread_rwlock_unlock(currentRWLock);
}
}
- (void) flushAllDisplaysOnDisplayLink:(CVDisplayLinkRef)displayLink timeStamp:(const CVTimeStamp *)timeStamp
{
pthread_mutex_t *currentMutex = _mutexOutputList;
pthread_rwlock_t *currentRWLock = _rwlockOutputList;
CGDirectDisplayID displayID = CVDisplayLinkGetCurrentCGDisplay(displayLink);
bool didFlushOccur = false;
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_lock(currentMutex);
pthread_rwlock_rdlock(currentRWLock);
}
for (CocoaDSOutput *cdsOutput in _cdsOutputList)
@ -1087,9 +1106,9 @@ public:
}
}
if (currentMutex != NULL)
if (currentRWLock != NULL)
{
pthread_mutex_unlock(currentMutex);
pthread_rwlock_unlock(currentRWLock);
}
if (didFlushOccur)
@ -1195,12 +1214,12 @@ public:
GPUFetchObject->FetchFromBufferIndex(index);
}
- (void) signalFetchAtIndex:(uint8_t)index
- (void) signalFetchAtIndex:(uint8_t)index message:(int32_t)messageID
{
pthread_mutex_lock(&_mutexFetchExecute);
_fetchIndex = index;
_isFetchSignalled = YES;
_threadMessageID = messageID;
pthread_cond_signal(&_condSignalFetch);
pthread_mutex_unlock(&_mutexFetchExecute);
@ -1212,14 +1231,15 @@ public:
do
{
while (!_isFetchSignalled)
while (_threadMessageID == MESSAGE_NONE)
{
pthread_cond_wait(&_condSignalFetch, &_mutexFetchExecute);
}
_isFetchSignalled = NO;
GPUFetchObject->FetchFromBufferIndex(_fetchIndex);
[self pushVideoDataToAllDisplayViews];
_threadMessageID = MESSAGE_NONE;
} while(true);
}
@ -1275,7 +1295,7 @@ void GPUEventHandlerOSX::DidFrameBegin(bool isFrameSkipRequested, const u8 targe
if (!isFrameSkipRequested)
{
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_fetchObject->GetClientData();
pthread_rwlock_wrlock([sharedViewObject rwlockFramebufferAtIndex:targetBufferIndex]);
sem_wait([sharedViewObject semaphoreFramebufferAtIndex:targetBufferIndex]);
}
#endif
}
@ -1287,7 +1307,7 @@ void GPUEventHandlerOSX::DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &
if (!isFrameSkipped)
{
this->_fetchObject->SetFetchDisplayInfo(latestDisplayInfo);
pthread_rwlock_unlock([sharedViewObject rwlockFramebufferAtIndex:latestDisplayInfo.bufferIndex]);
sem_post([sharedViewObject semaphoreFramebufferAtIndex:latestDisplayInfo.bufferIndex]);
}
#endif
@ -1296,7 +1316,7 @@ void GPUEventHandlerOSX::DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &
#ifdef ENABLE_SHARED_FETCH_OBJECT
if (!isFrameSkipped)
{
[sharedViewObject signalFetchAtIndex:latestDisplayInfo.bufferIndex];
[sharedViewObject signalFetchAtIndex:latestDisplayInfo.bufferIndex message:MESSAGE_FETCH_AND_PUSH_VIDEO];
}
#endif
}

View File

@ -38,7 +38,7 @@ typedef void *gdbstub_handle_t;
typedef struct
{
CocoaDSCore *cdsCore;
pthread_mutex_t mutexOutputList;
pthread_rwlock_t rwlockOutputList;
pthread_mutex_t mutexThreadExecute;
pthread_cond_t condThreadExecute;
pthread_rwlock_t rwlockCoreExecute;

View File

@ -173,7 +173,7 @@ volatile bool execute = true;
threadParam.cdsCore = self;
pthread_mutex_init(&threadParam.mutexOutputList, NULL);
pthread_rwlock_init(&threadParam.rwlockOutputList, NULL);
pthread_mutex_init(&threadParam.mutexThreadExecute, NULL);
pthread_cond_init(&threadParam.condThreadExecute, NULL);
pthread_rwlock_init(&threadParam.rwlockCoreExecute, NULL);
@ -198,7 +198,7 @@ volatile bool execute = true;
sp.sched_priority = sched_get_priority_max(thePolicy);
pthread_setschedparam(coreThread, thePolicy, &sp);
[cdsGPU setOutputList:cdsOutputList mutexPtr:&threadParam.mutexOutputList];
[cdsGPU setOutputList:cdsOutputList rwlock:&threadParam.rwlockOutputList];
OSXDriver *newDriver = new OSXDriver;
newDriver->SetCoreThreadMutexLock(&threadParam.mutexThreadExecute);
@ -231,7 +231,7 @@ volatile bool execute = true;
pthread_mutex_destroy(&threadParam.mutexThreadExecute);
pthread_cond_destroy(&threadParam.condThreadExecute);
pthread_mutex_destroy(&threadParam.mutexOutputList);
pthread_rwlock_destroy(&threadParam.rwlockOutputList);
pthread_rwlock_destroy(&threadParam.rwlockCoreExecute);
[self setIsGdbStubStarted:NO];
@ -644,7 +644,7 @@ volatile bool execute = true;
execControl->SetExecutionBehavior((ExecutionBehavior)coreState);
pthread_mutex_lock(&threadParam.mutexOutputList);
pthread_rwlock_rdlock(&threadParam.rwlockOutputList);
switch ((ExecutionBehavior)coreState)
{
@ -718,7 +718,7 @@ volatile bool execute = true;
break;
}
pthread_mutex_unlock(&threadParam.mutexOutputList);
pthread_rwlock_unlock(&threadParam.rwlockOutputList);
pthread_cond_signal(&threadParam.condThreadExecute);
pthread_mutex_unlock(&threadParam.mutexThreadExecute);
@ -878,6 +878,8 @@ volatile bool execute = true;
// count every other instance the timer fires.
_isTimerAtSecond = !_isTimerAtSecond;
pthread_rwlock_rdlock(&threadParam.rwlockOutputList);
for (CocoaDSOutput *cdsOutput in cdsOutputList)
{
if ([cdsOutput isKindOfClass:[CocoaDSDisplay class]])
@ -888,6 +890,8 @@ volatile bool execute = true;
}
}
}
pthread_rwlock_unlock(&threadParam.rwlockOutputList);
}
- (NSUInteger) frameNumber
@ -897,7 +901,7 @@ volatile bool execute = true;
- (void) addOutput:(CocoaDSOutput *)theOutput
{
pthread_mutex_lock(&threadParam.mutexOutputList);
pthread_rwlock_wrlock(&threadParam.rwlockOutputList);
if ([theOutput isKindOfClass:[CocoaDSDisplay class]])
{
@ -909,21 +913,21 @@ volatile bool execute = true;
}
[[self cdsOutputList] addObject:theOutput];
pthread_mutex_unlock(&threadParam.mutexOutputList);
pthread_rwlock_unlock(&threadParam.rwlockOutputList);
}
- (void) removeOutput:(CocoaDSOutput *)theOutput
{
pthread_mutex_lock(&threadParam.mutexOutputList);
pthread_rwlock_wrlock(&threadParam.rwlockOutputList);
[[self cdsOutputList] removeObject:theOutput];
pthread_mutex_unlock(&threadParam.mutexOutputList);
pthread_rwlock_unlock(&threadParam.rwlockOutputList);
}
- (void) removeAllOutputs
{
pthread_mutex_lock(&threadParam.mutexOutputList);
pthread_rwlock_wrlock(&threadParam.rwlockOutputList);
[[self cdsOutputList] removeAllObjects];
pthread_mutex_unlock(&threadParam.mutexOutputList);
pthread_rwlock_unlock(&threadParam.rwlockOutputList);
}
- (NSString *) cpuEmulationEngineString
@ -1200,7 +1204,7 @@ static void* RunCoreThread(void *arg)
executionSpeedAverageFramesCollected = 0.0;
}
pthread_mutex_lock(&param->mutexOutputList);
pthread_rwlock_rdlock(&param->rwlockOutputList);
switch (behavior)
{
@ -1227,7 +1231,7 @@ static void* RunCoreThread(void *arg)
break;
}
pthread_mutex_unlock(&param->mutexOutputList);
pthread_rwlock_unlock(&param->rwlockOutputList);
switch (behavior)
{

View File

@ -334,6 +334,8 @@ enum
*/
enum
{
MESSAGE_NONE = 0,
MESSAGE_CHECK_FOR_RESPONSE = 100, // Message to check if a port is responding. Usually sent to make sure that a thread is alive.
MESSAGE_CHECK_RESPONSE_ECHO, // Response message when another port sends MESSAGE_CHECK_FOR_RESPONSE. Sent to confirm that a thread is indeed alive.
MESSAGE_EXIT_THREAD, // Sent whenever there is a need to stop a thread.

View File

@ -21,6 +21,7 @@
#import <Cocoa/Cocoa.h>
#import <Metal/Metal.h>
#include <libkern/OSAtomic.h>
#include <semaphore.h>
#import "DisplayViewCALayer.h"
#import "../cocoa_GPU.h"
@ -39,8 +40,6 @@ struct MetalProcessedFrameInfo
{
uint8_t bufferIndex;
id<MTLTexture> tex[2];
bool isMainDisplayProcessed;
bool isTouchDisplayProcessed;
};
typedef struct MetalProcessedFrameInfo MetalProcessedFrameInfo;
@ -65,6 +64,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
id<MTLComputePipelineState> _fetch888Pipeline;
id<MTLComputePipelineState> _fetch555ConvertOnlyPipeline;
id<MTLComputePipelineState> _fetch666ConvertOnlyPipeline;
id<MTLComputePipelineState> _fetch888PassthroughOnlyPipeline;
id<MTLComputePipelineState> deposterizePipeline;
id<MTLRenderPipelineState> hudPipeline;
id<MTLRenderPipelineState> hudRGBAPipeline;
@ -185,8 +185,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
BOOL needsScreenVerticesUpdate;
BOOL needsHUDVerticesUpdate;
pthread_mutex_t _mutexTexProcessUpdate;
pthread_mutex_t _mutexBufferUpdate;
sem_t *_semTexProcessUpdate;
bool _needEncodeViewport;
MTLViewport _newViewport;
bool _willDrawHUD;
@ -200,8 +199,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
@property (readonly, nonatomic) ClientDisplay3DPresenter *cdp;
@property (assign, nonatomic) MetalDisplayViewSharedData *sharedData;
@property (readonly, nonatomic) MTLRenderPassColorAttachmentDescriptor *colorAttachment0Desc;
@property (readonly, nonatomic) pthread_mutex_t *mutexTexProcessUpdate;
@property (readonly, nonatomic) pthread_mutex_t *mutexBufferUpdate;
@property (readonly, nonatomic) sem_t *semTexProcessUpdate;
@property (retain) id<MTLComputePipelineState> pixelScalePipeline;
@property (retain) id<MTLRenderPipelineState> outputRGBAPipeline;
@property (retain) id<MTLRenderPipelineState> outputDrawablePipeline;
@ -225,6 +223,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
- (void) resizeCPUPixelScalerUsingFilterID:(const VideoFilterTypeID)filterID;
- (void) copyHUDFontUsingFace:(const FT_Face &)fontFace size:(const size_t)glyphSize tileSize:(const size_t)glyphTileSize info:(GlyphInfo *)glyphInfo;
- (void) processDisplays;
- (void) updateTexCoordBuffer;
- (void) updateRenderBuffers;
- (void) renderForCommandBuffer:(id<MTLCommandBuffer>)cb
outputPipelineState:(id<MTLRenderPipelineState>)outputPipelineState
@ -282,7 +281,7 @@ private:
protected:
MacMetalDisplayPresenterObject *_presenterObject;
pthread_mutex_t _mutexProcessPtr;
pthread_rwlock_t _cpuFilterRWLock[2][2];
sem_t *_semCPUFilter[2][2];
virtual void _UpdateNormalSize();
virtual void _UpdateOrder();
@ -299,7 +298,7 @@ public:
MacMetalDisplayPresenterObject* GetPresenterObject() const;
pthread_mutex_t* GetMutexProcessPtr();
pthread_rwlock_t* GetCPUFilterRWLock(const NDSDisplayID displayID, const uint8_t bufferIndex);
sem_t* GetCPUFilterSemaphore(const NDSDisplayID displayID, const uint8_t bufferIndex);
virtual void Init();
virtual void SetSharedData(MacClientSharedObject *sharedObject);
@ -312,7 +311,6 @@ public:
// Client view interface
virtual void ProcessDisplays();
virtual void UpdateLayout();
virtual void CopyFrameToBuffer(uint32_t *dstBuffer);
};

View File

@ -17,6 +17,13 @@
#include "MacMetalDisplayView.h"
#include <stdio.h>
#include <semaphore.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include "../cocoa_globals.h"
#include "../../../common.h"
@implementation MetalDisplayViewSharedData
@ -71,6 +78,7 @@
_fetch888Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch888"] error:nil] retain];
_fetch555ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555ConvertOnly"] error:nil] retain];
_fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain];
_fetch888PassthroughOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch888PassthroughOnly"] error:nil] retain];
deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain];
if ( IsOSXVersion(10, 13, 0) || IsOSXVersion(10, 13, 1) || IsOSXVersion(10, 13, 2) || IsOSXVersion(10, 13, 3) || IsOSXVersion(10, 13, 4) )
@ -249,6 +257,7 @@
[_fetch888Pipeline release];
[_fetch555ConvertOnlyPipeline release];
[_fetch666ConvertOnlyPipeline release];
[_fetch888PassthroughOnlyPipeline release];
[deposterizePipeline release];
[hudPipeline release];
[hudRGBAPipeline release];
@ -585,72 +594,69 @@
isUsingFramebufferDirectlyTouch = false;
}
}
else if (currentDisplayInfo.colorFormat != NDSColorFormat_BGR888_Rev)
else
{
bool isPipelineStateSet = false;
if (currentDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev)
{
// 16-bit textures aren't handled natively in Metal for macOS, so we need to explicitly convert to 32-bit here.
[cce setComputePipelineState:_fetch555ConvertOnlyPipeline];
isPipelineStateSet = true;
}
else if ( (currentDisplayInfo.colorFormat == NDSColorFormat_BGR666_Rev) &&
(currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Main] || currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Touch]) )
{
[cce setComputePipelineState:_fetch666ConvertOnlyPipeline];
isPipelineStateSet = true;
}
else
{
[cce setComputePipelineState:_fetch888PassthroughOnlyPipeline];
}
if (isPipelineStateSet)
if (isMainEnabled)
{
if (isMainEnabled)
if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Main])
{
if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Main])
{
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
texFetchTargetMain = _texDisplayPostprocessNative[NDSDisplayID_Main][index];
}
else
{
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
texFetchTargetMain = _texDisplayPostprocessCustom[NDSDisplayID_Main][index];
}
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
isUsingFramebufferDirectlyMain = false;
texFetchTargetMain = _texDisplayPostprocessNative[NDSDisplayID_Main][index];
}
else
{
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
texFetchTargetMain = _texDisplayPostprocessCustom[NDSDisplayID_Main][index];
}
if (isTouchEnabled)
isUsingFramebufferDirectlyMain = false;
}
if (isTouchEnabled)
{
if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Touch])
{
if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Touch])
{
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
texFetchTargetTouch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index];
}
else
{
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
texFetchTargetTouch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index];
}
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
threadsPerThreadgroup:_fetchThreadsPerGroup];
isUsingFramebufferDirectlyTouch = false;
texFetchTargetTouch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index];
}
else
{
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0];
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1];
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
threadsPerThreadgroup:_fetchThreadsPerGroup];
texFetchTargetTouch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index];
}
isUsingFramebufferDirectlyTouch = false;
}
}
@ -666,7 +672,7 @@
- (void) fetchFromBufferIndex:(const u8)index
{
pthread_rwlock_rdlock([self rwlockFramebufferAtIndex:index]);
sem_wait([self semaphoreFramebufferAtIndex:index]);
id<MTLCommandBuffer> cb = [commandQueue commandBufferWithUnretainedReferences];
_fetchEncoder = [cb blitCommandEncoder];
@ -680,18 +686,17 @@
if (index == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock([self rwlockFramebufferAtIndex:0]);
sem_post([self semaphoreFramebufferAtIndex:0]);
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock([self rwlockFramebufferAtIndex:1]);
sem_post([self semaphoreFramebufferAtIndex:1]);
}];
}
[cb commit];
[cb waitUntilScheduled];
}
- (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex
@ -745,8 +750,7 @@
@synthesize cdp;
@synthesize sharedData;
@synthesize colorAttachment0Desc;
@dynamic mutexTexProcessUpdate;
@dynamic mutexBufferUpdate;
@dynamic semTexProcessUpdate;
@synthesize pixelScalePipeline;
@synthesize outputRGBAPipeline;
@synthesize outputDrawablePipeline;
@ -825,11 +829,17 @@
_processedFrameInfo.bufferIndex = 0;
_processedFrameInfo.tex[NDSDisplayID_Main] = nil;
_processedFrameInfo.tex[NDSDisplayID_Touch] = nil;
_processedFrameInfo.isMainDisplayProcessed = false;
_processedFrameInfo.isTouchDisplayProcessed = false;
pthread_mutex_init(&_mutexTexProcessUpdate, NULL);
pthread_mutex_init(&_mutexBufferUpdate, NULL);
_semTexProcessUpdate = sem_open("desmume_semTexProcessUpdate", O_CREAT | O_EXCL, 0777, 1);
if (_semTexProcessUpdate == SEM_FAILED)
{
sem_unlink("desmume_semTexProcessUpdate");
_semTexProcessUpdate = sem_open("desmume_semTexProcessUpdate", O_CREAT | O_EXCL, 0777, 1);
if (_semTexProcessUpdate == SEM_FAILED)
{
puts("desmume_semTexProcessUpdate failed!");
}
}
return self;
}
@ -868,20 +878,15 @@
[self setSharedData:nil];
pthread_mutex_destroy(&_mutexTexProcessUpdate);
pthread_mutex_destroy(&_mutexBufferUpdate);
sem_close(_semTexProcessUpdate);
sem_unlink("desmume_semTexProcessUpdate");
[super dealloc];
}
- (pthread_mutex_t *) mutexTexProcessUpdate
- (sem_t *) semTexProcessUpdate
{
return &_mutexTexProcessUpdate;
}
- (pthread_mutex_t *) mutexBufferUpdate
{
return &_mutexBufferUpdate;
return _semTexProcessUpdate;
}
- (VideoFilterTypeID) pixelScaler
@ -1314,27 +1319,15 @@
id<MTLTexture> texMain = (selectedDisplaySource[NDSDisplayID_Main] == NDSDisplayID_Main) ? [sharedData texFetchMain] : [sharedData texFetchTouch];
id<MTLTexture> texTouch = (selectedDisplaySource[NDSDisplayID_Touch] == NDSDisplayID_Touch) ? [sharedData texFetchTouch] : [sharedData texFetchMain];
bool isDisplayProcessedMain = ![sharedData isUsingFramebufferDirectlyAtIndex:bufferIndex displayID:selectedDisplaySource[NDSDisplayID_Main]];
bool isDisplayProcessedTouch = ![sharedData isUsingFramebufferDirectlyAtIndex:bufferIndex displayID:selectedDisplaySource[NDSDisplayID_Touch]];
if ( (fetchDisplayInfo.pixelBytes != 0) && (useDeposterize || (cdp->GetPixelScaler() != VideoFilterTypeID_None)) )
{
const bool willFilterOnGPU = cdp->WillFilterOnGPU();
const bool shouldProcessDisplay[2] = { (!fetchDisplayInfo.didPerformCustomRender[selectedDisplaySource[NDSDisplayID_Main]] || !fetchDisplayInfo.isCustomSizeRequested) && cdp->IsSelectedDisplayEnabled(NDSDisplayID_Main) && (mode == ClientDisplayMode_Main || mode == ClientDisplayMode_Dual),
(!fetchDisplayInfo.didPerformCustomRender[selectedDisplaySource[NDSDisplayID_Touch]] || !fetchDisplayInfo.isCustomSizeRequested) && cdp->IsSelectedDisplayEnabled(NDSDisplayID_Touch) && (mode == ClientDisplayMode_Touch || mode == ClientDisplayMode_Dual) && (selectedDisplaySource[NDSDisplayID_Main] != selectedDisplaySource[NDSDisplayID_Touch]) };
bool texFetchMainNeedsLock = (useDeposterize || ((cdp->GetPixelScaler() != VideoFilterTypeID_None) && willFilterOnGPU)) && shouldProcessDisplay[NDSDisplayID_Main];
bool texFetchTouchNeedsLock = (useDeposterize || ((cdp->GetPixelScaler() != VideoFilterTypeID_None) && willFilterOnGPU)) && shouldProcessDisplay[NDSDisplayID_Touch];
bool needsFetchBuffersLock = texFetchMainNeedsLock || texFetchTouchNeedsLock;
id<MTLCommandBuffer> cb = [self newCommandBuffer];
id<MTLComputeCommandEncoder> cce = [cb computeCommandEncoder];
if (needsFetchBuffersLock)
{
pthread_rwlock_rdlock([sharedData rwlockFramebufferAtIndex:bufferIndex]);
}
// Run the video source filters and the pixel scalers
if (useDeposterize)
{
@ -1353,12 +1346,10 @@
threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]];
texMain = _texDisplaySrcDeposterize[NDSDisplayID_Main][1];
isDisplayProcessedMain = true;
if (selectedDisplaySource[NDSDisplayID_Main] == selectedDisplaySource[NDSDisplayID_Touch])
{
texTouch = texMain;
isDisplayProcessedTouch = true;
}
}
@ -1375,35 +1366,6 @@
threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]];
texTouch = _texDisplaySrcDeposterize[NDSDisplayID_Touch][1];
isDisplayProcessedTouch = true;
}
if (needsFetchBuffersLock)
{
needsFetchBuffersLock = !isDisplayProcessedMain || !isDisplayProcessedTouch;
[cce endEncoding];
if (!needsFetchBuffersLock)
{
if (bufferIndex == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:0]);
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:1]);
}];
}
}
[cb commit];
cb = [self newCommandBuffer];
cce = [cb computeCommandEncoder];
}
}
@ -1424,14 +1386,12 @@
texMain = [self texDisplayPixelScaleMain];
width[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] width];
height[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] height];
isDisplayProcessedMain = true;
if (selectedDisplaySource[NDSDisplayID_Main] == selectedDisplaySource[NDSDisplayID_Touch])
{
texTouch = texMain;
width[NDSDisplayID_Touch] = width[NDSDisplayID_Main];
height[NDSDisplayID_Touch] = height[NDSDisplayID_Main];
isDisplayProcessedTouch = true;
}
}
@ -1445,32 +1405,6 @@
texTouch = [self texDisplayPixelScaleTouch];
width[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] width];
height[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] height];
isDisplayProcessedTouch = true;
}
if (needsFetchBuffersLock)
{
needsFetchBuffersLock = false;
[cce endEncoding];
if (bufferIndex == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:0]);
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:1]);
}];
}
[cb commit];
cb = [self newCommandBuffer];
cce = [cb computeCommandEncoder];
}
}
@ -1529,7 +1463,7 @@
if (shouldProcessDisplay[NDSDisplayID_Main] && ([self texDisplayPixelScaleMain] != nil))
{
pthread_rwlock_rdlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, bufferIndex));
sem_wait(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, bufferIndex));
needsCPUFilterUnlockMain = true;
vfMain->RunFilter();
@ -1537,7 +1471,7 @@
{
[[self bufCPUFilterDstMain] didModifyRange:NSMakeRange(0, vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t))];
needsCPUFilterUnlockMain = false;
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, bufferIndex));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, bufferIndex));
}
[bce copyFromBuffer:[self bufCPUFilterDstMain]
@ -1553,20 +1487,18 @@
texMain = [self texDisplayPixelScaleMain];
width[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] width];
height[NDSDisplayID_Main] = [[self texDisplayPixelScaleMain] height];
isDisplayProcessedMain = true;
if (selectedDisplaySource[NDSDisplayID_Main] == selectedDisplaySource[NDSDisplayID_Touch])
{
texTouch = texMain;
width[NDSDisplayID_Touch] = width[NDSDisplayID_Main];
height[NDSDisplayID_Touch] = height[NDSDisplayID_Main];
isDisplayProcessedTouch = true;
}
}
if (shouldProcessDisplay[NDSDisplayID_Touch] && ([self texDisplayPixelScaleTouch] != nil))
{
pthread_rwlock_rdlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, bufferIndex));
sem_wait(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, bufferIndex));
needsCPUFilterUnlockTouch = true;
vfTouch->RunFilter();
@ -1574,7 +1506,7 @@
{
[[self bufCPUFilterDstTouch] didModifyRange:NSMakeRange(0, vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t))];
needsCPUFilterUnlockTouch = false;
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, bufferIndex));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, bufferIndex));
}
[bce copyFromBuffer:[self bufCPUFilterDstTouch]
@ -1590,7 +1522,6 @@
texTouch = [self texDisplayPixelScaleTouch];
width[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] width];
height[NDSDisplayID_Touch] = [[self texDisplayPixelScaleTouch] height];
isDisplayProcessedTouch = true;
}
[bce endEncoding];
@ -1605,15 +1536,15 @@
if (bufferIndex == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 0));
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 0));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 0));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 0));
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 1));
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 1));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 1));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 1));
}];
}
}
@ -1622,13 +1553,13 @@
if (bufferIndex == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 0));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 0));
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Main, 1));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Main, 1));
}];
}
}
@ -1638,13 +1569,13 @@
if (bufferIndex == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 0));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 0));
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_rwlock_unlock(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterRWLock(NDSDisplayID_Touch, 1));
sem_post(((MacMetalDisplayPresenter *)cdp)->GetCPUFilterSemaphore(NDSDisplayID_Touch, 1));
}];
}
}
@ -1653,27 +1584,30 @@
}
// Update the texture coordinates
pthread_mutex_lock(&_mutexTexProcessUpdate);
cdp->SetScreenTextureCoordinates((float)width[NDSDisplayID_Main], (float)height[NDSDisplayID_Main],
(float)width[NDSDisplayID_Touch], (float)height[NDSDisplayID_Touch],
(float *)[_displayTexCoordBuffer contents]);
[_displayTexCoordBuffer didModifyRange:NSMakeRange(0, sizeof(float) * (4 * 8))];
sem_wait(_semTexProcessUpdate);
// Update the frame info
id<MTLTexture> oldDisplayProcessedMain = _processedFrameInfo.tex[NDSDisplayID_Main];
id<MTLTexture> oldDisplayProcessedTouch = _processedFrameInfo.tex[NDSDisplayID_Touch];
_processedFrameInfo.bufferIndex = bufferIndex;
_processedFrameInfo.isMainDisplayProcessed = isDisplayProcessedMain;
_processedFrameInfo.isTouchDisplayProcessed = isDisplayProcessedTouch;
_processedFrameInfo.tex[NDSDisplayID_Main] = [texMain retain];
_processedFrameInfo.tex[NDSDisplayID_Touch] = [texTouch retain];
[self updateTexCoordBuffer];
[oldDisplayProcessedMain release];
[oldDisplayProcessedTouch release];
pthread_mutex_unlock(&_mutexTexProcessUpdate);
sem_post(_semTexProcessUpdate);
}
- (void) updateTexCoordBuffer
{
cdp->SetScreenTextureCoordinates((float)[_processedFrameInfo.tex[NDSDisplayID_Main] width], (float)[_processedFrameInfo.tex[NDSDisplayID_Main] height],
(float)[_processedFrameInfo.tex[NDSDisplayID_Touch] width], (float)[_processedFrameInfo.tex[NDSDisplayID_Touch] height],
(float *)[_displayTexCoordBuffer contents]);
[_displayTexCoordBuffer didModifyRange:NSMakeRange(0, sizeof(float) * (4 * 8))];
}
- (void) updateRenderBuffers
@ -1690,8 +1624,6 @@
newViewport.znear = 0.0;
newViewport.zfar = 1.0;
pthread_mutex_lock(&_mutexBufferUpdate);
if ([self needsViewportUpdate])
{
needEncodeViewport = true;
@ -1790,8 +1722,6 @@
_willDrawHUDInput = cdp->GetHUDShowInput();
_hudStringLength = cdp->GetHUDString().length();
_hudTouchLineLength = hudTouchLineLength;
pthread_mutex_unlock(&_mutexBufferUpdate);
}
- (void) renderForCommandBuffer:(id<MTLCommandBuffer>)cb
@ -1943,6 +1873,17 @@
const size_t clientWidth = cdp->GetPresenterProperties().clientWidth;
const size_t clientHeight = cdp->GetPresenterProperties().clientHeight;
// Create a unique semaphore name based on mach_absolute_time().
char semaphoreName[64];
memset(semaphoreName, '\0', sizeof(semaphoreName));
snprintf(semaphoreName, sizeof(semaphoreName), "desmume_semRenderToBuffer_0x%016llX", (unsigned long long)mach_absolute_time());
sem_t *semRenderToBuffer = sem_open(semaphoreName, O_CREAT, 0777, 1);
if (semRenderToBuffer == SEM_FAILED)
{
puts("desmume_semRenderToBuffer failed!");
}
@autoreleasepool
{
MTLTextureDescriptor *texRenderDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
@ -1956,19 +1897,13 @@
id<MTLTexture> texRender = [[sharedData device] newTextureWithDescriptor:texRenderDesc];
id<MTLBuffer> dstMTLBuffer = [[sharedData device] newBufferWithLength:clientWidth * clientHeight * sizeof(uint32_t) options:MTLResourceStorageModeManaged];
pthread_mutex_lock(&_mutexTexProcessUpdate);
const bool needsFetchBuffersLock = !_processedFrameInfo.isMainDisplayProcessed || !_processedFrameInfo.isTouchDisplayProcessed;
if (needsFetchBuffersLock)
{
pthread_rwlock_rdlock([sharedData rwlockFramebufferAtIndex:_processedFrameInfo.bufferIndex]);
}
sem_wait(_semTexProcessUpdate);
// Now that everything is set up, go ahead and draw everything.
[colorAttachment0Desc setTexture:texRender];
id<MTLCommandBuffer> cb = [self newCommandBuffer];
pthread_mutex_lock(&_mutexBufferUpdate);
[self updateRenderBuffers];
[self renderForCommandBuffer:cb
outputPipelineState:[self outputRGBAPipeline]
@ -1976,35 +1911,13 @@
texDisplayMain:_processedFrameInfo.tex[NDSDisplayID_Main]
texDisplayTouch:_processedFrameInfo.tex[NDSDisplayID_Touch]];
if (needsFetchBuffersLock)
{
if (_processedFrameInfo.bufferIndex == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_mutex_unlock(&_mutexBufferUpdate);
pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:0]);
pthread_mutex_unlock(&_mutexTexProcessUpdate);
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_mutex_unlock(&_mutexBufferUpdate);
pthread_rwlock_unlock([sharedData rwlockFramebufferAtIndex:1]);
pthread_mutex_unlock(&_mutexTexProcessUpdate);
}];
}
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_mutex_unlock(&_mutexBufferUpdate);
pthread_mutex_unlock(&_mutexTexProcessUpdate);
}];
}
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
sem_post(_semTexProcessUpdate);
}];
[cb commit];
sem_wait(semRenderToBuffer);
cb = [self newCommandBuffer];
id<MTLBlitCommandEncoder> bce = [cb blitCommandEncoder];
@ -2021,14 +1934,23 @@
[bce synchronizeResource:dstMTLBuffer];
[bce endEncoding];
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
sem_post(semRenderToBuffer);
}];
[cb commit];
[cb waitUntilCompleted];
// Wait on this thread until the GPU completes its task, then continue execution on this thread.
sem_wait(semRenderToBuffer);
sem_post(semRenderToBuffer);
memcpy(dstBuffer, [dstMTLBuffer contents], clientWidth * clientHeight * sizeof(uint32_t));
[texRender release];
[dstMTLBuffer release];
}
sem_close(semRenderToBuffer);
sem_unlink(semaphoreName);
}
@end
@ -2073,20 +1995,16 @@
{
@autoreleasepool
{
pthread_mutex_lock([presenterObject mutexTexProcessUpdate]);
sem_wait([presenterObject semTexProcessUpdate]);
const MetalProcessedFrameInfo &processedInfo = [presenterObject processedFrameInfo];
const bool needsFetchBuffersLock = !processedInfo.isMainDisplayProcessed || !processedInfo.isTouchDisplayProcessed;
if (needsFetchBuffersLock)
{
pthread_rwlock_rdlock([[presenterObject sharedData] rwlockFramebufferAtIndex:processedInfo.bufferIndex]);
}
// Now that everything is set up, go ahead and draw everything.
id<CAMetalDrawable> layerDrawable = [self nextDrawable];
[[presenterObject colorAttachment0Desc] setTexture:[layerDrawable texture]];
id<MTLCommandBuffer> cb = [presenterObject newCommandBuffer];
pthread_mutex_lock([presenterObject mutexBufferUpdate]);
[presenterObject updateRenderBuffers];
[presenterObject renderForCommandBuffer:cb
outputPipelineState:[presenterObject outputDrawablePipeline]
@ -2096,32 +2014,9 @@
[cb presentDrawable:layerDrawable];
if (needsFetchBuffersLock)
{
if (processedInfo.bufferIndex == 0)
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_mutex_unlock([presenterObject mutexBufferUpdate]);
pthread_rwlock_unlock([[presenterObject sharedData] rwlockFramebufferAtIndex:0]);
pthread_mutex_unlock([presenterObject mutexTexProcessUpdate]);
}];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_mutex_unlock([presenterObject mutexBufferUpdate]);
pthread_rwlock_unlock([[presenterObject sharedData] rwlockFramebufferAtIndex:1]);
pthread_mutex_unlock([presenterObject mutexTexProcessUpdate]);
}];
}
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
pthread_mutex_unlock([presenterObject mutexBufferUpdate]);
pthread_mutex_unlock([presenterObject mutexTexProcessUpdate]);
}];
}
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
sem_post([presenterObject semTexProcessUpdate]);
}];
[cb commit];
}
@ -2265,10 +2160,14 @@ MacMetalDisplayPresenter::~MacMetalDisplayPresenter()
pthread_mutex_destroy(&this->_mutexProcessPtr);
pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Main][0]);
pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Touch][0]);
pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Main][1]);
pthread_rwlock_destroy(&this->_cpuFilterRWLock[NDSDisplayID_Touch][1]);
sem_close(this->_semCPUFilter[NDSDisplayID_Main][0]);
sem_close(this->_semCPUFilter[NDSDisplayID_Main][1]);
sem_close(this->_semCPUFilter[NDSDisplayID_Touch][0]);
sem_close(this->_semCPUFilter[NDSDisplayID_Touch][1]);
sem_unlink("desmume_semCPUFilterMain0");
sem_unlink("desmume_semCPUFilterMain1");
sem_unlink("desmume_semCPUFilterTouch0");
sem_unlink("desmume_semCPUFilterTouch1");
}
void MacMetalDisplayPresenter::__InstanceInit(MacClientSharedObject *sharedObject)
@ -2286,10 +2185,49 @@ void MacMetalDisplayPresenter::__InstanceInit(MacClientSharedObject *sharedObjec
pthread_mutex_init(&_mutexProcessPtr, NULL);
pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Main][0], NULL);
pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Touch][0], NULL);
pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Main][1], NULL);
pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Touch][1], NULL);
_semCPUFilter[NDSDisplayID_Main][0] = sem_open("desmume_semCPUFilterMain0", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Main][0] == SEM_FAILED)
{
sem_unlink("desmume_semCPUFilterMain0");
_semCPUFilter[NDSDisplayID_Main][0] = sem_open("desmume_semCPUFilterMain0", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Main][0] == SEM_FAILED)
{
puts("desmume_semCPUFilterMain0 failed!");
}
}
_semCPUFilter[NDSDisplayID_Main][1] = sem_open("desmume_semCPUFilterMain1", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Main][1] == SEM_FAILED)
{
sem_unlink("desmume_semCPUFilterMain1");
_semCPUFilter[NDSDisplayID_Main][1] = sem_open("desmume_semCPUFilterMain1", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Main][1] == SEM_FAILED)
{
puts("desmume_semCPUFilterMain1 failed!");
}
}
_semCPUFilter[NDSDisplayID_Touch][0] = sem_open("desmume_semCPUFilterTouch0", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Touch][0] == SEM_FAILED)
{
sem_unlink("desmume_semCPUFilterTouch0");
_semCPUFilter[NDSDisplayID_Touch][0] = sem_open("desmume_semCPUFilterTouch0", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Touch][0] == SEM_FAILED)
{
puts("desmume_semCPUFilterTouch0 failed!");
}
}
_semCPUFilter[NDSDisplayID_Touch][1] = sem_open("desmume_semCPUFilterTouch1", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Touch][1] == SEM_FAILED)
{
sem_unlink("desmume_semCPUFilterTouch1");
_semCPUFilter[NDSDisplayID_Touch][1] = sem_open("desmume_semCPUFilterTouch1", O_CREAT | O_EXCL, 0777, 1);
if (_semCPUFilter[NDSDisplayID_Touch][1] == SEM_FAILED)
{
puts("desmume_semCPUFilterTouch1 failed!");
}
}
}
void MacMetalDisplayPresenter::_UpdateNormalSize()
@ -2329,9 +2267,9 @@ void MacMetalDisplayPresenter::_LoadNativeDisplayByID(const NDSDisplayID display
const uint8_t bufferIndex = fetchObjMutable.GetLastFetchIndex();
pthread_rwlock_wrlock(&this->_cpuFilterRWLock[displayID][bufferIndex]);
sem_wait(this->_semCPUFilter[displayID][bufferIndex]);
fetchObjMutable.CopyFromSrcClone(vf->GetSrcBufferPtr(), displayID, bufferIndex);
pthread_rwlock_unlock(&this->_cpuFilterRWLock[displayID][bufferIndex]);
sem_post(this->_semCPUFilter[displayID][bufferIndex]);
}
}
@ -2351,9 +2289,9 @@ pthread_mutex_t* MacMetalDisplayPresenter::GetMutexProcessPtr()
return &this->_mutexProcessPtr;
}
pthread_rwlock_t* MacMetalDisplayPresenter::GetCPUFilterRWLock(const NDSDisplayID displayID, const uint8_t bufferIndex)
sem_t* MacMetalDisplayPresenter::GetCPUFilterSemaphore(const NDSDisplayID displayID, const uint8_t bufferIndex)
{
return &this->_cpuFilterRWLock[displayID][bufferIndex];
return this->_semCPUFilter[displayID][bufferIndex];
}
void MacMetalDisplayPresenter::Init()
@ -2406,11 +2344,6 @@ void MacMetalDisplayPresenter::ProcessDisplays()
[this->_presenterObject processDisplays];
}
void MacMetalDisplayPresenter::UpdateLayout()
{
[this->_presenterObject updateRenderBuffers];
}
void MacMetalDisplayPresenter::CopyFrameToBuffer(uint32_t *dstBuffer)
{
[this->_presenterObject renderToBuffer:dstBuffer];

View File

@ -513,6 +513,19 @@ kernel void nds_fetch666ConvertOnly(const uint2 position [[thread_position_in_gr
outTexture.write(float4(outColor, 1.0f), position);
}
kernel void nds_fetch888PassthroughOnly(const uint2 position [[thread_position_in_grid]],
const texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]])
{
if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) )
{
return;
}
const float3 outColor = inTexture.read(position).rgb;
outTexture.write(float4(outColor, 1.0f), position);
}
float3 nds_apply_master_brightness(const float3 inColor, const uchar mode, const float intensity)
{
switch (mode)

View File

@ -194,14 +194,14 @@ void MacOGLClientFetchObject::FetchFromBufferIndex(const u8 index)
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_clientData;
this->_useDirectToCPUFilterPipeline = ([sharedViewObject numberViewsUsingDirectToCPUFiltering] > 0);
pthread_rwlock_rdlock([sharedViewObject rwlockFramebufferAtIndex:index]);
sem_wait([sharedViewObject semaphoreFramebufferAtIndex:index]);
CGLLockContext(this->_context);
CGLSetCurrentContext(this->_context);
this->OGLClientFetchObject::FetchFromBufferIndex(index);
CGLUnlockContext(this->_context);
pthread_rwlock_unlock([sharedViewObject rwlockFramebufferAtIndex:index]);
sem_post([sharedViewObject semaphoreFramebufferAtIndex:index]);
}
GLuint MacOGLClientFetchObject::GetFetchTexture(const NDSDisplayID displayID)
@ -435,7 +435,7 @@ void MacOGLDisplayPresenter::WriteLockEmuFramebuffer(const uint8_t bufferIndex)
const GPUClientFetchObject &fetchObj = this->GetFetchObject();
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData();
pthread_rwlock_wrlock([sharedViewObject rwlockFramebufferAtIndex:bufferIndex]);
sem_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]);
}
void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex)
@ -443,7 +443,7 @@ void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex)
const GPUClientFetchObject &fetchObj = this->GetFetchObject();
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData();
pthread_rwlock_rdlock([sharedViewObject rwlockFramebufferAtIndex:bufferIndex]);
sem_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]);
}
void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex)
@ -451,7 +451,7 @@ void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex)
const GPUClientFetchObject &fetchObj = this->GetFetchObject();
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData();
pthread_rwlock_unlock([sharedViewObject rwlockFramebufferAtIndex:bufferIndex]);
sem_post([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]);
}
#pragma mark -