Cocoa Port: Rework triple buffering for Metal display views yet again. This should fix the performance regression introduced in commit a65ceae9 for the larger custom framebuffer sizes.

This commit is contained in:
rogerman 2018-01-30 16:26:05 -08:00
parent 2a58246eb5
commit f9c32c9e79
9 changed files with 313 additions and 161 deletions

View File

@ -2,7 +2,7 @@
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 Theo Berkau
Copyright (C) 2007 shash
Copyright (C) 2008-2017 DeSmuME team
Copyright (C) 2008-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -7578,7 +7578,7 @@ void GPUSubsystem::ForceFrameStop()
if (this->_frameNeedsFinish)
{
this->_frameNeedsFinish = false;
this->_event->DidFrameEnd(false, this->_displayInfo);
this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo);
}
}
@ -7616,8 +7616,6 @@ void GPUSubsystem::UpdateRenderProperties()
this->_engineSub->isLineOutputNative[l] = true;
}
this->_displayInfo.bufferIndex = (this->_displayInfo.bufferIndex + 1) % this->_displayInfo.framebufferPageCount;
const size_t nativeFramebufferSize = GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * this->_displayInfo.pixelBytes;
const size_t customFramebufferSize = this->_displayInfo.customWidth * this->_displayInfo.customHeight * this->_displayInfo.pixelBytes;
@ -8176,14 +8174,7 @@ void GPUSubsystem::RenderLine(const size_t l)
this->_engineSub->ApplySettings();
this->_event->DidApplyGPUSettingsEnd();
u8 targetBufferIndex = this->_displayInfo.bufferIndex;
if ( (l == 0) && !this->_willFrameSkip )
{
targetBufferIndex = (targetBufferIndex + 1) % this->_displayInfo.framebufferPageCount;
}
this->_event->DidFrameBegin(this->_willFrameSkip, targetBufferIndex, l);
this->_event->DidFrameBegin(l, this->_willFrameSkip, this->_displayInfo.framebufferPageCount, this->_displayInfo.bufferIndex);
this->_frameNeedsFinish = true;
}
@ -8375,6 +8366,14 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
}
}
// Default frame-begin handler. Advances the in/out buffer index to the next
// page in the rotation, wrapping around at pageCount. The index is left
// untouched when the frame is being skipped, when the line is not the first
// line of the frame, or when there is only a single page to use.
void GPUEventHandlerDefault::DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut)
{
	// Bail out early for every case in which the page must not change.
	if (isFrameSkipRequested || (line != 0) || (pageCount <= 1))
	{
		return;
	}
	
	selectedBufferIndexInOut = ((selectedBufferIndexInOut + 1) % pageCount);
}
GPUClientFetchObject::GPUClientFetchObject()
{
for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++)

View File

@ -2,7 +2,7 @@
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 Theo Berkau
Copyright (C) 2007 shash
Copyright (C) 2009-2017 DeSmuME team
Copyright (C) 2009-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -1722,7 +1722,7 @@ public:
class GPUEventHandler
{
public:
virtual void DidFrameBegin(bool isFrameSkipRequested, const u8 targetBufferIndex, const size_t line) = 0;
virtual void DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut) = 0;
virtual void DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &latestDisplayInfo) = 0;
virtual void DidRender3DBegin() = 0;
virtual void DidRender3DEnd() = 0;
@ -1738,7 +1738,7 @@ public:
class GPUEventHandlerDefault : public GPUEventHandler
{
public:
virtual void DidFrameBegin(bool isFrameSkipRequested, const u8 targetBufferIndex, const size_t line) {};
virtual void DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut);
virtual void DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &latestDisplayInfo) {};
virtual void DidRender3DBegin() {};
virtual void DidRender3DEnd() {};

View File

@ -1,6 +1,6 @@
/*
Copyright (C) 2006 yopyop
Copyright (C) 2008-2017 DeSmuME team
Copyright (C) 2008-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -570,6 +570,8 @@ bool GameInfo::loadROM(std::string fname, u32 type)
void GameInfo::closeROM()
{
GPU->ForceFrameStop();
if (reader)
reader->DeInit(fROM);

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2014-2017 DeSmuME team
Copyright (C) 2014-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -35,7 +35,7 @@
#include "ClientDisplayView.h"
#define OPENGL_FETCH_BUFFER_COUNT 3
#define OPENGL_FETCH_BUFFER_COUNT 2
class OGLVideoOutput;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2013-2017 DeSmuME team
Copyright (C) 2013-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -47,6 +47,15 @@
#define VIDEO_FLUSH_TIME_LIMIT_OFFSET 8 // The amount of time, in seconds, to wait for a flush to occur on a given CVDisplayLink before stopping it.
// Describes what, if anything, is currently being done with a single display
// buffer page. Buffer-selection code compares against these values to decide
// which page may be written or read next, so each state documents whether the
// buffer may be touched while it is in that state.
enum ClientDisplayBufferState
{
ClientDisplayBufferState_Idle = 0, // The buffer has already been read and is currently idle. It is a candidate for a read or write operation.
ClientDisplayBufferState_Writing = 1, // The buffer is currently being written. It cannot be accessed.
ClientDisplayBufferState_Ready = 2, // The buffer was just written to, but has not been read yet. It is a candidate for a read or write operation.
ClientDisplayBufferState_PendingRead = 3, // The buffer has been marked that it will be read. It must not be accessed.
ClientDisplayBufferState_Reading = 4 // The buffer is currently being read. It cannot be accessed.
};
class GPUEventHandlerOSX;
#ifdef ENABLE_SHARED_FETCH_OBJECT
@ -58,7 +67,11 @@ typedef std::map<CGDirectDisplayID, int64_t> DisplayLinkFlushTimeLimitMap;
{
GPUClientFetchObject *GPUFetchObject;
task_t _taskEmulationLoop;
semaphore_t _semFramebuffer[2];
OSSpinLock _spinlockFramebufferStates[MAX_FRAMEBUFFER_PAGES];
semaphore_t _semFramebuffer[MAX_FRAMEBUFFER_PAGES];
volatile ClientDisplayBufferState _framebufferState[MAX_FRAMEBUFFER_PAGES];
pthread_rwlock_t *_rwlockOutputList;
pthread_mutex_t _mutexDisplayLinkLists;
NSMutableArray *_cdsOutputList;
@ -80,7 +93,10 @@ typedef std::map<CGDirectDisplayID, int64_t> DisplayLinkFlushTimeLimitMap;
- (void) semaphoreFramebufferCreate;
- (void) semaphoreFramebufferDestroy;
- (semaphore_t) semaphoreFramebufferAtIndex:(const u8)bufferIndex;
- (u8) selectBufferIndex:(const u8)currentIndex pageCount:(size_t)pageCount;
- (semaphore_t) semaphoreFramebufferPageAtIndex:(const u8)bufferIndex;
- (ClientDisplayBufferState) framebufferStateAtIndex:(uint8_t)index;
- (void) setFramebufferState:(ClientDisplayBufferState)bufferState index:(uint8_t)index;
- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock;
- (void) incrementViewsUsingDirectToCPUFiltering;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2013-2017 DeSmuME team
Copyright (C) 2013-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -76,7 +76,7 @@ public:
bool GetRender3DNeedsFinish();
virtual void DidFrameBegin(bool isFrameSkipRequested, const u8 targetBufferIndex, const size_t line);
virtual void DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut);
virtual void DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &latestDisplayInfo);
virtual void DidRender3DBegin();
virtual void DidRender3DEnd();
@ -183,12 +183,12 @@ public:
if (fetchObject == NULL)
{
fetchObject = new MacOGLClientFetchObject;
GPU->SetFramebufferPageCount(OPENGL_FETCH_BUFFER_COUNT);
}
fetchObject->Init();
gpuEvent->SetFetchObject(fetchObject);
GPU->SetFramebufferPageCount(OPENGL_FETCH_BUFFER_COUNT);
GPU->SetWillAutoResolveToCustomBuffer(false);
#endif
@ -254,16 +254,22 @@ public:
gpuEvent->FramebufferLock();
#ifdef ENABLE_SHARED_FETCH_OBJECT
semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:0]);
semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:1]);
const size_t maxPages = GPU->GetDisplayInfo().framebufferPageCount;
for (size_t i = 0; i < maxPages; i++)
{
semaphore_wait([[self sharedData] semaphoreFramebufferPageAtIndex:i]);
}
#endif
GPU->SetCustomFramebufferSize(w, h);
#ifdef ENABLE_SHARED_FETCH_OBJECT
fetchObject->SetFetchBuffers(GPU->GetDisplayInfo());
semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:1]);
semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:0]);
for (size_t i = maxPages - 1; i < maxPages; i--)
{
semaphore_signal([[self sharedData] semaphoreFramebufferPageAtIndex:i]);
}
#endif
gpuEvent->FramebufferUnlock();
@ -315,16 +321,22 @@ public:
if (colorFormat != dispInfo.colorFormat)
{
#ifdef ENABLE_SHARED_FETCH_OBJECT
semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:0]);
semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:1]);
const size_t maxPages = GPU->GetDisplayInfo().framebufferPageCount;
for (size_t i = 0; i < maxPages; i++)
{
semaphore_wait([[self sharedData] semaphoreFramebufferPageAtIndex:i]);
}
#endif
GPU->SetColorFormat((NDSColorFormat)colorFormat);
#ifdef ENABLE_SHARED_FETCH_OBJECT
fetchObject->SetFetchBuffers(GPU->GetDisplayInfo());
semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:1]);
semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:0]);
for (size_t i = maxPages - 1; i < maxPages; i--)
{
semaphore_signal([[self sharedData] semaphoreFramebufferPageAtIndex:i]);
}
#endif
}
@ -866,15 +878,20 @@ public:
gpuEvent->FramebufferLock();
#ifdef ENABLE_SHARED_FETCH_OBJECT
semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:0]);
semaphore_wait([[self sharedData] semaphoreFramebufferAtIndex:1]);
const size_t maxPages = GPU->GetDisplayInfo().framebufferPageCount;
for (size_t i = 0; i < maxPages; i++)
{
semaphore_wait([[self sharedData] semaphoreFramebufferPageAtIndex:i]);
}
#endif
GPU->ClearWithColor(colorBGRA5551);
#ifdef ENABLE_SHARED_FETCH_OBJECT
semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:1]);
semaphore_signal([[self sharedData] semaphoreFramebufferAtIndex:0]);
for (size_t i = maxPages - 1; i < maxPages; i--)
{
semaphore_signal([[self sharedData] semaphoreFramebufferPageAtIndex:i]);
}
#endif
gpuEvent->FramebufferUnlock();
@ -941,9 +958,14 @@ public:
pthread_mutex_init(&_mutexFetchExecute, NULL);
_taskEmulationLoop = 0;
_semFramebuffer[0] = 0;
_semFramebuffer[1] = 0;
for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++)
{
_semFramebuffer[i] = 0;
_framebufferState[i] = ClientDisplayBufferState_Idle;
_spinlockFramebufferStates[i] = OS_SPINLOCK_INIT;
}
[[NSNotificationCenter defaultCenter] addObserver:self
selector:@selector(respondToScreenChange:)
name:@"NSApplicationDidChangeScreenParametersNotification"
@ -1003,30 +1025,136 @@ public:
- (void) semaphoreFramebufferCreate
{
_taskEmulationLoop = mach_task_self();
semaphore_create(_taskEmulationLoop, &_semFramebuffer[0], SYNC_POLICY_FIFO, 1);
semaphore_create(_taskEmulationLoop, &_semFramebuffer[1], SYNC_POLICY_FIFO, 1);
for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++)
{
semaphore_create(_taskEmulationLoop, &_semFramebuffer[i], SYNC_POLICY_FIFO, 1);
}
}
- (void) semaphoreFramebufferDestroy
{
if (_semFramebuffer[0] != 0)
for (size_t i = MAX_FRAMEBUFFER_PAGES - 1; i < MAX_FRAMEBUFFER_PAGES; i--)
{
semaphore_destroy(_taskEmulationLoop, _semFramebuffer[0]);
_semFramebuffer[0] = 0;
}
if (_semFramebuffer[1] != 0)
{
semaphore_destroy(_taskEmulationLoop, _semFramebuffer[1]);
_semFramebuffer[1] = 0;
if (_semFramebuffer[i] != 0)
{
semaphore_destroy(_taskEmulationLoop, _semFramebuffer[i]);
_semFramebuffer[i] = 0;
}
}
}
- (semaphore_t) semaphoreFramebufferAtIndex:(const u8)bufferIndex
// Chooses the framebuffer page that the next frame should be written into.
//
// currentIndex is the page that was selected previously and pageCount is the
// number of pages in the rotation. Candidates are tried in order of decreasing
// availability (see the numbered stages below). This method only picks a page;
// it does not lock it or change its state.
//
// Returns the selected page index. If no candidate qualifies in any stage, the
// current page (wrapped into range) is returned.
- (u8) selectBufferIndex:(const u8)currentIndex pageCount:(size_t)pageCount
{
	// With zero or one page there is nothing to rotate through. Returning here
	// also keeps the modulo operations below from dividing by zero.
	if (pageCount <= 1)
	{
		return currentIndex;
	}
	
	assert(pageCount <= MAX_FRAMEBUFFER_PAGES);
	
	// Every full scan below restarts from the page after currentIndex and is
	// bounded by a step count. This guarantees that each scan visits every
	// other page exactly once, no matter where an earlier spin loop stopped,
	// and that the scan terminates even if currentIndex is out of range.
	
	// 1. Search for an idle buffer among all pages other than the current one.
	for (size_t step = 1; step < pageCount; step++)
	{
		const u8 candidate = (u8)((currentIndex + step) % pageCount);
		
		if ([self framebufferStateAtIndex:candidate] == ClientDisplayBufferState_Idle)
		{
			return candidate;
		}
	}
	
	// 2. Briefly spin over all pages, accepting either an idle buffer or a
	//    ready buffer that is not the current one.
	for (size_t spin = 0; spin < 100ULL * pageCount; spin++)
	{
		const u8 candidate = (u8)((currentIndex + 1 + spin) % pageCount);
		
		// Read the state once so that both tests see the same value.
		const ClientDisplayBufferState state = [self framebufferStateAtIndex:candidate];
		
		if ( (state == ClientDisplayBufferState_Idle) ||
		     ((state == ClientDisplayBufferState_Ready) && (candidate != currentIndex)) )
		{
			return candidate;
		}
	}
	
	// 3. Since the most available buffers couldn't be taken, spin for some
	//    finite period of time until an idle buffer emerges.
	for (size_t spin = 0; spin < 10000ULL * pageCount; spin++)
	{
		const u8 candidate = (u8)((currentIndex + 1 + spin) % pageCount);
		
		if ([self framebufferStateAtIndex:candidate] == ClientDisplayBufferState_Idle)
		{
			return candidate;
		}
	}
	
	// 4. In an effort to find something that is likely to be available shortly
	//    in the future, search for any idle, ready or reading buffer.
	for (size_t step = 1; step < pageCount; step++)
	{
		const u8 candidate = (u8)((currentIndex + step) % pageCount);
		const ClientDisplayBufferState state = [self framebufferStateAtIndex:candidate];
		
		if ( (state == ClientDisplayBufferState_Idle) ||
		     (state == ClientDisplayBufferState_Ready) ||
		     (state == ClientDisplayBufferState_Reading) )
		{
			return candidate;
		}
	}
	
	// 5. As a last resort, search for any buffer that is not currently writing.
	for (size_t step = 1; step < pageCount; step++)
	{
		const u8 candidate = (u8)((currentIndex + step) % pageCount);
		
		if ([self framebufferStateAtIndex:candidate] != ClientDisplayBufferState_Writing)
		{
			return candidate;
		}
	}
	
	// Nothing qualified, so fall back to the current page.
	return (u8)(currentIndex % pageCount);
}
// Returns the semaphore stored for the framebuffer page at bufferIndex.
// bufferIndex must be less than MAX_FRAMEBUFFER_PAGES.
- (semaphore_t) semaphoreFramebufferPageAtIndex:(const u8)bufferIndex
{
	assert(bufferIndex < MAX_FRAMEBUFFER_PAGES);
	
	const semaphore_t pageSemaphore = _semFramebuffer[bufferIndex];
	return pageSemaphore;
}
// Returns the recorded state of the framebuffer page at the given index.
// The read is performed while holding that page's spinlock. index must be
// less than MAX_FRAMEBUFFER_PAGES.
- (ClientDisplayBufferState) framebufferStateAtIndex:(uint8_t)index
{
	// Same bounds check as semaphoreFramebufferPageAtIndex:, since index is
	// used to subscript two fixed-size arrays below.
	assert(index < MAX_FRAMEBUFFER_PAGES);
	
	OSSpinLockLock(&_spinlockFramebufferStates[index]);
	const ClientDisplayBufferState bufferState = _framebufferState[index];
	OSSpinLockUnlock(&_spinlockFramebufferStates[index]);
	
	return bufferState;
}
// Records bufferState as the new state of the framebuffer page at the given
// index. The write is performed while holding that page's spinlock. index
// must be less than MAX_FRAMEBUFFER_PAGES.
- (void) setFramebufferState:(ClientDisplayBufferState)bufferState index:(uint8_t)index
{
	// Same bounds check as semaphoreFramebufferPageAtIndex:, since index is
	// used to subscript two fixed-size arrays below.
	assert(index < MAX_FRAMEBUFFER_PAGES);
	
	OSSpinLockLock(&_spinlockFramebufferStates[index]);
	_framebufferState[index] = bufferState;
	OSSpinLockUnlock(&_spinlockFramebufferStates[index]);
}
- (void) setOutputList:(NSMutableArray *)theOutputList rwlock:(pthread_rwlock_t *)theRWLock
{
pthread_rwlock_t *currentRWLock = _rwlockOutputList;
@ -1290,7 +1418,7 @@ void GPUEventHandlerOSX::SetFetchObject(GPUClientFetchObject *fetchObject)
this->_fetchObject = fetchObject;
}
void GPUEventHandlerOSX::DidFrameBegin(bool isFrameSkipRequested, const u8 targetBufferIndex, const size_t line)
void GPUEventHandlerOSX::DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut)
{
this->FramebufferLock();
@ -1298,7 +1426,14 @@ void GPUEventHandlerOSX::DidFrameBegin(bool isFrameSkipRequested, const u8 targe
if (!isFrameSkipRequested)
{
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_fetchObject->GetClientData();
semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:targetBufferIndex]);
if ( (pageCount > 1) && (line == 0) )
{
selectedBufferIndexInOut = [sharedViewObject selectBufferIndex:selectedBufferIndexInOut pageCount:pageCount];
}
semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:selectedBufferIndexInOut]);
[sharedViewObject setFramebufferState:ClientDisplayBufferState_Writing index:selectedBufferIndexInOut];
}
#endif
}
@ -1310,7 +1445,8 @@ void GPUEventHandlerOSX::DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &
if (!isFrameSkipped)
{
this->_fetchObject->SetFetchDisplayInfo(latestDisplayInfo);
semaphore_signal([sharedViewObject semaphoreFramebufferAtIndex:latestDisplayInfo.bufferIndex]);
[sharedViewObject setFramebufferState:ClientDisplayBufferState_Ready index:latestDisplayInfo.bufferIndex];
semaphore_signal([sharedViewObject semaphoreFramebufferPageAtIndex:latestDisplayInfo.bufferIndex]);
}
#endif

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2017 DeSmuME team
Copyright (C) 2017-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -36,16 +36,7 @@
#endif
#define METAL_FETCH_BUFFER_COUNT 3
#define RENDER_BUFFER_COUNT 4
enum ClientDisplayBufferState
{
ClientDisplayBufferState_Idle = 0, // The buffer has already been read and is currently idle. It is a candidate for a read or write operation.
ClientDisplayBufferState_Writing = 1, // The buffer is currently being written. It cannot be accessed.
ClientDisplayBufferState_Ready = 2, // The buffer was just written to, but has not been read yet. It is a candidate for a read or write operation.
ClientDisplayBufferState_PendingRead = 3, // The buffer has been marked that it will be read. It must not be accessed.
ClientDisplayBufferState_Reading = 4 // The buffer is currently being read. It cannot be accessed.
};
#define RENDER_BUFFER_COUNT 6
class MacMetalFetchObject;
class MacMetalDisplayPresenter;
@ -89,6 +80,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
@interface MetalDisplayViewSharedData : MacClientSharedObject
{
id<MTLDevice> device;
id<MTLCommandQueue> _fetchCommandQueue;
id<MTLCommandQueue> commandQueue;
id<MTLLibrary> defaultLibrary;
@ -124,6 +116,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
MetalTexturePair texPairFetch;
id<MTLBlitCommandEncoder> bceFetch;
BOOL willFetchImmediate;
id<MTLTexture> texLQ2xLUT;
id<MTLTexture> texHQ2xLUT;
@ -156,6 +149,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
@property (assign) MetalTexturePair texPairFetch;
@property (assign) id<MTLBlitCommandEncoder> bceFetch;
@property (assign) BOOL willFetchImmediate;
@property (readonly, nonatomic) id<MTLTexture> texLQ2xLUT;
@property (readonly, nonatomic) id<MTLTexture> texHQ2xLUT;
@ -215,7 +209,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
OSSpinLock _spinlockRenderBufferStates[RENDER_BUFFER_COUNT];
dispatch_semaphore_t _semRenderBuffers[RENDER_BUFFER_COUNT];
ClientDisplayBufferState _renderBufferState[RENDER_BUFFER_COUNT];
volatile ClientDisplayBufferState _renderBufferState[RENDER_BUFFER_COUNT];
MetalTexturePair texPairProcess;
MetalRenderFrameInfo renderFrameInfo;

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2017 DeSmuME team
Copyright (C) 2017-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -37,6 +37,7 @@
@synthesize texPairFetch;
@synthesize bceFetch;
@synthesize willFetchImmediate;
@synthesize texLQ2xLUT;
@synthesize texHQ2xLUT;
@ -66,6 +67,7 @@
[device retain];
commandQueue = [device newCommandQueue];
_fetchCommandQueue = [device newCommandQueue];
defaultLibrary = [device newDefaultLibrary];
_fetch555Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555"] error:nil] retain];
_fetch666Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666"] error:nil] retain];
@ -148,7 +150,7 @@
idxBufferPtr[j+5] = k+0;
}
id<MTLCommandBuffer> cb = [commandQueue commandBufferWithUnretainedReferences];;
id<MTLCommandBuffer> cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];;
id<MTLBlitCommandEncoder> bce = [cb blitCommandEncoder];
[bce copyFromBuffer:tempHUDIndexBuffer
@ -235,9 +237,10 @@
texPairFetch.main = [_texDisplayPostprocessNative[NDSDisplayID_Main][0] retain];
texPairFetch.touch = [_texDisplayPostprocessNative[NDSDisplayID_Touch][0] retain];
bceFetch = nil;
willFetchImmediate = YES;
// Set up the HQnx LUT textures.
SetupHQnxLUTs_Metal(device, commandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT);
SetupHQnxLUTs_Metal(device, _fetchCommandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT);
texCurrentHQnxLUT = nil;
return self;
@ -248,6 +251,7 @@
[device release];
[commandQueue release];
[_fetchCommandQueue release];
[defaultLibrary release];
[_fetch555Pipeline release];
[_fetch666Pipeline release];
@ -426,10 +430,9 @@
const size_t th = _fetchThreadsPerGroup.height;
_fetchThreadGroupsPerGridCustom = MTLSizeMake((w + tw - 1) / tw, (h + th - 1) / th, 1);
id<MTLCommandBuffer> cb = [commandQueue commandBufferWithUnretainedReferences];
id<MTLCommandBuffer> cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:dispInfo.bufferIndex commandBuffer:cb];
[cb commit];
[cb waitUntilCompleted];
const MetalTexturePair oldTexPair = [self texPairFetch];
@ -625,108 +628,107 @@
- (void) fetchFromBufferIndex:(const u8)index
{
id<MTLCommandBuffer> cb = [commandQueue commandBufferWithUnretainedReferences];
id<MTLCommandBuffer> cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
[cb enqueue];
[self setWillFetchImmediate:YES];
if (!_isSharedBufferTextureSupported)
{
/*
semaphore_wait([self semaphoreFramebufferPageAtIndex:index]);
[self setFramebufferState:ClientDisplayBufferState_Reading index:index];
id<MTLBlitCommandEncoder> bce = [cb blitCommandEncoder];
[self setBceFetch:bce];
*/
semaphore_wait([self semaphoreFramebufferAtIndex:index]);
GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index);
semaphore_signal([self semaphoreFramebufferAtIndex:index]);
/*
[self setBceFetch:nil];
[bce endEncoding];
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
semaphore_signal([self semaphoreFramebufferAtIndex:index]);
}];
[cb commit];
cb = [commandQueue commandBufferWithUnretainedReferences];
*/
}
else
{
GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index);
if ([self willFetchImmediate])
{
[self setFramebufferState:ClientDisplayBufferState_Idle index:index];
semaphore_signal([self semaphoreFramebufferPageAtIndex:index]);
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
[self setFramebufferState:ClientDisplayBufferState_Idle index:index];
semaphore_signal([self semaphoreFramebufferPageAtIndex:index]);
}];
[cb commit];
cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
[cb enqueue];
}
}
const MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:index commandBuffer:cb];
[newTexPair.main retain];
[newTexPair.touch retain];
//[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
const MetalTexturePair oldTexPair = [self texPairFetch];
[newTexPair.main retain];
[newTexPair.touch retain];
[self setTexPairFetch:newTexPair];
[oldTexPair.main release];
[oldTexPair.touch release];
//}];
}];
[cb commit];
}
- (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce
{
if (_isSharedBufferTextureSupported)
{
return;
}
id<MTLTexture> targetDestination = _texDisplayFetchNative[displayID][bufferIndex];
const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex);
[targetDestination replaceRegion:MTLRegionMake2D(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT)
mipmapLevel:0
withBytes:currentDisplayInfo.nativeBuffer[displayID]
bytesPerRow:_nativeLineSize];
/*
const id<MTLBuffer> targetSource = _bufDisplayFetchNative[displayID][bufferIndex];
[bce copyFromBuffer:targetSource
sourceOffset:0
sourceBytesPerRow:_nativeLineSize
sourceBytesPerImage:_nativeBufferSize
sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1)
toTexture:targetDestination
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
*/
if ([self willFetchImmediate])
{
[targetDestination replaceRegion:MTLRegionMake2D(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT)
mipmapLevel:0
withBytes:currentDisplayInfo.nativeBuffer[displayID]
bytesPerRow:_nativeLineSize];
}
else
{
[bce copyFromBuffer:_bufDisplayFetchNative[displayID][bufferIndex]
sourceOffset:0
sourceBytesPerRow:_nativeLineSize
sourceBytesPerImage:_nativeBufferSize
sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1)
toTexture:targetDestination
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
}
- (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce
{
if (_isSharedBufferTextureSupported)
{
return;
}
const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex);
id<MTLTexture> targetDestination = _texDisplayFetchCustom[displayID][bufferIndex];
[targetDestination replaceRegion:MTLRegionMake2D(0, 0, currentDisplayInfo.customWidth, currentDisplayInfo.customHeight)
mipmapLevel:0
withBytes:currentDisplayInfo.customBuffer[displayID]
bytesPerRow:_customLineSize];
/*
const id<MTLBuffer> targetSource = _bufDisplayFetchCustom[displayID][bufferIndex];
[bce copyFromBuffer:targetSource
sourceOffset:0
sourceBytesPerRow:_customLineSize
sourceBytesPerImage:_customBufferSize
sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1)
toTexture:targetDestination
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
*/
if ( (currentDisplayInfo.customWidth < GPU_FRAMEBUFFER_NATIVE_WIDTH * 5) && (currentDisplayInfo.customHeight < GPU_FRAMEBUFFER_NATIVE_HEIGHT * 5) )
{
[targetDestination replaceRegion:MTLRegionMake2D(0, 0, currentDisplayInfo.customWidth, currentDisplayInfo.customHeight)
mipmapLevel:0
withBytes:currentDisplayInfo.customBuffer[displayID]
bytesPerRow:_customLineSize];
}
else
{
[self setWillFetchImmediate:NO];
[bce copyFromBuffer:_bufDisplayFetchCustom[displayID][bufferIndex]
sourceOffset:0
sourceBytesPerRow:_customLineSize
sourceBytesPerImage:_customBufferSize
sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1)
toTexture:targetDestination
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
}
@end
@ -1385,10 +1387,10 @@
[cce endEncoding];
[newTexProcess.main retain];
[newTexProcess.touch retain];
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
[newTexProcess.main retain];
[newTexProcess.touch retain];
const MetalTexturePair oldTexPair = [self texPairProcess];
[self setTexPairProcess:newTexProcess];
[oldTexPair.main release];
@ -1547,15 +1549,16 @@
}
}
[newTexProcess.main retain];
[newTexProcess.touch retain];
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
[newTexProcess.main retain];
[newTexProcess.touch retain];
const MetalTexturePair oldTexPair = [self texPairProcess];
[self setTexPairProcess:newTexProcess];
[oldTexPair.main release];
[oldTexPair.touch release];
}];
[cb commit];
if ([self needsProcessFrameWait])
@ -1626,10 +1629,10 @@
newTexProcess.touch = _texDisplayPixelScaler[NDSDisplayID_Touch];
}
[newTexProcess.main retain];
[newTexProcess.touch retain];
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
[newTexProcess.main retain];
[newTexProcess.touch retain];
const MetalTexturePair oldTexPair = [self texPairProcess];
[self setTexPairProcess:newTexProcess];
[oldTexPair.main release];
@ -1650,12 +1653,12 @@
}
else
{
[newTexProcess.main retain];
[newTexProcess.touch retain];
id<MTLCommandBuffer> cb = [self newCommandBuffer];
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
[newTexProcess.main retain];
[newTexProcess.touch retain];
const MetalTexturePair oldTexPair = [self texPairProcess];
[self setTexPairProcess:newTexProcess];
[oldTexPair.main release];
@ -1781,7 +1784,7 @@
if (stillSearching)
{
selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT;
for (size_t spin = 0; spin < 10ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++)
for (size_t spin = 0; spin < 100ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++)
{
if ( ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) ||
(([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) && (selectedIndex != mrfi.renderIndex)) )
@ -1802,7 +1805,7 @@
if (stillSearching)
{
selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT;
for (size_t spin = 0; spin < 100000ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++)
for (size_t spin = 0; spin < 10000ULL * RENDER_BUFFER_COUNT; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT, spin++)
{
if ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle)
{

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2017 DeSmuME team
Copyright (C) 2017-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -195,14 +195,16 @@ void MacOGLClientFetchObject::FetchFromBufferIndex(const u8 index)
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_clientData;
this->_useDirectToCPUFilterPipeline = ([sharedViewObject numberViewsUsingDirectToCPUFiltering] > 0);
semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:index]);
semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:index]);
[sharedViewObject setFramebufferState:ClientDisplayBufferState_Reading index:index];
CGLLockContext(this->_context);
CGLSetCurrentContext(this->_context);
this->OGLClientFetchObject::FetchFromBufferIndex(index);
CGLUnlockContext(this->_context);
semaphore_signal([sharedViewObject semaphoreFramebufferAtIndex:index]);
[sharedViewObject setFramebufferState:ClientDisplayBufferState_Idle index:index];
semaphore_signal([sharedViewObject semaphoreFramebufferPageAtIndex:index]);
}
GLuint MacOGLClientFetchObject::GetFetchTexture(const NDSDisplayID displayID)
@ -436,7 +438,7 @@ void MacOGLDisplayPresenter::WriteLockEmuFramebuffer(const uint8_t bufferIndex)
const GPUClientFetchObject &fetchObj = this->GetFetchObject();
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData();
semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]);
semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:bufferIndex]);
}
void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex)
@ -444,7 +446,7 @@ void MacOGLDisplayPresenter::ReadLockEmuFramebuffer(const uint8_t bufferIndex)
const GPUClientFetchObject &fetchObj = this->GetFetchObject();
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData();
semaphore_wait([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]);
semaphore_wait([sharedViewObject semaphoreFramebufferPageAtIndex:bufferIndex]);
}
void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex)
@ -452,7 +454,7 @@ void MacOGLDisplayPresenter::UnlockEmuFramebuffer(const uint8_t bufferIndex)
const GPUClientFetchObject &fetchObj = this->GetFetchObject();
MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)fetchObj.GetClientData();
semaphore_signal([sharedViewObject semaphoreFramebufferAtIndex:bufferIndex]);
semaphore_signal([sharedViewObject semaphoreFramebufferPageAtIndex:bufferIndex]);
}
#pragma mark -