From da3970d817ec2345de888d6e80a4649d3b7f138e Mon Sep 17 00:00:00 2001 From: rogerman Date: Thu, 25 Oct 2018 01:05:36 -0700 Subject: [PATCH] Cocoa Port: Improve the performance and robustness of Metal display views. - Add a unique sequence number to fetched frames to ensure that older frames are not drawn after newer frames. - After much research, finally settle on a method for fetching the NDS framebuffers -- using a MTLBlitCommandEncoder to blit a MTLBuffer to a MTLTexture. It is faster than uploading a texture using [id replaceRegion:mipmapLevel:withBytes:bytesPerRow:], and also faster than using a pinned-memory backed linear texture. This method will be the way going forward for fetching framebuffers in Metal. --- desmume/src/GPU.cpp | 13 +- desmume/src/GPU.h | 7 +- desmume/src/frontend/cocoa/cocoa_GPU.mm | 18 -- .../cocoa/userinterface/MacMetalDisplayView.h | 28 +- .../userinterface/MacMetalDisplayView.mm | 265 +++++++----------- 5 files changed, 126 insertions(+), 205 deletions(-) diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index 01e6bce2b..810995681 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -8104,7 +8104,7 @@ GPUSubsystem::GPUSubsystem() _pending3DRendererID = RENDERID_NULL; _needChange3DRenderer = false; - _videoFrameCount = 0; + _videoFrameIndex = 0; _render3DFrameCount = 0; _frameNeedsFinish = false; _willFrameSkip = false; @@ -8134,6 +8134,7 @@ GPUSubsystem::GPUSubsystem() _displayInfo.isDisplayEnabled[NDSDisplayID_Touch] = true; _displayInfo.bufferIndex = 0; + _displayInfo.sequenceNumber = 0; _displayInfo.masterNativeBuffer = _masterFramebuffer; _displayInfo.masterCustomBuffer = (u8 *)_masterFramebuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2 * _displayInfo.pixelBytes); @@ -8205,12 +8206,12 @@ GPUSubsystem::~GPUSubsystem() void GPUSubsystem::_UpdateFPSRender3D() { - this->_videoFrameCount++; - if (this->_videoFrameCount == 60) + this->_videoFrameIndex++; + if (this->_videoFrameIndex == 60) { 
this->_render3DFrameCount = gfx3d.render3DFrameCount; gfx3d.render3DFrameCount = 0; - this->_videoFrameCount = 0; + this->_videoFrameIndex = 0; } } @@ -8232,7 +8233,7 @@ void GPUSubsystem::Reset() } this->_willFrameSkip = false; - this->_videoFrameCount = 0; + this->_videoFrameIndex = 0; this->_render3DFrameCount = 0; this->_backlightIntensityTotal[NDSDisplayID_Main] = 0.0f; this->_backlightIntensityTotal[NDSDisplayID_Touch] = 0.0f; @@ -8290,6 +8291,7 @@ void GPUSubsystem::ForceFrameStop() if (this->_frameNeedsFinish) { this->_frameNeedsFinish = false; + this->_displayInfo.sequenceNumber++; this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo); } } @@ -9009,6 +9011,7 @@ void GPUSubsystem::RenderLine(const size_t l) if (this->_frameNeedsFinish) { this->_frameNeedsFinish = false; + this->_displayInfo.sequenceNumber++; this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo); } } diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 49eb4ce4b..b88393cd2 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -1156,7 +1156,10 @@ typedef struct // Frame render state information. These fields will change per frame, depending on how each display was rendered. - u8 bufferIndex; // Index of this frame's buffer set. + u8 bufferIndex; // Index of a specific framebuffer page for the GPU emulation to write data into. + // Indexing starts at 0, and must be less than framebufferPageCount. + // A specific index can be chosen at the DidFrameBegin event. + size_t sequenceNumber; // A unique number assigned to each frame that increments for each DidFrameEnd event. Never resets. void *masterNativeBuffer; // Pointer to the head of the master native buffer. void *masterCustomBuffer; // Pointer to the head of the master custom buffer. @@ -1765,7 +1768,7 @@ private: int _pending3DRendererID; bool _needChange3DRenderer; - u32 _videoFrameCount; // Internal variable that increments when a video frame is completed. Resets every 60 video frames. 
+ u32 _videoFrameIndex; // Increments whenever a video frame is completed. Resets every 60 video frames. u32 _render3DFrameCount; // The current 3D rendering frame count, saved to this variable once every 60 video frames. bool _frameNeedsFinish; bool _willFrameSkip; diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.mm b/desmume/src/frontend/cocoa/cocoa_GPU.mm index 8bcdf9ff5..7ba7c6d02 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.mm +++ b/desmume/src/frontend/cocoa/cocoa_GPU.mm @@ -1235,24 +1235,6 @@ public: } } - // As a last resort, search for any buffer that is not currently writing, and then force wait - // on its corresponding semaphore. - if (stillSearching) - { - selectedIndex = (selectedIndex + 1) % pageCount; - for (; selectedIndex != currentIndex; selectedIndex = (selectedIndex + 1) % pageCount) - { - if ( ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) || - ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) || - ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Reading) || - ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_PendingRead) ) - { - stillSearching = false; - break; - } - } - } - return selectedIndex; } diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h index 0bfb3faaf..a5daa76ff 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h @@ -42,17 +42,23 @@ class MacMetalFetchObject; class MacMetalDisplayPresenter; class MacMetalDisplayView; -union MetalTexturePair +struct MetalTexturePair { - id tex[2]; + uint8_t bufferIndex; + size_t fetchSequenceNumber; - struct + union { - id main; - id touch; + id tex[2]; + + struct + { + id main; + id touch; + }; }; }; -typedef union MetalTexturePair MetalTexturePair; +typedef struct MetalTexturePair MetalTexturePair; 
struct MetalRenderFrameInfo { @@ -116,7 +122,6 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; MetalTexturePair texPairFetch; id bceFetch; - BOOL willFetchImmediate; id texLQ2xLUT; id texHQ2xLUT; @@ -124,15 +129,11 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; id texHQ4xLUT; id texCurrentHQnxLUT; - MTLResourceOptions preferredResourceStorageMode; - MTLSize _fetchThreadsPerGroup; MTLSize _fetchThreadGroupsPerGridNative; MTLSize _fetchThreadGroupsPerGridCustom; MTLSize deposterizeThreadsPerGroup; MTLSize deposterizeThreadGroupsPerGrid; - - BOOL _isSharedBufferTextureSupported; } @property (readonly, nonatomic) id device; @@ -149,7 +150,6 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @property (assign) MetalTexturePair texPairFetch; @property (assign) id bceFetch; -@property (assign) BOOL willFetchImmediate; @property (readonly, nonatomic) id texLQ2xLUT; @property (readonly, nonatomic) id texHQ2xLUT; @@ -163,7 +163,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @property (readonly, nonatomic) MTLSize deposterizeThreadGroupsPerGrid; - (void) setFetchBuffersWithDisplayInfo:(const NDSDisplayInfo &)dispInfo; -- (MetalTexturePair) setFetchTextureBindingsAtIndex:(const u8)index commandBuffer:(id)cb; +- (MetalTexturePair) setFetchTextureBindingsAtIndex:(const uint8_t)index commandBuffer:(id)cb; - (void) fetchFromBufferIndex:(const u8)index; - (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id)bce; - (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id)bce; @@ -265,6 +265,8 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; MacMetalDisplayPresenterObject *presenterObject; dispatch_semaphore_t _semDrawable; id layerDrawable; + MetalTexturePair _displayTexturePair; + size_t _displaySequenceNumber; } @property (readonly, nonatomic) 
MacMetalDisplayPresenterObject *presenterObject; diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index ce64ca16d..322f4dc8f 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -37,7 +37,6 @@ @synthesize texPairFetch; @synthesize bceFetch; -@synthesize willFetchImmediate; @synthesize texLQ2xLUT; @synthesize texHQ2xLUT; @@ -76,27 +75,6 @@ _fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain]; deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain]; - if ( IsOSXVersion(10, 13, 0) || IsOSXVersion(10, 13, 1) || IsOSXVersion(10, 13, 2) || IsOSXVersion(10, 13, 3) || IsOSXVersion(10, 13, 4) ) - { - // On macOS High Sierra, there is currently a bug with newBufferWithBytesNoCopy:length:options:deallocator - // that causes it to crash with MTLResourceStorageModeManaged. So for these macOS versions, replace - // MTLResourceStorageModeManaged with MTLResourceStorageModeShared. While this solution causes a very small - // drop in performance, it is still far superior to use Metal rather than OpenGL. - // - // As of this writing, the current version of macOS is v10.13.1. Disabling MTLResourceStorageModeManaged on - // every point release up to v10.13.4 should, I hope, give Apple enough time to fix their bugs with this! - preferredResourceStorageMode = MTLResourceStorageModeShared; - } - else - { - preferredResourceStorageMode = MTLResourceStorageModeManaged; - } - - // TODO: In practice, linear textures with buffer-backed storage won't actually work since synchronization has - // been removed, so keep this feature disabled until synchronization is reworked. 
- //_isSharedBufferTextureSupported = IsOSXVersionSupported(10, 13, 0) && (preferredResourceStorageMode == MTLResourceStorageModeManaged); - _isSharedBufferTextureSupported = NO; - size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]); while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) ) { @@ -197,10 +175,8 @@ width:GPU_FRAMEBUFFER_NATIVE_WIDTH height:GPU_FRAMEBUFFER_NATIVE_HEIGHT mipmapped:NO]; - - [newTexDisplayDesc setResourceOptions:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined]; - [newTexDisplayDesc setStorageMode:MTLStorageModeManaged]; - [newTexDisplayDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined]; + [newTexDisplayDesc setResourceOptions:MTLResourceStorageModePrivate]; + [newTexDisplayDesc setStorageMode:MTLStorageModePrivate]; [newTexDisplayDesc setUsage:MTLTextureUsageShaderRead]; MTLTextureDescriptor *newTexPostprocessDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm @@ -234,10 +210,11 @@ _texDisplayPostprocessCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessDesc]; } + texPairFetch.bufferIndex = 0; + texPairFetch.fetchSequenceNumber = 0; texPairFetch.main = [_texDisplayPostprocessNative[NDSDisplayID_Main][0] retain]; texPairFetch.touch = [_texDisplayPostprocessNative[NDSDisplayID_Touch][0] retain]; bceFetch = nil; - willFetchImmediate = YES; // Set up the HQnx LUT textures. 
SetupHQnxLUTs_Metal(device, _fetchCommandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT); @@ -312,10 +289,8 @@ width:GPU_FRAMEBUFFER_NATIVE_WIDTH height:GPU_FRAMEBUFFER_NATIVE_HEIGHT mipmapped:NO]; - - [newTexDisplayNativeDesc setResourceOptions:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined]; - [newTexDisplayNativeDesc setStorageMode:MTLStorageModeManaged]; - [newTexDisplayNativeDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined]; + [newTexDisplayNativeDesc setResourceOptions:MTLResourceStorageModePrivate]; + [newTexDisplayNativeDesc setStorageMode:MTLStorageModePrivate]; [newTexDisplayNativeDesc setUsage:MTLTextureUsageShaderRead]; MTLTextureDescriptor *newTexPostprocessNativeDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm @@ -330,10 +305,8 @@ width:w height:h mipmapped:NO]; - - [newTexDisplayCustomDesc setResourceOptions:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined]; - [newTexDisplayCustomDesc setStorageMode:MTLStorageModeManaged]; - [newTexDisplayCustomDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined]; + [newTexDisplayCustomDesc setResourceOptions:MTLResourceStorageModePrivate]; + [newTexDisplayCustomDesc setStorageMode:MTLStorageModePrivate]; [newTexDisplayCustomDesc setUsage:MTLTextureUsageShaderRead]; MTLTextureDescriptor *newTexPostprocessCustomDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm @@ -380,22 +353,8 @@ [_texDisplayPostprocessNative[NDSDisplayID_Main][i] release]; [_texDisplayPostprocessNative[NDSDisplayID_Touch][i] release]; -#ifdef MAC_OS_X_VERSION_10_13 - if (_isSharedBufferTextureSupported) - { - if (@available(macOS 10_13, *)) - { - _texDisplayFetchNative[NDSDisplayID_Main][i] = [_bufDisplayFetchNative[NDSDisplayID_Main][i] newTextureWithDescriptor:newTexDisplayNativeDesc offset:0 bytesPerRow:_nativeLineSize]; - _texDisplayFetchNative[NDSDisplayID_Touch][i] = [_bufDisplayFetchNative[NDSDisplayID_Touch][i] 
newTextureWithDescriptor:newTexDisplayNativeDesc offset:0 bytesPerRow:_nativeLineSize]; - } - } - else -#endif - { - _texDisplayFetchNative[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc]; - _texDisplayFetchNative[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc]; - } - + _texDisplayFetchNative[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc]; + _texDisplayFetchNative[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc]; _texDisplayPostprocessNative[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexPostprocessNativeDesc]; _texDisplayPostprocessNative[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessNativeDesc]; } @@ -409,22 +368,8 @@ [_texDisplayPostprocessCustom[NDSDisplayID_Main][i] release]; [_texDisplayPostprocessCustom[NDSDisplayID_Touch][i] release]; -#ifdef MAC_OS_X_VERSION_10_13 - if (_isSharedBufferTextureSupported) - { - if (@available(macOS 10_13, *)) - { - _texDisplayFetchCustom[NDSDisplayID_Main][i] = [_bufDisplayFetchCustom[NDSDisplayID_Main][i] newTextureWithDescriptor:newTexDisplayCustomDesc offset:0 bytesPerRow:_customLineSize]; - _texDisplayFetchCustom[NDSDisplayID_Touch][i] = [_bufDisplayFetchCustom[NDSDisplayID_Touch][i] newTextureWithDescriptor:newTexDisplayCustomDesc offset:0 bytesPerRow:_customLineSize]; - } - } - else -#endif - { - _texDisplayFetchCustom[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc]; - _texDisplayFetchCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc]; - } - + _texDisplayFetchCustom[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc]; + _texDisplayFetchCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc]; _texDisplayPostprocessCustom[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexPostprocessCustomDesc]; 
_texDisplayPostprocessCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessCustomDesc]; } @@ -450,13 +395,14 @@ [oldTexPair.touch release]; } -- (MetalTexturePair) setFetchTextureBindingsAtIndex:(const u8)index commandBuffer:(id)cb +- (MetalTexturePair) setFetchTextureBindingsAtIndex:(const uint8_t)index commandBuffer:(id)cb { - MetalTexturePair targetTexPair = {nil, nil}; const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(index); const bool isMainEnabled = currentDisplayInfo.isDisplayEnabled[NDSDisplayID_Main]; const bool isTouchEnabled = currentDisplayInfo.isDisplayEnabled[NDSDisplayID_Touch]; + MetalTexturePair targetTexPair = {index, currentDisplayInfo.sequenceNumber, nil, nil}; + if (isMainEnabled || isTouchEnabled) { if (isMainEnabled) @@ -637,37 +583,24 @@ id cb = [_fetchCommandQueue commandBufferWithUnretainedReferences]; [cb enqueue]; - [self setWillFetchImmediate:YES]; + semaphore_wait([self semaphoreFramebufferPageAtIndex:index]); + [self setFramebufferState:ClientDisplayBufferState_Reading index:index]; - if (!_isSharedBufferTextureSupported) - { - semaphore_wait([self semaphoreFramebufferPageAtIndex:index]); - [self setFramebufferState:ClientDisplayBufferState_Reading index:index]; - - id bce = [cb blitCommandEncoder]; - [self setBceFetch:bce]; - GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index); - [self setBceFetch:nil]; - [bce endEncoding]; - - if ([self willFetchImmediate]) - { - [self setFramebufferState:ClientDisplayBufferState_Idle index:index]; - semaphore_signal([self semaphoreFramebufferPageAtIndex:index]); - } - else - { - [cb addCompletedHandler:^(id block) { - [self setFramebufferState:ClientDisplayBufferState_Idle index:index]; - semaphore_signal([self semaphoreFramebufferPageAtIndex:index]); - }]; - - [cb commit]; - - cb = [_fetchCommandQueue commandBufferWithUnretainedReferences]; - [cb enqueue]; - } - } + id bce = [cb blitCommandEncoder]; + [self 
setBceFetch:bce]; + GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index); + [self setBceFetch:nil]; + [bce endEncoding]; + + [cb addCompletedHandler:^(id block) { + [self setFramebufferState:ClientDisplayBufferState_Idle index:index]; + semaphore_signal([self semaphoreFramebufferPageAtIndex:index]); + }]; + + [cb commit]; + + cb = [_fetchCommandQueue commandBufferWithUnretainedReferences]; + [cb enqueue]; const MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:index commandBuffer:cb]; [newTexPair.main retain]; @@ -685,56 +618,43 @@ - (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id)bce { - id targetDestination = _texDisplayFetchNative[displayID][bufferIndex]; - const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex); + if (bce == nil) + { + return; + } - if ([self willFetchImmediate]) - { - [targetDestination replaceRegion:MTLRegionMake2D(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT) - mipmapLevel:0 - withBytes:currentDisplayInfo.nativeBuffer[displayID] - bytesPerRow:_nativeLineSize]; - } - else - { - [bce copyFromBuffer:_bufDisplayFetchNative[displayID][bufferIndex] - sourceOffset:0 - sourceBytesPerRow:_nativeLineSize - sourceBytesPerImage:_nativeBufferSize - sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1) - toTexture:targetDestination - destinationSlice:0 - destinationLevel:0 - destinationOrigin:MTLOriginMake(0, 0, 0)]; - } + id targetDestination = _texDisplayFetchNative[displayID][bufferIndex]; + + [bce copyFromBuffer:_bufDisplayFetchNative[displayID][bufferIndex] + sourceOffset:0 + sourceBytesPerRow:_nativeLineSize + sourceBytesPerImage:_nativeBufferSize + sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1) + toTexture:targetDestination + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; } - 
(void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id)bce { + if (bce == nil) + { + return; + } + const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex); id targetDestination = _texDisplayFetchCustom[displayID][bufferIndex]; - if ( (currentDisplayInfo.customWidth < GPU_FRAMEBUFFER_NATIVE_WIDTH * 5) && (currentDisplayInfo.customHeight < GPU_FRAMEBUFFER_NATIVE_HEIGHT * 5) ) - { - [targetDestination replaceRegion:MTLRegionMake2D(0, 0, currentDisplayInfo.customWidth, currentDisplayInfo.customHeight) - mipmapLevel:0 - withBytes:currentDisplayInfo.customBuffer[displayID] - bytesPerRow:_customLineSize]; - } - else - { - [self setWillFetchImmediate:NO]; - - [bce copyFromBuffer:_bufDisplayFetchCustom[displayID][bufferIndex] - sourceOffset:0 - sourceBytesPerRow:_customLineSize - sourceBytesPerImage:_customBufferSize - sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1) - toTexture:targetDestination - destinationSlice:0 - destinationLevel:0 - destinationOrigin:MTLOriginMake(0, 0, 0)]; - } + [bce copyFromBuffer:_bufDisplayFetchCustom[displayID][bufferIndex] + sourceOffset:0 + sourceBytesPerRow:_customLineSize + sourceBytesPerImage:_customBufferSize + sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1) + toTexture:targetDestination + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; } - (void) flushMultipleViews:(const std::vector &)cdvFlushList @@ -851,6 +771,8 @@ needsScreenVerticesUpdate = YES; needsHUDVerticesUpdate = YES; + texPairProcess.bufferIndex = 0; + texPairProcess.fetchSequenceNumber = 0; texPairProcess.main = nil; texPairProcess.touch = nil; @@ -1178,8 +1100,11 @@ _hudTexCoordBuffer[i] = [[sharedData device] newBufferWithLength:HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined]; } - 
texPairProcess.main = [[sharedData texPairFetch].main retain]; - texPairProcess.touch = [[sharedData texPairFetch].touch retain]; + MetalTexturePair texPairFetch = [sharedData texPairFetch]; + texPairProcess.bufferIndex = texPairFetch.bufferIndex; + texPairProcess.fetchSequenceNumber = texPairFetch.fetchSequenceNumber; + texPairProcess.main = [texPairFetch.main retain]; + texPairProcess.touch = [texPairFetch.touch retain]; VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main); _bufCPUFilterSrcMain = [[sharedData device] newBufferWithBytesNoCopy:vfMain->GetSrcBufferPtr() @@ -1322,16 +1247,16 @@ - (void) processDisplays { - const uint8_t bufferIndex = [sharedData GPUFetchObject]->GetLastFetchIndex(); - const NDSDisplayInfo &fetchDisplayInfo = [sharedData GPUFetchObject]->GetFetchDisplayInfoForBufferIndex(bufferIndex); + const MetalTexturePair texFetch = [sharedData texPairFetch]; + const NDSDisplayInfo &fetchDisplayInfo = [sharedData GPUFetchObject]->GetFetchDisplayInfoForBufferIndex(texFetch.bufferIndex); const ClientDisplayMode mode = cdp->GetPresenterProperties().mode; const bool useDeposterize = cdp->GetSourceDeposterize(); const NDSDisplayID selectedDisplaySourceMain = cdp->GetSelectedDisplaySourceForDisplay(NDSDisplayID_Main); const NDSDisplayID selectedDisplaySourceTouch = cdp->GetSelectedDisplaySourceForDisplay(NDSDisplayID_Touch); - const MetalTexturePair texFetch = [sharedData texPairFetch]; - MetalTexturePair newTexProcess; + newTexProcess.bufferIndex = texFetch.bufferIndex; + newTexProcess.fetchSequenceNumber = texFetch.fetchSequenceNumber; newTexProcess.main = (selectedDisplaySourceMain == NDSDisplayID_Main) ? texFetch.main : texFetch.touch; newTexProcess.touch = (selectedDisplaySourceTouch == NDSDisplayID_Touch) ? texFetch.touch : texFetch.main; @@ -1867,24 +1792,6 @@ } } - // As a last resort, search for any buffer that is not currently writing, and then force wait - // on its corresponding semaphore. 
- if (stillSearching) - { - selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT; - for (; selectedIndex != mrfi.renderIndex; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT) - { - if ( ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) || - ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) || - ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Reading) || - ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_PendingRead) ) - { - stillSearching = false; - break; - } - } - } - if (forceWait) { dispatch_semaphore_wait(_semRenderBuffers[selectedIndex], DISPATCH_TIME_FOREVER); @@ -2189,6 +2096,12 @@ _cdv = NULL; _semDrawable = dispatch_semaphore_create(3); layerDrawable = nil; + _displaySequenceNumber = 0; + + _displayTexturePair.bufferIndex = 0; + _displayTexturePair.fetchSequenceNumber = 0; + _displayTexturePair.main = nil; + _displayTexturePair.touch = nil; presenterObject = thePresenterObject; if (thePresenterObject != nil) @@ -2207,6 +2120,9 @@ [self setLayerDrawable:nil]; dispatch_release(_semDrawable); + [_displayTexturePair.main release]; + [_displayTexturePair.touch release]; + [super dealloc]; } @@ -2223,6 +2139,22 @@ { @autoreleasepool { + const MetalTexturePair texProcess = [presenterObject texPairProcess]; + + if (texProcess.fetchSequenceNumber >= _displayTexturePair.fetchSequenceNumber) + { + id oldTexMain = _displayTexturePair.main; + id oldTexTouch = _displayTexturePair.touch; + + _displayTexturePair.bufferIndex = texProcess.bufferIndex; + _displayTexturePair.fetchSequenceNumber = texProcess.fetchSequenceNumber; + _displayTexturePair.main = [texProcess.main retain]; + _displayTexturePair.touch = [texProcess.touch retain]; + + [oldTexMain release]; + [oldTexTouch release]; + } + // Now that everything is set up, go ahead and draw everything. 
dispatch_semaphore_wait(_semDrawable, DISPATCH_TIME_FOREVER); id drawable = [self nextDrawable]; @@ -2231,13 +2163,12 @@ { [[presenterObject colorAttachment0Desc] setTexture:[drawable texture]]; - const MetalTexturePair texProcess = [presenterObject texPairProcess]; const MetalRenderFrameInfo mrfi = [presenterObject renderFrameInfo]; [presenterObject renderForCommandBuffer:cb outputPipelineState:[presenterObject outputDrawablePipeline] hudPipelineState:[[presenterObject sharedData] hudPipeline] - texDisplays:texProcess + texDisplays:_displayTexturePair mrfi:mrfi doYFlip:NO];