Cocoa Port: Improve the performance and robustness of Metal display views.

- Add a unique sequence number to fetched frames to ensure that older frames are not drawn after newer frames.
- After much research, finally settle on a method for fetching the NDS framebuffers -- using an MTLBlitCommandEncoder to blit an MTLBuffer to an MTLTexture. It is faster than uploading a texture using [id<MTLTexture> replaceRegion:mipmapLevel:withBytes:bytesPerRow:], and also faster than using a linear texture backed by pinned memory. Going forward, this will be the method used for fetching framebuffers in Metal.
This commit is contained in:
rogerman 2018-10-25 01:05:36 -07:00
parent 064527e24e
commit da3970d817
5 changed files with 126 additions and 205 deletions

View File

@ -8104,7 +8104,7 @@ GPUSubsystem::GPUSubsystem()
_pending3DRendererID = RENDERID_NULL; _pending3DRendererID = RENDERID_NULL;
_needChange3DRenderer = false; _needChange3DRenderer = false;
_videoFrameCount = 0; _videoFrameIndex = 0;
_render3DFrameCount = 0; _render3DFrameCount = 0;
_frameNeedsFinish = false; _frameNeedsFinish = false;
_willFrameSkip = false; _willFrameSkip = false;
@ -8134,6 +8134,7 @@ GPUSubsystem::GPUSubsystem()
_displayInfo.isDisplayEnabled[NDSDisplayID_Touch] = true; _displayInfo.isDisplayEnabled[NDSDisplayID_Touch] = true;
_displayInfo.bufferIndex = 0; _displayInfo.bufferIndex = 0;
_displayInfo.sequenceNumber = 0;
_displayInfo.masterNativeBuffer = _masterFramebuffer; _displayInfo.masterNativeBuffer = _masterFramebuffer;
_displayInfo.masterCustomBuffer = (u8 *)_masterFramebuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2 * _displayInfo.pixelBytes); _displayInfo.masterCustomBuffer = (u8 *)_masterFramebuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2 * _displayInfo.pixelBytes);
@ -8205,12 +8206,12 @@ GPUSubsystem::~GPUSubsystem()
void GPUSubsystem::_UpdateFPSRender3D() void GPUSubsystem::_UpdateFPSRender3D()
{ {
this->_videoFrameCount++; this->_videoFrameIndex++;
if (this->_videoFrameCount == 60) if (this->_videoFrameIndex == 60)
{ {
this->_render3DFrameCount = gfx3d.render3DFrameCount; this->_render3DFrameCount = gfx3d.render3DFrameCount;
gfx3d.render3DFrameCount = 0; gfx3d.render3DFrameCount = 0;
this->_videoFrameCount = 0; this->_videoFrameIndex = 0;
} }
} }
@ -8232,7 +8233,7 @@ void GPUSubsystem::Reset()
} }
this->_willFrameSkip = false; this->_willFrameSkip = false;
this->_videoFrameCount = 0; this->_videoFrameIndex = 0;
this->_render3DFrameCount = 0; this->_render3DFrameCount = 0;
this->_backlightIntensityTotal[NDSDisplayID_Main] = 0.0f; this->_backlightIntensityTotal[NDSDisplayID_Main] = 0.0f;
this->_backlightIntensityTotal[NDSDisplayID_Touch] = 0.0f; this->_backlightIntensityTotal[NDSDisplayID_Touch] = 0.0f;
@ -8290,6 +8291,7 @@ void GPUSubsystem::ForceFrameStop()
if (this->_frameNeedsFinish) if (this->_frameNeedsFinish)
{ {
this->_frameNeedsFinish = false; this->_frameNeedsFinish = false;
this->_displayInfo.sequenceNumber++;
this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo); this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo);
} }
} }
@ -9009,6 +9011,7 @@ void GPUSubsystem::RenderLine(const size_t l)
if (this->_frameNeedsFinish) if (this->_frameNeedsFinish)
{ {
this->_frameNeedsFinish = false; this->_frameNeedsFinish = false;
this->_displayInfo.sequenceNumber++;
this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo); this->_event->DidFrameEnd(this->_willFrameSkip, this->_displayInfo);
} }
} }

View File

@ -1156,7 +1156,10 @@ typedef struct
// Frame render state information. These fields will change per frame, depending on how each display was rendered. // Frame render state information. These fields will change per frame, depending on how each display was rendered.
u8 bufferIndex; // Index of this frame's buffer set. u8 bufferIndex; // Index of a specific framebuffer page for the GPU emulation to write data into.
// Indexing starts at 0, and must be less than framebufferPageCount.
// A specific index can be chosen at the DidFrameBegin event.
size_t sequenceNumber; // A unique number assigned to each frame that increments for each DidFrameEnd event. Never resets.
void *masterNativeBuffer; // Pointer to the head of the master native buffer. void *masterNativeBuffer; // Pointer to the head of the master native buffer.
void *masterCustomBuffer; // Pointer to the head of the master custom buffer. void *masterCustomBuffer; // Pointer to the head of the master custom buffer.
@ -1765,7 +1768,7 @@ private:
int _pending3DRendererID; int _pending3DRendererID;
bool _needChange3DRenderer; bool _needChange3DRenderer;
u32 _videoFrameCount; // Internal variable that increments when a video frame is completed. Resets every 60 video frames. u32 _videoFrameIndex; // Increments whenever a video frame is completed. Resets every 60 video frames.
u32 _render3DFrameCount; // The current 3D rendering frame count, saved to this variable once every 60 video frames. u32 _render3DFrameCount; // The current 3D rendering frame count, saved to this variable once every 60 video frames.
bool _frameNeedsFinish; bool _frameNeedsFinish;
bool _willFrameSkip; bool _willFrameSkip;

View File

@ -1235,24 +1235,6 @@ public:
} }
} }
// As a last resort, search for any buffer that is not currently writing, and then force wait
// on its corresponding semaphore.
if (stillSearching)
{
selectedIndex = (selectedIndex + 1) % pageCount;
for (; selectedIndex != currentIndex; selectedIndex = (selectedIndex + 1) % pageCount)
{
if ( ([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) ||
([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) ||
([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Reading) ||
([self framebufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_PendingRead) )
{
stillSearching = false;
break;
}
}
}
return selectedIndex; return selectedIndex;
} }

View File

@ -42,17 +42,23 @@ class MacMetalFetchObject;
class MacMetalDisplayPresenter; class MacMetalDisplayPresenter;
class MacMetalDisplayView; class MacMetalDisplayView;
union MetalTexturePair struct MetalTexturePair
{ {
id<MTLTexture> tex[2]; uint8_t bufferIndex;
size_t fetchSequenceNumber;
struct union
{ {
id<MTLTexture> main; id<MTLTexture> tex[2];
id<MTLTexture> touch;
struct
{
id<MTLTexture> main;
id<MTLTexture> touch;
};
}; };
}; };
typedef union MetalTexturePair MetalTexturePair; typedef struct MetalTexturePair MetalTexturePair;
struct MetalRenderFrameInfo struct MetalRenderFrameInfo
{ {
@ -116,7 +122,6 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
MetalTexturePair texPairFetch; MetalTexturePair texPairFetch;
id<MTLBlitCommandEncoder> bceFetch; id<MTLBlitCommandEncoder> bceFetch;
BOOL willFetchImmediate;
id<MTLTexture> texLQ2xLUT; id<MTLTexture> texLQ2xLUT;
id<MTLTexture> texHQ2xLUT; id<MTLTexture> texHQ2xLUT;
@ -124,15 +129,11 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
id<MTLTexture> texHQ4xLUT; id<MTLTexture> texHQ4xLUT;
id<MTLTexture> texCurrentHQnxLUT; id<MTLTexture> texCurrentHQnxLUT;
MTLResourceOptions preferredResourceStorageMode;
MTLSize _fetchThreadsPerGroup; MTLSize _fetchThreadsPerGroup;
MTLSize _fetchThreadGroupsPerGridNative; MTLSize _fetchThreadGroupsPerGridNative;
MTLSize _fetchThreadGroupsPerGridCustom; MTLSize _fetchThreadGroupsPerGridCustom;
MTLSize deposterizeThreadsPerGroup; MTLSize deposterizeThreadsPerGroup;
MTLSize deposterizeThreadGroupsPerGrid; MTLSize deposterizeThreadGroupsPerGrid;
BOOL _isSharedBufferTextureSupported;
} }
@property (readonly, nonatomic) id<MTLDevice> device; @property (readonly, nonatomic) id<MTLDevice> device;
@ -149,7 +150,6 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
@property (assign) MetalTexturePair texPairFetch; @property (assign) MetalTexturePair texPairFetch;
@property (assign) id<MTLBlitCommandEncoder> bceFetch; @property (assign) id<MTLBlitCommandEncoder> bceFetch;
@property (assign) BOOL willFetchImmediate;
@property (readonly, nonatomic) id<MTLTexture> texLQ2xLUT; @property (readonly, nonatomic) id<MTLTexture> texLQ2xLUT;
@property (readonly, nonatomic) id<MTLTexture> texHQ2xLUT; @property (readonly, nonatomic) id<MTLTexture> texHQ2xLUT;
@ -163,7 +163,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
@property (readonly, nonatomic) MTLSize deposterizeThreadGroupsPerGrid; @property (readonly, nonatomic) MTLSize deposterizeThreadGroupsPerGrid;
- (void) setFetchBuffersWithDisplayInfo:(const NDSDisplayInfo &)dispInfo; - (void) setFetchBuffersWithDisplayInfo:(const NDSDisplayInfo &)dispInfo;
- (MetalTexturePair) setFetchTextureBindingsAtIndex:(const u8)index commandBuffer:(id<MTLCommandBuffer>)cb; - (MetalTexturePair) setFetchTextureBindingsAtIndex:(const uint8_t)index commandBuffer:(id<MTLCommandBuffer>)cb;
- (void) fetchFromBufferIndex:(const u8)index; - (void) fetchFromBufferIndex:(const u8)index;
- (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce; - (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce;
- (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce; - (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce;
@ -265,6 +265,8 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
MacMetalDisplayPresenterObject *presenterObject; MacMetalDisplayPresenterObject *presenterObject;
dispatch_semaphore_t _semDrawable; dispatch_semaphore_t _semDrawable;
id<CAMetalDrawable> layerDrawable; id<CAMetalDrawable> layerDrawable;
MetalTexturePair _displayTexturePair;
size_t _displaySequenceNumber;
} }
@property (readonly, nonatomic) MacMetalDisplayPresenterObject *presenterObject; @property (readonly, nonatomic) MacMetalDisplayPresenterObject *presenterObject;

View File

@ -37,7 +37,6 @@
@synthesize texPairFetch; @synthesize texPairFetch;
@synthesize bceFetch; @synthesize bceFetch;
@synthesize willFetchImmediate;
@synthesize texLQ2xLUT; @synthesize texLQ2xLUT;
@synthesize texHQ2xLUT; @synthesize texHQ2xLUT;
@ -76,27 +75,6 @@
_fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain]; _fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain];
deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain]; deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain];
if ( IsOSXVersion(10, 13, 0) || IsOSXVersion(10, 13, 1) || IsOSXVersion(10, 13, 2) || IsOSXVersion(10, 13, 3) || IsOSXVersion(10, 13, 4) )
{
// On macOS High Sierra, there is currently a bug with newBufferWithBytesNoCopy:length:options:deallocator
// that causes it to crash with MTLResourceStorageModeManaged. So for these macOS versions, replace
// MTLResourceStorageModeManaged with MTLResourceStorageModeShared. While this solution causes a very small
// drop in performance, it is still far superior to use Metal rather than OpenGL.
//
// As of this writing, the current version of macOS is v10.13.1. Disabling MTLResourceStorageModeManaged on
// every point release up to v10.13.4 should, I hope, give Apple enough time to fix their bugs with this!
preferredResourceStorageMode = MTLResourceStorageModeShared;
}
else
{
preferredResourceStorageMode = MTLResourceStorageModeManaged;
}
// TODO: In practice, linear textures with buffer-backed storage won't actually work since synchronization has
// been removed, so keep this feature disabled until synchronization is reworked.
//_isSharedBufferTextureSupported = IsOSXVersionSupported(10, 13, 0) && (preferredResourceStorageMode == MTLResourceStorageModeManaged);
_isSharedBufferTextureSupported = NO;
size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]); size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]);
while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) ) while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
{ {
@ -197,10 +175,8 @@
width:GPU_FRAMEBUFFER_NATIVE_WIDTH width:GPU_FRAMEBUFFER_NATIVE_WIDTH
height:GPU_FRAMEBUFFER_NATIVE_HEIGHT height:GPU_FRAMEBUFFER_NATIVE_HEIGHT
mipmapped:NO]; mipmapped:NO];
[newTexDisplayDesc setResourceOptions:MTLResourceStorageModePrivate];
[newTexDisplayDesc setResourceOptions:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined]; [newTexDisplayDesc setStorageMode:MTLStorageModePrivate];
[newTexDisplayDesc setStorageMode:MTLStorageModeManaged];
[newTexDisplayDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
[newTexDisplayDesc setUsage:MTLTextureUsageShaderRead]; [newTexDisplayDesc setUsage:MTLTextureUsageShaderRead];
MTLTextureDescriptor *newTexPostprocessDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm MTLTextureDescriptor *newTexPostprocessDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
@ -234,10 +210,11 @@
_texDisplayPostprocessCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessDesc]; _texDisplayPostprocessCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessDesc];
} }
texPairFetch.bufferIndex = 0;
texPairFetch.fetchSequenceNumber = 0;
texPairFetch.main = [_texDisplayPostprocessNative[NDSDisplayID_Main][0] retain]; texPairFetch.main = [_texDisplayPostprocessNative[NDSDisplayID_Main][0] retain];
texPairFetch.touch = [_texDisplayPostprocessNative[NDSDisplayID_Touch][0] retain]; texPairFetch.touch = [_texDisplayPostprocessNative[NDSDisplayID_Touch][0] retain];
bceFetch = nil; bceFetch = nil;
willFetchImmediate = YES;
// Set up the HQnx LUT textures. // Set up the HQnx LUT textures.
SetupHQnxLUTs_Metal(device, _fetchCommandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT); SetupHQnxLUTs_Metal(device, _fetchCommandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT);
@ -312,10 +289,8 @@
width:GPU_FRAMEBUFFER_NATIVE_WIDTH width:GPU_FRAMEBUFFER_NATIVE_WIDTH
height:GPU_FRAMEBUFFER_NATIVE_HEIGHT height:GPU_FRAMEBUFFER_NATIVE_HEIGHT
mipmapped:NO]; mipmapped:NO];
[newTexDisplayNativeDesc setResourceOptions:MTLResourceStorageModePrivate];
[newTexDisplayNativeDesc setResourceOptions:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined]; [newTexDisplayNativeDesc setStorageMode:MTLStorageModePrivate];
[newTexDisplayNativeDesc setStorageMode:MTLStorageModeManaged];
[newTexDisplayNativeDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
[newTexDisplayNativeDesc setUsage:MTLTextureUsageShaderRead]; [newTexDisplayNativeDesc setUsage:MTLTextureUsageShaderRead];
MTLTextureDescriptor *newTexPostprocessNativeDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm MTLTextureDescriptor *newTexPostprocessNativeDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
@ -330,10 +305,8 @@
width:w width:w
height:h height:h
mipmapped:NO]; mipmapped:NO];
[newTexDisplayCustomDesc setResourceOptions:MTLResourceStorageModePrivate];
[newTexDisplayCustomDesc setResourceOptions:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined]; [newTexDisplayCustomDesc setStorageMode:MTLStorageModePrivate];
[newTexDisplayCustomDesc setStorageMode:MTLStorageModeManaged];
[newTexDisplayCustomDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
[newTexDisplayCustomDesc setUsage:MTLTextureUsageShaderRead]; [newTexDisplayCustomDesc setUsage:MTLTextureUsageShaderRead];
MTLTextureDescriptor *newTexPostprocessCustomDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm MTLTextureDescriptor *newTexPostprocessCustomDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
@ -380,22 +353,8 @@
[_texDisplayPostprocessNative[NDSDisplayID_Main][i] release]; [_texDisplayPostprocessNative[NDSDisplayID_Main][i] release];
[_texDisplayPostprocessNative[NDSDisplayID_Touch][i] release]; [_texDisplayPostprocessNative[NDSDisplayID_Touch][i] release];
#ifdef MAC_OS_X_VERSION_10_13 _texDisplayFetchNative[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc];
if (_isSharedBufferTextureSupported) _texDisplayFetchNative[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc];
{
if (@available(macOS 10_13, *))
{
_texDisplayFetchNative[NDSDisplayID_Main][i] = [_bufDisplayFetchNative[NDSDisplayID_Main][i] newTextureWithDescriptor:newTexDisplayNativeDesc offset:0 bytesPerRow:_nativeLineSize];
_texDisplayFetchNative[NDSDisplayID_Touch][i] = [_bufDisplayFetchNative[NDSDisplayID_Touch][i] newTextureWithDescriptor:newTexDisplayNativeDesc offset:0 bytesPerRow:_nativeLineSize];
}
}
else
#endif
{
_texDisplayFetchNative[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc];
_texDisplayFetchNative[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayNativeDesc];
}
_texDisplayPostprocessNative[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexPostprocessNativeDesc]; _texDisplayPostprocessNative[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexPostprocessNativeDesc];
_texDisplayPostprocessNative[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessNativeDesc]; _texDisplayPostprocessNative[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessNativeDesc];
} }
@ -409,22 +368,8 @@
[_texDisplayPostprocessCustom[NDSDisplayID_Main][i] release]; [_texDisplayPostprocessCustom[NDSDisplayID_Main][i] release];
[_texDisplayPostprocessCustom[NDSDisplayID_Touch][i] release]; [_texDisplayPostprocessCustom[NDSDisplayID_Touch][i] release];
#ifdef MAC_OS_X_VERSION_10_13 _texDisplayFetchCustom[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc];
if (_isSharedBufferTextureSupported) _texDisplayFetchCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc];
{
if (@available(macOS 10_13, *))
{
_texDisplayFetchCustom[NDSDisplayID_Main][i] = [_bufDisplayFetchCustom[NDSDisplayID_Main][i] newTextureWithDescriptor:newTexDisplayCustomDesc offset:0 bytesPerRow:_customLineSize];
_texDisplayFetchCustom[NDSDisplayID_Touch][i] = [_bufDisplayFetchCustom[NDSDisplayID_Touch][i] newTextureWithDescriptor:newTexDisplayCustomDesc offset:0 bytesPerRow:_customLineSize];
}
}
else
#endif
{
_texDisplayFetchCustom[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc];
_texDisplayFetchCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexDisplayCustomDesc];
}
_texDisplayPostprocessCustom[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexPostprocessCustomDesc]; _texDisplayPostprocessCustom[NDSDisplayID_Main][i] = [device newTextureWithDescriptor:newTexPostprocessCustomDesc];
_texDisplayPostprocessCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessCustomDesc]; _texDisplayPostprocessCustom[NDSDisplayID_Touch][i] = [device newTextureWithDescriptor:newTexPostprocessCustomDesc];
} }
@ -450,13 +395,14 @@
[oldTexPair.touch release]; [oldTexPair.touch release];
} }
- (MetalTexturePair) setFetchTextureBindingsAtIndex:(const u8)index commandBuffer:(id<MTLCommandBuffer>)cb - (MetalTexturePair) setFetchTextureBindingsAtIndex:(const uint8_t)index commandBuffer:(id<MTLCommandBuffer>)cb
{ {
MetalTexturePair targetTexPair = {nil, nil};
const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(index); const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(index);
const bool isMainEnabled = currentDisplayInfo.isDisplayEnabled[NDSDisplayID_Main]; const bool isMainEnabled = currentDisplayInfo.isDisplayEnabled[NDSDisplayID_Main];
const bool isTouchEnabled = currentDisplayInfo.isDisplayEnabled[NDSDisplayID_Touch]; const bool isTouchEnabled = currentDisplayInfo.isDisplayEnabled[NDSDisplayID_Touch];
MetalTexturePair targetTexPair = {index, currentDisplayInfo.sequenceNumber, nil, nil};
if (isMainEnabled || isTouchEnabled) if (isMainEnabled || isTouchEnabled)
{ {
if (isMainEnabled) if (isMainEnabled)
@ -637,37 +583,24 @@
id<MTLCommandBuffer> cb = [_fetchCommandQueue commandBufferWithUnretainedReferences]; id<MTLCommandBuffer> cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
[cb enqueue]; [cb enqueue];
[self setWillFetchImmediate:YES]; semaphore_wait([self semaphoreFramebufferPageAtIndex:index]);
[self setFramebufferState:ClientDisplayBufferState_Reading index:index];
if (!_isSharedBufferTextureSupported) id<MTLBlitCommandEncoder> bce = [cb blitCommandEncoder];
{ [self setBceFetch:bce];
semaphore_wait([self semaphoreFramebufferPageAtIndex:index]); GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index);
[self setFramebufferState:ClientDisplayBufferState_Reading index:index]; [self setBceFetch:nil];
[bce endEncoding];
id<MTLBlitCommandEncoder> bce = [cb blitCommandEncoder];
[self setBceFetch:bce]; [cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index); [self setFramebufferState:ClientDisplayBufferState_Idle index:index];
[self setBceFetch:nil]; semaphore_signal([self semaphoreFramebufferPageAtIndex:index]);
[bce endEncoding]; }];
if ([self willFetchImmediate]) [cb commit];
{
[self setFramebufferState:ClientDisplayBufferState_Idle index:index]; cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
semaphore_signal([self semaphoreFramebufferPageAtIndex:index]); [cb enqueue];
}
else
{
[cb addCompletedHandler:^(id<MTLCommandBuffer> block) {
[self setFramebufferState:ClientDisplayBufferState_Idle index:index];
semaphore_signal([self semaphoreFramebufferPageAtIndex:index]);
}];
[cb commit];
cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
[cb enqueue];
}
}
const MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:index commandBuffer:cb]; const MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:index commandBuffer:cb];
[newTexPair.main retain]; [newTexPair.main retain];
@ -685,56 +618,43 @@
- (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce - (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce
{ {
id<MTLTexture> targetDestination = _texDisplayFetchNative[displayID][bufferIndex]; if (bce == nil)
const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex); {
return;
}
if ([self willFetchImmediate]) id<MTLTexture> targetDestination = _texDisplayFetchNative[displayID][bufferIndex];
{
[targetDestination replaceRegion:MTLRegionMake2D(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT) [bce copyFromBuffer:_bufDisplayFetchNative[displayID][bufferIndex]
mipmapLevel:0 sourceOffset:0
withBytes:currentDisplayInfo.nativeBuffer[displayID] sourceBytesPerRow:_nativeLineSize
bytesPerRow:_nativeLineSize]; sourceBytesPerImage:_nativeBufferSize
} sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1)
else toTexture:targetDestination
{ destinationSlice:0
[bce copyFromBuffer:_bufDisplayFetchNative[displayID][bufferIndex] destinationLevel:0
sourceOffset:0 destinationOrigin:MTLOriginMake(0, 0, 0)];
sourceBytesPerRow:_nativeLineSize
sourceBytesPerImage:_nativeBufferSize
sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1)
toTexture:targetDestination
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
} }
- (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce - (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex blitCommandEncoder:(id<MTLBlitCommandEncoder>)bce
{ {
if (bce == nil)
{
return;
}
const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex); const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex);
id<MTLTexture> targetDestination = _texDisplayFetchCustom[displayID][bufferIndex]; id<MTLTexture> targetDestination = _texDisplayFetchCustom[displayID][bufferIndex];
if ( (currentDisplayInfo.customWidth < GPU_FRAMEBUFFER_NATIVE_WIDTH * 5) && (currentDisplayInfo.customHeight < GPU_FRAMEBUFFER_NATIVE_HEIGHT * 5) ) [bce copyFromBuffer:_bufDisplayFetchCustom[displayID][bufferIndex]
{ sourceOffset:0
[targetDestination replaceRegion:MTLRegionMake2D(0, 0, currentDisplayInfo.customWidth, currentDisplayInfo.customHeight) sourceBytesPerRow:_customLineSize
mipmapLevel:0 sourceBytesPerImage:_customBufferSize
withBytes:currentDisplayInfo.customBuffer[displayID] sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1)
bytesPerRow:_customLineSize]; toTexture:targetDestination
} destinationSlice:0
else destinationLevel:0
{ destinationOrigin:MTLOriginMake(0, 0, 0)];
[self setWillFetchImmediate:NO];
[bce copyFromBuffer:_bufDisplayFetchCustom[displayID][bufferIndex]
sourceOffset:0
sourceBytesPerRow:_customLineSize
sourceBytesPerImage:_customBufferSize
sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1)
toTexture:targetDestination
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
} }
- (void) flushMultipleViews:(const std::vector<ClientDisplay3DView *> &)cdvFlushList - (void) flushMultipleViews:(const std::vector<ClientDisplay3DView *> &)cdvFlushList
@ -851,6 +771,8 @@
needsScreenVerticesUpdate = YES; needsScreenVerticesUpdate = YES;
needsHUDVerticesUpdate = YES; needsHUDVerticesUpdate = YES;
texPairProcess.bufferIndex = 0;
texPairProcess.fetchSequenceNumber = 0;
texPairProcess.main = nil; texPairProcess.main = nil;
texPairProcess.touch = nil; texPairProcess.touch = nil;
@ -1178,8 +1100,11 @@
_hudTexCoordBuffer[i] = [[sharedData device] newBufferWithLength:HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined]; _hudTexCoordBuffer[i] = [[sharedData device] newBufferWithLength:HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined];
} }
texPairProcess.main = [[sharedData texPairFetch].main retain]; MetalTexturePair texPairFetch = [sharedData texPairFetch];
texPairProcess.touch = [[sharedData texPairFetch].touch retain]; texPairProcess.bufferIndex = texPairFetch.bufferIndex;
texPairProcess.fetchSequenceNumber = texPairFetch.fetchSequenceNumber;
texPairProcess.main = [texPairFetch.main retain];
texPairProcess.touch = [texPairFetch.touch retain];
VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main); VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main);
_bufCPUFilterSrcMain = [[sharedData device] newBufferWithBytesNoCopy:vfMain->GetSrcBufferPtr() _bufCPUFilterSrcMain = [[sharedData device] newBufferWithBytesNoCopy:vfMain->GetSrcBufferPtr()
@ -1322,16 +1247,16 @@
- (void) processDisplays - (void) processDisplays
{ {
const uint8_t bufferIndex = [sharedData GPUFetchObject]->GetLastFetchIndex(); const MetalTexturePair texFetch = [sharedData texPairFetch];
const NDSDisplayInfo &fetchDisplayInfo = [sharedData GPUFetchObject]->GetFetchDisplayInfoForBufferIndex(bufferIndex); const NDSDisplayInfo &fetchDisplayInfo = [sharedData GPUFetchObject]->GetFetchDisplayInfoForBufferIndex(texFetch.bufferIndex);
const ClientDisplayMode mode = cdp->GetPresenterProperties().mode; const ClientDisplayMode mode = cdp->GetPresenterProperties().mode;
const bool useDeposterize = cdp->GetSourceDeposterize(); const bool useDeposterize = cdp->GetSourceDeposterize();
const NDSDisplayID selectedDisplaySourceMain = cdp->GetSelectedDisplaySourceForDisplay(NDSDisplayID_Main); const NDSDisplayID selectedDisplaySourceMain = cdp->GetSelectedDisplaySourceForDisplay(NDSDisplayID_Main);
const NDSDisplayID selectedDisplaySourceTouch = cdp->GetSelectedDisplaySourceForDisplay(NDSDisplayID_Touch); const NDSDisplayID selectedDisplaySourceTouch = cdp->GetSelectedDisplaySourceForDisplay(NDSDisplayID_Touch);
const MetalTexturePair texFetch = [sharedData texPairFetch];
MetalTexturePair newTexProcess; MetalTexturePair newTexProcess;
newTexProcess.bufferIndex = texFetch.bufferIndex;
newTexProcess.fetchSequenceNumber = texFetch.fetchSequenceNumber;
newTexProcess.main = (selectedDisplaySourceMain == NDSDisplayID_Main) ? texFetch.main : texFetch.touch; newTexProcess.main = (selectedDisplaySourceMain == NDSDisplayID_Main) ? texFetch.main : texFetch.touch;
newTexProcess.touch = (selectedDisplaySourceTouch == NDSDisplayID_Touch) ? texFetch.touch : texFetch.main; newTexProcess.touch = (selectedDisplaySourceTouch == NDSDisplayID_Touch) ? texFetch.touch : texFetch.main;
@ -1867,24 +1792,6 @@
} }
} }
// As a last resort, search for any buffer that is not currently writing, and then force wait
// on its corresponding semaphore.
if (stillSearching)
{
selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT;
for (; selectedIndex != mrfi.renderIndex; selectedIndex = (selectedIndex + 1) % RENDER_BUFFER_COUNT)
{
if ( ([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Idle) ||
([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Ready) ||
([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_Reading) ||
([self renderBufferStateAtIndex:selectedIndex] == ClientDisplayBufferState_PendingRead) )
{
stillSearching = false;
break;
}
}
}
if (forceWait) if (forceWait)
{ {
dispatch_semaphore_wait(_semRenderBuffers[selectedIndex], DISPATCH_TIME_FOREVER); dispatch_semaphore_wait(_semRenderBuffers[selectedIndex], DISPATCH_TIME_FOREVER);
@ -2189,6 +2096,12 @@
_cdv = NULL; _cdv = NULL;
_semDrawable = dispatch_semaphore_create(3); _semDrawable = dispatch_semaphore_create(3);
layerDrawable = nil; layerDrawable = nil;
_displaySequenceNumber = 0;
_displayTexturePair.bufferIndex = 0;
_displayTexturePair.fetchSequenceNumber = 0;
_displayTexturePair.main = nil;
_displayTexturePair.touch = nil;
presenterObject = thePresenterObject; presenterObject = thePresenterObject;
if (thePresenterObject != nil) if (thePresenterObject != nil)
@ -2207,6 +2120,9 @@
[self setLayerDrawable:nil]; [self setLayerDrawable:nil];
dispatch_release(_semDrawable); dispatch_release(_semDrawable);
[_displayTexturePair.main release];
[_displayTexturePair.touch release];
[super dealloc]; [super dealloc];
} }
@ -2223,6 +2139,22 @@
{ {
@autoreleasepool @autoreleasepool
{ {
const MetalTexturePair texProcess = [presenterObject texPairProcess];
if (texProcess.fetchSequenceNumber >= _displayTexturePair.fetchSequenceNumber)
{
id<MTLTexture> oldTexMain = _displayTexturePair.main;
id<MTLTexture> oldTexTouch = _displayTexturePair.touch;
_displayTexturePair.bufferIndex = texProcess.bufferIndex;
_displayTexturePair.fetchSequenceNumber = texProcess.fetchSequenceNumber;
_displayTexturePair.main = [texProcess.main retain];
_displayTexturePair.touch = [texProcess.touch retain];
[oldTexMain release];
[oldTexTouch release];
}
// Now that everything is set up, go ahead and draw everything. // Now that everything is set up, go ahead and draw everything.
dispatch_semaphore_wait(_semDrawable, DISPATCH_TIME_FOREVER); dispatch_semaphore_wait(_semDrawable, DISPATCH_TIME_FOREVER);
id<CAMetalDrawable> drawable = [self nextDrawable]; id<CAMetalDrawable> drawable = [self nextDrawable];
@ -2231,13 +2163,12 @@
{ {
[[presenterObject colorAttachment0Desc] setTexture:[drawable texture]]; [[presenterObject colorAttachment0Desc] setTexture:[drawable texture]];
const MetalTexturePair texProcess = [presenterObject texPairProcess];
const MetalRenderFrameInfo mrfi = [presenterObject renderFrameInfo]; const MetalRenderFrameInfo mrfi = [presenterObject renderFrameInfo];
[presenterObject renderForCommandBuffer:cb [presenterObject renderForCommandBuffer:cb
outputPipelineState:[presenterObject outputDrawablePipeline] outputPipelineState:[presenterObject outputDrawablePipeline]
hudPipelineState:[[presenterObject sharedData] hudPipeline] hudPipelineState:[[presenterObject sharedData] hudPipeline]
texDisplays:texProcess texDisplays:_displayTexturePair
mrfi:mrfi mrfi:mrfi
doYFlip:NO]; doYFlip:NO];