diff --git a/desmume/src/frontend/cocoa/ClientDisplayView.cpp b/desmume/src/frontend/cocoa/ClientDisplayView.cpp index 57d1f3949..ad50f4ee2 100644 --- a/desmume/src/frontend/cocoa/ClientDisplayView.cpp +++ b/desmume/src/frontend/cocoa/ClientDisplayView.cpp @@ -2211,10 +2211,10 @@ void InitHQnxLUTs() lutValuesInited = true; - _LQ2xLUT = (LUTValues *)malloc(256*(2*2)*16 * sizeof(LUTValues)); - _HQ2xLUT = (LUTValues *)malloc(256*(2*2)*16 * sizeof(LUTValues)); - _HQ3xLUT = (LUTValues *)malloc(256*(3*3)*16 * sizeof(LUTValues) + 2); - _HQ4xLUT = (LUTValues *)malloc(256*(4*4)*16 * sizeof(LUTValues) + 4); // The bytes fix a mysterious crash that intermittently occurs. Don't know why this works... it just does. + _LQ2xLUT = (LUTValues *)malloc_alignedPage(256*(2*2)*16 * sizeof(LUTValues)); + _HQ2xLUT = (LUTValues *)malloc_alignedPage(256*(2*2)*16 * sizeof(LUTValues)); + _HQ3xLUT = (LUTValues *)malloc_alignedPage(256*(3*3)*16 * sizeof(LUTValues) + 2); + _HQ4xLUT = (LUTValues *)malloc_alignedPage(256*(4*4)*16 * sizeof(LUTValues) + 4); // The bytes fix a mysterious crash that intermittently occurs. Don't know why this works... it just does. #define MUR (compare & 0x01) // top-right #define MDR (compare & 0x02) // bottom-right diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.mm b/desmume/src/frontend/cocoa/cocoa_GPU.mm index 662c592f2..9bfb49ec0 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.mm +++ b/desmume/src/frontend/cocoa/cocoa_GPU.mm @@ -165,21 +165,16 @@ public: #ifdef ENABLE_APPLE_METAL if (IsOSXVersionSupported(10, 11, 0) && ![[NSUserDefaults standardUserDefaults] boolForKey:@"Debug_DisableMetal"]) { - // macOS v10.13.0 and v10.13.1 are specifically checked for here, because there are - // bugs in these versions of macOS that prevent Metal from working properly. 
- if (!IsOSXVersion(10, 13, 0) && !IsOSXVersion(10, 13, 1)) + fetchObject = new MacMetalFetchObject; + + if (fetchObject->GetClientData() == nil) { - fetchObject = new MacMetalFetchObject; - - if (fetchObject->GetClientData() == nil) - { - delete fetchObject; - fetchObject = NULL; - } - else - { - GPU->SetWillPostprocessDisplays(false); - } + delete fetchObject; + fetchObject = NULL; + } + else + { + GPU->SetWillPostprocessDisplays(false); } } #endif diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h index c3ed84f87..2ddb4129b 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h @@ -24,6 +24,7 @@ #import "DisplayViewCALayer.h" #import "../cocoa_GPU.h" +#import "../cocoa_util.h" #include "../ClientDisplayView.h" #ifdef BOOL @@ -100,6 +101,8 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; id texHQ4xLUT; id texCurrentHQnxLUT; + MTLResourceOptions preferredResourceStorageMode; + MTLSize _fetchThreadsPerGroup; MTLSize _fetchThreadGroupsPerGridNative; MTLSize _fetchThreadGroupsPerGridCustom; @@ -130,6 +133,8 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @property (readonly, nonatomic) id texHQ4xLUT; @property (retain) id texCurrentHQnxLUT; +@property (readonly, nonatomic) MTLResourceOptions preferredResourceStorageMode; + @property (readonly, nonatomic) MTLSize deposterizeThreadsPerGroup; @property (readonly, nonatomic) MTLSize deposterizeThreadGroupsPerGrid; @@ -337,7 +342,7 @@ public: }; #pragma mark - -void SetupHQnxLUTs_Metal(id &device, id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT); +void SetupHQnxLUTs_Metal(id &device, id &commandQueue, id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT); void DeleteHQnxLUTs_Metal(id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT); #endif // _MAC_METALDISPLAYVIEW_H diff 
--git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index ee3acee75..f9b64e92f 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -22,6 +22,7 @@ @implementation MetalDisplayViewSharedData @synthesize device; +@synthesize preferredResourceStorageMode; @synthesize commandQueue; @synthesize defaultLibrary; @@ -72,6 +73,22 @@ _fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain]; deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain]; + if ( IsOSXVersion(10, 13, 0) || IsOSXVersion(10, 13, 1) || IsOSXVersion(10, 13, 2) || IsOSXVersion(10, 13, 3) || IsOSXVersion(10, 13, 4) ) + { + // On macOS High Sierra, there is currently a bug with newBufferWithBytesNoCopy:length:options:deallocator + // that causes it to crash with MTLResourceStorageModeManaged. So for these macOS versions, replace + // MTLResourceStorageModeManaged with MTLResourceStorageModeShared. While this solution causes a very small + // drop in performance, it is still far superior to use Metal rather than OpenGL. + // + // As of this writing, the current version of macOS is v10.13.1. Disabling MTLResourceStorageModeManaged on + // every point release up to v10.13.4 should, I hope, give Apple enough time to fix their bugs with this! 
+ preferredResourceStorageMode = MTLResourceStorageModeShared; + } + else + { + preferredResourceStorageMode = MTLResourceStorageModeManaged; + } + size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]); while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) ) { @@ -213,7 +230,7 @@ _isUsingFramebufferDirectly[NDSDisplayID_Touch][1] = 1; // Set up the HQnx LUT textures. - SetupHQnxLUTs_Metal(device, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT); + SetupHQnxLUTs_Metal(device, commandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT); texCurrentHQnxLUT = nil; _fetchEncoder = nil; @@ -315,42 +332,42 @@ _bufDisplayFetchNative[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:dispInfo0.nativeBuffer[NDSDisplayID_Main] length:_nativeBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined deallocator:nil] retain]; _bufDisplayFetchNative[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:dispInfo1.nativeBuffer[NDSDisplayID_Main] length:_nativeBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined deallocator:nil] retain]; _bufDisplayFetchNative[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:dispInfo0.nativeBuffer[NDSDisplayID_Touch] length:_nativeBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined deallocator:nil] retain]; _bufDisplayFetchNative[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:dispInfo1.nativeBuffer[NDSDisplayID_Touch] length:_nativeBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined 
deallocator:nil] retain]; _bufDisplayFetchCustom[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:dispInfo0.customBuffer[NDSDisplayID_Main] length:_customBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined deallocator:nil] retain]; _bufDisplayFetchCustom[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:dispInfo1.customBuffer[NDSDisplayID_Main] length:_customBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined deallocator:nil] retain]; _bufDisplayFetchCustom[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:dispInfo0.customBuffer[NDSDisplayID_Touch] length:_customBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined deallocator:nil] retain]; _bufDisplayFetchCustom[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:dispInfo1.customBuffer[NDSDisplayID_Touch] length:_customBufferSize - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined deallocator:nil] retain]; if (_fetchPixelBytes != dispInfo.pixelBytes) @@ -662,7 +679,11 @@ { const id targetSource = _bufDisplayFetchNative[displayID][bufferIndex]; id targetDestination = _texDisplayFetchNative[displayID][bufferIndex]; - [targetSource didModifyRange:NSMakeRange(0, _nativeBufferSize)]; + + if (preferredResourceStorageMode == MTLResourceStorageModeManaged) + { + [targetSource didModifyRange:NSMakeRange(0, _nativeBufferSize)]; + } [_fetchEncoder copyFromBuffer:targetSource sourceOffset:0 @@ -681,7 +702,11 @@ const id targetSource = _bufDisplayFetchCustom[displayID][bufferIndex]; id targetDestination = _texDisplayFetchCustom[displayID][bufferIndex]; 
- [targetSource didModifyRange:NSMakeRange(0, _customBufferSize)]; + + if (preferredResourceStorageMode == MTLResourceStorageModeManaged) + { + [targetSource didModifyRange:NSMakeRange(0, _customBufferSize)]; + } [_fetchEncoder copyFromBuffer:targetSource sourceOffset:0 @@ -1114,23 +1139,23 @@ VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main); _bufCPUFilterSrcMain = [[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetSrcBufferPtr() length:vfMain->GetSrcWidth() * vfMain->GetSrcHeight() * sizeof(uint32_t) - options:MTLResourceStorageModeManaged + options:[sharedData preferredResourceStorageMode] deallocator:nil] retain]; [self setBufCPUFilterDstMain:[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr() length:vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t) - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined deallocator:nil]]; VideoFilter *vfTouch = cdp->GetPixelScalerObject(NDSDisplayID_Touch); _bufCPUFilterSrcTouch = [[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetSrcBufferPtr() length:vfTouch->GetSrcWidth() * vfTouch->GetSrcHeight() * sizeof(uint32_t) - options:MTLResourceStorageModeManaged + options:[sharedData preferredResourceStorageMode] deallocator:nil] retain]; [self setBufCPUFilterDstTouch:[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr() length:vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t) - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined deallocator:nil]]; texHUDCharMap = nil; @@ -1149,13 +1174,13 @@ VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main); [self setBufCPUFilterDstMain:[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr() length:(vfMain->GetSrcWidth() * 
vfAttr.scaleMultiply / vfAttr.scaleDivide) * (vfMain->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t) - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined deallocator:nil]]; VideoFilter *vfTouch = cdp->GetPixelScalerObject(NDSDisplayID_Touch); [self setBufCPUFilterDstTouch:[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr() length:(vfTouch->GetSrcWidth() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * (vfTouch->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t) - options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined + options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined deallocator:nil]]; cb = [self newCommandBuffer]; @@ -1474,7 +1499,10 @@ if (shouldProcessDisplay[NDSDisplayID_Main]) { - [[self bufCPUFilterDstMain] didModifyRange:NSMakeRange(0, vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t))]; + if ([sharedData preferredResourceStorageMode] == MTLResourceStorageModeManaged) + { + [[self bufCPUFilterDstMain] didModifyRange:NSMakeRange(0, vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t))]; + } [bce copyFromBuffer:[self bufCPUFilterDstMain] sourceOffset:0 @@ -1502,7 +1530,10 @@ if (shouldProcessDisplay[NDSDisplayID_Touch]) { - [[self bufCPUFilterDstTouch] didModifyRange:NSMakeRange(0, vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t))]; + if ([sharedData preferredResourceStorageMode] == MTLResourceStorageModeManaged) + { + [[self bufCPUFilterDstTouch] didModifyRange:NSMakeRange(0, vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t))]; + } [bce copyFromBuffer:[self bufCPUFilterDstTouch] sourceOffset:0 @@ -2337,8 +2368,32 @@ void MacMetalDisplayView::FlushView() } #pragma mark - -void SetupHQnxLUTs_Metal(id &device, id &texLQ2xLUT, id &texHQ2xLUT, id 
&texHQ3xLUT, id &texHQ4xLUT) +void SetupHQnxLUTs_Metal(id &device, id &commandQueue, id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT) { + InitHQnxLUTs(); + + // Create the MTLBuffer objects to wrap the existing LUT buffers that are already in memory. + id bufLQ2xLUT = [device newBufferWithBytesNoCopy:_LQ2xLUT + length:256 * 2 * 4 * 16 * sizeof(uint32_t) + options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined + deallocator:nil]; + + id bufHQ2xLUT = [device newBufferWithBytesNoCopy:_HQ2xLUT + length:256 * 2 * 4 * 16 * sizeof(uint32_t) + options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined + deallocator:nil]; + + id bufHQ3xLUT = [device newBufferWithBytesNoCopy:_HQ3xLUT + length:256 * 2 * 9 * 16 * sizeof(uint32_t) + options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined + deallocator:nil]; + + id bufHQ4xLUT = [device newBufferWithBytesNoCopy:_HQ4xLUT + length:256 * 2 * 16 * 16 * sizeof(uint32_t) + options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined + deallocator:nil]; + + // Create the MTLTexture objects that will be used as LUTs in the Metal shaders. 
MTLTextureDescriptor *texHQ2xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm width:256 * 2 height:4 @@ -2346,9 +2401,8 @@ void SetupHQnxLUTs_Metal(id &device, id &texLQ2xLUT, id &device, id &texLQ2xLUT, id &device, id &texLQ2xLUT, id cb = [commandQueue commandBufferWithUnretainedReferences];; + id bce = [cb blitCommandEncoder]; - [texHQ2xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 4, 16) - mipmapLevel:0 - slice:0 - withBytes:_HQ2xLUT - bytesPerRow:256 * 2 * sizeof(uint32_t) - bytesPerImage:256 * 2 * 4 * sizeof(uint32_t)]; + [bce copyFromBuffer:bufLQ2xLUT + sourceOffset:0 + sourceBytesPerRow:256 * 2 * sizeof(uint32_t) + sourceBytesPerImage:256 * 2 * 4 * sizeof(uint32_t) + sourceSize:MTLSizeMake(256 * 2, 4, 16) + toTexture:texLQ2xLUT + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; - [texHQ3xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 9, 16) - mipmapLevel:0 - slice:0 - withBytes:_HQ3xLUT - bytesPerRow:256 * 2 * sizeof(uint32_t) - bytesPerImage:256 * 2 * 9 * sizeof(uint32_t)]; + [bce copyFromBuffer:bufHQ2xLUT + sourceOffset:0 + sourceBytesPerRow:256 * 2 * sizeof(uint32_t) + sourceBytesPerImage:256 * 2 * 4 * sizeof(uint32_t) + sourceSize:MTLSizeMake(256 * 2, 4, 16) + toTexture:texHQ2xLUT + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; - [texHQ4xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 16, 16) - mipmapLevel:0 - slice:0 - withBytes:_HQ4xLUT - bytesPerRow:256 * 2 * sizeof(uint32_t) - bytesPerImage:256 * 2 * 16 * sizeof(uint32_t)]; + [bce copyFromBuffer:bufHQ3xLUT + sourceOffset:0 + sourceBytesPerRow:256 * 2 * sizeof(uint32_t) + sourceBytesPerImage:256 * 2 * 9 * sizeof(uint32_t) + sourceSize:MTLSizeMake(256 * 2, 9, 16) + toTexture:texHQ3xLUT + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + + [bce copyFromBuffer:bufHQ4xLUT + sourceOffset:0 + sourceBytesPerRow:256 * 2 * sizeof(uint32_t) + 
sourceBytesPerImage:256 * 2 * 16 * sizeof(uint32_t) + sourceSize:MTLSizeMake(256 * 2, 16, 16) + toTexture:texHQ4xLUT + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + + [bce endEncoding]; + + [cb commit]; + [cb waitUntilCompleted]; + + [bufLQ2xLUT release]; + [bufHQ2xLUT release]; + [bufHQ3xLUT release]; + [bufHQ4xLUT release]; } void DeleteHQnxLUTs_Metal(id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT)