Cocoa Port: Restore the ability to use Metal display views on macOS High Sierra.

- Also rework the way the HQnx LUTs are loaded in Metal.
This commit is contained in:
rogerman 2017-11-28 14:06:34 -08:00
parent cd6fbcd5ea
commit c81df97a92
4 changed files with 154 additions and 77 deletions

View File

@ -2211,10 +2211,10 @@ void InitHQnxLUTs()
lutValuesInited = true; lutValuesInited = true;
_LQ2xLUT = (LUTValues *)malloc(256*(2*2)*16 * sizeof(LUTValues)); _LQ2xLUT = (LUTValues *)malloc_alignedPage(256*(2*2)*16 * sizeof(LUTValues));
_HQ2xLUT = (LUTValues *)malloc(256*(2*2)*16 * sizeof(LUTValues)); _HQ2xLUT = (LUTValues *)malloc_alignedPage(256*(2*2)*16 * sizeof(LUTValues));
_HQ3xLUT = (LUTValues *)malloc(256*(3*3)*16 * sizeof(LUTValues) + 2); _HQ3xLUT = (LUTValues *)malloc_alignedPage(256*(3*3)*16 * sizeof(LUTValues) + 2);
_HQ4xLUT = (LUTValues *)malloc(256*(4*4)*16 * sizeof(LUTValues) + 4); // The bytes fix a mysterious crash that intermittently occurs. Don't know why this works... it just does. _HQ4xLUT = (LUTValues *)malloc_alignedPage(256*(4*4)*16 * sizeof(LUTValues) + 4); // The bytes fix a mysterious crash that intermittently occurs. Don't know why this works... it just does.
#define MUR (compare & 0x01) // top-right #define MUR (compare & 0x01) // top-right
#define MDR (compare & 0x02) // bottom-right #define MDR (compare & 0x02) // bottom-right

View File

@ -164,10 +164,6 @@ public:
#ifdef ENABLE_APPLE_METAL #ifdef ENABLE_APPLE_METAL
if (IsOSXVersionSupported(10, 11, 0) && ![[NSUserDefaults standardUserDefaults] boolForKey:@"Debug_DisableMetal"]) if (IsOSXVersionSupported(10, 11, 0) && ![[NSUserDefaults standardUserDefaults] boolForKey:@"Debug_DisableMetal"])
{
// macOS v10.13.0 and v10.13.1 are specifically checked for here, because there are
// bugs in these versions of macOS that prevent Metal from working properly.
if (!IsOSXVersion(10, 13, 0) && !IsOSXVersion(10, 13, 1))
{ {
fetchObject = new MacMetalFetchObject; fetchObject = new MacMetalFetchObject;
@ -181,7 +177,6 @@ public:
GPU->SetWillPostprocessDisplays(false); GPU->SetWillPostprocessDisplays(false);
} }
} }
}
#endif #endif
#ifdef ENABLE_SHARED_FETCH_OBJECT #ifdef ENABLE_SHARED_FETCH_OBJECT

View File

@ -24,6 +24,7 @@
#import "DisplayViewCALayer.h" #import "DisplayViewCALayer.h"
#import "../cocoa_GPU.h" #import "../cocoa_GPU.h"
#import "../cocoa_util.h"
#include "../ClientDisplayView.h" #include "../ClientDisplayView.h"
#ifdef BOOL #ifdef BOOL
@ -100,6 +101,8 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
id<MTLTexture> texHQ4xLUT; id<MTLTexture> texHQ4xLUT;
id<MTLTexture> texCurrentHQnxLUT; id<MTLTexture> texCurrentHQnxLUT;
MTLResourceOptions preferredResourceStorageMode;
MTLSize _fetchThreadsPerGroup; MTLSize _fetchThreadsPerGroup;
MTLSize _fetchThreadGroupsPerGridNative; MTLSize _fetchThreadGroupsPerGridNative;
MTLSize _fetchThreadGroupsPerGridCustom; MTLSize _fetchThreadGroupsPerGridCustom;
@ -130,6 +133,8 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
@property (readonly, nonatomic) id<MTLTexture> texHQ4xLUT; @property (readonly, nonatomic) id<MTLTexture> texHQ4xLUT;
@property (retain) id<MTLTexture> texCurrentHQnxLUT; @property (retain) id<MTLTexture> texCurrentHQnxLUT;
@property (readonly, nonatomic) MTLResourceOptions preferredResourceStorageMode;
@property (readonly, nonatomic) MTLSize deposterizeThreadsPerGroup; @property (readonly, nonatomic) MTLSize deposterizeThreadsPerGroup;
@property (readonly, nonatomic) MTLSize deposterizeThreadGroupsPerGrid; @property (readonly, nonatomic) MTLSize deposterizeThreadGroupsPerGrid;
@ -337,7 +342,7 @@ public:
}; };
#pragma mark - #pragma mark -
void SetupHQnxLUTs_Metal(id<MTLDevice> &device, id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT); void SetupHQnxLUTs_Metal(id<MTLDevice> &device, id<MTLCommandQueue> &commandQueue, id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT);
void DeleteHQnxLUTs_Metal(id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT); void DeleteHQnxLUTs_Metal(id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT);
#endif // _MAC_METALDISPLAYVIEW_H #endif // _MAC_METALDISPLAYVIEW_H

View File

@ -22,6 +22,7 @@
@implementation MetalDisplayViewSharedData @implementation MetalDisplayViewSharedData
@synthesize device; @synthesize device;
@synthesize preferredResourceStorageMode;
@synthesize commandQueue; @synthesize commandQueue;
@synthesize defaultLibrary; @synthesize defaultLibrary;
@ -72,6 +73,22 @@
_fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain]; _fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain];
deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain]; deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain];
if ( IsOSXVersion(10, 13, 0) || IsOSXVersion(10, 13, 1) || IsOSXVersion(10, 13, 2) || IsOSXVersion(10, 13, 3) || IsOSXVersion(10, 13, 4) )
{
// On macOS High Sierra, there is currently a bug with newBufferWithBytesNoCopy:length:options:deallocator:
// that causes it to crash with MTLResourceStorageModeManaged. So for these macOS versions, replace
// MTLResourceStorageModeManaged with MTLResourceStorageModeShared. While this solution causes a very small
// drop in performance, using Metal is still far superior to falling back to OpenGL.
//
// As of this writing, the current version of macOS is v10.13.1. Disabling MTLResourceStorageModeManaged on
// every point release up to v10.13.4 should, I hope, give Apple enough time to fix their bugs with this!
preferredResourceStorageMode = MTLResourceStorageModeShared;
}
else
{
preferredResourceStorageMode = MTLResourceStorageModeManaged;
}
size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]); size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]);
while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) ) while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
{ {
@ -213,7 +230,7 @@
_isUsingFramebufferDirectly[NDSDisplayID_Touch][1] = 1; _isUsingFramebufferDirectly[NDSDisplayID_Touch][1] = 1;
// Set up the HQnx LUT textures. // Set up the HQnx LUT textures.
SetupHQnxLUTs_Metal(device, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT); SetupHQnxLUTs_Metal(device, commandQueue, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT);
texCurrentHQnxLUT = nil; texCurrentHQnxLUT = nil;
_fetchEncoder = nil; _fetchEncoder = nil;
@ -315,42 +332,42 @@
_bufDisplayFetchNative[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:dispInfo0.nativeBuffer[NDSDisplayID_Main] _bufDisplayFetchNative[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:dispInfo0.nativeBuffer[NDSDisplayID_Main]
length:_nativeBufferSize length:_nativeBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
_bufDisplayFetchNative[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:dispInfo1.nativeBuffer[NDSDisplayID_Main] _bufDisplayFetchNative[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:dispInfo1.nativeBuffer[NDSDisplayID_Main]
length:_nativeBufferSize length:_nativeBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
_bufDisplayFetchNative[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:dispInfo0.nativeBuffer[NDSDisplayID_Touch] _bufDisplayFetchNative[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:dispInfo0.nativeBuffer[NDSDisplayID_Touch]
length:_nativeBufferSize length:_nativeBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
_bufDisplayFetchNative[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:dispInfo1.nativeBuffer[NDSDisplayID_Touch] _bufDisplayFetchNative[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:dispInfo1.nativeBuffer[NDSDisplayID_Touch]
length:_nativeBufferSize length:_nativeBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
_bufDisplayFetchCustom[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:dispInfo0.customBuffer[NDSDisplayID_Main] _bufDisplayFetchCustom[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:dispInfo0.customBuffer[NDSDisplayID_Main]
length:_customBufferSize length:_customBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
_bufDisplayFetchCustom[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:dispInfo1.customBuffer[NDSDisplayID_Main] _bufDisplayFetchCustom[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:dispInfo1.customBuffer[NDSDisplayID_Main]
length:_customBufferSize length:_customBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
_bufDisplayFetchCustom[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:dispInfo0.customBuffer[NDSDisplayID_Touch] _bufDisplayFetchCustom[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:dispInfo0.customBuffer[NDSDisplayID_Touch]
length:_customBufferSize length:_customBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
_bufDisplayFetchCustom[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:dispInfo1.customBuffer[NDSDisplayID_Touch] _bufDisplayFetchCustom[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:dispInfo1.customBuffer[NDSDisplayID_Touch]
length:_customBufferSize length:_customBufferSize
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:preferredResourceStorageMode | MTLResourceCPUCacheModeWriteCombined
deallocator:nil] retain]; deallocator:nil] retain];
if (_fetchPixelBytes != dispInfo.pixelBytes) if (_fetchPixelBytes != dispInfo.pixelBytes)
@ -662,7 +679,11 @@
{ {
const id<MTLBuffer> targetSource = _bufDisplayFetchNative[displayID][bufferIndex]; const id<MTLBuffer> targetSource = _bufDisplayFetchNative[displayID][bufferIndex];
id<MTLTexture> targetDestination = _texDisplayFetchNative[displayID][bufferIndex]; id<MTLTexture> targetDestination = _texDisplayFetchNative[displayID][bufferIndex];
if (preferredResourceStorageMode == MTLResourceStorageModeManaged)
{
[targetSource didModifyRange:NSMakeRange(0, _nativeBufferSize)]; [targetSource didModifyRange:NSMakeRange(0, _nativeBufferSize)];
}
[_fetchEncoder copyFromBuffer:targetSource [_fetchEncoder copyFromBuffer:targetSource
sourceOffset:0 sourceOffset:0
@ -681,7 +702,11 @@
const id<MTLBuffer> targetSource = _bufDisplayFetchCustom[displayID][bufferIndex]; const id<MTLBuffer> targetSource = _bufDisplayFetchCustom[displayID][bufferIndex];
id<MTLTexture> targetDestination = _texDisplayFetchCustom[displayID][bufferIndex]; id<MTLTexture> targetDestination = _texDisplayFetchCustom[displayID][bufferIndex];
if (preferredResourceStorageMode == MTLResourceStorageModeManaged)
{
[targetSource didModifyRange:NSMakeRange(0, _customBufferSize)]; [targetSource didModifyRange:NSMakeRange(0, _customBufferSize)];
}
[_fetchEncoder copyFromBuffer:targetSource [_fetchEncoder copyFromBuffer:targetSource
sourceOffset:0 sourceOffset:0
@ -1114,23 +1139,23 @@
VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main); VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main);
_bufCPUFilterSrcMain = [[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetSrcBufferPtr() _bufCPUFilterSrcMain = [[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetSrcBufferPtr()
length:vfMain->GetSrcWidth() * vfMain->GetSrcHeight() * sizeof(uint32_t) length:vfMain->GetSrcWidth() * vfMain->GetSrcHeight() * sizeof(uint32_t)
options:MTLResourceStorageModeManaged options:[sharedData preferredResourceStorageMode]
deallocator:nil] retain]; deallocator:nil] retain];
[self setBufCPUFilterDstMain:[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr() [self setBufCPUFilterDstMain:[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr()
length:vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t) length:vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t)
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined
deallocator:nil]]; deallocator:nil]];
VideoFilter *vfTouch = cdp->GetPixelScalerObject(NDSDisplayID_Touch); VideoFilter *vfTouch = cdp->GetPixelScalerObject(NDSDisplayID_Touch);
_bufCPUFilterSrcTouch = [[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetSrcBufferPtr() _bufCPUFilterSrcTouch = [[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetSrcBufferPtr()
length:vfTouch->GetSrcWidth() * vfTouch->GetSrcHeight() * sizeof(uint32_t) length:vfTouch->GetSrcWidth() * vfTouch->GetSrcHeight() * sizeof(uint32_t)
options:MTLResourceStorageModeManaged options:[sharedData preferredResourceStorageMode]
deallocator:nil] retain]; deallocator:nil] retain];
[self setBufCPUFilterDstTouch:[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr() [self setBufCPUFilterDstTouch:[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr()
length:vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t) length:vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t)
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined
deallocator:nil]]; deallocator:nil]];
texHUDCharMap = nil; texHUDCharMap = nil;
@ -1149,13 +1174,13 @@
VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main); VideoFilter *vfMain = cdp->GetPixelScalerObject(NDSDisplayID_Main);
[self setBufCPUFilterDstMain:[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr() [self setBufCPUFilterDstMain:[[sharedData device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr()
length:(vfMain->GetSrcWidth() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * (vfMain->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t) length:(vfMain->GetSrcWidth() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * (vfMain->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t)
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined
deallocator:nil]]; deallocator:nil]];
VideoFilter *vfTouch = cdp->GetPixelScalerObject(NDSDisplayID_Touch); VideoFilter *vfTouch = cdp->GetPixelScalerObject(NDSDisplayID_Touch);
[self setBufCPUFilterDstTouch:[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr() [self setBufCPUFilterDstTouch:[[sharedData device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr()
length:(vfTouch->GetSrcWidth() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * (vfTouch->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t) length:(vfTouch->GetSrcWidth() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * (vfTouch->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t)
options:MTLResourceStorageModeManaged | MTLResourceCPUCacheModeWriteCombined options:[sharedData preferredResourceStorageMode] | MTLResourceCPUCacheModeWriteCombined
deallocator:nil]]; deallocator:nil]];
cb = [self newCommandBuffer]; cb = [self newCommandBuffer];
@ -1473,8 +1498,11 @@
} }
if (shouldProcessDisplay[NDSDisplayID_Main]) if (shouldProcessDisplay[NDSDisplayID_Main])
{
if ([sharedData preferredResourceStorageMode] == MTLResourceStorageModeManaged)
{ {
[[self bufCPUFilterDstMain] didModifyRange:NSMakeRange(0, vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t))]; [[self bufCPUFilterDstMain] didModifyRange:NSMakeRange(0, vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t))];
}
[bce copyFromBuffer:[self bufCPUFilterDstMain] [bce copyFromBuffer:[self bufCPUFilterDstMain]
sourceOffset:0 sourceOffset:0
@ -1501,8 +1529,11 @@
} }
if (shouldProcessDisplay[NDSDisplayID_Touch]) if (shouldProcessDisplay[NDSDisplayID_Touch])
{
if ([sharedData preferredResourceStorageMode] == MTLResourceStorageModeManaged)
{ {
[[self bufCPUFilterDstTouch] didModifyRange:NSMakeRange(0, vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t))]; [[self bufCPUFilterDstTouch] didModifyRange:NSMakeRange(0, vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t))];
}
[bce copyFromBuffer:[self bufCPUFilterDstTouch] [bce copyFromBuffer:[self bufCPUFilterDstTouch]
sourceOffset:0 sourceOffset:0
@ -2337,8 +2368,32 @@ void MacMetalDisplayView::FlushView()
} }
#pragma mark - #pragma mark -
void SetupHQnxLUTs_Metal(id<MTLDevice> &device, id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT) void SetupHQnxLUTs_Metal(id<MTLDevice> &device, id<MTLCommandQueue> &commandQueue, id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT)
{ {
InitHQnxLUTs();
// Create the MTLBuffer objects to wrap the existing LUT buffers that are already in memory.
id<MTLBuffer> bufLQ2xLUT = [device newBufferWithBytesNoCopy:_LQ2xLUT
length:256 * 2 * 4 * 16 * sizeof(uint32_t)
options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined
deallocator:nil];
id<MTLBuffer> bufHQ2xLUT = [device newBufferWithBytesNoCopy:_HQ2xLUT
length:256 * 2 * 4 * 16 * sizeof(uint32_t)
options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined
deallocator:nil];
id<MTLBuffer> bufHQ3xLUT = [device newBufferWithBytesNoCopy:_HQ3xLUT
length:256 * 2 * 9 * 16 * sizeof(uint32_t)
options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined
deallocator:nil];
id<MTLBuffer> bufHQ4xLUT = [device newBufferWithBytesNoCopy:_HQ4xLUT
length:256 * 2 * 16 * 16 * sizeof(uint32_t)
options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined
deallocator:nil];
// Create the MTLTexture objects that will be used as LUTs in the Metal shaders.
MTLTextureDescriptor *texHQ2xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm MTLTextureDescriptor *texHQ2xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
width:256 * 2 width:256 * 2
height:4 height:4
@ -2346,9 +2401,8 @@ void SetupHQnxLUTs_Metal(id<MTLDevice> &device, id<MTLTexture> &texLQ2xLUT, id<M
[texHQ2xLUTDesc setTextureType:MTLTextureType3D]; [texHQ2xLUTDesc setTextureType:MTLTextureType3D];
[texHQ2xLUTDesc setDepth:16]; [texHQ2xLUTDesc setDepth:16];
[texHQ2xLUTDesc setResourceOptions:MTLResourceStorageModeManaged]; [texHQ2xLUTDesc setResourceOptions:MTLResourceStorageModePrivate];
[texHQ2xLUTDesc setStorageMode:MTLStorageModeManaged]; [texHQ2xLUTDesc setStorageMode:MTLStorageModePrivate];
[texHQ2xLUTDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
[texHQ2xLUTDesc setUsage:MTLTextureUsageShaderRead]; [texHQ2xLUTDesc setUsage:MTLTextureUsageShaderRead];
MTLTextureDescriptor *texHQ3xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm MTLTextureDescriptor *texHQ3xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
@ -2357,9 +2411,8 @@ void SetupHQnxLUTs_Metal(id<MTLDevice> &device, id<MTLTexture> &texLQ2xLUT, id<M
mipmapped:NO]; mipmapped:NO];
[texHQ3xLUTDesc setTextureType:MTLTextureType3D]; [texHQ3xLUTDesc setTextureType:MTLTextureType3D];
[texHQ3xLUTDesc setDepth:16]; [texHQ3xLUTDesc setDepth:16];
[texHQ3xLUTDesc setResourceOptions:MTLResourceStorageModeManaged]; [texHQ3xLUTDesc setResourceOptions:MTLResourceStorageModePrivate];
[texHQ3xLUTDesc setStorageMode:MTLStorageModeManaged]; [texHQ3xLUTDesc setStorageMode:MTLStorageModePrivate];
[texHQ3xLUTDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
[texHQ3xLUTDesc setUsage:MTLTextureUsageShaderRead]; [texHQ3xLUTDesc setUsage:MTLTextureUsageShaderRead];
MTLTextureDescriptor *texHQ4xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm MTLTextureDescriptor *texHQ4xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
@ -2368,44 +2421,68 @@ void SetupHQnxLUTs_Metal(id<MTLDevice> &device, id<MTLTexture> &texLQ2xLUT, id<M
mipmapped:NO]; mipmapped:NO];
[texHQ4xLUTDesc setTextureType:MTLTextureType3D]; [texHQ4xLUTDesc setTextureType:MTLTextureType3D];
[texHQ4xLUTDesc setDepth:16]; [texHQ4xLUTDesc setDepth:16];
[texHQ4xLUTDesc setResourceOptions:MTLResourceStorageModeManaged]; [texHQ4xLUTDesc setResourceOptions:MTLResourceStorageModePrivate];
[texHQ4xLUTDesc setStorageMode:MTLStorageModeManaged]; [texHQ4xLUTDesc setStorageMode:MTLStorageModePrivate];
[texHQ4xLUTDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
[texHQ4xLUTDesc setUsage:MTLTextureUsageShaderRead]; [texHQ4xLUTDesc setUsage:MTLTextureUsageShaderRead];
texLQ2xLUT = [[device newTextureWithDescriptor:texHQ2xLUTDesc] retain]; texLQ2xLUT = [device newTextureWithDescriptor:texHQ2xLUTDesc];
texHQ2xLUT = [[device newTextureWithDescriptor:texHQ2xLUTDesc] retain]; texHQ2xLUT = [device newTextureWithDescriptor:texHQ2xLUTDesc];
texHQ3xLUT = [[device newTextureWithDescriptor:texHQ3xLUTDesc] retain]; texHQ3xLUT = [device newTextureWithDescriptor:texHQ3xLUTDesc];
texHQ4xLUT = [[device newTextureWithDescriptor:texHQ4xLUTDesc] retain]; texHQ4xLUT = [device newTextureWithDescriptor:texHQ4xLUTDesc];
InitHQnxLUTs(); // Copy the LUT buffers from main memory to the GPU.
[texLQ2xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 4, 16) id<MTLCommandBuffer> cb = [commandQueue commandBufferWithUnretainedReferences];;
mipmapLevel:0 id<MTLBlitCommandEncoder> bce = [cb blitCommandEncoder];
slice:0
withBytes:_LQ2xLUT
bytesPerRow:256 * 2 * sizeof(uint32_t)
bytesPerImage:256 * 2 * 4 * sizeof(uint32_t)];
[texHQ2xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 4, 16) [bce copyFromBuffer:bufLQ2xLUT
mipmapLevel:0 sourceOffset:0
slice:0 sourceBytesPerRow:256 * 2 * sizeof(uint32_t)
withBytes:_HQ2xLUT sourceBytesPerImage:256 * 2 * 4 * sizeof(uint32_t)
bytesPerRow:256 * 2 * sizeof(uint32_t) sourceSize:MTLSizeMake(256 * 2, 4, 16)
bytesPerImage:256 * 2 * 4 * sizeof(uint32_t)]; toTexture:texLQ2xLUT
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
[texHQ3xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 9, 16) [bce copyFromBuffer:bufHQ2xLUT
mipmapLevel:0 sourceOffset:0
slice:0 sourceBytesPerRow:256 * 2 * sizeof(uint32_t)
withBytes:_HQ3xLUT sourceBytesPerImage:256 * 2 * 4 * sizeof(uint32_t)
bytesPerRow:256 * 2 * sizeof(uint32_t) sourceSize:MTLSizeMake(256 * 2, 4, 16)
bytesPerImage:256 * 2 * 9 * sizeof(uint32_t)]; toTexture:texHQ2xLUT
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
[texHQ4xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 16, 16) [bce copyFromBuffer:bufHQ3xLUT
mipmapLevel:0 sourceOffset:0
slice:0 sourceBytesPerRow:256 * 2 * sizeof(uint32_t)
withBytes:_HQ4xLUT sourceBytesPerImage:256 * 2 * 9 * sizeof(uint32_t)
bytesPerRow:256 * 2 * sizeof(uint32_t) sourceSize:MTLSizeMake(256 * 2, 9, 16)
bytesPerImage:256 * 2 * 16 * sizeof(uint32_t)]; toTexture:texHQ3xLUT
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
[bce copyFromBuffer:bufHQ4xLUT
sourceOffset:0
sourceBytesPerRow:256 * 2 * sizeof(uint32_t)
sourceBytesPerImage:256 * 2 * 16 * sizeof(uint32_t)
sourceSize:MTLSizeMake(256 * 2, 16, 16)
toTexture:texHQ4xLUT
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
[bce endEncoding];
[cb commit];
[cb waitUntilCompleted];
[bufLQ2xLUT release];
[bufHQ2xLUT release];
[bufHQ3xLUT release];
[bufHQ4xLUT release];
} }
void DeleteHQnxLUTs_Metal(id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT) void DeleteHQnxLUTs_Metal(id<MTLTexture> &texLQ2xLUT, id<MTLTexture> &texHQ2xLUT, id<MTLTexture> &texHQ3xLUT, id<MTLTexture> &texHQ4xLUT)