From cd6fbcd5ea80d7072b3bfec0519e8020b025342e Mon Sep 17 00:00:00 2001 From: rogerman Date: Tue, 28 Nov 2017 00:53:50 -0800 Subject: [PATCH] Cocoa Port: In the Metal framebuffer fetcher, further optimize 18-bit to 32-bit color conversions whenever the master brightness does not need to be applied, which is the most typical use case. --- .../cocoa/userinterface/MacMetalDisplayView.h | 3 +- .../userinterface/MacMetalDisplayView.mm | 48 ++++++++++++------- .../MacMetalDisplayViewShaders.metal | 39 ++++++++++----- 3 files changed, 58 insertions(+), 32 deletions(-) diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h index 563b78ea6..c3ed84f87 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h @@ -60,9 +60,10 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; id defaultLibrary; id _fetch555Pipeline; - id _fetch555ConvertOnlyPipeline; id _fetch666Pipeline; id _fetch888Pipeline; + id _fetch555ConvertOnlyPipeline; + id _fetch666ConvertOnlyPipeline; id deposterizePipeline; id hudPipeline; id hudRGBAPipeline; diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index 5f3612f64..ee3acee75 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -66,9 +66,10 @@ commandQueue = [[device newCommandQueue] retain]; defaultLibrary = [[device newDefaultLibrary] retain]; _fetch555Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555"] error:nil] retain]; - _fetch555ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555ConvertOnly"] error:nil] retain]; _fetch666Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666"] error:nil] retain]; _fetch888Pipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch888"] error:nil] retain]; + _fetch555ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555ConvertOnly"] error:nil] retain]; + _fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"] error:nil] retain]; deposterizePipeline = [[device newComputePipelineStateWithFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"] error:nil] retain]; size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]); @@ -227,9 +228,10 @@ [commandQueue release]; [defaultLibrary release]; [_fetch555Pipeline release]; - [_fetch555ConvertOnlyPipeline release]; [_fetch666Pipeline release]; [_fetch888Pipeline release]; + [_fetch555ConvertOnlyPipeline release]; + [_fetch666ConvertOnlyPipeline release]; [deposterizePipeline release]; [hudPipeline release]; [hudRGBAPipeline release]; @@ -477,22 +479,20 @@ id cce = [cb computeCommandEncoder]; - if (currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Main] || currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Touch] || - currentDisplayInfo.needApplyMasterBrightness[NDSDisplayID_Main] || currentDisplayInfo.needApplyMasterBrightness[NDSDisplayID_Touch]) + if (currentDisplayInfo.needApplyMasterBrightness[NDSDisplayID_Main] || currentDisplayInfo.needApplyMasterBrightness[NDSDisplayID_Touch]) { - switch (currentDisplayInfo.colorFormat) + if (currentDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev) { - case NDSColorFormat_BGR555_Rev: - [cce setComputePipelineState:_fetch555Pipeline]; - break; - - case NDSColorFormat_BGR666_Rev: - [cce setComputePipelineState:_fetch666Pipeline]; - break; - - case NDSColorFormat_BGR888_Rev: - [cce setComputePipelineState:_fetch888Pipeline]; - break; + [cce setComputePipelineState:_fetch555Pipeline]; + } + else if ( (currentDisplayInfo.colorFormat == NDSColorFormat_BGR666_Rev) && + (currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Main] || currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Touch]) ) + { + [cce setComputePipelineState:_fetch666Pipeline]; + } + else + { + [cce setComputePipelineState:_fetch888Pipeline]; } if (isMainEnabled) @@ -559,13 +559,25 @@ isUsingFramebufferDirectlyTouch = false; } } - else + else if (currentDisplayInfo.colorFormat != NDSColorFormat_BGR888_Rev) { + bool isPipelineStateSet = false; + if (currentDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev) { // 16-bit textures aren't handled natively in Metal for macOS, so we need to explicitly convert to 32-bit here. [cce setComputePipelineState:_fetch555ConvertOnlyPipeline]; - + isPipelineStateSet = true; + } + else if ( (currentDisplayInfo.colorFormat == NDSColorFormat_BGR666_Rev) && + (currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Main] || currentDisplayInfo.needConvertColorFormat[NDSDisplayID_Touch]) ) + { + [cce setComputePipelineState:_fetch666ConvertOnlyPipeline]; + isPipelineStateSet = true; + } + + if (isPipelineStateSet) + { if (isMainEnabled) { if (!currentDisplayInfo.didPerformCustomRender[NDSDisplayID_Main]) diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal index a21f2f863..dcdc8bada 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal @@ -443,19 +443,6 @@ kernel void nds_fetch555(const uint2 position [[thread_position_in_grid]], outTexture.write(float4(outColor, 1.0f), position); } -kernel void nds_fetch555ConvertOnly(const uint2 position [[thread_position_in_grid]], - const texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]]) -{ - if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) ) - { - return; - } - - const float4 outColor = unpack_unorm1555_to_unorm8888( (ushort)inTexture.read(position).r ); - outTexture.write(float4(outColor.rgb, 1.0f), position); -} - kernel void nds_fetch666(const uint2 position [[thread_position_in_grid]], const constant uchar *brightnessMode [[buffer(0)]], const constant uchar *brightnessIntensity [[buffer(1)]], @@ -500,6 +487,32 @@ kernel void nds_fetch888(const uint2 position [[thread_position_in_grid]], outTexture.write(float4(outColor, 1.0f), position); } +kernel void nds_fetch555ConvertOnly(const uint2 position [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) ) + { + return; + } + + const float4 outColor = unpack_unorm1555_to_unorm8888( (ushort)inTexture.read(position).r ); + outTexture.write(float4(outColor.rgb, 1.0f), position); +} + +kernel void nds_fetch666ConvertOnly(const uint2 position [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) ) + { + return; + } + + const float3 outColor = inTexture.read(position).rgb * float3(255.0f/63.0f); + outTexture.write(float4(outColor, 1.0f), position); +} + float3 nds_apply_master_brightness(const float3 inColor, const uchar mode, const float intensity) { switch (mode)