diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h index 7f29b4e85..d3c14e0dc 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h @@ -102,7 +102,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; id _fetch666ConvertOnlyPipeline; id _fetch888ConvertOnlyPipeline; id deposterizePipeline; - id hudPipeline; + id hudBGRAPipeline; id hudRGBAPipeline; id samplerHUDBox; @@ -151,7 +151,7 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @property (readonly, nonatomic) id defaultLibrary; @property (readonly, nonatomic) id deposterizePipeline; -@property (readonly, nonatomic) id hudPipeline; +@property (readonly, nonatomic) id hudBGRAPipeline; @property (readonly, nonatomic) id hudRGBAPipeline; @property (readonly, nonatomic) id samplerHUDBox; @property (readonly, nonatomic) id samplerHUDText; @@ -188,7 +188,6 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; MTLRenderPassDescriptor *_outputRenderPassDesc; MTLRenderPassColorAttachmentDescriptor *colorAttachment0Desc; id pixelScalePipeline; - id outputRGBAPipeline; id outputDrawablePipeline; MTLPixelFormat drawableFormat; @@ -229,7 +228,6 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; @property (assign, nonatomic) MetalDisplayViewSharedData *sharedData; @property (readonly, nonatomic) MTLRenderPassColorAttachmentDescriptor *colorAttachment0Desc; @property (retain) id pixelScalePipeline; -@property (retain) id outputRGBAPipeline; @property (retain) id outputDrawablePipeline; @property (assign) MTLPixelFormat drawableFormat; @property (retain) id bufCPUFilterDstMain; @@ -257,7 +255,8 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties; hudPipelineState:(id)hudPipelineState texDisplays:(MetalTexturePair)texDisplay mrfi:(MetalRenderFrameInfo)mrfi - doYFlip:(BOOL)willFlip; + doYFlip:(BOOL)willFlip + doSwapRB:(BOOL)willSwapRB; - (void) renderStartAtIndex:(uint8_t)index; - (void) renderFinishAtIndex:(uint8_t)index; - (ClientDisplayBufferState) renderBufferStateAtIndex:(uint8_t)index; diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index f9ae0a244..06cdbfb99 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -33,7 +33,7 @@ @synthesize defaultLibrary; @synthesize deposterizePipeline; -@synthesize hudPipeline; +@synthesize hudBGRAPipeline; @synthesize hudRGBAPipeline; @synthesize samplerHUDBox; @synthesize samplerHUDText; @@ -184,13 +184,14 @@ [[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:3] setMutability:MTLMutabilityImmutable]; [[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:4] setMutability:MTLMutabilityImmutable]; [[[hudPipelineDesc vertexBuffers] objectAtIndexedSubscript:5] setMutability:MTLMutabilityImmutable]; + [[[hudPipelineDesc fragmentBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable]; } #endif [[[hudPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:MTLPixelFormatBGRA8Unorm]; - hudPipeline = [[device newRenderPipelineStateWithDescriptor:hudPipelineDesc error:nil] retain]; + hudBGRAPipeline = [[device newRenderPipelineStateWithDescriptor:hudPipelineDesc error:nil] retain]; - [[[hudPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:MTLPixelFormatBGRA8Unorm]; + [[[hudPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:MTLPixelFormatRGBA8Unorm]; hudRGBAPipeline = [[device newRenderPipelineStateWithDescriptor:hudPipelineDesc error:nil] retain]; [hudPipelineDesc release]; @@ -318,7 +319,7 @@ [_fetch666ConvertOnlyPipeline release]; [_fetch888ConvertOnlyPipeline release]; [deposterizePipeline release]; - [hudPipeline release]; + [hudBGRAPipeline release]; [hudRGBAPipeline release]; [hudIndexBuffer release]; @@ -812,7 +813,6 @@ @synthesize sharedData; @synthesize colorAttachment0Desc; @synthesize pixelScalePipeline; -@synthesize outputRGBAPipeline; @synthesize outputDrawablePipeline; @synthesize drawableFormat; @synthesize bufCPUFilterDstMain; @@ -855,9 +855,8 @@ [colorAttachment0Desc setTexture:nil]; pixelScalePipeline = nil; - outputRGBAPipeline = nil; outputDrawablePipeline = nil; - drawableFormat = MTLPixelFormatInvalid; + drawableFormat = MTLPixelFormatRGBA8Unorm; _texDisplaySrcDeposterize[NDSDisplayID_Main][0] = nil; _texDisplaySrcDeposterize[NDSDisplayID_Touch][0] = nil; @@ -951,7 +950,6 @@ [texPairProcess.touch release]; [self setPixelScalePipeline:nil]; - [self setOutputRGBAPipeline:nil]; [self setOutputDrawablePipeline:nil]; [self setTexHUDCharMap:nil]; @@ -1180,14 +1178,8 @@ break; } - [[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:MTLPixelFormatBGRA8Unorm]; - [self setOutputRGBAPipeline:[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil]]; - - if ([self drawableFormat] != MTLPixelFormatInvalid) - { - [[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:[self drawableFormat]]; - [self setOutputDrawablePipeline:[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil]]; - } + [[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:[self drawableFormat]]; + [self setOutputDrawablePipeline:[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil]]; #if HAVE_OSAVAILABLE && defined(MAC_OS_X_VERSION_10_13) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_13) if (@available(macOS 10.13, *)) @@ -1197,6 +1189,7 @@ [[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:2] setMutability:MTLMutabilityImmutable]; [[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:3] setMutability:MTLMutabilityImmutable]; [[[outputPipelineDesc fragmentBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable]; + [[[outputPipelineDesc fragmentBuffers] objectAtIndexedSubscript:1] setMutability:MTLMutabilityImmutable]; } #endif @@ -1223,18 +1216,12 @@ [[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:2] setMutability:MTLMutabilityImmutable]; [[[outputPipelineDesc vertexBuffers] objectAtIndexedSubscript:3] setMutability:MTLMutabilityImmutable]; [[[outputPipelineDesc fragmentBuffers] objectAtIndexedSubscript:0] setMutability:MTLMutabilityImmutable]; + [[[outputPipelineDesc fragmentBuffers] objectAtIndexedSubscript:1] setMutability:MTLMutabilityImmutable]; } #endif - [[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:MTLPixelFormatBGRA8Unorm]; - outputRGBAPipeline = [[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil] retain]; - - if ([self drawableFormat] != MTLPixelFormatInvalid) - { - [[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:[self drawableFormat]]; - outputDrawablePipeline = [[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil] retain]; - } - + [[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:[self drawableFormat]]; + outputDrawablePipeline = [[[sharedData device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil] retain]; [outputPipelineDesc release]; // Set up processing textures. @@ -1988,6 +1975,7 @@ texDisplays:(MetalTexturePair)texDisplay mrfi:(MetalRenderFrameInfo)mrfi doYFlip:(BOOL)willFlip + doSwapRB:(BOOL)willSwapRB { // Generate the command encoder. id rce = [cb renderCommandEncoderWithDescriptor:_outputRenderPassDesc]; @@ -2011,6 +1999,7 @@ [rce setVertexBytes:_texCoordBuffer length:sizeof(_texCoordBuffer) atIndex:1]; [rce setVertexBytes:&_cdvPropertiesBuffer length:sizeof(_cdvPropertiesBuffer) atIndex:2]; [rce setVertexBytes:&doYFlip length:sizeof(uint8_t) atIndex:3]; + [rce setFragmentBytes:&willSwapRB length:sizeof(uint8_t) atIndex:1]; switch (cdp->GetPresenterProperties().mode) { @@ -2093,6 +2082,7 @@ [rce setVertexBytes:&_cdvPropertiesBuffer length:sizeof(_cdvPropertiesBuffer) atIndex:3]; [rce setVertexBytes:&doYFlip length:sizeof(uint8_t) atIndex:4]; [rce setFragmentTexture:[self texHUDCharMap] atIndex:0]; + [rce setFragmentBytes:&willSwapRB length:sizeof(uint8_t) atIndex:0]; // First, draw the inputs. if (mrfi.willDrawHUDInput) @@ -2176,7 +2166,12 @@ @autoreleasepool { - MTLTextureDescriptor *texRenderDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + // Note that this method should ALWAYS return 32-bit RGBA format, not BGRA format. + MTLPixelFormat viewDrawableFormat = [self drawableFormat]; + BOOL willSwapRB = (viewDrawableFormat == MTLPixelFormatBGRA8Unorm) ? YES : NO; + id hudPipelineState = (willSwapRB) ? [sharedData hudBGRAPipeline] : [sharedData hudRGBAPipeline]; + + MTLTextureDescriptor *texRenderDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:viewDrawableFormat width:clientWidth height:clientHeight mipmapped:NO]; @@ -2195,11 +2190,12 @@ const MetalRenderFrameInfo mrfi = [self renderFrameInfo]; [self renderForCommandBuffer:cb - outputPipelineState:[self outputRGBAPipeline] - hudPipelineState:[sharedData hudRGBAPipeline] + outputPipelineState:[self outputDrawablePipeline] + hudPipelineState:hudPipelineState texDisplays:texProcess mrfi:mrfi - doYFlip:NO]; + doYFlip:NO + doSwapRB:willSwapRB]; id bce = [cb blitCommandEncoder]; @@ -2348,13 +2344,15 @@ [oldTexTouch release]; const MetalRenderFrameInfo mrfi = [presenterObject renderFrameInfo]; + id hudPipelineState = ([presenterObject drawableFormat] == MTLPixelFormatBGRA8Unorm) ? [[presenterObject sharedData] hudBGRAPipeline] : [[presenterObject sharedData] hudRGBAPipeline]; [presenterObject renderForCommandBuffer:cb outputPipelineState:[presenterObject outputDrawablePipeline] - hudPipelineState:[[presenterObject sharedData] hudPipeline] + hudPipelineState:hudPipelineState texDisplays:_displayTexturePair mrfi:mrfi - doYFlip:NO]; + doYFlip:NO + doSwapRB:NO]; [cb addScheduledHandler:^(id block) { [presenterObject setRenderBufferState:ClientDisplayBufferState_Reading index:mrfi.renderIndex]; diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal index 8d44f0e28..ef71e8124 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal @@ -195,9 +195,11 @@ vertex HUDVtx hud_vertex(const device float2 *inPosition [[buffer(0)]], fragment float4 hud_fragment(const HUDVtx vtx [[stage_in]], const texture2d tex [[texture(0)]], - const sampler samp [[sampler(0)]]) + const sampler samp [[sampler(0)]], + const constant uint8_t &doSwapRB [[buffer(0)]]) { - return tex.sample(samp, vtx.texCoord, (vtx.lowerHUDMipMapLevel) ? level(-0.50f) : level(0.00f)) * vtx.color; + const float4 outColor = tex.sample(samp, vtx.texCoord, (vtx.lowerHUDMipMapLevel) ? level(-0.50f) : level(0.00f)) * vtx.color; + return (doSwapRB == 0) ? outColor.rgba : outColor.bgra; } #pragma mark Output Filters @@ -279,9 +281,10 @@ vertex DisplayVtxTex16 display_output_sampletex16_vertex(const device float2 *in // Input Pixel Mapping: 00 fragment float4 output_filter_nearest(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]], - const device float *inBacklightIntensity [[buffer(0)]]) + const device float *inBacklightIntensity [[buffer(0)]], + const constant uint8_t &doSwapRB [[buffer(1)]]) { - return float4(tex.sample(genSampler, vtx.texCoord).rgb * *inBacklightIntensity, 1.0f); + return (doSwapRB == 0) ? float4(tex.sample(genSampler, vtx.texCoord).rgb * *inBacklightIntensity, 1.0f) : float4(tex.sample(genSampler, vtx.texCoord).bgr * *inBacklightIntensity, 1.0f); } //--------------------------------------- @@ -289,9 +292,10 @@ fragment float4 output_filter_nearest(const DisplayVtx vtx [[stage_in]], // 02|03 fragment float4 output_filter_bilinear(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]], - const device float *inBacklightIntensity [[buffer(0)]]) + const device float *inBacklightIntensity [[buffer(0)]], + const constant uint8_t &doSwapRB [[buffer(1)]]) { - return float4(tex.sample(outputSamplerBilinear, vtx.texCoord).rgb * *inBacklightIntensity, 1.0f); + return (doSwapRB == 0) ? float4(tex.sample(outputSamplerBilinear, vtx.texCoord).rgb * *inBacklightIntensity, 1.0f) : float4(tex.sample(outputSamplerBilinear, vtx.texCoord).bgr * *inBacklightIntensity, 1.0f); } //--------------------------------------- @@ -301,7 +305,8 @@ fragment float4 output_filter_bilinear(const DisplayVtx vtx [[stage_in]], // 12|13|14|15 fragment float4 output_filter_bicubic_bspline(const DisplayVtxTex16 vtx [[stage_in]], const texture2d tex [[texture(0)]], - const device float *inBacklightIntensity [[buffer(0)]]) + const device float *inBacklightIntensity [[buffer(0)]], + const constant uint8_t &doSwapRB [[buffer(1)]]) { float2 f = fract(vtx.texCoord05); float4 wx = bicubic_weight_bspline(f.x); @@ -328,7 +333,7 @@ fragment float4 output_filter_bicubic_bspline(const DisplayVtxTex16 vtx [[stage_ + tex.sample(genSampler, vtx.texCoord14) * wx.b + tex.sample(genSampler, vtx.texCoord15) * wx.a) * wy.a; - return float4(outFragment.rgb * *inBacklightIntensity, 1.0f); + return (doSwapRB == 0) ? float4(outFragment.rgb * *inBacklightIntensity, 1.0f) : float4(outFragment.bgr * *inBacklightIntensity, 1.0f); } //--------------------------------------- @@ -338,7 +343,8 @@ fragment float4 output_filter_bicubic_bspline(const DisplayVtxTex16 vtx [[stage_ // 12|13|14|15 fragment float4 output_filter_bicubic_mitchell_netravali(const DisplayVtxTex16 vtx [[stage_in]], const texture2d tex [[texture(0)]], - const device float *inBacklightIntensity [[buffer(0)]]) + const device float *inBacklightIntensity [[buffer(0)]], + const constant uint8_t &doSwapRB [[buffer(1)]]) { float2 f = fract(vtx.texCoord05); float4 wx = bicubic_weight_mitchell_netravali(f.x); @@ -365,7 +371,7 @@ fragment float4 output_filter_bicubic_mitchell_netravali(const DisplayVtxTex16 v + tex.sample(genSampler, vtx.texCoord14) * wx.b + tex.sample(genSampler, vtx.texCoord15) * wx.a) * wy.a; - return float4(outFragment.rgb * *inBacklightIntensity, 1.0f); + return (doSwapRB == 0) ? float4(outFragment.rgb * *inBacklightIntensity, 1.0f) : float4(outFragment.bgr * *inBacklightIntensity, 1.0f); } //--------------------------------------- @@ -375,7 +381,8 @@ fragment float4 output_filter_bicubic_mitchell_netravali(const DisplayVtxTex16 v // 12|13|14|15 fragment float4 output_filter_lanczos2(const DisplayVtxTex16 vtx [[stage_in]], const texture2d tex [[texture(0)]], - const device float *inBacklightIntensity [[buffer(0)]]) + const device float *inBacklightIntensity [[buffer(0)]], + const constant uint8_t &doSwapRB [[buffer(1)]]) { const float2 f = fract(vtx.texCoord05); float4 wx = bicubic_weight_lanczos2(f.x); @@ -402,7 +409,7 @@ fragment float4 output_filter_lanczos2(const DisplayVtxTex16 vtx [[stage_in]], + tex.sample(genSampler, vtx.texCoord14) * wx.b + tex.sample(genSampler, vtx.texCoord15) * wx.a) * wy.a; - return float4(outFragment.rgb * *inBacklightIntensity, 1.0f); + return (doSwapRB == 0) ? float4(outFragment.rgb * *inBacklightIntensity, 1.0f) : float4(outFragment.bgr * *inBacklightIntensity, 1.0f); } //--------------------------------------- @@ -414,7 +421,8 @@ fragment float4 output_filter_lanczos2(const DisplayVtxTex16 vtx [[stage_in]], // 30|31|32|33|34|35 fragment float4 output_filter_lanczos3(const DisplayVtxTex16 vtx [[stage_in]], const texture2d tex [[texture(0)]], - const device float *inBacklightIntensity [[buffer(0)]]) + const device float *inBacklightIntensity [[buffer(0)]], + const constant uint8_t &doSwapRB [[buffer(1)]]) { const float2 f = fract(vtx.texCoord05); float3 wx1 = bicubic_weight_lanczos3(0.5f - f.x * 0.5f); @@ -478,7 +486,7 @@ fragment float4 output_filter_lanczos3(const DisplayVtxTex16 vtx [[stage_in]], + tex.sample(genSampler, vtx.texCoord05 + float2( 2.0, 3.0)) * wx1.b + tex.sample(genSampler, vtx.texCoord05 + float2( 3.0, 3.0)) * wx2.b) * wy2.b; - return float4(outFragment.rgb * *inBacklightIntensity, 1.0f); + return (doSwapRB == 0) ? float4(outFragment.rgb * *inBacklightIntensity, 1.0f) : float4(outFragment.bgr * *inBacklightIntensity, 1.0f); } #pragma mark NDS Emulation Functions