Cocoa Port: Do a small optimization when doing video output framebuffer fetches for Metal display views.
This commit is contained in:
parent
aeea0ea46a
commit
0c0bd5144e
|
@ -18,9 +18,12 @@
|
|||
#ifndef _METAL_RENDERER_COMMON_H_
|
||||
#define _METAL_RENDERER_COMMON_H_
|
||||
|
||||
float4 unpack_unorm1555_to_unorm8888(const ushort color16);
|
||||
float4 unpack_rgba5551_to_unorm8888(const ushort color16);
|
||||
|
||||
ushort pack_color_to_unorm5551(const float4 inColor);
|
||||
float4 pack_color_to_unorm6665(const float4 inColor);
|
||||
ushort pack_unorm8888_to_rgba5551(const float4 inColor);
|
||||
uchar4 pack_unorm8888_to_rgba6665(const float4 inColor);
|
||||
uchar4 pack_unorm8888_to_rgba8888(const float4 inColor);
|
||||
|
||||
float4 convert_unorm666X_to_unorm8888(const float4 inColor);
|
||||
|
||||
#endif // _METAL_RENDERER_COMMON_H_
|
||||
|
|
|
@ -21,7 +21,7 @@ using namespace metal;
|
|||
#include "MetalRendererCommonShaders.h"
|
||||
|
||||
|
||||
float4 unpack_unorm1555_to_unorm8888(const ushort color16)
|
||||
float4 unpack_rgba5551_to_unorm8888(const ushort color16)
|
||||
{
|
||||
return float4((float)((color16 >> 0) & 0x1F) / 31.0f,
|
||||
(float)((color16 >> 5) & 0x1F) / 31.0f,
|
||||
|
@ -29,9 +29,9 @@ float4 unpack_unorm1555_to_unorm8888(const ushort color16)
|
|||
(float)(color16 >> 15));
|
||||
}
|
||||
|
||||
ushort pack_color_to_unorm5551(const float4 inColor)
|
||||
ushort pack_unorm8888_to_rgba5551(const float4 inColor)
|
||||
{
|
||||
ushort4 color16 = (ushort4)((inColor * 31.0f) + 0.35f);
|
||||
ushort4 color16 = ushort4( (inColor * 31.0f) + 0.1f );
|
||||
|
||||
color16.g <<= 5;
|
||||
color16.b <<= 10;
|
||||
|
@ -40,7 +40,17 @@ ushort pack_color_to_unorm5551(const float4 inColor)
|
|||
return (color16.r | color16.g | color16.b | color16.a);
|
||||
}
|
||||
|
||||
float4 pack_color_to_unorm6665(const float4 inColor)
|
||||
uchar4 pack_unorm8888_to_rgba6665(const float4 inColor)
|
||||
{
|
||||
return inColor * float4(63.0f/255.0f, 63.0f/255.0f, 63.0f/255.0f, 31.0f/255.0f);
|
||||
return uchar4( (inColor * float4(63.0f, 63.0f, 63.0f, 31.0f)) + 0.1f );
|
||||
}
|
||||
|
||||
// Packs a float4 color into a uchar4: every component (including alpha) is
// multiplied by 255, offset by +0.1, and then converted to uchar4.
//
// inColor: source color. Presumably normalized to [0, 1], as the "unorm" in
//          the name suggests -- TODO confirm against callers.
// Returns: the four converted components as a uchar4.
uchar4 pack_unorm8888_to_rgba8888(const float4 inColor)
|
||||
{
|
||||
// NOTE(review): the 0.1f offset presumably protects against a scaled value
// landing just below a whole number before the conversion to uchar4 -- confirm.
return uchar4( (inColor * 255.0f) + 0.1f );
|
||||
}
|
||||
|
||||
// Rescales the RGB components of a color by 255/63 and returns them with the
// alpha component forced to 1.0.
//
// inColor: source color. Only .rgb is read; the incoming alpha is discarded.
//          Presumably holds 6-bit-per-channel values as read through an 8-bit
//          unorm texture (hence the 255/63 factor) -- TODO confirm.
// Returns: float4(rescaled rgb, 1.0).
float4 convert_unorm666X_to_unorm8888(const float4 inColor)
|
||||
{
|
||||
// The output alpha is always the constant 1.0f, regardless of inColor.a.
return float4( inColor.rgb * (255.0f/63.0f), 1.0f );
|
||||
}
|
||||
|
|
|
@ -129,8 +129,9 @@ typedef DisplayViewShaderProperties DisplayViewShaderProperties;
|
|||
id<MTLTexture> texHQ4xLUT;
|
||||
id<MTLTexture> texCurrentHQnxLUT;
|
||||
|
||||
MTLSize _fetchThreadsPerGroup;
|
||||
MTLSize _fetchThreadsPerGroupNative;
|
||||
MTLSize _fetchThreadGroupsPerGridNative;
|
||||
MTLSize _fetchThreadsPerGroupCustom;
|
||||
MTLSize _fetchThreadGroupsPerGridCustom;
|
||||
MTLSize deposterizeThreadsPerGroup;
|
||||
MTLSize deposterizeThreadGroupsPerGrid;
|
||||
|
|
|
@ -71,10 +71,10 @@
|
|||
MTLComputePipelineDescriptor *computePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
||||
[computePipelineDesc setThreadGroupSizeIsMultipleOfThreadExecutionWidth:YES];
|
||||
|
||||
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"nds_fetch555ConvertOnly"]];
|
||||
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"convert_texture_rgb555_to_unorm8888"]];
|
||||
_fetch555ConvertOnlyPipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
|
||||
|
||||
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"nds_fetch666ConvertOnly"]];
|
||||
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"convert_texture_unorm666X_to_unorm8888"]];
|
||||
_fetch666ConvertOnlyPipeline = [[device newComputePipelineStateWithDescriptor:computePipelineDesc options:MTLPipelineOptionNone reflection:nil error:nil] retain];
|
||||
|
||||
[computePipelineDesc setComputeFunction:[defaultLibrary newFunctionWithName:@"src_filter_deposterize"]];
|
||||
|
@ -99,22 +99,27 @@
|
|||
|
||||
[computePipelineDesc release];
|
||||
|
||||
size_t tw = GetNearestPositivePOT((uint32_t)[_fetch555Pipeline threadExecutionWidth]);
|
||||
while ( (tw > [_fetch555Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
|
||||
NSUInteger tw = [_fetch555Pipeline threadExecutionWidth];
|
||||
while ( ((GPU_FRAMEBUFFER_NATIVE_WIDTH % tw) != 0) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
|
||||
{
|
||||
tw >>= 1;
|
||||
}
|
||||
|
||||
size_t th = [_fetch555Pipeline maxTotalThreadsPerThreadgroup] / tw;
|
||||
NSUInteger th = [_fetch555Pipeline maxTotalThreadsPerThreadgroup] / tw;
|
||||
while ( ((GPU_FRAMEBUFFER_NATIVE_HEIGHT % th) != 0) || (th > GPU_FRAMEBUFFER_NATIVE_HEIGHT) )
|
||||
{
|
||||
th >>= 1;
|
||||
}
|
||||
|
||||
_fetchThreadsPerGroup = MTLSizeMake(tw, th, 1);
|
||||
_fetchThreadsPerGroupNative = MTLSizeMake(tw, th, 1);
|
||||
_fetchThreadGroupsPerGridNative = MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH / tw,
|
||||
GPU_FRAMEBUFFER_NATIVE_HEIGHT / th,
|
||||
1);
|
||||
|
||||
_fetchThreadsPerGroupCustom = _fetchThreadsPerGroupNative;
|
||||
_fetchThreadGroupsPerGridCustom = _fetchThreadGroupsPerGridNative;
|
||||
|
||||
deposterizeThreadsPerGroup = _fetchThreadsPerGroup;
|
||||
deposterizeThreadsPerGroup = _fetchThreadsPerGroupNative;
|
||||
deposterizeThreadGroupsPerGrid = _fetchThreadGroupsPerGridNative;
|
||||
|
||||
MTLRenderPipelineDescriptor *hudPipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
|
||||
|
@ -413,9 +418,22 @@
|
|||
|
||||
_fetchPixelBytes = dispInfo.pixelBytes;
|
||||
|
||||
const size_t tw = _fetchThreadsPerGroup.width;
|
||||
const size_t th = _fetchThreadsPerGroup.height;
|
||||
_fetchThreadGroupsPerGridCustom = MTLSizeMake((w + tw - 1) / tw, (h + th - 1) / th, 1);
|
||||
NSUInteger tw = [_fetch555Pipeline threadExecutionWidth];
|
||||
while ( ((w % tw) != 0) || (tw > w) )
|
||||
{
|
||||
tw >>= 1;
|
||||
}
|
||||
|
||||
NSUInteger th = [_fetch555Pipeline maxTotalThreadsPerThreadgroup] / tw;
|
||||
while ( ((h % th) != 0) || (th > h) )
|
||||
{
|
||||
th >>= 1;
|
||||
}
|
||||
|
||||
_fetchThreadsPerGroupCustom = MTLSizeMake(tw, th, 1);
|
||||
_fetchThreadGroupsPerGridCustom = MTLSizeMake(w / tw,
|
||||
h / th,
|
||||
1);
|
||||
|
||||
id<MTLCommandBuffer> cb = [_fetchCommandQueue commandBufferWithUnretainedReferences];
|
||||
MetalTexturePair newTexPair = [self setFetchTextureBindingsAtIndex:dispInfo.bufferIndex commandBuffer:cb];
|
||||
|
@ -498,7 +516,7 @@
|
|||
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
|
||||
|
||||
targetTexPair.main = _texDisplayPostprocessNative[NDSDisplayID_Main][index];
|
||||
}
|
||||
|
@ -507,7 +525,7 @@
|
|||
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
|
||||
|
||||
targetTexPair.main = _texDisplayPostprocessCustom[NDSDisplayID_Main][index];
|
||||
}
|
||||
|
@ -528,7 +546,7 @@
|
|||
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
|
||||
|
||||
targetTexPair.touch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index];
|
||||
}
|
||||
|
@ -537,7 +555,7 @@
|
|||
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
|
||||
|
||||
targetTexPair.touch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index];
|
||||
}
|
||||
|
@ -572,7 +590,7 @@
|
|||
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Main][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Main][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
|
||||
|
||||
targetTexPair.main = _texDisplayPostprocessNative[NDSDisplayID_Main][index];
|
||||
}
|
||||
|
@ -581,7 +599,7 @@
|
|||
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Main][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Main][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
|
||||
|
||||
targetTexPair.main = _texDisplayPostprocessCustom[NDSDisplayID_Main][index];
|
||||
}
|
||||
|
@ -594,7 +612,7 @@
|
|||
[cce setTexture:_texDisplayFetchNative[NDSDisplayID_Touch][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessNative[NDSDisplayID_Touch][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridNative
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupNative];
|
||||
|
||||
targetTexPair.touch = _texDisplayPostprocessNative[NDSDisplayID_Touch][index];
|
||||
}
|
||||
|
@ -603,7 +621,7 @@
|
|||
[cce setTexture:_texDisplayFetchCustom[NDSDisplayID_Touch][index] atIndex:0];
|
||||
[cce setTexture:_texDisplayPostprocessCustom[NDSDisplayID_Touch][index] atIndex:1];
|
||||
[cce dispatchThreadgroups:_fetchThreadGroupsPerGridCustom
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroup];
|
||||
threadsPerThreadgroup:_fetchThreadsPerGroupCustom];
|
||||
|
||||
targetTexPair.touch = _texDisplayPostprocessCustom[NDSDisplayID_Touch][index];
|
||||
}
|
||||
|
@ -1023,17 +1041,21 @@
|
|||
_texDisplayPixelScaler[NDSDisplayID_Main] = [[sharedData device] newTextureWithDescriptor:texDisplayPixelScaleDesc];
|
||||
_texDisplayPixelScaler[NDSDisplayID_Touch] = [[sharedData device] newTextureWithDescriptor:texDisplayPixelScaleDesc];
|
||||
|
||||
size_t tw = GetNearestPositivePOT((uint32_t)[[self pixelScalePipeline] threadExecutionWidth]);
|
||||
while ( (tw > [[self pixelScalePipeline] threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
|
||||
NSUInteger tw = [[self pixelScalePipeline] threadExecutionWidth];
|
||||
while ( ((newScalerWidth % tw) != 0) || (tw > newScalerWidth) )
|
||||
{
|
||||
tw >>= 1;
|
||||
}
|
||||
|
||||
const size_t th = [[self pixelScalePipeline] maxTotalThreadsPerThreadgroup] / tw;
|
||||
NSUInteger th = [[self pixelScalePipeline] maxTotalThreadsPerThreadgroup] / tw;
|
||||
while ( ((newScalerHeight % th) != 0) || (th > newScalerHeight) )
|
||||
{
|
||||
th >>= 1;
|
||||
}
|
||||
|
||||
_pixelScalerThreadsPerGroup = MTLSizeMake(tw, th, 1);
|
||||
_pixelScalerThreadGroupsPerGrid = MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH / tw,
|
||||
GPU_FRAMEBUFFER_NATIVE_HEIGHT / th,
|
||||
_pixelScalerThreadGroupsPerGrid = MTLSizeMake(newScalerWidth / tw,
|
||||
newScalerHeight / th,
|
||||
1);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -432,12 +432,7 @@ kernel void nds_fetch555(const uint2 position [[thread_position_in_grid]],
|
|||
{
|
||||
const uint h = inTexture.get_height();
|
||||
|
||||
if ( (position.x > inTexture.get_width() - 1) || (position.y > h - 1) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const float4 inColor = unpack_unorm1555_to_unorm8888( (ushort)inTexture.read(position).r );
|
||||
const float4 inColor = unpack_rgba5551_to_unorm8888( (ushort)inTexture.read(position).r );
|
||||
float3 outColor = inColor.rgb;
|
||||
|
||||
const uint line = uint( (float)position.y / ((float)h / 192.0f) );
|
||||
|
@ -454,11 +449,6 @@ kernel void nds_fetch666(const uint2 position [[thread_position_in_grid]],
|
|||
{
|
||||
const uint h = inTexture.get_height();
|
||||
|
||||
if ( (position.x > inTexture.get_width() - 1) || (position.y > h - 1) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const float4 inColor = inTexture.read(position);
|
||||
float3 outColor = inColor.rgb * float3(255.0f/63.0f);
|
||||
|
||||
|
@ -476,11 +466,6 @@ kernel void nds_fetch888(const uint2 position [[thread_position_in_grid]],
|
|||
{
|
||||
const uint h = inTexture.get_height();
|
||||
|
||||
if ( (position.x > inTexture.get_width() - 1) || (position.y > h - 1) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const float4 inColor = inTexture.read(position);
|
||||
float3 outColor = inColor.rgb;
|
||||
|
||||
|
@ -490,30 +475,20 @@ kernel void nds_fetch888(const uint2 position [[thread_position_in_grid]],
|
|||
outTexture.write(float4(outColor, 1.0f), position);
|
||||
}
|
||||
|
||||
kernel void nds_fetch555ConvertOnly(const uint2 position [[thread_position_in_grid]],
|
||||
kernel void convert_texture_rgb555_to_unorm8888(const uint2 position [[thread_position_in_grid]],
|
||||
const texture2d<ushort, access::read> inTexture [[texture(0)]],
|
||||
texture2d<float, access::write> outTexture [[texture(1)]])
|
||||
{
|
||||
if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const float4 outColor = unpack_unorm1555_to_unorm8888( (ushort)inTexture.read(position).r );
|
||||
const float4 outColor = unpack_rgba5551_to_unorm8888( (ushort)inTexture.read(position).r );
|
||||
outTexture.write(float4(outColor.rgb, 1.0f), position);
|
||||
}
|
||||
|
||||
kernel void nds_fetch666ConvertOnly(const uint2 position [[thread_position_in_grid]],
|
||||
kernel void convert_texture_unorm666X_to_unorm8888(const uint2 position [[thread_position_in_grid]],
|
||||
const texture2d<float, access::read> inTexture [[texture(0)]],
|
||||
texture2d<float, access::write> outTexture [[texture(1)]])
|
||||
{
|
||||
if ( (position.x > inTexture.get_width() - 1) || (position.y > inTexture.get_height() - 1) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const float3 outColor = inTexture.read(position).rgb * float3(255.0f/63.0f);
|
||||
outTexture.write(float4(outColor, 1.0f), position);
|
||||
const float4 outColor = convert_unorm666X_to_unorm8888( inTexture.read(position) );
|
||||
outTexture.write(outColor, position);
|
||||
}
|
||||
|
||||
float3 nds_apply_master_brightness(const float3 inColor, const uchar mode, const float intensity)
|
||||
|
|
Loading…
Reference in New Issue