MetalDevice: Support multisampling
This commit is contained in:
parent
6fbea12ed3
commit
a2f19143da
|
@ -0,0 +1,47 @@
|
|||
# Borrowed from PCSX2.
|
||||
|
||||
if(APPLE)
	# add_metal_sources(<target> <shader>...)
	#
	# Attaches Metal shader sources to <target>.
	#  - Xcode generator: the .metal files are added to the target directly so that
	#    Xcode compiles them itself.
	#  - Other generators: every shader is compiled to .air with `xcrun metal`, and the
	#    results are linked into one .metallib per language standard, which is added to
	#    the target as a bundle resource.
	#
	# The shader list may be passed either as a single quoted list ("a.metal;b.metal")
	# or as separate trailing arguments (an unquoted ${LIST} expansion).
	function(add_metal_sources target sources)
		# An unquoted list expansion at the call site only binds its first element to
		# `sources`; the remaining elements arrive in ARGN. Merge them so no shader is dropped.
		list(APPEND sources ${ARGN})

		if(CMAKE_GENERATOR MATCHES "Xcode")
			# If we're generating an xcode project, you can just add the shaders to the target and xcode will deal with them properly
			# This will make sure xcode supplies code completion, etc (if you use a custom command, it won't)
			set_target_properties(${target} PROPERTIES
				XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO INCLUDE_SOURCE
			)
			foreach(shader IN LISTS sources)
				target_sources(${target} PRIVATE ${shader})
				set_source_files_properties(${shader} PROPERTIES LANGUAGE METAL)
			endforeach()
		else()
			# generateMetallib(<std> <triple> <outputName>)
			# Builds <outputName>.metallib from `sources` for the given -std / -target pair.
			# Reads `target` and `sources` from the calling add_metal_sources() scope.
			function(generateMetallib std triple outputName)
				# The library path does not depend on the individual shaders, so compute it once
				# up front; this also keeps it defined when the shader list is empty.
				set(metallib ${CMAKE_CURRENT_BINARY_DIR}/${outputName}.metallib)
				set(MetalShaderOut)
				# The two extra flags are only passed for configurations other than Release/MinSizeRel.
				set(flags
					-ffast-math
					$<$<NOT:$<CONFIG:Release,MinSizeRel>>:-gline-tables-only>
					$<$<NOT:$<CONFIG:Release,MinSizeRel>>:-MO>
				)
				foreach(shader IN LISTS sources)
					file(RELATIVE_PATH relativeShader "${CMAKE_SOURCE_DIR}" "${shader}")
					set(shaderOut ${CMAKE_CURRENT_BINARY_DIR}/${outputName}/${relativeShader}.air)
					list(APPEND MetalShaderOut ${shaderOut})
					get_filename_component(shaderDir ${shaderOut} DIRECTORY)
					add_custom_command(OUTPUT ${shaderOut}
						COMMAND ${CMAKE_COMMAND} -E make_directory ${shaderDir}
						COMMAND xcrun metal ${flags} -std=${std} -target ${triple} -o ${shaderOut} -c ${shader}
						DEPENDS ${shader}
					)
				endforeach()
				add_custom_command(OUTPUT ${metallib}
					COMMAND xcrun metallib -o ${metallib} ${MetalShaderOut}
					DEPENDS ${MetalShaderOut}
				)
				target_sources(${target} PRIVATE ${metallib})
				set_source_files_properties(${metallib} PROPERTIES MACOSX_PACKAGE_LOCATION Resources)
			endfunction()

			# One library per Metal language version; the output names are the resource
			# names the application looks up at runtime.
			generateMetallib(macos-metal2.0 air64-apple-macos10.13 default)
			generateMetallib(macos-metal2.2 air64-apple-macos10.15 Metal22)
			generateMetallib(macos-metal2.3 air64-apple-macos11.0 Metal23)
		endif()
	endfunction()
endif()
|
|
@ -565,12 +565,17 @@ bool GPU_HW::CreateBuffers()
|
|||
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
|
||||
const u8 samples = static_cast<u8>(m_multisamples);
|
||||
|
||||
// Needed for Metal resolve.
|
||||
const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ?
|
||||
GPUTexture::Type::RWTexture :
|
||||
GPUTexture::Type::Texture;
|
||||
|
||||
if (!(m_vram_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, samples,
|
||||
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
|
||||
!(m_vram_depth_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, samples,
|
||||
GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT)) ||
|
||||
!(m_vram_read_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1,
|
||||
GPUTexture::Type::Texture, VRAM_RT_FORMAT)) ||
|
||||
!(m_vram_read_texture =
|
||||
g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) ||
|
||||
!(m_display_private_texture = g_gpu_device->CreateTexture(
|
||||
((m_downsample_mode == GPUDownsampleMode::Adaptive) ? VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) *
|
||||
m_resolution_scale,
|
||||
|
|
|
@ -264,6 +264,8 @@ if(WIN32)
|
|||
target_link_libraries(util PRIVATE WinPixEventRuntime::WinPixEventRuntime)
|
||||
endif()
|
||||
elseif(APPLE)
|
||||
include(AddMetalSources)
|
||||
|
||||
set(MAC_SOURCES
|
||||
cocoa_tools.h
|
||||
metal_device.h
|
||||
|
@ -272,6 +274,10 @@ elseif(APPLE)
|
|||
metal_stream_buffer.mm
|
||||
platform_misc_mac.mm
|
||||
)
|
||||
set(METAL_SOURCES
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/metal_shaders.metal"
|
||||
)
|
||||
set_property(GLOBAL PROPERTY UTIL_METAL_SOURCES ${METAL_SOURCES})
|
||||
target_sources(util PRIVATE ${MAC_SOURCES})
|
||||
find_library(IOK_LIBRARY IOKit REQUIRED)
|
||||
find_library(METAL_LIBRARY Metal)
|
||||
|
@ -286,6 +292,9 @@ endif()
|
|||
|
||||
function(add_util_resources target)
|
||||
if(APPLE)
|
||||
get_property(UTIL_METAL_SOURCES GLOBAL PROPERTY UTIL_METAL_SOURCES)
|
||||
add_metal_sources(${target} ${UTIL_METAL_SOURCES})
|
||||
|
||||
# Copy MoltenVK into the bundle
|
||||
unset(MOLTENVK_PATH CACHE)
|
||||
find_file(MOLTENVK_PATH NAMES
|
||||
|
|
|
@ -299,6 +299,10 @@ private:
|
|||
ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; }
|
||||
|
||||
void SetFeatures();
|
||||
bool LoadShaders();
|
||||
|
||||
id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
|
||||
id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name);
|
||||
|
||||
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source,
|
||||
const std::string_view& entry_point);
|
||||
|
@ -354,6 +358,10 @@ private:
|
|||
MetalStreamBuffer m_uniform_buffer;
|
||||
MetalStreamBuffer m_texture_upload_buffer;
|
||||
|
||||
id<MTLLibrary> m_shaders = nil;
|
||||
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>>
|
||||
m_resolve_pipelines;
|
||||
|
||||
id<MTLCommandBuffer> m_upload_cmdbuf = nil;
|
||||
id<MTLBlitCommandEncoder> m_upload_encoder = nil;
|
||||
id<MTLBlitCommandEncoder> m_inline_upload_encoder = nil;
|
||||
|
|
|
@ -174,6 +174,12 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr
|
|||
CreateCommandBuffer();
|
||||
RenderBlankFrame();
|
||||
|
||||
if (!LoadShaders())
|
||||
{
|
||||
Log_ErrorPrint("Failed to load shaders.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!CreateBuffers())
|
||||
{
|
||||
Log_ErrorPrintf("Failed to create buffers.");
|
||||
|
@ -198,7 +204,7 @@ void MetalDevice::SetFeatures()
|
|||
}
|
||||
|
||||
m_max_multisamples = 0;
|
||||
for (u32 multisamples = 1; multisamples < 16; multisamples++)
|
||||
for (u32 multisamples = 1; multisamples < 16; multisamples *= 2)
|
||||
{
|
||||
if (![m_device supportsTextureSampleCount:multisamples])
|
||||
break;
|
||||
|
@ -211,11 +217,71 @@ void MetalDevice::SetFeatures()
|
|||
m_features.supports_texture_buffers = true;
|
||||
m_features.texture_buffers_emulated_with_ssbo = true;
|
||||
m_features.geometry_shaders = false;
|
||||
m_features.partial_msaa_resolve = true;
|
||||
m_features.partial_msaa_resolve = false;
|
||||
m_features.shader_cache = true;
|
||||
m_features.pipeline_cache = false;
|
||||
}
|
||||
|
||||
// Loads the precompiled shader library into m_shaders.
// Candidate libraries are tried from newest to oldest ("Metal23", "Metal22", "Metal21",
// "default"); the first one that loads wins. Returns false if none could be loaded.
// On success m_shaders holds exactly one owned reference, which DestroyDevice() releases.
bool MetalDevice::LoadShaders()
{
  @autoreleasepool
  {
    // Returns an owned (+1) library for "<name>.metallib", or nil if the file cannot be
    // found or fails to load.
    auto try_lib = [this](NSString* name) -> id<MTLLibrary> {
      NSBundle* bundle = [NSBundle mainBundle];
      NSString* path = [bundle pathForResource:name ofType:@"metallib"];
      if (path == nil)
      {
        // Xcode places it alongside the binary.
        path = [NSString stringWithFormat:@"%@/%@.metallib", [bundle bundlePath], name];
        if (![[NSFileManager defaultManager] fileExistsAtPath:path])
          return nil;
      }

      // new* methods hand back a reference the caller already owns, so it must not be
      // retained again: m_shaders is only released once on teardown, and an extra retain
      // here would leak the library.
      return [m_device newLibraryWithFile:path error:nil];
    };

    NSString* const candidates[] = {@"Metal23", @"Metal22", @"Metal21", @"default"};
    for (NSString* candidate : candidates)
    {
      m_shaders = try_lib(candidate);
      if (m_shaders != nil)
        return true;
    }

    return false;
  }
}
|
||||
|
||||
// Looks up the shader entry point called `name` in `library`.
// The result comes straight from -newFunctionWithName:, so the caller owns the returned
// reference and is responsible for releasing it. Returns nil when the lookup yields nothing.
id<MTLFunction> MetalDevice::GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name)
{
  return [library newFunctionWithName:name];
}
|
||||
|
||||
// Builds a compute pipeline state around `function` on m_device.
// `name` is optional and, when given, becomes the descriptor label.
// Returns an owned pipeline state on success; on failure the error is logged and nil is returned.
id<MTLComputePipelineState> MetalDevice::CreateComputePipeline(id<MTLFunction> function, NSString* name)
{
  MTLComputePipelineDescriptor* descriptor = [[MTLComputePipelineDescriptor alloc] init];
  descriptor.computeFunction = function;
  if (name != nil)
    descriptor.label = name;

  NSError* error = nil;
  id<MTLComputePipelineState> state = [m_device newComputePipelineStateWithDescriptor:descriptor
                                                                              options:MTLPipelineOptionNone
                                                                           reflection:nil
                                                                                error:&error];

  // The descriptor is only needed while the pipeline is being created.
  [descriptor release];

  if (state != nil)
    return state;

  LogNSError(error, "Create compute pipeline failed:");
  return nil;
}
|
||||
|
||||
void MetalDevice::DestroyDevice()
|
||||
{
|
||||
WaitForPreviousCommandBuffers();
|
||||
|
@ -243,6 +309,17 @@ void MetalDevice::DestroyDevice()
|
|||
[it.second release];
|
||||
m_cleanup_objects.clear();
|
||||
|
||||
for (auto& it : m_resolve_pipelines)
|
||||
{
|
||||
if (it.second != nil)
|
||||
[it.second release];
|
||||
}
|
||||
m_resolve_pipelines.clear();
|
||||
if (m_shaders != nil)
|
||||
{
|
||||
[m_shaders release];
|
||||
m_shaders = nil;
|
||||
}
|
||||
if (m_queue != nil)
|
||||
{
|
||||
[m_queue release];
|
||||
|
@ -736,7 +813,7 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
|
|||
|
||||
// General
|
||||
const MTLPrimitiveType primitive = primitives[static_cast<u8>(config.primitive)];
|
||||
desc.rasterSampleCount = config.per_sample_shading ? config.samples : 1;
|
||||
desc.rasterSampleCount = config.samples;
|
||||
|
||||
// Metal-specific stuff
|
||||
desc.vertexBuffers[0].mutability = MTLMutabilityImmutable;
|
||||
|
@ -959,6 +1036,15 @@ std::unique_ptr<GPUTexture> MetalDevice::CreateTexture(u32 width, u32 height, u3
|
|||
desc.depth = levels;
|
||||
desc.pixelFormat = pixel_format;
|
||||
desc.mipmapLevelCount = levels;
|
||||
if (samples > 1)
|
||||
{
|
||||
desc.textureType = (layers > 1) ? MTLTextureType2DMultisampleArray : MTLTextureType2DMultisample;
|
||||
desc.sampleCount = samples;
|
||||
}
|
||||
else if (layers > 1)
|
||||
{
|
||||
desc.textureType = MTLTextureType2DArray;
|
||||
}
|
||||
|
||||
switch (type)
|
||||
{
|
||||
|
@ -1339,30 +1425,62 @@ void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d
|
|||
// Resolves the multisampled texture `src` into the single-sampled texture `dst` using a
// compute kernel from m_shaders.
//
// Only level 0 / layer 0 of `dst` is supported. The region arguments are validated in debug
// builds, but the dispatch below always covers the full extent of `src`.
//
// Pipelines are created lazily, one per (source format, destination format) pair, and cached
// in m_resolve_pipelines; the cache owns one reference to each pipeline, which DestroyDevice()
// releases. Failure to obtain the kernel or the pipeline is fatal (Panic).
void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
                                       GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
{
  DebugAssert((src_x + width) <= src->GetWidth());
  DebugAssert((src_y + height) <= src->GetHeight());
  DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
  DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
  DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
  DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());

  // Only does first level for now..
  DebugAssert(dst_level == 0 && dst_layer == 0);

  // The source format must come from the source texture: it selects between the depth and
  // colour kernels and forms half of the pipeline cache key.
  const GPUTexture::Format src_format = src->GetFormat();
  const GPUTexture::Format dst_format = dst->GetFormat();

  id<MTLComputePipelineState> resolve_pipeline = nil;
  const auto cached = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(),
                                   [src_format, dst_format](const auto& it) {
                                     return it.first.first == src_format && it.first.second == dst_format;
                                   });
  if (cached != m_resolve_pipelines.end())
  {
    resolve_pipeline = cached->second;
  }
  else
  {
    // Need to compile it.
    @autoreleasepool
    {
      const bool is_depth = GPUTexture::IsDepthFormat(src_format);

      // GetFunctionFromLibrary() returns an owned reference; the function is only needed
      // while the pipeline is built, so hand it to the pool.
      id<MTLFunction> function =
        [GetFunctionFromLibrary(m_shaders, is_depth ? @"depthResolveKernel" : @"colorResolveKernel") autorelease];
      if (function == nil)
        Panic("Failed to get resolve kernel");

      // CreateComputePipeline() returns an owned reference, which is transferred to the cache.
      resolve_pipeline = CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve");
      m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline);
    }
  }
  if (resolve_pipeline == nil)
    Panic("Failed to get resolve pipeline");

  // The compute encoder is opened on the render command buffer, so any render pass in
  // progress is ended first.
  if (InRenderPass())
    EndRenderPass();

  // Threadgroup shape: one execution width wide, as tall as the pipeline limit allows.
  // The grid is rounded up to whole threadgroups, so it may extend past the texture edge.
  const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth;
  const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1);
  const MTLSize threadgroupsInGrid =
    MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width,
                (src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1);

  // Texture slot 0 is the multisampled input, slot 1 the resolved output.
  id<MTLComputeCommandEncoder> computeEncoder = [m_render_cmdbuf computeCommandEncoder];
  [computeEncoder setComputePipelineState:resolve_pipeline];
  [computeEncoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
  [computeEncoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
  [computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
  [computeEncoder endEncoding];
}
|
||||
|
||||
void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/// A custom resolve kernel that averages color at all sample points.
|
||||
#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
// https://developer.apple.com/documentation/metal/metal_sample_code_library/improving_edge-rendering_quality_with_multisample_antialiasing_msaa?language=objc
|
||||
// Resolves one pixel of a multisampled colour texture by averaging all of its samples.
//   texture(0): multisampled input, read per sample.
//   texture(1): single-sampled output, written once per pixel.
//   gid:        pixel coordinate handled by this thread.
kernel void
colorResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
                   texture2d<float, access::write> resolvedTexture [[texture(1)]],
                   uint2 gid [[thread_position_in_grid]])
{
  // The host dispatches whole threadgroups, so the grid can overhang the texture edges.
  // Threads that fall outside either texture must not read or write.
  if (gid.x >= multisampledTexture.get_width() || gid.y >= multisampledTexture.get_height() ||
      gid.x >= resolvedTexture.get_width() || gid.y >= resolvedTexture.get_height())
  {
    return;
  }

  const uint count = multisampledTexture.get_num_samples();

  float4 resolved_color = 0;

  for (uint i = 0; i < count; ++i)
  {
    resolved_color += multisampledTexture.read(gid, i);
  }

  resolved_color /= count;

  resolvedTexture.write(resolved_color, gid);
}
|
||||
|
||||
// Resolves one pixel of a multisampled texture by averaging the red channel of all of its
// samples and writing the result to the red channel of the output (other channels are 0).
//   texture(0): multisampled input, read per sample.
//   texture(1): single-sampled output, written once per pixel.
//   gid:        pixel coordinate handled by this thread.
kernel void
depthResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
                   texture2d<float, access::write> resolvedTexture [[texture(1)]],
                   uint2 gid [[thread_position_in_grid]])
{
  // The host dispatches whole threadgroups, so the grid can overhang the texture edges.
  // Threads that fall outside either texture must not read or write.
  if (gid.x >= multisampledTexture.get_width() || gid.y >= multisampledTexture.get_height() ||
      gid.x >= resolvedTexture.get_width() || gid.y >= resolvedTexture.get_height())
  {
    return;
  }

  const uint count = multisampledTexture.get_num_samples();

  float resolved_depth = 0;

  for (uint i = 0; i < count; ++i)
  {
    resolved_depth += multisampledTexture.read(gid, i).r;
  }

  resolved_depth /= count;

  resolvedTexture.write(float4(resolved_depth, 0, 0, 0), gid);
}
|
Loading…
Reference in New Issue