diff --git a/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj b/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj index e0f27d053..fecab84e0 100644 --- a/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj +++ b/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj @@ -146,6 +146,8 @@ AB3BF4381E25D9AE003E2B24 /* DisplayViewCALayer.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3BF4371E25D9AE003E2B24 /* DisplayViewCALayer.mm */; }; AB3BF4391E25D9AE003E2B24 /* DisplayViewCALayer.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3BF4371E25D9AE003E2B24 /* DisplayViewCALayer.mm */; }; AB3BF43A1E25D9AE003E2B24 /* DisplayViewCALayer.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3BF4371E25D9AE003E2B24 /* DisplayViewCALayer.mm */; }; + AB3BF43E1E26289E003E2B24 /* MacMetalDisplayView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3BF43B1E26289E003E2B24 /* MacMetalDisplayView.mm */; }; + AB3BF4421E262959003E2B24 /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB3BF4401E262943003E2B24 /* Metal.framework */; settings = {ATTRIBUTES = (Weak, ); }; }; AB3E34C9134AF4500056477A /* cocoa_output.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3E34C8134AF4500056477A /* cocoa_output.mm */; }; AB40562A169F5DBB0016AC3E /* assembler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB405600169F5DBB0016AC3E /* assembler.cpp */; }; AB40562B169F5DBB0016AC3E /* assembler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB405600169F5DBB0016AC3E /* assembler.cpp */; }; @@ -245,6 +247,9 @@ AB4FCEBD1692AB82000F498F /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB4FCEBC1692AB82000F498F /* Accelerate.framework */; }; AB4FCEBE1692AB82000F498F /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB4FCEBC1692AB82000F498F /* Accelerate.framework */; }; AB4FCEBF1692AB82000F498F /* Accelerate.framework in Frameworks */ = {isa = 
PBXBuildFile; fileRef = AB4FCEBC1692AB82000F498F /* Accelerate.framework */; }; + AB54718B1E27610500508C5C /* MacMetalDisplayViewShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB54718A1E27610500508C5C /* MacMetalDisplayViewShaders.metal */; }; + AB54718C1E27610500508C5C /* MacMetalDisplayViewShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB54718A1E27610500508C5C /* MacMetalDisplayViewShaders.metal */; }; + AB54718D1E27610500508C5C /* MacMetalDisplayViewShaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = AB54718A1E27610500508C5C /* MacMetalDisplayViewShaders.metal */; }; AB5648FF186E6EA8002740F4 /* cocoa_slot2.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB5648FE186E6EA8002740F4 /* cocoa_slot2.mm */; }; AB564900186E6EA8002740F4 /* cocoa_slot2.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB5648FE186E6EA8002740F4 /* cocoa_slot2.mm */; }; AB564901186E6EA8002740F4 /* cocoa_slot2.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB5648FE186E6EA8002740F4 /* cocoa_slot2.mm */; }; @@ -278,6 +283,10 @@ AB6FBEF6139B6258007BB045 /* slot1_retail_nand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB6FBEF5139B6258007BB045 /* slot1_retail_nand.cpp */; }; AB74EC8A1738499C0026C41E /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB74EC891738499C0026C41E /* Carbon.framework */; }; AB75226E14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns in Resources */ = {isa = PBXBuildFile; fileRef = AB75226D14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns */; }; + AB78B5C11E384F2100297FED /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB3BF4401E262943003E2B24 /* Metal.framework */; settings = {ATTRIBUTES = (Weak, ); }; }; + AB78B5C21E384F2200297FED /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB3BF4401E262943003E2B24 /* Metal.framework */; settings = {ATTRIBUTES = (Weak, ); }; }; + AB78B5C31E384F4F00297FED /* MacMetalDisplayView.mm in Sources */ = {isa = PBXBuildFile; fileRef = 
AB3BF43B1E26289E003E2B24 /* MacMetalDisplayView.mm */; }; + AB78B5C41E384F4F00297FED /* MacMetalDisplayView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3BF43B1E26289E003E2B24 /* MacMetalDisplayView.mm */; }; AB796C9C15CDCB0F00C59155 /* arm_jit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB796C9B15CDCB0F00C59155 /* arm_jit.cpp */; }; AB796C9F15CDCB0F00C59155 /* arm_jit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB796C9B15CDCB0F00C59155 /* arm_jit.cpp */; }; AB796CA715CDCBA200C59155 /* KeyNames.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB02475B13886BF300E9F9AB /* KeyNames.plist */; }; @@ -1361,6 +1370,9 @@ AB3BF4321E2562F2003E2B24 /* QuartzCore.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = QuartzCore.framework; path = System/Library/Frameworks/QuartzCore.framework; sourceTree = SDKROOT; }; AB3BF4361E25D6B4003E2B24 /* DisplayViewCALayer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DisplayViewCALayer.h; sourceTree = ""; }; AB3BF4371E25D9AE003E2B24 /* DisplayViewCALayer.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = DisplayViewCALayer.mm; sourceTree = ""; }; + AB3BF43B1E26289E003E2B24 /* MacMetalDisplayView.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MacMetalDisplayView.mm; sourceTree = ""; }; + AB3BF43F1E2628B6003E2B24 /* MacMetalDisplayView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MacMetalDisplayView.h; sourceTree = ""; }; + AB3BF4401E262943003E2B24 /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; }; AB3E34C7134AF4500056477A /* cocoa_output.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cocoa_output.h; sourceTree = ""; }; 
AB3E34C8134AF4500056477A /* cocoa_output.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = cocoa_output.mm; sourceTree = ""; }; AB4055ED169F59380016AC3E /* AsmJit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AsmJit.h; sourceTree = ""; }; @@ -1436,6 +1448,7 @@ AB47B52C18A3F722009A42AF /* xbrz.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = xbrz.cpp; sourceTree = ""; }; AB4C81E31B21676C00ACECD5 /* hq3x.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hq3x.cpp; sourceTree = ""; }; AB4FCEBC1692AB82000F498F /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; }; + AB54718A1E27610500508C5C /* MacMetalDisplayViewShaders.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = MacMetalDisplayViewShaders.metal; sourceTree = ""; }; AB5648FD186E6EA8002740F4 /* cocoa_slot2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cocoa_slot2.h; sourceTree = ""; }; AB5648FE186E6EA8002740F4 /* cocoa_slot2.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = cocoa_slot2.mm; sourceTree = ""; }; AB564902186E6EBC002740F4 /* Slot2WindowDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Slot2WindowDelegate.h; sourceTree = ""; }; @@ -1902,6 +1915,7 @@ AB564908186E6F1F002740F4 /* ForceFeedback.framework in Frameworks */, ABC572101344347000E7B0B1 /* Foundation.framework in Frameworks */, AB350BA51478AC96007165AC /* IOKit.framework in Frameworks */, + AB3BF4421E262959003E2B24 /* Metal.framework in Frameworks */, ABC570D5134431DA00E7B0B1 /* OpenGL.framework in Frameworks */, 
AB3BF4341E256309003E2B24 /* QuartzCore.framework in Frameworks */, AB4676F314AB12D60002FF94 /* libz.dylib in Frameworks */, @@ -1922,6 +1936,7 @@ AB564907186E6F0C002740F4 /* ForceFeedback.framework in Frameworks */, AB796D6915CDCBA200C59155 /* Foundation.framework in Frameworks */, AB796D6A15CDCBA200C59155 /* IOKit.framework in Frameworks */, + AB78B5C21E384F2200297FED /* Metal.framework in Frameworks */, AB796D6B15CDCBA200C59155 /* OpenGL.framework in Frameworks */, AB3BF4331E2562F2003E2B24 /* QuartzCore.framework in Frameworks */, AB796D6C15CDCBA200C59155 /* libz.dylib in Frameworks */, @@ -1942,6 +1957,7 @@ AB8F3D251A53AC2600A80BF6 /* ForceFeedback.framework in Frameworks */, AB8F3D261A53AC2600A80BF6 /* Foundation.framework in Frameworks */, AB8F3D271A53AC2600A80BF6 /* IOKit.framework in Frameworks */, + AB78B5C11E384F2100297FED /* Metal.framework in Frameworks */, AB8F3D281A53AC2600A80BF6 /* OpenGL.framework in Frameworks */, AB3BF4351E256309003E2B24 /* QuartzCore.framework in Frameworks */, AB8F3D291A53AC2600A80BF6 /* libz.dylib in Frameworks */, @@ -2032,6 +2048,7 @@ AB564906186E6F0C002740F4 /* ForceFeedback.framework */, 29B97325FDCFA39411CA2CEA /* Foundation.framework */, AB350BA41478AC96007165AC /* IOKit.framework */, + AB3BF4401E262943003E2B24 /* Metal.framework */, ABC570D4134431DA00E7B0B1 /* OpenGL.framework */, AB3BF4321E2562F2003E2B24 /* QuartzCore.framework */, AB0A0D1914AACA9600E83E91 /* libz.dylib */, @@ -2389,11 +2406,13 @@ AB29B32F16D4BEBF000EF671 /* InputManager.h */, AB3ACB6E14C2361100D7D192 /* inputPrefsView.h */, AB01005C170D07AF00D70FBE /* InputProfileController.h */, + AB3BF43F1E2628B6003E2B24 /* MacMetalDisplayView.h */, AB3BF4051E22FEA8003E2B24 /* MacOGLDisplayView.h */, AB3ACB7014C2361100D7D192 /* preferencesWindowDelegate.h */, ABAF0A3F1A96E67200B95B75 /* RomInfoPanel.h */, AB564902186E6EBC002740F4 /* Slot2WindowDelegate.h */, ABF2B9F81690412A000FF7C0 /* troubleshootingWindowDelegate.h */, + AB54718A1E27610500508C5C /* 
MacMetalDisplayViewShaders.metal */, AB3ACB6714C2361100D7D192 /* appDelegate.mm */, AB3ACB6914C2361100D7D192 /* cheatWindowDelegate.mm */, AB3BF4371E25D9AE003E2B24 /* DisplayViewCALayer.mm */, @@ -2403,6 +2422,7 @@ AB29B33016D4BEBF000EF671 /* InputManager.mm */, AB3ACB6F14C2361100D7D192 /* inputPrefsView.mm */, AB01005D170D07B000D70FBE /* InputProfileController.mm */, + AB3BF43B1E26289E003E2B24 /* MacMetalDisplayView.mm */, AB3BF4011E22FE01003E2B24 /* MacOGLDisplayView.mm */, AB3ACB7114C2361100D7D192 /* preferencesWindowDelegate.mm */, ABAF0A401A96E67200B95B75 /* RomInfoPanel.mm */, @@ -3856,12 +3876,14 @@ ABD1FEFE1345AC8400AF11D1 /* wifi.cpp in Sources */, ABFEA8051BB4EC1000B08C25 /* ftbbox.c in Sources */, AB9038B317C5ED2200F410BD /* slot1_retail_mcrom.cpp in Sources */, + AB54718D1E27610500508C5C /* MacMetalDisplayViewShaders.metal in Sources */, ABD1FF691345ACBF00AF11D1 /* xstring.cpp in Sources */, ABD104281346653B00AF11D1 /* main.m in Sources */, AB2ABA411C9F9CFA00173B15 /* rsemaphore.c in Sources */, ABA6574B14511EC90077E5E9 /* cocoa_cheat.mm in Sources */, ABD1041D1346652500AF11D1 /* cocoa_core.mm in Sources */, AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */, + AB3BF43E1E26289E003E2B24 /* MacMetalDisplayView.mm in Sources */, AB3BF4041E22FE01003E2B24 /* MacOGLDisplayView.mm in Sources */, ABE7F53E13EE1C7900FD3A71 /* cocoa_firmware.mm in Sources */, ABD1041C1346652500AF11D1 /* cocoa_input.mm in Sources */, @@ -4043,6 +4065,7 @@ AB2EE12C17D57ED500F68622 /* slot1_retail_mcrom_debug.cpp in Sources */, AB5648FF186E6EA8002740F4 /* cocoa_slot2.mm in Sources */, AB796D2A15CDCBA200C59155 /* slot1_r4.cpp in Sources */, + AB78B5C41E384F4F00297FED /* MacMetalDisplayView.mm in Sources */, AB796D2C15CDCBA200C59155 /* slot1_retail_nand.cpp in Sources */, AB35BD8F1DEBF40800844310 /* encoding_utf.c in Sources */, ABE6840C189E33BC007FD69C /* OGLDisplayOutput.cpp in Sources */, @@ -4101,6 +4124,7 @@ AB000DD01CCC6B0700413F02 /* retro_stat.c in Sources */, 
AB796D5B15CDCBA200C59155 /* 2xsai.cpp in Sources */, ABADF1181DEA4C1200A142B1 /* Database.cpp in Sources */, + AB54718B1E27610500508C5C /* MacMetalDisplayViewShaders.metal in Sources */, AB796D5C15CDCBA200C59155 /* bilinear.cpp in Sources */, ABA7316B1BB51FDC00B26147 /* psaux.c in Sources */, AB796D5D15CDCBA200C59155 /* epx.cpp in Sources */, @@ -4174,6 +4198,7 @@ buildActionMask = 2147483647; files = ( ABFEA8CC1BB4EC1100B08C25 /* smooth.c in Sources */, + AB54718C1E27610500508C5C /* MacMetalDisplayViewShaders.metal in Sources */, ABFEA8421BB4EC1100B08C25 /* ftpatent.c in Sources */, ABFEA8A51BB4EC1100B08C25 /* sfnt.c in Sources */, ABFEA81B1BB4EC1000B08C25 /* ftfntfmt.c in Sources */, @@ -4236,6 +4261,7 @@ AB8F3C921A53AC2600A80BF6 /* firmware.cpp in Sources */, AB8F3C941A53AC2600A80BF6 /* gfx3d.cpp in Sources */, AB000DD41CCC6B3D00413F02 /* retro_dirent.c in Sources */, + AB78B5C31E384F4F00297FED /* MacMetalDisplayView.mm in Sources */, AB8F3C951A53AC2600A80BF6 /* GPU.cpp in Sources */, AB8F3C971A53AC2600A80BF6 /* guid.cpp in Sources */, AB8F3C981A53AC2600A80BF6 /* header.cpp in Sources */, diff --git a/desmume/src/frontend/cocoa/DefaultUserPrefs.plist b/desmume/src/frontend/cocoa/DefaultUserPrefs.plist index 7cb823a8c..cafd98073 100644 --- a/desmume/src/frontend/cocoa/DefaultUserPrefs.plist +++ b/desmume/src/frontend/cocoa/DefaultUserPrefs.plist @@ -90,6 +90,8 @@ General_AutoloadROMOption 10000 + General_DisplayViewsPreferMetal + General_DisplayWindowRestorableStates General_DoNotAskMigrate diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.h b/desmume/src/frontend/cocoa/cocoa_GPU.h index b4b9cadb2..ce0142a9c 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.h +++ b/desmume/src/frontend/cocoa/cocoa_GPU.h @@ -27,7 +27,7 @@ #endif #if defined(MAC_OS_X_VERSION_10_11) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_11) -//#define ENABLE_APPLE_METAL +#define ENABLE_APPLE_METAL #endif class GPUEventHandlerOSX; diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.mm 
b/desmume/src/frontend/cocoa/cocoa_GPU.mm index a1f8b4a5e..c545f82e7 100644 --- a/desmume/src/frontend/cocoa/cocoa_GPU.mm +++ b/desmume/src/frontend/cocoa/cocoa_GPU.mm @@ -156,7 +156,7 @@ public: fetchObject = NULL; #ifdef ENABLE_APPLE_METAL - if (IsOSXVersionSupported(10, 11, 0)) + if (IsOSXVersionSupported(10, 11, 0) && [[NSUserDefaults standardUserDefaults] boolForKey:@"General_DisplayViewsPreferMetal"]) { fetchObject = new MacMetalFetchObject; if (fetchObject->GetClientData() == nil) diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h new file mode 100644 index 000000000..196ad9272 --- /dev/null +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.h @@ -0,0 +1,261 @@ +/* + Copyright (C) 2017 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . 
+ */ + +#ifndef _MAC_METALDISPLAYVIEW_H +#define _MAC_METALDISPLAYVIEW_H + +#import +#import + +#import "DisplayViewCALayer.h" +#import "../cocoa_GPU.h" +#include "../ClientDisplayView.h" + +#ifdef BOOL +#undef BOOL +#endif + +class MacMetalFetchObject; +class MacMetalDisplayView; + +struct DisplayViewShaderProperties +{ + float width; + float height; + float rotation; + float viewScale; + uint32_t lowerHUDMipMapLevel; +}; +typedef DisplayViewShaderProperties DisplayViewShaderProperties; + +@interface MetalDisplayViewSharedData : MacClientSharedObject +{ + id device; + id commandQueue; + id defaultLibrary; + + id load16To32Pipeline; + id deposterizePipeline; + id hudPipeline; + + id _fetchEncoder; + + id samplerHUDBox; + id samplerHUDText; + + id hudIndexBuffer; + id _bufDisplayFetchNative[2][2]; + id _bufDisplayFetchCustom[2][2]; + + id texDisplayFetch16NativeMain; + id texDisplayFetch16NativeTouch; + id texDisplayFetch32NativeMain; + id texDisplayFetch32NativeTouch; + id texDisplayFetch16CustomMain; + id texDisplayFetch16CustomTouch; + id texDisplayFetch32CustomMain; + id texDisplayFetch32CustomTouch; + + id texLQ2xLUT; + id texHQ2xLUT; + id texHQ3xLUT; + id texHQ4xLUT; + id texCurrentHQnxLUT; + + MTLSize load16To32ThreadsPerGroup; + MTLSize load16To32ThreadGroupsPerGridNative; + MTLSize load16To32ThreadGroupsPerGridCustom; + MTLSize deposterizeThreadsPerGroup; + MTLSize deposterizeThreadGroupsPerGrid; + + size_t displayFetchNativeBufferSize; + size_t displayFetchCustomBufferSize; + + pthread_mutex_t _mutexFetch; +} + +@property (readonly, nonatomic) id device; +@property (readonly, nonatomic) id commandQueue; +@property (readonly, nonatomic) id defaultLibrary; + +@property (readonly, nonatomic) id load16To32Pipeline; +@property (readonly, nonatomic) id deposterizePipeline; +@property (readonly, nonatomic) id hudPipeline; +@property (readonly, nonatomic) id samplerHUDBox; +@property (readonly, nonatomic) id samplerHUDText; + +@property (readonly, nonatomic) id 
hudIndexBuffer; + +@property (readonly, nonatomic) id texDisplayFetch16NativeMain; +@property (readonly, nonatomic) id texDisplayFetch16NativeTouch; +@property (readonly, nonatomic) id texDisplayFetch32NativeMain; +@property (readonly, nonatomic) id texDisplayFetch32NativeTouch; +@property (retain) id texDisplayFetch16CustomMain; +@property (retain) id texDisplayFetch16CustomTouch; +@property (retain) id texDisplayFetch32CustomMain; +@property (retain) id texDisplayFetch32CustomTouch; + +@property (readonly, nonatomic) id texLQ2xLUT; +@property (readonly, nonatomic) id texHQ2xLUT; +@property (readonly, nonatomic) id texHQ3xLUT; +@property (readonly, nonatomic) id texHQ4xLUT; +@property (retain) id texCurrentHQnxLUT; + +@property (assign) size_t displayFetchNativeBufferSize; +@property (assign) size_t displayFetchCustomBufferSize; + +@property (readonly, nonatomic) MTLSize load16To32ThreadsPerGroup; +@property (readonly, nonatomic) MTLSize load16To32ThreadGroupsPerGridNative; +@property (assign) MTLSize load16To32ThreadGroupsPerGridCustom; +@property (readonly, nonatomic) MTLSize deposterizeThreadsPerGroup; +@property (readonly, nonatomic) MTLSize deposterizeThreadGroupsPerGrid; + +- (void) setFetchBuffersWithDisplayInfo:(const NDSDisplayInfo &)dispInfo; +- (void) fetchFromBufferIndex:(const u8)index; +- (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex; +- (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex; +- (void) convertFetch16To32UsingEncoder:(id)cce isMainNative:(BOOL)isMainNative isTouchNative:(BOOL)isTouchNative; + +@end + +@interface DisplayViewMetalLayer : CAMetalLayer +{ + MacMetalDisplayView *_cdv; + MetalDisplayViewSharedData *sharedData; + + MTLRenderPassDescriptor *_outputRenderPassDesc; + MTLRenderPassColorAttachmentDescriptor *colorAttachment0Desc; + id pixelScalePipeline; + id displayOutputPipeline; + + id _cdvPropertiesBuffer; + id _displayVtxPositionBuffer; + 
id _displayTexCoordBuffer; + id _hudVtxPositionBuffer; + id _hudTexCoordBuffer; + id bufCPUFilterSrcMain; + id bufCPUFilterSrcTouch; + id bufCPUFilterDstMain; + id bufCPUFilterDstTouch; + + id _texDisplaySrcDeposterize[2][2]; + id texDisplayPixelScaleMain; + id texDisplayPixelScaleTouch; + id _texDisplayOutput[2]; + id texHUDCharMap; + + MTLSize _pixelScalerThreadsPerGroup; + MTLSize _pixelScalerThreadGroupsPerGrid; + + BOOL needsViewportUpdate; + BOOL needsRotationScaleUpdate; + BOOL needsScreenVerticesUpdate; + BOOL needsHUDVerticesUpdate; + + dispatch_semaphore_t availableResources; +} + +@property (assign, nonatomic) MetalDisplayViewSharedData *sharedData; +@property (readonly, nonatomic) MTLRenderPassColorAttachmentDescriptor *colorAttachment0Desc; +@property (retain) id pixelScalePipeline; +@property (retain) id displayOutputPipeline; +@property (retain) id bufCPUFilterSrcMain; +@property (retain) id bufCPUFilterSrcTouch; +@property (retain) id bufCPUFilterDstMain; +@property (retain) id bufCPUFilterDstTouch; +@property (retain) id texDisplayPixelScaleMain; +@property (retain) id texDisplayPixelScaleTouch; +@property (retain) id texHUDCharMap; +@property (assign) BOOL needsViewportUpdate; +@property (assign) BOOL needsRotationScaleUpdate; +@property (assign) BOOL needsScreenVerticesUpdate; +@property (assign) BOOL needsHUDVerticesUpdate; +@property (assign, nonatomic) VideoFilterTypeID pixelScaler; +@property (assign, nonatomic) OutputFilterTypeID outputFilter; + +- (id) newCommandBuffer; +- (void) setupLayer; +- (void) resizeCPUPixelScalerUsingFilterID:(const VideoFilterTypeID)filterID; +- (void) copyHUDFontUsingFace:(const FT_Face &)fontFace size:(const size_t)glyphSize tileSize:(const size_t)glyphTileSize info:(GlyphInfo *)glyphInfo; +- (void) processDisplays; +- (void) renderToDrawable; + +@end + +#pragma mark - + +class MacMetalFetchObject : public GPUClientFetchObject +{ +protected: + bool _useCPUFilterPipeline; + uint32_t *_srcNativeCloneMaster; + 
uint32_t *_srcNativeClone[2][2]; + pthread_rwlock_t _srcCloneRWLock[2][2]; + + virtual void _FetchNativeDisplayByID(const NDSDisplayID displayID, const u8 bufferIndex); + virtual void _FetchCustomDisplayByID(const NDSDisplayID displayID, const u8 bufferIndex); + +public: + MacMetalFetchObject(); + virtual ~MacMetalFetchObject(); + + virtual void Init(); + virtual void CopyFromSrcClone(uint32_t *dstBufferPtr, const NDSDisplayID displayID, const u8 bufferIndex); + virtual void SetFetchBuffers(const NDSDisplayInfo ¤tDisplayInfo); + virtual void FetchFromBufferIndex(const u8 index); +}; + +#pragma mark - + +class MacMetalDisplayView : public ClientDisplay3DView, public DisplayViewCALayerInterface +{ +protected: + pthread_mutex_t *_mutexProcessPtr; + + virtual void _UpdateNormalSize(); + virtual void _UpdateOrder(); + virtual void _UpdateRotation(); + virtual void _UpdateClientSize(); + virtual void _UpdateViewScale(); + virtual void _LoadNativeDisplayByID(const NDSDisplayID displayID); + virtual void _ResizeCPUPixelScaler(const VideoFilterTypeID filterID); + +public: + MacMetalDisplayView(); + virtual ~MacMetalDisplayView(); + + pthread_mutex_t* GetMutexProcessPtr() const; + + virtual void Init(); + + virtual void CopyHUDFont(const FT_Face &fontFace, const size_t glyphSize, const size_t glyphTileSize, GlyphInfo *glyphInfo); + + // NDS screen filters + virtual void SetPixelScaler(const VideoFilterTypeID filterID); + virtual void SetOutputFilter(const OutputFilterTypeID filterID); + virtual void SetFiltersPreferGPU(const bool preferGPU); + + // Client view interface + virtual void ProcessDisplays(); + virtual void UpdateView(); +}; + +#pragma mark - +void SetupHQnxLUTs_Metal(id &device, id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT); +void DeleteHQnxLUTs_Metal(id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT); + +#endif // _MAC_METALDISPLAYVIEW_H diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm 
b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm new file mode 100644 index 000000000..b4b57257d --- /dev/null +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -0,0 +1,1642 @@ +/* + Copyright (C) 2017 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . + */ + +#include "MacMetalDisplayView.h" + +#include "../../../common.h" + +@implementation MetalDisplayViewSharedData + +@synthesize device; +@synthesize commandQueue; +@synthesize defaultLibrary; + +@synthesize load16To32Pipeline; +@synthesize deposterizePipeline; +@synthesize hudPipeline; +@synthesize samplerHUDBox; +@synthesize samplerHUDText; + +@synthesize hudIndexBuffer; + +@synthesize texDisplayFetch16NativeMain; +@synthesize texDisplayFetch16NativeTouch; +@synthesize texDisplayFetch32NativeMain; +@synthesize texDisplayFetch32NativeTouch; +@synthesize texDisplayFetch16CustomMain; +@synthesize texDisplayFetch16CustomTouch; +@synthesize texDisplayFetch32CustomMain; +@synthesize texDisplayFetch32CustomTouch; + +@synthesize texLQ2xLUT; +@synthesize texHQ2xLUT; +@synthesize texHQ3xLUT; +@synthesize texHQ4xLUT; +@synthesize texCurrentHQnxLUT; + +@synthesize displayFetchNativeBufferSize; +@synthesize displayFetchCustomBufferSize; + +@synthesize load16To32ThreadsPerGroup; +@synthesize load16To32ThreadGroupsPerGridNative; +@synthesize load16To32ThreadGroupsPerGridCustom; +@synthesize deposterizeThreadsPerGroup; +@synthesize 
deposterizeThreadGroupsPerGrid;
+
+// Builds the Metal state used by this shared-data object: the device, command queue and
+// default shader library, the 16->32 bit unpack and deposterize compute pipelines, the HUD
+// render pipeline with its index buffer and samplers, the native-size fetch textures, and
+// the HQnx lookup textures.
+//
+// Returns nil (after releasing self) when no Metal device is available, or when the default
+// library / unpack pipeline cannot be created.
+//
+// Ownership note: this file uses manual reference counting. MTLCreateSystemDefaultDevice()
+// and every -new... factory method already hand back a +1 reference, which is balanced by
+// the single -release (or retain-property setter) in -dealloc. No additional -retain is
+// taken here.
+- (id)init
+{
+	self = [super init];
+	if (self == nil)
+	{
+		return nil;
+	}
+	
+	// -dealloc unconditionally destroys this mutex, so it must be valid before any of the
+	// early-exit paths below call [self release].
+	pthread_mutex_init(&_mutexFetch, NULL);
+	
+	device = MTLCreateSystemDefaultDevice();
+	if (device == nil)
+	{
+		[self release];
+		return nil;
+	}
+	
+	commandQueue = [device newCommandQueue];
+	defaultLibrary = [device newDefaultLibrary];
+	if (defaultLibrary == nil)
+	{
+		// Without the compiled shader library none of the pipelines below can be built.
+		[self release];
+		return nil;
+	}
+	
+	// The MTLFunction objects are only needed while building the pipelines, so release them
+	// right afterwards instead of leaking them.
+	id<MTLFunction> load16To32Function = [defaultLibrary newFunctionWithName:@"src16_unpack_unorm1555_to_unorm8888"];
+	load16To32Pipeline = [device newComputePipelineStateWithFunction:load16To32Function error:nil];
+	[load16To32Function release];
+	
+	id<MTLFunction> deposterizeFunction = [defaultLibrary newFunctionWithName:@"src_filter_deposterize"];
+	deposterizePipeline = [device newComputePipelineStateWithFunction:deposterizeFunction error:nil];
+	[deposterizeFunction release];
+	
+	if (load16To32Pipeline == nil)
+	{
+		// Messaging a nil pipeline would yield a thread width of 0 and the thread-group
+		// math below would then divide by zero.
+		[self release];
+		return nil;
+	}
+	
+	// Pick a power-of-two thread-group width that exceeds neither the pipeline's execution
+	// width nor the native framebuffer width.
+	size_t tw = GetNearestPositivePOT((uint32_t)[load16To32Pipeline threadExecutionWidth]);
+	while ( (tw > [load16To32Pipeline threadExecutionWidth]) || (tw > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
+	{
+		tw >>= 1;
+	}
+	
+	if (tw == 0)
+	{
+		[self release];
+		return nil;
+	}
+	
+	size_t th = [load16To32Pipeline maxTotalThreadsPerThreadgroup] / tw;
+	
+	load16To32ThreadsPerGroup = MTLSizeMake(tw, th, 1);
+	load16To32ThreadGroupsPerGridNative = MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH  / tw,
+	                                                  GPU_FRAMEBUFFER_NATIVE_HEIGHT / th,
+	                                                  1);
+	
+	load16To32ThreadGroupsPerGridCustom = load16To32ThreadGroupsPerGridNative;
+	
+	deposterizeThreadsPerGroup = load16To32ThreadsPerGroup;
+	deposterizeThreadGroupsPerGrid = load16To32ThreadGroupsPerGridNative;
+	
+	// HUD render pipeline: BGRA8 target with source-alpha blending.
+	MTLRenderPipelineDescriptor *hudPipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
+	MTLRenderPipelineColorAttachmentDescriptor *hudColorAttachment = [[hudPipelineDesc colorAttachments] objectAtIndexedSubscript:0];
+	[hudColorAttachment setPixelFormat:MTLPixelFormatBGRA8Unorm];
+	[hudColorAttachment setBlendingEnabled:YES];
+	[hudColorAttachment setRgbBlendOperation:MTLBlendOperationAdd];
+	[hudColorAttachment setAlphaBlendOperation:MTLBlendOperationAdd];
+	[hudColorAttachment setSourceRGBBlendFactor:MTLBlendFactorSourceAlpha];
+	[hudColorAttachment setSourceAlphaBlendFactor:MTLBlendFactorSourceAlpha];
+	[hudColorAttachment setDestinationRGBBlendFactor:MTLBlendFactorOneMinusSourceAlpha];
+	[hudColorAttachment setDestinationAlphaBlendFactor:MTLBlendFactorOneMinusSourceAlpha];
+	
+	id<MTLFunction> hudVertexFunction = [defaultLibrary newFunctionWithName:@"hud_vertex"];
+	id<MTLFunction> hudFragmentFunction = [defaultLibrary newFunctionWithName:@"hud_fragment"];
+	[hudPipelineDesc setVertexFunction:hudVertexFunction];
+	[hudPipelineDesc setFragmentFunction:hudFragmentFunction];
+	
+	hudPipeline = [device newRenderPipelineStateWithDescriptor:hudPipelineDesc error:nil];
+	[hudVertexFunction release];
+	[hudFragmentFunction release];
+	[hudPipelineDesc release];
+	
+	// Static index buffer for the HUD: each character is a quad of 4 vertices drawn as two
+	// triangles (0,1,2) and (2,3,0).
+	hudIndexBuffer = [device newBufferWithLength:(sizeof(uint16_t) * HUD_MAX_CHARACTERS * 6) options:MTLResourceStorageModeManaged];
+	
+	uint16_t *idxBufferPtr = (uint16_t *)[hudIndexBuffer contents];
+	for (size_t i = 0, j = 0, k = 0; i < HUD_MAX_CHARACTERS; i++, j+=6, k+=4)
+	{
+		idxBufferPtr[j+0] = k+0;
+		idxBufferPtr[j+1] = k+1;
+		idxBufferPtr[j+2] = k+2;
+		idxBufferPtr[j+3] = k+2;
+		idxBufferPtr[j+4] = k+3;
+		idxBufferPtr[j+5] = k+0;
+	}
+	
+	// Managed storage: tell Metal which CPU-written bytes need to be synchronized.
+	[hudIndexBuffer didModifyRange:NSMakeRange(0, sizeof(uint16_t) * HUD_MAX_CHARACTERS * 6)];
+	
+	_bufDisplayFetchNative[NDSDisplayID_Main][0]  = nil;
+	_bufDisplayFetchNative[NDSDisplayID_Main][1]  = nil;
+	_bufDisplayFetchNative[NDSDisplayID_Touch][0] = nil;
+	_bufDisplayFetchNative[NDSDisplayID_Touch][1] = nil;
+	_bufDisplayFetchCustom[NDSDisplayID_Main][0]  = nil;
+	_bufDisplayFetchCustom[NDSDisplayID_Main][1]  = nil;
+	_bufDisplayFetchCustom[NDSDisplayID_Touch][0] = nil;
+	_bufDisplayFetchCustom[NDSDisplayID_Touch][1] = nil;
+	displayFetchNativeBufferSize = 0;
+	displayFetchCustomBufferSize = 0;
+	
+	// Set up HUD texture samplers.
+	MTLSamplerDescriptor *samplerDesc = [[MTLSamplerDescriptor alloc] init];
+	[samplerDesc setNormalizedCoordinates:YES];
+	[samplerDesc setRAddressMode:MTLSamplerAddressModeClampToEdge];
+	[samplerDesc setSAddressMode:MTLSamplerAddressModeClampToEdge];
+	[samplerDesc setTAddressMode:MTLSamplerAddressModeClampToEdge];
+	[samplerDesc setMinFilter:MTLSamplerMinMagFilterNearest];
+	[samplerDesc setMagFilter:MTLSamplerMinMagFilterNearest];
+	[samplerDesc setMipFilter:MTLSamplerMipFilterNearest];
+	samplerHUDBox = [device newSamplerStateWithDescriptor:samplerDesc];
+	
+	// The text sampler reuses the same descriptor with clamp-to-zero addressing and linear filtering.
+	[samplerDesc setRAddressMode:MTLSamplerAddressModeClampToZero];
+	[samplerDesc setSAddressMode:MTLSamplerAddressModeClampToZero];
+	[samplerDesc setTAddressMode:MTLSamplerAddressModeClampToZero];
+	[samplerDesc setMinFilter:MTLSamplerMinMagFilterLinear];
+	[samplerDesc setMagFilter:MTLSamplerMinMagFilterLinear];
+	[samplerDesc setMipFilter:MTLSamplerMipFilterLinear];
+	samplerHUDText = [device newSamplerStateWithDescriptor:samplerDesc];
+	
+	[samplerDesc release];
+	
+	// Set up the loading textures. These are special because they copy the raw image data from the emulator to the GPU.
+	MTLTextureDescriptor *texDisplayLoad16Desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR16Uint
+	                                                                                                width:GPU_FRAMEBUFFER_NATIVE_WIDTH
+	                                                                                               height:GPU_FRAMEBUFFER_NATIVE_HEIGHT
+	                                                                                            mipmapped:NO];
+	[texDisplayLoad16Desc setResourceOptions:MTLResourceStorageModeManaged];
+	[texDisplayLoad16Desc setStorageMode:MTLStorageModeManaged];
+	[texDisplayLoad16Desc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
+	[texDisplayLoad16Desc setUsage:MTLTextureUsageShaderRead];
+	
+	texDisplayFetch16NativeMain  = [device newTextureWithDescriptor:texDisplayLoad16Desc];
+	texDisplayFetch16NativeTouch = [device newTextureWithDescriptor:texDisplayLoad16Desc];
+	texDisplayFetch16CustomMain  = [device newTextureWithDescriptor:texDisplayLoad16Desc];
+	texDisplayFetch16CustomTouch = [device newTextureWithDescriptor:texDisplayLoad16Desc];
+	
+	MTLTextureDescriptor *texDisplayLoad32Desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
+	                                                                                                width:GPU_FRAMEBUFFER_NATIVE_WIDTH
+	                                                                                               height:GPU_FRAMEBUFFER_NATIVE_HEIGHT
+	                                                                                            mipmapped:NO];
+	[texDisplayLoad32Desc setResourceOptions:MTLResourceStorageModeManaged];
+	[texDisplayLoad32Desc setStorageMode:MTLStorageModeManaged];
+	[texDisplayLoad32Desc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
+	[texDisplayLoad32Desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
+	
+	texDisplayFetch32NativeMain  = [device newTextureWithDescriptor:texDisplayLoad32Desc];
+	texDisplayFetch32NativeTouch = [device newTextureWithDescriptor:texDisplayLoad32Desc];
+	texDisplayFetch32CustomMain  = [device newTextureWithDescriptor:texDisplayLoad32Desc];
+	texDisplayFetch32CustomTouch = [device newTextureWithDescriptor:texDisplayLoad32Desc];
+	
+	// Clear the 32-bit textures to zero so they hold defined contents before the first fetch.
+	// The upload is skipped if the scratch allocation fails rather than passing NULL to Metal.
+	uint32_t *blankBuffer = (uint32_t *)calloc(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT, sizeof(uint32_t));
+	if (blankBuffer != NULL)
+	{
+		const MTLRegion texRegionNative = MTLRegionMake2D(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT);
+		[texDisplayFetch32NativeMain replaceRegion:texRegionNative
+		                               mipmapLevel:0
+		                                 withBytes:blankBuffer
+		                               bytesPerRow:GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(uint32_t)];
+		[texDisplayFetch32NativeTouch replaceRegion:texRegionNative
+		                                mipmapLevel:0
+		                                  withBytes:blankBuffer
+		                                bytesPerRow:GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(uint32_t)];
+		[texDisplayFetch32CustomMain replaceRegion:texRegionNative
+		                               mipmapLevel:0
+		                                 withBytes:blankBuffer
+		                               bytesPerRow:GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(uint32_t)];
+		[texDisplayFetch32CustomTouch replaceRegion:texRegionNative
+		                                mipmapLevel:0
+		                                  withBytes:blankBuffer
+		                                bytesPerRow:GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(uint32_t)];
+		free(blankBuffer);
+	}
+	
+	// Set up the HQnx LUT textures.
+	SetupHQnxLUTs_Metal(device, texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT);
+	texCurrentHQnxLUT = nil;
+	
+	_fetchEncoder = nil;
+	
+	return self;
+}
+
+- (void)dealloc
+{
+	[device release];
+	
+	[commandQueue release];
+	[defaultLibrary release];
+	[load16To32Pipeline release];
+	[deposterizePipeline release];
+	[hudPipeline release];
+	[hudIndexBuffer release];
+	
+	[texDisplayFetch16NativeMain release];
+	[texDisplayFetch16NativeTouch release];
+	[texDisplayFetch32NativeMain release];
+	[texDisplayFetch32NativeTouch release];
+	[self setTexDisplayFetch16CustomMain:nil];
+	[self setTexDisplayFetch16CustomTouch:nil];
+	[self setTexDisplayFetch32CustomMain:nil];
+	[self setTexDisplayFetch32CustomTouch:nil];
+	
+	DeleteHQnxLUTs_Metal(texLQ2xLUT, texHQ2xLUT, texHQ3xLUT, texHQ4xLUT);
+	[self setTexCurrentHQnxLUT:nil];
+	
+	[samplerHUDBox release];
+	[samplerHUDText release];
+	
+	[_bufDisplayFetchNative[NDSDisplayID_Main][0] release];
+	[_bufDisplayFetchNative[NDSDisplayID_Main][1] release];
+	[_bufDisplayFetchNative[NDSDisplayID_Touch][0] release];
+	[_bufDisplayFetchNative[NDSDisplayID_Touch][1] release];
+	[_bufDisplayFetchCustom[NDSDisplayID_Main][0]
release];
+	[_bufDisplayFetchCustom[NDSDisplayID_Main][1] release];
+	[_bufDisplayFetchCustom[NDSDisplayID_Touch][0] release];
+	[_bufDisplayFetchCustom[NDSDisplayID_Touch][1] release];
+	
+	pthread_mutex_destroy(&_mutexFetch);
+	
+	[super dealloc];
+}
+
+// Wraps the emulator's framebuffer memory in no-copy Metal buffers, one per
+// display (main/touch), per kind (native/custom) and per framebuffer page (0/1).
+//
+// Offsets used within one page, relative to dispInfo.masterFramebufferHead:
+//   native main  : 0
+//   native touch : nativeSize
+//   custom main  : nativeSize * 2
+//   custom touch : nativeSize * 2 + customSize
+// Page 1 lives dispInfo.framebufferSize bytes after page 0.
+//
+// The computed native/custom byte sizes are also stored through
+// setDisplayFetchNativeBufferSize: / setDisplayFetchCustomBufferSize:.
+//
+// NOTE(review): the "new..." factory methods already return an owned object, and
+// -dealloc releases each buffer only once, so the extra -retain below looks like
+// an over-retain. Previously assigned buffers are also not released when this
+// method is called again. Confirm the intended ownership before changing either.
+- (void) setFetchBuffersWithDisplayInfo:(const NDSDisplayInfo &)dispInfo
+{
+	const size_t nativeSize = GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * dispInfo.pixelBytes;
+	const size_t customSize = dispInfo.customWidth * dispInfo.customHeight * dispInfo.pixelBytes;
+	
+	_bufDisplayFetchNative[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead
+		length:nativeSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	_bufDisplayFetchNative[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead + dispInfo.framebufferSize
+		length:nativeSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	_bufDisplayFetchNative[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead + (nativeSize * 1)
+		length:nativeSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	_bufDisplayFetchNative[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead + (nativeSize * 1) + dispInfo.framebufferSize
+		length:nativeSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	_bufDisplayFetchCustom[NDSDisplayID_Main][0] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead + (nativeSize * 2)
+		length:customSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	_bufDisplayFetchCustom[NDSDisplayID_Main][1] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead + (nativeSize * 2) + dispInfo.framebufferSize
+		length:customSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	// Page 0 of the custom touch display: no framebufferSize term here. Adding it
+	// (as the code previously did) made this buffer alias the page 1 buffer below.
+	_bufDisplayFetchCustom[NDSDisplayID_Touch][0] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead + (nativeSize * 2) + customSize
+		length:customSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	_bufDisplayFetchCustom[NDSDisplayID_Touch][1] = [[device newBufferWithBytesNoCopy:(u8 *)dispInfo.masterFramebufferHead + (nativeSize * 2) + customSize + dispInfo.framebufferSize
+		length:customSize
+		options:MTLResourceStorageModeManaged
+		deallocator:nil] retain];
+	
+	[self setDisplayFetchNativeBufferSize:nativeSize];
+	[self setDisplayFetchCustomBufferSize:customSize];
+}
+
+// Runs one fetch pass for the framebuffer page `index`.
+// While holding _mutexFetch and a read lock on that page's rwlock, this creates a
+// command buffer and a blit encoder (kept in _fetchEncoder), calls
+// GPUClientFetchObject::FetchFromBufferIndex(), then ends encoding and commits.
+// Presumably the fetch call reaches -fetchNativeDisplayByID:bufferIndex: and
+// -fetchCustomDisplayByID:bufferIndex:, which encode into _fetchEncoder -- verify
+// against GPUClientFetchObject.
+- (void) fetchFromBufferIndex:(const u8)index
+{
+	pthread_mutex_lock(&_mutexFetch);
+	pthread_rwlock_rdlock([self rwlockFramebufferAtIndex:index]);
+	
+	id cb = [commandQueue commandBufferWithUnretainedReferences];
+	_fetchEncoder = [cb blitCommandEncoder];
+	
+	GPUFetchObject->GPUClientFetchObject::FetchFromBufferIndex(index);
+	
+	[_fetchEncoder endEncoding];
+	[cb commit];
+	
+	pthread_rwlock_unlock([self rwlockFramebufferAtIndex:index]);
+	pthread_mutex_unlock(&_mutexFetch);
+}
+
+// Encodes a blit that copies one display's native-size framebuffer into the
+// matching fetch texture (16-bit texture for BGR555, 32-bit texture otherwise).
+- (void) fetchNativeDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex
+{
+	const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex);
+	
+	id texFetch16 = (displayID == NDSDisplayID_Main) ? texDisplayFetch16NativeMain : texDisplayFetch16NativeTouch;
+	id texFetch32 = (displayID == NDSDisplayID_Main) ? texDisplayFetch32NativeMain : texDisplayFetch32NativeTouch;
+	const size_t bufferSize = [self displayFetchNativeBufferSize];
+	const id targetSource = (displayID == NDSDisplayID_Main) ? 
_bufDisplayFetchNative[NDSDisplayID_Main][currentDisplayInfo.bufferIndex] : _bufDisplayFetchNative[NDSDisplayID_Touch][currentDisplayInfo.bufferIndex];
+	// The buffer wraps CPU-written memory in managed storage, so flag the whole range as modified.
+	[targetSource didModifyRange:NSMakeRange(0, bufferSize)];
+	
+	[_fetchEncoder copyFromBuffer:targetSource
+		sourceOffset:0
+		sourceBytesPerRow:GPU_FRAMEBUFFER_NATIVE_WIDTH * currentDisplayInfo.pixelBytes
+		sourceBytesPerImage:bufferSize
+		sourceSize:MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 1)
+		toTexture:(currentDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? texFetch16 : texFetch32
+		destinationSlice:0
+		destinationLevel:0
+		destinationOrigin:MTLOriginMake(0, 0, 0)];
+}
+
+// Encodes a blit that copies one display's custom-size framebuffer into the
+// matching custom fetch texture. If the current 32-bit custom texture does not
+// match the incoming custom width/height, both the 16-bit and 32-bit custom
+// textures for that display are recreated first, and the custom thread-group
+// grid for the 16-to-32 conversion is recomputed (rounding up).
+//
+// NOTE(review): the textures come from "new..." factory methods and are handed
+// to property setters; if those properties retain, the +1 from "new" is never
+// balanced. The property declarations are not visible here -- confirm.
+- (void) fetchCustomDisplayByID:(const NDSDisplayID)displayID bufferIndex:(const u8)bufferIndex
+{
+	const NDSDisplayInfo &currentDisplayInfo = GPUFetchObject->GetFetchDisplayInfoForBufferIndex(bufferIndex);
+	const size_t w = currentDisplayInfo.customWidth;
+	const size_t h = currentDisplayInfo.customHeight;
+	
+	id texFetch16 = (displayID == NDSDisplayID_Main) ? [self texDisplayFetch16CustomMain] : [self texDisplayFetch16CustomTouch];
+	id texFetch32 = (displayID == NDSDisplayID_Main) ? [self texDisplayFetch32CustomMain] : [self texDisplayFetch32CustomTouch];
+	
+	// If the existing texture size is different than the incoming size, then remake the textures to match the incoming size.
+	if ( ([texFetch32 width] != w) || ([texFetch32 height] != h) )
+	{
+		MTLTextureDescriptor *texDisplayLoad16Desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR16Uint
+			width:w
+			height:h
+			mipmapped:NO];
+		[texDisplayLoad16Desc setResourceOptions:MTLResourceStorageModeManaged];
+		[texDisplayLoad16Desc setStorageMode:MTLStorageModeManaged];
+		[texDisplayLoad16Desc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
+		[texDisplayLoad16Desc setUsage:MTLTextureUsageShaderRead];
+		
+		MTLTextureDescriptor *texDisplayLoad32Desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
+			width:w
+			height:h
+			mipmapped:NO];
+		[texDisplayLoad32Desc setResourceOptions:MTLResourceStorageModePrivate];
+		[texDisplayLoad32Desc setStorageMode:MTLStorageModePrivate];
+		[texDisplayLoad32Desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
+		
+		
+		if (displayID == NDSDisplayID_Main)
+		{
+			[self setTexDisplayFetch16CustomMain:[device newTextureWithDescriptor:texDisplayLoad16Desc]];
+			[self setTexDisplayFetch32CustomMain:[device newTextureWithDescriptor:texDisplayLoad32Desc]];
+			texFetch16 = [self texDisplayFetch16CustomMain];
+			texFetch32 = [self texDisplayFetch32CustomMain];
+		}
+		else
+		{
+			[self setTexDisplayFetch16CustomTouch:[device newTextureWithDescriptor:texDisplayLoad16Desc]];
+			[self setTexDisplayFetch32CustomTouch:[device newTextureWithDescriptor:texDisplayLoad32Desc]];
+			texFetch16 = [self texDisplayFetch16CustomTouch];
+			texFetch32 = [self texDisplayFetch32CustomTouch];
+		}
+		
+		const size_t tw = load16To32ThreadsPerGroup.width;
+		const size_t th = load16To32ThreadsPerGroup.height;
+		
+		[self setLoad16To32ThreadGroupsPerGridCustom:MTLSizeMake((currentDisplayInfo.customWidth + tw - 1) / tw,
+			(currentDisplayInfo.customHeight + th - 1) / th,
+			1)];
+	}
+	
+	const size_t bufferSize = [self displayFetchCustomBufferSize];
+	const id targetSource = (displayID == NDSDisplayID_Main) ? _bufDisplayFetchCustom[NDSDisplayID_Main][currentDisplayInfo.bufferIndex] : _bufDisplayFetchCustom[NDSDisplayID_Touch][currentDisplayInfo.bufferIndex];
+	[targetSource didModifyRange:NSMakeRange(0, bufferSize)];
+	
+	[_fetchEncoder copyFromBuffer:targetSource
+		sourceOffset:0
+		sourceBytesPerRow:currentDisplayInfo.customWidth * currentDisplayInfo.pixelBytes
+		sourceBytesPerImage:bufferSize
+		sourceSize:MTLSizeMake(currentDisplayInfo.customWidth, currentDisplayInfo.customHeight, 1)
+		toTexture:(currentDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? texFetch16 : texFetch32
+		destinationSlice:0
+		destinationLevel:0
+		destinationOrigin:MTLOriginMake(0, 0, 0)];
+}
+
+// Encodes two compute dispatches on `cce` (one for the main display, one for the
+// touch display) that run load16To32Pipeline with the 16-bit fetch texture at
+// index 0 and the 32-bit fetch texture at index 1. Each flag selects the native
+// or the custom texture pair, and the matching thread-group grid, for its display.
+- (void) convertFetch16To32UsingEncoder:(id)cce isMainNative:(BOOL)isMainNative isTouchNative:(BOOL)isTouchNative
+{
+	// 16-bit textures aren't handled natively in Metal for macOS, so we need to explicitly convert to 32-bit here.
+	[cce setComputePipelineState:load16To32Pipeline];
+	
+	if (isMainNative)
+	{
+		[cce setTexture:texDisplayFetch16NativeMain atIndex:0];
+		[cce setTexture:texDisplayFetch32NativeMain atIndex:1];
+		[cce dispatchThreadgroups:load16To32ThreadGroupsPerGridNative
+			threadsPerThreadgroup:load16To32ThreadsPerGroup];
+	}
+	else
+	{
+		[cce setTexture:texDisplayFetch16CustomMain atIndex:0];
+		[cce setTexture:texDisplayFetch32CustomMain atIndex:1];
+		[cce dispatchThreadgroups:[self load16To32ThreadGroupsPerGridCustom]
+			threadsPerThreadgroup:[self load16To32ThreadsPerGroup]];
+	}
+	
+	if (isTouchNative)
+	{
+		[cce setTexture:texDisplayFetch16NativeTouch atIndex:0];
+		[cce setTexture:texDisplayFetch32NativeTouch atIndex:1];
+		[cce dispatchThreadgroups:load16To32ThreadGroupsPerGridNative
+			threadsPerThreadgroup:load16To32ThreadsPerGroup];
+	}
+	else
+	{
+		[cce setTexture:[self texDisplayFetch16CustomTouch] atIndex:0];
+		[cce setTexture:[self texDisplayFetch32CustomTouch] atIndex:1];
+		[cce dispatchThreadgroups:[self load16To32ThreadGroupsPerGridCustom]
+			threadsPerThreadgroup:[self load16To32ThreadsPerGroup]];
+	}
+}
+
+@end
+
+@implementation DisplayViewMetalLayer
+
+@synthesize sharedData;
+@synthesize colorAttachment0Desc;
+@synthesize pixelScalePipeline;
+@synthesize displayOutputPipeline;
+@synthesize bufCPUFilterSrcMain;
+@synthesize bufCPUFilterSrcTouch;
+@synthesize bufCPUFilterDstMain;
+@synthesize bufCPUFilterDstTouch;
+@synthesize texDisplayPixelScaleMain;
+@synthesize texDisplayPixelScaleTouch;
+@synthesize texHUDCharMap;
+@synthesize needsViewportUpdate;
+@synthesize needsRotationScaleUpdate;
+@synthesize needsScreenVerticesUpdate;
+@synthesize needsHUDVerticesUpdate;
+@dynamic pixelScaler;
+@dynamic outputFilter;
+
+// Creates the layer with its MacMetalDisplayView backend and a render pass
+// descriptor that clears to opaque black. All GPU resources start out nil.
+- (id)init
+{
+	self = [super init];
+	if(self == nil)
+	{
+		return nil;
+	}
+	
+	sharedData = nil;
+	availableResources = dispatch_semaphore_create(3);
+	
+	_cdv = new MacMetalDisplayView();
+	_cdv->SetFrontendLayer(self);
+	
+	_outputRenderPassDesc = [[MTLRenderPassDescriptor renderPassDescriptor] retain];
+	colorAttachment0Desc = [[_outputRenderPassDesc colorAttachments] objectAtIndexedSubscript:0];
+	[colorAttachment0Desc setLoadAction:MTLLoadActionClear];
+	[colorAttachment0Desc setStoreAction:MTLStoreActionStore];
+	[colorAttachment0Desc setClearColor:MTLClearColorMake(0.0, 0.0, 0.0, 1.0)];
+	
+	pixelScalePipeline = nil;
+	displayOutputPipeline = nil;
+	
+	_cdvPropertiesBuffer = nil;
+	_displayVtxPositionBuffer = nil;
+	_displayTexCoordBuffer = nil;
+	_hudVtxPositionBuffer = nil;
+	_hudTexCoordBuffer = nil;
+	
+	_texDisplaySrcDeposterize[NDSDisplayID_Main][0] = nil;
+	_texDisplaySrcDeposterize[NDSDisplayID_Touch][0] = nil;
+	_texDisplaySrcDeposterize[NDSDisplayID_Main][1] = nil;
+	_texDisplaySrcDeposterize[NDSDisplayID_Touch][1] = nil;
+	bufCPUFilterSrcMain = nil;
+	bufCPUFilterSrcTouch = nil;
+	bufCPUFilterDstMain = nil;
+	bufCPUFilterDstTouch = nil;
+	texDisplayPixelScaleMain = nil;
+	texDisplayPixelScaleTouch = nil;
+	_texDisplayOutput[0] = nil;
+	_texDisplayOutput[1] = nil;
+	texHUDCharMap = nil;
+	
+	[self 
setOpaque:YES];
+	
+	_pixelScalerThreadsPerGroup = MTLSizeMake(1, 1, 1);
+	_pixelScalerThreadGroupsPerGrid = MTLSizeMake(1, 1, 1);
+	
+	needsViewportUpdate = YES;
+	needsRotationScaleUpdate = YES;
+	needsScreenVerticesUpdate = YES;
+	needsHUDVerticesUpdate = YES;
+	
+	return self;
+}
+
+// Releases the directly owned ivars, clears the properties through their
+// setters, and deletes the C++ display view backend.
+- (void)dealloc
+{
+	[_outputRenderPassDesc release];
+	
+	[_cdvPropertiesBuffer release];
+	[_displayVtxPositionBuffer release];
+	[_displayTexCoordBuffer release];
+	[_hudVtxPositionBuffer release];
+	[_hudTexCoordBuffer release];
+	
+	[_texDisplaySrcDeposterize[NDSDisplayID_Main][0] release];
+	[_texDisplaySrcDeposterize[NDSDisplayID_Touch][0] release];
+	[_texDisplaySrcDeposterize[NDSDisplayID_Main][1] release];
+	[_texDisplaySrcDeposterize[NDSDisplayID_Touch][1] release];
+	
+	[self setBufCPUFilterSrcMain:nil];
+	[self setBufCPUFilterSrcTouch:nil];
+	[self setBufCPUFilterDstMain:nil];
+	[self setBufCPUFilterDstTouch:nil];
+	[self setTexDisplayPixelScaleMain:nil];
+	[self setTexDisplayPixelScaleTouch:nil];
+	
+	[self setPixelScalePipeline:nil];
+	[self setDisplayOutputPipeline:nil];
+	[self setTexHUDCharMap:nil];
+	
+	[self setSharedData:nil];
+	delete _cdv;
+	
+	[super dealloc];
+}
+
+// Returns the C++ display view object that backs this layer.
+- (ClientDisplay3DView *)clientDisplay3DView
+{
+	return _cdv;
+}
+
+// Returns the pixel scaler currently reported by the backing display view.
+- (VideoFilterTypeID) pixelScaler
+{
+	return _cdv->GetPixelScaler();
+}
+
+// Selects the GPU pixel scaler for `filterID`:
+//  1. picks the compute function name (and, for the LQnx/HQnx family, the shared
+//     lookup-table texture) that belongs to the filter;
+//  2. builds the compute pipeline, or clears it for "none"/unknown filters;
+//  3. recreates both pixel-scale destination textures at the filter's output size;
+//  4. derives the threadgroup size and grid used to dispatch the scaler.
+- (void) setPixelScaler:(VideoFilterTypeID)filterID
+{
+	NSString *scalerFunctionName = nil;
+	id currentHQnxLUT = nil;
+	
+	switch (filterID)
+	{
+		// Scalers that need no lookup table.
+		case VideoFilterTypeID_Nearest2X:  scalerFunctionName = @"pixel_scaler_nearest2x";    break;
+		case VideoFilterTypeID_Scanline:   scalerFunctionName = @"pixel_scaler_scanline";     break;
+		case VideoFilterTypeID_EPX:        scalerFunctionName = @"pixel_scaler_2xEPX";        break;
+		case VideoFilterTypeID_EPXPlus:    scalerFunctionName = @"pixel_scaler_2xEPXPlus";    break;
+		case VideoFilterTypeID_2xSaI:      scalerFunctionName = @"pixel_scaler_2xSaI";        break;
+		case VideoFilterTypeID_Super2xSaI: scalerFunctionName = @"pixel_scaler_Super2xSaI";   break;
+		case VideoFilterTypeID_SuperEagle: scalerFunctionName = @"pixel_scaler_2xSuperEagle"; break;
+		case VideoFilterTypeID_2xBRZ:      scalerFunctionName = @"pixel_scaler_2xBRZ";        break;
+		case VideoFilterTypeID_3xBRZ:      scalerFunctionName = @"pixel_scaler_3xBRZ";        break;
+		case VideoFilterTypeID_4xBRZ:      scalerFunctionName = @"pixel_scaler_4xBRZ";        break;
+		case VideoFilterTypeID_5xBRZ:      scalerFunctionName = @"pixel_scaler_5xBRZ";        break;
+		case VideoFilterTypeID_6xBRZ:      scalerFunctionName = @"pixel_scaler_6xBRZ";        break;
+		
+		// Scalers that sample one of the shared LQ/HQ lookup-table textures.
+		case VideoFilterTypeID_LQ2X:
+			scalerFunctionName = @"pixel_scaler_LQ2x";
+			currentHQnxLUT = [sharedData texLQ2xLUT];
+			break;
+			
+		case VideoFilterTypeID_LQ2XS:
+			scalerFunctionName = @"pixel_scaler_LQ2xS";
+			currentHQnxLUT = [sharedData texLQ2xLUT];
+			break;
+			
+		case VideoFilterTypeID_HQ2X:
+			scalerFunctionName = @"pixel_scaler_HQ2x";
+			currentHQnxLUT = [sharedData texHQ2xLUT];
+			break;
+			
+		case VideoFilterTypeID_HQ2XS:
+			scalerFunctionName = @"pixel_scaler_HQ2xS";
+			currentHQnxLUT = [sharedData texHQ2xLUT];
+			break;
+			
+		case VideoFilterTypeID_HQ3X:
+			scalerFunctionName = @"pixel_scaler_HQ3x";
+			currentHQnxLUT = [sharedData texHQ3xLUT];
+			break;
+			
+		case VideoFilterTypeID_HQ3XS:
+			scalerFunctionName = @"pixel_scaler_HQ3xS";
+			currentHQnxLUT = [sharedData texHQ3xLUT];
+			break;
+			
+		case VideoFilterTypeID_HQ4X:
+			scalerFunctionName = @"pixel_scaler_HQ4x";
+			currentHQnxLUT = [sharedData texHQ4xLUT];
+			break;
+			
+		case VideoFilterTypeID_HQ4XS:
+			scalerFunctionName = @"pixel_scaler_HQ4xS";
+			currentHQnxLUT = [sharedData texHQ4xLUT];
+			break;
+			
+		// No GPU scaler: leave the function name nil so the pipeline is cleared.
+		case VideoFilterTypeID_None:
+		default:
+			break;
+	}
+	
+	if (scalerFunctionName == nil)
+	{
+		[self setPixelScalePipeline:nil];
+	}
+	else
+	{
+		[self setPixelScalePipeline:[[self device] newComputePipelineStateWithFunction:[[sharedData defaultLibrary] newFunctionWithName:scalerFunctionName] error:nil]];
+	}
+	
+	[sharedData setTexCurrentHQnxLUT:currentHQnxLUT];
+	
+	// Size the destination textures to the native frame scaled by the filter's ratio.
+	const VideoFilterAttributes scaleAttr = VideoFilter::GetAttributesByID(filterID);
+	const size_t scaledWidth  = GPU_FRAMEBUFFER_NATIVE_WIDTH  * scaleAttr.scaleMultiply / scaleAttr.scaleDivide;
+	const size_t scaledHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT * scaleAttr.scaleMultiply / scaleAttr.scaleDivide;
+	
+	MTLTextureDescriptor *scaledTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
+		width:scaledWidth
+		height:scaledHeight
+		mipmapped:NO];
+	[scaledTexDesc setResourceOptions:MTLResourceStorageModePrivate];
+	[scaledTexDesc setStorageMode:MTLStorageModePrivate];
+	[scaledTexDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
+	
+	[self setTexDisplayPixelScaleMain:[[self device] newTextureWithDescriptor:scaledTexDesc]];
+	[self setTexDisplayPixelScaleTouch:[[self device] newTextureWithDescriptor:scaledTexDesc]];
+	
+	// Without a pipeline there is nothing to dispatch; fall back to 1x1x1 sizes.
+	if ([self pixelScalePipeline] == nil)
+	{
+		_pixelScalerThreadsPerGroup = MTLSizeMake(1, 1, 1);
+		_pixelScalerThreadGroupsPerGrid = MTLSizeMake(1, 1, 1);
+		return;
+	}
+	
+	// Start from the value GetNearestPositivePOT() gives for the pipeline's execution
+	// width, then halve it until it exceeds neither that width nor the native frame width.
+	size_t groupWidth = GetNearestPositivePOT((uint32_t)[[self pixelScalePipeline] threadExecutionWidth]);
+	while ( (groupWidth > [[self pixelScalePipeline] threadExecutionWidth]) || (groupWidth > GPU_FRAMEBUFFER_NATIVE_WIDTH) )
+	{
+		groupWidth >>= 1;
+	}
+	
+	const size_t groupHeight = [[self pixelScalePipeline] maxTotalThreadsPerThreadgroup] / groupWidth;
+	
+	_pixelScalerThreadsPerGroup = MTLSizeMake(groupWidth, groupHeight, 1);
+	_pixelScalerThreadGroupsPerGrid = MTLSizeMake(GPU_FRAMEBUFFER_NATIVE_WIDTH / groupWidth,
+		GPU_FRAMEBUFFER_NATIVE_HEIGHT / groupHeight,
+		1);
+}
+
+// Returns the output filter currently reported by the backing display view.
+- (OutputFilterTypeID) outputFilter
+{
+	return _cdv->GetOutputFilter();
+}
+
+// Rebuilds the display output render pipeline with the vertex and fragment
+// functions that belong to `filterID` (bilinear for unknown IDs).
+- (void) setOutputFilter:(OutputFilterTypeID)filterID
+{
+	MTLRenderPipelineDescriptor *outputPipelineDesc = 
[[MTLRenderPipelineDescriptor alloc] init];
+	[[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:[self pixelFormat]];
+	[outputPipelineDesc setAlphaToOneEnabled:YES];
+	// Pick the vertex/fragment function pair for the requested filter. All bicubic and Lanczos filters share the "display_output_bicubic_vertex" vertex function.
+	switch (filterID)
+	{
+		case OutputFilterTypeID_NearestNeighbor:
+			[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_vertex"]];
+			[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_nearest"]];
+			break;
+			
+		case OutputFilterTypeID_BicubicBSpline:
+			[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_bicubic_vertex"]];
+			[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_bicubic_bspline"]];
+			break;
+			
+		case OutputFilterTypeID_BicubicMitchell:
+			[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_bicubic_vertex"]];
+			[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_bicubic_mitchell_netravali"]];
+			break;
+			
+		case OutputFilterTypeID_Lanczos2:
+			[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_bicubic_vertex"]];
+			[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_lanczos2"]];
+			break;
+			
+		case OutputFilterTypeID_Lanczos3:
+			[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_bicubic_vertex"]];
+			[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_lanczos3"]];
+			break;
+			
+		case OutputFilterTypeID_Bilinear: // Bilinear is also the fallback for unrecognized filter IDs.
+		default:
+			[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_vertex"]];
+			[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_bilinear"]];
+			break;
+	}
+	// NOTE(review): pipeline creation errors are discarded (error:nil); a failed build presumably leaves the pipeline nil -- confirm.
+	[self setDisplayOutputPipeline:[[self device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil]];
+	[outputPipelineDesc release];
+}
+/* Returns a command buffer from the shared command queue, created with unretained references.
+ * NOTE(review): despite the "new" prefix, the value comes straight from
+ * -commandBufferWithUnretainedReferences with no extra retain -- confirm that callers do not release it. */
+- (id) newCommandBuffer
+{
+	return [[sharedData commandQueue] commandBufferWithUnretainedReferences];
+}
+/* One-time GPU resource setup for this layer, using the device and library of sharedData:
+ *  - default (bilinear) display output pipeline,
+ *  - managed buffers for view properties, display vertices/texcoords and HUD vertices/texcoords,
+ *  - private deposterize textures (two per display) and 2x-native pixel-scale textures,
+ *  - no-copy managed buffers wrapping the CPU video filters' source/destination memory.
+ * The output textures initially point at the shared native 32-bit fetch textures.
+ * NOTE(review): several ivars are assigned [[... new...] retain]; "new" already returns an owned
+ * object and -dealloc releases each once, so this looks like an over-retain -- confirm. */
+- (void) setupLayer
+{
+	[self setDevice:[sharedData device]];
+	
+	MTLRenderPipelineDescriptor *outputPipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
+	[[[outputPipelineDesc colorAttachments] objectAtIndexedSubscript:0] setPixelFormat:[self pixelFormat]];
+	[outputPipelineDesc setAlphaToOneEnabled:YES];
+	[outputPipelineDesc setVertexFunction:[[sharedData defaultLibrary] newFunctionWithName:@"display_output_vertex"]];
+	[outputPipelineDesc setFragmentFunction:[[sharedData defaultLibrary] newFunctionWithName:@"output_filter_bilinear"]];
+	
+	displayOutputPipeline = [[[self device] newRenderPipelineStateWithDescriptor:outputPipelineDesc error:nil] retain];
+	[outputPipelineDesc release];
+	
+	_cdvPropertiesBuffer = [[[self device] newBufferWithLength:sizeof(DisplayViewShaderProperties) options:MTLResourceStorageModeManaged] retain];
+	_displayVtxPositionBuffer = [[[self device] newBufferWithLength:(sizeof(float) * (4 * 8)) options:MTLResourceStorageModeManaged] retain];
+	_displayTexCoordBuffer = [[[self device] newBufferWithLength:(sizeof(float) * (4 * 8)) options:MTLResourceStorageModeManaged] retain];
+	_hudVtxPositionBuffer = [[[self device] newBufferWithLength:HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE options:MTLResourceStorageModeManaged] retain];
+	_hudTexCoordBuffer = [[[self device] newBufferWithLength:HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE options:MTLResourceStorageModeManaged] retain];
+	// Seed the shader-visible view properties from the display view, then flag the managed buffer as modified.
+	DisplayViewShaderProperties *viewProps = (DisplayViewShaderProperties *)[_cdvPropertiesBuffer contents];
+	viewProps->width = _cdv->GetViewProperties().clientWidth;
+	viewProps->height = _cdv->GetViewProperties().clientHeight;
+	viewProps->rotation = _cdv->GetViewProperties().rotation;
+	viewProps->viewScale = _cdv->GetViewProperties().viewScale;
+	viewProps->lowerHUDMipMapLevel = 0;
+	[_cdvPropertiesBuffer didModifyRange:NSMakeRange(0, sizeof(DisplayViewShaderProperties))];
+	
+	// Set up processing textures.
+	MTLTextureDescriptor *texDisplaySrcDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
+		width:GPU_FRAMEBUFFER_NATIVE_WIDTH
+		height:GPU_FRAMEBUFFER_NATIVE_HEIGHT
+		mipmapped:NO];
+	[texDisplaySrcDesc setResourceOptions:MTLResourceStorageModePrivate];
+	[texDisplaySrcDesc setStorageMode:MTLStorageModePrivate];
+	[texDisplaySrcDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
+	
+	_texDisplaySrcDeposterize[NDSDisplayID_Main][0] = [[[self device] newTextureWithDescriptor:texDisplaySrcDesc] retain];
+	_texDisplaySrcDeposterize[NDSDisplayID_Touch][0] = [[[self device] newTextureWithDescriptor:texDisplaySrcDesc] retain];
+	_texDisplaySrcDeposterize[NDSDisplayID_Main][1] = [[[self device] newTextureWithDescriptor:texDisplaySrcDesc] retain];
+	_texDisplaySrcDeposterize[NDSDisplayID_Touch][1] = [[[self device] newTextureWithDescriptor:texDisplaySrcDesc] retain];
+	// The pixel-scale textures start at 2x the native size; -setPixelScaler: recreates them at the selected filter's size.
+	MTLTextureDescriptor *texDisplayPixelScaleDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
+		width:GPU_FRAMEBUFFER_NATIVE_WIDTH*2
+		height:GPU_FRAMEBUFFER_NATIVE_HEIGHT*2
+		mipmapped:NO];
+	[texDisplayPixelScaleDesc setResourceOptions:MTLResourceStorageModePrivate];
+	[texDisplayPixelScaleDesc setStorageMode:MTLStorageModePrivate];
+	[texDisplayPixelScaleDesc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
+	
+	[self setTexDisplayPixelScaleMain:[[self device] newTextureWithDescriptor:texDisplayPixelScaleDesc]];
+	[self setTexDisplayPixelScaleTouch:[[self device] newTextureWithDescriptor:texDisplayPixelScaleDesc]];
+	
+	_texDisplayOutput[NDSDisplayID_Main] = [sharedData texDisplayFetch32NativeMain];
+	_texDisplayOutput[NDSDisplayID_Touch] = [sharedData texDisplayFetch32NativeTouch];
+	// Wrap the CPU video filters' source/destination memory in no-copy buffers; lengths assume 4 bytes (uint32_t) per pixel.
+	VideoFilter *vfMain = _cdv->GetPixelScalerObject(NDSDisplayID_Main);
+	[self setBufCPUFilterSrcMain:[[self device] newBufferWithBytesNoCopy:vfMain->GetSrcBufferPtr()
+		length:vfMain->GetSrcWidth() * vfMain->GetSrcHeight() * sizeof(uint32_t)
+		options:MTLResourceStorageModeManaged
+		deallocator:nil]];
+	
+	[self setBufCPUFilterDstMain:[[self device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr()
+		length:vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t)
+		options:MTLResourceStorageModeManaged
+		deallocator:nil]];
+	
+	VideoFilter *vfTouch = _cdv->GetPixelScalerObject(NDSDisplayID_Touch);
+	[self setBufCPUFilterSrcTouch:[[self device] newBufferWithBytesNoCopy:vfTouch->GetSrcBufferPtr()
+		length:vfTouch->GetSrcWidth() * vfTouch->GetSrcHeight() * sizeof(uint32_t)
+		options:MTLResourceStorageModeManaged
+		deallocator:nil]];
+	
+	[self setBufCPUFilterDstTouch:[[self device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr()
+		length:vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t)
+		options:MTLResourceStorageModeManaged
+		deallocator:nil]];
+	
+	texHUDCharMap = nil;
+}
+/* Re-wraps the CPU video filters' destination memory for `filterID`. The buffer length is computed
+ * from the filter's source size scaled by the filter's multiply/divide ratio, not from GetDstWidth()/GetDstHeight(). */
+- (void) resizeCPUPixelScalerUsingFilterID:(const VideoFilterTypeID)filterID
+{
+	const VideoFilterAttributes vfAttr = VideoFilter::GetAttributesByID(filterID);
+	
+	VideoFilter *vfMain = _cdv->GetPixelScalerObject(NDSDisplayID_Main);
+	[self setBufCPUFilterDstMain:[[self device] newBufferWithBytesNoCopy:vfMain->GetDstBufferPtr()
+		length:(vfMain->GetSrcWidth() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * (vfMain->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t)
+		options:MTLResourceStorageModeManaged
+		deallocator:nil]];
+	
+	VideoFilter *vfTouch = _cdv->GetPixelScalerObject(NDSDisplayID_Touch);
+	[self setBufCPUFilterDstTouch:[[self device] newBufferWithBytesNoCopy:vfTouch->GetDstBufferPtr()
+		length:(vfTouch->GetSrcWidth() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * 
(vfTouch->GetSrcHeight() * vfAttr.scaleMultiply / vfAttr.scaleDivide) * sizeof(uint32_t) + options:MTLResourceStorageModeManaged + deallocator:nil]]; +} + +- (void) copyHUDFontUsingFace:(const FT_Face &)fontFace + size:(const size_t)glyphSize + tileSize:(const size_t)glyphTileSize + info:(GlyphInfo *)glyphInfo +{ + FT_Error error = FT_Err_Ok; + size_t texLevel = 0; + for (size_t tileSize = glyphTileSize; tileSize >= 4; texLevel++, tileSize >>= 1) + { } + + MTLTextureDescriptor *texHUDCharMapDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + width:16 * glyphTileSize + height:16 * glyphTileSize + mipmapped:YES]; + [texHUDCharMapDesc setResourceOptions:MTLResourceStorageModeManaged]; + [texHUDCharMapDesc setStorageMode:MTLStorageModeManaged]; + [texHUDCharMapDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined]; + [texHUDCharMapDesc setUsage:MTLTextureUsageShaderRead]; + [texHUDCharMapDesc setMipmapLevelCount:texLevel]; + + [self setTexHUDCharMap:[[self device] newTextureWithDescriptor:texHUDCharMapDesc]]; + + texLevel = 0; + for (size_t tileSize = glyphTileSize, gSize = glyphSize; tileSize >= 4; texLevel++, tileSize >>= 1, gSize = (GLfloat)tileSize * 0.75f) + { + const size_t charMapBufferPixCount = (16 * tileSize) * (16 * tileSize); + + const uint32_t fontColor = 0x00FFFFFF; + uint32_t *charMapBuffer = (uint32_t *)malloc(charMapBufferPixCount * 2 * sizeof(uint32_t)); + for (size_t i = 0; i < charMapBufferPixCount; i++) + { + charMapBuffer[i] = fontColor; + } + + error = FT_Set_Char_Size(fontFace, gSize << 6, gSize << 6, 72, 72); + if (error) + { + printf("OGLVideoOutput: FreeType failed to set the font size!\n"); + } + + const FT_GlyphSlot glyphSlot = fontFace->glyph; + + // Fill the box with a translucent black color. 
+ for (size_t rowIndex = 0; rowIndex < tileSize; rowIndex++) + { + for (size_t pixIndex = 0; pixIndex < tileSize; pixIndex++) + { + const uint32_t colorRGBA8888 = 0x50000000; + charMapBuffer[(tileSize + pixIndex) + (rowIndex * (16 * tileSize))] = colorRGBA8888; + } + } + + // Set up the glyphs. + for (unsigned char c = 32; c < 255; c++) + { + error = FT_Load_Char(fontFace, c, FT_LOAD_RENDER); + if (error) + { + continue; + } + + const uint16_t tileOffsetX = (c & 0x0F) * tileSize; + const uint16_t tileOffsetY = (c >> 4) * tileSize; + const uint16_t tileOffsetY_texture = tileOffsetY - (tileSize - gSize + (gSize / 16)); + const uint16_t texSize = tileSize * 16; + const GLuint glyphWidth = glyphSlot->bitmap.width; + + if (tileSize == glyphTileSize) + { + GlyphInfo &gi = glyphInfo[c]; + gi.width = (c != ' ') ? glyphWidth : (GLfloat)tileSize / 5.0f; + gi.texCoord[0] = (GLfloat)tileOffsetX / (GLfloat)texSize; gi.texCoord[1] = (GLfloat)tileOffsetY / (GLfloat)texSize; + gi.texCoord[2] = (GLfloat)(tileOffsetX + glyphWidth) / (GLfloat)texSize; gi.texCoord[3] = (GLfloat)tileOffsetY / (GLfloat)texSize; + gi.texCoord[4] = (GLfloat)(tileOffsetX + glyphWidth) / (GLfloat)texSize; gi.texCoord[5] = (GLfloat)(tileOffsetY + tileSize) / (GLfloat)texSize; + gi.texCoord[6] = (GLfloat)tileOffsetX / (GLfloat)texSize; gi.texCoord[7] = (GLfloat)(tileOffsetY + tileSize) / (GLfloat)texSize; + } + + // Draw the glyph to the client-side buffer. 
+			for (size_t rowIndex = 0; rowIndex < glyphSlot->bitmap.rows; rowIndex++)
+			{
+				for (size_t pixIndex = 0; pixIndex < glyphWidth; pixIndex++)
+				{
+					const uint32_t colorRGBA8888 = fontColor | ((uint32_t)((uint8_t *)(glyphSlot->bitmap.buffer))[pixIndex + (rowIndex * glyphWidth)] << 24);
+					charMapBuffer[(tileOffsetX + pixIndex) + ((tileOffsetY_texture + rowIndex + (tileSize - glyphSlot->bitmap_top)) * (16 * tileSize))] = colorRGBA8888;
+				}
+			}
+		}
+
+		[[self texHUDCharMap] replaceRegion:MTLRegionMake2D(0, 0, 16 * tileSize, 16 * tileSize)
+			mipmapLevel:texLevel
+			withBytes:charMapBuffer
+			bytesPerRow:16 * tileSize * sizeof(uint32_t)];
+
+		free(charMapBuffer);
+	}
+}
+
+// Selects, for each NDS display, the texture that renderToDrawable will sample
+// (_texDisplayOutput[]), running the optional stages in this order:
+//   1. 16-bit to 32-bit conversion of the fetched frame (BGR555_Rev color format only),
+//   2. two deposterize passes on the GPU,
+//   3. the pixel scaler, on the GPU when WillFilterOnGPU() is true, otherwise on the CPU
+//      through the VideoFilter objects.
+// Displays that were custom-rendered, are disabled, or are not part of the current
+// display mode skip stages 2 and 3. The screen texture coordinates are refreshed at
+// the end from the dimensions of the chosen output textures.
+- (void) processDisplays
+{
+	const NDSDisplayInfo &fetchDisplayInfo = _cdv->GetEmuDisplayInfo();
+	const ClientDisplayMode mode = _cdv->GetViewProperties().mode;
+	const bool useDeposterize = _cdv->GetSourceDeposterize();
+
+	// Start from the raw fetched textures; later stages overwrite these entries.
+	_texDisplayOutput[NDSDisplayID_Main]  = (!fetchDisplayInfo.didPerformCustomRender[NDSDisplayID_Main])  ? [sharedData texDisplayFetch32NativeMain]  : [sharedData texDisplayFetch32CustomMain];
+	_texDisplayOutput[NDSDisplayID_Touch] = (!fetchDisplayInfo.didPerformCustomRender[NDSDisplayID_Touch]) ? [sharedData texDisplayFetch32NativeTouch] : [sharedData texDisplayFetch32CustomTouch];
+
+	if (useDeposterize || (_cdv->GetPixelScaler() != VideoFilterTypeID_None) || (fetchDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev))
+	{
+		const bool willFilterOnGPU = _cdv->WillFilterOnGPU();
+		const bool shouldProcessDisplay[2] = { !fetchDisplayInfo.didPerformCustomRender[NDSDisplayID_Main]  && fetchDisplayInfo.isDisplayEnabled[NDSDisplayID_Main]  && (mode == ClientDisplayMode_Main  || mode == ClientDisplayMode_Dual),
+		                                       !fetchDisplayInfo.didPerformCustomRender[NDSDisplayID_Touch] && fetchDisplayInfo.isDisplayEnabled[NDSDisplayID_Touch] && (mode == ClientDisplayMode_Touch || mode == ClientDisplayMode_Dual) };
+
+		VideoFilter *vfMain  = _cdv->GetPixelScalerObject(NDSDisplayID_Main);
+		VideoFilter *vfTouch = _cdv->GetPixelScalerObject(NDSDisplayID_Touch);
+
+		id cb = [[sharedData commandQueue] commandBufferWithUnretainedReferences];
+		id cce = [cb computeCommandEncoder];
+
+		if (fetchDisplayInfo.colorFormat == NDSColorFormat_BGR555_Rev)
+		{
+			[sharedData convertFetch16To32UsingEncoder:cce
+				isMainNative:(fetchDisplayInfo.didPerformCustomRender[NDSDisplayID_Main]) ? NO : YES
+				isTouchNative:(fetchDisplayInfo.didPerformCustomRender[NDSDisplayID_Touch]) ? NO : YES];
+		}
+
+		// Run the video source filters and the pixel scalers
+		if (useDeposterize)
+		{
+			[cce setComputePipelineState:[sharedData deposterizePipeline]];
+
+			if (shouldProcessDisplay[NDSDisplayID_Main])
+			{
+				// Pass 1: fetched frame -> scratch texture [0].
+				[cce setTexture:[sharedData texDisplayFetch32NativeMain] atIndex:0];
+				[cce setTexture:_texDisplaySrcDeposterize[NDSDisplayID_Main][0] atIndex:1];
+				[cce dispatchThreadgroups:[sharedData deposterizeThreadGroupsPerGrid]
+					threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]];
+
+				// Pass 2: scratch texture [0] -> scratch texture [1].
+				[cce setTexture:_texDisplaySrcDeposterize[NDSDisplayID_Main][0] atIndex:0];
+				[cce setTexture:_texDisplaySrcDeposterize[NDSDisplayID_Main][1] atIndex:1];
+				[cce dispatchThreadgroups:[sharedData deposterizeThreadGroupsPerGrid]
+					threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]];
+
+				_texDisplayOutput[NDSDisplayID_Main] = _texDisplaySrcDeposterize[NDSDisplayID_Main][1];
+			}
+
+			if (shouldProcessDisplay[NDSDisplayID_Touch])
+			{
+				[cce setTexture:[sharedData texDisplayFetch32NativeTouch] atIndex:0];
+				[cce setTexture:_texDisplaySrcDeposterize[NDSDisplayID_Touch][0] atIndex:1];
+				[cce dispatchThreadgroups:[sharedData deposterizeThreadGroupsPerGrid]
+					threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]];
+
+				[cce setTexture:_texDisplaySrcDeposterize[NDSDisplayID_Touch][0] atIndex:0];
+				[cce setTexture:_texDisplaySrcDeposterize[NDSDisplayID_Touch][1] atIndex:1];
+				[cce dispatchThreadgroups:[sharedData deposterizeThreadGroupsPerGrid]
+					threadsPerThreadgroup:[sharedData deposterizeThreadsPerGroup]];
+
+				_texDisplayOutput[NDSDisplayID_Touch] = _texDisplaySrcDeposterize[NDSDisplayID_Touch][1];
+			}
+		}
+
+		// Run the pixel scalers. First attempt on the GPU.
+		if ( (_cdv->GetPixelScaler() != VideoFilterTypeID_None) && willFilterOnGPU )
+		{
+			[cce setComputePipelineState:[self pixelScalePipeline]];
+
+			if (shouldProcessDisplay[NDSDisplayID_Main])
+			{
+				[cce setTexture:_texDisplayOutput[NDSDisplayID_Main] atIndex:0];
+				[cce setTexture:[self texDisplayPixelScaleMain] atIndex:1];
+				[cce setTexture:[sharedData texCurrentHQnxLUT] atIndex:2];
+				[cce dispatchThreadgroups:_pixelScalerThreadGroupsPerGrid threadsPerThreadgroup:_pixelScalerThreadsPerGroup];
+
+				_texDisplayOutput[NDSDisplayID_Main] = [self texDisplayPixelScaleMain];
+			}
+
+			if (shouldProcessDisplay[NDSDisplayID_Touch])
+			{
+				[cce setTexture:_texDisplayOutput[NDSDisplayID_Touch] atIndex:0];
+				[cce setTexture:[self texDisplayPixelScaleTouch] atIndex:1];
+				[cce setTexture:[sharedData texCurrentHQnxLUT] atIndex:2];
+				[cce dispatchThreadgroups:_pixelScalerThreadGroupsPerGrid threadsPerThreadgroup:_pixelScalerThreadsPerGroup];
+
+				_texDisplayOutput[NDSDisplayID_Touch] = [self texDisplayPixelScaleTouch];
+			}
+		}
+
+		[cce endEncoding];
+		[cb commit];
+
+		// If the pixel scaler didn't run on the GPU, run the pixel scaler on the CPU after the command buffer commit.
+		if ( (_cdv->GetPixelScaler() != VideoFilterTypeID_None) && !willFilterOnGPU )
+		{
+			if (useDeposterize)
+			{
+				// Hybrid CPU/GPU-based path (may cause a performance hit on pixel download)
+				id cpuFilterSrcCB = [[sharedData commandQueue] commandBufferWithUnretainedReferences];
+				id bce = [cpuFilterSrcCB blitCommandEncoder];
+
+				if (shouldProcessDisplay[NDSDisplayID_Main])
+				{
+					[bce copyFromTexture:_texDisplaySrcDeposterize[NDSDisplayID_Main][1]
+						sourceSlice:0
+						sourceLevel:0
+						sourceOrigin:MTLOriginMake(0, 0, 0)
+						sourceSize:MTLSizeMake(vfMain->GetSrcWidth(), vfMain->GetSrcHeight(), 1)
+						toBuffer:[self bufCPUFilterSrcMain]
+						destinationOffset:0
+						destinationBytesPerRow:vfMain->GetSrcWidth() * sizeof(uint32_t)
+						destinationBytesPerImage:vfMain->GetSrcWidth() * vfMain->GetSrcHeight() * sizeof(uint32_t)];
+
+					[bce synchronizeResource:[self bufCPUFilterSrcMain]];
+				}
+
+				if (shouldProcessDisplay[NDSDisplayID_Touch])
+				{
+					[bce copyFromTexture:_texDisplaySrcDeposterize[NDSDisplayID_Touch][1]
+						sourceSlice:0
+						sourceLevel:0
+						sourceOrigin:MTLOriginMake(0, 0, 0)
+						sourceSize:MTLSizeMake(vfTouch->GetSrcWidth(), vfTouch->GetSrcHeight(), 1)
+						toBuffer:[self bufCPUFilterSrcTouch]
+						destinationOffset:0
+						destinationBytesPerRow:vfTouch->GetSrcWidth() * sizeof(uint32_t)
+						destinationBytesPerImage:vfTouch->GetSrcWidth() * vfTouch->GetSrcHeight() * sizeof(uint32_t)];
+
+					[bce synchronizeResource:[self bufCPUFilterSrcTouch]];
+				}
+
+				[bce endEncoding];
+				[cpuFilterSrcCB commit];
+
+				// The CPU filters below read the buffers this blit writes. commit only
+				// schedules the work, so block until the GPU has finished the download
+				// and the managed-buffer synchronization; otherwise RunFilter() can read
+				// stale or partially written pixels.
+				[cpuFilterSrcCB waitUntilCompleted];
+			}
+
+			pthread_mutex_lock(_cdv->GetMutexProcessPtr());
+
+			if (shouldProcessDisplay[NDSDisplayID_Main])
+			{
+				vfMain->RunFilter();
+			}
+
+			if (shouldProcessDisplay[NDSDisplayID_Touch])
+			{
+				vfTouch->RunFilter();
+			}
+
+			// Upload the CPU filter results into the pixel scaler output textures.
+			id cpuFilterDstCB = [[sharedData commandQueue] commandBufferWithUnretainedReferences];
+			id bce = [cpuFilterDstCB blitCommandEncoder];
+
+			if (shouldProcessDisplay[NDSDisplayID_Main])
+			{
+				[[self bufCPUFilterDstMain] didModifyRange:NSMakeRange(0, vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t))];
+
+				[bce copyFromBuffer:[self bufCPUFilterDstMain]
+					sourceOffset:0
+					sourceBytesPerRow:vfMain->GetDstWidth() * sizeof(uint32_t)
+					sourceBytesPerImage:vfMain->GetDstWidth() * vfMain->GetDstHeight() * sizeof(uint32_t)
+					sourceSize:MTLSizeMake(vfMain->GetDstWidth(), vfMain->GetDstHeight(), 1)
+					toTexture:[self texDisplayPixelScaleMain]
+					destinationSlice:0
+					destinationLevel:0
+					destinationOrigin:MTLOriginMake(0, 0, 0)];
+
+				_texDisplayOutput[NDSDisplayID_Main] = [self texDisplayPixelScaleMain];
+			}
+
+			if (shouldProcessDisplay[NDSDisplayID_Touch])
+			{
+				[[self bufCPUFilterDstTouch] didModifyRange:NSMakeRange(0, vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t))];
+
+				[bce copyFromBuffer:[self bufCPUFilterDstTouch]
+					sourceOffset:0
+					sourceBytesPerRow:vfTouch->GetDstWidth() * sizeof(uint32_t)
+					sourceBytesPerImage:vfTouch->GetDstWidth() * vfTouch->GetDstHeight() * sizeof(uint32_t)
+					sourceSize:MTLSizeMake(vfTouch->GetDstWidth(), vfTouch->GetDstHeight(), 1)
+					toTexture:[self texDisplayPixelScaleTouch]
+					destinationSlice:0
+					destinationLevel:0
+					destinationOrigin:MTLOriginMake(0, 0, 0)];
+
+				_texDisplayOutput[NDSDisplayID_Touch] = [self texDisplayPixelScaleTouch];
+			}
+
+			[bce endEncoding];
+			[cpuFilterDstCB commit];
+
+			pthread_mutex_unlock(_cdv->GetMutexProcessPtr());
+		}
+	}
+
+	// Update the texture coordinates
+	_cdv->SetScreenTextureCoordinates((float)[_texDisplayOutput[NDSDisplayID_Main]  width], (float)[_texDisplayOutput[NDSDisplayID_Main]  height],
+	                                  (float)[_texDisplayOutput[NDSDisplayID_Touch] width], (float)[_texDisplayOutput[NDSDisplayID_Touch] height],
+	                                  (float *)[_displayTexCoordBuffer contents]);
+	[_displayTexCoordBuffer didModifyRange:NSMakeRange(0, sizeof(float) * (4 * 8))];
+}
+
+- (void) renderToDrawable
+{
+	NSAutoreleasePool *renderAutoreleasePool = [[NSAutoreleasePool alloc] init];
+
+	id drawable = [self nextDrawable];
+	id texture = [drawable texture];
+	if (texture == nil)
+	{
+
[renderAutoreleasePool release]; + return; + } + + const NDSDisplayInfo &displayInfo = _cdv->GetEmuDisplayInfo(); + + dispatch_semaphore_wait(availableResources, DISPATCH_TIME_FOREVER); + + [[self colorAttachment0Desc] setTexture:texture]; + + id cb = [[sharedData commandQueue] commandBufferWithUnretainedReferences]; + id ce = [cb renderCommandEncoderWithDescriptor:_outputRenderPassDesc]; + + // Set up the view properties. + BOOL didChangeViewProperties = NO; + + if ([self needsViewportUpdate]) + { + MTLViewport newViewport; + newViewport.originX = 0.0; + newViewport.originY = 0.0; + newViewport.width = _cdv->GetViewProperties().clientWidth; + newViewport.height = _cdv->GetViewProperties().clientHeight; + newViewport.znear = 0.0; + newViewport.zfar = 1.0; + [ce setViewport:newViewport]; + + DisplayViewShaderProperties *viewProps = (DisplayViewShaderProperties *)[_cdvPropertiesBuffer contents]; + viewProps->width = _cdv->GetViewProperties().clientWidth; + viewProps->height = _cdv->GetViewProperties().clientHeight; + didChangeViewProperties = YES; + + [self setNeedsViewportUpdate:NO]; + } + + if ([self needsRotationScaleUpdate]) + { + DisplayViewShaderProperties *viewProps = (DisplayViewShaderProperties *)[_cdvPropertiesBuffer contents]; + viewProps->rotation = _cdv->GetViewProperties().rotation; + viewProps->viewScale = _cdv->GetViewProperties().viewScale; + viewProps->lowerHUDMipMapLevel = ( ((float)HUD_TEXTBOX_BASE_SCALE * _cdv->GetHUDObjectScale() / _cdv->GetScaleFactor()) >= (2.0/3.0) ) ? 0 : 1; + didChangeViewProperties = YES; + + [self setNeedsRotationScaleUpdate:NO]; + } + + if (didChangeViewProperties) + { + [_cdvPropertiesBuffer didModifyRange:NSMakeRange(0, sizeof(DisplayViewShaderProperties))]; + } + + // Draw the NDS displays. 
+ if ([self needsScreenVerticesUpdate]) + { + _cdv->SetScreenVertices((float *)[_displayVtxPositionBuffer contents]); + [_displayVtxPositionBuffer didModifyRange:NSMakeRange(0, sizeof(float) * (4 * 8))]; + + [self setNeedsScreenVerticesUpdate:NO]; + } + + [ce setRenderPipelineState:[self displayOutputPipeline]]; + [ce setVertexBuffer:_displayVtxPositionBuffer offset:0 atIndex:0]; + [ce setVertexBuffer:_displayTexCoordBuffer offset:0 atIndex:1]; + [ce setVertexBuffer:_cdvPropertiesBuffer offset:0 atIndex:2]; + + switch (_cdv->GetViewProperties().mode) + { + case ClientDisplayMode_Main: + { + if (displayInfo.isDisplayEnabled[NDSDisplayID_Main]) + { + [ce setFragmentTexture:_texDisplayOutput[NDSDisplayID_Main] atIndex:0]; + [ce drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; + } + break; + } + + case ClientDisplayMode_Touch: + { + if (displayInfo.isDisplayEnabled[NDSDisplayID_Touch]) + { + [ce setFragmentTexture:_texDisplayOutput[NDSDisplayID_Touch] atIndex:0]; + [ce drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:4 vertexCount:4]; + } + break; + } + + case ClientDisplayMode_Dual: + { + const NDSDisplayID majorDisplayID = (_cdv->GetViewProperties().order == ClientDisplayOrder_MainFirst) ? NDSDisplayID_Main : NDSDisplayID_Touch; + const size_t majorDisplayVtx = (_cdv->GetViewProperties().order == ClientDisplayOrder_MainFirst) ? 
8 : 12; + + switch (_cdv->GetViewProperties().layout) + { + case ClientDisplayLayout_Hybrid_2_1: + case ClientDisplayLayout_Hybrid_16_9: + case ClientDisplayLayout_Hybrid_16_10: + { + if (displayInfo.isDisplayEnabled[majorDisplayID]) + { + [ce setFragmentTexture:_texDisplayOutput[majorDisplayID] atIndex:0]; + [ce drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:majorDisplayVtx vertexCount:4]; + } + break; + } + + default: + break; + } + + if (displayInfo.isDisplayEnabled[NDSDisplayID_Main]) + { + [ce setFragmentTexture:_texDisplayOutput[NDSDisplayID_Main] atIndex:0]; + [ce drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; + } + + if (displayInfo.isDisplayEnabled[NDSDisplayID_Touch]) + { + [ce setFragmentTexture:_texDisplayOutput[NDSDisplayID_Touch] atIndex:0]; + [ce drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:4 vertexCount:4]; + } + } + + default: + break; + } + + // Draw the HUD. + const size_t hudLength = _cdv->GetHUDString().length(); + if ( _cdv->GetHUDVisibility() && (hudLength > 1) && ([self texHUDCharMap] != nil) ) + { + if (_cdv->HUDNeedsUpdate()) + { + _cdv->SetHUDVertices((float)_cdv->GetViewProperties().clientWidth, (float)_cdv->GetViewProperties().clientHeight, (float *)[_hudVtxPositionBuffer contents]); + [_hudVtxPositionBuffer didModifyRange:NSMakeRange(0, sizeof(float) * hudLength * 8)]; + + _cdv->SetHUDTextureCoordinates((float *)[_hudTexCoordBuffer contents]); + [_hudTexCoordBuffer didModifyRange:NSMakeRange(0, sizeof(float) * hudLength * 8)]; + + _cdv->ClearHUDNeedsUpdate(); + } + + [ce setRenderPipelineState:[sharedData hudPipeline]]; + [ce setVertexBuffer:_hudVtxPositionBuffer offset:0 atIndex:0]; + [ce setVertexBuffer:_hudTexCoordBuffer offset:0 atIndex:1]; + [ce setVertexBuffer:_cdvPropertiesBuffer offset:0 atIndex:2]; + [ce setFragmentTexture:[self texHUDCharMap] atIndex:0]; + + // First, draw the backing text box. 
+		[ce setFragmentSamplerState:[sharedData samplerHUDBox] atIndex:0];
+		[ce drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+			indexCount:6
+			indexType:MTLIndexTypeUInt16
+			indexBuffer:[sharedData hudIndexBuffer]
+			indexBufferOffset:0];
+
+		// Next, draw each character inside the box.
+		[ce setFragmentSamplerState:[sharedData samplerHUDText] atIndex:0];
+		[ce drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+			indexCount:(hudLength - 1) * 6
+			indexType:MTLIndexTypeUInt16
+			indexBuffer:[sharedData hudIndexBuffer]
+			indexBufferOffset:6 * sizeof(uint16_t)];
+	}
+
+	[ce endEncoding];
+
+	[cb presentDrawable:drawable];
+	[cb addCompletedHandler:^(id block) {
+		dispatch_semaphore_signal(availableResources);
+	}];
+
+	[cb commit];
+
+	[renderAutoreleasePool release];
+}
+
+@end
+
+#pragma mark -
+
+// Creates the fetch object: one read/write lock and one native-size 32-bit clone
+// buffer per (display, buffer index) pair, plus the shared Metal data object.
+MacMetalFetchObject::MacMetalFetchObject()
+{
+	_useCPUFilterPipeline = true;
+
+	pthread_rwlock_init(&_srcCloneRWLock[NDSDisplayID_Main][0],  NULL);
+	pthread_rwlock_init(&_srcCloneRWLock[NDSDisplayID_Touch][0], NULL);
+	pthread_rwlock_init(&_srcCloneRWLock[NDSDisplayID_Main][1],  NULL);
+	pthread_rwlock_init(&_srcCloneRWLock[NDSDisplayID_Touch][1], NULL);
+
+	// A single allocation is carved into four native-size frames:
+	// Main[0], Touch[0], Main[1], Touch[1].
+	_srcNativeCloneMaster = (uint32_t *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2 * 2 * sizeof(uint32_t));
+	_srcNativeClone[NDSDisplayID_Main][0]  = _srcNativeCloneMaster + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 0);
+	_srcNativeClone[NDSDisplayID_Touch][0] = _srcNativeCloneMaster + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 1);
+	_srcNativeClone[NDSDisplayID_Main][1]  = _srcNativeCloneMaster + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2);
+	_srcNativeClone[NDSDisplayID_Touch][1] = _srcNativeCloneMaster + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 3);
+
+	// Clear all four frames. The size must match the allocation above; clearing only
+	// half of it left both index-1 frames uninitialized, and CopyFromSrcClone() can
+	// hand those to a video filter before the first fetch has written them.
+	memset(_srcNativeCloneMaster, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2 * 2 * sizeof(uint32_t));
+
+	_clientData = [[MetalDisplayViewSharedData alloc] init];
+}
+
+// Releases the shared Metal data object, frees the clone buffers while holding every
+// write lock, and then destroys the locks.
+MacMetalFetchObject::~MacMetalFetchObject()
+{
+	[(MetalDisplayViewSharedData *)this->_clientData release];
+
+	pthread_rwlock_wrlock(&this->_srcCloneRWLock[NDSDisplayID_Main][0]);
+	pthread_rwlock_wrlock(&this->_srcCloneRWLock[NDSDisplayID_Touch][0]);
+	pthread_rwlock_wrlock(&this->_srcCloneRWLock[NDSDisplayID_Main][1]);
+	pthread_rwlock_wrlock(&this->_srcCloneRWLock[NDSDisplayID_Touch][1]);
+	free_aligned(this->_srcNativeCloneMaster);
+	this->_srcNativeCloneMaster = NULL;
+	this->_srcNativeClone[NDSDisplayID_Main][0]  = NULL;
+	this->_srcNativeClone[NDSDisplayID_Touch][0] = NULL;
+	this->_srcNativeClone[NDSDisplayID_Main][1]  = NULL;
+	this->_srcNativeClone[NDSDisplayID_Touch][1] = NULL;
+	pthread_rwlock_unlock(&this->_srcCloneRWLock[NDSDisplayID_Touch][1]);
+	pthread_rwlock_unlock(&this->_srcCloneRWLock[NDSDisplayID_Main][1]);
+	pthread_rwlock_unlock(&this->_srcCloneRWLock[NDSDisplayID_Touch][0]);
+	pthread_rwlock_unlock(&this->_srcCloneRWLock[NDSDisplayID_Main][0]);
+
+	pthread_rwlock_destroy(&this->_srcCloneRWLock[NDSDisplayID_Main][0]);
+	pthread_rwlock_destroy(&this->_srcCloneRWLock[NDSDisplayID_Touch][0]);
+	pthread_rwlock_destroy(&this->_srcCloneRWLock[NDSDisplayID_Main][1]);
+	pthread_rwlock_destroy(&this->_srcCloneRWLock[NDSDisplayID_Touch][1]);
+}
+
+// Registers this fetch object with the shared client object.
+void MacMetalFetchObject::Init()
+{
+	[(MacClientSharedObject *)this->_clientData setGPUFetchObject:this];
+}
+
+// Copies one native-size 32-bit clone frame into dstBufferPtr under the matching read lock.
+// dstBufferPtr must have room for GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT pixels.
+void MacMetalFetchObject::CopyFromSrcClone(uint32_t *dstBufferPtr, const NDSDisplayID displayID, const u8 bufferIndex)
+{
+	pthread_rwlock_rdlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
+	memcpy(dstBufferPtr, this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(uint32_t));
+	pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]);
+}
+
+void MacMetalFetchObject::SetFetchBuffers(const NDSDisplayInfo &currentDisplayInfo)
+{
+	[(MetalDisplayViewSharedData
*)this->_clientData setFetchBuffersWithDisplayInfo:currentDisplayInfo]; +} + +void MacMetalFetchObject::FetchFromBufferIndex(const u8 index) +{ + MacClientSharedObject *sharedViewObject = (MacClientSharedObject *)this->_clientData; + this->_useCPUFilterPipeline = ([sharedViewObject numberViewsUsingCPUFiltering] > 0); + + [(MetalDisplayViewSharedData *)this->_clientData fetchFromBufferIndex:index]; +} + +void MacMetalFetchObject::_FetchNativeDisplayByID(const NDSDisplayID displayID, const u8 bufferIndex) +{ + if (this->_useCPUFilterPipeline) + { + pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]); + + if (_fetchDisplayInfo[bufferIndex].pixelBytes == 2) + { + ColorspaceConvertBuffer555To8888Opaque((const uint16_t *)_fetchDisplayInfo[bufferIndex].nativeBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + else + { + ColorspaceConvertBuffer888XTo8888Opaque((const uint32_t *)_fetchDisplayInfo[bufferIndex].nativeBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + + pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]); + } + + [(MetalDisplayViewSharedData *)this->_clientData fetchNativeDisplayByID:displayID bufferIndex:bufferIndex]; +} + +void MacMetalFetchObject::_FetchCustomDisplayByID(const NDSDisplayID displayID, const u8 bufferIndex) +{ + [(MetalDisplayViewSharedData *)this->_clientData fetchCustomDisplayByID:displayID bufferIndex:bufferIndex]; +} + +#pragma mark - + +MacMetalDisplayView::MacMetalDisplayView() +{ + _allowViewUpdates = false; + _canFilterOnGPU = true; + _filtersPreferGPU = true; + _willFilterOnGPU = true; + + _mutexProcessPtr = (pthread_mutex_t *)malloc(sizeof(pthread_mutex_t)); + pthread_mutex_init(_mutexProcessPtr, NULL); +} + +MacMetalDisplayView::~MacMetalDisplayView() +{ + pthread_mutex_destroy(this->_mutexProcessPtr); + 
free(this->_mutexProcessPtr); +} + +void MacMetalDisplayView::_UpdateNormalSize() +{ + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setNeedsScreenVerticesUpdate:YES]; +} + +void MacMetalDisplayView::_UpdateOrder() +{ + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setNeedsScreenVerticesUpdate:YES]; +} + +void MacMetalDisplayView::_UpdateRotation() +{ + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setNeedsRotationScaleUpdate:YES]; +} + +void MacMetalDisplayView::_UpdateClientSize() +{ + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setNeedsViewportUpdate:YES]; + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setNeedsHUDVerticesUpdate:YES]; + this->ClientDisplay3DView::_UpdateClientSize(); +} + +void MacMetalDisplayView::_UpdateViewScale() +{ + this->ClientDisplay3DView::_UpdateViewScale(); + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setNeedsRotationScaleUpdate:YES]; +} + +void MacMetalDisplayView::_LoadNativeDisplayByID(const NDSDisplayID displayID) +{ + if ((this->GetPixelScaler() != VideoFilterTypeID_None) && !this->WillFilterOnGPU() && !this->GetSourceDeposterize()) + { + MacMetalFetchObject &fetchObjMutable = (MacMetalFetchObject &)this->GetFetchObject(); + VideoFilter *vf = this->GetPixelScalerObject(displayID); + + fetchObjMutable.CopyFromSrcClone(vf->GetSrcBufferPtr(), displayID, this->GetEmuDisplayInfo().bufferIndex); + } +} + +void MacMetalDisplayView::_ResizeCPUPixelScaler(const VideoFilterTypeID filterID) +{ + this->ClientDisplay3DView::_ResizeCPUPixelScaler(filterID); + [(DisplayViewMetalLayer *)this->GetFrontendLayer() resizeCPUPixelScalerUsingFilterID:filterID]; +} + +pthread_mutex_t* MacMetalDisplayView::GetMutexProcessPtr() const +{ + return this->_mutexProcessPtr; +} + +void MacMetalDisplayView::Init() +{ + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setupLayer]; +} + +void MacMetalDisplayView::CopyHUDFont(const FT_Face &fontFace, const size_t glyphSize, const size_t glyphTileSize, GlyphInfo *glyphInfo) +{ + 
[(DisplayViewMetalLayer *)this->GetFrontendLayer() copyHUDFontUsingFace:fontFace size:glyphSize tileSize:glyphTileSize info:glyphInfo]; +} + +// NDS screen filters +void MacMetalDisplayView::SetPixelScaler(const VideoFilterTypeID filterID) +{ + pthread_mutex_lock(this->_mutexProcessPtr); + + this->ClientDisplay3DView::SetPixelScaler(filterID); + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setPixelScaler:this->_pixelScaler]; + this->_willFilterOnGPU = (this->GetFiltersPreferGPU()) ? ([(DisplayViewMetalLayer *)this->GetFrontendLayer() pixelScalePipeline] != nil) : false; + + pthread_mutex_unlock(this->_mutexProcessPtr); +} + +void MacMetalDisplayView::SetOutputFilter(const OutputFilterTypeID filterID) +{ + this->ClientDisplay3DView::SetOutputFilter(filterID); + [(DisplayViewMetalLayer *)this->GetFrontendLayer() setOutputFilter:filterID]; +} + +void MacMetalDisplayView::SetFiltersPreferGPU(const bool preferGPU) +{ + pthread_mutex_lock(this->_mutexProcessPtr); + + this->_filtersPreferGPU = preferGPU; + this->_willFilterOnGPU = (preferGPU) ? 
([(DisplayViewMetalLayer *)this->GetFrontendLayer() pixelScalePipeline] != nil) : false;
+
+	pthread_mutex_unlock(this->_mutexProcessPtr);
+}
+
+// NDS GPU interface
+void MacMetalDisplayView::ProcessDisplays()
+{
+	[(DisplayViewMetalLayer *)this->GetFrontendLayer() processDisplays];
+}
+
+void MacMetalDisplayView::UpdateView()
+{
+	if (this->_allowViewUpdates)
+	{
+		[(DisplayViewMetalLayer *)this->GetFrontendLayer() renderToDrawable];
+	}
+}
+
+#pragma mark -
+// Creates the four HQnx/LQnx lookup textures on `device` and fills them from the
+// _LQ2xLUT/_HQ2xLUT/_HQ3xLUT/_HQ4xLUT tables after calling InitHQnxLUTs().
+// Each texture is a 3D BGRA8 texture of 512 x (4, 9 or 16) x 16 texels.
+//
+// Ownership: every output reference receives exactly one owning reference, which the
+// caller gives back through DeleteHQnxLUTs_Metal().
+//
+// NOTE(review): the parameters appear here as plain `id`; any protocol qualifiers in
+// the real declaration are not visible in this view -- confirm against the header.
+void SetupHQnxLUTs_Metal(id &device, id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT)
+{
+	MTLTextureDescriptor *texHQ2xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
+		width:256 * 2
+		height:4
+		mipmapped:NO];
+
+	[texHQ2xLUTDesc setTextureType:MTLTextureType3D];
+	[texHQ2xLUTDesc setDepth:16];
+	[texHQ2xLUTDesc setResourceOptions:MTLResourceStorageModeManaged];
+	[texHQ2xLUTDesc setStorageMode:MTLStorageModeManaged];
+	[texHQ2xLUTDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
+	[texHQ2xLUTDesc setUsage:MTLTextureUsageShaderRead];
+
+	MTLTextureDescriptor *texHQ3xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
+		width:256 * 2
+		height:9
+		mipmapped:NO];
+	[texHQ3xLUTDesc setTextureType:MTLTextureType3D];
+	[texHQ3xLUTDesc setDepth:16];
+	[texHQ3xLUTDesc setResourceOptions:MTLResourceStorageModeManaged];
+	[texHQ3xLUTDesc setStorageMode:MTLStorageModeManaged];
+	[texHQ3xLUTDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
+	[texHQ3xLUTDesc setUsage:MTLTextureUsageShaderRead];
+
+	MTLTextureDescriptor *texHQ4xLUTDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
+		width:256 * 2
+		height:16
+		mipmapped:NO];
+	[texHQ4xLUTDesc setTextureType:MTLTextureType3D];
+	[texHQ4xLUTDesc setDepth:16];
+	[texHQ4xLUTDesc setResourceOptions:MTLResourceStorageModeManaged];
+	[texHQ4xLUTDesc setStorageMode:MTLStorageModeManaged];
+	[texHQ4xLUTDesc setCpuCacheMode:MTLCPUCacheModeWriteCombined];
+	[texHQ4xLUTDesc setUsage:MTLTextureUsageShaderRead];
+
+	// -newTextureWithDescriptor: is a "new" method, so it already returns an object the
+	// caller owns. An additional -retain here would leave every LUT texture with one
+	// reference that the single -release in DeleteHQnxLUTs_Metal() never balances.
+	// The LQ2x and HQ2x tables share the same dimensions, hence the shared descriptor.
+	texLQ2xLUT = [device newTextureWithDescriptor:texHQ2xLUTDesc];
+	texHQ2xLUT = [device newTextureWithDescriptor:texHQ2xLUTDesc];
+	texHQ3xLUT = [device newTextureWithDescriptor:texHQ3xLUTDesc];
+	texHQ4xLUT = [device newTextureWithDescriptor:texHQ4xLUTDesc];
+
+	InitHQnxLUTs();
+	[texLQ2xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 4, 16)
+		mipmapLevel:0
+		slice:0
+		withBytes:_LQ2xLUT
+		bytesPerRow:256 * 2 * sizeof(uint32_t)
+		bytesPerImage:256 * 2 * 4 * sizeof(uint32_t)];
+
+	[texHQ2xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 4, 16)
+		mipmapLevel:0
+		slice:0
+		withBytes:_HQ2xLUT
+		bytesPerRow:256 * 2 * sizeof(uint32_t)
+		bytesPerImage:256 * 2 * 4 * sizeof(uint32_t)];
+
+	[texHQ3xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 9, 16)
+		mipmapLevel:0
+		slice:0
+		withBytes:_HQ3xLUT
+		bytesPerRow:256 * 2 * sizeof(uint32_t)
+		bytesPerImage:256 * 2 * 9 * sizeof(uint32_t)];
+
+	[texHQ4xLUT replaceRegion:MTLRegionMake3D(0, 0, 0, 256 * 2, 16, 16)
+		mipmapLevel:0
+		slice:0
+		withBytes:_HQ4xLUT
+		bytesPerRow:256 * 2 * sizeof(uint32_t)
+		bytesPerImage:256 * 2 * 16 * sizeof(uint32_t)];
+}
+
+// Releases the textures created by SetupHQnxLUTs_Metal() and clears the caller's
+// references so they cannot be used or released again by accident.
+void DeleteHQnxLUTs_Metal(id &texLQ2xLUT, id &texHQ2xLUT, id &texHQ3xLUT, id &texHQ4xLUT)
+{
+	[texLQ2xLUT release];
+	[texHQ2xLUT release];
+	[texHQ3xLUT release];
+	[texHQ4xLUT release];
+
+	texLQ2xLUT = nil;
+	texHQ2xLUT = nil;
+	texHQ3xLUT = nil;
+	texHQ4xLUT = nil;
+}
diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal
new file mode 100644
index 000000000..9848d0422
--- /dev/null
+++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayViewShaders.metal
@@ -0,0 +1,2764 @@
+/*
+	Copyright (C) 2017 DeSmuME team
+
+	This file is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version
2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . + */ + +#include +using namespace metal; + +#define LANCZOS_FIX(c) max(abs(c), 1e-5) + +struct HUDVtx +{ + float4 position [[position]]; + float2 texCoord; + bool isBox; + bool lowerHUDMipMapLevel; +}; + +struct DisplayVtx +{ + float4 position [[position]]; + float2 texCoord; +}; + +struct DisplayViewShaderProperties +{ + float width; + float height; + float rotation; + float viewScale; + uint lowerHUDMipMapLevel; +}; + +float reduce(const float3 color); +float4 unpack_unorm1555_to_unorm8888(const ushort color16); +float3 color_interpolate_LTE(const float3 pixA, const float3 pixB, const float3 threshold); +float4 bicubic_weight_bspline(const float x); +float4 bicubic_weight_mitchell_netravali(const float x); +float4 bicubic_weight_lanczos2(const float x); +float3 bicubic_weight_lanczos3(const float x); +float dist_EPXPlus(const float3 pixA, const float3 pixB); +float GetEagleResult(const float v1, const float v2, const float v3, const float v4); +float3 Lerp(const float3 weight, const float3 p1, const float3 p2, const float3 p3); +bool InterpDiff(const float3 p1, const float3 p2); +float DistYCbCr(const float3 pixA, const float3 pixB); +bool IsPixEqual(const float3 pixA, const float3 pixB); +bool IsBlendingNeeded(const int4 blend); + +constexpr sampler genSampler = sampler(coord::pixel, address::clamp_to_edge, filter::nearest); +constexpr sampler outputSamplerBilinear = sampler(coord::pixel, address::clamp_to_edge, filter::linear); + +float reduce(const float3 color) +{ + return dot(color, float3(65536.0f, 256.0f, 1.0f)); +} + +float GetEagleResult(const 
float v1, const float v2, const float v3, const float v4) +{ + return sign(abs(v1-v3)+abs(v1-v4))-sign(abs(v2-v3)+abs(v2-v4)); +} + +/* Weighted sum of three colors: p1, p2 and p3 scaled by weight.r, weight.g and weight.b respectively. */ +float3 Lerp(const float3 weight, const float3 p1, const float3 p2, const float3 p3) +{ + return p1*weight.r + p2*weight.g + p3*weight.b; +} + +/* Returns true when any component of the yuv-style difference between p1 and p2 exceeds its threshold (192/255, 28/255, 48/255). */ +bool InterpDiff(const float3 p1, const float3 p2) +{ + const float3 diff = p1 - p2; + float3 yuv = float3( diff.r + diff.g + diff.b, + diff.r - diff.b, + -diff.r + (2.0f*diff.g) - diff.b ); + yuv = abs(yuv); + + return any( yuv > float3(192.0f/255.0f, 28.0f/255.0f, 48.0f/255.0f) ); +} + +/* Expands a packed 16-bit 1555 color into four floats. Bits 0-4, 5-9 and 10-14 become the first three components, each scaled by 1/31 into the 0..1 range. Bit 15 becomes the fourth component as exactly 0 or 1. The shift for that last field must be 15: shifting a 16-bit value right by 16 discards every bit and always yields 0. */ +float4 unpack_unorm1555_to_unorm8888(const ushort color16) +{ + return float4((float)((color16 >> 0) & 0x1F) / 31.0f, + (float)((color16 >> 5) & 0x1F) / 31.0f, + (float)((color16 >> 10) & 0x1F) / 31.0f, + (float)((color16 >> 15) & 0x01)); +} + +/* Per channel: yields the midpoint of pixA and pixB where the absolute difference is at most threshold, and pixA otherwise. */ +float3 color_interpolate_LTE(const float3 pixA, const float3 pixB, const float3 threshold) +{ + const float3 interpPix = mix(pixA, pixB, 0.5f); + const float3 pixCompare = float3( abs(pixB - pixA) <= threshold ); + + return mix(pixA, interpPix, pixCompare); +} + +/* Four cubic B-spline tap weights for the fractional position x; the callers apply them to the taps at offsets -1, 0, 1, 2. */ +float4 bicubic_weight_bspline(const float x) +{ + return float4( ((1.0f-x)*(1.0f-x)*(1.0f-x)) / 6.0f, + (4.0f - 6.0f*x*x + 3.0f*x*x*x) / 6.0f, + (1.0f + 3.0f*x + 3.0f*x*x - 3.0f*x*x*x) / 6.0f, + x*x*x / 6.0f ); +} + +/* Four Mitchell-Netravali tap weights for the fractional position x, in the same tap order as the B-spline helper. */ +float4 bicubic_weight_mitchell_netravali(const float x) +{ + return float4( (1.0f - 9.0f*x + 15.0f*x*x - 7.0f*x*x*x) / 18.0f, + (16.0f - 36.0f*x*x + 21.0f*x*x*x) / 18.0f, + (1.0f + 9.0f*x + 27.0f*x*x - 21.0f*x*x*x) / 18.0f, + (7.0f*x*x*x - 6.0f*x*x) / 18.0f ); +} + +/* Four Lanczos weights of radius 2 for taps at distances 1+x, x, 1-x and 2-x. LANCZOS_FIX keeps the sample position away from zero so the division below stays finite. */ +float4 bicubic_weight_lanczos2(const float x) +{ + constexpr float RADIUS = 2.0f; + const float4 sample = LANCZOS_FIX(M_PI_F * float4(1.0f + x, x, 1.0f - x, 2.0f - x)); + return ( sin(sample) * sin(sample / RADIUS) / (sample * sample) ); +} + +float3 bicubic_weight_lanczos3(const float x) +{ + constexpr float RADIUS = 3.0f; + const float3 sample = LANCZOS_FIX(2.0f * M_PI_F * float3(x - 1.5f, x - 0.5f, x + 0.5f)); + return ( sin(sample) *
sin(sample / RADIUS) / (sample * sample) ); +} + +/* Color distance used by the 2xEPXPlus scaler: the per-channel absolute difference of pixA and pixB, weighted 2, 3, 3 and summed. */ +float dist_EPXPlus(const float3 pixA, const float3 pixB) +{ + const float3 channelDelta = abs(pixA - pixB); + const float3 channelWeight = float3(2.0f, 3.0f, 3.0f); + + return dot(channelDelta, channelWeight); +} + +#pragma mark HUD Shader Functions + +/* Vertex stage of the HUD pass. Scales the input position by 2/width and 2/height, passes the texture coordinate through, flags the first four vertices as the box, and copies the mip level request from the view properties. */ +vertex HUDVtx hud_vertex(const device float2 *inPosition [[buffer(0)]], + const device float2 *inTexCoord [[buffer(1)]], + const constant DisplayViewShaderProperties &viewProps [[buffer(2)]], + const uint vid [[vertex_id]]) +{ + const float2 projectionColumn0 = float2(2.0/viewProps.width, 0.0); + const float2 projectionColumn1 = float2( 0.0, 2.0/viewProps.height); + const float2x2 viewToClip = float2x2(projectionColumn0, projectionColumn1); + const float2 clipXY = viewToClip * inPosition[vid]; + + HUDVtx result; + result.position = float4(clipXY, 0.0, 1.0); + result.texCoord = inTexCoord[vid]; + result.isBox = (vid < 4); + result.lowerHUDMipMapLevel = (viewProps.lowerHUDMipMapLevel == 1); + + return result; +} + +fragment float4 hud_fragment(const HUDVtx vtx [[stage_in]], + const texture2d tex [[texture(0)]], + const sampler samp [[sampler(0)]]) +{ + return tex.sample(samp, vtx.texCoord, (vtx.lowerHUDMipMapLevel) ?
level(-0.50f) : level(0.00f)); +} + +#pragma mark Output Filters + +/* Shared transform of both display vertex functions. Scales the position by viewScale, rotates it by viewProps.rotation (given in degrees), then scales by 2/width and 2/height. The matrix product is kept in its original order so the result is unchanged. */ +static float4 display_output_position(const float2 position, const constant DisplayViewShaderProperties &viewProps) +{ + const float angleRadians = viewProps.rotation * (M_PI_F/180.0); + + const float2x2 projection = float2x2( float2(2.0/viewProps.width, 0.0), + float2( 0.0, 2.0/viewProps.height)); + + const float2x2 rotation = float2x2( float2( cos(angleRadians), sin(angleRadians)), + float2(-sin(angleRadians), cos(angleRadians))); + + const float2x2 scale = float2x2( float2(viewProps.viewScale, 0.0), + float2( 0.0, viewProps.viewScale)); + + return float4(projection * rotation * scale * position, 0.0, 1.0); +} + +/* Vertex stage for the display output: transforms the position and passes the texture coordinate through unchanged. */ +vertex DisplayVtx display_output_vertex(const device float2 *inPosition [[buffer(0)]], + const device float2 *inTexCoord [[buffer(1)]], + const constant DisplayViewShaderProperties &viewProps [[buffer(2)]], + const uint vid [[vertex_id]]) +{ + DisplayVtx outVtx; + outVtx.position = display_output_position(inPosition[vid], viewProps); + outVtx.texCoord = inTexCoord[vid]; + + return outVtx; +} + +/* Same as display_output_vertex, except that the texture coordinate is replaced by floor(coordinate - 0.5) plus 0.5. */ +vertex DisplayVtx display_output_bicubic_vertex(const device float2 *inPosition [[buffer(0)]], + const device float2 *inTexCoord [[buffer(1)]], + const constant DisplayViewShaderProperties &viewProps [[buffer(2)]], + const uint vid [[vertex_id]]) +{ + DisplayVtx outVtx; + outVtx.position = display_output_position(inPosition[vid], viewProps); + outVtx.texCoord = floor(inTexCoord[vid] - 0.5f) + 0.5f; + + return outVtx; +} + +//--------------------------------------- +// Input Pixel Mapping: 00 +fragment float4 output_filter_nearest(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]]) +{ + return tex.sample(genSampler,
vtx.texCoord); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01 +// 02|03 +/* Returns the texel at vtx.texCoord as filtered by the linear sampler; the sampled alpha is passed through as is. */ +fragment float4 output_filter_bilinear(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]]) +{ + return tex.sample(outputSamplerBilinear, vtx.texCoord); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +/* 16-tap filter over texel offsets -1..2 in x and y. The horizontal and vertical weights come from bicubic_weight_bspline evaluated at the fractional part of the texture coordinate and are normalized so each set sums to 1. Each row is weighted horizontally, then the four rows are weighted vertically. The output alpha is forced to 1. */ +fragment float4 output_filter_bicubic_bspline(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]]) +{ + float2 f = fract(vtx.texCoord); + float4 wx = bicubic_weight_bspline(f.x); + float4 wy = bicubic_weight_bspline(f.y); + + // Normalize weights + wx /= dot(wx, float4(1.0f)); + wy /= dot(wy, float4(1.0f)); + + float4 outFragment = (tex.sample(genSampler, vtx.texCoord, int2(-1,-1)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0,-1)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1,-1)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2,-1)) * wx.a) * wy.r + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 0)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 0)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 0)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 0)) * wx.a) * wy.g + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 1)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 1)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 1)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 1)) * wx.a) * wy.b + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 2)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 2)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 2)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 2)) * wx.a) * wy.a; + + return float4(outFragment.rgb, 1.0f); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +fragment float4
output_filter_bicubic_mitchell_netravali(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]]) +{ + float2 f = fract(vtx.texCoord); + float4 wx = bicubic_weight_mitchell_netravali(f.x); + float4 wy = bicubic_weight_mitchell_netravali(f.y); + + // Normalize weights + wx /= dot(wx, float4(1.0f)); + wy /= dot(wy, float4(1.0f)); + + float4 outFragment = (tex.sample(genSampler, vtx.texCoord, int2(-1,-1)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0,-1)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1,-1)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2,-1)) * wx.a) * wy.r + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 0)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 0)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 0)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 0)) * wx.a) * wy.g + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 1)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 1)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 1)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 1)) * wx.a) * wy.b + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 2)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 2)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 2)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 2)) * wx.a) * wy.a; + + return float4(outFragment.rgb, 1.0f); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +fragment float4 output_filter_lanczos2(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]]) +{ + const float2 f = fract(vtx.texCoord); + float4 wx = bicubic_weight_lanczos2(f.x); + float4 wy = bicubic_weight_lanczos2(f.y); + + // Normalize weights + wx /= dot(wx, float4(1.0f)); + wy /= dot(wy, float4(1.0f)); + + const float4 outFragment = (tex.sample(genSampler, vtx.texCoord, int2(-1,-1)) * wx.r + + tex.sample(genSampler, 
vtx.texCoord, int2( 0,-1)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1,-1)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2,-1)) * wx.a) * wy.r + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 0)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 0)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 0)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 0)) * wx.a) * wy.g + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 1)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 1)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 1)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 1)) * wx.a) * wy.b + + (tex.sample(genSampler, vtx.texCoord, int2(-1, 2)) * wx.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 2)) * wx.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 2)) * wx.b + + tex.sample(genSampler, vtx.texCoord, int2( 2, 2)) * wx.a) * wy.a; + + return float4(outFragment.rgb, 1.0f); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02|03|04|05 +// 06|07|08|09|10|11 +// 12|13|14|15|16|17 +// 18|19|20|21|22|23 +// 24|25|26|27|28|29 +// 30|31|32|33|34|35 +fragment float4 output_filter_lanczos3(const DisplayVtx vtx [[stage_in]], const texture2d tex [[texture(0)]]) +{ + const float2 f = fract(vtx.texCoord); + float3 wx1 = bicubic_weight_lanczos3(0.5f - f.x * 0.5f); + float3 wx2 = bicubic_weight_lanczos3(1.0f - f.x * 0.5f); + float3 wy1 = bicubic_weight_lanczos3(0.5f - f.y * 0.5f); + float3 wy2 = bicubic_weight_lanczos3(1.0f - f.y * 0.5f); + + // Normalize weights + const float sumX = dot(wx1, float3(1.0f)) + dot(wx2, float3(1.0f)); + const float sumY = dot(wy1, float3(1.0f)) + dot(wy2, float3(1.0f)); + wx1 /= sumX; + wx2 /= sumX; + wy1 /= sumY; + wy2 /= sumY; + + const float4 outFragment = (tex.sample(genSampler, vtx.texCoord, int2(-2,-2)) * wx1.r + + tex.sample(genSampler, vtx.texCoord, int2(-1,-2)) * wx2.r + + tex.sample(genSampler, vtx.texCoord, int2( 0,-2)) * wx1.g + + 
tex.sample(genSampler, vtx.texCoord, int2( 1,-2)) * wx2.g + + tex.sample(genSampler, vtx.texCoord, int2( 2,-2)) * wx1.b + + tex.sample(genSampler, vtx.texCoord, int2( 3,-2)) * wx2.b) * wy1.r + + (tex.sample(genSampler, vtx.texCoord, int2(-2,-1)) * wx1.r + + tex.sample(genSampler, vtx.texCoord, int2(-1,-1)) * wx2.r + + tex.sample(genSampler, vtx.texCoord, int2( 0,-1)) * wx1.g + + tex.sample(genSampler, vtx.texCoord, int2( 1,-1)) * wx2.g + + tex.sample(genSampler, vtx.texCoord, int2( 2,-1)) * wx1.b + + tex.sample(genSampler, vtx.texCoord, int2( 3,-1)) * wx2.b) * wy2.r + + (tex.sample(genSampler, vtx.texCoord, int2(-2, 0)) * wx1.r + + tex.sample(genSampler, vtx.texCoord, int2(-1, 0)) * wx2.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 0)) * wx1.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 0)) * wx2.g + + tex.sample(genSampler, vtx.texCoord, int2( 2, 0)) * wx1.b + + tex.sample(genSampler, vtx.texCoord, int2( 3, 0)) * wx2.b) * wy1.g + + (tex.sample(genSampler, vtx.texCoord, int2(-2, 1)) * wx1.r + + tex.sample(genSampler, vtx.texCoord, int2(-1, 1)) * wx2.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 1)) * wx1.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 1)) * wx2.g + + tex.sample(genSampler, vtx.texCoord, int2( 2, 1)) * wx1.b + + tex.sample(genSampler, vtx.texCoord, int2( 3, 1)) * wx2.b) * wy2.g + + (tex.sample(genSampler, vtx.texCoord, int2(-2, 2)) * wx1.r + + tex.sample(genSampler, vtx.texCoord, int2(-1, 2)) * wx2.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 2)) * wx1.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 2)) * wx2.g + + tex.sample(genSampler, vtx.texCoord, int2( 2, 2)) * wx1.b + + tex.sample(genSampler, vtx.texCoord, int2( 3, 2)) * wx2.b) * wy1.b + + (tex.sample(genSampler, vtx.texCoord, int2(-2, 3)) * wx1.r + + tex.sample(genSampler, vtx.texCoord, int2(-1, 3)) * wx2.r + + tex.sample(genSampler, vtx.texCoord, int2( 0, 3)) * wx1.g + + tex.sample(genSampler, vtx.texCoord, int2( 1, 3)) * wx2.g + + tex.sample(genSampler, 
vtx.texCoord, int2( 2, 3)) * wx1.b + + tex.sample(genSampler, vtx.texCoord, int2( 3, 3)) * wx2.b) * wy2.b; + + return float4(outFragment.rgb, 1.0f); +} + +#pragma mark Conversion Filters + +//--------------------------------------- +// Input Pixel Mapping: 00 +// +// Output Pixel Mapping: 00 +/* Converts one packed 16-bit texel per thread: reads the first channel of inTexture at the thread position, unpacks it with unpack_unorm1555_to_unorm8888 and writes the result to the same position of outTexture. Threads positioned past the last column or row of inTexture do nothing. */ +kernel void src16_unpack_unorm1555_to_unorm8888(const uint2 position [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const uint lastColumn = inTexture.get_width() - 1; + const uint lastRow = inTexture.get_height() - 1; + const bool isOutsideTexture = (position.x > lastColumn) || (position.y > lastRow); + + if (isOutsideTexture) + { + return; + } + + const ushort packedColor = (ushort)inTexture.read(position).r; + outTexture.write( unpack_unorm1555_to_unorm8888(packedColor), position ); +} + +#pragma mark Source Filters + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00 +kernel void src_filter_deposterize(const uint2 position [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(position), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(position), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(position), int2( 1, 1)).rgb, + inTexture.sample(genSampler, float2(position), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(position), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(position), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(position), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(position), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(position), int2( 1,-1)).rgb + }; + + const float3 threshold = float3(0.1020); + const float2 weight = float2(0.90, 0.90 * 0.60); + + const float3 blend[9] = { + src[0], + color_interpolate_LTE(src[0], src[1], threshold), + color_interpolate_LTE(src[0], src[2], threshold), + color_interpolate_LTE(src[0], src[3], threshold), + color_interpolate_LTE(src[0],
src[4], threshold), + color_interpolate_LTE(src[0], src[5], threshold), + color_interpolate_LTE(src[0], src[6], threshold), + color_interpolate_LTE(src[0], src[7], threshold), + color_interpolate_LTE(src[0], src[8], threshold) + }; + + const float3 outColor = mix( + mix( + mix( + mix(blend[0], blend[5], weight[0]), mix(blend[0], blend[1], weight[0]), + 0.50f), + mix( + mix(blend[0], blend[7], weight[0]), mix(blend[0], blend[3], weight[0]), + 0.50f), + 0.50f), + mix( + mix( + mix(blend[0], blend[6], weight[1]), mix(blend[0], blend[2], weight[1]), + 0.50f), + mix( + mix(blend[0], blend[8], weight[1]), mix(blend[0], blend[4], weight[1]), + 0.50f), + 0.50f), + 0.25f); + + outTexture.write( float4(outColor, 1.0f), position ); +} + +#pragma mark Pixel Scalers + +//--------------------------------------- +// Input Pixel Mapping: 00 +// +// Output Pixel Mapping: 00|01 +// 02|03 +/* Doubles the image by replication: each thread reads one input texel and writes its color, with alpha forced to 1, to the matching 2x2 cell of outTexture. */ +kernel void pixel_scaler_nearest2x(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float4 dst = float4(inTexture.read(inPosition).rgb, 1.0f); + const uint2 outOrigin = inPosition * 2; + + /* Visit the 2x2 output cell in the order (0,0), (1,0), (0,1), (1,1). */ + for (uint i = 0; i < 4; i++) + { + outTexture.write( dst, outOrigin + uint2(i & 1, i >> 1) ); + } +} + +//--------------------------------------- +// Input Pixel Mapping: 00 +// +// Output Pixel Mapping: 00|01 +// 02|03 +kernel void pixel_scaler_scanline(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src = inTexture.read(inPosition).rgb; + const uint2 outPosition = inPosition * 2; + + outTexture.write( float4(src * 1.000f, 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(src * 0.875f, 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(src *
0.875f, 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(src * 0.750f, 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: --|07|-- +// 05|00|01 +// --|03|-- +// +// Output Pixel Mapping: 00|01 +// 02|03 +kernel void pixel_scaler_2xEPX(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src0 = inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb; + const float3 src1 = inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb; + const float3 src3 = inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb; + const float3 src5 = inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb; + const float3 src7 = inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb; + + const float v7 = reduce(src7); + const float v5 = reduce(src5); + const float v1 = reduce(src1); + const float v3 = reduce(src3); + + const bool pixCompare = (v5 != v1) && (v7 != v3); + + const float3 dst[4] = { + (pixCompare && (v7 == v5)) ? src7 : src0, + (pixCompare && (v1 == v7)) ? src1 : src0, + (pixCompare && (v5 == v3)) ? src5 : src0, + (pixCompare && (v3 == v1)) ? 
src3 : src0 + }; + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(dst[0], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[1], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[2], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[3], 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: --|07|-- +// 05|00|01 +// --|03|-- +// +// Output Pixel Mapping: 00|01 +// 02|03 +kernel void pixel_scaler_2xEPXPlus(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src0 = inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb; + const float3 src1 = inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb; + const float3 src3 = inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb; + const float3 src5 = inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb; + const float3 src7 = inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb; + + const float3 dst[4] = { + ( dist_EPXPlus(src5, src7) < min(dist_EPXPlus(src5, src3), dist_EPXPlus(src1, src7)) ) ? mix(src5, src7, 0.5f) : src0, + ( dist_EPXPlus(src1, src7) < min(dist_EPXPlus(src5, src7), dist_EPXPlus(src1, src3)) ) ? mix(src1, src7, 0.5f) : src0, + ( dist_EPXPlus(src5, src3) < min(dist_EPXPlus(src5, src7), dist_EPXPlus(src1, src3)) ) ? mix(src5, src3, 0.5f) : src0, + ( dist_EPXPlus(src1, src3) < min(dist_EPXPlus(src5, src3), dist_EPXPlus(src1, src7)) ) ? 
mix(src1, src3, 0.5f) : src0 + }; + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(dst[0], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[1], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[2], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[3], 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +// +// Output Pixel Mapping: 00|01 +// 02|03 +// +//--------------------------------------- +// 2xSaI Pixel Mapping: I|E|F|J +// G|A|B|K +// H|C|D|L +// M|N|O|P +kernel void pixel_scaler_2xSaI(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float Iv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb ); + const float Ev = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb ); + const float Fv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb ); + const float Jv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2,-1)).rgb ); + + const float Gv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb ); + const float3 Ac = inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb; const float Av = reduce(Ac); + const float3 Bc = inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb; const float Bv = reduce(Bc); + const float Kv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb ); + + const float Hv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb ); + const float3 Cc = inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb; const float Cv = reduce(Cc); + const float3 Dc = inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb; const float Dv = reduce(Dc); + const float Lv = reduce( 
inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb ); + + const float Mv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 2)).rgb ); + const float Nv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb ); + const float Ov = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb ); + // Pv is unused, so skip this one. + + const bool compAD = (Av == Dv); + const bool compBC = (Bv == Cv); + + float3 dst[4] = { Ac, Ac, Ac, Ac }; + + if (compAD && !compBC) + { + dst[1] = ((Av == Ev) && (Bv == Lv)) || ((Av == Cv) && (Av == Fv) && (Bv != Ev) && (Bv == Jv)) ? Ac : mix(Ac, Bc, 0.5f); + dst[2] = ((Av == Gv) && (Cv == Ov)) || ((Av == Bv) && (Av == Hv) && (Cv != Gv) && (Cv == Mv)) ? Ac : mix(Ac, Cc, 0.5f); + } + else if (!compAD && compBC) + { + dst[1] = ((Bv == Fv) && (Av == Hv)) || ((Bv == Ev) && (Bv == Dv) && (Av != Fv) && (Av == Iv)) ? Bc : mix(Ac, Bc, 0.5f); + dst[2] = ((Cv == Hv) && (Av == Fv)) || ((Cv == Gv) && (Cv == Dv) && (Av != Hv) && (Av == Iv)) ? Cc : mix(Ac, Cc, 0.5f); + dst[3] = Bc; + } + else if (compAD && compBC) + { + dst[1] = (Av == Bv) ? Ac : mix(Ac, Bc, 0.5f); + dst[2] = (Av == Bv) ? Ac : mix(Ac, Cc, 0.5f); + + const float r = (Av == Bv) ? 1.0f : GetEagleResult(Av, Bv, Gv, Ev) - GetEagleResult(Bv, Av, Kv, Fv) - GetEagleResult(Bv, Av, Hv, Nv) + GetEagleResult(Av, Bv, Lv, Ov); + dst[3] = (r > 0.0f) ? Ac : ( (r < 0.0f) ? Bc : mix( mix(Ac, Bc, 0.5f), mix(Cc, Dc, 0.5f), 0.5f) ); + } + else + { + dst[1] = ((Av == Cv) && (Av == Fv) && (Bv != Ev) && (Bv == Jv)) ? Ac : ( ((Bv == Ev) && (Bv == Dv) && (Av != Fv) && (Av == Iv)) ? Bc : mix(Ac, Bc, 0.5f) ); + dst[2] = ((Av == Bv) && (Av == Hv) && (Cv != Gv) && (Cv == Mv)) ? Ac : ( ((Cv == Gv) && (Cv == Dv) && (Av != Hv) && (Av == Iv)) ? 
Cc : mix(Ac, Cc, 0.5f) ); + dst[3] = mix( mix(Ac, Bc, 0.5f), mix(Cc, Dc, 0.5f), 0.5f ); + } + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(dst[0], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[1], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[2], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[3], 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +// +// Output Pixel Mapping: 00|01 +// 02|03 +// +//--------------------------------------- +// S2xSaI Pixel Mapping: I|E|F|J +// G|A|B|K +// H|C|D|L +// M|N|O|P +kernel void pixel_scaler_Super2xSaI(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float Iv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb ); + const float Ev = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb ); + const float Fv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb ); + const float Jv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2,-1)).rgb ); + + const float Gv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb ); + const float3 Ac = inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb; const float Av = reduce(Ac); + const float3 Bc = inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb; const float Bv = reduce(Bc); + const float Kv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb ); + + const float Hv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb ); + const float3 Cc = inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb; const float Cv = reduce(Cc); + const float3 Dc = inTexture.sample(genSampler, float2(inPosition), int2( 1, 
1)).rgb; const float Dv = reduce(Dc); + const float Lv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb ); + + const float Mv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 2)).rgb ); + const float Nv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb ); + const float Ov = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb ); + const float Pv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2, 2)).rgb ); + + const bool compAD = (Av == Dv); + const bool compBC = (Bv == Cv); + + float3 dst[4]; + dst[0] = ( (compBC && !compAD && (Hv == Cv) && (Cv != Fv)) || ((Gv == Cv) && (Dv == Cv) && (Hv != Av) && (Cv != Iv)) ) ? mix(Ac, Cc, 0.5f) : Ac; + dst[1] = Bc; + dst[2] = ( (compAD && !compBC && (Gv == Av) && (Av != Ov)) || ((Av == Hv) && (Av == Bv) && (Gv != Cv) && (Av != Mv)) ) ? mix(Ac, Cc, 0.5f) : Cc; + dst[3] = Dc; + + if (compBC && !compAD) + { + dst[1] = dst[3] = Cc; + } + else if (!compBC && compAD) + { + dst[1] = dst[3] = Ac; + } + else if (compBC && compAD) + { + const float r = GetEagleResult(Bv, Av, Hv, Nv) + GetEagleResult(Bv, Av, Gv, Ev) + GetEagleResult(Bv, Av, Ov, Lv) + GetEagleResult(Bv, Av, Fv, Kv); + dst[1] = dst[3] = (r > 0.0f) ? Bc : ( (r < 0.0f) ? Ac : mix(Ac, Bc, 0.5f) ); + } + else + { + dst[1] = ( (Bv == Dv) && (Bv == Ev) && (Av != Fv) && (Bv != Iv) ) ? mix(Ac, Bc, 0.75f) : ( ( (Av == Cv) && (Av == Fv) && (Ev != Bv) && (Av != Jv) ) ? mix(Ac, Bc, 0.25f) : mix(Ac, Bc, 0.5f) ); + dst[3] = ( (Bv == Dv) && (Dv == Nv) && (Cv != Ov) && (Dv != Mv) ) ? mix(Cc, Dc, 0.75f) : ( ( (Av == Cv) && (Cv == Ov) && (Nv != Dv) && (Cv != Pv) ) ? 
mix(Cc, Dc, 0.25f) : mix(Cc, Dc, 0.5f) ); + } + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(dst[0], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[1], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[2], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[3], 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: --|01|02|-- +// 04|05|06|07 +// 08|09|10|11 +// --|13|14|-- +// +// Output Pixel Mapping: 00|01 +// 02|03 +// +//--------------------------------------- +// SEagle Pixel Mapping: -|E|F|- +// G|A|B|K +// H|C|D|L +// -|N|O|- +kernel void pixel_scaler_2xSuperEagle(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float Ev = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb ); + const float Fv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb ); + + const float Gv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb ); + const float3 Ac = inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb; const float Av = reduce(Ac); + const float3 Bc = inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb; const float Bv = reduce(Bc); + const float Kv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb ); + + const float Hv = reduce( inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb ); + const float3 Cc = inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb; const float Cv = reduce(Cc); + const float3 Dc = inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb; const float Dv = reduce(Dc); + const float Lv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb ); + + const float Nv = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb ); + const 
float Ov = reduce( inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb ); + + const bool compAD = (Av == Dv); + const bool compBC = (Bv == Cv); + + float3 dst[4] = { Ac, Bc, Cc, Dc }; + + if (compBC && !compAD) + { + dst[0] = (Cv == Hv || Bv == Fv) ? mix(Ac, Cc, 0.75f) : mix(Ac, Bc, 0.5f); + dst[1] = Cc; + //dst[2] = Cc; + dst[3] = mix( mix(Dc, Cc, 0.5f), mix(Dc, Cc, 0.75f), float(Bv == Kv || Cv == Nv) ); + } + else if (!compBC && compAD) + { + //dst[0] = Ac; + dst[1] = mix( mix(Ac, Bc, 0.5f), mix(Ac, Bc, 0.25f), float(Av == Ev || Dv == Lv) ); + dst[2] = mix( mix(Cc, Dc, 0.5f), mix(Ac, Cc, 0.25f), float(Dv == Ov || Av == Gv) ); + dst[3] = Ac; + } + else if (compBC && compAD) + { + const float r = GetEagleResult(Bv, Av, Hv, Nv) + GetEagleResult(Bv, Av, Gv, Ev) + GetEagleResult(Bv, Av, Ov, Lv) + GetEagleResult(Bv, Av, Fv, Kv); + if (r > 0.0f) + { + dst[0] = mix(Ac, Bc, 0.5f); + dst[1] = Cc; + //dst[2] = Cc; + dst[3] = dst[0]; + } + else if (r < 0.0f) + { + //dst[0] = Ac; + dst[1] = mix(Ac, Bc, 0.5f); + dst[2] = dst[1]; + dst[3] = Ac; + } + else + { + //dst[0] = Ac; + dst[1] = Cc; + //dst[2] = Cc; + dst[3] = Ac; + } + } + else + { + dst[0] = mix(mix(Bc, Cc, 0.5f), Ac, 0.75f); + dst[1] = mix(mix(Ac, Dc, 0.5f), Bc, 0.75f); + dst[2] = mix(mix(Ac, Dc, 0.5f), Cc, 0.75f); + dst[3] = mix(mix(Bc, Cc, 0.5f), Dc, 0.75f); + } + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(dst[0], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[1], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[2], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[3], 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01 +// 02|03 +kernel void pixel_scaler_LQ2x(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + 
texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + const float v[9] = { + reduce(src[0]), + reduce(src[1]), + reduce(src[2]), + reduce(src[3]), + reduce(src[4]), + reduce(src[5]), + reduce(src[6]), + reduce(src[7]), + reduce(src[8]) + }; + + const int pattern = (int(v[0] != v[4]) * 1) + + (int(v[1] != v[4]) * 2) + + (int(v[2] != v[4]) * 4) + + (int(v[3] != v[4]) * 8) + + (int(v[5] != v[4]) * 16) + + (int(v[6] != v[4]) * 32) + + (int(v[7] != v[4]) * 64) + + (int(v[8] != v[4]) * 128); + + const int compare = (int(v[1] != v[5]) * 1) + + (int(v[5] != v[7]) * 2) + + (int(v[7] != v[3]) * 4) + + (int(v[3] != v[1]) * 8); + + const float3 p[4] = { + lut.read(uint3(pattern*2+0, 0, compare)).rgb, + lut.read(uint3(pattern*2+0, 1, compare)).rgb, + lut.read(uint3(pattern*2+0, 2, compare)).rgb, + lut.read(uint3(pattern*2+0, 3, compare)).rgb + }; + + const float3 w[4] = { + lut.read(uint3(pattern*2+1, 0, compare)).rgb, + lut.read(uint3(pattern*2+1, 1, compare)).rgb, + lut.read(uint3(pattern*2+1, 2, compare)).rgb, + lut.read(uint3(pattern*2+1, 3, compare)).rgb + }; + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(Lerp(w[0], src[int(p[0].r*255.0f/30.95f)], src[int(p[0].g*255.0f/30.95f)], src[int(p[0].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[1], src[int(p[1].r*255.0f/30.95f)], 
src[int(p[1].g*255.0f/30.95f)], src[int(p[1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[2], src[int(p[2].r*255.0f/30.95f)], src[int(p[2].g*255.0f/30.95f)], src[int(p[2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[3], src[int(p[3].r*255.0f/30.95f)], src[int(p[3].g*255.0f/30.95f)], src[int(p[3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01 +// 02|03 +kernel void pixel_scaler_LQ2xS(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + float b[9]; + float minBright = 10.0f; + float maxBright = 0.0f; + + for (int i = 0; i < 9; i++) + { + b[i] = (src[i].r + src[i].r + src[i].r) + (src[i].g + src[i].g + src[i].g) + (src[i].b + src[i].b); + minBright = min(minBright, b[i]); + maxBright = max(maxBright, b[i]); + } + + const float diffBright = (maxBright - minBright) / 16.0f; + const int pattern = int(step((0.5f*1.0f/127.5f), diffBright)) * ((int(abs(b[0] - b[4]) > diffBright) * 1) + + (int(abs(b[1] - b[4]) > diffBright) * 2) + + (int(abs(b[2] - b[4]) > diffBright) * 4) 
+ + (int(abs(b[3] - b[4]) > diffBright) * 8) + + (int(abs(b[5] - b[4]) > diffBright) * 16) + + (int(abs(b[6] - b[4]) > diffBright) * 32) + + (int(abs(b[7] - b[4]) > diffBright) * 64) + + (int(abs(b[8] - b[4]) > diffBright) * 128)); + + const float3 p[4] = { + lut.read(uint3(pattern*2+0, 0, 0)).rgb, + lut.read(uint3(pattern*2+0, 1, 0)).rgb, + lut.read(uint3(pattern*2+0, 2, 0)).rgb, + lut.read(uint3(pattern*2+0, 3, 0)).rgb + }; + + const float3 w[4] = { + lut.read(uint3(pattern*2+1, 0, 0)).rgb, + lut.read(uint3(pattern*2+1, 1, 0)).rgb, + lut.read(uint3(pattern*2+1, 2, 0)).rgb, + lut.read(uint3(pattern*2+1, 3, 0)).rgb + }; + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(Lerp(w[0], src[int(p[0].r*255.0f/30.95f)], src[int(p[0].g*255.0f/30.95f)], src[int(p[0].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[1], src[int(p[1].r*255.0f/30.95f)], src[int(p[1].g*255.0f/30.95f)], src[int(p[1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[2], src[int(p[2].r*255.0f/30.95f)], src[int(p[2].g*255.0f/30.95f)], src[int(p[2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[3], src[int(p[3].r*255.0f/30.95f)], src[int(p[3].g*255.0f/30.95f)], src[int(p[3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01 +// 02|03 +kernel void pixel_scaler_HQ2x(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, 
float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + const int pattern = (int(InterpDiff(src[0], src[4])) * 1) + + (int(InterpDiff(src[1], src[4])) * 2) + + (int(InterpDiff(src[2], src[4])) * 4) + + (int(InterpDiff(src[3], src[4])) * 8) + + (int(InterpDiff(src[5], src[4])) * 16) + + (int(InterpDiff(src[6], src[4])) * 32) + + (int(InterpDiff(src[7], src[4])) * 64) + + (int(InterpDiff(src[8], src[4])) * 128); + + const int compare = (int(InterpDiff(src[1], src[5])) * 1) + + (int(InterpDiff(src[5], src[7])) * 2) + + (int(InterpDiff(src[7], src[3])) * 4) + + (int(InterpDiff(src[3], src[1])) * 8); + + const float3 p[4] = { + lut.read(uint3(pattern*2+0, 0, compare)).rgb, + lut.read(uint3(pattern*2+0, 1, compare)).rgb, + lut.read(uint3(pattern*2+0, 2, compare)).rgb, + lut.read(uint3(pattern*2+0, 3, compare)).rgb + }; + + const float3 w[4] = { + lut.read(uint3(pattern*2+1, 0, compare)).rgb, + lut.read(uint3(pattern*2+1, 1, compare)).rgb, + lut.read(uint3(pattern*2+1, 2, compare)).rgb, + lut.read(uint3(pattern*2+1, 3, compare)).rgb + }; + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(Lerp(w[0], src[int(p[0].r*255.0f/30.95f)], src[int(p[0].g*255.0f/30.95f)], src[int(p[0].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[1], src[int(p[1].r*255.0f/30.95f)], src[int(p[1].g*255.0f/30.95f)], src[int(p[1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[2], src[int(p[2].r*255.0f/30.95f)], src[int(p[2].g*255.0f/30.95f)], src[int(p[2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[3], 
src[int(p[3].r*255.0f/30.95f)], src[int(p[3].g*255.0f/30.95f)], src[int(p[3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01 +// 02|03 +kernel void pixel_scaler_HQ2xS(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + float b[9]; + float minBright = 10.0f; + float maxBright = 0.0f; + + for (int i = 0; i < 9; i++) + { + b[i] = (src[i].r + src[i].r + src[i].r) + (src[i].g + src[i].g + src[i].g) + (src[i].b + src[i].b); + minBright = min(minBright, b[i]); + maxBright = max(maxBright, b[i]); + } + + const float diffBright = (maxBright - minBright) * (7.0f/16.0f); + const int pattern = int(step((3.5f*7.0f/892.5f), diffBright)) * ((int(abs(b[0] - b[4]) > diffBright) * 1) + + (int(abs(b[1] - b[4]) > diffBright) * 2) + + (int(abs(b[2] - b[4]) > diffBright) * 4) + + (int(abs(b[3] - b[4]) > diffBright) * 8) + + (int(abs(b[5] - b[4]) > diffBright) * 16) + + (int(abs(b[6] - b[4]) > diffBright) * 32) + + (int(abs(b[7] - b[4]) > diffBright) * 64) + + (int(abs(b[8] - b[4]) > diffBright) * 128)); + + const float3 p[4] = { + lut.read(uint3(pattern*2+0, 0, 0)).rgb, + 
lut.read(uint3(pattern*2+0, 1, 0)).rgb, + lut.read(uint3(pattern*2+0, 2, 0)).rgb, + lut.read(uint3(pattern*2+0, 3, 0)).rgb + }; + + const float3 w[4] = { + lut.read(uint3(pattern*2+1, 0, 0)).rgb, + lut.read(uint3(pattern*2+1, 1, 0)).rgb, + lut.read(uint3(pattern*2+1, 2, 0)).rgb, + lut.read(uint3(pattern*2+1, 3, 0)).rgb + }; + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(Lerp(w[0], src[int(p[0].r*255.0f/30.95f)], src[int(p[0].g*255.0f/30.95f)], src[int(p[0].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[1], src[int(p[1].r*255.0f/30.95f)], src[int(p[1].g*255.0f/30.95f)], src[int(p[1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[2], src[int(p[2].r*255.0f/30.95f)], src[int(p[2].g*255.0f/30.95f)], src[int(p[2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[3], src[int(p[3].r*255.0f/30.95f)], src[int(p[3].g*255.0f/30.95f)], src[int(p[3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +kernel void pixel_scaler_HQ3x(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), 
int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + const int pattern = (int(InterpDiff(src[0], src[4])) * 1) + + (int(InterpDiff(src[1], src[4])) * 2) + + (int(InterpDiff(src[2], src[4])) * 4) + + (int(InterpDiff(src[3], src[4])) * 8) + + (int(InterpDiff(src[5], src[4])) * 16) + + (int(InterpDiff(src[6], src[4])) * 32) + + (int(InterpDiff(src[7], src[4])) * 64) + + (int(InterpDiff(src[8], src[4])) * 128); + + const int compare = (int(InterpDiff(src[1], src[5])) * 1) + + (int(InterpDiff(src[5], src[7])) * 2) + + (int(InterpDiff(src[7], src[3])) * 4) + + (int(InterpDiff(src[3], src[1])) * 8); + + const float3 p[9] = { + lut.read(uint3(pattern*2+0, 0, compare)).rgb, + lut.read(uint3(pattern*2+0, 1, compare)).rgb, + lut.read(uint3(pattern*2+0, 2, compare)).rgb, + lut.read(uint3(pattern*2+0, 3, compare)).rgb, + lut.read(uint3(pattern*2+0, 4, compare)).rgb, + lut.read(uint3(pattern*2+0, 5, compare)).rgb, + lut.read(uint3(pattern*2+0, 6, compare)).rgb, + lut.read(uint3(pattern*2+0, 7, compare)).rgb, + lut.read(uint3(pattern*2+0, 8, compare)).rgb + }; + + const float3 w[9] = { + lut.read(uint3(pattern*2+1, 0, compare)).rgb, + lut.read(uint3(pattern*2+1, 1, compare)).rgb, + lut.read(uint3(pattern*2+1, 2, compare)).rgb, + lut.read(uint3(pattern*2+1, 3, compare)).rgb, + lut.read(uint3(pattern*2+1, 4, compare)).rgb, + lut.read(uint3(pattern*2+1, 5, compare)).rgb, + lut.read(uint3(pattern*2+1, 6, compare)).rgb, + lut.read(uint3(pattern*2+1, 7, compare)).rgb, + lut.read(uint3(pattern*2+1, 8, compare)).rgb + }; + + const uint2 outPosition = inPosition * 3; + outTexture.write( float4(Lerp(w[0], src[int(p[0].r*255.0f/30.95f)], src[int(p[0].g*255.0f/30.95f)], src[int(p[0].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[1], src[int(p[1].r*255.0f/30.95f)], src[int(p[1].g*255.0f/30.95f)], src[int(p[1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[2], 
src[int(p[2].r*255.0f/30.95f)], src[int(p[2].g*255.0f/30.95f)], src[int(p[2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 0) ); + outTexture.write( float4(Lerp(w[3], src[int(p[3].r*255.0f/30.95f)], src[int(p[3].g*255.0f/30.95f)], src[int(p[3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[4], src[int(p[4].r*255.0f/30.95f)], src[int(p[4].g*255.0f/30.95f)], src[int(p[4].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); + outTexture.write( float4(Lerp(w[5], src[int(p[5].r*255.0f/30.95f)], src[int(p[5].g*255.0f/30.95f)], src[int(p[5].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 1) ); + outTexture.write( float4(Lerp(w[6], src[int(p[6].r*255.0f/30.95f)], src[int(p[6].g*255.0f/30.95f)], src[int(p[6].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 2) ); + outTexture.write( float4(Lerp(w[7], src[int(p[7].r*255.0f/30.95f)], src[int(p[7].g*255.0f/30.95f)], src[int(p[7].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 2) ); + outTexture.write( float4(Lerp(w[8], src[int(p[8].r*255.0f/30.95f)], src[int(p[8].g*255.0f/30.95f)], src[int(p[8].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 2) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +kernel void pixel_scaler_HQ3xS(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + 
inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + float b[9]; + float minBright = 10.0f; + float maxBright = 0.0f; + + for (int i = 0; i < 9; i++) + { + b[i] = (src[i].r + src[i].r + src[i].r) + (src[i].g + src[i].g + src[i].g) + (src[i].b + src[i].b); + minBright = min(minBright, b[i]); + maxBright = max(maxBright, b[i]); + } + + const float diffBright = (maxBright - minBright) * (7.0f/16.0f); + const int pattern = int(step((3.5f*7.0f/892.5f), diffBright)) * ((int(abs(b[0] - b[4]) > diffBright) * 1) + + (int(abs(b[1] - b[4]) > diffBright) * 2) + + (int(abs(b[2] - b[4]) > diffBright) * 4) + + (int(abs(b[3] - b[4]) > diffBright) * 8) + + (int(abs(b[5] - b[4]) > diffBright) * 16) + + (int(abs(b[6] - b[4]) > diffBright) * 32) + + (int(abs(b[7] - b[4]) > diffBright) * 64) + + (int(abs(b[8] - b[4]) > diffBright) * 128)); + + const float3 p[9] = { + lut.read(uint3(pattern*2+0, 0, 0)).rgb, + lut.read(uint3(pattern*2+0, 1, 0)).rgb, + lut.read(uint3(pattern*2+0, 2, 0)).rgb, + lut.read(uint3(pattern*2+0, 3, 0)).rgb, + lut.read(uint3(pattern*2+0, 4, 0)).rgb, + lut.read(uint3(pattern*2+0, 5, 0)).rgb, + lut.read(uint3(pattern*2+0, 6, 0)).rgb, + lut.read(uint3(pattern*2+0, 7, 0)).rgb, + lut.read(uint3(pattern*2+0, 8, 0)).rgb + }; + + const float3 w[9] = { + lut.read(uint3(pattern*2+1, 0, 0)).rgb, + lut.read(uint3(pattern*2+1, 1, 0)).rgb, + lut.read(uint3(pattern*2+1, 2, 0)).rgb, + lut.read(uint3(pattern*2+1, 3, 0)).rgb, + lut.read(uint3(pattern*2+1, 4, 0)).rgb, + lut.read(uint3(pattern*2+1, 5, 0)).rgb, + lut.read(uint3(pattern*2+1, 6, 0)).rgb, + lut.read(uint3(pattern*2+1, 7, 0)).rgb, + lut.read(uint3(pattern*2+1, 8, 0)).rgb + }; + + const uint2 outPosition = inPosition * 3; + outTexture.write( float4(Lerp(w[0], src[int(p[0].r*255.0f/30.95f)], src[int(p[0].g*255.0f/30.95f)], src[int(p[0].b*255.0f/30.95f)]), 1.0f), 
outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[1], src[int(p[1].r*255.0f/30.95f)], src[int(p[1].g*255.0f/30.95f)], src[int(p[1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[2], src[int(p[2].r*255.0f/30.95f)], src[int(p[2].g*255.0f/30.95f)], src[int(p[2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 0) ); + outTexture.write( float4(Lerp(w[3], src[int(p[3].r*255.0f/30.95f)], src[int(p[3].g*255.0f/30.95f)], src[int(p[3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[4], src[int(p[4].r*255.0f/30.95f)], src[int(p[4].g*255.0f/30.95f)], src[int(p[4].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); + outTexture.write( float4(Lerp(w[5], src[int(p[5].r*255.0f/30.95f)], src[int(p[5].g*255.0f/30.95f)], src[int(p[5].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 1) ); + outTexture.write( float4(Lerp(w[6], src[int(p[6].r*255.0f/30.95f)], src[int(p[6].g*255.0f/30.95f)], src[int(p[6].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 2) ); + outTexture.write( float4(Lerp(w[7], src[int(p[7].r*255.0f/30.95f)], src[int(p[7].g*255.0f/30.95f)], src[int(p[7].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 2) ); + outTexture.write( float4(Lerp(w[8], src[int(p[8].r*255.0f/30.95f)], src[int(p[8].g*255.0f/30.95f)], src[int(p[8].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 2) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +kernel void pixel_scaler_HQ4x(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, 
float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + const int pattern = (int(InterpDiff(src[0], src[4])) * 1) + + (int(InterpDiff(src[1], src[4])) * 2) + + (int(InterpDiff(src[2], src[4])) * 4) + + (int(InterpDiff(src[3], src[4])) * 8) + + (int(InterpDiff(src[5], src[4])) * 16) + + (int(InterpDiff(src[6], src[4])) * 32) + + (int(InterpDiff(src[7], src[4])) * 64) + + (int(InterpDiff(src[8], src[4])) * 128); + + const int compare = (int(InterpDiff(src[1], src[5])) * 1) + + (int(InterpDiff(src[5], src[7])) * 2) + + (int(InterpDiff(src[7], src[3])) * 4) + + (int(InterpDiff(src[3], src[1])) * 8); + + const float3 p[16] = { + lut.read(uint3(pattern*2+0, 0, compare)).rgb, + lut.read(uint3(pattern*2+0, 1, compare)).rgb, + lut.read(uint3(pattern*2+0, 2, compare)).rgb, + lut.read(uint3(pattern*2+0, 3, compare)).rgb, + lut.read(uint3(pattern*2+0, 4, compare)).rgb, + lut.read(uint3(pattern*2+0, 5, compare)).rgb, + lut.read(uint3(pattern*2+0, 6, compare)).rgb, + lut.read(uint3(pattern*2+0, 7, compare)).rgb, + lut.read(uint3(pattern*2+0, 8, compare)).rgb, + lut.read(uint3(pattern*2+0, 9, compare)).rgb, + lut.read(uint3(pattern*2+0, 10, compare)).rgb, + lut.read(uint3(pattern*2+0, 11, compare)).rgb, + lut.read(uint3(pattern*2+0, 12, compare)).rgb, + lut.read(uint3(pattern*2+0, 13, compare)).rgb, + lut.read(uint3(pattern*2+0, 14, compare)).rgb, + lut.read(uint3(pattern*2+0, 15, compare)).rgb + }; + + const float3 w[16] = { + lut.read(uint3(pattern*2+1, 0, compare)).rgb, + lut.read(uint3(pattern*2+1, 1, compare)).rgb, + lut.read(uint3(pattern*2+1, 2, compare)).rgb, + 
lut.read(uint3(pattern*2+1, 3, compare)).rgb, + lut.read(uint3(pattern*2+1, 4, compare)).rgb, + lut.read(uint3(pattern*2+1, 5, compare)).rgb, + lut.read(uint3(pattern*2+1, 6, compare)).rgb, + lut.read(uint3(pattern*2+1, 7, compare)).rgb, + lut.read(uint3(pattern*2+1, 8, compare)).rgb, + lut.read(uint3(pattern*2+1, 9, compare)).rgb, + lut.read(uint3(pattern*2+1, 10, compare)).rgb, + lut.read(uint3(pattern*2+1, 11, compare)).rgb, + lut.read(uint3(pattern*2+1, 12, compare)).rgb, + lut.read(uint3(pattern*2+1, 13, compare)).rgb, + lut.read(uint3(pattern*2+1, 14, compare)).rgb, + lut.read(uint3(pattern*2+1, 15, compare)).rgb + }; + + const uint2 outPosition = inPosition * 4; + outTexture.write( float4(Lerp(w[ 0], src[int(p[ 0].r*255.0f/30.95f)], src[int(p[ 0].g*255.0f/30.95f)], src[int(p[ 0].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[ 1], src[int(p[ 1].r*255.0f/30.95f)], src[int(p[ 1].g*255.0f/30.95f)], src[int(p[ 1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[ 2], src[int(p[ 2].r*255.0f/30.95f)], src[int(p[ 2].g*255.0f/30.95f)], src[int(p[ 2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 0) ); + outTexture.write( float4(Lerp(w[ 3], src[int(p[ 3].r*255.0f/30.95f)], src[int(p[ 3].g*255.0f/30.95f)], src[int(p[ 3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 0) ); + outTexture.write( float4(Lerp(w[ 4], src[int(p[ 4].r*255.0f/30.95f)], src[int(p[ 4].g*255.0f/30.95f)], src[int(p[ 4].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[ 5], src[int(p[ 5].r*255.0f/30.95f)], src[int(p[ 5].g*255.0f/30.95f)], src[int(p[ 5].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); + outTexture.write( float4(Lerp(w[ 6], src[int(p[ 6].r*255.0f/30.95f)], src[int(p[ 6].g*255.0f/30.95f)], src[int(p[ 6].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 1) ); + outTexture.write( float4(Lerp(w[ 7], src[int(p[ 7].r*255.0f/30.95f)], src[int(p[ 
7].g*255.0f/30.95f)], src[int(p[ 7].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 1) ); + outTexture.write( float4(Lerp(w[ 8], src[int(p[ 8].r*255.0f/30.95f)], src[int(p[ 8].g*255.0f/30.95f)], src[int(p[ 8].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 2) ); + outTexture.write( float4(Lerp(w[ 9], src[int(p[ 9].r*255.0f/30.95f)], src[int(p[ 9].g*255.0f/30.95f)], src[int(p[ 9].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 2) ); + outTexture.write( float4(Lerp(w[10], src[int(p[10].r*255.0f/30.95f)], src[int(p[10].g*255.0f/30.95f)], src[int(p[10].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 2) ); + outTexture.write( float4(Lerp(w[11], src[int(p[11].r*255.0f/30.95f)], src[int(p[11].g*255.0f/30.95f)], src[int(p[11].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 2) ); + outTexture.write( float4(Lerp(w[12], src[int(p[12].r*255.0f/30.95f)], src[int(p[12].g*255.0f/30.95f)], src[int(p[12].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 3) ); + outTexture.write( float4(Lerp(w[13], src[int(p[13].r*255.0f/30.95f)], src[int(p[13].g*255.0f/30.95f)], src[int(p[13].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 3) ); + outTexture.write( float4(Lerp(w[14], src[int(p[14].r*255.0f/30.95f)], src[int(p[14].g*255.0f/30.95f)], src[int(p[14].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 3) ); + outTexture.write( float4(Lerp(w[15], src[int(p[15].r*255.0f/30.95f)], src[int(p[15].g*255.0f/30.95f)], src[int(p[15].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 3) ); +} + +//--------------------------------------- +// Input Pixel Mapping: 00|01|02 +// 03|04|05 +// 06|07|08 +// +// Output Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +kernel void pixel_scaler_HQ4xS(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + const texture3d lut [[texture(2)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[9] = { + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + 
inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb + }; + + float b[9]; + float minBright = 10.0f; + float maxBright = 0.0f; + + for (int i = 0; i < 9; i++) + { + b[i] = (src[i].r + src[i].r + src[i].r) + (src[i].g + src[i].g + src[i].g) + (src[i].b + src[i].b); + minBright = min(minBright, b[i]); + maxBright = max(maxBright, b[i]); + } + + const float diffBright = (maxBright - minBright) * (7.0f/16.0f); + const int pattern = int(step((3.5f*7.0f/892.5f), diffBright)) * ((int(abs(b[0] - b[4]) > diffBright) * 1) + + (int(abs(b[1] - b[4]) > diffBright) * 2) + + (int(abs(b[2] - b[4]) > diffBright) * 4) + + (int(abs(b[3] - b[4]) > diffBright) * 8) + + (int(abs(b[5] - b[4]) > diffBright) * 16) + + (int(abs(b[6] - b[4]) > diffBright) * 32) + + (int(abs(b[7] - b[4]) > diffBright) * 64) + + (int(abs(b[8] - b[4]) > diffBright) * 128)); + + const float3 p[16] = { + lut.read(uint3(pattern*2+0, 0, 0)).rgb, + lut.read(uint3(pattern*2+0, 1, 0)).rgb, + lut.read(uint3(pattern*2+0, 2, 0)).rgb, + lut.read(uint3(pattern*2+0, 3, 0)).rgb, + lut.read(uint3(pattern*2+0, 4, 0)).rgb, + lut.read(uint3(pattern*2+0, 5, 0)).rgb, + lut.read(uint3(pattern*2+0, 6, 0)).rgb, + lut.read(uint3(pattern*2+0, 7, 0)).rgb, + lut.read(uint3(pattern*2+0, 8, 0)).rgb, + lut.read(uint3(pattern*2+0, 9, 0)).rgb, + lut.read(uint3(pattern*2+0, 10, 0)).rgb, + lut.read(uint3(pattern*2+0, 11, 0)).rgb, + lut.read(uint3(pattern*2+0, 12, 0)).rgb, + lut.read(uint3(pattern*2+0, 13, 0)).rgb, + lut.read(uint3(pattern*2+0, 14, 0)).rgb, + 
lut.read(uint3(pattern*2+0, 15, 0)).rgb + }; + + const float3 w[16] = { + lut.read(uint3(pattern*2+1, 0, 0)).rgb, + lut.read(uint3(pattern*2+1, 1, 0)).rgb, + lut.read(uint3(pattern*2+1, 2, 0)).rgb, + lut.read(uint3(pattern*2+1, 3, 0)).rgb, + lut.read(uint3(pattern*2+1, 4, 0)).rgb, + lut.read(uint3(pattern*2+1, 5, 0)).rgb, + lut.read(uint3(pattern*2+1, 6, 0)).rgb, + lut.read(uint3(pattern*2+1, 7, 0)).rgb, + lut.read(uint3(pattern*2+1, 8, 0)).rgb, + lut.read(uint3(pattern*2+1, 9, 0)).rgb, + lut.read(uint3(pattern*2+1, 10, 0)).rgb, + lut.read(uint3(pattern*2+1, 11, 0)).rgb, + lut.read(uint3(pattern*2+1, 12, 0)).rgb, + lut.read(uint3(pattern*2+1, 13, 0)).rgb, + lut.read(uint3(pattern*2+1, 14, 0)).rgb, + lut.read(uint3(pattern*2+1, 15, 0)).rgb + }; + + const uint2 outPosition = inPosition * 4; + outTexture.write( float4(Lerp(w[ 0], src[int(p[ 0].r*255.0f/30.95f)], src[int(p[ 0].g*255.0f/30.95f)], src[int(p[ 0].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(Lerp(w[ 1], src[int(p[ 1].r*255.0f/30.95f)], src[int(p[ 1].g*255.0f/30.95f)], src[int(p[ 1].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(Lerp(w[ 2], src[int(p[ 2].r*255.0f/30.95f)], src[int(p[ 2].g*255.0f/30.95f)], src[int(p[ 2].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 0) ); + outTexture.write( float4(Lerp(w[ 3], src[int(p[ 3].r*255.0f/30.95f)], src[int(p[ 3].g*255.0f/30.95f)], src[int(p[ 3].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 0) ); + outTexture.write( float4(Lerp(w[ 4], src[int(p[ 4].r*255.0f/30.95f)], src[int(p[ 4].g*255.0f/30.95f)], src[int(p[ 4].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(Lerp(w[ 5], src[int(p[ 5].r*255.0f/30.95f)], src[int(p[ 5].g*255.0f/30.95f)], src[int(p[ 5].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 1) ); + outTexture.write( float4(Lerp(w[ 6], src[int(p[ 6].r*255.0f/30.95f)], src[int(p[ 6].g*255.0f/30.95f)], src[int(p[ 6].b*255.0f/30.95f)]), 1.0f), 
outPosition + uint2(2, 1) ); + outTexture.write( float4(Lerp(w[ 7], src[int(p[ 7].r*255.0f/30.95f)], src[int(p[ 7].g*255.0f/30.95f)], src[int(p[ 7].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 1) ); + outTexture.write( float4(Lerp(w[ 8], src[int(p[ 8].r*255.0f/30.95f)], src[int(p[ 8].g*255.0f/30.95f)], src[int(p[ 8].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 2) ); + outTexture.write( float4(Lerp(w[ 9], src[int(p[ 9].r*255.0f/30.95f)], src[int(p[ 9].g*255.0f/30.95f)], src[int(p[ 9].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 2) ); + outTexture.write( float4(Lerp(w[10], src[int(p[10].r*255.0f/30.95f)], src[int(p[10].g*255.0f/30.95f)], src[int(p[10].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 2) ); + outTexture.write( float4(Lerp(w[11], src[int(p[11].r*255.0f/30.95f)], src[int(p[11].g*255.0f/30.95f)], src[int(p[11].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 2) ); + outTexture.write( float4(Lerp(w[12], src[int(p[12].r*255.0f/30.95f)], src[int(p[12].g*255.0f/30.95f)], src[int(p[12].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(0, 3) ); + outTexture.write( float4(Lerp(w[13], src[int(p[13].r*255.0f/30.95f)], src[int(p[13].g*255.0f/30.95f)], src[int(p[13].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(1, 3) ); + outTexture.write( float4(Lerp(w[14], src[int(p[14].r*255.0f/30.95f)], src[int(p[14].g*255.0f/30.95f)], src[int(p[14].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(2, 3) ); + outTexture.write( float4(Lerp(w[15], src[int(p[15].r*255.0f/30.95f)], src[int(p[15].g*255.0f/30.95f)], src[int(p[15].b*255.0f/30.95f)]), 1.0f), outPosition + uint2(3, 3) ); +} + +#define BLEND_NONE 0 +#define BLEND_NORMAL 1 +#define BLEND_DOMINANT 2 +#define LUMINANCE_WEIGHT 1.0 +#define EQUAL_COLOR_TOLERANCE 30.0/255.0 +#define STEEP_DIRECTION_THRESHOLD 2.2 +#define DOMINANT_DIRECTION_THRESHOLD 3.6 + +float DistYCbCr(const float3 pixA, const float3 pixB) +{ + const float3 w = float3(0.2627f, 0.6780f, 0.0593f); + const float scaleB = 0.5f / (1.0f - w.b); + const 
float scaleR = 0.5f / (1.0f - w.r); + float3 diff = pixA - pixB; + float Y = dot(diff, w); + float Cb = scaleB * (diff.b - Y); + float Cr = scaleR * (diff.r - Y); + + return sqrt( ((LUMINANCE_WEIGHT*Y) * (LUMINANCE_WEIGHT*Y)) + (Cb * Cb) + (Cr * Cr) ); +} + +bool IsPixEqual(const float3 pixA, const float3 pixB) +{ + return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE); +} + +bool IsBlendingNeeded(const int4 blend) +{ + return any(blend != int4(BLEND_NONE)); +} + +//--------------------------------------- +// Input Pixel Mapping: --|21|22|23|-- +// 19|06|07|08|09 +// 18|05|00|01|10 +// 17|04|03|02|11 +// --|15|14|13|-- +// +// Output Pixel Mapping: 00|01 +// 03|02 +kernel void pixel_scaler_2xBRZ(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[25] = { + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 2)).rgb, + 
inTexture.sample(genSampler, float2(inPosition), int2(-2, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2,-2)).rgb + }; + + const float v[9] = { + reduce(src[0]), + reduce(src[1]), + reduce(src[2]), + reduce(src[3]), + reduce(src[4]), + reduce(src[5]), + reduce(src[6]), + reduce(src[7]), + reduce(src[8]) + }; + + int4 blendResult = int4(BLEND_NONE); + + // Preprocess corners + // Pixel Tap Mapping: --|--|--|--|-- + // --|--|07|08|-- + // --|05|00|01|10 + // --|04|03|02|11 + // --|--|14|13|-- + + // Corner (1, 1) + if ( !((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) ) + { + const float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); + const float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; + + blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + + // Pixel Tap Mapping: --|--|--|--|-- + // --|06|07|--|-- + // 18|05|00|01|-- + // 17|04|03|02|-- + // --|15|14|--|-- + // Corner (0, 1) + if ( !((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) ) + { + const float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); + const float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; + + blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|22|23|-- + // --|06|07|08|09 + // --|05|00|01|10 + // --|--|03|02|-- + // --|--|--|--|-- + // Corner (1, 0) + if ( !((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) ) + { + const float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); + const float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; + + blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|21|22|--|-- + // 19|06|07|08|-- + // 18|05|00|01|-- + // --|04|03|--|-- + // --|--|--|--|-- + // Corner (0, 0) + if ( !((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) ) + { + const float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); + const float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; + + blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + float3 dst[4] = { + src[0], + src[0], + src[0], + src[0] + }; + + // Scale pixel + if (IsBlendingNeeded(blendResult)) + { + float4 dist_01_04 = float4( DistYCbCr(src[1], src[4]), DistYCbCr(src[7], src[2]), DistYCbCr(src[5], src[8]), DistYCbCr(src[3], src[6]) ); + float4 dist_03_08 = float4( DistYCbCr(src[3], src[8]), DistYCbCr(src[1], src[6]), DistYCbCr(src[7], src[4]), DistYCbCr(src[5], src[2]) ); + bool4 haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08); + bool4 haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04); + bool4 needBlend = (blendResult.zyxw != int4(BLEND_NONE)); + bool4 doLineBlend = (blendResult.zyxw >= int4(BLEND_DOMINANT)); + float3 blendPix[4]; + + haveShallowLine[0] = haveShallowLine[0] && (v[0] != v[4]) && (v[5] != v[4]); + haveSteepLine[0] = haveSteepLine[0] && (v[0] != v[8]) && (v[7] != v[8]); + doLineBlend[0] = ( doLineBlend[0] || + !((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && 
IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && !IsPixEqual(src[0], src[2])) ) ); + blendPix[0] = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; + + dst[1] = mix(dst[1], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.25f : 0.00f); + dst[2] = mix(dst[2], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((haveShallowLine[0]) ? ((haveSteepLine[0]) ? 5.0f/6.0f : 0.75f) : ((haveSteepLine[0]) ? 0.75f : 0.50f)) : 1.0f - (M_PI_F/4.0f)) : 0.00f); + dst[3] = mix(dst[3], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.25f : 0.00f); + + haveShallowLine[1] = haveShallowLine[1] && (v[0] != v[2]) && (v[3] != v[2]); + haveSteepLine[1] = haveSteepLine[1] && (v[0] != v[6]) && (v[5] != v[6]); + doLineBlend[1] = ( doLineBlend[1] || + !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); + blendPix[1] = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; + + dst[0] = mix(dst[0], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.25f : 0.00f); + dst[1] = mix(dst[1], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((haveShallowLine[1]) ? ((haveSteepLine[1]) ? 5.0f/6.0f : 0.75f) : ((haveSteepLine[1]) ? 0.75f : 0.50f)) : 1.0f - (M_PI_F/4.0f)) : 0.00f); + dst[2] = mix(dst[2], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 
0.25f : 0.00f); + + haveShallowLine[2] = haveShallowLine[2] && (v[0] != v[8]) && (v[1] != v[8]); + haveSteepLine[2] = haveSteepLine[2] && (v[0] != v[4]) && (v[3] != v[4]); + doLineBlend[2] = ( doLineBlend[2] || + !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); + blendPix[2] = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; + + dst[3] = mix(dst[3], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.25f : 0.00f); + dst[0] = mix(dst[0], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((haveShallowLine[2]) ? ((haveSteepLine[2]) ? 5.0f/6.0f : 0.75f) : ((haveSteepLine[2]) ? 0.75f : 0.50f)) : 1.0f - (M_PI_F/4.0f)) : 0.00f); + dst[1] = mix(dst[1], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.25f : 0.00f); + + haveShallowLine[3] = haveShallowLine[3] && (v[0] != v[6]) && (v[7] != v[6]); + haveSteepLine[3] = haveSteepLine[3] && (v[0] != v[2]) && (v[1] != v[2]); + doLineBlend[3] = ( doLineBlend[3] || + !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); + blendPix[3] = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; + + dst[2] = mix(dst[2], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 0.25f : 0.00f); + dst[3] = mix(dst[3], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((haveShallowLine[3]) ? ((haveSteepLine[3]) ? 5.0f/6.0f : 0.75f) : ((haveSteepLine[3]) ? 
0.75f : 0.50f)) : 1.0f - (M_PI_F/4.0f)) : 0.00f); + dst[0] = mix(dst[0], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.25f : 0.00f); + } + + const uint2 outPosition = inPosition * 2; + outTexture.write( float4(dst[0], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[1], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[3], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[2], 1.0f), outPosition + uint2(1, 1) ); +} + +//--------------------------------------- +// Input Pixel Mapping: --|21|22|23|-- +// 19|06|07|08|09 +// 18|05|00|01|10 +// 17|04|03|02|11 +// --|15|14|13|-- +// +// Output Pixel Mapping: 06|07|08 +// 05|00|01 +// 04|03|02 +kernel void pixel_scaler_3xBRZ(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[25] = { + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb, + inTexture.sample(genSampler, 
float2(inPosition), int2(-1, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2,-2)).rgb + }; + + const float v[9] = { + reduce(src[0]), + reduce(src[1]), + reduce(src[2]), + reduce(src[3]), + reduce(src[4]), + reduce(src[5]), + reduce(src[6]), + reduce(src[7]), + reduce(src[8]) + }; + + int4 blendResult = int4(BLEND_NONE); + + // Preprocess corners + // Pixel Tap Mapping: --|--|--|--|-- + // --|--|07|08|-- + // --|05|00|01|10 + // --|04|03|02|11 + // --|--|14|13|-- + + // Corner (1, 1) + if ( !((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) ) + { + const float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); + const float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; + + blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + + // Pixel Tap Mapping: --|--|--|--|-- + // --|06|07|--|-- + // 18|05|00|01|-- + // 17|04|03|02|-- + // --|15|14|--|-- + // Corner (0, 1) + if ( !((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) ) + { + const float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); + const float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; + + blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|22|23|-- + // --|06|07|08|09 + // --|05|00|01|10 + // --|--|03|02|-- + // --|--|--|--|-- + // Corner (1, 0) + if ( !((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) ) + { + const float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); + const float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; + + blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|21|22|--|-- + // 19|06|07|08|-- + // 18|05|00|01|-- + // --|04|03|--|-- + // --|--|--|--|-- + // Corner (0, 0) + if ( !((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) ) + { + const float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); + const float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; + + blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + float3 dst[9] = { + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0] + }; + + // Scale pixel + if (IsBlendingNeeded(blendResult)) + { + float4 dist_01_04 = float4( DistYCbCr(src[1], src[4]), DistYCbCr(src[7], src[2]), DistYCbCr(src[5], src[8]), DistYCbCr(src[3], src[6]) ); + float4 dist_03_08 = float4( DistYCbCr(src[3], src[8]), DistYCbCr(src[1], src[6]), DistYCbCr(src[7], src[4]), DistYCbCr(src[5], src[2]) ); + bool4 haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08); + bool4 haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04); + bool4 needBlend = (blendResult.zyxw != int4(BLEND_NONE)); + bool4 doLineBlend = (blendResult.zyxw >= int4(BLEND_DOMINANT)); + float3 blendPix[4]; + + haveShallowLine[0] = haveShallowLine[0] && (v[0] != v[4]) && (v[5] != v[4]); + haveSteepLine[0] = haveSteepLine[0] && (v[0] != v[8]) && (v[7] != v[8]); + doLineBlend[0] = ( doLineBlend[0] || + !((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + 
(IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && !IsPixEqual(src[0], src[2])) ) ); + blendPix[0] = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; + + dst[1] = mix(dst[1], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveSteepLine[0]) ? 0.750f : ((haveShallowLine[0]) ? 0.250f : 0.125f)) : 0.000f); + dst[2] = mix(dst[2], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((!haveShallowLine[0] && !haveSteepLine[0]) ? 0.875f : 1.000f) : 0.4545939598) : 0.000f); + dst[3] = mix(dst[3], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveShallowLine[0]) ? 0.750f : ((haveSteepLine[0]) ? 0.250f : 0.125f)) : 0.000f); + dst[4] = mix(dst[4], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.250f : 0.000f); + dst[8] = mix(dst[8], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.250f : 0.000f); + + haveShallowLine[1] = haveShallowLine[1] && (v[0] != v[2]) && (v[3] != v[2]); + haveSteepLine[1] = haveSteepLine[1] && (v[0] != v[6]) && (v[5] != v[6]); + doLineBlend[1] = ( doLineBlend[1] || + !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); + blendPix[1] = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; + + dst[7] = mix(dst[7], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveSteepLine[1]) ? 0.750f : ((haveShallowLine[1]) ? 0.250f : 0.125f)) : 0.000f); + dst[8] = mix(dst[8], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((!haveShallowLine[1] && !haveSteepLine[1]) ? 0.875f : 1.000f) : 0.4545939598f) : 0.000f); + dst[1] = mix(dst[1], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveShallowLine[1]) ? 0.750f : ((haveSteepLine[1]) ? 
0.250f : 0.125f)) : 0.000f); + dst[2] = mix(dst[2], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.250f : 0.000f); + dst[6] = mix(dst[6], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.250f : 0.000f); + + haveShallowLine[2] = haveShallowLine[2] && (v[0] != v[8]) && (v[1] != v[8]); + haveSteepLine[2] = haveSteepLine[2] && (v[0] != v[4]) && (v[3] != v[4]); + doLineBlend[2] = ( doLineBlend[2] || + !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); + blendPix[2] = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; + + dst[5] = mix(dst[5], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveSteepLine[2]) ? 0.750f : ((haveShallowLine[2]) ? 0.250f : 0.125f)) : 0.000f); + dst[6] = mix(dst[6], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((!haveShallowLine[2] && !haveSteepLine[2]) ? 0.875f : 1.000f) : 0.4545939598f) : 0.000f); + dst[7] = mix(dst[7], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveShallowLine[2]) ? 0.750f : ((haveSteepLine[2]) ? 0.250f : 0.125f)) : 0.000f); + dst[8] = mix(dst[8], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.250f : 0.000f); + dst[4] = mix(dst[4], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 
0.250f : 0.000f); + + haveShallowLine[3] = haveShallowLine[3] && (v[0] != v[6]) && (v[7] != v[6]); + haveSteepLine[3] = haveSteepLine[3] && (v[0] != v[2]) && (v[1] != v[2]); + doLineBlend[3] = ( doLineBlend[3] || + !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); + blendPix[3] = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; + + dst[3] = mix(dst[3], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveSteepLine[3]) ? 0.750f : ((haveShallowLine[3]) ? 0.250f : 0.125f)) : 0.000f); + dst[4] = mix(dst[4], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((!haveShallowLine[3] && !haveSteepLine[3]) ? 0.875f : 1.000f) : 0.4545939598f) : 0.000f); + dst[5] = mix(dst[5], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveShallowLine[3]) ? 0.750f : ((haveSteepLine[3]) ? 0.250f : 0.125f)) : 0.000f); + dst[6] = mix(dst[6], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.250f : 0.000f); + dst[2] = mix(dst[2], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 
0.250f : 0.000f); + } + + const uint2 outPosition = inPosition * 3; + outTexture.write( float4(dst[6], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[7], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[8], 1.0f), outPosition + uint2(2, 0) ); + outTexture.write( float4(dst[5], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[0], 1.0f), outPosition + uint2(1, 1) ); + outTexture.write( float4(dst[1], 1.0f), outPosition + uint2(2, 1) ); + outTexture.write( float4(dst[4], 1.0f), outPosition + uint2(0, 2) ); + outTexture.write( float4(dst[3], 1.0f), outPosition + uint2(1, 2) ); + outTexture.write( float4(dst[2], 1.0f), outPosition + uint2(2, 2) ); +} + +//--------------------------------------- +// Input Pixel Mapping: --|21|22|23|-- +// 19|06|07|08|09 +// 18|05|00|01|10 +// 17|04|03|02|11 +// --|15|14|13|-- +// +// Output Pixel Mapping: 00|01|02|03 +// 04|05|06|07 +// 08|09|10|11 +// 12|13|14|15 +kernel void pixel_scaler_4xBRZ(const uint2 inPosition [[thread_position_in_grid]], + const texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]]) +{ + const float3 src[25] = { + inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb, + 
inTexture.sample(genSampler, float2(inPosition), int2( 2, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2,-2)).rgb + }; + + const float v[9] = { + reduce(src[0]), + reduce(src[1]), + reduce(src[2]), + reduce(src[3]), + reduce(src[4]), + reduce(src[5]), + reduce(src[6]), + reduce(src[7]), + reduce(src[8]) + }; + + int4 blendResult = int4(BLEND_NONE); + + // Preprocess corners + // Pixel Tap Mapping: --|--|--|--|-- + // --|--|07|08|-- + // --|05|00|01|10 + // --|04|03|02|11 + // --|--|14|13|-- + + // Corner (1, 1) + if ( !((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) ) + { + const float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); + const float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; + + blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + + // Pixel Tap Mapping: --|--|--|--|-- + // --|06|07|--|-- + // 18|05|00|01|-- + // 17|04|03|02|-- + // --|15|14|--|-- + // Corner (0, 1) + if ( !((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) ) + { + const float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); + const float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; + + blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|22|23|-- + // --|06|07|08|09 + // --|05|00|01|10 + // --|--|03|02|-- + // --|--|--|--|-- + // Corner (1, 0) + if ( !((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) ) + { + const float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); + const float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; + + blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|21|22|--|-- + // 19|06|07|08|-- + // 18|05|00|01|-- + // --|04|03|--|-- + // --|--|--|--|-- + // Corner (0, 0) + if ( !((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) ) + { + const float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); + const float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; + + blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + float3 dst[16] = { + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0] + }; + + // Scale pixel + if (IsBlendingNeeded(blendResult)) + { + const float4 dist_01_04 = float4( DistYCbCr(src[1], src[4]), DistYCbCr(src[7], src[2]), DistYCbCr(src[5], src[8]), DistYCbCr(src[3], src[6]) ); + const float4 dist_03_08 = float4( DistYCbCr(src[3], src[8]), DistYCbCr(src[1], src[6]), DistYCbCr(src[7], src[4]), DistYCbCr(src[5], src[2]) ); + const bool4 haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08); + const bool4 haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04); + const bool4 needBlend = (blendResult.zyxw != int4(BLEND_NONE)); + const bool4 doLineBlend = (blendResult.zyxw >= int4(BLEND_DOMINANT)); + float3 blendPix[4]; + + haveShallowLine[0] = haveShallowLine[0] && (v[0] != v[4]) && (v[5] != v[4]); + haveSteepLine[0] = haveSteepLine[0] && (v[0] != v[8]) && (v[7] != v[8]); + doLineBlend[0] = ( doLineBlend[0] || + !((blendResult[1] != BLEND_NONE && 
!IsPixEqual(src[0], src[4])) || + (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && !IsPixEqual(src[0], src[2])) ) ); + blendPix[0] = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; + + haveShallowLine[1] = haveShallowLine[1] && (v[0] != v[2]) && (v[3] != v[2]); + haveSteepLine[1] = haveSteepLine[1] && (v[0] != v[6]) && (v[5] != v[6]); + doLineBlend[1] = ( doLineBlend[1] || + !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); + blendPix[1] = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; + + haveShallowLine[2] = haveShallowLine[2] && (v[0] != v[8]) && (v[1] != v[8]); + haveSteepLine[2] = haveSteepLine[2] && (v[0] != v[4]) && (v[3] != v[4]); + doLineBlend[2] = ( doLineBlend[2] || + !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); + blendPix[2] = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? 
src[5] : src[7]; + + haveShallowLine[3] = haveShallowLine[3] && (v[0] != v[6]) && (v[7] != v[6]); + haveSteepLine[3] = haveSteepLine[3] && (v[0] != v[2]) && (v[1] != v[2]); + doLineBlend[3] = ( doLineBlend[3] || + !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); + blendPix[3] = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; + + dst[ 0] = mix(dst[ 0], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.25f : 0.00f); + dst[ 0] = mix(dst[ 0], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? 1.00f : 0.6848532563f) : 0.00f); + dst[ 0] = mix(dst[ 0], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.25f : 0.00f); + + dst[ 1] = mix(dst[ 1], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.75f : 0.00f); + dst[ 1] = mix(dst[ 1], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((haveShallowLine[2]) ? 1.00f : ((haveSteepLine[2]) ? 0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + + dst[ 2] = mix(dst[ 2], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((haveSteepLine[1]) ? 1.00f : ((haveShallowLine[1]) ? 0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + dst[ 2] = mix(dst[ 2], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.75f : 0.00f); + + dst[ 3] = mix(dst[ 3], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.25f : 0.00f); + dst[ 3] = mix(dst[ 3], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? 1.00f : 0.6848532563f) : 0.00f); + dst[ 3] = mix(dst[ 3], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.25f : 0.00f); + + dst[ 4] = mix(dst[ 4], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((haveSteepLine[2]) ? 1.00f : ((haveShallowLine[2]) ? 
0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + dst[ 4] = mix(dst[ 4], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.75f : 0.00f); + + dst[ 5] = mix(dst[ 5], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveShallowLine[2]) ? ((haveSteepLine[2]) ? 1.0f/3.0f : 0.25f) : ((haveSteepLine[2]) ? 0.25f : 0.00f)) : 0.00f); + + dst[ 6] = mix(dst[ 6], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveShallowLine[1]) ? ((haveSteepLine[1]) ? 1.0f/3.0f : 0.25f) : ((haveSteepLine[1]) ? 0.25f : 0.00f)) : 0.00f); + + dst[ 7] = mix(dst[ 7], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.75f : 0.00f); + dst[ 7] = mix(dst[ 7], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((haveShallowLine[1]) ? 1.00f : ((haveSteepLine[1]) ? 0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + + dst[ 8] = mix(dst[ 8], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.75f : 0.00f); + dst[ 8] = mix(dst[ 8], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((haveShallowLine[3]) ? 1.00f : ((haveSteepLine[3]) ? 0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + + dst[ 9] = mix(dst[ 9], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveShallowLine[3]) ? ((haveSteepLine[3]) ? 1.0f/3.0f : 0.25f) : ((haveSteepLine[3]) ? 0.25f : 0.00f)) : 0.00f); + + dst[10] = mix(dst[10], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveShallowLine[0]) ? ((haveSteepLine[0]) ? 1.0f/3.0f : 0.25f) : ((haveSteepLine[0]) ? 0.25f : 0.00f)) : 0.00f); + + dst[11] = mix(dst[11], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((haveSteepLine[0]) ? 1.00f : ((haveShallowLine[0]) ? 0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + dst[11] = mix(dst[11], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.75f : 0.00f); + + dst[12] = mix(dst[12], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.25f : 0.00f); + dst[12] = mix(dst[12], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 
0.25f : 0.00f); + dst[12] = mix(dst[12], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? 1.00f : 0.6848532563f) : 0.00f); + + dst[13] = mix(dst[13], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.75f : 0.00f); + dst[13] = mix(dst[13], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((haveSteepLine[3]) ? 1.00f : ((haveShallowLine[3]) ? 0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + + dst[14] = mix(dst[14], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((haveShallowLine[0]) ? 1.00f : ((haveSteepLine[0]) ? 0.75f : 0.50f)) : 0.08677704501f) : 0.00f); + dst[14] = mix(dst[14], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 0.75f : 0.00f); + + dst[15] = mix(dst[15], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? 1.00f : 0.6848532563f) : 0.00f); + dst[15] = mix(dst[15], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.25f : 0.00f); + dst[15] = mix(dst[15], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 
0.25f : 0.00f);
	}
	
	const uint2 outPosition = inPosition * 4;
	outTexture.write( float4(dst[ 0], 1.0f), outPosition + uint2(0, 0) );
	outTexture.write( float4(dst[ 1], 1.0f), outPosition + uint2(1, 0) );
	outTexture.write( float4(dst[ 2], 1.0f), outPosition + uint2(2, 0) );
	outTexture.write( float4(dst[ 3], 1.0f), outPosition + uint2(3, 0) );
	outTexture.write( float4(dst[ 4], 1.0f), outPosition + uint2(0, 1) );
	outTexture.write( float4(dst[ 5], 1.0f), outPosition + uint2(1, 1) );
	outTexture.write( float4(dst[ 6], 1.0f), outPosition + uint2(2, 1) );
	outTexture.write( float4(dst[ 7], 1.0f), outPosition + uint2(3, 1) );
	outTexture.write( float4(dst[ 8], 1.0f), outPosition + uint2(0, 2) );
	outTexture.write( float4(dst[ 9], 1.0f), outPosition + uint2(1, 2) );
	outTexture.write( float4(dst[10], 1.0f), outPosition + uint2(2, 2) );
	outTexture.write( float4(dst[11], 1.0f), outPosition + uint2(3, 2) );
	outTexture.write( float4(dst[12], 1.0f), outPosition + uint2(0, 3) );
	outTexture.write( float4(dst[13], 1.0f), outPosition + uint2(1, 3) );
	outTexture.write( float4(dst[14], 1.0f), outPosition + uint2(2, 3) );
	outTexture.write( float4(dst[15], 1.0f), outPosition + uint2(3, 3) );
}

//---------------------------------------
// Input Pixel Mapping:  --|21|22|23|--
//                       19|06|07|08|09
//                       18|05|00|01|10
//                       17|04|03|02|11
//                       --|15|14|13|--
//
// Output Pixel Mapping: 00|01|02|03|04
//                       05|06|07|08|09
//                       10|11|12|13|14
//                       15|16|17|18|19
//                       20|21|22|23|24
//
// 5x BRZ pixel scaler.
//
// Each thread handles one source texel at inPosition. It samples the texel
// and its neighbours at integer offsets in [-2, 2] (src[0] is the centre),
// decides for each of the four corners of the centre texel whether a blend
// is required, and writes a 5x5 block of output texels whose top-left
// corner is at inPosition * 5. Every output texel starts as src[0]; dst[12]
// (the centre of the block) is never blended. Alpha is always written as 1.
//
// Parameters:
//   inPosition - thread position in the grid; used as the source texel
//                coordinate.
//   inTexture  - source image, read through genSampler.
//   outTexture - destination image; must cover 5x the source extent for the
//                threads that are dispatched (TODO confirm at the call site).
//
// genSampler, reduce(), DistYCbCr(), IsPixEqual(), IsBlendingNeeded(), the
// BLEND_* values and the *_DIRECTION_THRESHOLD constants are declared
// outside this kernel.
kernel void pixel_scaler_5xBRZ(const uint2 inPosition [[thread_position_in_grid]],
                               const texture2d<float> inTexture [[texture(0)]],
                               texture2d<float, access::write> outTexture [[texture(1)]])
{
	// Neighbourhood taps, indexed as in the "Input Pixel Mapping" diagram.
	// The four outermost corner taps (src[12], src[16], src[20], src[24]) are
	// sampled to keep the index layout regular but are not referenced below.
	const float3 src[25] = {
		inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-2, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-2, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-2, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-2,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-2,-2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1,-2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 0,-2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1,-2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2,-2)).rgb
	};
	
	// Scalar key of the centre texel and its eight direct neighbours, used
	// for the cheap equality tests below.
	const float v[9] = {
		reduce(src[0]),
		reduce(src[1]),
		reduce(src[2]),
		reduce(src[3]),
		reduce(src[4]),
		reduce(src[5]),
		reduce(src[6]),
		reduce(src[7]),
		reduce(src[8])
	};
	
	// One blend decision per corner of the centre texel:
	//   [0] = corner (0, 0), [1] = corner (1, 0),
	//   [2] = corner (1, 1), [3] = corner (0, 1).
	int4 blendResult = int4(BLEND_NONE);
	
	// Preprocess corners
	// Pixel Tap Mapping: --|--|--|--|--
	//                    --|--|07|08|--
	//                    --|05|00|01|10
	//                    --|04|03|02|11
	//                    --|--|14|13|--
	
	// Corner (1, 1)
	if ( !((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) )
	{
		const float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1]));
		const float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2]));
		const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02;
		
		blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
	}
	
	// Pixel Tap Mapping: --|--|--|--|--
	//                    --|06|07|--|--
	//                    18|05|00|01|--
	//                    17|04|03|02|--
	//                    --|15|14|--|--
	// Corner (0, 1)
	if ( !((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) )
	{
		const float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0]));
		const float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3]));
		const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00;
		
		blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
	}
	
	// Pixel Tap Mapping: --|--|22|23|--
	//                    --|06|07|08|09
	//                    --|05|00|01|10
	//                    --|--|03|02|--
	//                    --|--|--|--|--
	// Corner (1, 0)
	if ( !((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) )
	{
		const float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8]));
		const float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1]));
		const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08;
		
		blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
	}
	
	// Pixel Tap Mapping: --|21|22|--|--
	//                    19|06|07|08|--
	//                    18|05|00|01|--
	//                    --|04|03|--|--
	//                    --|--|--|--|--
	// Corner (0, 0)
	if ( !((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) )
	{
		const float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7]));
		const float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0]));
		const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00;
		
		blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
	}
	
	// Output block, row-major as in the "Output Pixel Mapping" diagram.
	// Every texel starts out as the unmodified centre colour.
	float3 dst[25];
	for (uint i = 0; i < 25; i++)
	{
		dst[i] = src[0];
	}
	
	// Scale pixel
	if (IsBlendingNeeded(blendResult))
	{
		const float4 dist_01_04 = float4( DistYCbCr(src[1], src[4]), DistYCbCr(src[7], src[2]), DistYCbCr(src[5], src[8]), DistYCbCr(src[3], src[6]) );
		const float4 dist_03_08 = float4( DistYCbCr(src[3], src[8]), DistYCbCr(src[1], src[6]), DistYCbCr(src[7], src[4]), DistYCbCr(src[5], src[2]) );
		
		// These three vectors are refined one component at a time below, so
		// they must not be const.
		bool4 haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08);
		bool4 haveSteepLine   = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04);
		bool4 doLineBlend     = (blendResult.zyxw >= int4(BLEND_DOMINANT));
		
		// The .zyxw swizzle reorders the corner results, so component i of
		// needBlend/doLineBlend corresponds to blendResult[2], [1], [0], [3]
		// for i = 0, 1, 2, 3.
		const bool4 needBlend = (blendResult.zyxw != int4(BLEND_NONE));
		float3 blendPix[4];
		
		haveShallowLine[0] = haveShallowLine[0] && (v[0] != v[4]) && (v[5] != v[4]);
		haveSteepLine[0]   = haveSteepLine[0] && (v[0] != v[8]) && (v[7] != v[8]);
		doLineBlend[0] = (  doLineBlend[0] ||
		                  !((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) ||
		                    (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) ||
		                    (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && !IsPixEqual(src[0], src[2])) ) );
		// Blend towards whichever of the two adjacent neighbours is closer to the centre.
		blendPix[0] = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3];
		
		haveShallowLine[1] = haveShallowLine[1] && (v[0] != v[2]) && (v[3] != v[2]);
		haveSteepLine[1]   = haveSteepLine[1] && (v[0] != v[6]) && (v[5] != v[6]);
		doLineBlend[1] = (  doLineBlend[1] ||
		                  !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) ||
		                    (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) ||
		                    (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) );
		blendPix[1] = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1];
		
		haveShallowLine[2] = haveShallowLine[2] && (v[0] != v[8]) && (v[1] != v[8]);
		haveSteepLine[2]   = haveSteepLine[2] && (v[0] != v[4]) && (v[3] != v[4]);
		doLineBlend[2] = (  doLineBlend[2] ||
		                  !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) ||
		                    (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) ||
		                    (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) );
		blendPix[2] = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7];
		
		haveShallowLine[3] = haveShallowLine[3] && (v[0] != v[6]) && (v[7] != v[6]);
		haveSteepLine[3]   = haveSteepLine[3] && (v[0] != v[2]) && (v[1] != v[2]);
		doLineBlend[3] = (  doLineBlend[3] ||
		                  !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) ||
		                    (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) ||
		                    (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) );
		blendPix[3] = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5];
		
		// Per-output-texel blending. A weight of 0 leaves the texel unchanged,
		// so each mix() is a no-op unless its condition holds. The order of
		// the mix() calls on one texel matters because each reads the result
		// of the previous one.
		dst[ 0] = mix(dst[ 0], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.250f : 0.000f);
		dst[ 0] = mix(dst[ 0], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? 1.000f : 0.8631434088f) : 0.000f);
		dst[ 0] = mix(dst[ 0], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.250f : 0.000f);
		
		dst[ 1] = mix(dst[ 1], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.750f : 0.000f);
		dst[ 1] = mix(dst[ 1], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((!haveShallowLine[2] && !haveSteepLine[2]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		
		dst[ 2] = mix(dst[ 2], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveSteepLine[1]) ? 1.000f : ((haveShallowLine[1]) ? 0.250f : 0.125f)) : 0.000f);
		dst[ 2] = mix(dst[ 2], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveShallowLine[2]) ? 1.000f : ((haveSteepLine[2]) ? 0.250f : 0.125f)) : 0.000f);
		
		dst[ 3] = mix(dst[ 3], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((!haveShallowLine[1] && !haveSteepLine[1]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		dst[ 3] = mix(dst[ 3], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.750f : 0.000f);
		
		dst[ 4] = mix(dst[ 4], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.250f : 0.000f);
		dst[ 4] = mix(dst[ 4], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? 1.000f : 0.8631434088f) : 0.000f);
		dst[ 4] = mix(dst[ 4], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.250f : 0.000f);
		
		dst[ 5] = mix(dst[ 5], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((!haveShallowLine[2] && !haveSteepLine[2]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		dst[ 5] = mix(dst[ 5], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.750f : 0.000f);
		
		dst[ 6] = mix(dst[ 6], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveShallowLine[2]) ? ((haveSteepLine[2]) ? 2.0f/3.0f : 0.750f) : ((haveSteepLine[2]) ? 0.750f : 0.125f)) : 0.000f);
		
		dst[ 7] = mix(dst[ 7], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.250f : 0.000f);
		dst[ 7] = mix(dst[ 7], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.250f : 0.000f);
		
		dst[ 8] = mix(dst[ 8], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveShallowLine[1]) ? ((haveSteepLine[1]) ? 2.0f/3.0f : 0.750f) : ((haveSteepLine[1]) ? 0.750f : 0.125f)) : 0.000f);
		
		dst[ 9] = mix(dst[ 9], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.750f : 0.000f);
		dst[ 9] = mix(dst[ 9], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((!haveShallowLine[1] && !haveSteepLine[1]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		
		dst[10] = mix(dst[10], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveSteepLine[2]) ? 1.000f : ((haveShallowLine[2]) ? 0.250f : 0.125f)) : 0.000f);
		dst[10] = mix(dst[10], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveShallowLine[3]) ? 1.000f : ((haveSteepLine[3]) ? 0.250f : 0.125f)) : 0.000f);
		
		dst[11] = mix(dst[11], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.250f : 0.000f);
		dst[11] = mix(dst[11], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.250f : 0.000f);
		
		// dst[12] is the centre of the 5x5 block and keeps the source colour.
		
		dst[13] = mix(dst[13], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.250f : 0.000f);
		dst[13] = mix(dst[13], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.250f : 0.000f);
		
		dst[14] = mix(dst[14], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveSteepLine[0]) ? 1.000f : ((haveShallowLine[0]) ? 0.250f : 0.125f)) : 0.000f);
		dst[14] = mix(dst[14], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveShallowLine[1]) ? 1.000f : ((haveSteepLine[1]) ? 0.250f : 0.125f)) : 0.000f);
		
		dst[15] = mix(dst[15], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.750f : 0.000f);
		dst[15] = mix(dst[15], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((!haveShallowLine[3] && !haveSteepLine[3]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		
		dst[16] = mix(dst[16], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveShallowLine[3]) ? ((haveSteepLine[3]) ? 2.0f/3.0f : 0.750f) : ((haveSteepLine[3]) ? 0.750f : 0.125f)) : 0.000f);
		
		dst[17] = mix(dst[17], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.250f : 0.000f);
		dst[17] = mix(dst[17], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 0.250f : 0.000f);
		
		dst[18] = mix(dst[18], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveShallowLine[0]) ? ((haveSteepLine[0]) ? 2.0f/3.0f : 0.750f) : ((haveSteepLine[0]) ? 0.750f : 0.125f)) : 0.000f);
		
		dst[19] = mix(dst[19], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((!haveShallowLine[0] && !haveSteepLine[0]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		dst[19] = mix(dst[19], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.750f : 0.000f);
		
		dst[20] = mix(dst[20], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.250f : 0.000f);
		dst[20] = mix(dst[20], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.250f : 0.000f);
		dst[20] = mix(dst[20], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? 1.000f : 0.8631434088f) : 0.000f);
		
		dst[21] = mix(dst[21], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.750f : 0.000f);
		dst[21] = mix(dst[21], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((!haveShallowLine[3] && !haveSteepLine[3]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		
		dst[22] = mix(dst[22], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveShallowLine[0]) ? 1.000f : ((haveSteepLine[0]) ? 0.250f : 0.125f)) : 0.000f);
		dst[22] = mix(dst[22], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveSteepLine[3]) ? 1.000f : ((haveShallowLine[3]) ? 0.250f : 0.125f)) : 0.000f);
		
		dst[23] = mix(dst[23], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((!haveShallowLine[0] && !haveSteepLine[0]) ? 0.875f : 1.000f) : 0.2306749731f) : 0.000f);
		dst[23] = mix(dst[23], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 0.750f : 0.000f);
		
		dst[24] = mix(dst[24], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? 1.000f : 0.8631434088f) : 0.000f);
		dst[24] = mix(dst[24], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.250f : 0.000f);
		dst[24] = mix(dst[24], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 0.250f : 0.000f);
	}
	
	// Emit the 5x5 block: dst[y * 5 + x] goes to (outPosition.x + x, outPosition.y + y).
	const uint2 outPosition = inPosition * 5;
	for (uint y = 0; y < 5; y++)
	{
		for (uint x = 0; x < 5; x++)
		{
			outTexture.write( float4(dst[(y * 5) + x], 1.0f), outPosition + uint2(x, y) );
		}
	}
}

//----------------------------------------
// Input Pixel Mapping:  --|21|22|23|--
//                       19|06|07|08|09
//                       18|05|00|01|10
//                       17|04|03|02|11
//                       --|15|14|13|--
//
// Output Pixel Mapping: 00|01|02|03|04|05
//                       06|07|08|09|10|11
//                       12|13|14|15|16|17
//                       18|19|20|21|22|23
//                       24|25|26|27|28|29
//                       30|31|32|33|34|35
kernel void pixel_scaler_6xBRZ(const uint2 inPosition [[thread_position_in_grid]],
                               const texture2d inTexture [[texture(0)]],
                               texture2d outTexture [[texture(1)]])
{
	const float3 src[25] = {
		inTexture.sample(genSampler, float2(inPosition), int2( 0, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 0, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 0,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2,-1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2, 0)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2, 1)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 2, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 1, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2( 0, 2)).rgb,
		inTexture.sample(genSampler, float2(inPosition), int2(-1, 2)).rgb,
inTexture.sample(genSampler, float2(inPosition), int2(-2, 2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2, 0)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-1)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-2,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2(-1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 0,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 1,-2)).rgb, + inTexture.sample(genSampler, float2(inPosition), int2( 2,-2)).rgb + }; + + const float v[9] = { + reduce(src[0]), + reduce(src[1]), + reduce(src[2]), + reduce(src[3]), + reduce(src[4]), + reduce(src[5]), + reduce(src[6]), + reduce(src[7]), + reduce(src[8]) + }; + + int4 blendResult = int4(BLEND_NONE); + + // Preprocess corners + // Pixel Tap Mapping: --|--|--|--|-- + // --|--|07|08|-- + // --|05|00|01|10 + // --|04|03|02|11 + // --|--|14|13|-- + + // Corner (1, 1) + if ( !((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) ) + { + const float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); + const float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; + + blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + + // Pixel Tap Mapping: --|--|--|--|-- + // --|06|07|--|-- + // 18|05|00|01|-- + // 17|04|03|02|-- + // --|15|14|--|-- + // Corner (0, 1) + if ( !((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) ) + { + const float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); + const float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; + + blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|22|23|-- + // --|06|07|08|09 + // --|05|00|01|10 + // --|--|03|02|-- + // --|--|--|--|-- + // Corner (1, 0) + if ( !((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) ) + { + const float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); + const float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; + + blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|21|22|--|-- + // 19|06|07|08|-- + // 18|05|00|01|-- + // --|04|03|--|-- + // --|--|--|--|-- + // Corner (0, 0) + if ( !((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) ) + { + const float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); + const float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); + const bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; + + blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + float3 dst[36] = { + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0], + src[0] + }; + + //float3 dst[36] = {src[0]}; + + // Scale pixel + if (IsBlendingNeeded(blendResult)) + { + const float4 dist_01_04 = float4( DistYCbCr(src[1], src[4]), DistYCbCr(src[7], src[2]), DistYCbCr(src[5], src[8]), DistYCbCr(src[3], src[6]) ); + const float4 dist_03_08 = float4( DistYCbCr(src[3], src[8]), DistYCbCr(src[1], src[6]), DistYCbCr(src[7], src[4]), DistYCbCr(src[5], src[2]) ); + const bool4 haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08); + const bool4 haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04); + const bool4 needBlend = (blendResult.zyxw != int4(BLEND_NONE)); + const bool4 doLineBlend = (blendResult.zyxw >= int4(BLEND_DOMINANT)); + float3 blendPix[4]; + + 
haveShallowLine[0] = haveShallowLine[0] && (v[0] != v[4]) && (v[5] != v[4]); + haveSteepLine[0] = haveSteepLine[0] && (v[0] != v[8]) && (v[7] != v[8]); + doLineBlend[0] = ( doLineBlend[0] || + !((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && !IsPixEqual(src[0], src[2])) ) ); + blendPix[0] = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; + + haveShallowLine[1] = haveShallowLine[1] && (v[0] != v[2]) && (v[3] != v[2]); + haveSteepLine[1] = haveSteepLine[1] && (v[0] != v[6]) && (v[5] != v[6]); + doLineBlend[1] = ( doLineBlend[1] || + !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); + blendPix[1] = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; + + haveShallowLine[2] = haveShallowLine[2] && (v[0] != v[8]) && (v[1] != v[8]); + haveSteepLine[2] = haveSteepLine[2] && (v[0] != v[4]) && (v[3] != v[4]); + doLineBlend[2] = ( doLineBlend[2] || + !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); + blendPix[2] = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? 
src[5] : src[7]; + + haveShallowLine[3] = haveShallowLine[3] && (v[0] != v[6]) && (v[7] != v[6]); + haveSteepLine[3] = haveSteepLine[3] && (v[0] != v[2]) && (v[1] != v[2]); + doLineBlend[3] = ( doLineBlend[3] || + !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); + blendPix[3] = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; + + dst[ 0] = mix(dst[ 0], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.250f : 0.000f); + dst[ 0] = mix(dst[ 0], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? 1.000f : 0.9711013910f) : 0.000f); + dst[ 0] = mix(dst[ 0], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.250f : 0.000f); + + dst[ 1] = mix(dst[ 1], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.750f : 0.000f); + dst[ 1] = mix(dst[ 1], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? 1.000f : 0.4236372243f) : 0.000f); + + dst[ 2] = mix(dst[ 2], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 1.000f : 0.000f); + dst[ 2] = mix(dst[ 2], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((haveShallowLine[2]) ? 1.000f : ((haveSteepLine[2]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + + dst[ 3] = mix(dst[ 3], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((haveSteepLine[1]) ? 1.000f : ((haveShallowLine[1]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + dst[ 3] = mix(dst[ 3], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 1.000f : 0.000f); + + dst[ 4] = mix(dst[ 4], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? 1.000f : 0.4236372243f) : 0.000f); + dst[ 4] = mix(dst[ 4], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 
0.750f : 0.000f); + + dst[ 5] = mix(dst[ 5], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.250f : 0.000f); + dst[ 5] = mix(dst[ 5], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? 1.000f : 0.9711013910f) : 0.000f); + dst[ 5] = mix(dst[ 5], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.250f : 0.000f); + + dst[ 6] = mix(dst[ 6], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? 1.000f : 0.4236372243f) : 0.000f); + dst[ 6] = mix(dst[ 6], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.750f : 0.000f); + + dst[ 7] = mix(dst[ 7], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((!haveShallowLine[2] && !haveSteepLine[2]) ? 0.500f : 1.000f) : 0.000f); + + dst[ 8] = mix(dst[ 8], blendPix[1], (needBlend[1] && doLineBlend[1] && haveSteepLine[1]) ? 0.250f : 0.000f); + dst[ 8] = mix(dst[ 8], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveShallowLine[2]) ? 0.750f : ((haveSteepLine[2]) ? 0.250f : 0.000f)) : 0.000f); + + dst[ 9] = mix(dst[ 9], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveSteepLine[1]) ? 0.750f : ((haveShallowLine[1]) ? 0.250f : 0.000f)) : 0.000f); + dst[ 9] = mix(dst[ 9], blendPix[2], (needBlend[2] && doLineBlend[2] && haveShallowLine[2]) ? 0.250f : 0.000f); + + dst[10] = mix(dst[10], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((!haveShallowLine[1] && !haveSteepLine[1]) ? 0.500f : 1.000f) : 0.000f); + + dst[11] = mix(dst[11], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.750f : 0.000f); + dst[11] = mix(dst[11], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? 1.000f : 0.4236372243f) : 0.000); + + dst[12] = mix(dst[12], blendPix[2], (needBlend[2]) ? ((doLineBlend[2]) ? ((haveSteepLine[2]) ? 1.000f : ((haveShallowLine[2]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + dst[12] = mix(dst[12], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 
1.000f : 0.000f); + + dst[13] = mix(dst[13], blendPix[2], (needBlend[2] && doLineBlend[2]) ? ((haveSteepLine[2]) ? 0.750f : ((haveShallowLine[2]) ? 0.250f : 0.000f)) : 0.000f); + dst[13] = mix(dst[13], blendPix[3], (needBlend[3] && doLineBlend[3] && haveShallowLine[3]) ? 0.250f : 0.000f); + + dst[16] = mix(dst[16], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 0.250f : 0.000f); + dst[16] = mix(dst[16], blendPix[1], (needBlend[1] && doLineBlend[1]) ? ((haveShallowLine[1]) ? 0.750f : ((haveSteepLine[1]) ? 0.250f : 0.000f)) : 0.000f); + + dst[17] = mix(dst[17], blendPix[0], (needBlend[0] && doLineBlend[0] && haveSteepLine[0]) ? 1.000f : 0.000f); + dst[17] = mix(dst[17], blendPix[1], (needBlend[1]) ? ((doLineBlend[1]) ? ((haveShallowLine[1]) ? 1.000f : ((haveSteepLine[1]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + + dst[18] = mix(dst[18], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 1.000f : 0.000f); + dst[18] = mix(dst[18], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((haveShallowLine[3]) ? 1.000f : ((haveSteepLine[3]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + + dst[19] = mix(dst[19], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.250f : 0.000f); + dst[19] = mix(dst[19], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveShallowLine[3]) ? 0.750f : ((haveSteepLine[3]) ? 0.250f : 0.000f)) : 0.000f); + + dst[22] = mix(dst[22], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveSteepLine[0]) ? 0.750f : ((haveShallowLine[0]) ? 0.250f : 0.000f)) : 0.000f); + dst[22] = mix(dst[22], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.250f : 0.000f); + + dst[23] = mix(dst[23], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((haveSteepLine[0]) ? 1.000f : ((haveShallowLine[0]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + dst[23] = mix(dst[23], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 
1.000f : 0.000f); + + dst[24] = mix(dst[24], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.750f : 0.000f); + dst[24] = mix(dst[24], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? 1.000f : 0.4236372243f) : 0.000f); + + dst[25] = mix(dst[25], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((!haveShallowLine[3] && !haveSteepLine[3]) ? 0.500f : 1.000f) : 0.000f); + + dst[26] = mix(dst[26], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.250f : 0.000f); + dst[26] = mix(dst[26], blendPix[3], (needBlend[3] && doLineBlend[3]) ? ((haveSteepLine[3]) ? 0.750f : ((haveShallowLine[3]) ? 0.250f : 0.000f)) : 0.000f); + + dst[27] = mix(dst[27], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((haveShallowLine[0]) ? 0.750f : ((haveSteepLine[0]) ? 0.250f : 0.000f)) : 0.000f); + dst[27] = mix(dst[27], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 0.250f : 0.000f); + + dst[28] = mix(dst[28], blendPix[0], (needBlend[0] && doLineBlend[0]) ? ((!haveShallowLine[0] && !haveSteepLine[0]) ? 0.500f : 1.000f) : 0.000f); + + dst[29] = mix(dst[29], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? 1.000f : 0.4236372243f) : 0.000f); + dst[29] = mix(dst[29], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.750f : 0.000f); + + dst[30] = mix(dst[30], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.250f : 0.000f); + dst[30] = mix(dst[30], blendPix[2], (needBlend[2] && doLineBlend[2] && haveSteepLine[2]) ? 0.250f : 0.000f); + dst[30] = mix(dst[30], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? 1.000f : 0.9711013910f) : 0.000f); + + dst[31] = mix(dst[31], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 0.750f : 0.000f); + dst[31] = mix(dst[31], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? 1.000f : 0.4236372243f) : 0.000f); + + dst[32] = mix(dst[32], blendPix[0], (needBlend[0] && doLineBlend[0] && haveShallowLine[0]) ? 
1.000f : 0.000f); + dst[32] = mix(dst[32], blendPix[3], (needBlend[3]) ? ((doLineBlend[3]) ? ((haveSteepLine[3]) ? 1.000f : ((haveShallowLine[3]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + + dst[33] = mix(dst[33], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? ((haveShallowLine[0]) ? 1.000f : ((haveSteepLine[0]) ? 0.750f : 0.500f)) : 0.05652034508f) : 0.000f); + dst[33] = mix(dst[33], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 1.000f : 0.000f); + + dst[34] = mix(dst[34], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? 1.000f : 0.4236372243f) : 0.000f); + dst[34] = mix(dst[34], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 0.750f : 0.000f); + + dst[35] = mix(dst[35], blendPix[0], (needBlend[0]) ? ((doLineBlend[0]) ? 1.000f : 0.9711013910f) : 0.000f); + dst[35] = mix(dst[35], blendPix[1], (needBlend[1] && doLineBlend[1] && haveShallowLine[1]) ? 0.250f : 0.000f); + dst[35] = mix(dst[35], blendPix[3], (needBlend[3] && doLineBlend[3] && haveSteepLine[3]) ? 
0.250f : 0.000f); + } + + const uint2 outPosition = inPosition * 6; + outTexture.write( float4(dst[ 0], 1.0f), outPosition + uint2(0, 0) ); + outTexture.write( float4(dst[ 1], 1.0f), outPosition + uint2(1, 0) ); + outTexture.write( float4(dst[ 2], 1.0f), outPosition + uint2(2, 0) ); + outTexture.write( float4(dst[ 3], 1.0f), outPosition + uint2(3, 0) ); + outTexture.write( float4(dst[ 4], 1.0f), outPosition + uint2(4, 0) ); + outTexture.write( float4(dst[ 5], 1.0f), outPosition + uint2(5, 0) ); + outTexture.write( float4(dst[ 6], 1.0f), outPosition + uint2(0, 1) ); + outTexture.write( float4(dst[ 7], 1.0f), outPosition + uint2(1, 1) ); + outTexture.write( float4(dst[ 8], 1.0f), outPosition + uint2(2, 1) ); + outTexture.write( float4(dst[ 9], 1.0f), outPosition + uint2(3, 1) ); + outTexture.write( float4(dst[10], 1.0f), outPosition + uint2(4, 1) ); + outTexture.write( float4(dst[11], 1.0f), outPosition + uint2(5, 1) ); + outTexture.write( float4(dst[12], 1.0f), outPosition + uint2(0, 2) ); + outTexture.write( float4(dst[13], 1.0f), outPosition + uint2(1, 2) ); + outTexture.write( float4(dst[14], 1.0f), outPosition + uint2(2, 2) ); + outTexture.write( float4(dst[15], 1.0f), outPosition + uint2(3, 2) ); + outTexture.write( float4(dst[16], 1.0f), outPosition + uint2(4, 2) ); + outTexture.write( float4(dst[17], 1.0f), outPosition + uint2(5, 2) ); + outTexture.write( float4(dst[18], 1.0f), outPosition + uint2(0, 3) ); + outTexture.write( float4(dst[19], 1.0f), outPosition + uint2(1, 3) ); + outTexture.write( float4(dst[20], 1.0f), outPosition + uint2(2, 3) ); + outTexture.write( float4(dst[21], 1.0f), outPosition + uint2(3, 3) ); + outTexture.write( float4(dst[22], 1.0f), outPosition + uint2(4, 3) ); + outTexture.write( float4(dst[23], 1.0f), outPosition + uint2(5, 3) ); + outTexture.write( float4(dst[24], 1.0f), outPosition + uint2(0, 4) ); + outTexture.write( float4(dst[25], 1.0f), outPosition + uint2(1, 4) ); + outTexture.write( float4(dst[26], 1.0f), outPosition + 
uint2(2, 4) ); + outTexture.write( float4(dst[27], 1.0f), outPosition + uint2(3, 4) ); + outTexture.write( float4(dst[28], 1.0f), outPosition + uint2(4, 4) ); + outTexture.write( float4(dst[29], 1.0f), outPosition + uint2(5, 4) ); + outTexture.write( float4(dst[30], 1.0f), outPosition + uint2(0, 5) ); + outTexture.write( float4(dst[31], 1.0f), outPosition + uint2(1, 5) ); + outTexture.write( float4(dst[32], 1.0f), outPosition + uint2(2, 5) ); + outTexture.write( float4(dst[33], 1.0f), outPosition + uint2(3, 5) ); + outTexture.write( float4(dst[34], 1.0f), outPosition + uint2(4, 5) ); + outTexture.write( float4(dst[35], 1.0f), outPosition + uint2(5, 5) ); +}