Ares64 Performance Core (#3149)
* prep for performance ares64 core, needs work on the unmanaged side * get this going * rebuild this * apparently build didn't get cp'd? need to investigate * fix build, other shit * suppress these warnings * tweaks and builds * apparently BizInvoker doesn't like having the LibAres64 class shared between non-waterbox and waterboxed, so split it. also states for performance core * builds * fix this option, describe supersampling properly * penguin64
This commit is contained in:
parent
05f11be191
commit
655ed7949e
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -27,7 +27,7 @@ namespace BizHawk.Client.Common
|
|||
(new[] { VSystemID.Raw.SNES },
|
||||
new[] { CoreNames.Faust, CoreNames.Snes9X, CoreNames.Bsnes, CoreNames.Bsnes115 }),
|
||||
(new[] { VSystemID.Raw.N64 },
|
||||
new[] { CoreNames.Mupen64Plus, CoreNames.Ares64, }),
|
||||
new[] { CoreNames.Mupen64Plus, CoreNames.Ares64Performance, CoreNames.Ares64Accuracy }),
|
||||
(new[] { VSystemID.Raw.SGB },
|
||||
new[] { CoreNames.Gambatte, CoreNames.Bsnes, CoreNames.Bsnes115}),
|
||||
(new[] { VSystemID.Raw.GB, VSystemID.Raw.GBC },
|
||||
|
|
|
@ -3,7 +3,7 @@ using System.ComponentModel;
|
|||
using BizHawk.Common;
|
||||
using BizHawk.Emulation.Common;
|
||||
|
||||
namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64
|
||||
namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64.Accuracy
|
||||
{
|
||||
public partial class Ares64 : ISettable<object, Ares64.Ares64SyncSettings>
|
||||
{
|
|
@ -6,13 +6,13 @@ using BizHawk.Emulation.Common;
|
|||
using BizHawk.Emulation.Cores.Properties;
|
||||
using BizHawk.Emulation.Cores.Waterbox;
|
||||
|
||||
namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64
|
||||
namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64.Accuracy
|
||||
{
|
||||
[PortedCore(CoreNames.Ares64, "ares team, Near", "v126", "https://ares-emulator.github.io/", isReleased: false)]
|
||||
[PortedCore(CoreNames.Ares64Accuracy, "ares team, Near", "v126", "https://ares-emulator.github.io/", isReleased: false)]
|
||||
[ServiceNotApplicable(new[] { typeof(IDriveLight), })]
|
||||
public partial class Ares64 : WaterboxCore, IRegionable
|
||||
{
|
||||
private readonly LibAres64 _core;
|
||||
private readonly LibAres64Accuracy _core;
|
||||
|
||||
[CoreConstructor(VSystemID.Raw.N64)]
|
||||
public Ares64(CoreLoadParameters<object, Ares64SyncSettings> lp)
|
||||
|
@ -40,7 +40,7 @@ namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64
|
|||
|
||||
N64Controller = CreateControllerDefinition(ControllerSettings);
|
||||
|
||||
_core = PreInit<LibAres64>(new WaterboxOptions
|
||||
_core = PreInit<LibAres64Accuracy>(new WaterboxOptions
|
||||
{
|
||||
Filename = "ares64.wbx",
|
||||
SbrkHeapSizeKB = 2 * 1024,
|
||||
|
@ -68,18 +68,31 @@ namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64
|
|||
VsyncDenominator = 1;
|
||||
}
|
||||
|
||||
LibAres64.LoadFlags loadFlags = 0;
|
||||
if (_syncSettings.RestrictAnalogRange)
|
||||
loadFlags |= LibAres64.LoadFlags.RestrictAnalogRange;
|
||||
if (pal)
|
||||
loadFlags |= LibAres64.LoadFlags.Pal;
|
||||
|
||||
var pif = Util.DecompressGzipFile(new MemoryStream(pal ? Resources.PIF_PAL_ROM.Value : Resources.PIF_NTSC_ROM.Value));
|
||||
|
||||
_exe.AddReadonlyFile(pif, pal ? "pif.pal.rom" : "pif.ntsc.rom");
|
||||
_exe.AddReadonlyFile(rom, "program.rom");
|
||||
|
||||
if (!_core.Init(ControllerSettings, _syncSettings.RestrictAnalogRange, pal))
|
||||
unsafe
|
||||
{
|
||||
fixed (byte* pifPtr = pif, romPtr = rom)
|
||||
{
|
||||
var loadData = new LibAres64.LoadData()
|
||||
{
|
||||
PifData = (IntPtr)pifPtr,
|
||||
PifLen = pif.Length,
|
||||
RomData = (IntPtr)romPtr,
|
||||
RomLen = rom.Length,
|
||||
};
|
||||
if (!_core.Init(loadData, ControllerSettings, loadFlags))
|
||||
{
|
||||
throw new InvalidOperationException("Init returned false!");
|
||||
}
|
||||
|
||||
_exe.RemoveReadonlyFile(pal ? "pif.pal.rom" : "pif.ntsc.rom");
|
||||
_exe.RemoveReadonlyFile("program.rom");
|
||||
}
|
||||
}
|
||||
|
||||
PostInit();
|
||||
DeterministicEmulation = true;
|
|
@ -0,0 +1,77 @@
|
|||
using System;
|
||||
using System.ComponentModel;
|
||||
|
||||
using Newtonsoft.Json;
|
||||
|
||||
using BizHawk.Common;
|
||||
using BizHawk.Emulation.Common;
|
||||
|
||||
namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64.Performance
|
||||
{
|
||||
public partial class Ares64 : ISettable<object, Ares64.Ares64SyncSettings>
|
||||
{
|
||||
private Ares64SyncSettings _syncSettings;
|
||||
|
||||
public object GetSettings() => null;
|
||||
|
||||
public Ares64SyncSettings GetSyncSettings() => _syncSettings.Clone();
|
||||
|
||||
public PutSettingsDirtyBits PutSettings(object o) => PutSettingsDirtyBits.None;
|
||||
|
||||
/// <summary>
/// Replaces the stored sync settings with <paramref name="o"/>.
/// </summary>
/// <param name="o">the new sync settings; stored by reference, not copied</param>
/// <returns>
/// <see cref="PutSettingsDirtyBits.RebootCore"/> when <see cref="Ares64SyncSettings.NeedsReboot"/> reports a difference
/// between the previous and the new settings, otherwise <see cref="PutSettingsDirtyBits.None"/>
/// </returns>
public PutSettingsDirtyBits PutSyncSettings(Ares64SyncSettings o)
{
	// compare against the old settings before they are overwritten
	bool rebootRequired = Ares64SyncSettings.NeedsReboot(_syncSettings, o);
	_syncSettings = o;

	if (rebootRequired)
	{
		return PutSettingsDirtyBits.RebootCore;
	}

	return PutSettingsDirtyBits.None;
}
|
||||
|
||||
/// <summary>
/// Sync settings of the performance core: one controller type per port, analog range restriction,
/// and the Vulkan related video options. Every property is initialised from its
/// <see cref="DefaultValueAttribute"/> by the constructor.
/// </summary>
public class Ares64SyncSettings
{
	/// <summary>Controller type plugged into port 1.</summary>
	[DisplayName("Player 1 Controller")]
	[Description("")]
	[DefaultValue(LibAres64.ControllerType.Mempak)]
	public LibAres64.ControllerType P1Controller { get; set; }

	/// <summary>Controller type plugged into port 2.</summary>
	[DisplayName("Player 2 Controller")]
	[Description("")]
	[DefaultValue(LibAres64.ControllerType.Unplugged)]
	public LibAres64.ControllerType P2Controller { get; set; }

	/// <summary>Controller type plugged into port 3.</summary>
	[DisplayName("Player 3 Controller")]
	[Description("")]
	[DefaultValue(LibAres64.ControllerType.Unplugged)]
	public LibAres64.ControllerType P3Controller { get; set; }

	/// <summary>Controller type plugged into port 4.</summary>
	[DisplayName("Player 4 Controller")]
	[Description("")]
	[DefaultValue(LibAres64.ControllerType.Unplugged)]
	public LibAres64.ControllerType P4Controller { get; set; }

	/// <summary>When set, the core is loaded with the restricted analog range flag.</summary>
	[DisplayName("Restrict Analog Range")]
	[Description("Restricts analog range to account for physical limitations.")]
	[DefaultValue(false)]
	public bool RestrictAnalogRange { get; set; }

	/// <summary>When set, the core is loaded with the Vulkan flag and <see cref="VulkanUpscale"/> is honoured.</summary>
	[DisplayName("Enable Vulkan")]
	[Description("Enables Vulkan RDP. May fallback to software RDP if your GPU does not support Vulkan.")]
	[DefaultValue(true)]
	public bool EnableVulkan { get; set; }

	/// <summary>When set, the core is loaded with the supersampling flag.</summary>
	[DisplayName("Supersampling")]
	[Description("Scales HD and UHD resolutions back down to SD")]
	[DefaultValue(false)]
	public bool SuperSample { get; set; }

	/// <summary>Upscale option handed to the core when Vulkan is enabled.</summary>
	[DisplayName("Vulkan Upscale")]
	[Description("")]
	[DefaultValue(LibAres64.VulkanUpscaleOpts.SD)]
	public LibAres64.VulkanUpscaleOpts VulkanUpscale { get; set; }

	/// <summary>Creates a settings object with every property set to its declared default value.</summary>
	public Ares64SyncSettings()
	{
		SettingsUtil.SetDefaultValues(this);
	}

	/// <summary>Returns a shallow (memberwise) copy of this instance.</summary>
	public Ares64SyncSettings Clone()
	{
		return (Ares64SyncSettings)MemberwiseClone();
	}

	/// <summary>Returns <see langword="true"/> when <paramref name="x"/> and <paramref name="y"/> are not deeply equal.</summary>
	public static bool NeedsReboot(Ares64SyncSettings x, Ares64SyncSettings y)
	{
		bool identical = DeepEquality.DeepEquals(x, y);
		return !identical;
	}
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,449 @@
|
|||
using System;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
|
||||
using BizHawk.BizInvoke;
|
||||
using BizHawk.Common;
|
||||
using BizHawk.Emulation.Common;
|
||||
using BizHawk.Emulation.Cores.Properties;
|
||||
using BizHawk.Emulation.Cores.Waterbox;
|
||||
|
||||
namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64.Performance
|
||||
{
|
||||
[PortedCore(CoreNames.Ares64Performance, "ares team, Near", "v126", "https://ares-emulator.github.io/", singleInstance: true, isReleased: false)]
|
||||
[ServiceNotApplicable(new[] { typeof(IDriveLight), })]
|
||||
public partial class Ares64 : IEmulator, IVideoProvider, ISoundProvider, IStatable, IInputPollable, ISaveRam, IRegionable
|
||||
{
|
||||
private static readonly LibAres64Performance _core;
|
||||
|
||||
/// <summary>
/// Resolves the native ares64 library (name chosen by host OS) and builds the shared
/// <see cref="LibAres64Performance"/> invoker used by every instance of this core.
/// </summary>
static Ares64()
{
	string libraryName = OSTailoredCode.IsUnixHost
		? "libares64.so"
		: "libares64.dll";

	var importResolver = new DynamicLibraryImportResolver(libraryName, hasLimitedLifetime: false);
	_core = BizInvoker.GetInvoker<LibAres64Performance>(importResolver, CallingConventionAdapters.Native);
}
|
||||
|
||||
private readonly BasicServiceProvider _serviceProvider;
|
||||
|
||||
public IEmulatorServiceProvider ServiceProvider => _serviceProvider;
|
||||
|
||||
public int Frame { get; private set; }
|
||||
|
||||
public int LagCount { get; set; }
|
||||
|
||||
public bool IsLagFrame { get; set; }
|
||||
|
||||
[FeatureNotImplemented]
|
||||
public IInputCallbackSystem InputCallbacks => throw new NotImplementedException();
|
||||
|
||||
public string SystemId => VSystemID.Raw.N64;
|
||||
|
||||
public bool DeterministicEmulation => false;
|
||||
|
||||
/// <summary>
/// Clears the frame counter, the lag counter and the lag flag.
/// </summary>
public void ResetCounters()
{
	IsLagFrame = false;
	LagCount = 0;
	Frame = 0;
}
|
||||
|
||||
public void Dispose() => _core.Deinit();
|
||||
|
||||
[CoreConstructor(VSystemID.Raw.N64)]
|
||||
/// <summary>
/// Boots the native performance core with the first ROM of <paramref name="lp"/> and wires up
/// controller definition, region/vsync information, memory domains and SaveRAM areas.
/// </summary>
/// <param name="lp">load parameters; sync settings fall back to defaults when absent</param>
/// <exception cref="InvalidOperationException">
/// deterministic emulation was requested, or the native <c>Init</c> call returned false
/// </exception>
public Ares64(CoreLoadParameters<object, Ares64SyncSettings> lp)
{
	// this core always reports DeterministicEmulation == false, so such a request cannot be honoured
	if (lp.DeterministicEmulationRequested)
	{
		throw new InvalidOperationException("This core is not deterministic!");
	}

	_serviceProvider = new(this);
	_syncSettings = lp.SyncSettings ?? new();

	// the upscale factor is only taken from the settings when Vulkan is enabled;
	// the video buffer holds 640x576 pixels scaled by that factor on both axes
	int upscale = 1;
	if (_syncSettings.EnableVulkan)
	{
		upscale = (int)_syncSettings.VulkanUpscale;
	}
	_videoBuffer = new int[640 * upscale * 576 * upscale];

	ControllerSettings = new[]
	{
		_syncSettings.P1Controller,
		_syncSettings.P2Controller,
		_syncSettings.P3Controller,
		_syncSettings.P4Controller,
	};
	N64Controller = CreateControllerDefinition(ControllerSettings);

	var rom = lp.Roms[0].RomData;

	// byte 0x3E of the ROM decides the region: the values below are treated as PAL, everything else as NTSC
	bool pal;
	switch (rom[0x3E])
	{
		case 0x44:
		case 0x46:
		case 0x49:
		case 0x50:
		case 0x53:
		case 0x55:
		case 0x58:
		case 0x59:
			pal = true;
			break;
		default:
			pal = false;
			break;
	}
	Region = pal ? DisplayType.PAL : DisplayType.NTSC;

	if (pal)
	{
		VsyncNumerator = 50;
		VsyncDenominator = 1;
	}
	else
	{
		VsyncNumerator = 60000;
		VsyncDenominator = 1001;
	}

	LibAres64.LoadFlags loadFlags = 0;
	if (_syncSettings.RestrictAnalogRange)
	{
		loadFlags |= LibAres64.LoadFlags.RestrictAnalogRange;
	}
	if (pal)
	{
		loadFlags |= LibAres64.LoadFlags.Pal;
	}
	if (_syncSettings.EnableVulkan)
	{
		loadFlags |= LibAres64.LoadFlags.UseVulkan;
	}
	if (_syncSettings.SuperSample)
	{
		loadFlags |= LibAres64.LoadFlags.SuperSample;
	}

	// the PIF ROM ships gzip-compressed in the resources, one image per region
	var compressedPif = pal ? Resources.PIF_PAL_ROM.Value : Resources.PIF_NTSC_ROM.Value;
	var pif = Util.DecompressGzipFile(new MemoryStream(compressedPif));

	unsafe
	{
		// both arrays stay pinned for the duration of the native Init call
		fixed (byte* pifPtr = pif, romPtr = rom)
		{
			var loadData = new LibAres64.LoadData();
			loadData.PifData = (IntPtr)pifPtr;
			loadData.PifLen = pif.Length;
			loadData.RomData = (IntPtr)romPtr;
			loadData.RomLen = rom.Length;
			loadData.VulkanUpscale = upscale;

			bool ok = _core.Init(loadData, ControllerSettings, loadFlags);
			if (!ok)
			{
				throw new InvalidOperationException("Init returned false!");
			}
		}
	}

	ResetCounters();

	// ask the core for its memory areas and drop empty slots and function hooks
	var reportedAreas = new LibWaterboxCore.MemoryArea[256];
	_core.GetMemoryAreas(reportedAreas);
	_memoryAreas = reportedAreas
		.Where(area => area.Data != IntPtr.Zero
			&& area.Size != 0
			&& !area.Flags.HasFlag(LibWaterboxCore.MemoryDomainFlags.FunctionHook))
		.ToArray();

	var memoryDomains = _memoryAreas
		.Select(area => new WaterboxMemoryDomainPointer(area, _monitor))
		.ToList();

	// exactly one domain is expected to carry the Primary flag; Single() throws otherwise
	var primaryDomain = memoryDomains
		.Single(domain => domain.Definition.Flags.HasFlag(LibWaterboxCore.MemoryDomainFlags.Primary));

	var domainList = new MemoryDomainList(memoryDomains.Cast<MemoryDomain>().ToList());
	domainList.MainMemory = primaryDomain;
	_serviceProvider.Register<IMemoryDomains>(domainList);

	_saveramAreas = memoryDomains
		.Where(domain => domain.Definition.Flags.HasFlag(LibWaterboxCore.MemoryDomainFlags.Saverammable))
		.ToArray();
	_saveramSize = (int)_saveramAreas.Sum(domain => domain.Size);
}
|
||||
|
||||
public DisplayType Region { get; }
|
||||
|
||||
public ControllerDefinition ControllerDefinition => N64Controller;
|
||||
|
||||
private ControllerDefinition N64Controller { get; }
|
||||
|
||||
public LibAres64.ControllerType[] ControllerSettings { get; }
|
||||
|
||||
/// <summary>
/// Builds the immutable "Nintendo 64 Controller" definition for the given port configuration.
/// Every port that is not <see cref="LibAres64.ControllerType.Unplugged"/> gets its buttons and an X/Y axis pair;
/// ports with a Rumblepak additionally get a haptics channel. "Reset" and "Power" are always added last.
/// </summary>
/// <param name="controllerSettings">controller type per port; the first four entries are read</param>
private static ControllerDefinition CreateControllerDefinition(LibAres64.ControllerType[] controllerSettings)
{
	// per-port button suffixes, in the order they are registered
	string[] buttonSuffixes =
	{
		"DPad U", "DPad D", "DPad L", "DPad R",
		"Start", "Z", "B", "A",
		"C Up", "C Down", "C Left", "C Right",
		"L", "R",
	};

	var definition = new ControllerDefinition("Nintendo 64 Controller");

	for (int port = 0; port < 4; port++)
	{
		var type = controllerSettings[port];
		if (type == LibAres64.ControllerType.Unplugged)
		{
			continue;
		}

		string prefix = $"P{port + 1}";

		foreach (var suffix in buttonSuffixes)
		{
			definition.BoolButtons.Add($"{prefix} {suffix}");
		}

		// "{0}" is left in the name as a placeholder for AddXYPair
		definition.AddXYPair($"{prefix} {{0}} Axis", AxisPairOrientation.RightAndUp, (-128).RangeTo(127), 0);

		if (type == LibAres64.ControllerType.Rumblepak)
		{
			definition.HapticsChannels.Add($"{prefix} Rumble Pak");
		}
	}

	definition.BoolButtons.Add("Reset");
	definition.BoolButtons.Add("Power");

	return definition.MakeImmutable();
}
|
||||
|
||||
/// <summary>
/// Button name suffix to native button flag, in the order the buttons are polled.
/// </summary>
private static readonly (string Suffix, LibAres64.Buttons Flag)[] _buttonMap =
{
	("DPad U", LibAres64.Buttons.UP),
	("DPad D", LibAres64.Buttons.DOWN),
	("DPad L", LibAres64.Buttons.LEFT),
	("DPad R", LibAres64.Buttons.RIGHT),
	("B", LibAres64.Buttons.B),
	("A", LibAres64.Buttons.A),
	("C Up", LibAres64.Buttons.C_UP),
	("C Down", LibAres64.Buttons.C_DOWN),
	("C Left", LibAres64.Buttons.C_LEFT),
	("C Right", LibAres64.Buttons.C_RIGHT),
	("L", LibAres64.Buttons.L),
	("R", LibAres64.Buttons.R),
	("Z", LibAres64.Buttons.Z),
	("Start", LibAres64.Buttons.START),
};

/// <summary>
/// Collects the pressed buttons of player <paramref name="num"/> into a native button bitmask.
/// </summary>
/// <param name="controller">input source that is queried with names of the form "P{num} {button}"</param>
/// <param name="num">1-based player number used in the button names</param>
/// <returns>the OR of the flags of all buttons reported as pressed</returns>
private static LibAres64.Buttons GetButtons(IController controller, int num)
{
	LibAres64.Buttons pressed = 0;

	foreach (var (suffix, flag) in _buttonMap)
	{
		if (controller.IsPressed($"P{num} {suffix}"))
		{
			pressed |= flag;
		}
	}

	return pressed;
}
|
||||
|
||||
/// <summary>
/// Pushes the current rumble state to the controller's haptics channels and packs the
/// controller input of all four players, plus Reset and Power, into a frame info object.
/// </summary>
/// <param name="controller">input source for this frame</param>
/// <param name="render">not used by this method</param>
/// <param name="rendersound">not used by this method</param>
/// <returns>a <see cref="LibAres64.FrameInfo"/> with the input fields filled in</returns>
private LibWaterboxCore.FrameInfo FrameAdvancePrep(IController controller, bool render, bool rendersound)
{
	for (int port = 0; port < 4; port++)
	{
		if (ControllerSettings[port] != LibAres64.ControllerType.Rumblepak)
		{
			continue;
		}

		// the core only reports motor on/off, which maps to full strength or none
		string channel = $"P{port + 1} Rumble Pak";
		int strength = _core.GetRumbleStatus(port) ? int.MaxValue : 0;
		controller.SetHapticChannelStrength(channel, strength);
	}

	var input = new LibAres64.FrameInfo();

	input.P1Buttons = GetButtons(controller, 1);
	input.P1XAxis = (short)controller.AxisValue("P1 X Axis");
	input.P1YAxis = (short)controller.AxisValue("P1 Y Axis");

	input.P2Buttons = GetButtons(controller, 2);
	input.P2XAxis = (short)controller.AxisValue("P2 X Axis");
	input.P2YAxis = (short)controller.AxisValue("P2 Y Axis");

	input.P3Buttons = GetButtons(controller, 3);
	input.P3XAxis = (short)controller.AxisValue("P3 X Axis");
	input.P3YAxis = (short)controller.AxisValue("P3 Y Axis");

	input.P4Buttons = GetButtons(controller, 4);
	input.P4XAxis = (short)controller.AxisValue("P4 X Axis");
	input.P4YAxis = (short)controller.AxisValue("P4 Y Axis");

	input.Reset = controller.IsPressed("Reset");
	input.Power = controller.IsPressed("Power");

	return input;
}
|
||||
|
||||
/// <summary>
/// Runs the native core for one frame with the given input and updates frame/lag counters,
/// video dimensions and the number of available sound samples.
/// </summary>
/// <param name="controller">input source for this frame</param>
/// <param name="render">when true, the buffer width/height are updated from the frame result</param>
/// <param name="rendersound">when false, the sample count for this frame is reported as 0</param>
/// <returns>always <see langword="true"/></returns>
public unsafe bool FrameAdvance(IController controller, bool render, bool rendersound = true)
{
	_core.SetInputCallback(null);

	// the managed buffers are pinned while the native side writes into them
	fixed (int* videoPtr = _videoBuffer)
	fixed (short* soundPtr = _soundBuffer)
	{
		var info = FrameAdvancePrep(controller, render, rendersound);
		info.VideoBuffer = (IntPtr)videoPtr;
		info.SoundBuffer = (IntPtr)soundPtr;

		_core.FrameAdvance(info);

		Frame++;
		IsLagFrame = info.Lagged != 0;
		if (IsLagFrame)
		{
			LagCount++;
		}

		if (render)
		{
			BufferWidth = info.Width;
			BufferHeight = info.Height;
		}

		_numSamples = rendersound ? info.Samples : 0;

		FrameAdvancePost();
	}

	return true;
}
|
||||
|
||||
/// <summary>
/// Substitutes a fallback width when the buffer width is 0: 320 for a buffer height of 239, otherwise 640.
/// </summary>
private void FrameAdvancePost()
{
	if (BufferWidth != 0)
	{
		return;
	}

	if (BufferHeight == 239)
	{
		BufferWidth = 320;
	}
	else
	{
		BufferWidth = 640;
	}
}
|
||||
|
||||
public int[] GetVideoBuffer() => _videoBuffer;
|
||||
|
||||
private readonly int[] _videoBuffer;
|
||||
|
||||
public int VirtualWidth => 640;
|
||||
|
||||
public int VirtualHeight => 480;
|
||||
|
||||
public int BufferWidth { get; private set; }
|
||||
|
||||
public int BufferHeight { get; private set; }
|
||||
|
||||
public int VsyncNumerator { get; }
|
||||
|
||||
public int VsyncDenominator { get; }
|
||||
|
||||
public int BackgroundColor => unchecked((int)0xff000000);
|
||||
|
||||
/// <summary>
/// Accepts any sound mode except <see cref="SyncSoundMode.Async"/>; no state is changed.
/// </summary>
/// <exception cref="NotSupportedException"><paramref name="mode"/> is <see cref="SyncSoundMode.Async"/></exception>
public void SetSyncMode(SyncSoundMode mode)
{
	if (mode != SyncSoundMode.Async)
	{
		return;
	}

	throw new NotSupportedException("Async mode is not supported.");
}
|
||||
|
||||
/// <summary>
/// Hands out the internal sound buffer together with the sample count recorded by the last frame advance.
/// </summary>
/// <param name="samples">receives the core's own sound buffer (not a copy)</param>
/// <param name="nsamp">receives the sample count of the last frame</param>
public void GetSamplesSync(out short[] samples, out int nsamp)
{
	nsamp = _numSamples;
	samples = _soundBuffer;
}
|
||||
|
||||
public void GetSamplesAsync(short[] samples) => throw new InvalidOperationException("Async mode is not supported.");
|
||||
|
||||
public void DiscardSamples() {}
|
||||
|
||||
private readonly short[] _soundBuffer = new short[2048 * 2];
|
||||
|
||||
private int _numSamples;
|
||||
|
||||
public bool CanProvideAsync => false;
|
||||
|
||||
public SyncSoundMode SyncMode => SyncSoundMode.Sync;
|
||||
|
||||
private byte[] _stateBuffer = new byte[0];
|
||||
|
||||
/// <summary>
/// Serializes the native core state and writes it to <paramref name="writer"/> as a 32-bit length
/// followed by the raw state bytes.
/// </summary>
/// <param name="writer">destination of the savestate</param>
public void SaveStateBinary(BinaryWriter writer)
{
	int stateSize = _core.SerializeSize();

	// the scratch buffer is reused between calls and only reallocated when the size changes
	if (_stateBuffer.Length != stateSize)
	{
		_stateBuffer = new byte[stateSize];
	}

	_core.Serialize(_stateBuffer);

	writer.Write(_stateBuffer.Length);
	writer.Write(_stateBuffer);
}
|
||||
|
||||
/// <summary>
/// Reads a savestate written by <see cref="SaveStateBinary"/> (32-bit length followed by the state bytes)
/// and hands it to the native core.
/// </summary>
/// <param name="reader">source of the savestate</param>
/// <exception cref="InvalidOperationException">
/// the stored length differs from the core's current serialize size, or the core rejected the state
/// </exception>
/// <exception cref="EndOfStreamException">the stream ended before the whole state was read</exception>
public void LoadStateBinary(BinaryReader reader)
{
	var len = reader.ReadInt32();
	if (len != _core.SerializeSize())
	{
		throw new InvalidOperationException("Savestate size mismatch!");
	}

	if (len != _stateBuffer.Length)
	{
		_stateBuffer = new byte[len];
	}

	// BinaryReader.Read may deliver fewer bytes than requested, so keep reading until the
	// buffer is full instead of passing a partially stale buffer to the core
	var filled = 0;
	while (filled < len)
	{
		var nread = reader.Read(_stateBuffer, filled, len - filled);
		if (nread == 0)
		{
			throw new EndOfStreamException("Savestate data ended unexpectedly!");
		}

		filled += nread;
	}

	if (!_core.Unserialize(_stateBuffer, len))
	{
		throw new InvalidOperationException($"{nameof(_core.Unserialize)}() returned false!");
	}
}
|
||||
|
||||
private readonly LibWaterboxCore.MemoryArea[] _memoryAreas;
|
||||
|
||||
private readonly WaterboxMemoryDomain[] _saveramAreas;
|
||||
private readonly int _saveramSize;
|
||||
|
||||
/// <summary>
/// Reports whether any SaveRAM area differs from its blank fill pattern: all bits set for areas
/// flagged <see cref="LibWaterboxCore.MemoryDomainFlags.OneFilled"/>, all zero otherwise.
/// Returns <see langword="false"/> when the core has no SaveRAM.
/// </summary>
public unsafe bool SaveRamModified
{
	get
	{
		if (_saveramSize == 0)
		{
			return false;
		}

		var buff = new byte[4096];
		fixed (byte* bp = buff)
		{
			foreach (var area in _saveramAreas)
			{
				var stream = new MemoryDomainStream(area);
				bool oneFilled = (area.Definition.Flags & LibWaterboxCore.MemoryDomainFlags.OneFilled) != 0;
				int cmp = oneFilled ? -1 : 0;
				byte cmpByte = oneFilled ? (byte)0xFF : (byte)0x00;

				while (true)
				{
					int nread = stream.Read(buff, 0, 4096);
					if (nread == 0)
					{
						break;
					}

					// compare whole 32-bit words first
					int* p = (int*)bp;
					int* pend = p + nread / sizeof(int);
					while (p < pend)
					{
						if (*p++ != cmp)
						{
							return true;
						}
					}

					// then the up to 3 remaining bytes, which the word scan cannot see
					// when the read length is not a multiple of 4
					byte* tail = (byte*)pend;
					byte* tailEnd = bp + nread;
					while (tail < tailEnd)
					{
						if (*tail++ != cmpByte)
						{
							return true;
						}
					}
				}
			}
		}

		return false;
	}
}
|
||||
|
||||
/// <summary>
/// Copies the contents of all SaveRAM areas, in order, into one new array.
/// </summary>
/// <returns>the concatenated SaveRAM, or <see langword="null"/> when the core has no SaveRAM</returns>
public byte[] CloneSaveRam()
{
	if (_saveramSize == 0)
	{
		return null;
	}

	var snapshot = new byte[_saveramSize];
	var sink = new MemoryStream(snapshot, true);

	foreach (var domain in _saveramAreas)
	{
		var domainStream = new MemoryDomainStream(domain);
		domainStream.CopyTo(sink);
	}

	return snapshot;
}
|
||||
|
||||
/// <summary>
/// Writes <paramref name="data"/> back into the SaveRAM areas, filling each area in order with
/// as many bytes as that area holds.
/// </summary>
/// <param name="data">SaveRAM image; its length must equal the combined size of all SaveRAM areas</param>
/// <exception cref="InvalidOperationException">the length of <paramref name="data"/> does not match</exception>
public void StoreSaveRam(byte[] data)
{
	if (data.Length != _saveramSize)
	{
		throw new InvalidOperationException("Saveram size mismatch");
	}

	var image = new MemoryStream(data, false);

	foreach (var domain in _saveramAreas)
	{
		var domainStream = new MemoryDomainStream(domain);
		WaterboxUtils.CopySome(image, domainStream, domain.Size);
	}
}
|
||||
|
||||
private readonly DummyMonitor _monitor = new();
|
||||
|
||||
/// <summary>
/// <see cref="IMonitor"/> implementation whose <see cref="Enter"/> and <see cref="Exit"/> do nothing.
/// </summary>
private class DummyMonitor : IMonitor
{
	/// <summary>No-op.</summary>
	public void Enter()
	{
	}

	/// <summary>No-op.</summary>
	public void Exit()
	{
	}
}
|
||||
}
|
||||
}
|
|
@ -59,10 +59,57 @@ namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64
|
|||
public bool Power;
|
||||
}
|
||||
|
||||
/// <summary>
/// Option bits passed to the native <c>Init</c> call.
/// </summary>
[Flags]
public enum LoadFlags : uint
{
	/// <summary>Bit 0.</summary>
	RestrictAnalogRange = 0x1,

	/// <summary>Bit 1.</summary>
	Pal = 0x2,

	// the remaining flags are only meaningful for the performance core

	/// <summary>Bit 2.</summary>
	UseVulkan = 0x4,

	/// <summary>Bit 3.</summary>
	SuperSample = 0x8,
}
|
||||
|
||||
/// <summary>
/// Upscale choices for the Vulkan renderer; the numeric value is used directly as the upscale factor.
/// </summary>
public enum VulkanUpscaleOpts : uint
{
	/// <summary>Factor 1.</summary>
	SD = 1,

	/// <summary>Factor 2.</summary>
	HD = 2,

	/// <summary>Factor 4.</summary>
	UHD = 4,
}
|
||||
|
||||
/// <summary>
/// Data block handed to the native <c>Init</c> call. The layout is sequential, so the
/// declaration order of the fields must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class LoadData
{
	/// <summary>Pointer to the PIF ROM bytes.</summary>
	public IntPtr PifData;

	/// <summary>Length of the PIF ROM in bytes.</summary>
	public int PifLen;

	/// <summary>Pointer to the game ROM bytes.</summary>
	public IntPtr RomData;

	/// <summary>Length of the game ROM in bytes.</summary>
	public int RomLen;

	// the remaining data is only meaningful for the performance core

	/// <summary>Upscale factor for the Vulkan renderer.</summary>
	public int VulkanUpscale;
}
|
||||
|
||||
[BizImport(CC)]
|
||||
public abstract bool Init(ControllerType[] controllerSettings, bool restrictAnalogRange, bool pal);
|
||||
public abstract bool Init(LoadData loadData, ControllerType[] controllerSettings, LoadFlags loadFlags);
|
||||
|
||||
[BizImport(CC)]
|
||||
public abstract bool GetRumbleStatus(int num);
|
||||
}
|
||||
|
||||
/// <summary>
/// Interop type for the accuracy core; adds no imports on top of <see cref="LibAres64"/>.
/// </summary>
public abstract class LibAres64Accuracy : LibAres64 { }
|
||||
|
||||
/// <summary>
/// Interop type for the performance core: the imports of <see cref="LibAres64"/> plus
/// teardown and savestate entry points.
/// </summary>
public abstract class LibAres64Performance : LibAres64
{
	/// <summary>Tears down the native core.</summary>
	[BizImport(CC)]
	public abstract void Deinit();

	/// <summary>Returns the size in bytes of a serialized state.</summary>
	[BizImport(CC)]
	public abstract int SerializeSize();

	/// <summary>Writes the serialized state into <paramref name="buf"/>.</summary>
	[BizImport(CC)]
	public abstract void Serialize(byte[] buf);

	/// <summary>Loads a state of <paramref name="sz"/> bytes from <paramref name="buf"/>.</summary>
	/// <returns>the success value reported by the native side</returns>
	[BizImport(CC)]
	public abstract bool Unserialize(byte[] buf, int sz);
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,8 @@ namespace BizHawk.Emulation.Cores
|
|||
public static class CoreNames
|
||||
{
|
||||
public const string A7800Hawk = "A7800Hawk";
|
||||
public const string Ares64 = "Ares64";
|
||||
public const string Ares64Accuracy = "Ares64 (Accuracy)";
|
||||
public const string Ares64Performance = "Ares64 (Performance)";
|
||||
public const string Atari2600Hawk = "Atari2600Hawk";
|
||||
public const string Bsnes = "BSNES";
|
||||
public const string Bsnes115 = "BSNESv115+";
|
||||
|
|
|
@ -26,11 +26,21 @@ namespace BizHawk.Emulation.Cores
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (core is Ares64 ares64)
|
||||
else if (core is Consoles.Nintendo.Ares64.Accuracy.Ares64 ares64Acc)
|
||||
{
|
||||
for (var i = 0; i < 4; i++)
|
||||
{
|
||||
if (ares64.ControllerSettings[i] != LibAres64.ControllerType.Unplugged)
|
||||
if (ares64Acc.ControllerSettings[i] != LibAres64.ControllerType.Unplugged)
|
||||
{
|
||||
yield return StandardController(i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (core is Consoles.Nintendo.Ares64.Performance.Ares64 ares64Perf)
|
||||
{
|
||||
for (var i = 0; i < 4; i++)
|
||||
{
|
||||
if (ares64Perf.ControllerSettings[i] != LibAres64.ControllerType.Unplugged)
|
||||
{
|
||||
yield return StandardController(i + 1);
|
||||
}
|
||||
|
|
|
@ -5,13 +5,14 @@ MAME_PATH = $(ROOT_DIR)/ares/thirdparty/mame
|
|||
|
||||
CXXFLAGS := -std=c++17 -msse4.2 \
|
||||
-I../libco -I.$(ROOT_DIR)/ares/ -I.$(ROOT_DIR)/ares/thirdparty/ -I.$(ARES_PATH) \
|
||||
-Werror=int-to-pointer-cast -Wno-unused-but-set-variable \
|
||||
-Werror=int-to-pointer-cast -Wno-unused-but-set-variable -Wno-delete-non-virtual-dtor \
|
||||
-Wno-parentheses -Wno-reorder -Wno-unused-variable \
|
||||
-Wno-sign-compare -Wno-switch -Wno-unused-local-typedefs \
|
||||
-fno-strict-aliasing -fwrapv -fno-operator-names \
|
||||
-I.$(MAME_PATH)/devices -I.$(MAME_PATH)/emu \
|
||||
-I.$(MAME_PATH)/lib/util -I.$(MAME_PATH)/mame \
|
||||
-I.$(MAME_PATH)/osd -DMAME_RDP -DLSB_FIRST -DPTR64 -DSDLMAME_EMSCRIPTEN
|
||||
-I.$(MAME_PATH)/osd -DMAME_RDP -DLSB_FIRST -DPTR64 -DSDLMAME_EMSCRIPTEN \
|
||||
-DWATERBOXED
|
||||
|
||||
TARGET = ares64.wbx
|
||||
|
|
@ -1,7 +1,16 @@
|
|||
#include <n64/n64.hpp>
|
||||
|
||||
#if WATERBOXED
|
||||
#include <emulibc.h>
|
||||
#include <waterboxcore.h>
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
|
||||
#ifndef WATERBOXED
|
||||
#define ECL_EXPORT __attribute__((visibility("default")))
|
||||
#include "../emulibc/waterboxcore.h"
|
||||
#endif
|
||||
|
||||
#define EXPORT extern "C" ECL_EXPORT
|
||||
|
||||
|
@ -38,7 +47,7 @@ struct BizPlatform : ares::Platform
|
|||
auto video(ares::Node::Video::Screen, const u32*, u32, u32, u32) -> void override;
|
||||
auto input(ares::Node::Input::Input) -> void override;
|
||||
|
||||
ares::VFS::Pak bizpak = new vfs::directory;
|
||||
ares::VFS::Pak bizpak = nullptr;
|
||||
ares::Node::Audio::Stream stream = nullptr;
|
||||
u32* videobuf = nullptr;
|
||||
u32 pitch = 0;
|
||||
|
@ -84,16 +93,19 @@ auto BizPlatform::input(ares::Node::Input::Input node) -> void
|
|||
}
|
||||
};
|
||||
|
||||
static ares::Node::System root;
|
||||
static BizPlatform platform;
|
||||
static ares::Node::System root = nullptr;
|
||||
static BizPlatform* platform = nullptr;
|
||||
static array_view<u8>* pifData = nullptr;
|
||||
static array_view<u8>* romData = nullptr;
|
||||
static array_view<u8>* saveData = nullptr;
|
||||
|
||||
static inline void HackeryDoo()
|
||||
{
|
||||
root->run();
|
||||
root->run();
|
||||
platform.newframe = false;
|
||||
platform->newframe = false;
|
||||
f64 buf[2];
|
||||
while (platform.stream->pending()) platform.stream->read(buf);
|
||||
while (platform->stream->pending()) platform->stream->read(buf);
|
||||
}
|
||||
|
||||
typedef enum
|
||||
|
@ -311,46 +323,71 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
|
||||
namespace ares::Nintendo64 { extern bool RestrictAnalogRange; }
|
||||
|
||||
EXPORT bool Init(ControllerType* controllers, bool restrictAnalogRange, bool pal)
|
||||
bool Inited = false;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
FILE* f;
|
||||
array_view<u8>* data;
|
||||
u8* PifData;
|
||||
u32 PifLen;
|
||||
u8* RomData;
|
||||
u32 RomLen;
|
||||
#ifndef WATERBOXED
|
||||
u32 VulkanUpscale;
|
||||
#endif
|
||||
} LoadData;
|
||||
|
||||
// Option bits accepted by Init(). The Vulkan and supersampling bits only exist
// in builds that are not waterboxed.
typedef enum
{
	RESTRICT_ANALOG_RANGE = 0x1,
	IS_PAL = 0x2,
#ifndef WATERBOXED
	USE_VULKAN = 0x4,
	SUPER_SAMPLE = 0x8,
#endif
} LoadFlags;
|
||||
|
||||
EXPORT void Deinit();
|
||||
|
||||
EXPORT bool Init(LoadData* loadData, ControllerType* controllers, LoadFlags loadFlags)
|
||||
{
|
||||
if (Inited) Deinit();
|
||||
|
||||
platform = new BizPlatform;
|
||||
platform->bizpak = new vfs::directory;
|
||||
|
||||
u8* data;
|
||||
u32 len;
|
||||
string name;
|
||||
|
||||
bool pal = loadFlags & IS_PAL;
|
||||
|
||||
name = pal ? "pif.pal.rom" : "pif.ntsc.rom";
|
||||
f = fopen(name, "rb");
|
||||
fseek(f, 0, SEEK_END);
|
||||
len = ftell(f);
|
||||
data = new array_view<u8>(new u8[len], len);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
fread((void*)data->data(), 1, len, f);
|
||||
fclose(f);
|
||||
platform.bizpak->append(name, *data);
|
||||
len = loadData->PifLen;
|
||||
data = new u8[len];
|
||||
memcpy(data, loadData->PifData, len);
|
||||
pifData = new array_view<u8>(data, len);
|
||||
platform->bizpak->append(name, *pifData);
|
||||
|
||||
name = "program.rom";
|
||||
f = fopen(name, "rb");
|
||||
fseek(f, 0, SEEK_END);
|
||||
len = ftell(f);
|
||||
data = new array_view<u8>(new u8[len], len);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
fread((void*)data->data(), 1, len, f);
|
||||
fclose(f);
|
||||
platform.bizpak->append(name, *data);
|
||||
len = loadData->RomLen;
|
||||
data = new u8[len];
|
||||
memcpy(data, loadData->RomData, len);
|
||||
romData = new array_view<u8>(data, len);
|
||||
platform->bizpak->append(name, *romData);
|
||||
|
||||
string region = pal ? "PAL" : "NTSC";
|
||||
platform.bizpak->setAttribute("region", region);
|
||||
platform->bizpak->setAttribute("region", region);
|
||||
|
||||
string cic = pal ? "CIC-NUS-7101" : "CIC-NUS-6102";
|
||||
u32 crc32 = Hash::CRC32({&((u8*)data->data())[0x40], 0x9C0}).value();
|
||||
u32 crc32 = Hash::CRC32({&data[0x40], 0x9C0}).value();
|
||||
if (crc32 == 0x1DEB51A9) cic = pal ? "CIC-NUS-7102" : "CIC-NUS-6101";
|
||||
if (crc32 == 0xC08E5BD6) cic = pal ? "CIC-NUS-7101" : "CIC-NUS-6102";
|
||||
if (crc32 == 0x03B8376A) cic = pal ? "CIC-NUS-7103" : "CIC-NUS-6103";
|
||||
if (crc32 == 0xCF7F41DC) cic = pal ? "CIC-NUS-7105" : "CIC-NUS-6105";
|
||||
if (crc32 == 0xD1059C6A) cic = pal ? "CIC-NUS-7106" : "CIC-NUS-6106";
|
||||
platform.bizpak->setAttribute("cic", cic);
|
||||
platform->bizpak->setAttribute("cic", cic);
|
||||
|
||||
SaveType save = DetectSaveType((u8*)data->data());
|
||||
SaveType save = DetectSaveType(data);
|
||||
if (save != NONE)
|
||||
{
|
||||
switch (save)
|
||||
|
@ -360,17 +397,25 @@ EXPORT bool Init(ControllerType* controllers, bool restrictAnalogRange, bool pal
|
|||
case SRAM32KB: len = 32 * 1024; name = "save.ram"; break;
|
||||
case SRAM96KB: len = 96 * 1024; name = "save.ram"; break;
|
||||
case FLASH128KB: len = 128 * 1024; name = "save.flash"; break;
|
||||
default: return false;
|
||||
default: Deinit(); return false;
|
||||
}
|
||||
data = new array_view<u8>(new u8[len], len);
|
||||
memset((void*)data->data(), 0xFF, len);
|
||||
platform.bizpak->append(name, *data);
|
||||
data = new u8[len];
|
||||
memset(data, 0xFF, len);
|
||||
saveData = new array_view<u8>(data, len);
|
||||
platform->bizpak->append(name, *saveData);
|
||||
}
|
||||
|
||||
ares::platform = &platform;
|
||||
ares::platform = platform;
|
||||
|
||||
#ifndef WATERBOXED
|
||||
ares::Nintendo64::option("Enable Vulkan", !!(loadFlags & USE_VULKAN));
|
||||
ares::Nintendo64::option("Quality", loadData->VulkanUpscale == 1 ? "SD" : (loadData->VulkanUpscale == 2 ? "HD" : "UHD"));
|
||||
ares::Nintendo64::option("Supersampling", !!(loadFlags & SUPER_SAMPLE));
|
||||
#endif
|
||||
|
||||
if (!ares::Nintendo64::load(root, {"[Nintendo] Nintendo 64 (", region, ")"}))
|
||||
{
|
||||
Deinit();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -381,6 +426,7 @@ EXPORT bool Init(ControllerType* controllers, bool restrictAnalogRange, bool pal
|
|||
}
|
||||
else
|
||||
{
|
||||
Deinit();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -393,7 +439,6 @@ EXPORT bool Init(ControllerType* controllers, bool restrictAnalogRange, bool pal
|
|||
auto peripheral = port->allocate("Gamepad");
|
||||
port->connect();
|
||||
|
||||
string name;
|
||||
switch (controllers[i])
|
||||
{
|
||||
case Mempak: name = "Controller Pak"; break;
|
||||
|
@ -408,22 +453,51 @@ EXPORT bool Init(ControllerType* controllers, bool restrictAnalogRange, bool pal
|
|||
}
|
||||
else
|
||||
{
|
||||
Deinit();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Deinit();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ares::Nintendo64::RestrictAnalogRange = restrictAnalogRange;
|
||||
ares::Nintendo64::RestrictAnalogRange = loadFlags & RESTRICT_ANALOG_RANGE;
|
||||
|
||||
root->power(false);
|
||||
HackeryDoo();
|
||||
Inited = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Tears down everything Init() allocated: unloads the emulated system, then
// frees the platform object and the PIF / ROM / save buffers.
//
// Init() calls this on its own failure paths, so it has to tolerate partially
// initialised state. Every freed global is reset to nullptr so that a second
// call (e.g. a failed Init() followed by the host disposing the core) does not
// free the same memory twice.
EXPORT void Deinit()
{
	if (root) root->unload();

	if (platform)
	{
		if (platform->bizpak) platform->bizpak.reset();
		delete platform;
		platform = nullptr;
		// Init() points ares::platform at this object; do not leave it dangling.
		ares::platform = nullptr;
	}

	// Each view wraps a buffer allocated with new[]; release the buffer first,
	// then the view object itself, and finally clear the global.
	auto freeView = [](auto*& view)
	{
		if (!view) return;
		delete[] (u8*)view->data();
		delete view;
		view = nullptr;
	};

	freeView(pifData);
	freeView(romData);
	freeView(saveData);

	Inited = false;
}
|
||||
|
||||
EXPORT bool GetRumbleStatus(u32 num)
|
||||
{
|
||||
ares::Nintendo64::Gamepad* c = nullptr;
|
||||
|
@ -437,6 +511,23 @@ EXPORT bool GetRumbleStatus(u32 num)
|
|||
return c ? c->motor->enable() : false;
|
||||
}
|
||||
|
||||
// Reports how many bytes a savestate occupies by serializing the system once
// and measuring the result.
EXPORT u32 SerializeSize()
{
	auto state = root->serialize(false);
	return state.size();
}
|
||||
|
||||
// Serializes the system and copies the resulting bytes into buf.
// The copy length is the size of the freshly produced state; buf is written
// without any bounds check here.
EXPORT void Serialize(u8* buf)
{
	auto state = root->serialize(false);
	const auto length = state.size();
	memcpy(buf, state.data(), length);
}
|
||||
|
||||
// Wraps the sz bytes at buf in a serializer and hands it to the system,
// returning whatever root->unserialize() reports.
EXPORT bool Unserialize(u8* buf, u32 sz)
{
	serializer state(buf, sz);
	const bool restored = root->unserialize(state);
	return restored;
}
|
||||
|
||||
#define MAYBE_ADD_MEMORY_DOMAIN(mem, name, flags) do { \
|
||||
if (ares::Nintendo64::mem.data) \
|
||||
{ \
|
||||
|
@ -544,39 +635,39 @@ EXPORT void FrameAdvance(MyFrameInfo* f)
|
|||
UPDATE_CONTROLLER(3);
|
||||
UPDATE_CONTROLLER(4);
|
||||
|
||||
platform.lagged = true;
|
||||
platform->lagged = true;
|
||||
|
||||
root->run();
|
||||
|
||||
f->Width = platform.width;
|
||||
f->Height = platform.height;
|
||||
if (platform.newframe)
|
||||
f->Width = platform->width;
|
||||
f->Height = platform->height;
|
||||
if (platform->newframe)
|
||||
{
|
||||
u32* src = platform.videobuf;
|
||||
u32* src = platform->videobuf;
|
||||
u32* dst = f->VideoBuffer;
|
||||
for (int i = 0; i < f->Height; i++)
|
||||
{
|
||||
memcpy(dst, src, f->Width * 4);
|
||||
dst += f->Width;
|
||||
src += platform.pitch;
|
||||
src += platform->pitch;
|
||||
}
|
||||
platform.newframe = false;
|
||||
platform->newframe = false;
|
||||
}
|
||||
|
||||
s16* soundbuf = f->SoundBuffer;
|
||||
while (platform.stream->pending())
|
||||
while (platform->stream->pending())
|
||||
{
|
||||
f64 buf[2];
|
||||
platform.stream->read(buf);
|
||||
platform->stream->read(buf);
|
||||
*soundbuf++ = (s16)std::clamp(buf[0] * 32768, -32768.0, 32767.0);
|
||||
*soundbuf++ = (s16)std::clamp(buf[1] * 32768, -32768.0, 32767.0);
|
||||
f->Samples++;
|
||||
}
|
||||
|
||||
f->Lagged = platform.lagged;
|
||||
f->Lagged = platform->lagged;
|
||||
}
|
||||
|
||||
// Stores the host-supplied input callback on the platform object.
// platform is a heap-allocated pointer (it is released with `delete platform`
// in Deinit()), so the member must be reached with ->; the older
// `platform.inputcb` form does not apply to a pointer and is dropped.
EXPORT void SetInputCallback(void (*callback)())
{
	platform->inputcb = callback;
}
|
||||
|
|
|
@ -0,0 +1,160 @@
|
|||
# Builds the non-waterboxed ("performance") ares64 core as a shared library
# and copies it into the Assets/dll and output/dll directories.
#
# Must be run from the directory that contains Performance.mak: ROOT_DIR is
# derived from `realpath Performance.mak`, and the pattern rules below look up
# sources relative to the current directory.

# Recursively expanded (=) on purpose: ROOT_DIR is only assigned further down,
# so these resolve to absolute paths where they are expanded late (SRCS) and to
# paths relative to "." where they are expanded early (the -I flags).
ARES_PATH = $(ROOT_DIR)/ares/ares
MAME_PATH = $(ROOT_DIR)/ares/thirdparty/mame
SLJIT_PATH = $(ROOT_DIR)/ares/thirdparty/sljit

CCFLAGS := -std=c99 -Wall -Wno-format -Wno-parentheses

# NOTE: this is a simply expanded (:=) variable and ROOT_DIR is still empty
# here, so "-I.$(ROOT_DIR)/ares/" becomes "-I./ares/". Do not move the ROOT_DIR
# assignment above this point without also dropping the leading "." from the
# include flags.
CXXFLAGS := -std=c++17 -msse4.2 -O3 -flto -fvisibility=internal \
	-I../libco -I.$(ROOT_DIR)/ares/ -I.$(ROOT_DIR)/ares/thirdparty/ -I.$(ARES_PATH) \
	-Werror=int-to-pointer-cast -Wno-unused-but-set-variable \
	-Wno-parentheses -Wno-reorder -Wno-unused-variable \
	-Wno-sign-compare -Wno-switch -Wno-unused-local-typedefs \
	-fno-strict-aliasing -fwrapv -fno-operator-names \
	-I.$(MAME_PATH)/devices -I.$(MAME_PATH)/emu \
	-I.$(MAME_PATH)/lib/util -I.$(MAME_PATH)/mame \
	-I.$(MAME_PATH)/osd -DMAME_RDP -DLSB_FIRST -DPTR64 -DSLJIT_HAVE_CONFIG_PRE=1 -DSLJIT_HAVE_CONFIG_POST=1 -fPIC

# Emit a .d file next to every object so header changes trigger rebuilds.
# Kept out of CCFLAGS/CXXFLAGS because those are also passed to the link step.
DEPFLAGS := -MMD -MP

LDFLAGS := -shared

ifeq ($(OS),Windows_NT)
CCFLAGS += -DVK_USE_PLATFORM_WIN32_KHR
CXXFLAGS += -DVK_USE_PLATFORM_WIN32_KHR -DOSD_WINDOWS=1
TARGET = libares64.dll
else
CXXFLAGS += -DSDLMAME_LINUX
TARGET = libares64.so
endif

SRCS_LIBCO = \
	$(ROOT_DIR)/ares/libco/libco.c

SRCS_PROCESSORS = \
	$(ARES_PATH)/component/processor/sm5k/sm5k.cpp

SRCS_ARES = \
	$(ARES_PATH)/ares/ares.cpp \
	$(ARES_PATH)/ares/memory/fixed-allocator.cpp

SRCS_N64 = \
	$(ARES_PATH)/n64/memory/memory.cpp \
	$(ARES_PATH)/n64/system/system.cpp \
	$(ARES_PATH)/n64/cartridge/cartridge.cpp \
	$(ARES_PATH)/n64/controller/controller.cpp \
	$(ARES_PATH)/n64/dd/dd.cpp \
	$(ARES_PATH)/n64/sp/sp.cpp \
	$(ARES_PATH)/n64/dp/dp.cpp \
	$(ARES_PATH)/n64/mi/mi.cpp \
	$(ARES_PATH)/n64/vi/vi.cpp \
	$(ARES_PATH)/n64/ai/ai.cpp \
	$(ARES_PATH)/n64/pi/pi.cpp \
	$(ARES_PATH)/n64/ri/ri.cpp \
	$(ARES_PATH)/n64/si/si.cpp \
	$(ARES_PATH)/n64/rdram/rdram.cpp \
	$(ARES_PATH)/n64/cpu/cpu.cpp \
	$(ARES_PATH)/n64/rdp/rdp.cpp \
	$(ARES_PATH)/n64/rsp/rsp.cpp \
	$(ARES_PATH)/n64/vulkan/vulkan.cpp

PARALLEL_RDP_IMPLEMENTATION = $(ARES_PATH)/n64/vulkan/parallel-rdp

SRCS_PARALLEL_RDP = \
	$(wildcard $(PARALLEL_RDP_IMPLEMENTATION)/parallel-rdp/*.cpp) \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/buffer.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/buffer_pool.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/command_buffer.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/command_pool.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/context.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/cookie.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/descriptor_set.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/device.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/event_manager.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/fence.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/fence_manager.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/image.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/memory_allocator.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/pipeline_event.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/query_pool.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/render_pass.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/sampler.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/semaphore.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/semaphore_manager.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/shader.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/texture_format.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/util/logging.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/util/thread_id.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/util/aligned_alloc.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/util/timer.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/util/timeline_trace_file.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/util/thread_name.cpp \
	$(PARALLEL_RDP_IMPLEMENTATION)/volk/volk.c

PARALLEL_RDP_INCLUDE_DIRS = \
	-I.$(PARALLEL_RDP_IMPLEMENTATION)/parallel-rdp \
	-I.$(PARALLEL_RDP_IMPLEMENTATION)/volk \
	-I.$(PARALLEL_RDP_IMPLEMENTATION)/vulkan \
	-I.$(PARALLEL_RDP_IMPLEMENTATION)/vulkan-headers/include \
	-I.$(PARALLEL_RDP_IMPLEMENTATION)/util

# Appending to := variables expands immediately, so (as above) ROOT_DIR is
# still empty and these include directories end up relative to ".".
CXXFLAGS += $(PARALLEL_RDP_INCLUDE_DIRS) -DVULKAN -DGRANITE_VULKAN_MT
CCFLAGS += $(PARALLEL_RDP_INCLUDE_DIRS)

SRCS_MAME = \
	$(MAME_PATH)/emu/emucore.cpp \
	$(MAME_PATH)/lib/util/delegate.cpp \
	$(MAME_PATH)/lib/util/strformat.cpp \
	$(MAME_PATH)/mame/video/n64.cpp \
	$(MAME_PATH)/mame/video/pin64.cpp \
	$(MAME_PATH)/mame/video/rdpblend.cpp \
	$(MAME_PATH)/mame/video/rdptpipe.cpp \
	$(MAME_PATH)/osd/osdcore.cpp \
	$(MAME_PATH)/osd/osdsync.cpp

SRCS_SLJIT = \
	$(SLJIT_PATH)/../sljitAllocator.cpp \
	$(SLJIT_PATH)/sljit_src/sljitLir.c

SRCS = $(SRCS_LIBCO) $(SRCS_PROCESSORS) $(SRCS_ARES) $(SRCS_N64) $(SRCS_PARALLEL_RDP) $(SRCS_MAME) $(SRCS_SLJIT) BizInterface.cpp

# From here on ROOT_DIR is the absolute directory of this makefile.
ROOT_DIR := $(shell dirname $(realpath Performance.mak))
OUTPUTDLL_DIR := $(realpath $(ROOT_DIR)/../../Assets/dll)
OUTPUTDLLCOPY_DIR := $(realpath $(ROOT_DIR)/../../output/dll)
OUT_DIR := $(ROOT_DIR)/obj
OBJ_DIR := $(OUT_DIR)/release_performance

CC := gcc
CXX := g++

# Mirror the source tree under OBJ_DIR: <ROOT_DIR>/x/y.cpp -> <OBJ_DIR>/x/y.cpp.o
_OBJS := $(addsuffix .o,$(realpath $(SRCS)))
OBJS := $(patsubst $(ROOT_DIR)%,$(OBJ_DIR)%,$(_OBJS))

$(OBJ_DIR)/%.c.o: %.c
	@echo cc $<
	@mkdir -p $(@D)
	@$(CC) -c -o $@ $< $(CCFLAGS) $(DEPFLAGS) $(PER_FILE_FLAGS_$<)
$(OBJ_DIR)/%.cpp.o: %.cpp
	@echo cxx $<
	@mkdir -p $(@D)
	@$(CXX) -c -o $@ $< $(CXXFLAGS) $(DEPFLAGS) $(PER_FILE_FLAGS_$<)

.DEFAULT_GOAL := install

.PHONY: release install

TARGET_RELEASE := $(OBJ_DIR)/$(TARGET)

release: $(TARGET_RELEASE)

$(TARGET_RELEASE): $(OBJS)
	@echo ld $@
	@$(CXX) -o $@ $(LDFLAGS) $(CCFLAGS) $(CXXFLAGS) $(OBJS)

install: $(TARGET_RELEASE)
	@cp -f $(TARGET_RELEASE) $(OUTPUTDLL_DIR)/$(TARGET)
	@cp -f $(TARGET_RELEASE) $(OUTPUTDLLCOPY_DIR)/$(TARGET)
	@echo Release build of $(TARGET) installed.

.PHONY: clean
clean:
	rm -rf $(OUT_DIR)

# Pull in the per-object dependency files written by $(DEPFLAGS); missing
# files (first build, after clean) are silently ignored.
-include $(OBJS:.o=.d)
|
|
@ -1,8 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef WATERBOXED
|
||||
#include <emulibc.h>
|
||||
|
||||
#include <libco.h>
|
||||
#else
|
||||
#include <libco/libco.h>
|
||||
#endif
|
||||
|
||||
#include <sljit.h>
|
||||
|
||||
#include <nall/platform.hpp>
|
||||
|
@ -57,7 +61,11 @@ namespace ares {
|
|||
}
|
||||
|
||||
namespace Video {
|
||||
#ifdef WATERBOXED
|
||||
static constexpr bool Threaded = false;
|
||||
#else
|
||||
static constexpr bool Threaded = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Constants {
|
||||
|
|
|
@ -3,10 +3,17 @@ Screen::Screen(string name, u32 width, u32 height) : Video(name) {
|
|||
_canvasHeight = height;
|
||||
|
||||
if(width && height) {
|
||||
#ifdef WATERBOXED
|
||||
_inputA = alloc_invisible<u32>(width * height);
|
||||
_inputB = alloc_invisible<u32>(width * height);
|
||||
_output = alloc_invisible<u32>(width * height);
|
||||
_rotate = alloc_invisible<u32>(width * height);
|
||||
#else
|
||||
_inputA = new u32[width * height]();
|
||||
_inputB = new u32[width * height]();
|
||||
_output = new u32[width * height]();
|
||||
_rotate = new u32[width * height]();
|
||||
#endif
|
||||
|
||||
if constexpr(ares::Video::Threaded) {
|
||||
_thread = nall::thread::create({&Screen::main, this});
|
||||
|
|
|
@ -16,6 +16,7 @@ struct Platform {
|
|||
virtual auto pak(Node::Object) -> shared_pointer<vfs::directory> { return {}; }
|
||||
virtual auto event(Event) -> void {}
|
||||
virtual auto log(string_view message) -> void {}
|
||||
virtual auto status(string_view message) -> void {}
|
||||
virtual auto video(Node::Video::Screen, const u32* data, u32 pitch, u32 width, u32 height) -> void {}
|
||||
virtual auto audio(Node::Audio::Stream) -> void {}
|
||||
virtual auto input(Node::Input::Input) -> void {}
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
struct Accuracy {
|
||||
//enable all accuracy flags
|
||||
#ifdef WATERBOXED
|
||||
static constexpr bool Reference = 1;
|
||||
#else
|
||||
static constexpr bool Reference = 0;
|
||||
#endif
|
||||
|
||||
struct CPU {
|
||||
static constexpr bool Interpreter = 0 | Reference;
|
||||
|
|
|
@ -33,10 +33,12 @@ Gamepad::~Gamepad() {
|
|||
}
|
||||
|
||||
auto Gamepad::save() -> void {
  // No-op: the whole body is commented out. The disabled code is kept here
  // for reference:
  //
  //   if(!slot) return;
  //   if(slot->name() == "Controller Pak") {
  //     ram.save(pak->write("save.pak"));
  //   }
}
|
||||
|
||||
auto Gamepad::allocate(string name) -> Node::Peripheral {
|
||||
|
|
|
@ -99,6 +99,7 @@ auto System::unload() -> void {
|
|||
vulkan.unload();
|
||||
#endif
|
||||
cartridgeSlot.unload();
|
||||
puts("unloading port 1");
|
||||
controllerPort1.unload();
|
||||
controllerPort2.unload();
|
||||
controllerPort3.unload();
|
||||
|
@ -119,12 +120,14 @@ auto System::unload() -> void {
|
|||
}
|
||||
|
||||
auto System::save() -> void {
  // No-op: the whole body is commented out. The disabled code is kept here
  // for reference:
  //
  //   if(!node) return;
  //   cartridge.save();
  //   controllerPort1.save();
  //   controllerPort2.save();
  //   controllerPort3.save();
  //   controllerPort4.save();
}
|
||||
|
||||
auto System::power(bool reset) -> void {
|
||||
|
|
|
@ -100,7 +100,7 @@ auto VI::writeWord(u32 address, u32 data_) -> void {
|
|||
n32 data = data_;
|
||||
|
||||
#if defined(VULKAN)
|
||||
vulkan.writeWord(address, data);
|
||||
if (vulkan.enable) vulkan.writeWord(address, data);
|
||||
#endif
|
||||
|
||||
if(address == 0) {
|
||||
|
|
|
@ -10,11 +10,16 @@ VI vi;
|
|||
auto VI::load(Node::Object parent) -> void {
|
||||
node = parent->append<Node::Object>("VI");
|
||||
|
||||
u32 width = 640;
|
||||
u32 height = 576;
|
||||
|
||||
#if defined(VULKAN)
|
||||
screen = node->append<Node::Video::Screen>("Screen", vulkan.outputUpscale * 640, vulkan.outputUpscale * 576);
|
||||
#else
|
||||
screen = node->append<Node::Video::Screen>("Screen", 640, 576);
|
||||
if (vulkan.enable) {
|
||||
width *= vulkan.outputUpscale;
|
||||
height *= vulkan.outputUpscale;
|
||||
}
|
||||
#endif
|
||||
screen = node->append<Node::Video::Screen>("Screen", width, height);
|
||||
screen->setRefresh({&VI::refresh, this});
|
||||
screen->colors((1 << 24) + (1 << 15), [&](n32 color) -> n64 {
|
||||
if(color < (1 << 24)) {
|
||||
|
@ -31,11 +36,16 @@ auto VI::load(Node::Object parent) -> void {
|
|||
return a << 48 | r << 32 | g << 16 | b << 0;
|
||||
}
|
||||
});
|
||||
|
||||
#if defined(VULKAN)
|
||||
if(vulkan.enable) {
|
||||
screen->setSize(vulkan.outputUpscale * 640, vulkan.outputUpscale * 480);
|
||||
if(!vulkan.supersampleScanout) {
|
||||
screen->setScale(1.0 / vulkan.outputUpscale, 1.0 / vulkan.outputUpscale);
|
||||
}
|
||||
} else {
|
||||
screen->setSize(640, 480);
|
||||
}
|
||||
#else
|
||||
screen->setSize(640, 480);
|
||||
#endif
|
||||
|
@ -62,8 +72,10 @@ auto VI::main() -> void {
|
|||
io.field = io.field + 1 & io.serrate;
|
||||
if(!io.field) {
|
||||
#if defined(VULKAN)
|
||||
if (vulkan.enable) {
|
||||
gpuOutputValid = vulkan.scanoutAsync(io.field);
|
||||
vulkan.frame();
|
||||
}
|
||||
#endif
|
||||
|
||||
refreshed = true;
|
||||
|
@ -81,7 +93,7 @@ auto VI::step(u32 clocks) -> void {
|
|||
|
||||
auto VI::refresh() -> void {
|
||||
#if defined(VULKAN)
|
||||
if(gpuOutputValid) {
|
||||
if(vulkan.enable && gpuOutputValid) {
|
||||
const u8* rgba = nullptr;
|
||||
u32 width = 0, height = 0;
|
||||
vulkan.mapScanoutRead(rgba, width, height);
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
31ea5eb2d6fcb2d8f1df5f0951364322d09ac01a
|
|
@ -0,0 +1,20 @@
|
|||
Copyright (c) 2020 Themaister
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,265 @@
|
|||
# paraLLEl-RDP
|
||||
|
||||
This project is a revival and complete rewrite of the old, defunct paraLLEl-RDP project.
|
||||
|
||||
The goal is to implement the Nintendo 64 RDP graphics chip as accurately as possible using Vulkan compute.
|
||||
The implementation aims to be bitexact with the
|
||||
[Angrylion-Plus](https://github.com/ata4/angrylion-rdp-plus) reference renderer where possible.
|
||||
|
||||
## Disclaimer
|
||||
|
||||
While paraLLEl-RDP uses [Angrylion-Plus](https://github.com/ata4/angrylion-rdp-plus)
|
||||
as an implementation reference, it is not a port, and not a derived codebase of said project.
|
||||
It is written from scratch by studying [Angrylion-Plus](https://github.com/ata4/angrylion-rdp-plus)
|
||||
and trying to understand what is going on.
|
||||
The test suite uses [Angrylion-Plus](https://github.com/ata4/angrylion-rdp-plus) as a reference
|
||||
to validate implementation and cross-checking behavior.
|
||||
|
||||
## Use cases
|
||||
|
||||
- **Much** faster LLE RDP emulation of N64 compared to a CPU implementation
|
||||
as parallel graphics workloads are offloaded to the GPU.
|
||||
Emulation performance is now completely bound by CPU and LLE RSP performance.
|
||||
Early benchmarking results suggest 2000 - 5000 VI/s being achieved on mid-range desktop GPUs based on timestamp data.
|
||||
There is no way the CPU emulation can keep up with that, but that means this should
|
||||
scale down to fairly gimped GPUs as well, assuming the driver requirements are met.
|
||||
- A backend renderer for standalone engines which aim to efficiently reproduce faithful N64 graphics.
|
||||
- Hopefully, an easier to understand implementation than the reference renderer.
|
||||
- An esoteric use case of advanced Vulkan compute programming.
|
||||
|
||||
## Missing features
|
||||
|
||||
The implementation is quite complete, and compatibility is very high in the limited amount of content I've tested.
|
||||
However, not every single feature is supported at this moment.
|
||||
Ticking the last boxes depends mostly on real content making use of said features.
|
||||
|
||||
- Color combiner chroma keying
|
||||
- Various "bugs" / questionable behavior that seems meaningless to emulate
|
||||
- Certain extreme edge cases in TMEM upload. The implementation has tests for many "crazy" edge cases though.
|
||||
- ... possibly other obscure features
|
||||
|
||||
The VI is essentially complete. A fancy deinterlacer might be useful to add since we have plenty of GPU cycles to spare in the graphics queue.
|
||||
The VI filtering is always turned on if the game requests it, but features can selectively be turned off for the pixel purists.
|
||||
|
||||
## Environment variables for development / testing
|
||||
|
||||
### `RDP_DEBUG` / `RDP_DEBUG_X` / `RDP_DEBUG_Y`
|
||||
|
||||
Supports printf in shaders, which is extremely useful to drill down difficult bugs.
|
||||
Only printfs from certain pixels can be filtered through to avoid spam.
|
||||
|
||||
### `VI_DEBUG` / `VI_DEBUG_X` / `VI_DEBUG_Y`
|
||||
|
||||
Same as `RDP_DEBUG` but for the VI.
|
||||
|
||||
### `PARALLEL_RDP_MEASURE_SYNC_TIME`
|
||||
|
||||
Measures time stalled in `CommandProcessor::wait_for_timeline`. Useful to measure
|
||||
CPU overhead in hard-synced emulator integrations.
|
||||
|
||||
### `PARALLEL_RDP_SMALL_TYPES=0`
|
||||
|
||||
Force-disables 8/16-bit arithmetic support. Useful when suspecting driver bugs.
|
||||
|
||||
### `PARALLEL_RDP_UBERSHADER=1`
|
||||
|
||||
Forces the use of ubershaders. Can be extremely slow depending on the shader compiler.
|
||||
|
||||
### `PARALLEL_RDP_FORCE_SYNC_SHADER=1`
|
||||
|
||||
Disables the async pipeline optimization and blocks on every shader compile.
|
||||
Only use if the ubershader crashes, since this adds the dreaded shader compilation stalls.
|
||||
|
||||
### `PARALLEL_RDP_BENCH=1`
|
||||
|
||||
Measures RDP rendering time spent on GPU using Vulkan timestamps.
|
||||
At end of a run, reports average time spent per render pass,
|
||||
and how many render passes are flushed per frame.
|
||||
|
||||
### `PARALLEL_RDP_SUBGROUP=0`
|
||||
|
||||
Force-disables use of Vulkan subgroup operations,
|
||||
which are used to optimize the tile binning algorithm.
|
||||
|
||||
### `PARALLEL_RDP_ALLOW_EXTERNAL_HOST=0`
|
||||
|
||||
Disables use of `VK_EXT_external_memory_host`. For testing.
|
||||
|
||||
## Vulkan driver requirements
|
||||
|
||||
paraLLEl-RDP requires up-to-date Vulkan implementations. A lot of the great improvements over the previous implementation
|
||||
come from the idea that we can implement N64's UMA by simply importing RDRAM directly as an SSBO and perform 8 and 16-bit
|
||||
data access over the bus. With the tile based architecture in paraLLEl-RDP, this works very well and actual
|
||||
PCI-e traffic is massively reduced. The bandwidth for doing this is also trivial. On iGPU systems, this also works really well, since
|
||||
it's all the same memory anyways.
|
||||
|
||||
Thus, the requirements are as follows. All of these features are widely supported, or will soon be in drivers.
|
||||
paraLLEl-RDP does not aim for compatibility with ancient hardware and drivers.
|
||||
Just use the reference renderer for that. This is enthusiast software for a niche audience.
|
||||
|
||||
- Vulkan 1.1
|
||||
- VK_KHR_8bit_storage / VK_KHR_16bit_storage
|
||||
- Optionally VK_KHR_shader_float16_int8 which enables small integer arithmetic
|
||||
- Optionally subgroup support with VK_EXT_subgroup_size_control
|
||||
- For integration in emulators, VK_EXT_external_memory_host is currently required (may be relaxed later at some performance cost)
|
||||
|
||||
### Tested drivers
|
||||
|
||||
paraLLEl-RDP has been tested on Linux and Windows on all desktop vendors.
|
||||
|
||||
- Intel Mesa (20.0.6) - Passes conformance
|
||||
- Intel Windows - Passes conformance (**CAVEAT**. Intel Windows requires 64 KiB alignment for host memory import, make sure to add some padding around RDRAM in an emulator to make this work well.)
|
||||
- AMD RADV LLVM (20.0.6) - Passes conformance
|
||||
- AMD RADV ACO - Passes conformance with bleeding edge drivers and `PARALLEL_RDP_SMALL_TYPES=0`.
|
||||
- Linux AMDGPU-PRO - Passes conformance, with caveat that 8/16-bit arithmetic does not work correctly for some tests.
|
||||
paraLLEl-RDP automatically disables small integer arithmetic for proprietary AMD driver.
|
||||
- AMD Windows - Passes conformance with same caveat and workaround as AMDGPU-PRO.
|
||||
- NVIDIA Linux - Passes conformance (**MAJOR CAVEAT**, NVIDIA Linux does not support VK_EXT_external_memory_host as of 2020-05-12.)
|
||||
- NVIDIA Windows - Passes conformance
|
||||
|
||||
## Implementation strategy
|
||||
|
||||
This project uses Vulkan compute shaders to implement a fully programmable rasterization pipeline.
|
||||
The overall rendering architecture is reused from [RetroWarp](https://github.com/Themaister/RetroWarp)
|
||||
with some further refinements.
|
||||
|
||||
The lower level Vulkan backend comes from [Granite](https://github.com/Themaister/Granite).
|
||||
|
||||
### Asynchronous pipeline optimization
|
||||
|
||||
Toggleable paths in RDP state are expressed as specialization constants. The rendering thread will
|
||||
detect new state combinations and kick off building pipelines which only specify exact state needed to render.
|
||||
This is a massive performance optimization.
|
||||
|
||||
The same shaders are used for an "ubershader" fallback when pipelines are not ready.
|
||||
In this case, specialization constants are simply not used.
|
||||
The same SPIR-V modules are reused to great effect using this Vulkan feature.
|
||||
|
||||
### Tile-based rendering
|
||||
|
||||
See [RetroWarp](https://github.com/Themaister/RetroWarp) for more details.
|
||||
|
||||
### GPU-driven TMEM management
|
||||
|
||||
TMEM management is fully GPU-driven, but this is a very complicated implementation.
|
||||
Certain combinations of formats are not supported, but such cases would produce
|
||||
meaningless results, and it is unclear that applications can make meaningful use of these "weird" uploads.
|
||||
|
||||
### Synchronization
|
||||
|
||||
Synchronizing the GPU and CPU emulation is one of the hot button issues of N64 emulation.
|
||||
The integration code is designed around a timeline of synchronization points which can be waited on by the CPU
|
||||
when appropriate. For accurate emulation, an OpSyncFull is generally followed by a full wait,
|
||||
but most games can be more relaxed and only synchronize with the CPU N frames later.
|
||||
Implementation of this behavior is outside the scope of paraLLEl-RDP, and is left up to the integration code.
|
||||
|
||||
### Asynchronous compute
|
||||
|
||||
GPUs with a dedicated compute queue are recommended for optimal performance since
|
||||
RDP shading work can happen on the compute queue, and won't be blocked by graphics workloads happening
|
||||
in the graphics queue, which will typically be VI scanout and frontend applying shaders on top.
|
||||
|
||||
## Project structure
|
||||
|
||||
This project implements several submodules which are quite useful.
|
||||
|
||||
### rdp-replayer
|
||||
|
||||
This app replays RDP dump files, which are produced by running content through an RDP dumper.
|
||||
An implementation can be found in e.g. parallel-N64. The file format is very simple and essentially
|
||||
contains a record of RDRAM changes and RDP command streams.
|
||||
This dump is replayed, and the reference renderer's output can be compared live against paraLLEl-RDP
|
||||
with visual output. The UI is extremely crude, and is not user-friendly, but good enough for my use.
|
||||
|
||||
### rdp-conformance
|
||||
|
||||
I made a somewhat comprehensive test suite for the RDP, with a custom higher level RDP command stream generator.
|
||||
There are roughly ~150 fuzz tests which exercise many aspects of the RDP.
|
||||
In order to pass the test, paraLLEl-RDP must produce bit-exact results compared to Angrylion,
|
||||
so the test condition is as stringent as possible.
|
||||
|
||||
#### A note on bitexactness
|
||||
|
||||
There are a few cases where bit-exactness is a meaningless term, such as the noise feature of the RDP.
|
||||
It is not particularly meaningful to exactly reproduce noise, since it is by its very nature unpredictable.
|
||||
For that reason, this repo references a fork of the reference renderer which implements deterministic "undefined behavior"
|
||||
where appropriate. The exact formulation of the noise generator is not very interesting as long as
|
||||
correct entropy and output range is reproduced.
|
||||
|
||||
##### Intentional differences from reference renderer
|
||||
|
||||
Certain effects invoke "undefined behavior" in the RDP and require cycle accuracy to resolve bit-accurately with real RDP.
|
||||
Reference renderer attempts to emulate these effects, but to reproduce this behavior breaks any form of multi-threading.
|
||||
To be able to validate dumps in a sensible way with buggy content, I modified the reference slightly to make certain
|
||||
"undefined behavior" deterministic. This doesn't meaningfully change the rendered output in the cases I've seen in the wild.
|
||||
Some of these effects would be possible to emulate,
|
||||
but at the cost of lots of added complexity and it wouldn't be quite correct anyways given the cycle accuracy issue.
|
||||
|
||||
- CombinedColor/Alpha in first cycle is cleared to zero. Some games read this in first cycle,
|
||||
and reference renderer will read whatever was generated last pixel.
|
||||
This causes issues in some cases, where cycle accuracy would have caused the feedback to converge to zero over time.
|
||||
- Reading LODFrac in 1 cycle mode. This is currently ignored. The results generated seem nonsensical. Never seen this in the wild.
|
||||
- Using TexLOD in copy mode. This is currently ignored. The results generated seem nonsensical. Never seen this in the wild.
|
||||
- Reading MemoryColor in first blender cycle in 2-cycle mode. Reference seems to wait until the second cycle before updating this value,
|
||||
despite memory coverage being updated right away. The sensible thing to do is to allow reading memory color in first cycle.
|
||||
- Alpha testing in 2-cycle mode reads combined alpha from next pixel in reference.
|
||||
Just doing alpha testing in first cycle on current pixel is good enough.
|
||||
If this is correct hardware behavior, I consider this a hardware bug.
|
||||
- Reading Texel1 in cycle 1 of 2-cycle mode reads the Texel0 from next pixel.
|
||||
In the few cases I've seen this, the rendered output is slightly buggy, but it's hardly visible in motion.
|
||||
The workaround is just to read Texel0 from current pixel which still renders fine.
|
||||
|
||||
### vi-conformance
|
||||
|
||||
This is a conformance suite, except for the video interface (VI) unit.
|
||||
|
||||
### rdp-validate-dump
|
||||
|
||||
This tool replays an RDP dump headless and compares outputs between reference renderer and paraLLEl-RDP.
|
||||
To pass, bitexact output must be generated.
|
||||
|
||||
## Build
|
||||
|
||||
Checkout submodules. This pulls in Angrylion-Plus as well as Granite.
|
||||
|
||||
```
|
||||
git submodule update --init --recursive
|
||||
```
|
||||
|
||||
Standard CMake build.
|
||||
|
||||
```
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cmake --build . --parallel (--config Release on MSVC)
|
||||
```
|
||||
|
||||
### Run test suite
|
||||
|
||||
You can run rdp-conformance and vi-conformance with ctest to verify if your driver is behaving correctly.
|
||||
|
||||
```
|
||||
ctest (-C Release on MSVC)
|
||||
```
|
||||
|
||||
### Embedding shaders in a C++ header
|
||||
|
||||
If embedding paraLLEl-RDP in an emulator project, it is helpful to pre-compile and bake SPIR-V shaders in a C++ header.
|
||||
Build slangmosh from Granite, and then run:
|
||||
|
||||
```
|
||||
slangmosh parallel-rdp/shaders/slangmosh.json --output slangmosh.hpp --vk11 --strip -O --namespace RDP
|
||||
```
|
||||
|
||||
### Generating a standalone code base for emulator integration
|
||||
|
||||
Run the `generate_standalone_codebase.sh $OUTDIR` script with an output directory `$OUTDIR/` as argument to generate a standalone code base which can be built without any special build system support.
|
||||
Include `$OUTDIR/config.mk` if building with Make to make your life easier.
|
||||
Note that `slangmosh` must be in your path for this script to run. It executes the command above to build `slangmosh.hpp`.
|
||||
|
||||
## License
|
||||
|
||||
paraLLEl-RDP is licensed under the permissive MIT license. See included LICENSE file.
|
||||
This implementation builds heavily on the knowledge (but not code) gained from studying the reference implementation,
|
||||
thus it felt fair to release it under a permissive license, so my work could be reused more easily.
|
|
@ -0,0 +1,53 @@
|
|||
# For use in standalone implementations.
|
||||
|
||||
PARALLEL_RDP_CFLAGS :=
|
||||
PARALLEL_RDP_CXXFLAGS := -DGRANITE_VULKAN_MT
|
||||
|
||||
PARALLEL_RDP_SOURCES_CXX := \
|
||||
$(wildcard $(PARALLEL_RDP_IMPLEMENTATION)/parallel-rdp/*.cpp) \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/buffer.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/buffer_pool.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/command_buffer.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/command_pool.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/context.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/cookie.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/descriptor_set.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/device.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/event_manager.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/fence.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/fence_manager.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/image.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/memory_allocator.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/pipeline_event.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/query_pool.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/render_pass.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/sampler.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/semaphore.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/semaphore_manager.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/shader.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/vulkan/texture_format.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/util/logging.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/util/thread_id.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/util/aligned_alloc.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/util/timer.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/util/timeline_trace_file.cpp \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/util/thread_name.cpp
|
||||
|
||||
PARALLEL_RDP_SOURCES_C := \
|
||||
$(PARALLEL_RDP_IMPLEMENTATION)/volk/volk.c
|
||||
|
||||
PARALLEL_RDP_INCLUDE_DIRS := \
|
||||
-I$(PARALLEL_RDP_IMPLEMENTATION)/parallel-rdp \
|
||||
-I$(PARALLEL_RDP_IMPLEMENTATION)/volk \
|
||||
-I$(PARALLEL_RDP_IMPLEMENTATION)/vulkan \
|
||||
-I$(PARALLEL_RDP_IMPLEMENTATION)/vulkan-headers/include \
|
||||
-I$(PARALLEL_RDP_IMPLEMENTATION)/util
|
||||
|
||||
# Linker flags common to every platform.
PARALLEL_RDP_LDFLAGS := -pthread
# Select Windows-specific flags when $(platform) contains "win".
# "darwin" also contains the substring "win", so it is stripped before the
# test; otherwise a Darwin build would pick up the Win32 define and -lwinmm.
ifeq (,$(findstring win,$(subst darwin,,$(platform))))
# Non-Windows targets additionally link against libdl.
PARALLEL_RDP_LDFLAGS += -ldl
else
# Windows targets enable the Win32 Vulkan platform define and link winmm.
PARALLEL_RDP_CFLAGS += -DVK_USE_PLATFORM_WIN32_KHR
PARALLEL_RDP_LDFLAGS += -lwinmm
endif
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <chrono>
|
||||
#include "command_ring.hpp"
|
||||
#include "rdp_device.hpp"
|
||||
#include "thread_id.hpp"
|
||||
#include <assert.h>
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
// (Re)initializes the ring and starts the worker thread.
//
// processor_ receives every command popped from the ring by the worker.
// count is the ring capacity in 32-bit words and must be a non-zero power of
// two, because ring positions are computed as (counter & (count - 1)).
// If a worker thread from a previous init() is still running it is shut down
// first, and all progress counters are reset.
void CommandRing::init(
#ifdef PARALLEL_RDP_SHADER_DIR
		Granite::Global::GlobalManagersHandle global_handles_,
#endif
		CommandProcessor *processor_, unsigned count)
{
	// (0 & (0 - 1)) == 0 as well, so zero has to be rejected explicitly.
	assert(count != 0 && (count & (count - 1)) == 0);
	teardown_thread();
	processor = processor_;
	ring.resize(count);
	write_count = 0;
	read_count = 0;
	// drain() waits for write_count == completed_count. The shutdown command
	// queued by teardown_thread() never updates completed_count, so without
	// this reset a drain() issued right after a re-init would wait forever.
	completed_count = 0;
#ifdef PARALLEL_RDP_SHADER_DIR
	global_handles = std::move(global_handles_);
#endif
	thr = std::thread(&CommandRing::thread_loop, this);
}
|
||||
|
||||
void CommandRing::teardown_thread()
|
||||
{
|
||||
if (thr.joinable())
|
||||
{
|
||||
enqueue_command(0, nullptr);
|
||||
thr.join();
|
||||
}
|
||||
}
|
||||
|
||||
// Shuts down and joins the worker thread (if any) before the members are destroyed.
CommandRing::~CommandRing()
|
||||
{
|
||||
teardown_thread();
|
||||
}
|
||||
|
||||
void CommandRing::drain()
|
||||
{
|
||||
std::unique_lock<std::mutex> holder{lock};
|
||||
cond.wait(holder, [this]() {
|
||||
return write_count == completed_count;
|
||||
});
|
||||
}
|
||||
|
||||
void CommandRing::enqueue_command(unsigned num_words, const uint32_t *words)
|
||||
{
|
||||
std::unique_lock<std::mutex> holder{lock};
|
||||
cond.wait(holder, [this, num_words]() {
|
||||
return write_count + num_words + 1 <= read_count + ring.size();
|
||||
});
|
||||
|
||||
size_t mask = ring.size() - 1;
|
||||
ring[write_count++ & mask] = num_words;
|
||||
for (unsigned i = 0; i < num_words; i++)
|
||||
ring[write_count++ & mask] = words[i];
|
||||
|
||||
cond.notify_one();
|
||||
}
|
||||
|
||||
void CommandRing::thread_loop()
|
||||
{
|
||||
Util::register_thread_index(0);
|
||||
|
||||
#ifdef PARALLEL_RDP_SHADER_DIR
|
||||
// Here to let the RDP play nice with full Granite.
|
||||
// When we move to standalone Granite, we won't need to interact with global subsystems like this.
|
||||
Granite::Global::set_thread_context(*global_handles);
|
||||
global_handles.reset();
|
||||
#endif
|
||||
|
||||
std::vector<uint32_t> tmp_buffer;
|
||||
tmp_buffer.reserve(64);
|
||||
size_t mask = ring.size() - 1;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
bool is_idle = false;
|
||||
{
|
||||
std::unique_lock<std::mutex> holder{lock};
|
||||
if (cond.wait_for(holder, std::chrono::microseconds(500), [this]() { return write_count > read_count; }))
|
||||
{
|
||||
uint32_t num_words = ring[read_count++ & mask];
|
||||
tmp_buffer.resize(num_words);
|
||||
for (uint32_t i = 0; i < num_words; i++)
|
||||
tmp_buffer[i] = ring[read_count++ & mask];
|
||||
}
|
||||
else
|
||||
{
|
||||
// If we don't receive commands at a steady pace,
|
||||
// notify rendering thread that we should probably kick some work.
|
||||
tmp_buffer.resize(1);
|
||||
tmp_buffer[0] = uint32_t(Op::MetaIdle) << 24;
|
||||
is_idle = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (tmp_buffer.empty())
|
||||
break;
|
||||
|
||||
processor->enqueue_command_direct(tmp_buffer.size(), tmp_buffer.data());
|
||||
if (!is_idle)
|
||||
{
|
||||
std::lock_guard<std::mutex> holder{lock};
|
||||
completed_count = read_count;
|
||||
cond.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <vector>
|
||||
|
||||
#ifdef PARALLEL_RDP_SHADER_DIR
|
||||
#include "global_managers.hpp"
|
||||
#endif
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
class CommandProcessor;
|
||||
// Ring buffer of 32-bit command words. Producers append commands with
// enqueue_command(); a worker thread owned by this object pops them and
// forwards them to a CommandProcessor.
class CommandRing
|
||||
{
|
||||
public:
|
||||
// (Re)starts the ring with room for `count` words and launches the worker thread.
// The implementation asserts that `count` is a power of two.
void init(
|
||||
#ifdef PARALLEL_RDP_SHADER_DIR
|
||||
Granite::Global::GlobalManagersHandle global_handles,
|
||||
#endif
|
||||
CommandProcessor *processor, unsigned count);
|
||||
// Stops and joins the worker thread if it is running.
~CommandRing();
|
||||
// Blocks until the worker has finished processing everything written so far.
void drain();
|
||||
|
||||
// Appends one command of `num_words` words, blocking while the ring lacks space for it.
void enqueue_command(unsigned num_words, const uint32_t *words);
|
||||
|
||||
private:
|
||||
// Destination for commands popped by the worker thread.
CommandProcessor *processor = nullptr;
|
||||
// Worker thread running thread_loop().
std::thread thr;
|
||||
// Guards the ring contents and the three counters below.
std::mutex lock;
|
||||
// Signalled both when words are enqueued and when the worker completes a command.
std::condition_variable cond;
|
||||
|
||||
// Backing storage; indexed with (counter & (size - 1)).
std::vector<uint32_t> ring;
|
||||
// Number of words written since init().
uint64_t write_count = 0;
|
||||
// Number of words popped by the worker since init().
uint64_t read_count = 0;
|
||||
// Value of read_count after the most recently processed command; drain() waits on this.
uint64_t completed_count = 0;
|
||||
|
||||
// Worker thread entry point.
void thread_loop();
|
||||
// Asks the worker to exit and joins it; no-op when no thread is running.
void teardown_thread();
|
||||
#ifdef PARALLEL_RDP_SHADER_DIR
|
||||
// Handed over by init() and consumed by the worker thread on startup.
Granite::Global::GlobalManagersHandle global_handles;
|
||||
#endif
|
||||
};
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,402 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Vulkan
|
||||
{
|
||||
class Program;
|
||||
class Shader;
|
||||
}
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
template <typename Program, typename Shader> struct Shaders;
|
||||
using ShaderBank = Shaders<Vulkan::Program *, Vulkan::Shader *>;
|
||||
|
||||
// list of command IDs
|
||||
enum class Op
|
||||
{
|
||||
Nop = 0,
|
||||
|
||||
MetaSignalTimeline = 1,
|
||||
MetaFlush = 2,
|
||||
MetaIdle = 3,
|
||||
MetaSetQuirks = 4,
|
||||
|
||||
FillTriangle = 0x08,
|
||||
FillZBufferTriangle = 0x09,
|
||||
TextureTriangle = 0x0a,
|
||||
TextureZBufferTriangle = 0x0b,
|
||||
ShadeTriangle = 0x0c,
|
||||
ShadeZBufferTriangle = 0x0d,
|
||||
ShadeTextureTriangle = 0x0e,
|
||||
ShadeTextureZBufferTriangle = 0x0f,
|
||||
TextureRectangle = 0x24,
|
||||
TextureRectangleFlip = 0x25,
|
||||
SyncLoad = 0x26,
|
||||
SyncPipe = 0x27,
|
||||
SyncTile = 0x28,
|
||||
SyncFull = 0x29,
|
||||
SetKeyGB = 0x2a,
|
||||
SetKeyR = 0x2b,
|
||||
SetConvert = 0x2c,
|
||||
SetScissor = 0x2d,
|
||||
SetPrimDepth = 0x2e,
|
||||
SetOtherModes = 0x2f,
|
||||
LoadTLut = 0x30,
|
||||
SetTileSize = 0x32,
|
||||
LoadBlock = 0x33,
|
||||
LoadTile = 0x34,
|
||||
SetTile = 0x35,
|
||||
FillRectangle = 0x36,
|
||||
SetFillColor = 0x37,
|
||||
SetFogColor = 0x38,
|
||||
SetBlendColor = 0x39,
|
||||
SetPrimColor = 0x3a,
|
||||
SetEnvColor = 0x3b,
|
||||
SetCombine = 0x3c,
|
||||
SetTextureImage = 0x3d,
|
||||
SetMaskImage = 0x3e,
|
||||
SetColorImage = 0x3f
|
||||
};
|
||||
|
||||
enum class RGBMul : uint8_t
|
||||
{
|
||||
Combined = 0,
|
||||
Texel0 = 1,
|
||||
Texel1 = 2,
|
||||
Primitive = 3,
|
||||
Shade = 4,
|
||||
Env = 5,
|
||||
KeyScale = 6,
|
||||
CombinedAlpha = 7,
|
||||
Texel0Alpha = 8,
|
||||
Texel1Alpha = 9,
|
||||
PrimitiveAlpha = 10,
|
||||
ShadeAlpha = 11,
|
||||
EnvAlpha = 12,
|
||||
LODFrac = 13,
|
||||
PrimLODFrac = 14,
|
||||
ConvertK5 = 15,
|
||||
Zero = 16
|
||||
};
|
||||
|
||||
enum class RGBMulAdd : uint8_t
|
||||
{
|
||||
Combined = 0,
|
||||
Texel0 = 1,
|
||||
Texel1 = 2,
|
||||
Primitive = 3,
|
||||
Shade = 4,
|
||||
Env = 5,
|
||||
One = 6,
|
||||
Noise = 7,
|
||||
Zero = 8
|
||||
};
|
||||
|
||||
enum class RGBMulSub : uint8_t
|
||||
{
|
||||
Combined = 0,
|
||||
Texel0 = 1,
|
||||
Texel1 = 2,
|
||||
Primitive = 3,
|
||||
Shade = 4,
|
||||
Env = 5,
|
||||
KeyCenter = 6,
|
||||
ConvertK4 = 7,
|
||||
Zero = 8
|
||||
};
|
||||
|
||||
enum class RGBAdd : uint8_t
|
||||
{
|
||||
Combined = 0,
|
||||
Texel0 = 1,
|
||||
Texel1 = 2,
|
||||
Primitive = 3,
|
||||
Shade = 4,
|
||||
Env = 5,
|
||||
One = 6,
|
||||
Zero = 7
|
||||
};
|
||||
|
||||
enum class AlphaAddSub : uint8_t
|
||||
{
|
||||
CombinedAlpha = 0,
|
||||
Texel0Alpha = 1,
|
||||
Texel1Alpha = 2,
|
||||
PrimitiveAlpha = 3,
|
||||
ShadeAlpha = 4,
|
||||
EnvAlpha = 5,
|
||||
One = 6,
|
||||
Zero = 7
|
||||
};
|
||||
|
||||
enum class AlphaMul : uint8_t
|
||||
{
|
||||
LODFrac = 0,
|
||||
Texel0Alpha = 1,
|
||||
Texel1Alpha = 2,
|
||||
PrimitiveAlpha = 3,
|
||||
ShadeAlpha = 4,
|
||||
EnvAlpha = 5,
|
||||
PrimLODFrac = 6,
|
||||
Zero = 7
|
||||
};
|
||||
|
||||
enum class TextureSize : uint8_t
|
||||
{
|
||||
Bpp4 = 0,
|
||||
Bpp8 = 1,
|
||||
Bpp16 = 2,
|
||||
Bpp32 = 3
|
||||
};
|
||||
|
||||
enum class TextureFormat : uint8_t
|
||||
{
|
||||
RGBA = 0,
|
||||
YUV = 1,
|
||||
CI = 2,
|
||||
IA = 3,
|
||||
I = 4
|
||||
};
|
||||
|
||||
enum class RGBDitherMode : uint8_t
|
||||
{
|
||||
Magic = 0,
|
||||
Bayer = 1,
|
||||
Noise = 2,
|
||||
Off = 3
|
||||
};
|
||||
|
||||
enum class AlphaDitherMode : uint8_t
|
||||
{
|
||||
Pattern = 0,
|
||||
InvPattern = 1,
|
||||
Noise = 2,
|
||||
Off = 3
|
||||
};
|
||||
|
||||
enum class CycleType : uint8_t
|
||||
{
|
||||
Cycle1 = 0,
|
||||
Cycle2 = 1,
|
||||
Copy = 2,
|
||||
Fill = 3
|
||||
};
|
||||
|
||||
enum class BlendMode1A : uint8_t
|
||||
{
|
||||
PixelColor = 0,
|
||||
MemoryColor = 1,
|
||||
BlendColor = 2,
|
||||
FogColor = 3
|
||||
};
|
||||
|
||||
enum class BlendMode1B : uint8_t
|
||||
{
|
||||
PixelAlpha = 0,
|
||||
FogAlpha = 1,
|
||||
ShadeAlpha = 2,
|
||||
Zero = 3
|
||||
};
|
||||
|
||||
enum class BlendMode2A : uint8_t
|
||||
{
|
||||
PixelColor = 0,
|
||||
MemoryColor = 1,
|
||||
BlendColor = 2,
|
||||
FogColor = 3
|
||||
};
|
||||
|
||||
enum class BlendMode2B : uint8_t
|
||||
{
|
||||
InvPixelAlpha = 0,
|
||||
MemoryAlpha = 1,
|
||||
One = 2,
|
||||
Zero = 3
|
||||
};
|
||||
|
||||
enum class CoverageMode : uint8_t
|
||||
{
|
||||
Clamp = 0,
|
||||
Wrap = 1,
|
||||
Zap = 2,
|
||||
Save = 3
|
||||
};
|
||||
|
||||
enum class ZMode : uint8_t
|
||||
{
|
||||
Opaque = 0,
|
||||
Interpenetrating = 1,
|
||||
Transparent = 2,
|
||||
Decal = 3
|
||||
};
|
||||
|
||||
enum TileInfoFlagBits
|
||||
{
|
||||
TILE_INFO_CLAMP_S_BIT = 1 << 0,
|
||||
TILE_INFO_MIRROR_S_BIT = 1 << 1,
|
||||
TILE_INFO_CLAMP_T_BIT = 1 << 2,
|
||||
TILE_INFO_MIRROR_T_BIT = 1 << 3
|
||||
};
|
||||
using TileInfoFlags = uint8_t;
|
||||
|
||||
struct TileSize
|
||||
{
|
||||
uint32_t slo = 0;
|
||||
uint32_t shi = 0;
|
||||
uint32_t tlo = 0;
|
||||
uint32_t thi = 0;
|
||||
};
|
||||
|
||||
struct TileMeta
|
||||
{
|
||||
uint32_t offset = 0;
|
||||
uint32_t stride = 0;
|
||||
TextureFormat fmt = TextureFormat::RGBA;
|
||||
TextureSize size = TextureSize::Bpp16;
|
||||
uint8_t palette = 0;
|
||||
uint8_t mask_s = 0;
|
||||
uint8_t shift_s = 0;
|
||||
uint8_t mask_t = 0;
|
||||
uint8_t shift_t = 0;
|
||||
TileInfoFlags flags = 0;
|
||||
};
|
||||
|
||||
struct TileInfo
|
||||
{
|
||||
TileSize size;
|
||||
TileMeta meta;
|
||||
};
|
||||
|
||||
struct CombinerInputsRGB
|
||||
{
|
||||
RGBMulAdd muladd;
|
||||
RGBMulSub mulsub;
|
||||
RGBMul mul;
|
||||
RGBAdd add;
|
||||
};
|
||||
|
||||
struct CombinerInputsAlpha
|
||||
{
|
||||
AlphaAddSub muladd;
|
||||
AlphaAddSub mulsub;
|
||||
AlphaMul mul;
|
||||
AlphaAddSub add;
|
||||
};
|
||||
|
||||
struct CombinerInputs
|
||||
{
|
||||
CombinerInputsRGB rgb;
|
||||
CombinerInputsAlpha alpha;
|
||||
};
|
||||
|
||||
struct BlendModes
|
||||
{
|
||||
BlendMode1A blend_1a;
|
||||
BlendMode1B blend_1b;
|
||||
BlendMode2A blend_2a;
|
||||
BlendMode2B blend_2b;
|
||||
};
|
||||
|
||||
static_assert(sizeof(TileInfo) == 32, "TileInfo must be 32 bytes.");
|
||||
|
||||
enum class VIRegister
|
||||
{
|
||||
Control = 0,
|
||||
Origin,
|
||||
Width,
|
||||
Intr,
|
||||
VCurrentLine,
|
||||
Timing,
|
||||
VSync,
|
||||
HSync,
|
||||
Leap,
|
||||
HStart,
|
||||
VStart,
|
||||
VBurst,
|
||||
XScale,
|
||||
YScale,
|
||||
Count
|
||||
};
|
||||
|
||||
enum VIControlFlagBits
|
||||
{
|
||||
VI_CONTROL_TYPE_BLANK_BIT = 0 << 0,
|
||||
VI_CONTROL_TYPE_RESERVED_BIT = 1 << 0,
|
||||
VI_CONTROL_TYPE_RGBA5551_BIT = 2 << 0,
|
||||
VI_CONTROL_TYPE_RGBA8888_BIT = 3 << 0,
|
||||
VI_CONTROL_TYPE_MASK = 3 << 0,
|
||||
VI_CONTROL_GAMMA_DITHER_ENABLE_BIT = 1 << 2,
|
||||
VI_CONTROL_GAMMA_ENABLE_BIT = 1 << 3,
|
||||
VI_CONTROL_DIVOT_ENABLE_BIT = 1 << 4,
|
||||
VI_CONTROL_SERRATE_BIT = 1 << 6,
|
||||
VI_CONTROL_AA_MODE_RESAMP_EXTRA_ALWAYS_BIT = 0 << 8,
|
||||
VI_CONTROL_AA_MODE_RESAMP_EXTRA_BIT = 1 << 8,
|
||||
VI_CONTROL_AA_MODE_RESAMP_ONLY_BIT = 2 << 8,
|
||||
VI_CONTROL_AA_MODE_RESAMP_REPLICATE_BIT = 3 << 8,
|
||||
VI_CONTROL_AA_MODE_MASK = 3 << 8,
|
||||
VI_CONTROL_DITHER_FILTER_ENABLE_BIT = 1 << 16,
|
||||
VI_CONTROL_META_AA_BIT = 1 << 17,
|
||||
VI_CONTROL_META_SCALE_BIT = 1 << 18
|
||||
};
|
||||
using VIControlFlags = uint32_t;
|
||||
|
||||
// Packs a start/end pair into one register word: the low 10 bits of
// start_value go to bits 16..25, the low 10 bits of end_value to bits 0..9.
static inline uint32_t make_vi_start_register(uint32_t start_value, uint32_t end_value)
{
	const uint32_t start_field = (start_value & 0x3ff) << 16;
	const uint32_t end_field = end_value & 0x3ff;
	return start_field | end_field;
}
|
||||
|
||||
// Packs a scale/bias pair into one register word: the low 12 bits of bias go
// to bits 16..27, the low 12 bits of scale_factor to bits 0..11.
static inline uint32_t make_vi_scale_register(uint32_t scale_factor, uint32_t bias)
{
	const uint32_t bias_field = (bias & 0xfff) << 16;
	const uint32_t scale_field = scale_factor & 0xfff;
	return bias_field | scale_field;
}
|
||||
|
||||
constexpr uint32_t VI_V_SYNC_NTSC = 525;
|
||||
constexpr uint32_t VI_V_SYNC_PAL = 625;
|
||||
constexpr uint32_t VI_H_OFFSET_NTSC = 108;
|
||||
constexpr uint32_t VI_H_OFFSET_PAL = 128;
|
||||
constexpr uint32_t VI_V_OFFSET_NTSC = 34;
|
||||
constexpr uint32_t VI_V_OFFSET_PAL = 44;
|
||||
constexpr uint32_t VI_V_RES_NTSC = 480;
|
||||
constexpr uint32_t VI_V_RES_PAL = 576;
|
||||
constexpr int VI_SCANOUT_WIDTH = 640;
|
||||
|
||||
static inline uint32_t make_default_v_start()
|
||||
{
|
||||
return make_vi_start_register(VI_V_OFFSET_NTSC, VI_V_OFFSET_NTSC + 224 * 2);
|
||||
}
|
||||
|
||||
static inline uint32_t make_default_h_start()
|
||||
{
|
||||
return make_vi_start_register(VI_H_OFFSET_NTSC, VI_H_OFFSET_NTSC + VI_SCANOUT_WIDTH);
|
||||
}
|
||||
|
||||
// Sign-extends the low `bits` bits of v to a full int32_t by round-tripping
// the value through a signed bit-field of that width.
template <int bits>
static int32_t sext(int32_t v)
{
	struct { int32_t field : bits; } narrowed;
	narrowed.field = v;
	return narrowed.field;
}
|
||||
}
|
|
@ -0,0 +1,389 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "rdp_common.hpp"
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
enum TriangleSetupFlagBits
|
||||
{
|
||||
TRIANGLE_SETUP_FLIP_BIT = 1 << 0,
|
||||
TRIANGLE_SETUP_DO_OFFSET_BIT = 1 << 1,
|
||||
TRIANGLE_SETUP_SKIP_XFRAC_BIT = 1 << 2,
|
||||
TRIANGLE_SETUP_INTERLACE_FIELD_BIT = 1 << 3,
|
||||
TRIANGLE_SETUP_INTERLACE_KEEP_ODD_BIT = 1 << 4,
|
||||
TRIANGLE_SETUP_DISABLE_UPSCALING_BIT = 1 << 5,
|
||||
TRIANGLE_SETUP_NATIVE_LOD_BIT = 1 << 6
|
||||
};
|
||||
using TriangleSetupFlags = uint8_t;
|
||||
|
||||
enum StaticRasterizationFlagBits
|
||||
{
|
||||
RASTERIZATION_INTERLACE_FIELD_BIT = 1 << 0,
|
||||
RASTERIZATION_INTERLACE_KEEP_ODD_BIT = 1 << 1,
|
||||
RASTERIZATION_AA_BIT = 1 << 2,
|
||||
RASTERIZATION_PERSPECTIVE_CORRECT_BIT = 1 << 3,
|
||||
RASTERIZATION_TLUT_BIT = 1 << 4,
|
||||
RASTERIZATION_TLUT_TYPE_BIT = 1 << 5,
|
||||
RASTERIZATION_CVG_TIMES_ALPHA_BIT = 1 << 6,
|
||||
RASTERIZATION_ALPHA_CVG_SELECT_BIT = 1 << 7,
|
||||
RASTERIZATION_MULTI_CYCLE_BIT = 1 << 8,
|
||||
RASTERIZATION_TEX_LOD_ENABLE_BIT = 1 << 9,
|
||||
RASTERIZATION_SHARPEN_LOD_ENABLE_BIT = 1 << 10,
|
||||
RASTERIZATION_DETAIL_LOD_ENABLE_BIT = 1 << 11,
|
||||
RASTERIZATION_FILL_BIT = 1 << 12,
|
||||
RASTERIZATION_COPY_BIT = 1 << 13,
|
||||
RASTERIZATION_SAMPLE_MODE_BIT = 1 << 14,
|
||||
RASTERIZATION_ALPHA_TEST_BIT = 1 << 15,
|
||||
RASTERIZATION_ALPHA_TEST_DITHER_BIT = 1 << 16,
|
||||
RASTERIZATION_SAMPLE_MID_TEXEL_BIT = 1 << 17,
|
||||
RASTERIZATION_USES_TEXEL0_BIT = 1 << 18,
|
||||
RASTERIZATION_USES_TEXEL1_BIT = 1 << 19,
|
||||
RASTERIZATION_USES_LOD_BIT = 1 << 20,
|
||||
RASTERIZATION_USES_PIPELINED_TEXEL1_BIT = 1 << 21,
|
||||
RASTERIZATION_CONVERT_ONE_BIT = 1 << 22,
|
||||
RASTERIZATION_BILERP_0_BIT = 1 << 23,
|
||||
RASTERIZATION_BILERP_1_BIT = 1 << 24,
|
||||
RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET = 26,
|
||||
RASTERIZATION_NEED_NOISE_BIT = 1 << 28,
|
||||
RASTERIZATION_USE_STATIC_TEXTURE_SIZE_FORMAT_BIT = 1 << 29,
|
||||
RASTERIZATION_USE_SPECIALIZATION_CONSTANT_BIT = 1 << 30
|
||||
};
|
||||
using StaticRasterizationFlags = uint32_t;
|
||||
|
||||
enum DepthBlendFlagBits
|
||||
{
|
||||
DEPTH_BLEND_DEPTH_TEST_BIT = 1 << 0,
|
||||
DEPTH_BLEND_DEPTH_UPDATE_BIT = 1 << 1,
|
||||
DEPTH_BLEND_FORCE_BLEND_BIT = 1 << 3,
|
||||
DEPTH_BLEND_IMAGE_READ_ENABLE_BIT = 1 << 4,
|
||||
DEPTH_BLEND_COLOR_ON_COVERAGE_BIT = 1 << 5,
|
||||
DEPTH_BLEND_MULTI_CYCLE_BIT = 1 << 6,
|
||||
DEPTH_BLEND_AA_BIT = 1 << 7,
|
||||
DEPTH_BLEND_DITHER_ENABLE_BIT = 1 << 8
|
||||
};
|
||||
using DepthBlendFlags = uint32_t;
|
||||
|
||||
struct TriangleSetup
|
||||
{
|
||||
int32_t xh, xm, xl;
|
||||
int16_t yh, ym;
|
||||
|
||||
int32_t dxhdy, dxmdy, dxldy;
|
||||
int16_t yl;
|
||||
TriangleSetupFlags flags;
|
||||
uint8_t tile;
|
||||
};
|
||||
|
||||
struct AttributeSetup
|
||||
{
|
||||
int32_t r, g, b, a;
|
||||
int32_t drdx, dgdx, dbdx, dadx;
|
||||
int32_t drde, dgde, dbde, dade;
|
||||
int32_t drdy, dgdy, dbdy, dady;
|
||||
|
||||
int32_t s, t, z, w;
|
||||
int32_t dsdx, dtdx, dzdx, dwdx;
|
||||
int32_t dsde, dtde, dzde, dwde;
|
||||
int32_t dsdy, dtdy, dzdy, dwdy;
|
||||
};
|
||||
|
||||
struct ConstantCombinerInputs
|
||||
{
|
||||
uint8_t muladd[4];
|
||||
uint8_t mulsub[4];
|
||||
uint8_t mul[4];
|
||||
uint8_t add[4];
|
||||
};
|
||||
|
||||
// Per-primitive state which is very dynamic in nature and does not change anything about the shader itself.
|
||||
struct DerivedSetup
|
||||
{
|
||||
ConstantCombinerInputs constants[2];
|
||||
uint8_t fog_color[4];
|
||||
uint8_t blend_color[4];
|
||||
uint32_t fill_color;
|
||||
uint16_t dz;
|
||||
uint8_t dz_compressed;
|
||||
uint8_t min_lod;
|
||||
int16_t convert_factors[4];
|
||||
};
|
||||
|
||||
static_assert((sizeof(TriangleSetup) & 15) == 0, "TriangleSetup must be aligned to 16 bytes.");
|
||||
static_assert((sizeof(AttributeSetup) & 15) == 0, "AttributeSetup must be aligned to 16 bytes.");
|
||||
static_assert(sizeof(DerivedSetup) == 56, "DerivedSetup is not 56 bytes.");
|
||||
|
||||
struct ScissorState
|
||||
{
|
||||
uint32_t xlo;
|
||||
uint32_t ylo;
|
||||
uint32_t xhi;
|
||||
uint32_t yhi;
|
||||
};
|
||||
|
||||
struct StaticRasterizationState
|
||||
{
|
||||
CombinerInputs combiner[2];
|
||||
StaticRasterizationFlags flags;
|
||||
uint32_t dither;
|
||||
uint32_t texture_size;
|
||||
uint32_t texture_fmt;
|
||||
};
|
||||
static_assert(sizeof(StaticRasterizationState) == 32, "StaticRasterizationState must be 32 bytes.");
|
||||
|
||||
struct DepthBlendState
|
||||
{
|
||||
BlendModes blend_cycles[2];
|
||||
DepthBlendFlags flags;
|
||||
CoverageMode coverage_mode;
|
||||
ZMode z_mode;
|
||||
uint8_t padding[2];
|
||||
};
|
||||
static_assert(sizeof(DepthBlendState) == 16, "DepthBlendState must be 16 bytes.");
|
||||
|
||||
struct InstanceIndices
|
||||
{
|
||||
uint8_t static_index;
|
||||
uint8_t depth_blend_index;
|
||||
uint8_t tile_instance_index;
|
||||
uint8_t padding[5];
|
||||
uint8_t tile_indices[8];
|
||||
};
|
||||
static_assert((sizeof(InstanceIndices) & 15) == 0, "InstanceIndices must be aligned to 16 bytes.");
|
||||
|
||||
struct UploadInfo
|
||||
{
|
||||
int32_t width, height;
|
||||
float min_t_mod, max_t_mod;
|
||||
|
||||
int32_t vram_addr;
|
||||
int32_t vram_width;
|
||||
int32_t vram_size;
|
||||
int32_t vram_effective_width;
|
||||
|
||||
int32_t tmem_offset;
|
||||
int32_t tmem_stride_words;
|
||||
int32_t tmem_size;
|
||||
int32_t tmem_fmt;
|
||||
|
||||
int32_t mode;
|
||||
float inv_tmem_stride_words;
|
||||
int32_t dxt;
|
||||
int32_t padding;
|
||||
};
|
||||
static_assert((sizeof(UploadInfo) & 15) == 0, "UploadInfo must be aligned to 16 bytes.");
|
||||
|
||||
struct SpanSetup
|
||||
{
|
||||
int32_t r, g, b, a;
|
||||
int32_t s, t, w, z;
|
||||
|
||||
int16_t xlo[4];
|
||||
int16_t xhi[4];
|
||||
|
||||
int32_t interpolation_base_x;
|
||||
int32_t start_x;
|
||||
int32_t end_x;
|
||||
int16_t lodlength;
|
||||
uint16_t valid_line;
|
||||
};
|
||||
static_assert((sizeof(SpanSetup) & 15) == 0, "SpanSetup is not aligned to 16 bytes.");
|
||||
|
||||
struct SpanInfoOffsets
|
||||
{
|
||||
int32_t offset, ylo, yhi, padding;
|
||||
};
|
||||
static_assert((sizeof(SpanInfoOffsets) == 16), "SpanInfoOffsets is not 16 bytes.");
|
||||
|
||||
struct SpanInterpolationJob
|
||||
{
|
||||
uint16_t primitive_index, base_y, max_y, padding;
|
||||
};
|
||||
static_assert((sizeof(SpanInterpolationJob) == 8), "SpanInterpolationJob is not 8 bytes.");
|
||||
|
||||
struct GlobalState
|
||||
{
|
||||
uint32_t addr_index;
|
||||
uint32_t depth_addr_index;
|
||||
uint32_t fb_width, fb_height;
|
||||
uint32_t group_mask;
|
||||
};
|
||||
|
||||
struct TileRasterWork
|
||||
{
|
||||
uint32_t tile_x, tile_y;
|
||||
uint32_t tile_instance;
|
||||
uint32_t primitive;
|
||||
};
|
||||
static_assert((sizeof(TileRasterWork) == 16), "TileRasterWork is not 16 bytes.");
|
||||
|
||||
struct GlobalFBInfo
|
||||
{
|
||||
uint32_t dx_shift;
|
||||
uint32_t dx_mask;
|
||||
uint32_t fb_size;
|
||||
uint32_t base_primitive_index;
|
||||
};
|
||||
|
||||
// Fixed-capacity store of up to N values of T that de-duplicates on insert.
// Equality is a bytewise memcmp over sizeof(T).
template <typename T, unsigned N>
class StateCache
{
public:
	// Returns the index of an existing entry bytewise-equal to t, or appends
	// t and returns its new index. Appending to a full cache is asserted against.
	unsigned add(const T &t)
	{
		// Fast path: same entry as the previous call.
		if (cached_index >= 0 && matches(cached_index, t))
			return unsigned(cached_index);

		// Search newest to oldest.
		int slot = int(count);
		while (--slot >= 0)
		{
			if (matches(slot, t))
			{
				cached_index = slot;
				return unsigned(slot);
			}
		}

		assert(count < N);
		const unsigned fresh = count;
		memcpy(&elements[fresh], &t, sizeof(T));
		count = fresh + 1;
		cached_index = int(fresh);
		return fresh;
	}

	// True once all N slots are in use.
	bool full() const
	{
		return !(count != N);
	}

	// Number of stored entries.
	unsigned size() const
	{
		return count;
	}

	// Size in bytes of the stored entries.
	unsigned byte_size() const
	{
		return count * sizeof(T);
	}

	// Pointer to the first stored entry.
	const T *data() const
	{
		return &elements[0];
	}

	// Forgets all entries and the last-hit shortcut.
	void reset()
	{
		cached_index = -1;
		count = 0;
	}

	// True when nothing is stored.
	bool empty() const
	{
		return !count;
	}

private:
	// Bytewise comparison of the entry at `index` with t.
	bool matches(int index, const T &t) const
	{
		return memcmp(&elements[index], &t, sizeof(T)) == 0;
	}

	unsigned count = 0;
	int cached_index = -1;
	T elements[N];
};
|
||||
|
||||
// Fixed-capacity append-only buffer of up to N values of T.
template <typename T, unsigned N>
class StreamCache
{
public:
	// Appends a bytewise copy of t. Appending to a full buffer is asserted against.
	void add(const T &t)
	{
		assert(count < N);
		T *slot = &elements[count];
		count += 1;
		memcpy(slot, &t, sizeof(T));
	}

	// True once all N slots are in use.
	bool full() const
	{
		return !(count != N);
	}

	// Number of stored entries.
	unsigned size() const
	{
		return count;
	}

	// Size in bytes of the stored entries.
	unsigned byte_size() const
	{
		return count * sizeof(T);
	}

	// Pointer to the first stored entry.
	const T *data() const
	{
		return &elements[0];
	}

	// Forgets all entries.
	void reset()
	{
		count = 0;
	}

	// True when nothing is stored.
	bool empty() const
	{
		return !count;
	}

private:
	unsigned count = 0;
	T elements[N];
};
|
||||
|
||||
namespace Limits
|
||||
{
|
||||
constexpr unsigned MaxPrimitives = 256;
|
||||
constexpr unsigned MaxStaticRasterizationStates = 64;
|
||||
constexpr unsigned MaxDepthBlendStates = 64;
|
||||
constexpr unsigned MaxTileInfoStates = 256;
|
||||
constexpr unsigned NumSyncStates = 32;
|
||||
constexpr unsigned MaxNumTiles = 8;
|
||||
constexpr unsigned MaxTMEMInstances = 256;
|
||||
constexpr unsigned MaxSpanSetups = 32 * 1024;
|
||||
constexpr unsigned MaxWidth = 1024;
|
||||
constexpr unsigned MaxHeight = 1024;
|
||||
constexpr unsigned MaxTileInstances = 0x8000;
|
||||
}
|
||||
|
||||
namespace ImplementationConstants
|
||||
{
|
||||
constexpr unsigned DefaultWorkgroupSize = 64;
|
||||
|
||||
constexpr unsigned TileWidth = 8;
|
||||
constexpr unsigned TileHeight = 8;
|
||||
constexpr unsigned MaxTilesX = Limits::MaxWidth / TileWidth;
|
||||
constexpr unsigned MaxTilesY = Limits::MaxHeight / TileHeight;
|
||||
constexpr unsigned IncoherentPageSize = 1024;
|
||||
constexpr unsigned MaxPendingRenderPassesBeforeFlush = 8;
|
||||
constexpr unsigned MinimumPrimitivesForIdleFlush = 32;
|
||||
constexpr unsigned MinimumRenderPassesForIdleFlush = 2;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,243 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <queue>
|
||||
#include "device.hpp"
|
||||
#include "video_interface.hpp"
|
||||
#include "rdp_renderer.hpp"
|
||||
#include "rdp_common.hpp"
|
||||
#include "command_ring.hpp"
|
||||
#include "worker_thread.hpp"
|
||||
#include "rdp_dump_write.hpp"
|
||||
|
||||
#ifndef GRANITE_VULKAN_MT
|
||||
#error "Granite Vulkan backend must be built with multithreading support."
|
||||
#endif
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
// One 8-bit-per-channel pixel, stored in r, g, b, a member order.
struct RGBA
{
	uint8_t r;
	uint8_t g;
	uint8_t b;
	uint8_t a;
};
|
||||
|
||||
// Bit flags accepted by the CommandProcessor constructor.
// Each enumerator occupies a distinct bit so values can be OR-ed into a CommandProcessorFlags word.
enum CommandProcessorFlagBits
{
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_HIDDEN_RDRAM_BIT = 0x01,
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_TMEM_BIT = 0x02,
	COMMAND_PROCESSOR_FLAG_UPSCALING_2X_BIT = 0x04,
	COMMAND_PROCESSOR_FLAG_UPSCALING_4X_BIT = 0x08,
	COMMAND_PROCESSOR_FLAG_UPSCALING_8X_BIT = 0x10,
	COMMAND_PROCESSOR_FLAG_SUPER_SAMPLED_READ_BACK_BIT = 0x20,
	COMMAND_PROCESSOR_FLAG_SUPER_SAMPLED_DITHER_BIT = 0x40
};

// Storage type for a combination of CommandProcessorFlagBits.
using CommandProcessorFlags = uint32_t;
|
||||
|
||||
// Describes one copy region of a coherency operation.
// All offsets and sizes default to zero and the counter pointer to null.
struct CoherencyCopy
{
	// Offsets into the source, mask and destination, plus the number of bytes covered.
	// NOTE(review): which buffers these offsets are relative to is decided by the consumer - confirm there.
	size_t src_offset{0};
	size_t mask_offset{0};
	size_t dst_offset{0};
	size_t size{0};

	// First of `counters` atomic counters associated with this copy.
	std::atomic_uint32_t *counter_base{nullptr};
	unsigned counters{0};
};
|
||||
|
||||
struct CoherencyOperation
|
||||
{
|
||||
Vulkan::Fence fence;
|
||||
uint64_t timeline_value = 0;
|
||||
|
||||
uint8_t *dst = nullptr;
|
||||
const Vulkan::Buffer *src = nullptr;
|
||||
std::vector<CoherencyCopy> copies;
|
||||
std::atomic_uint32_t *unlock_cookie = nullptr;
|
||||
};
|
||||
|
||||
// These options control various behaviors when upscaling, to work around glitches
// which arise naturally as part of upscaling.
//
// The options are also exposed as raw 32-bit words through u.words.
struct Quirks
{
	// Defaults: native-resolution TEX_RECT enabled, native texture LOD disabled.
	inline Quirks()
	{
		// Zero the whole word first: the option struct only covers part of it, and
		// without this the remaining bytes seen through u.words would be indeterminate.
		u.words[0] = 0;
		u.options.native_resolution_tex_rect = true;
		u.options.native_texture_lod = false;
	}

	// Enables or disables rendering TEX_RECT / TEX_RECT_FLIP without upscaling.
	inline void set_native_resolution_tex_rect(bool enable)
	{
		u.options.native_resolution_tex_rect = enable;
	}

	// Enables or disables computing LOD as if rendering at 1x upscale.
	inline void set_native_texture_lod(bool enable)
	{
		u.options.native_texture_lod = enable;
	}

	union
	{
		struct Opts
		{
			// If true, force TEX_RECT and TEX_RECT_FLIP to render without upscaling.
			// Works around bilinear filtering bugs in Cycle1/Cycle2 mode where game assumed 1:1 pixel transfer.
			bool native_resolution_tex_rect;

			// Forces LOD to be computed as 1x upscale.
			// Fixes content which relies on LOD computation to select textures in clever ways.
			bool native_texture_lod;
		} options;

		// Raw word view of the options above.
		uint32_t words[1];
	} u;
};
|
||||
|
||||
class CommandProcessor
|
||||
{
|
||||
public:
|
||||
CommandProcessor(Vulkan::Device &device,
|
||||
void *rdram_ptr,
|
||||
size_t rdram_offset,
|
||||
size_t rdram_size,
|
||||
size_t hidden_rdram_size,
|
||||
CommandProcessorFlags flags);
|
||||
|
||||
~CommandProcessor();
|
||||
|
||||
bool device_is_supported() const;
|
||||
|
||||
// Synchronization.
|
||||
void flush();
|
||||
uint64_t signal_timeline();
|
||||
void wait_for_timeline(uint64_t index);
|
||||
void idle();
|
||||
void begin_frame_context();
|
||||
|
||||
// Queues up state and drawing commands.
|
||||
void enqueue_command(unsigned num_words, const uint32_t *words);
|
||||
void enqueue_command_direct(unsigned num_words, const uint32_t *words);
|
||||
|
||||
void set_quirks(const Quirks &quirks);
|
||||
|
||||
// Interact with memory.
|
||||
void *begin_read_rdram();
|
||||
void end_write_rdram();
|
||||
void *begin_read_hidden_rdram();
|
||||
void end_write_hidden_rdram();
|
||||
size_t get_rdram_size() const;
|
||||
size_t get_hidden_rdram_size() const;
|
||||
void *get_tmem();
|
||||
|
||||
// Sets VI register
|
||||
void set_vi_register(VIRegister reg, uint32_t value);
|
||||
|
||||
Vulkan::ImageHandle scanout(const ScanoutOptions &opts = {});
|
||||
void scanout_sync(std::vector<RGBA> &colors, unsigned &width, unsigned &height);
|
||||
void scanout_async_buffer(VIScanoutBuffer &buffer, const ScanoutOptions &opts = {});
|
||||
|
||||
private:
|
||||
Vulkan::Device &device;
|
||||
Vulkan::BufferHandle rdram;
|
||||
Vulkan::BufferHandle hidden_rdram;
|
||||
Vulkan::BufferHandle tmem;
|
||||
size_t rdram_offset;
|
||||
size_t rdram_size;
|
||||
CommandProcessorFlags flags;
|
||||
#ifndef PARALLEL_RDP_SHADER_DIR
|
||||
std::unique_ptr<ShaderBank> shader_bank;
|
||||
#endif
|
||||
|
||||
CommandRing ring;
|
||||
|
||||
VideoInterface vi;
|
||||
Renderer renderer;
|
||||
|
||||
void clear_hidden_rdram();
|
||||
void clear_tmem();
|
||||
void clear_buffer(Vulkan::Buffer &buffer, uint32_t value);
|
||||
void init_renderer();
|
||||
void enqueue_command_inner(unsigned num_words, const uint32_t *words);
|
||||
|
||||
Vulkan::ImageHandle scanout(const ScanoutOptions &opts, VkImageLayout target_layout);
|
||||
|
||||
#define OP(x) void op_##x(const uint32_t *words)
|
||||
OP(fill_triangle); OP(fill_z_buffer_triangle); OP(texture_triangle); OP(texture_z_buffer_triangle);
|
||||
OP(shade_triangle); OP(shade_z_buffer_triangle); OP(shade_texture_triangle); OP(shade_texture_z_buffer_triangle);
|
||||
OP(texture_rectangle); OP(texture_rectangle_flip); OP(sync_load); OP(sync_pipe);
|
||||
OP(sync_tile); OP(sync_full); OP(set_key_gb); OP(set_key_r);
|
||||
OP(set_convert); OP(set_scissor); OP(set_prim_depth); OP(set_other_modes);
|
||||
OP(load_tlut); OP(set_tile_size); OP(load_block);
|
||||
OP(load_tile); OP(set_tile); OP(fill_rectangle); OP(set_fill_color);
|
||||
OP(set_fog_color); OP(set_blend_color); OP(set_prim_color); OP(set_env_color);
|
||||
OP(set_combine); OP(set_texture_image); OP(set_mask_image); OP(set_color_image);
|
||||
#undef OP
|
||||
|
||||
ScissorState scissor_state = {};
|
||||
StaticRasterizationState static_state = {};
|
||||
DepthBlendState depth_blend = {};
|
||||
|
||||
struct
|
||||
{
|
||||
uint32_t addr;
|
||||
uint32_t width;
|
||||
TextureFormat fmt;
|
||||
TextureSize size;
|
||||
} texture_image = {};
|
||||
|
||||
uint64_t timeline_value = 0;
|
||||
uint64_t thread_timeline_value = 0;
|
||||
|
||||
struct FenceExecutor
|
||||
{
|
||||
explicit inline FenceExecutor(Vulkan::Device *device_, uint64_t *ptr)
|
||||
: device(device_), value(ptr)
|
||||
{
|
||||
}
|
||||
|
||||
Vulkan::Device *device;
|
||||
uint64_t *value;
|
||||
bool is_sentinel(const CoherencyOperation &work) const;
|
||||
void perform_work(CoherencyOperation &work);
|
||||
void notify_work_locked(const CoherencyOperation &work);
|
||||
};
|
||||
WorkerThread<CoherencyOperation, FenceExecutor> timeline_worker;
|
||||
|
||||
uint8_t *host_rdram = nullptr;
|
||||
bool measure_stall_time = false;
|
||||
bool single_threaded_processing = false;
|
||||
bool is_supported = false;
|
||||
bool is_host_coherent = true;
|
||||
bool timestamp = false;
|
||||
|
||||
friend class Renderer;
|
||||
|
||||
void enqueue_coherency_operation(CoherencyOperation &&op);
|
||||
void drain_command_ring();
|
||||
void decode_triangle_setup(TriangleSetup &setup, const uint32_t *words) const;
|
||||
|
||||
Quirks quirks;
|
||||
|
||||
std::unique_ptr<RDPDumpWriter> dump_writer;
|
||||
bool dump_in_command_list = false;
|
||||
};
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
/* Copyright (c) 2021 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rdp_dump_write.hpp"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
// Finalizes the dump on destruction.
RDPDumpWriter::~RDPDumpWriter()
{
	// end() writes the EOF marker, closes the stream and resets `file` to nullptr
	// (and is a no-op when no file is open), so no further cleanup is needed here.
	end();
}
|
||||
|
||||
bool RDPDumpWriter::init(const char *path, uint32_t dram_size, uint32_t hidden_dram_size)
|
||||
{
|
||||
if (file)
|
||||
return false;
|
||||
|
||||
rdp_dram_cache.clear();
|
||||
rdp_dram_cache.resize(dram_size);
|
||||
rdp_hidden_dram_cache.clear();
|
||||
rdp_hidden_dram_cache.resize(hidden_dram_size);
|
||||
|
||||
file = fopen(path, "wb");
|
||||
if (!file)
|
||||
return false;
|
||||
|
||||
fwrite("RDPDUMP2", 8, 1, file);
|
||||
fwrite(&dram_size, sizeof(dram_size), 1, file);
|
||||
fwrite(&hidden_dram_size, sizeof(hidden_dram_size), 1, file);
|
||||
return true;
|
||||
}
|
||||
|
||||
void RDPDumpWriter::end_frame()
|
||||
{
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
uint32_t cmd = RDP_DUMP_CMD_END_FRAME;
|
||||
fwrite(&cmd, sizeof(cmd), 1, file);
|
||||
}
|
||||
|
||||
void RDPDumpWriter::end()
|
||||
{
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
uint32_t cmd = RDP_DUMP_CMD_EOF;
|
||||
fwrite(&cmd, sizeof(cmd), 1, file);
|
||||
|
||||
fclose(file);
|
||||
file = nullptr;
|
||||
|
||||
rdp_dram_cache.clear();
|
||||
rdp_hidden_dram_cache.clear();
|
||||
}
|
||||
|
||||
void RDPDumpWriter::flush(const void *dram_, uint32_t size,
|
||||
RDPDumpCmd block_cmd, RDPDumpCmd flush_cmd,
|
||||
uint8_t *cache)
|
||||
{
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
const auto *dram = static_cast<const uint8_t *>(dram_);
|
||||
const uint32_t block_size = 4 * 1024;
|
||||
uint32_t i = 0;
|
||||
|
||||
for (i = 0; i < size; i += block_size)
|
||||
{
|
||||
if (memcmp(dram + i, cache + i, block_size) != 0)
|
||||
{
|
||||
uint32_t cmd = block_cmd;
|
||||
fwrite(&cmd, sizeof(cmd), 1, file);
|
||||
fwrite(&i, sizeof(i), 1, file);
|
||||
fwrite(&block_size, sizeof(block_size), 1, file);
|
||||
fwrite(dram + i, 1, block_size, file);
|
||||
memcpy(cache + i, dram + i, block_size);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t cmd = flush_cmd;
|
||||
fwrite(&cmd, sizeof(cmd), 1, file);
|
||||
|
||||
}
|
||||
|
||||
void RDPDumpWriter::flush_dram(const void *dram_, uint32_t size)
|
||||
{
|
||||
flush(dram_, size, RDP_DUMP_CMD_UPDATE_DRAM, RDP_DUMP_CMD_UPDATE_DRAM_FLUSH, rdp_dram_cache.data());
|
||||
}
|
||||
|
||||
void RDPDumpWriter::flush_hidden_dram(const void *dram_, uint32_t size)
|
||||
{
|
||||
flush(dram_, size, RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM, RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM_FLUSH, rdp_hidden_dram_cache.data());
|
||||
}
|
||||
|
||||
void RDPDumpWriter::signal_complete()
|
||||
{
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
uint32_t cmd = RDP_DUMP_CMD_SIGNAL_COMPLETE;
|
||||
fwrite(&cmd, sizeof(cmd), 1, file);
|
||||
}
|
||||
|
||||
void RDPDumpWriter::emit_command(uint32_t command, const uint32_t *cmd_data, uint32_t cmd_words)
|
||||
{
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
uint32_t cmd = RDP_DUMP_CMD_RDP_COMMAND;
|
||||
fwrite(&cmd, sizeof(cmd), 1, file);
|
||||
fwrite(&command, sizeof(command), 1, file);
|
||||
fwrite(&cmd_words, sizeof(cmd_words), 1, file);
|
||||
fwrite(cmd_data, sizeof(*cmd_data), cmd_words, file);
|
||||
}
|
||||
|
||||
void RDPDumpWriter::set_vi_register(uint32_t vi_register, uint32_t value)
|
||||
{
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
uint32_t cmd = RDP_DUMP_CMD_SET_VI_REGISTER;
|
||||
fwrite(&cmd, sizeof(cmd), 1, file);
|
||||
fwrite(&vi_register, sizeof(vi_register), 1, file);
|
||||
fwrite(&value, sizeof(value), 1, file);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/* Copyright (c) 2021 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
// Writes an RDP dump file: a header followed by a stream of tagged records
// (memory updates, RDP commands, VI register writes, frame / completion / EOF markers).
//
// The writer owns the underlying FILE handle and closes it on destruction, so it is
// deliberately non-copyable: a copy would close the same handle twice.
class RDPDumpWriter
{
public:
	RDPDumpWriter() = default;
	~RDPDumpWriter();

	RDPDumpWriter(const RDPDumpWriter &) = delete;
	RDPDumpWriter &operator=(const RDPDumpWriter &) = delete;

	// Opens `path` and writes the header. Returns false on failure or if already open.
	bool init(const char *path, uint32_t dram_size, uint32_t hidden_dram_size);

	// Emit the blocks of (hidden) DRAM which changed since the previous flush.
	void flush_dram(const void *dram, uint32_t size);
	void flush_hidden_dram(const void *dram, uint32_t size);

	// Record emitters. All of them do nothing when no dump is open.
	void signal_complete();
	void emit_command(uint32_t command, const uint32_t *cmd_data, uint32_t cmd_words);
	void set_vi_register(uint32_t vi_register, uint32_t value);
	void end_frame();

private:
	// Record tags as written to the file; the numeric values are part of the file format.
	enum RDPDumpCmd : uint32_t
	{
		RDP_DUMP_CMD_INVALID = 0,
		RDP_DUMP_CMD_UPDATE_DRAM = 1,
		RDP_DUMP_CMD_RDP_COMMAND = 2,
		RDP_DUMP_CMD_SET_VI_REGISTER = 3,
		RDP_DUMP_CMD_END_FRAME = 4,
		RDP_DUMP_CMD_SIGNAL_COMPLETE = 5,
		RDP_DUMP_CMD_EOF = 6,
		RDP_DUMP_CMD_UPDATE_DRAM_FLUSH = 7,
		RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM = 8,
		RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM_FLUSH = 9,
		RDP_DUMP_CMD_INT_MAX = 0x7fffffff
	};

	// Open dump file, or nullptr when no dump is active.
	FILE *file = nullptr;

	// Shadow copies of the last dumped memory contents, used to emit only changed blocks.
	std::vector<uint8_t> rdp_dram_cache;
	std::vector<uint8_t> rdp_hidden_dram_cache;

	void flush(const void *dram_, uint32_t size, RDPDumpCmd block_cmd, RDPDumpCmd flush_cmd, uint8_t *cache);
	void end();
};
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,393 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "rdp_data_structures.hpp"
|
||||
#include "device.hpp"
|
||||
#include "rdp_common.hpp"
|
||||
#include "worker_thread.hpp"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
struct CoherencyOperation;
|
||||
|
||||
struct SyncObject
|
||||
{
|
||||
Vulkan::Fence fence;
|
||||
};
|
||||
|
||||
// Framebuffer pixel formats understood by the renderer.
// The numeric values are spelled out explicitly and stored as 32-bit unsigned.
// NOTE(review): the values presumably match what the shaders expect - do not renumber without checking.
enum class FBFormat : uint32_t
{
	I4       = 0,
	I8       = 1,
	RGBA5551 = 2,
	IA88     = 3,
	RGBA8888 = 4
};
|
||||
|
||||
// Selects which kind of TMEM upload a LoadTileInfo describes.
// Stored as 32-bit unsigned with explicit values.
enum class UploadMode : uint32_t
{
	Tile  = 0,
	TLUT  = 1,
	Block = 2
};
|
||||
|
||||
struct LoadTileInfo
|
||||
{
|
||||
uint32_t tex_addr;
|
||||
uint32_t tex_width;
|
||||
uint16_t slo, tlo, shi, thi;
|
||||
TextureFormat fmt;
|
||||
TextureSize size;
|
||||
UploadMode mode;
|
||||
};
|
||||
|
||||
class CommandProcessor;
|
||||
|
||||
// Options consumed by Renderer::init_renderer.
// Defaults: no upscaling (factor 1) and both super-sampled readback options off.
struct RendererOptions
{
	unsigned upscaling_factor{1};
	bool super_sampled_readback{false};
	bool super_sampled_readback_dither{false};
};
|
||||
|
||||
class Renderer : public Vulkan::DebugChannelInterface
|
||||
{
|
||||
public:
|
||||
explicit Renderer(CommandProcessor &processor);
|
||||
~Renderer();
|
||||
void set_device(Vulkan::Device *device);
|
||||
|
||||
// If coherent is false, RDRAM is a buffer split into data in lower half and writemask state in upper half, each part being size large.
|
||||
// offset must be 0 in this case.
|
||||
void set_rdram(Vulkan::Buffer *buffer, uint8_t *host_rdram, size_t offset, size_t size, bool coherent);
|
||||
void set_hidden_rdram(Vulkan::Buffer *buffer);
|
||||
void set_tmem(Vulkan::Buffer *buffer);
|
||||
void set_shader_bank(const ShaderBank *bank);
|
||||
|
||||
bool init_renderer(const RendererOptions &options);
|
||||
|
||||
// setup may be mutated to apply various fixups to triangle setup.
|
||||
void draw_flat_primitive(TriangleSetup &setup);
|
||||
void draw_shaded_primitive(TriangleSetup &setup, const AttributeSetup &attr);
|
||||
|
||||
void set_color_framebuffer(uint32_t addr, uint32_t width, FBFormat fmt);
|
||||
void set_depth_framebuffer(uint32_t addr);
|
||||
|
||||
void set_scissor_state(const ScissorState &state);
|
||||
void set_static_rasterization_state(const StaticRasterizationState &state);
|
||||
void set_depth_blend_state(const DepthBlendState &state);
|
||||
|
||||
void set_tile(uint32_t tile, const TileMeta &info);
|
||||
void set_tile_size(uint32_t tile, uint32_t slo, uint32_t shi, uint32_t tlo, uint32_t thi);
|
||||
void load_tile(uint32_t tile, const LoadTileInfo &info);
|
||||
void load_tile_iteration(uint32_t tile, const LoadTileInfo &info, uint32_t tmem_offset);
|
||||
|
||||
void set_blend_color(uint32_t color);
|
||||
void set_fog_color(uint32_t color);
|
||||
void set_env_color(uint32_t color);
|
||||
void set_primitive_color(uint8_t min_level, uint8_t prim_lod_frac, uint32_t color);
|
||||
void set_fill_color(uint32_t color);
|
||||
void set_primitive_depth(uint16_t prim_depth, uint16_t prim_dz);
|
||||
void set_enable_primitive_depth(bool enable);
|
||||
void set_convert(uint16_t k0, uint16_t k1, uint16_t k2, uint16_t k3, uint16_t k4, uint16_t k5);
|
||||
void set_color_key(unsigned component, uint32_t width, uint32_t center, uint32_t scale);
|
||||
|
||||
// Called when the command thread has not seen any activity in a given period of time.
|
||||
// This is useful so we don't needlessly queue up work when we might as well kick it to the GPU.
|
||||
void notify_idle_command_thread();
|
||||
void flush_and_signal();
|
||||
|
||||
int resolve_shader_define(const char *name, const char *define) const;
|
||||
|
||||
void resolve_coherency_external(unsigned offset, unsigned length);
|
||||
void submit_update_upscaled_domain_external(Vulkan::CommandBuffer &cmd,
|
||||
unsigned addr, unsigned pixels, unsigned pixel_size_log2);
|
||||
unsigned get_scaling_factor() const;
|
||||
|
||||
const Vulkan::Buffer *get_upscaled_rdram_buffer() const;
|
||||
const Vulkan::Buffer *get_upscaled_hidden_rdram_buffer() const;
|
||||
|
||||
void lock_command_processing();
|
||||
void unlock_command_processing();
|
||||
|
||||
private:
|
||||
CommandProcessor &processor;
|
||||
Vulkan::Device *device = nullptr;
|
||||
Vulkan::Buffer *rdram = nullptr;
|
||||
|
||||
Vulkan::BufferHandle upscaling_reference_rdram;
|
||||
Vulkan::BufferHandle upscaling_multisampled_rdram;
|
||||
Vulkan::BufferHandle upscaling_multisampled_hidden_rdram;
|
||||
|
||||
struct
|
||||
{
|
||||
uint8_t *host_rdram = nullptr;
|
||||
Vulkan::BufferHandle staging_rdram;
|
||||
Vulkan::BufferHandle staging_readback;
|
||||
std::unique_ptr<std::atomic_uint32_t[]> pending_writes_for_page;
|
||||
std::vector<uint32_t> page_to_direct_copy;
|
||||
std::vector<uint32_t> page_to_masked_copy;
|
||||
std::vector<uint32_t> page_to_pending_readback;
|
||||
unsigned num_pages = 0;
|
||||
unsigned staging_readback_pages = 0;
|
||||
unsigned staging_readback_index = 0; // Ringbuffer the readbacks.
|
||||
} incoherent;
|
||||
|
||||
size_t rdram_offset = 0;
|
||||
size_t rdram_size = 0;
|
||||
bool is_host_coherent = false;
|
||||
Vulkan::Buffer *hidden_rdram = nullptr;
|
||||
Vulkan::Buffer *tmem = nullptr;
|
||||
const ShaderBank *shader_bank = nullptr;
|
||||
|
||||
bool init_caps();
|
||||
void init_blender_lut();
|
||||
void init_buffers(const RendererOptions &options);
|
||||
bool init_internal_upscaling_factor(const RendererOptions &options);
|
||||
|
||||
struct
|
||||
{
|
||||
uint32_t addr = 0;
|
||||
uint32_t depth_addr = 0;
|
||||
uint32_t width = 0;
|
||||
uint32_t deduced_height = 0;
|
||||
FBFormat fmt = FBFormat::I8;
|
||||
bool depth_write_pending = false;
|
||||
bool color_write_pending = false;
|
||||
} fb;
|
||||
|
||||
struct StreamCaches
|
||||
{
|
||||
ScissorState scissor_state = {};
|
||||
StaticRasterizationState static_raster_state = {};
|
||||
DepthBlendState depth_blend_state = {};
|
||||
|
||||
StateCache<StaticRasterizationState, Limits::MaxStaticRasterizationStates> static_raster_state_cache;
|
||||
StateCache<DepthBlendState, Limits::MaxDepthBlendStates> depth_blend_state_cache;
|
||||
StateCache<TileInfo, Limits::MaxTileInfoStates> tile_info_state_cache;
|
||||
|
||||
StreamCache<TriangleSetup, Limits::MaxPrimitives> triangle_setup;
|
||||
StreamCache<ScissorState, Limits::MaxPrimitives> scissor_setup;
|
||||
StreamCache<AttributeSetup, Limits::MaxPrimitives> attribute_setup;
|
||||
StreamCache<DerivedSetup, Limits::MaxPrimitives> derived_setup;
|
||||
StreamCache<InstanceIndices, Limits::MaxPrimitives> state_indices;
|
||||
StreamCache<SpanInfoOffsets, Limits::MaxPrimitives> span_info_offsets;
|
||||
StreamCache<SpanInterpolationJob, Limits::MaxSpanSetups> span_info_jobs;
|
||||
|
||||
std::vector<UploadInfo> tmem_upload_infos;
|
||||
unsigned max_shaded_tiles = 0;
|
||||
Vulkan::CommandBufferHandle cmd;
|
||||
} stream;
|
||||
|
||||
void ensure_command_buffer();
|
||||
|
||||
TileInfo tiles[Limits::MaxNumTiles];
|
||||
Vulkan::BufferHandle tmem_instances;
|
||||
Vulkan::BufferHandle span_setups;
|
||||
Vulkan::BufferHandle blender_divider_lut_buffer;
|
||||
Vulkan::BufferViewHandle blender_divider_buffer;
|
||||
|
||||
Vulkan::BufferHandle tile_binning_buffer;
|
||||
Vulkan::BufferHandle tile_binning_buffer_coarse;
|
||||
|
||||
Vulkan::BufferHandle indirect_dispatch_buffer;
|
||||
Vulkan::BufferHandle tile_work_list;
|
||||
Vulkan::BufferHandle per_tile_offsets;
|
||||
Vulkan::BufferHandle per_tile_shaded_color;
|
||||
Vulkan::BufferHandle per_tile_shaded_depth;
|
||||
Vulkan::BufferHandle per_tile_shaded_shaded_alpha;
|
||||
Vulkan::BufferHandle per_tile_shaded_coverage;
|
||||
|
||||
struct MappedBuffer
|
||||
{
|
||||
Vulkan::BufferHandle buffer;
|
||||
bool is_host = false;
|
||||
};
|
||||
|
||||
struct RenderBuffers
|
||||
{
|
||||
void init(Vulkan::Device &device, Vulkan::BufferDomain domain, RenderBuffers *borrow);
|
||||
static MappedBuffer create_buffer(Vulkan::Device &device, Vulkan::BufferDomain domain, VkDeviceSize size, MappedBuffer *borrow);
|
||||
|
||||
MappedBuffer triangle_setup;
|
||||
MappedBuffer attribute_setup;
|
||||
MappedBuffer derived_setup;
|
||||
MappedBuffer scissor_setup;
|
||||
|
||||
MappedBuffer static_raster_state;
|
||||
MappedBuffer depth_blend_state;
|
||||
MappedBuffer tile_info_state;
|
||||
|
||||
MappedBuffer state_indices;
|
||||
MappedBuffer span_info_offsets;
|
||||
|
||||
MappedBuffer span_info_jobs;
|
||||
Vulkan::BufferViewHandle span_info_jobs_view;
|
||||
};
|
||||
|
||||
struct RenderBuffersUpdater
|
||||
{
|
||||
void init(Vulkan::Device &device);
|
||||
void upload(Vulkan::Device &device, const StreamCaches &caches, Vulkan::CommandBuffer &cmd);
|
||||
|
||||
template <typename Cache>
|
||||
void upload(Vulkan::CommandBuffer &cmd, Vulkan::Device &device,
|
||||
const MappedBuffer &gpu, const MappedBuffer &cpu, const Cache &cache, bool &did_upload);
|
||||
|
||||
RenderBuffers cpu, gpu;
|
||||
};
|
||||
|
||||
struct InternalSynchronization
|
||||
{
|
||||
Vulkan::Fence fence;
|
||||
};
|
||||
|
||||
struct Constants
|
||||
{
|
||||
uint32_t blend_color = 0;
|
||||
uint32_t fog_color = 0;
|
||||
uint32_t env_color = 0;
|
||||
uint32_t primitive_color = 0;
|
||||
uint32_t fill_color = 0;
|
||||
uint8_t min_level = 0;
|
||||
uint8_t prim_lod_frac = 0;
|
||||
int32_t prim_depth = 0;
|
||||
uint16_t prim_dz = 0;
|
||||
uint16_t convert[6] = {};
|
||||
|
||||
uint16_t key_width[3] = {};
|
||||
uint8_t key_center[3] = {};
|
||||
uint8_t key_scale[3] = {};
|
||||
|
||||
bool use_prim_depth = false;
|
||||
} constants;
|
||||
|
||||
RenderBuffersUpdater buffer_instances[Limits::NumSyncStates];
|
||||
InternalSynchronization internal_sync[Limits::NumSyncStates];
|
||||
uint32_t sync_indices_needs_flush = 0;
|
||||
unsigned buffer_instance = 0;
|
||||
uint32_t base_primitive_index = 0;
|
||||
unsigned pending_render_passes = 0;
|
||||
unsigned pending_render_passes_upscaled = 0;
|
||||
unsigned pending_primitives = 0;
|
||||
unsigned pending_primitives_upscaled = 0;
|
||||
|
||||
bool tmem_upload_needs_flush(uint32_t addr) const;
|
||||
|
||||
bool render_pass_is_upscaled() const;
|
||||
bool should_render_upscaled() const;
|
||||
|
||||
void flush_queues();
|
||||
void submit_render_pass(Vulkan::CommandBuffer &cmd);
|
||||
void submit_render_pass_upscaled(Vulkan::CommandBuffer &cmd);
|
||||
void submit_render_pass_end(Vulkan::CommandBuffer &cmd);
|
||||
void submit_to_queue();
|
||||
void begin_new_context();
|
||||
void reset_context();
|
||||
bool need_flush() const;
|
||||
void maintain_queues();
|
||||
void maintain_queues_idle();
|
||||
void update_tmem_instances(Vulkan::CommandBuffer &cmd);
|
||||
void submit_span_setup_jobs(Vulkan::CommandBuffer &cmd, bool upscaled);
|
||||
void update_deduced_height(const TriangleSetup &setup);
|
||||
void submit_tile_binning_combined(Vulkan::CommandBuffer &cmd, bool upscaled);
|
||||
void clear_indirect_buffer(Vulkan::CommandBuffer &cmd);
|
||||
void submit_rasterization(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &tmem, bool upscaled);
|
||||
void submit_depth_blend(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &tmem, bool upscaled, bool force_write_mask);
|
||||
|
||||
enum class ResolveStage { Pre, Post, SSAAResolve };
|
||||
void submit_update_upscaled_domain(Vulkan::CommandBuffer &cmd, ResolveStage stage);
|
||||
void submit_update_upscaled_domain(Vulkan::CommandBuffer &cmd, ResolveStage stage,
|
||||
unsigned addr, unsigned depth_addr,
|
||||
unsigned width, unsigned height,
|
||||
unsigned pixel_size_log2);
|
||||
void submit_clear_super_sample_write_mask(Vulkan::CommandBuffer &cmd, unsigned width, unsigned height);
|
||||
|
||||
SpanInfoOffsets allocate_span_jobs(const TriangleSetup &setup);
|
||||
|
||||
DerivedSetup build_derived_attributes(const AttributeSetup &attr) const;
|
||||
void build_combiner_constants(DerivedSetup &setup, unsigned cycle) const;
|
||||
int filter_debug_channel_x = -1;
|
||||
int filter_debug_channel_y = -1;
|
||||
bool debug_channel = false;
|
||||
|
||||
void message(const std::string &tag, uint32_t code,
|
||||
uint32_t x, uint32_t y, uint32_t z,
|
||||
uint32_t num_words, const Vulkan::DebugChannelInterface::Word *words) override;
|
||||
|
||||
bool can_support_minimum_subgroup_size(unsigned size) const;
|
||||
bool supports_subgroup_size_control(uint32_t minimum_size, uint32_t maximum_size) const;
|
||||
|
||||
std::unordered_set<Util::Hash> pending_async_pipelines;
|
||||
|
||||
unsigned compute_conservative_max_num_tiles(const TriangleSetup &setup) const;
|
||||
|
||||
void deduce_static_texture_state(unsigned tile, unsigned max_lod_level);
|
||||
void deduce_noise_state();
|
||||
static StaticRasterizationState normalize_static_state(StaticRasterizationState state);
|
||||
void fixup_triangle_setup(TriangleSetup &setup) const;
|
||||
|
||||
struct Caps
|
||||
{
|
||||
int timestamp = 0;
|
||||
bool force_sync = false;
|
||||
bool ubershader = false;
|
||||
bool supports_small_integer_arithmetic = false;
|
||||
bool subgroup_tile_binning = false;
|
||||
bool subgroup_depth_blend = false;
|
||||
bool super_sample_readback = false;
|
||||
bool super_sample_readback_dither = false;
|
||||
unsigned upscaling = 1;
|
||||
unsigned max_num_tile_instances = Limits::MaxTileInstances;
|
||||
unsigned max_tiles_x = ImplementationConstants::MaxTilesX;
|
||||
unsigned max_tiles_y = ImplementationConstants::MaxTilesY;
|
||||
unsigned max_width = Limits::MaxWidth;
|
||||
unsigned max_height = Limits::MaxHeight;
|
||||
} caps;
|
||||
|
||||
struct PipelineExecutor
|
||||
{
|
||||
Vulkan::Device *device;
|
||||
bool is_sentinel(const Vulkan::DeferredPipelineCompile &compile) const;
|
||||
void perform_work(const Vulkan::DeferredPipelineCompile &compile) const;
|
||||
void notify_work_locked(const Vulkan::DeferredPipelineCompile &compile) const;
|
||||
};
|
||||
|
||||
std::unique_ptr<WorkerThread<Vulkan::DeferredPipelineCompile, PipelineExecutor>> pipeline_worker;
|
||||
|
||||
void resolve_coherency_host_to_gpu(Vulkan::CommandBuffer &cmd);
|
||||
void resolve_coherency_gpu_to_host(CoherencyOperation &op, Vulkan::CommandBuffer &cmd);
|
||||
uint32_t get_byte_size_for_bound_color_framebuffer() const;
|
||||
uint32_t get_byte_size_for_bound_depth_framebuffer() const;
|
||||
void mark_pages_for_gpu_read(uint32_t base_addr, uint32_t byte_count);
|
||||
void lock_pages_for_gpu_write(uint32_t base_addr, uint32_t byte_count);
|
||||
|
||||
std::atomic_uint32_t active_submissions;
|
||||
void enqueue_fence_wait(Vulkan::Fence fence);
|
||||
uint64_t last_submit_ns = 0;
|
||||
|
||||
std::mutex idle_lock;
|
||||
};
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BINNING_H_
|
||||
#define BINNING_H_
|
||||
|
||||
// There are 4 critical Y coordinates to test when binning. Top, bottom, mid, and mid - 1.
|
||||
|
||||
const int SUBPIXELS_Y = 4;
|
||||
|
||||
// Drops the low 15 bits of every lane with an arithmetic (sign-preserving) shift.
ivec4 quantize_x(ivec4 x)
{
    const int QUANTIZE_SHIFT = 15;
    return x >> QUANTIZE_SHIFT;
}
|
||||
|
||||
// Returns the smallest of the four components of v.
int minimum4(ivec4 v)
{
    return min(min(v.x, v.z), min(v.y, v.w));
}
|
||||
|
||||
// Returns the largest of the four components of v.
int maximum4(ivec4 v)
{
    return max(max(v.x, v.z), max(v.y, v.w));
}
|
||||
|
||||
// Per-lane multiply-add a * b + c with a 64-bit intermediate.
// Returns the low 32 bits of each lane; the high 32 bits are written to hi_bits.
// NOTE(review): c is added as an unsigned 32-bit value, so only the carry out of the
// low word reaches hi_bits; a negative c is not sign-extended into the high word.
// Confirm this is the intended behavior.
ivec4 madd_32_64(ivec4 a, int b, int c, out ivec4 hi_bits)
{
    // Full 64-bit signed product, split into high and low words.
    ivec4 product_lo, product_hi;
    imulExtended(a, ivec4(b), product_hi, product_lo);

    // Add the constant to the low word and propagate the carry upwards.
    uvec4 carry_out;
    uvec4 sum_lo = uaddCarry(uvec4(product_lo), uvec4(c), carry_out);

    hi_bits = product_hi + ivec4(carry_out);
    return ivec4(sum_lo);
}
|
||||
|
||||
// Evaluates the triangle's edge X positions at the four Y coordinates in ys and returns
// a (min X, max X) range for binning. The XH and XM edges are interpolated from YH (with
// its sub-pixel bits cleared), the XL edge from YM; samples above scaling * YM use XM in
// place of XL. If any quantized X exceeds 2047 * scaling in magnitude, the conservative
// range (0, 0x7fffffff) is returned instead.
ivec2 interpolate_xs(TriangleSetup setup, ivec4 ys, bool flip, int scaling)
{
    int base_yh = (setup.yh & ~(SUBPIXELS_Y - 1)) * scaling;
    int base_ym = int(setup.ym) * scaling;

    // Interpolate with a 64-bit intermediate so overflow can be detected from the high words.
    ivec4 xh_hi, xm_hi, xl_hi;
    ivec4 xh = madd_32_64(ys - base_yh, setup.dxhdy, scaling * setup.xh, xh_hi);
    ivec4 xm = madd_32_64(ys - base_yh, setup.dxmdy, scaling * setup.xm, xm_hi);
    ivec4 xl = madd_32_64(ys - base_ym, setup.dxldy, scaling * setup.xl, xl_hi);

    // Samples above the mid point follow the XM edge rather than XL.
    bvec4 above_mid = lessThan(ys, ivec4(scaling * setup.ym));
    xl = mix(xl, xm, above_mid);
    xl_hi = mix(xl_hi, xm_hi, above_mid);

    // Saturate to 32-bit signed based on the high word alone (no 64-bit math).
    // NOTE(review): only hi > 0 and hi < -1 are saturated; hi == 0 with the sign bit of the
    // low word set (or hi == -1 with it clear) passes through unchanged. Confirm intended.
    xh = mix(xh, ivec4(0x7fffffff), greaterThan(xh_hi, ivec4(0)));
    xh = mix(xh, ivec4(-0x80000000), lessThan(xh_hi, ivec4(-1)));
    xl = mix(xl, ivec4(0x7fffffff), greaterThan(xl_hi, ivec4(0)));
    xl = mix(xl, ivec4(-0x80000000), lessThan(xl_hi, ivec4(-1)));

    ivec4 xh_quantized = quantize_x(xh);
    ivec4 xl_quantized = quantize_x(xl);

    // The flip flag decides which edge is on the left.
    ivec4 xleft = flip ? xh_quantized : xl_quantized;
    ivec4 xright = flip ? xl_quantized : xh_quantized;

    // Any out-of-range result means overflow: be conservative when binning.
    int max_range = maximum4(max(abs(xleft), abs(xright)));
    if (max_range > 2047 * scaling)
        return ivec2(0, 0x7fffffff);

    return ivec2(minimum4(xleft), maximum4(xright));
}
|
||||
|
||||
// Tests whether a primitive overlaps the tile spanning lo..hi (inclusive).
// lo.y / hi.y are multiplied by SUBPIXELS_Y to get sub-pixel rows; lo.x / hi.x are
// compared directly against the X range returned by interpolate_xs().
// Returns true when the primitive should be binned to the tile.
bool bin_primitive(TriangleSetup setup, ivec2 lo, ivec2 hi, int scaling)
{
    // Tile extent in sub-pixel rows, clipped against the primitive's [YH, YL) span.
    int first_y = max(lo.y * SUBPIXELS_Y, scaling * int(setup.yh));
    int last_y = min((hi.y * SUBPIXELS_Y) + (SUBPIXELS_Y - 1), scaling * int(setup.yl) - 1);

    // Nothing left after clipping in Y.
    if (last_y < first_y)
        return false;

    bool flip = (setup.flags & TRIANGLE_SETUP_FLIP_BIT) != 0;

    // Sample X at the first and last row plus the two rows around the mid point
    // (mid - 1 and mid), clamped into the clipped Y span.
    ivec2 mid_ys = clamp(setup.ym * scaling + ivec2(-1, 0), ivec2(first_y), ivec2(last_y));
    ivec2 x_range = interpolate_xs(setup, ivec4(first_y, last_y, mid_ys), flip, scaling);

    // Overlap test of the primitive's X range against the tile's X range.
    return max(x_range.x, lo.x) <= min(x_range.y, hi.x);
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,145 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BLENDER_H_
|
||||
#define BLENDER_H_
|
||||
|
||||
// Inputs consumed by blender(). Member order is unchanged.
struct BlendInputs
{
    // RGBA sources selectable through the blend mode fields.
    u8x4 pixel_color, memory_color, fog_color, blend_color;
    // Alpha used when the 1B selector is BLEND_MODE_1B_SHADE_ALPHA.
    u8 shade_alpha;
};
|
||||
|
||||
// 1A: value of blend_modes.x, selects the first color (rgb0) in blender().
const int BLEND_MODE_1A_PIXEL_COLOR     = 0;
const int BLEND_MODE_1A_MEMORY_COLOR    = 1;
const int BLEND_MODE_1A_BLEND_COLOR     = 2;
const int BLEND_MODE_1A_FOG_COLOR       = 3;

// 1B: value of blend_modes.y, selects the first alpha (a0) in blender().
const int BLEND_MODE_1B_PIXEL_ALPHA     = 0;
const int BLEND_MODE_1B_FOG_ALPHA       = 1;
const int BLEND_MODE_1B_SHADE_ALPHA     = 2;
const int BLEND_MODE_1B_ZERO            = 3;

// 2A: value of blend_modes.z, selects the second color (rgb1) in blender().
const int BLEND_MODE_2A_PIXEL_COLOR     = 0;
const int BLEND_MODE_2A_MEMORY_COLOR    = 1;
const int BLEND_MODE_2A_BLEND_COLOR     = 2;
const int BLEND_MODE_2A_FOG_COLOR       = 3;

// 2B: value of blend_modes.w, selects the second alpha (a1) in blender().
const int BLEND_MODE_2B_INV_PIXEL_ALPHA = 0;
const int BLEND_MODE_2B_MEMORY_ALPHA    = 1;
const int BLEND_MODE_2B_ONE             = 2;
const int BLEND_MODE_2B_ZERO            = 3;
|
||||
|
||||
// Blends two selected colors with two selected alphas and returns the resulting RGB.
// blend_modes holds the four selectors: .x = 1A (rgb0), .y = 1B (a0), .z = 2A (rgb1), .w = 2B (a1).
// On the final cycle there are two pass-through shortcuts (rgb1 for color-on-coverage
// without wrap, rgb0 when blending is disabled or the pixel is fully opaque), and the
// normalization goes through uBlenderDividerLUT unless force_blend is set.
u8x3 blender(BlendInputs inputs, u8x4 blend_modes,
             bool force_blend, bool blend_en, bool color_on_coverage, bool coverage_wrap, u8x2 blend_shift,
             bool final_cycle)
{
    // Second color (2A selector).
    u8x3 rgb1;
    int mode_2a = int(blend_modes.z);
    if (mode_2a == BLEND_MODE_2A_PIXEL_COLOR)
        rgb1 = inputs.pixel_color.rgb;
    else if (mode_2a == BLEND_MODE_2A_MEMORY_COLOR)
        rgb1 = inputs.memory_color.rgb;
    else if (mode_2a == BLEND_MODE_2A_BLEND_COLOR)
        rgb1 = inputs.blend_color.rgb;
    else if (mode_2a == BLEND_MODE_2A_FOG_COLOR)
        rgb1 = inputs.fog_color.rgb;

    if (final_cycle && color_on_coverage && !coverage_wrap)
        return rgb1;

    // First color (1A selector).
    u8x3 rgb0;
    int mode_1a = int(blend_modes.x);
    if (mode_1a == BLEND_MODE_1A_PIXEL_COLOR)
        rgb0 = inputs.pixel_color.rgb;
    else if (mode_1a == BLEND_MODE_1A_MEMORY_COLOR)
        rgb0 = inputs.memory_color.rgb;
    else if (mode_1a == BLEND_MODE_1A_BLEND_COLOR)
        rgb0 = inputs.blend_color.rgb;
    else if (mode_1a == BLEND_MODE_1A_FOG_COLOR)
        rgb0 = inputs.fog_color.rgb;

    // Blending disabled, or a fully opaque pixel using (pixel alpha, 1 - pixel alpha):
    // the first color passes through untouched.
    bool opaque_passthrough = blend_modes.y == BLEND_MODE_1B_PIXEL_ALPHA &&
                              blend_modes.w == BLEND_MODE_2B_INV_PIXEL_ALPHA &&
                              inputs.pixel_color.a == U8_C(0xff);
    if (final_cycle && (!blend_en || opaque_passthrough))
        return rgb0;

    // First alpha (1B selector).
    u8 a0;
    int mode_1b = int(blend_modes.y);
    if (mode_1b == BLEND_MODE_1B_PIXEL_ALPHA)
        a0 = inputs.pixel_color.a;
    else if (mode_1b == BLEND_MODE_1B_FOG_ALPHA)
        a0 = inputs.fog_color.a;
    else if (mode_1b == BLEND_MODE_1B_SHADE_ALPHA)
        a0 = inputs.shade_alpha;
    else if (mode_1b == BLEND_MODE_1B_ZERO)
        a0 = U8_C(0);

    // Second alpha (2B selector). The inverse is taken from a0 before it is reduced below.
    u8 a1;
    int mode_2b = int(blend_modes.w);
    if (mode_2b == BLEND_MODE_2B_INV_PIXEL_ALPHA)
        a1 = ~a0 & U8_C(0xff);
    else if (mode_2b == BLEND_MODE_2B_MEMORY_ALPHA)
        a1 = inputs.memory_color.a;
    else if (mode_2b == BLEND_MODE_2B_ONE)
        a1 = U8_C(0xff);
    else if (mode_2b == BLEND_MODE_2B_ZERO)
        a1 = U8_C(0);

    // Reduce both alphas from 8 to 5 bits.
    a0 >>= U8_C(3);
    a1 >>= U8_C(3);

    // Memory alpha mode applies the blend shifters.
    if (blend_modes.w == BLEND_MODE_2B_MEMORY_ALPHA)
    {
        a0 = (a0 >> blend_shift.x) & U8_C(0x3c);
        a1 = (a1 >> blend_shift.y) | U8_C(3);
    }

    i16x3 blended = i16x3(rgb0) * i16(a0) + i16x3(rgb1) * (i16(a1) + I16_C(1));

    if (final_cycle && !force_blend)
    {
        // Serious funk here. Somehow the RDP implemented a divider to deal with weighted average.
        // Typically relevant when using blender shifters from interpenetrating Z mode.
        // Under normal condition, this is implemented as a straight integer divider, but
        // for edge cases, we need a look-up table. The results make no sense.
        int blend_sum = (int(a0) >> 2) + (int(a1) >> 2) + 1;
        int lut_row = blend_sum << 11;
        i16x3 lut_column = (blended >> I16_C(2)) & I16_C(0x7ff);

        rgb0.r = u8(texelFetch(uBlenderDividerLUT, lut_row | lut_column.x).x);
        rgb0.g = u8(texelFetch(uBlenderDividerLUT, lut_row | lut_column.y).x);
        rgb0.b = u8(texelFetch(uBlenderDividerLUT, lut_row | lut_column.z).x);
    }
    else
    {
        // Plain normalization: drop the 5 fractional alpha bits.
        rgb0 = u8x3(blended >> I16_C(5));
    }

    return rgb0 & U8_C(0xff);
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,78 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CLAMPING_H_
|
||||
#define CLAMPING_H_
|
||||
|
||||
// clamp_9bit_notrunc: interprets each lane as a 9-bit value with special wrap-around
// and clamps it to [0, 0xff]. The result is returned in 16-bit lanes.
//   [-256, -129] -> 0xff (biasing by 0x80 makes these wrap to positive values)
//   [-128,   -1] -> 0
#if SMALL_TYPES && 0
// 16-bit variant, compiled out by the "&& 0":
// this path is buggy on RADV LLVM, disable for time being.
i16x4 clamp_9bit_notrunc(i16x4 color)
{
    color -= I16_C(0x80);
    // Keep the low 9 bits and sign-extend them within the 16-bit lane (16 - 9 = 7).
    color <<= I16_C(7);
    color >>= I16_C(7);
    color += I16_C(0x80);
    return clamp(color, i16x4(0), i16x4(0xff));
}
#else
// 32-bit variant: same steps, with bitfieldExtract() doing the 9-bit sign extension.
i16x4 clamp_9bit_notrunc(ivec4 color)
{
    ivec4 wrapped = bitfieldExtract(color - 0x80, 0, 9) + 0x80;
    return i16x4(clamp(wrapped, ivec4(0), ivec4(0xff)));
}
#endif
|
||||
|
||||
// Vector overload: clamps via clamp_9bit_notrunc() and narrows the result to 8-bit lanes.
u8x4 clamp_9bit(i16x4 color)
{
    i16x4 clamped = clamp_9bit_notrunc(color);
    return u8x4(clamped);
}
|
||||
|
||||
// Scalar overload: bias by 0x80, sign-extend the low 9 bits, remove the bias,
// then clamp to [0, 0xff].
int clamp_9bit(int color)
{
    int biased = color - 0x80;
    int wrapped = bitfieldExtract(biased, 0, 9) + 0x80;
    return clamp(wrapped, 0, 0xff);
}
|
||||
|
||||
// Returns 18-bit UNORM depth.
// Like the RGBA clamp, one extra bit is reserved to tell overflow from underflow:
// the input is biased by 1 << 17, sign-extended from 19 bits, un-biased and clamped.
//   [0x00000, 0x3ffff] maps to self.
//   [0x40000, 0x5ffff] maps to 0x3ffff.
//   [0x60000, 0x7ffff] maps to 0.
int clamp_z(int z)
{
    const int BIAS = 1 << 17;
    int wrapped = bitfieldExtract(z - BIAS, 0, 19) + BIAS;
    return clamp(wrapped, 0, 0x3ffff);
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,33 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
layout(local_size_x_id = 0) in;
|
||||
|
||||
layout(set = 0, binding = 0, std430) writeonly buffer ClearIndirectBuffer
|
||||
{
|
||||
uvec4 indirects[];
|
||||
};
|
||||
|
||||
// Each invocation resets one element of the buffer to (0, 1, 1, 0).
// NOTE(review): presumably indirect dispatch arguments (x = 0, y = 1, z = 1) plus one
// extra word; confirm against the consumer of this buffer.
void main()
{
    uint slot = gl_GlobalInvocationID.x;
    indirects[slot] = uvec4(0u, 1u, 1u, 0u);
}
|
|
@ -0,0 +1,34 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
layout(local_size_x_id = 0) in;
|
||||
|
||||
layout(set = 0, binding = 0, std430) writeonly buffer ToClear
|
||||
{
|
||||
uint elems[];
|
||||
} mask_ram;
|
||||
|
||||
// Each invocation zeroes the single word of mask_ram at its global invocation index.
void main()
{
    uint word_index = gl_GlobalInvocationID.x;
    mask_ram.elems[word_index] = 0u;
}
|
|
@ -0,0 +1,42 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
layout(local_size_x_id = 0) in;
|
||||
layout(constant_id = 1) const int PAGE_STRIDE = 256;
|
||||
|
||||
layout(set = 0, binding = 0, std430) writeonly buffer SSBO
|
||||
{
|
||||
uint write_mask[];
|
||||
};
|
||||
|
||||
layout(set = 1, binding = 0, std140) uniform UBO
|
||||
{
|
||||
uvec4 offsets[1024];
|
||||
};
|
||||
|
||||
// Each workgroup clears words of write_mask for one page: the page index is read from
// the UBO, four indices per uvec4, and multiplied by PAGE_STRIDE to get the first word.
// Each invocation clears the word at that base plus its local invocation index.
void main()
{
    uint group = gl_WorkGroupID.x;
    uint page = offsets[group >> 2u][group & 3u];
    uint first_word = page * uint(PAGE_STRIDE);
    write_mask[first_word + gl_LocalInvocationIndex] = 0u;
}
|
|
@ -0,0 +1,284 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef COMBINER_H_
|
||||
#define COMBINER_H_
|
||||
|
||||
#include "clamping.h"
|
||||
|
||||
// Special sign-extend without explicit clamp: bias each lane by 0x80, sign-extend the
// low 9 bits, then remove the bias again.
ivec4 special_expand(ivec4 value)
{
    ivec4 biased = value - 0x80;
    ivec4 wrapped = bitfieldExtract(biased, 0, 9);
    return wrapped + 0x80;
}
|
||||
|
||||
// Evaluates the combiner equation ((a - b) * c + 0x80) >> 8, plus d, per lane.
// c is sign-extended from 9 bits; a, b and d go through special_expand() first.
i16x4 combiner_equation(ivec4 a, ivec4 b, ivec4 c, ivec4 d)
{
    // Sign-extend the multiplier to 9 bits.
    ivec4 multiplier = bitfieldExtract(c, 0, 9);

    // Needed to deal with very specific 9-bit sign bits.
    ivec4 difference = special_expand(a) - special_expand(b);
    ivec4 addend = special_expand(d);

    // The 0x80 term is added before the shift that drops the low 8 bits.
    ivec4 scaled = (difference * multiplier + 0x80) >> 8;
    return i16x4(scaled) + i16x4(addend);
}
|
||||
|
||||
// Inputs consumed by the select_*() helpers of the combiner. Member order is unchanged.
struct CombinerInputs
{
    // Fallback values, used when a selector matches none of the named sources.
    u8x4 constant_muladd, constant_mulsub, constant_mul, constant_add;

    u8x4 shade;
    // combined, texel0 and texel1 are stored as 16-bit lanes.
    i16x4 combined, texel0, texel1;
    i16 lod_frac, noise;
};
|
||||
|
||||
// RGB selector values for select_muladd(); unlisted values pick constant_muladd.
const int RGB_MULADD_COMBINED       = 0;
const int RGB_MULADD_TEXEL0         = 1;
const int RGB_MULADD_TEXEL1         = 2;
const int RGB_MULADD_SHADE          = 4;
const int RGB_MULADD_ONE            = 6;
const int RGB_MULADD_NOISE          = 7;

// RGB selector values for select_mulsub(); unlisted values pick constant_mulsub.
const int RGB_MULSUB_COMBINED       = 0;
const int RGB_MULSUB_TEXEL0         = 1;
const int RGB_MULSUB_TEXEL1         = 2;
const int RGB_MULSUB_SHADE          = 4;
const int RGB_MULSUB_K4             = 7;

// RGB selector values for select_mul(); unlisted values pick constant_mul.
const int RGB_MUL_COMBINED          = 0;
const int RGB_MUL_TEXEL0            = 1;
const int RGB_MUL_TEXEL1            = 2;
const int RGB_MUL_SHADE             = 4;
const int RGB_MUL_COMBINED_ALPHA    = 7;
const int RGB_MUL_TEXEL0_ALPHA      = 8;
const int RGB_MUL_TEXEL1_ALPHA      = 9;
const int RGB_MUL_SHADE_ALPHA       = 11;
const int RGB_MUL_LOD_FRAC          = 13;
const int RGB_MUL_K5                = 15;

// RGB selector values for select_add(); unlisted values pick constant_add.
const int RGB_ADD_COMBINED          = 0;
const int RGB_ADD_TEXEL0            = 1;
const int RGB_ADD_TEXEL1            = 2;
const int RGB_ADD_SHADE             = 4;
const int RGB_ADD_ONE               = 6;

// Alpha selector values shared by select_muladd(), select_mulsub() and select_add().
const int ALPHA_ADDSUB_COMBINED     = 0;
const int ALPHA_ADDSUB_TEXEL0_ALPHA = 1;
const int ALPHA_ADDSUB_TEXEL1_ALPHA = 2;
const int ALPHA_ADDSUB_SHADE_ALPHA  = 4;
const int ALPHA_ADDSUB_ONE          = 6;

// Alpha selector values for select_mul().
const int ALPHA_MUL_LOD_FRAC        = 0;
const int ALPHA_MUL_TEXEL0_ALPHA    = 1;
const int ALPHA_MUL_TEXEL1_ALPHA    = 2;
const int ALPHA_MUL_SHADE_ALPHA     = 4;
|
||||
|
||||
// Picks the first operand of the combiner equation (the "a" in (a - b) * c + d).
// RGB and alpha are selected independently; a selector that matches no named source
// falls back to constant_muladd.
ivec4 select_muladd(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
    ivec3 rgb;
    if (selector_rgb == RGB_MULADD_COMBINED)
        rgb = inputs.combined.rgb;
    else if (selector_rgb == RGB_MULADD_TEXEL0)
        rgb = inputs.texel0.rgb;
    else if (selector_rgb == RGB_MULADD_TEXEL1)
        rgb = inputs.texel1.rgb;
    else if (selector_rgb == RGB_MULADD_SHADE)
        rgb = inputs.shade.rgb;
    else if (selector_rgb == RGB_MULADD_NOISE)
        rgb = ivec3(inputs.noise);
    else if (selector_rgb == RGB_MULADD_ONE)
        rgb = ivec3(0x100);
    else
        rgb = inputs.constant_muladd.rgb;

    int alpha;
    if (selector_alpha == ALPHA_ADDSUB_COMBINED)
        alpha = inputs.combined.a;
    else if (selector_alpha == ALPHA_ADDSUB_TEXEL0_ALPHA)
        alpha = inputs.texel0.a;
    else if (selector_alpha == ALPHA_ADDSUB_TEXEL1_ALPHA)
        alpha = inputs.texel1.a;
    else if (selector_alpha == ALPHA_ADDSUB_SHADE_ALPHA)
        alpha = inputs.shade.a;
    else if (selector_alpha == ALPHA_ADDSUB_ONE)
        alpha = 0x100;
    else
        alpha = inputs.constant_muladd.a;

    return ivec4(rgb, alpha);
}
|
||||
|
||||
// Picks the subtracted operand of the combiner equation (the "b" in (a - b) * c + d).
// RGB and alpha are selected independently; a selector that matches no named source
// falls back to constant_mulsub.
ivec4 select_mulsub(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
    ivec3 rgb;
    if (selector_rgb == RGB_MULSUB_COMBINED)
        rgb = inputs.combined.rgb;
    else if (selector_rgb == RGB_MULSUB_TEXEL0)
        rgb = inputs.texel0.rgb;
    else if (selector_rgb == RGB_MULSUB_TEXEL1)
        rgb = inputs.texel1.rgb;
    else if (selector_rgb == RGB_MULSUB_SHADE)
        rgb = inputs.shade.rgb;
    else if (selector_rgb == RGB_MULSUB_K4)
    {
        // K4 is assembled from the G (high byte) and B (low byte) channels of the constant.
        rgb = ivec3((int(inputs.constant_mulsub.g) << 8) | inputs.constant_mulsub.b);
    }
    else
        rgb = inputs.constant_mulsub.rgb;

    int alpha;
    if (selector_alpha == ALPHA_ADDSUB_COMBINED)
        alpha = inputs.combined.a;
    else if (selector_alpha == ALPHA_ADDSUB_TEXEL0_ALPHA)
        alpha = inputs.texel0.a;
    else if (selector_alpha == ALPHA_ADDSUB_TEXEL1_ALPHA)
        alpha = inputs.texel1.a;
    else if (selector_alpha == ALPHA_ADDSUB_SHADE_ALPHA)
        alpha = inputs.shade.a;
    else if (selector_alpha == ALPHA_ADDSUB_ONE)
        alpha = 0x100;
    else
        alpha = inputs.constant_mulsub.a;

    return ivec4(rgb, alpha);
}
|
||||
|
||||
// Picks the multiplier of the combiner equation (the "c" in (a - b) * c + d).
// RGB and alpha are selected independently; a selector that matches no named source
// falls back to constant_mul.
ivec4 select_mul(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
    ivec3 rgb;
    if (selector_rgb == RGB_MUL_COMBINED)
        rgb = inputs.combined.rgb;
    else if (selector_rgb == RGB_MUL_TEXEL0)
        rgb = inputs.texel0.rgb;
    else if (selector_rgb == RGB_MUL_TEXEL1)
        rgb = inputs.texel1.rgb;
    else if (selector_rgb == RGB_MUL_SHADE)
        rgb = inputs.shade.rgb;
    else if (selector_rgb == RGB_MUL_COMBINED_ALPHA)
        rgb = inputs.combined.aaa;
    else if (selector_rgb == RGB_MUL_TEXEL0_ALPHA)
        rgb = inputs.texel0.aaa;
    else if (selector_rgb == RGB_MUL_TEXEL1_ALPHA)
        rgb = inputs.texel1.aaa;
    else if (selector_rgb == RGB_MUL_SHADE_ALPHA)
        rgb = inputs.shade.aaa;
    else if (selector_rgb == RGB_MUL_LOD_FRAC)
        rgb = ivec3(inputs.lod_frac);
    else if (selector_rgb == RGB_MUL_K5)
    {
        // K5 is assembled from the G (high byte) and B (low byte) channels of the constant.
        rgb = ivec3((int(inputs.constant_mul.g) << 8) | inputs.constant_mul.b);
    }
    else
        rgb = inputs.constant_mul.rgb;

    int alpha;
    if (selector_alpha == ALPHA_MUL_LOD_FRAC)
        alpha = inputs.lod_frac;
    else if (selector_alpha == ALPHA_MUL_TEXEL0_ALPHA)
        alpha = inputs.texel0.a;
    else if (selector_alpha == ALPHA_MUL_TEXEL1_ALPHA)
        alpha = inputs.texel1.a;
    else if (selector_alpha == ALPHA_MUL_SHADE_ALPHA)
        alpha = inputs.shade.a;
    else
        alpha = inputs.constant_mul.a;

    return ivec4(rgb, alpha);
}
|
||||
|
||||
// Picks the addend of the combiner equation (the "d" in (a - b) * c + d).
// RGB and alpha are selected independently; a selector that matches no named source
// falls back to constant_add.
ivec4 select_add(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
    ivec3 rgb;
    if (selector_rgb == RGB_ADD_COMBINED)
        rgb = inputs.combined.rgb;
    else if (selector_rgb == RGB_ADD_TEXEL0)
        rgb = inputs.texel0.rgb;
    else if (selector_rgb == RGB_ADD_TEXEL1)
        rgb = inputs.texel1.rgb;
    else if (selector_rgb == RGB_ADD_SHADE)
        rgb = inputs.shade.rgb;
    else if (selector_rgb == RGB_ADD_ONE)
        rgb = ivec3(0x100);
    else
        rgb = inputs.constant_add.rgb;

    int alpha;
    if (selector_alpha == ALPHA_ADDSUB_COMBINED)
        alpha = inputs.combined.a;
    else if (selector_alpha == ALPHA_ADDSUB_TEXEL0_ALPHA)
        alpha = inputs.texel0.a;
    else if (selector_alpha == ALPHA_ADDSUB_TEXEL1_ALPHA)
        alpha = inputs.texel1.a;
    else if (selector_alpha == ALPHA_ADDSUB_SHADE_ALPHA)
        alpha = inputs.shade.a;
    else if (selector_alpha == ALPHA_ADDSUB_ONE)
        alpha = 0x100;
    else
        alpha = inputs.constant_add.a;

    return ivec4(rgb, alpha);
}
|
||||
|
||||
// Runs the combiner equation for cycle 0 and returns the unclamped result.
// When alpha_test is set, alpha_test_reference receives the alpha to test against
// (clamped to [0, 0xff]), derived from the combined alpha, the coverage or the
// alpha dither depending on the flags. Otherwise it is set to 0.
i16x4 combiner_cycle0(CombinerInputs inputs, u8x4 combiner_inputs_rgb, u8x4 combiner_inputs_alpha, int alpha_dith,
                      int coverage, bool cvg_times_alpha, bool alpha_cvg_select, bool alpha_test, out u8 alpha_test_reference)
{
    ivec4 muladd = select_muladd(inputs, combiner_inputs_rgb.x, combiner_inputs_alpha.x);
    ivec4 mulsub = select_mulsub(inputs, combiner_inputs_rgb.y, combiner_inputs_alpha.y);
    ivec4 mul = select_mul(inputs, combiner_inputs_rgb.z, combiner_inputs_alpha.z);
    ivec4 add = select_add(inputs, combiner_inputs_rgb.w, combiner_inputs_alpha.w);

    i16x4 combined = combiner_equation(muladd, mulsub, mul, add);

    if (!alpha_test)
    {
        alpha_test_reference = U8_C(0);
        return combined;
    }

    int clamped_alpha = clamp_9bit(combined.a);
    // Expands 0xff to 0x100 to avoid having to divide by 2**n - 1.
    int reference = clamped_alpha + ((clamped_alpha + 1) >> 8);

    if (!alpha_cvg_select)
        reference += alpha_dith;
    else if (cvg_times_alpha)
        reference = (reference * coverage + 4) >> 3;
    else
        reference = coverage << 5;

    alpha_test_reference = u8(clamp(reference, 0, 0xff));
    return combined;
}
|
||||
|
||||
// Runs the combiner equation for cycle 1 and returns the result clamped to [0, 0xff]
// per lane. The returned alpha is replaced by the coverage-modulated alpha when
// alpha_cvg_select is set, and otherwise has alpha_dith added before the final clamp.
// coverage is inout: with cvg_times_alpha set it is overwritten by modulated alpha >> 5.
i16x4 combiner_cycle1(CombinerInputs inputs, u8x4 combiner_inputs_rgb, u8x4 combiner_inputs_alpha, int alpha_dith,
                      inout int coverage, bool cvg_times_alpha, bool alpha_cvg_select)
{
    ivec4 muladd = select_muladd(inputs, combiner_inputs_rgb.x, combiner_inputs_alpha.x);
    ivec4 mulsub = select_mulsub(inputs, combiner_inputs_rgb.y, combiner_inputs_alpha.y);
    ivec4 mul = select_mul(inputs, combiner_inputs_rgb.z, combiner_inputs_alpha.z);
    ivec4 add = select_add(inputs, combiner_inputs_rgb.w, combiner_inputs_alpha.w);

    i16x4 combined = clamp_9bit_notrunc(combiner_equation(muladd, mulsub, mul, add));

    // Expands 0xff to 0x100 to avoid having to divide by 2**n - 1.
    int expanded_alpha = combined.a + ((combined.a + 1) >> 8);

    int modulated_alpha = coverage << 5;
    if (cvg_times_alpha)
    {
        modulated_alpha = (expanded_alpha * coverage + 4) >> 3;
        coverage = modulated_alpha >> 5;
    }

    int final_alpha = alpha_cvg_select ? modulated_alpha : (expanded_alpha + alpha_dith);
    combined.a = i16(clamp(final_alpha, 0, 0xff));

    return combined;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,81 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef COVERAGE_H_
|
||||
#define COVERAGE_H_
|
||||
|
||||
#include "data_structures.h"
|
||||
|
||||
const int SUBPIXELS_LOG2 = 2;
|
||||
const int SUBPIXELS = 1 << SUBPIXELS_LOG2;
|
||||
|
||||
// Builds an 8-bit coverage mask for pixel column x. Eight samples are tested: the X
// offsets (0, 4, 2, 6) added to x << 3, compared against the lanes of xleft / xright
// taken pairwise (.x, .x, .y, .y for bits 0-3 and .z, .z, .w, .w for bits 4-7).
// A bit is set when its sample satisfies xleft <= sample < xright.
u8 compute_coverage(u16x4 xleft, u16x4 xright, int x)
{
    u16x4 sample_x = u16x4(0, 4, 2, 6) + (u16(x) << U16_C(3));

    // 1 where the sample lies outside [xleft, xright), 0 where it is inside.
    u8x4 outside_01 = u8x4(lessThan(sample_x, xleft.xxyy)) | u8x4(greaterThanEqual(sample_x, xright.xxyy));
    u8x4 outside_23 = u8x4(lessThan(sample_x, xleft.zzww)) | u8x4(greaterThanEqual(sample_x, xright.zzww));

    // Give every sample its own bit, then fold the four lanes into one byte.
    u8x4 weighted = outside_01 * u8x4(1, 2, 4, 8) + outside_23 * u8x4(16, 32, 64, 128);
    u8 outside_mask = (weighted.x | weighted.y) | (weighted.z | weighted.w);

    return ~outside_mask & U8_C(0xff);
}
|
||||
|
||||
// Values of the mode argument of blend_coverage().
const int COVERAGE_CLAMP = 0; // Saturating add with memory coverage when blending, else (coverage - 1) & 7.
const int COVERAGE_WRAP  = 1; // (coverage + memory coverage) & 7.
const int COVERAGE_ZAP   = 2; // Always 7.
const int COVERAGE_SAVE  = 3; // Memory coverage is kept.
|
||||
|
||||
// Computes the coverage value produced for the given coverage mode (COVERAGE_*).
// A mode that matches none of the four constants yields 0.
int blend_coverage(int coverage, int memory_coverage, bool blend_en, int mode)
{
    if (mode == COVERAGE_CLAMP)
    {
        // image_read_en to read memory coverage, otherwise, it's 7.
        return blend_en ? min(7, memory_coverage + coverage) : ((coverage - 1) & 7);
    }

    if (mode == COVERAGE_WRAP)
        return (coverage + memory_coverage) & 7;

    if (mode == COVERAGE_ZAP)
        return 7;

    if (mode == COVERAGE_SAVE)
        return memory_coverage;

    return 0;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,345 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STRUCTURES_H_
|
||||
#define DATA_STRUCTURES_H_
|
||||
|
||||
// Data structures which are supposed to match up with rdp_data_structures.hpp.
|
||||
// A little dirty to duplicate like this, but it's non-trivial to share headers with C++,
|
||||
// especially when we need to deal with small integer types.
|
||||
|
||||
// Single-bit masks sharing the TRIANGLE_SETUP_ prefix. Each occupies its own bit (0..6),
// so any combination can be OR-ed into one integer.
// NOTE(review): presumably tested against the `flags` member of TriangleSetup - confirm
// against rdp_data_structures.hpp, which this header is meant to mirror.
const int TRIANGLE_SETUP_FLIP_BIT               = 1 << 0;
const int TRIANGLE_SETUP_DO_OFFSET_BIT          = 1 << 1;
const int TRIANGLE_SETUP_SKIP_XFRAC_BIT         = 1 << 2;
const int TRIANGLE_SETUP_INTERLACE_FIELD_BIT    = 1 << 3;
const int TRIANGLE_SETUP_INTERLACE_KEEP_ODD_BIT = 1 << 4;
const int TRIANGLE_SETUP_DISABLE_UPSCALING_BIT  = 1 << 5;
const int TRIANGLE_SETUP_NATIVE_LOD_BIT         = 1 << 6;
|
||||
|
||||
// Single-bit masks sharing the RASTERIZATION_ prefix (bits 0..24 and 28..30).
// NOTE(review): presumably tested against StaticRasterizationState.flags - confirm
// against rdp_data_structures.hpp.
const int RASTERIZATION_INTERLACE_FIELD_BIT                = 1 << 0;
const int RASTERIZATION_INTERLACE_KEEP_ODD_BIT             = 1 << 1;
const int RASTERIZATION_AA_BIT                             = 1 << 2;
const int RASTERIZATION_PERSPECTIVE_CORRECT_BIT            = 1 << 3;
const int RASTERIZATION_TLUT_BIT                           = 1 << 4;
const int RASTERIZATION_TLUT_TYPE_BIT                      = 1 << 5;
const int RASTERIZATION_CVG_TIMES_ALPHA_BIT                = 1 << 6;
const int RASTERIZATION_ALPHA_CVG_SELECT_BIT               = 1 << 7;
const int RASTERIZATION_MULTI_CYCLE_BIT                    = 1 << 8;
const int RASTERIZATION_TEX_LOD_ENABLE_BIT                 = 1 << 9;
const int RASTERIZATION_SHARPEN_LOD_ENABLE_BIT             = 1 << 10;
const int RASTERIZATION_DETAIL_LOD_ENABLE_BIT              = 1 << 11;
const int RASTERIZATION_FILL_BIT                           = 1 << 12;
const int RASTERIZATION_COPY_BIT                           = 1 << 13;
const int RASTERIZATION_SAMPLE_MODE_BIT                    = 1 << 14;
const int RASTERIZATION_ALPHA_TEST_BIT                     = 1 << 15;
const int RASTERIZATION_ALPHA_TEST_DITHER_BIT              = 1 << 16;
const int RASTERIZATION_SAMPLE_MID_TEXEL_BIT               = 1 << 17;
const int RASTERIZATION_USES_TEXEL0_BIT                    = 1 << 18;
const int RASTERIZATION_USES_TEXEL1_BIT                    = 1 << 19;
const int RASTERIZATION_USES_LOD_BIT                       = 1 << 20;
const int RASTERIZATION_USES_PIPELINED_TEXEL1_BIT          = 1 << 21;
const int RASTERIZATION_CONVERT_ONE_BIT                    = 1 << 22;
const int RASTERIZATION_BILERP_0_BIT                       = 1 << 23;
const int RASTERIZATION_BILERP_1_BIT                       = 1 << 24;
// Unlike its neighbours this is a shift amount, not a mask. No mask is defined here
// for bits 25..27. NOTE(review): presumably a multi-bit field starts at bit 26 - confirm.
const int RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET          = 26;
const int RASTERIZATION_NEED_NOISE_BIT                     = 1 << 28;
const int RASTERIZATION_USE_STATIC_TEXTURE_SIZE_FORMAT_BIT = 1 << 29;
const int RASTERIZATION_USE_SPECIALIZATION_CONSTANT_BIT    = 1 << 30;
|
||||
|
||||
// Single-bit masks sharing the DEPTH_BLEND_ prefix. Bit 2 has no named mask here.
// NOTE(review): presumably tested against DepthBlendState.flags - confirm against
// rdp_data_structures.hpp.
const int DEPTH_BLEND_DEPTH_TEST_BIT        = 1 << 0;
const int DEPTH_BLEND_DEPTH_UPDATE_BIT      = 1 << 1;
const int DEPTH_BLEND_FORCE_BLEND_BIT       = 1 << 3;
const int DEPTH_BLEND_IMAGE_READ_ENABLE_BIT = 1 << 4;
const int DEPTH_BLEND_COLOR_ON_COVERAGE_BIT = 1 << 5;
const int DEPTH_BLEND_MULTI_CYCLE_BIT       = 1 << 6;
const int DEPTH_BLEND_AA_BIT                = 1 << 7;
const int DEPTH_BLEND_DITHER_ENABLE_BIT     = 1 << 8;
|
||||
|
||||
// Buffer-storage form of a triangle setup record. Member order and widths are meant to
// match rdp_data_structures.hpp, so do not reorder or resize members.
// The mem_* types are declared outside this header (not visible here).
struct TriangleSetupMem
{
    int xh;
    int xm;
    int xl;
    mem_i16 yh;
    mem_i16 ym;
    int dxhdy;
    int dxmdy;
    int dxldy;
    mem_i16 yl;
    mem_u8 flags;
    mem_u8 tile;
};

// Working form of the record. With SMALL_TYPES enabled it is simply an alias of the
// storage struct; otherwise it is a separate struct with the same member names, using
// i16/u8 in place of mem_i16/mem_u8.
#if SMALL_TYPES
#define TriangleSetup TriangleSetupMem
#else
struct TriangleSetup
{
    int xh;
    int xm;
    int xl;
    i16 yh;
    i16 ym;
    int dxhdy;
    int dxmdy;
    int dxldy;
    i16 yl;
    u8 flags;
    u8 tile;
};
#endif
|
||||
|
||||
// Eight ivec4 members in two groups of four: a base value followed by its _dx, _de and
// _dy companions, first for rgba and then for stzw. Member order is meant to match
// rdp_data_structures.hpp.
struct AttributeSetupMem
{
    ivec4 rgba, drgba_dx, drgba_de, drgba_dy;
    ivec4 stzw, dstzw_dx, dstzw_de, dstzw_dy;
};
// Only 32-bit members, so the working type is the storage type under every configuration.
#define AttributeSetup AttributeSetupMem
|
||||
|
||||
// Buffer-storage form of a span setup record. Member order and widths are meant to
// match rdp_data_structures.hpp.
struct SpanSetupMem
{
    ivec4 rgba, stzw;

    mem_u16x4 xleft, xright;

    int interpolation_base_x, start_x, end_x;
    mem_i16 lodlength;
    mem_u16 valid_line;
};

// Working form: an alias of the storage struct when SMALL_TYPES is enabled, otherwise
// a separate struct with identical member names built from the non-mem_ types.
#if SMALL_TYPES
#define SpanSetup SpanSetupMem
#else
struct SpanSetup
{
    ivec4 rgba, stzw;

    u16x4 xleft, xright;

    int interpolation_base_x, start_x, end_x;
    i16 lodlength;
    u16 valid_line;
};
#endif
|
||||
|
||||
// Four 32-bit integers: an offset, a [ylo, yhi] pair and one padding word.
// NOTE(review): `padding` presumably keeps the record at 16 bytes - confirm against
// rdp_data_structures.hpp.
struct SpanInfoOffsetsMem
{
    int offset, ylo, yhi;
    int padding;
};
// Only 32-bit members, so no separate working struct is needed.
#define SpanInfoOffsets SpanInfoOffsetsMem
|
||||
|
||||
// Buffer-storage form of the derived setup record. Member order and widths are meant
// to match rdp_data_structures.hpp.
struct DerivedSetupMem
{
    // Two sets of four constants, suffixed 0 and 1.
    mem_u8x4 constant_muladd0, constant_mulsub0, constant_mul0, constant_add0;
    mem_u8x4 constant_muladd1, constant_mulsub1, constant_mul1, constant_add1;

    mem_u8x4 fog_color, blend_color;
    uint fill_color;

    mem_u16 dz;
    mem_u8 dz_compressed, min_lod;

    mem_i16x4 factors;
};

// Working form: an alias of the storage struct when SMALL_TYPES is enabled, otherwise
// a separate struct with identical member names built from the non-mem_ types.
#if SMALL_TYPES
#define DerivedSetup DerivedSetupMem
#else
struct DerivedSetup
{
    u8x4 constant_muladd0, constant_mulsub0, constant_mul0, constant_add0;
    u8x4 constant_muladd1, constant_mulsub1, constant_mul1, constant_add1;

    u8x4 fog_color, blend_color;
    uint fill_color;

    u16 dz;
    u8 dz_compressed, min_lod;

    i16x4 factors;
};
#endif
|
||||
|
||||
// In buffers a scissor rectangle is stored as a plain ivec4.
#define ScissorStateMem ivec4

// Working form of the scissor rectangle with named bounds.
// NOTE(review): presumably the ivec4 components map to xlo, ylo, xhi, yhi in that
// order - confirm in load_scissor_state.h.
struct ScissorState
{
    int xlo;
    int ylo;
    int xhi;
    int yhi;
};
|
||||
|
||||
// Single-bit masks sharing the TILE_INFO_ prefix: a clamp/mirror pair for S (bits 0, 1)
// and for T (bits 2, 3).
// NOTE(review): presumably tested against TileInfo.flags - confirm against
// rdp_data_structures.hpp.
const int TILE_INFO_CLAMP_S_BIT  = 1 << 0;
const int TILE_INFO_MIRROR_S_BIT = 1 << 1;
const int TILE_INFO_CLAMP_T_BIT  = 1 << 2;
const int TILE_INFO_MIRROR_T_BIT = 1 << 3;

// Buffer-storage form of a tile descriptor: six 32-bit words followed by eight
// byte-typed members. Member order and widths are meant to match rdp_data_structures.hpp.
struct TileInfoMem
{
    uint slo, shi, tlo, thi;
    uint offset, stride;
    mem_u8 fmt, size, palette;
    mem_u8 mask_s, shift_s;
    mem_u8 mask_t, shift_t;
    mem_u8 flags;
};

// Working form: an alias of the storage struct when SMALL_TYPES is enabled, otherwise
// a separate struct with identical member names using u8 in place of mem_u8.
#if SMALL_TYPES
#define TileInfo TileInfoMem
#else
struct TileInfo
{
    uint slo, shi, tlo, thi;
    uint offset, stride;
    u8 fmt, size, palette;
    u8 mask_s, shift_s;
    u8 mask_t, shift_t;
    u8 flags;
};
#endif
|
||||
|
||||
// Buffer-storage form of the static rasterization state. Member order and widths are
// meant to match rdp_data_structures.hpp.
struct StaticRasterizationStateMem
{
    // Combiner inputs for suffix 0 and suffix 1, each as an rgb/alpha pair.
    mem_u8x4 combiner_inputs_rgb0, combiner_inputs_alpha0;
    mem_u8x4 combiner_inputs_rgb1, combiner_inputs_alpha1;
    uint flags;
    int dither;
    int texture_size, texture_fmt;
};

// Working form: an alias of the storage struct when SMALL_TYPES is enabled, otherwise
// a separate struct with identical member names using u8x4 in place of mem_u8x4.
#if SMALL_TYPES
#define StaticRasterizationState StaticRasterizationStateMem
#else
struct StaticRasterizationState
{
    u8x4 combiner_inputs_rgb0, combiner_inputs_alpha0;
    u8x4 combiner_inputs_rgb1, combiner_inputs_alpha1;
    uint flags;
    int dither;
    int texture_size, texture_fmt;
};
#endif
|
||||
|
||||
// Buffer-storage form of the depth/blend state. Member order and widths are meant to
// match rdp_data_structures.hpp; the two padding members are part of that layout.
struct DepthBlendStateMem
{
    mem_u8x4 blend_modes0, blend_modes1;
    uint flags;
    mem_u8 coverage_mode, z_mode;
    mem_u8 padding0, padding1;
};

// Working form: an alias of the storage struct when SMALL_TYPES is enabled, otherwise
// a separate struct with identical member names built from the non-mem_ types.
#if SMALL_TYPES
#define DepthBlendState DepthBlendStateMem
#else
struct DepthBlendState
{
    u8x4 blend_modes0, blend_modes1;
    uint flags;
    u8 coverage_mode, z_mode;
    u8 padding0, padding1;
};
#endif
|
||||
|
||||
// Byte-sized index record: two 4-component groups followed by eight tile-info indices.
// NOTE(review): the name static_depth_tmem suggests its components index the static
// raster, depth/blend and TMEM state arrays - confirm at the use sites.
struct InstanceIndicesMem
{
    mem_u8x4 static_depth_tmem, other;
    mem_u8 tile_infos[8];
};

// One TMEM instance addressed as 2048 mem_u16 elements.
struct TMEMInstance16Mem
{
    mem_u16 elems[2048];
};

// One TMEM instance addressed as 4096 mem_u8 elements.
struct TMEMInstance8Mem
{
    mem_u8 elems[4096];
};
|
||||
|
||||
// Per-pixel shading result: a 4-component colour, a packed integer and two byte values.
struct ShadedData
{
    u8x4 combined;
    int z_dith;
    u8 coverage_count, shade_alpha;
};

// Marker bits carried in a coverage value.
// NOTE(review): presumably they tag pixels produced by fill/copy mode - confirm at the
// sites that write the coverage buffer.
const int COVERAGE_FILL_BIT = 0x40;
const int COVERAGE_COPY_BIT = 0x20;

// Framebuffer-wide parameters: three signed ints followed by one unsigned index.
struct GlobalFBInfo
{
    int dx_shift, dx_mask;
    int fb_size;
    uint base_primitive_index;
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,134 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef DATA_STRUCTURES_BUFFERS_H_
|
||||
#define DATA_STRUCTURES_BUFFERS_H_
|
||||
|
||||
#include "data_structures.h"
|
||||
|
||||
// Set 0, binding 0 is declared three times so the same buffer can be addressed at
// 32-bit, 16-bit and 8-bit element granularity. None of these views is readonly.
layout(set = 0, binding = 0, std430) buffer VRAM32
{
    uint data[];
} vram32;

layout(set = 0, binding = 0, std430) buffer VRAM16
{
    mem_u16 data[];
} vram16;

layout(set = 0, binding = 0, std430) buffer VRAM8
{
    mem_u8 data[];
} vram8;

// Set 0, binding 1: a separate byte-addressed, writable buffer.
layout(set = 0, binding = 1, std430) buffer HiddenVRAM
{
    mem_u8 data[];
} hidden_vram;

// Set 0, binding 2 is declared twice: read-only TMEM instances viewed as 16-bit or as
// 8-bit elements.
layout(set = 0, binding = 2, std430) readonly buffer TMEM16
{
    TMEMInstance16Mem instances[];
} tmem16;

layout(set = 0, binding = 2, std430) readonly buffer TMEM8
{
    TMEMInstance8Mem instances[];
} tmem8;
|
||||
|
||||
// Set 1: read-only inputs. Most buffers are followed by the include of a matching
// load_*.h header.
// NOTE(review): those headers presumably reference the buffer declared just above
// them, so keep each include after its declaration - confirm in the headers.

// binding 0
layout(set = 1, binding = 0, std430) readonly buffer TriangleSetupBuffer
{
    TriangleSetupMem elems[];
} triangle_setup;
#include "load_triangle_setup.h"

// binding 1
layout(set = 1, binding = 1, std430) readonly buffer AttributeSetupBuffer
{
    AttributeSetupMem elems[];
} attribute_setup;
#include "load_attribute_setup.h"

// binding 2
layout(set = 1, binding = 2, std430) readonly buffer DerivedSetupBuffer
{
    DerivedSetupMem elems[];
} derived_setup;
#include "load_derived_setup.h"

// binding 3
layout(set = 1, binding = 3, std430) readonly buffer ScissorStateBuffer
{
    ScissorStateMem elems[];
} scissor_state;
#include "load_scissor_state.h"

// binding 4
layout(set = 1, binding = 4, std430) readonly buffer StaticRasterStateBuffer
{
    StaticRasterizationStateMem elems[];
} static_raster_state;
#include "load_static_raster_state.h"

// binding 5
layout(set = 1, binding = 5, std430) readonly buffer DepthBlendStateBuffer
{
    DepthBlendStateMem elems[];
} depth_blend_state;
#include "load_depth_blend_state.h"

// binding 6 (no load helper is included for this one)
layout(set = 1, binding = 6, std430) readonly buffer StateIndicesBuffer
{
    InstanceIndicesMem elems[];
} state_indices;

// binding 7
layout(set = 1, binding = 7, std430) readonly buffer TileInfoBuffer
{
    TileInfoMem elems[];
} tile_infos;
#include "load_tile_info.h"

// binding 8
layout(set = 1, binding = 8, std430) readonly buffer SpanSetups
{
    SpanSetupMem elems[];
} span_setups;
#include "load_span_setup.h"

// binding 9
layout(set = 1, binding = 9, std430) readonly buffer SpanInfoOffsetBuffer
{
    SpanInfoOffsetsMem elems[];
} span_offsets;
#include "load_span_offsets.h"

// binding 10: unsigned-integer texel buffer used as a lookup table.
layout(set = 1, binding = 10) uniform utextureBuffer uBlenderDividerLUT;

// binding 11: binning data as 32-bit words.
layout(set = 1, binding = 11, std430) readonly buffer TileBinning
{
    uint elems[];
} tile_binning;

// binding 12: coarse binning data as 32-bit words.
layout(set = 1, binding = 12, std430) readonly buffer TileBinningCoarse
{
    uint elems[];
} tile_binning_coarse;
|
||||
|
||||
// Set 2, binding 0: uniform block (std140, unlike the std430 storage buffers declared
// elsewhere in this header) holding the framebuffer-wide parameters.
layout(set = 2, binding = 0, std140) uniform GlobalConstants
{
    GlobalFBInfo fb_info;
} global_constants;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,151 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef DEBUG_H_
|
||||
#define DEBUG_H_
|
||||
|
||||
#if defined(DEBUG_ENABLE) && DEBUG_ENABLE
|
||||
#include "debug_channel.h"
|
||||
|
||||
// Message codes passed as the first argument of add_debug_message() by the helpers in
// this header: one per assertion kind, plus two for free-form messages.
const uint CODE_ASSERT_EQUAL           = 0;
const uint CODE_ASSERT_NOT_EQUAL       = 1;
const uint CODE_ASSERT_LESS_THAN       = 2;
const uint CODE_ASSERT_LESS_THAN_EQUAL = 3;
const uint CODE_GENERIC                = 4;
const uint CODE_HEX                    = 5;
|
||||
|
||||
// Signed-integer assertion helpers. Each returns silently when its condition holds;
// otherwise it emits a debug message carrying the assertion code, the global
// invocation ID and (line, a, b).

// Reports when a != b.
void ASSERT_EQUAL_(int line, int a, int b)
{
    if (a == b)
        return;
    add_debug_message(CODE_ASSERT_EQUAL, gl_GlobalInvocationID, ivec3(line, a, b));
}

// Reports when a == b.
void ASSERT_NOT_EQUAL_(int line, int a, int b)
{
    if (a != b)
        return;
    add_debug_message(CODE_ASSERT_NOT_EQUAL, gl_GlobalInvocationID, ivec3(line, a, b));
}

// Reports when a is not strictly less than b.
void ASSERT_LESS_THAN_(int line, int a, int b)
{
    if (a < b)
        return;
    add_debug_message(CODE_ASSERT_LESS_THAN, gl_GlobalInvocationID, ivec3(line, a, b));
}

// Reports when a is greater than b.
void ASSERT_LESS_THAN_EQUAL_(int line, int a, int b)
{
    if (a <= b)
        return;
    add_debug_message(CODE_ASSERT_LESS_THAN_EQUAL, gl_GlobalInvocationID, ivec3(line, a, b));
}
|
||||
|
||||
// Unsigned overloads of the assertion helpers. Comparisons are done on the uint values;
// the payload is still built as an ivec3 from (line, a, b).

// Reports when a != b.
void ASSERT_EQUAL_(int line, uint a, uint b)
{
    if (a == b)
        return;
    add_debug_message(CODE_ASSERT_EQUAL, gl_GlobalInvocationID, ivec3(line, a, b));
}

// Reports when a == b.
void ASSERT_NOT_EQUAL_(int line, uint a, uint b)
{
    if (a != b)
        return;
    add_debug_message(CODE_ASSERT_NOT_EQUAL, gl_GlobalInvocationID, ivec3(line, a, b));
}

// Reports when a is not strictly less than b.
void ASSERT_LESS_THAN_(int line, uint a, uint b)
{
    if (a < b)
        return;
    add_debug_message(CODE_ASSERT_LESS_THAN, gl_GlobalInvocationID, ivec3(line, a, b));
}

// Reports when a is greater than b.
void ASSERT_LESS_THAN_EQUAL_(int line, uint a, uint b)
{
    if (a <= b)
        return;
    add_debug_message(CODE_ASSERT_LESS_THAN_EQUAL, gl_GlobalInvocationID, ivec3(line, a, b));
}
|
||||
|
||||
// Emit a CODE_GENERIC debug message tagged with the source line and zero to three
// extra unsigned words. The line number is always the first payload component.

// Line only.
void GENERIC_MESSAGE_(int line)
{
    add_debug_message(CODE_GENERIC, gl_GlobalInvocationID, line);
}

// Line plus one word.
void GENERIC_MESSAGE_(int line, uint v)
{
    uvec2 payload = uvec2(line, v);
    add_debug_message(CODE_GENERIC, gl_GlobalInvocationID, payload);
}

// Line plus two words.
void GENERIC_MESSAGE_(int line, uvec2 v)
{
    uvec3 payload = uvec3(line, v);
    add_debug_message(CODE_GENERIC, gl_GlobalInvocationID, payload);
}

// Line plus three words.
void GENERIC_MESSAGE_(int line, uvec3 v)
{
    uvec4 payload = uvec4(line, v);
    add_debug_message(CODE_GENERIC, gl_GlobalInvocationID, payload);
}
|
||||
|
||||
// Same shape as the GENERIC_MESSAGE_ overloads, but the message is tagged CODE_HEX.
// NOTE(review): presumably the host prints these payloads in hexadecimal - confirm on
// the receiving side.

// Line only.
void HEX_MESSAGE_(int line)
{
    add_debug_message(CODE_HEX, gl_GlobalInvocationID, line);
}

// Line plus one word.
void HEX_MESSAGE_(int line, uint v)
{
    uvec2 payload = uvec2(line, v);
    add_debug_message(CODE_HEX, gl_GlobalInvocationID, payload);
}

// Line plus two words.
void HEX_MESSAGE_(int line, uvec2 v)
{
    uvec3 payload = uvec3(line, v);
    add_debug_message(CODE_HEX, gl_GlobalInvocationID, payload);
}

// Line plus three words.
void HEX_MESSAGE_(int line, uvec3 v)
{
    uvec4 payload = uvec4(line, v);
    add_debug_message(CODE_HEX, gl_GlobalInvocationID, payload);
}
|
||||
|
||||
// Call-site wrappers that stamp every message with the shader source line (__LINE__).
//
// The assertion macros were originally spelled ASERT_*; those names are kept so that
// existing call sites keep compiling. The ASSERT_* names below are correctly spelled
// aliases that expand to exactly the same calls and should be preferred in new code.
#define ASERT_EQUAL(a, b) ASSERT_EQUAL_(__LINE__, a, b)
#define ASERT_NOT_EQUAL(a, b) ASSERT_NOT_EQUAL_(__LINE__, a, b)
#define ASERT_LESS_THAN(a, b) ASSERT_LESS_THAN_(__LINE__, a, b)
#define ASERT_LESS_THAN_EQUAL(a, b) ASSERT_LESS_THAN_EQUAL_(__LINE__, a, b)
#define ASSERT_EQUAL(a, b) ASSERT_EQUAL_(__LINE__, a, b)
#define ASSERT_NOT_EQUAL(a, b) ASSERT_NOT_EQUAL_(__LINE__, a, b)
#define ASSERT_LESS_THAN(a, b) ASSERT_LESS_THAN_(__LINE__, a, b)
#define ASSERT_LESS_THAN_EQUAL(a, b) ASSERT_LESS_THAN_EQUAL_(__LINE__, a, b)
// The numeric suffix is the number of extra values; 2 and 3 pack them into a uvec.
#define GENERIC_MESSAGE0() GENERIC_MESSAGE_(__LINE__)
#define GENERIC_MESSAGE1(a) GENERIC_MESSAGE_(__LINE__, a)
#define GENERIC_MESSAGE2(a, b) GENERIC_MESSAGE_(__LINE__, uvec2(a, b))
#define GENERIC_MESSAGE3(a, b, c) GENERIC_MESSAGE_(__LINE__, uvec3(a, b, c))
#define HEX_MESSAGE0() HEX_MESSAGE_(__LINE__)
#define HEX_MESSAGE1(a) HEX_MESSAGE_(__LINE__, a)
#define HEX_MESSAGE2(a, b) HEX_MESSAGE_(__LINE__, uvec2(a, b))
#define HEX_MESSAGE3(a, b, c) HEX_MESSAGE_(__LINE__, uvec3(a, b, c))
#else
// Debugging disabled: every macro expands to nothing, so its arguments are never
// evaluated. Both spellings of the assertion macros are provided here as well, so a
// shader compiles the same way regardless of DEBUG_ENABLE.
#define ASERT_EQUAL(a, b)
#define ASERT_NOT_EQUAL(a, b)
#define ASERT_LESS_THAN(a, b)
#define ASERT_LESS_THAN_EQUAL(a, b)
#define ASSERT_EQUAL(a, b)
#define ASSERT_NOT_EQUAL(a, b)
#define ASSERT_LESS_THAN(a, b)
#define ASSERT_LESS_THAN_EQUAL(a, b)
#define GENERIC_MESSAGE0()
#define GENERIC_MESSAGE1(a)
#define GENERIC_MESSAGE2(a, b)
#define GENERIC_MESSAGE3(a, b, c)
#define HEX_MESSAGE0()
#define HEX_MESSAGE1(a)
#define HEX_MESSAGE2(a, b)
#define HEX_MESSAGE3(a, b, c)
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,149 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#if SUBGROUP
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : require
|
||||
#endif
|
||||
#include "small_types.h"
|
||||
|
||||
layout(local_size_x_id = 3, local_size_y_id = 4) in;
|
||||
|
||||
#include "noise.h"
|
||||
#include "debug.h"
|
||||
#include "data_structures_buffers.h"
|
||||
#include "memory_interfacing.h"
|
||||
|
||||
// Per-pixel inputs consumed by this pass. All of them are read-only here.

// Set 0, binding 3 is declared twice: once as 4-component byte colours and once as raw
// 32-bit words.
layout(set = 0, binding = 3, std430) readonly buffer ColorBuffer
{
    mem_u8x4 elems[];
} color;

layout(set = 0, binding = 3, std430) readonly buffer ColorRawBuffer
{
    uint elems[];
} raw_color;

// binding 4: one int per entry.
layout(set = 0, binding = 4, std430) readonly buffer DepthBuffer
{
    int elems[];
} depth;

// binding 5: one byte per entry.
layout(set = 0, binding = 5, std430) readonly buffer ShadeAlpha
{
    mem_u8 elems[];
} shade_alpha;

// binding 6: one signed byte per entry; main() skips entries that are negative.
layout(set = 0, binding = 6, std430) readonly buffer Coverage
{
    mem_i8 elems[];
} coverage;

// binding 7: first tile-instance index for each binning word (see main()).
layout(set = 0, binding = 7, std430) readonly buffer TileInstanceOffset
{
    uint elems[];
} tile_instance_offsets;

// Push constants: values forwarded to init_tile()/finish_tile(), plus the mask applied
// to the coarse binning word.
layout(push_constant, std430) uniform Registers
{
    uint fb_addr_index;
    uint fb_depth_addr_index;
    uint fb_width;
    uint fb_height;
    uint group_mask;
} registers;
|
||||
|
||||
// Specialization constants; the literals are the defaults used when the pipeline does
// not override them.
layout(constant_id = 5) const int MAX_PRIMITIVES = 256;
layout(constant_id = 6) const int MAX_WIDTH = 1024;

// One 32-bit binning word covers 32 primitives, so this is the number of words per tile.
const int TILE_BINNING_STRIDE = MAX_PRIMITIVES / 32;
// Number of tiles in one row; used as the row pitch when linearising tile coordinates.
const int MAX_TILES_X = MAX_WIDTH / int(gl_WorkGroupSize.x);
|
||||
|
||||
// Overall architecture of the tiling is from RetroWarp.
|
||||
|
||||
// One invocation per pixel, one workgroup per tile. Walks every primitive binned to
// this tile (coarse word first, then the fine 32-bit words), fetches that primitive's
// per-pixel data and hands it to the fill, copy or depth/blend path.
void main()
{
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);
    ivec2 tile = ivec2(gl_WorkGroupID.xy);

    int linear_tile = tile.y * MAX_TILES_X + tile.x;
    int linear_tile_base = linear_tile * TILE_BINNING_STRIDE;

    // Each set bit in the coarse word marks a fine binning word with work in it.
    uint pending_groups = tile_binning_coarse.elems[linear_tile] & registers.group_mask;
    if (pending_groups == 0u)
        return;

    init_tile(gl_GlobalInvocationID.xy,
              registers.fb_width, registers.fb_height,
              registers.fb_addr_index, registers.fb_depth_addr_index);

    uint invocations_per_tile = gl_WorkGroupSize.x * gl_WorkGroupSize.y;

    for (; pending_groups != 0u; )
    {
        // Take the lowest set bit and clear it.
        int group = findLSB(pending_groups);
        pending_groups &= ~uint(1 << group);

        int word_index = linear_tile_base + group;
        uint tile_instance = tile_instance_offsets.elems[word_index];
        uint pending_primitives = tile_binning.elems[word_index];

        // tile_instance advances once per binned primitive, including skipped ones;
        // the loop increment also runs after `continue`.
        for (; pending_primitives != 0u; tile_instance++)
        {
            int bit = findLSB(pending_primitives);
            pending_primitives &= ~uint(1 << bit);
            uint primitive_index = uint(bit + 32 * group);

            uint index = tile_instance * invocations_per_tile + gl_LocalInvocationIndex;
            int cvg = int(coverage.elems[index]);

            // Negative coverage: nothing to do for this pixel and primitive.
            if (cvg < 0)
                continue;

            if ((cvg & COVERAGE_FILL_BIT) != 0)
            {
                fill_color(derived_setup.elems[primitive_index].fill_color);
            }
            else if ((cvg & COVERAGE_COPY_BIT) != 0)
            {
                uint word = raw_color.elems[index];
                copy_pipeline(word, primitive_index);
            }
            else
            {
                ShadedData shaded;
                shaded.combined = u8x4(color.elems[index]);
                shaded.z_dith = depth.elems[index];
                shaded.shade_alpha = u8(shade_alpha.elems[index]);
                shaded.coverage_count = u8(cvg);
                depth_blend(pixel.x, pixel.y, primitive_index, shaded);
            }
        }
    }

    finish_tile(gl_GlobalInvocationID.xy,
                registers.fb_width, registers.fb_height,
                registers.fb_addr_index, registers.fb_depth_addr_index);
}
|
|
@ -0,0 +1,146 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef DEPTH_TEST_H_
|
||||
#define DEPTH_TEST_H_
|
||||
|
||||
#include "z_encode.h"
|
||||
|
||||
// Values accepted by the z_mode parameter of depth_test().
const int Z_MODE_OPAQUE          = 0;
const int Z_MODE_INTERPENETRATING = 1;
const int Z_MODE_TRANSPARENT     = 2;
const int Z_MODE_DECAL           = 3;
|
||||
|
||||
// Rounds dz down to a power of two: returns the largest power of two that is <= dz,
// or 0 when dz is 0.
int combine_dz(int dz)
{
    return (dz == 0) ? 0 : (1 << findMSB(dz));
}
|
||||
|
||||
// Depth test for one pixel.
//   z, dz, dz_compressed   - depth of the incoming pixel, its delta-z, and the
//                            compressed form of that delta.
//   current_depth/_dz      - compressed depth and delta-z currently in memory.
//   coverage_count         - incoming coverage; may be rescaled in interpenetrating mode.
//   current_coverage_count - coverage currently in memory.
//   z_compare              - when false the pixel always passes.
//   z_mode                 - one of the Z_MODE_* constants.
//   blend_en, coverage_wrap, blend_shift - outputs, written on every path.
// Returns true when the pixel passes.
bool depth_test(int z, int dz, int dz_compressed,
                u16 current_depth, u8 current_dz,
                inout int coverage_count, int current_coverage_count,
                bool z_compare, int z_mode,
                bool force_blend, bool aa_enable,
                out bool blend_en, out bool coverage_wrap, out u8x2 blend_shift)
{
    // Evaluated on the incoming coverage, before interpenetrating mode may change it.
    bool overflow = (coverage_count + current_coverage_count) >= 8;
    coverage_wrap = overflow;

    if (!z_compare)
    {
        // No comparison requested: always pass.
        blend_shift.x = u8(0);
        blend_shift.y = u8(min(0xf - dz_compressed, 4));
        blend_en = force_blend || (!overflow && aa_enable);
        return true;
    }

    int memory_z = z_decompress(current_depth);
    int memory_dz = dz_decompress(current_dz);
    int precision_factor = (int(current_depth) >> 11) & 0xf;

    blend_shift.x = u8(clamp(dz_compressed - current_dz, 0, 4));
    blend_shift.y = u8(clamp(current_dz - dz_compressed, 0, 4));

    // For small precision factors the memory dz is widened; a memory dz of 0x8000 is
    // instead treated as coplanar, which makes both range tests below pass.
    bool coplanar = (precision_factor < 3) && (memory_dz == 0x8000);
    if (coplanar)
        memory_dz = 0xffff;
    else if (precision_factor < 3)
        memory_dz = max(memory_dz << 1, 16 >> precision_factor);

    int dz_pot = combine_dz(dz | memory_dz);
    int dz_range = dz_pot << 3;

    bool farther = coplanar || ((z + dz_range) >= memory_z);
    bool nearer = coplanar || ((z - dz_range) <= memory_z);
    bool front = z < memory_z;
    bool max_z = memory_z == 0x3ffff;

    blend_en = force_blend || (!overflow && aa_enable && farther);

    // The OPAQUE mode is normal less-than. However, if z is close enough to memory Z,
    // we assume it is the same surface and simply increment coverage (blend_en).
    // If coverage overflows, it is clearly a different surface, so only the pure
    // in-front test is considered and coverage is overwritten.
    bool opaque_pass = max_z || (overflow ? front : nearer);

    if (z_mode == Z_MODE_OPAQUE)
        return opaque_pass;

    if (z_mode == Z_MODE_INTERPENETRATING)
    {
        // This one is ... interesting as it affects coverage.
        if (front && farther && overflow)
        {
            // Modify coverage based on how far away current surface we are somehow?
            int shift = dz_compress(dz_pot & 0xffff);
            int cvg_coeff = ((memory_z >> shift) - (z >> shift)) & 0xf;
            coverage_count = min((cvg_coeff * coverage_count) >> 3, 8);
            return true;
        }

        // If there is no decal-like intersect, treat this as normal opaque mode.
        return opaque_pass;
    }

    if (z_mode == Z_MODE_TRANSPARENT)
        return front || max_z;

    if (z_mode == Z_MODE_DECAL)
    {
        // Decals pass if |z - memory_z| <= max(dz, memory_dz).
        return farther && nearer && !max_z;
    }

    // Unknown mode: fail.
    return false;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,70 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef DITHER_H_
|
||||
#define DITHER_H_
|
||||
|
||||
// Two 4x4 threshold matrices with entries in 0..7, stored row-major: the entry for
// pixel (x, y) is at index (y & 3) * 4 + (x & 3).
const u8 dither_matrices[2][16] = u8[][](
    u8[](
        U8_C(0), U8_C(6), U8_C(1), U8_C(7),
        U8_C(4), U8_C(2), U8_C(5), U8_C(3),
        U8_C(3), U8_C(5), U8_C(2), U8_C(4),
        U8_C(7), U8_C(1), U8_C(6), U8_C(0)),
    u8[](
        U8_C(0), U8_C(4), U8_C(1), U8_C(5),
        U8_C(4), U8_C(0), U8_C(5), U8_C(1),
        U8_C(3), U8_C(7), U8_C(2), U8_C(6),
        U8_C(7), U8_C(3), U8_C(6), U8_C(2)));
|
||||
|
||||
// Applies dithering to an RGB colour.
//   orig_rgb - input colour, one channel per component.
//   dith     - three 3-bit thresholds packed at bit offsets 0 (R), 3 (G) and 6 (B).
// A channel whose low 3 bits exceed its threshold is replaced by the next multiple of
// 8 above it (255 for inputs above 247); otherwise it is left unchanged. The result is
// masked to 8 bits per channel.
u8x3 rgb_dither(ivec3 orig_rgb, int dith)
{
    ivec3 threshold = (ivec3(dith) >> ivec3(0, 3, 6)) & 7;
    ivec3 low_bits = orig_rgb & 7;

    ivec3 rounded_up = mix((orig_rgb & 0xf8) + 8, ivec3(255), greaterThan(orig_rgb, ivec3(247)));
    ivec3 result = mix(orig_rgb, rounded_up, lessThan(threshold, low_bits));

    return u8x3(result & 0xff);
}
|
||||
|
||||
// Produces the dither values for pixel (x, y).
//   dither_mode_rgb   - below 2: take the matrix entry and replicate it at bit offsets
//                       0, 3 and 6; 2: noise_get_dither_color(); otherwise 0.
//   dither_mode_alpha - 3: 0; 2: noise_get_dither_alpha(); 0 or 1: pattern-based, with
//                       mode 1 inverting the 3-bit value.
//   rgb_dither, alpha_dither - outputs, written on every path.
void dither_coefficients(int x, int y, int dither_mode_rgb, int dither_mode_alpha, out int rgb_dither, out int alpha_dither)
{
    const int DITHER_SPLAT = (1 << 0) | (1 << 3) | (1 << 6);
    int cell = (y & 3) * 4 + (x & 3);

    if (dither_mode_rgb < 2)
    {
        rgb_dither = int(dither_matrices[dither_mode_rgb][cell]) * DITHER_SPLAT;
    }
    else if (dither_mode_rgb == 2)
    {
        rgb_dither = noise_get_dither_color();
    }
    else
    {
        rgb_dither = 0;
    }

    if (dither_mode_alpha == 3)
    {
        alpha_dither = 0;
    }
    else if (dither_mode_alpha == 2)
    {
        alpha_dither = noise_get_dither_alpha();
    }
    else
    {
        // When RGB did not use a matrix, read one directly (chosen by the low bit of
        // the RGB mode); otherwise reuse the low 3 bits of the RGB dither value.
        int pattern;
        if (dither_mode_rgb >= 2)
            pattern = int(dither_matrices[dither_mode_rgb & 1][cell]);
        else
            pattern = rgb_dither & 7;

        alpha_dither = (dither_mode_alpha == 1) ? (~pattern & 7) : pattern;
    }
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,107 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "small_types.h"
|
||||
layout(local_size_x = 16, local_size_y = 8) in;
|
||||
|
||||
// Copies VRAM into a texture which is then consumed by VI scanout.
|
||||
|
||||
// Destination image written by main(); one uvec4 texel per invocation.
layout(set = 0, binding = 0, rgba8ui) uniform writeonly uimage2D uAAInput;
|
||||
// 16-bit view of the buffer at set 0 / binding 1 (read by the RGBA5551 path of fetch_color).
layout(set = 0, binding = 1, std430) readonly buffer RDRAM16
|
||||
{
|
||||
mem_u16 elems[];
|
||||
} vram16;
|
||||
|
||||
// 32-bit view; declared on the same set/binding as RDRAM16, so both blocks alias one buffer.
layout(set = 0, binding = 1, std430) readonly buffer RDRAM32
|
||||
{
|
||||
uint elems[];
|
||||
} vram32;
|
||||
|
||||
// Separate byte buffer, indexed per 16-bit pixel in fetch_color and OR'ed into the fourth component.
layout(set = 0, binding = 2, std430) readonly buffer HiddenRDRAM
|
||||
{
|
||||
mem_u8 elems[];
|
||||
} hidden_vram;
|
||||
|
||||
// Per-dispatch parameters supplied as push constants.
layout(push_constant, std430) uniform Registers
|
||||
{
|
||||
// Added to the linear pixel index in fetch_color (so it is in pixel units, not bytes).
int fb_offset;
|
||||
// Row pitch in pixels: fetch_color computes y * fb_width + x.
int fb_width;
|
||||
// Added to the invocation ID in main() to get the source coordinate.
ivec2 offset;
|
||||
// Invocations at or beyond this size return without writing.
ivec2 resolution;
|
||||
} registers;
|
||||
|
||||
// Specialization constant 0. The masks below are RDRAM_SIZE - 1 and its right shifts,
// which only form valid wrap masks if the host supplies a power of two — TODO confirm.
layout(constant_id = 0) const int RDRAM_SIZE = 0;
|
||||
// Index masks for 8-, 16- and 32-bit element views respectively.
const int RDRAM_MASK_8 = RDRAM_SIZE - 1;
|
||||
const int RDRAM_MASK_16 = RDRAM_MASK_8 >> 1;
|
||||
const int RDRAM_MASK_32 = RDRAM_MASK_16 >> 1;
|
||||
// Specialization constant 2: log2 of the scaling factor used by fetch_color.
layout(constant_id = 2) const int SCALING_LOG2 = 0;
|
||||
const int SCALING_FACTOR = 1 << SCALING_LOG2;
|
||||
|
||||
#include "vi_status.h"
|
||||
|
||||
// Reads one pixel from VRAM at an upscaled coordinate.
// The low SCALING_LOG2 bits of coord select a slice, the remaining bits the pixel.
// Each slice is offset by one full RDRAM's worth of elements from the previous one.
// Returns (r, g, b, a); a is forced to 7 when FETCH_AA is off.
// Any format other than RGBA8888 / RGBA5551 yields all zeros (before the FETCH_AA override).
uvec4 fetch_color(ivec2 coord)
{
	ivec2 sub = coord & (SCALING_FACTOR - 1);
	int slice = sub.y * SCALING_FACTOR + sub.x;
	coord >>= SCALING_LOG2;

	// Linear pixel index before wrapping; shared by both formats.
	int pixel_index = coord.y * registers.fb_width + coord.x + registers.fb_offset;

	uvec4 color = uvec4(0);

	if (FMT_RGBA8888)
	{
		int index32 = (pixel_index & RDRAM_MASK_32) + slice * (RDRAM_SIZE >> 2);
		uint texel = uint(vram32.elems[index32]);

		// R, G, B are the top three bytes; the fourth component is bits 5..7 of the low byte.
		color = (uvec4(texel) >> uvec4(24, 16, 8, 5)) & uvec4(0xff, 0xff, 0xff, 7);
	}
	else if (FMT_RGBA5551)
	{
		int index16 = (pixel_index & RDRAM_MASK_16) + slice * (RDRAM_SIZE >> 1);

		// The 16-bit read uses index ^ 1; the hidden byte is read at the unswapped index.
		uint texel = uint(vram16.elems[index16 ^ 1]);
		uint hidden = uint(hidden_vram.elems[index16]);

		// Expand the 5-bit channels into the top five bits of each 8-bit result.
		color.r = (texel >> 8u) & 0xf8u;
		color.g = (texel >> 3u) & 0xf8u;
		color.b = (texel << 2u) & 0xf8u;
		// Bit 0 of the texel moves to bit 2 and is OR'ed with the hidden byte.
		color.a = ((texel & 1u) << 2u) | hidden;
	}

	if (!FETCH_AA)
		color.a = 7u;

	return color;
}
|
||||
|
||||
// One invocation per output texel: fetch the VRAM pixel at (invocation ID + registers.offset)
// and store it in uAAInput at the invocation ID. Invocations outside registers.resolution do nothing.
void main()
{
	uvec2 texel = gl_GlobalInvocationID.xy;

	if (all(lessThan(texel, registers.resolution)))
	{
		ivec2 dst = ivec2(texel);
		imageStore(uAAInput, dst, fetch_color(dst + registers.offset));
	}
}
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef FB_FORMATS_H_
|
||||
#define FB_FORMATS_H_
|
||||
|
||||
// Framebuffer format identifiers.
// NOTE(review): the numeric values presumably have to match a host-side enum — confirm before renumbering.
// Going by the names: I4/I8 look like 4/8-bit intensity, IA88 intensity+alpha,
// RGBA5551/RGBA8888 16/32-bit color — TODO confirm.
const int FB_FMT_I4 = 0;
|
||||
const int FB_FMT_I8 = 1;
|
||||
const int FB_FMT_RGBA5551 = 2;
|
||||
const int FB_FMT_IA88 = 3;
|
||||
const int FB_FMT_RGBA8888 = 4;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,32 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Emits one of three fixed clip-space positions chosen by gl_VertexIndex.
// The triangle (-1,-1), (-1,+3), (+3,-1) fully contains the [-1, 1] x [-1, 1] square.
void main()
{
	switch (gl_VertexIndex)
	{
	case 0:
		gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);
		break;

	case 1:
		gl_Position = vec4(-1.0, +3.0, 0.0, 1.0);
		break;

	default:
		gl_Position = vec4(+3.0, -1.0, 0.0, 1.0);
		break;
	}
}
|
|
@ -0,0 +1,255 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef INTERPOLATION_H_
|
||||
#define INTERPOLATION_H_
|
||||
|
||||
#include "data_structures.h"
|
||||
#include "clamping.h"
|
||||
#include "perspective.h"
|
||||
|
||||
// Interpolates the shade color for one pixel.
//   rgba:      color at the span's interpolation base.
//   drgba_dx:  per-pixel X step (low 5 bits ignored for the main step).
//   drgba_dy:  per-line Y step (only used for the sub-pixel correction).
//   dx:        distance from the base, in upscaled pixels.
//   coverage:  coverage mask; its lowest set bit selects the sub-sample to interpolate at.
// Returns the result of clamp_9bit() on the interpolated value.
u8x4 interpolate_rgba(ivec4 rgba, ivec4 drgba_dx, ivec4 drgba_dy, int dx, int coverage)
{
	ivec4 step_x = (drgba_dx & ~0x1f) >> SCALING_LOG2;
	ivec4 at_pixel = rgba + step_x * dx;

	// RGBA is interpolated to 9 bits; the extra bit lets slight under/overflow be clamped
	// to 0 / 0xff. Two sign bits of precision are kept until the centroid step is done.
	i16x4 color = i16x4(at_pixel >> 14);

	// Coverage bits are laid out in scanline order, so findLSB yields the first covered sample:
	// 0x01 0x02
	// 0x04 0x08
	// 0x10 0x20
	// 0x40 0x80
	int first_bit = findLSB(coverage);
	i16 sub_y = i16(first_bit >> 1);
	i16 sub_x = i16((first_bit & 1) << 1) + (sub_y & I16_C(1));

	i16x4 grad_x = i16x4(drgba_dx >> 14);
	i16x4 grad_y = i16x4(drgba_dy >> 14);

	color <<= I16_C(2 + SCALING_LOG2);
	color += sub_x * grad_x + sub_y * grad_y;
	color >>= I16_C(4 + SCALING_LOG2);
	return clamp_9bit(color);
}
|
||||
|
||||
// Texture coordinate interpolation for the copy pipe.
//   span:        span setup providing start_x / end_x and the base stzw.
//   dstzw_dx:    per-step X derivative (low 5 bits ignored).
//   x:           upscaled pixel X.
//   perspective: use perspective_divide() instead of no_perspective_divide().
//   flip:        measure dx from span.start_x (true) or from span.end_x (false).
//   st:          out, resulting texture coordinate.
//   s_offset:    out, distance from the snapped interpolation point to this pixel.
void interpolate_st_copy(SpanSetup span, ivec4 dstzw_dx, int x, bool perspective, bool flip,
                         out ivec2 st, out int s_offset)
{
	int dx;
	int direction;
	if (flip)
	{
		dx = x - span.start_x;
		direction = 1;
	}
	else
	{
		dx = span.end_x - x;
		direction = -1;
	}

	// The copy pipe duplicates pixels when upscaling; there is no filtering to do.
	dx >>= SCALING_LOG2;

	// Interpolation happens once per N output pixels; report how far past that point we are.
	s_offset = dx - (dx & global_constants.fb_info.dx_mask);

	int lerp_dx = (dx >> global_constants.fb_info.dx_shift) * direction;
	ivec3 stw = (span.stzw.xyw + (dstzw_dx.xyw & ~0x1f) * lerp_dx) >> 16;

	if (!perspective)
	{
		st = no_perspective_divide(stw);
	}
	else
	{
		// The overflow flag is not reported by this function.
		bool st_overflow;
		st = perspective_divide(stw, st_overflow);
	}
}
|
||||
|
||||
// Interpolates a single texture coordinate at distance dx (in upscaled pixels) from the base.
// Only the x, y and w components of stzw / dstzw_dx are used; the overflow flag from the
// perspective divide is discarded.
ivec2 interpolate_st_single(ivec4 stzw, ivec4 dstzw_dx, int dx, bool perspective)
{
	ivec3 step_x = (dstzw_dx.xyw & ~0x1f) >> SCALING_LOG2;
	ivec3 stw = (stzw.xyw + step_x * dx) >> 16;

	if (!perspective)
		return no_perspective_divide(stw);

	bool st_overflow;
	return perspective_divide(stw, st_overflow);
}
|
||||
|
||||
// Interpolates texture coordinates and depth for one pixel.
//   stzw / dstzw_dx / dstzw_dy: base value and X / Y derivatives (s, t, z, w).
//   dx:             distance from the base, in upscaled pixels.
//   coverage:       coverage mask; its lowest set bit selects the sub-sample used for Z.
//   perspective:    use perspective_divide() instead of no_perspective_divide().
//   uses_lod:       also compute the neighbouring coordinates st_dx / st_dy.
//   flip_direction: sign applied to the X neighbour step.
//   st:             out, texture coordinate at this pixel.
//   st_dx, st_dy:   out, neighbour coordinates; left unwritten when uses_lod is false.
//   z:              out, result of clamp_z().
//   st_overflow:    inout, forwarded to every perspective_divide() call.
void interpolate_stz(ivec4 stzw, ivec4 dstzw_dx, ivec4 dstzw_dy, int dx, int coverage, bool perspective, bool uses_lod,
                     int flip_direction, out ivec2 st, out ivec2 st_dx, out ivec2 st_dy, out int z, inout bool st_overflow)
{
	ivec3 step_x = (dstzw_dx.xyw & ~0x1f) >> SCALING_LOG2;
	ivec3 stw = stzw.xyw + step_x * dx;
	ivec3 stw_dx, stw_dy;

	if (uses_lod)
	{
		ivec3 step_y = (dstzw_dy.xyw & ~0x7fff) >> SCALING_LOG2;
		stw_dx = stw + flip_direction * step_x;

		if (SCALING_FACTOR > 1)
			stw_dy = stw + abs(flip_direction) * step_y;
		else
			stw_dy = stw + step_y;
	}

	ivec3 stw_center = stw >> 16;

	if (perspective)
	{
		st = perspective_divide(stw_center, st_overflow);
		if (uses_lod)
		{
			st_dx = perspective_divide(stw_dx >> 16, st_overflow);
			st_dy = perspective_divide(stw_dy >> 16, st_overflow);
		}
	}
	else
	{
		st = no_perspective_divide(stw_center);
		if (uses_lod)
		{
			st_dx = no_perspective_divide(stw_dx >> 16);
			st_dy = no_perspective_divide(stw_dy >> 16);
		}
	}

	// Use the full step for whole (non-upscaled) pixels so every "main" pixel snaps as expected,
	// and the quantized step only for the sub-pixel remainder.
	int whole_dx = dx >> SCALING_LOG2;
	int sub_dx = dx & (SCALING_FACTOR - 1);
	int raw_z = stzw.z + dstzw_dx.z * whole_dx + (dstzw_dx.z >> SCALING_LOG2) * sub_dx;

	// Sub-sample offsets from the first covered bit (two samples per row).
	int first_bit = findLSB(coverage);
	int sub_y = first_bit >> 1;
	int sub_x = ((first_bit & 1) << 1) + (sub_y & I16_C(1));

	int snapped = (raw_z >> 10) << (2 + SCALING_LOG2);
	snapped += sub_x * (dstzw_dx.z >> 10) + sub_y * (dstzw_dy.z >> 10);
	snapped >>= 5 + SCALING_LOG2;

	z = clamp_z(snapped);
}
|
||||
|
||||
#if 0
|
||||
// Disabled (#if 0) variant that interpolates color directly from triangle setup
// rather than from a prepared span.
//   setup:    triangle edge data (xh, yh, dxhdy, flags).
//   attr:     attribute base and derivatives (rgba, drgba_dx, drgba_de, drgba_dy).
//   x, y:     pixel position.
//   coverage: coverage mask; its lowest set bit selects the sub-sample.
u8x4 interpolate_rgba(TriangleSetup setup, AttributeSetup attr, int x, int y, int coverage)
{
	bool do_offset = (setup.flags & TRIANGLE_SETUP_DO_OFFSET_BIT) != 0;
	int line_base = int(setup.yh) >> 2;
	int lines_down = y - line_base;
	int edge_x = setup.xh + lines_down * (setup.dxhdy << 2);

	ivec4 offset_fix = ivec4(0);

	// In do_offset mode, varyings are latched at the last subpixel line instead of the first.
	if (do_offset)
	{
		edge_x += 3 * setup.dxhdy;
		ivec4 de_hi = attr.drgba_de & ~0x1ff;
		ivec4 dy_hi = attr.drgba_dy & ~0x1ff;
		offset_fix = de_hi - (de_hi >> 2) - dy_hi + (dy_hi >> 2);
	}

	int base_x = edge_x >> 16;
	int x_frac = (edge_x >> 8) & 0xff;

	// Walk down the edge, step back by the fractional X, then step across to the pixel.
	ivec4 color32 = attr.rgba + attr.drgba_de * lines_down;
	color32 = ((color32 & ~0x1ff) + offset_fix - x_frac * ((attr.drgba_dx >> 8) & ~1)) & ~0x3ff;
	color32 += (attr.drgba_dx & ~0x1f) * (x - base_x);

	// RGBA is interpolated to 9 bits; the extra bit lets slight under/overflow be clamped
	// to 0 / 0xff. Two sign bits of precision are kept until the centroid step is done.
	i16x4 color = i16x4(color32 >> 14);

	// Coverage bits are in scanline order so findLSB finds the first covered sample
	// (Angrylion assigns the bits differently; this layout avoids a lookup table):
	// 0x01 0x02
	// 0x04 0x08
	// 0x10 0x20
	// 0x40 0x80
	int first_bit = findLSB(coverage);
	i16 sub_y = i16(first_bit >> 1);
	i16 sub_x = i16((first_bit & 1) << 1) + (sub_y & I16_C(1));

	i16x4 grad_x = i16x4(attr.drgba_dx >> 14);
	i16x4 grad_y = i16x4(attr.drgba_dy >> 14);

	color <<= I16_C(2);
	color += sub_x * grad_x + sub_y * grad_y;
	color >>= I16_C(4);
	return clamp_9bit(color);
}
|
||||
|
||||
// Disabled (#if 0) variant that interpolates (s, t, w) directly from triangle setup.
// Returns the interpolated value shifted right by 16.
ivec3 interpolate_stw(TriangleSetup setup, AttributeSetup attr, int x, int y)
{
	bool do_offset = (setup.flags & TRIANGLE_SETUP_DO_OFFSET_BIT) != 0;
	int line_base = int(setup.yh) >> 2;
	int lines_down = y - line_base;
	int edge_x = setup.xh + lines_down * (setup.dxhdy << 2);

	ivec3 offset_fix = ivec3(0);

	// In do_offset mode, varyings are latched at the last subpixel line instead of the first.
	if (do_offset)
	{
		edge_x += 3 * setup.dxhdy;
		ivec3 de_hi = attr.dstzw_de.xyw & ~0x1ff;
		ivec3 dy_hi = attr.dstzw_dy.xyw & ~0x1ff;
		offset_fix = de_hi - (de_hi >> 2) - dy_hi + (dy_hi >> 2);
	}

	int base_x = edge_x >> 16;
	int x_frac = (edge_x >> 8) & 0xff;

	// Walk down the edge, step back by the fractional X, then step across to the pixel.
	ivec3 stw = attr.stzw.xyw + attr.dstzw_de.xyw * lines_down;
	stw = ((stw & ~0x1ff) + offset_fix - x_frac * ((attr.dstzw_dx.xyw >> 8) & ~1)) & ~0x3ff;
	stw += (attr.dstzw_dx.xyw & ~0x1f) * (x - base_x);

	return stw >> 16;
}
|
||||
|
||||
// Disabled (#if 0) variant that interpolates depth directly from triangle setup.
//   setup:    triangle edge data (xh, yh, dxhdy, flags).
//   attr:     attribute base and derivatives; only the z components are used.
//   x, y:     pixel position.
//   coverage: coverage mask; its lowest set bit selects the sub-sample.
// Returns the result of clamp_z() on the interpolated depth.
int interpolate_z(TriangleSetup setup, AttributeSetup attr, int x, int y, int coverage)
{
	bool do_offset = (setup.flags & TRIANGLE_SETUP_DO_OFFSET_BIT) != 0;
	int y_interpolation_base = int(setup.yh) >> 2;
	int xh = setup.xh + (y - y_interpolation_base) * (setup.dxhdy << 2);

	int dzdiff = 0;
	// In do_offset mode, varyings are latched at the last subpixel line instead of the first.
	if (do_offset)
	{
		xh += 3 * setup.dxhdy;
		int dzdeh = attr.dstzw_de.z & ~0x1ff;
		int dzdyh = attr.dstzw_dy.z & ~0x1ff;
		dzdiff = dzdeh - (dzdeh >> 2) - dzdyh + (dzdyh >> 2);
	}

	int base_x = xh >> 16;
	int xfrac = (xh >> 8) & 0xff;

	// Walk down the edge, step back by the fractional X, then step across to the pixel.
	int z = attr.stzw.z;
	z += attr.dstzw_de.z * (y - y_interpolation_base);
	z = ((z & ~0x1ff) + dzdiff - xfrac * ((attr.dstzw_dx.z >> 8) & ~1)) & ~0x3ff;
	z += attr.dstzw_dx.z * (x - base_x);

	int snapped_z = z >> 10;

	// Sub-sample offsets from the first covered bit (two samples per row).
	int first_coverage = findLSB(coverage);
	int yoff = first_coverage >> 1;
	// Use the I16_C() macro instead of a raw `1s` literal: the suffix only parses when the
	// 16-bit arithmetic extension is enabled, and interpolate_stz() already spells it this way.
	int xoff = ((first_coverage & 1) << 1) + (yoff & I16_C(1));

	snapped_z <<= 2;
	snapped_z += xoff * (attr.dstzw_dx.z >> 10) + yoff * (attr.dstzw_dy.z >> 10);
	snapped_z >>= 5;
	return clamp_z(snapped_z);
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,31 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_ATTRIBUTE_SETUP_H_
|
||||
#define LOAD_ATTRIBUTE_SETUP_H_
|
||||
|
||||
// Returns entry `index` of the attribute_setup buffer.
AttributeSetup load_attribute_setup(uint index)
{
	AttributeSetup entry = attribute_setup.elems[index];
	return entry;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,41 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_DEPTH_BLEND_STATE_H_
|
||||
#define LOAD_DEPTH_BLEND_STATE_H_
|
||||
|
||||
// Returns entry `index` of the depth_blend_state buffer as a DepthBlendState.
// With SMALL_TYPES the buffer element is returned as-is; otherwise each field is
// converted explicitly and the last two members are zero-filled.
DepthBlendState load_depth_blend_state(uint index)
{
#if !SMALL_TYPES
	return DepthBlendState(
		u8x4(depth_blend_state.elems[index].blend_modes0),
		u8x4(depth_blend_state.elems[index].blend_modes1),
		depth_blend_state.elems[index].flags,
		u8(depth_blend_state.elems[index].coverage_mode),
		u8(depth_blend_state.elems[index].z_mode),
		u8(0), u8(0));
#else
	return depth_blend_state.elems[index];
#endif
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,50 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_DERIVED_SETUP_H_
|
||||
#define LOAD_DERIVED_SETUP_H_
|
||||
|
||||
// Returns entry `index` of the derived_setup buffer as a DerivedSetup.
// With SMALL_TYPES the buffer element is returned as-is; otherwise each field is
// converted explicitly to the type used by the DerivedSetup constructor.
DerivedSetup load_derived_setup(uint index)
{
#if !SMALL_TYPES
	return DerivedSetup(
		// Combiner constants, cycle 0.
		u8x4(derived_setup.elems[index].constant_muladd0),
		u8x4(derived_setup.elems[index].constant_mulsub0),
		u8x4(derived_setup.elems[index].constant_mul0),
		u8x4(derived_setup.elems[index].constant_add0),
		// Combiner constants, cycle 1.
		u8x4(derived_setup.elems[index].constant_muladd1),
		u8x4(derived_setup.elems[index].constant_mulsub1),
		u8x4(derived_setup.elems[index].constant_mul1),
		u8x4(derived_setup.elems[index].constant_add1),
		// Remaining colors and scalars.
		u8x4(derived_setup.elems[index].fog_color),
		u8x4(derived_setup.elems[index].blend_color),
		uint(derived_setup.elems[index].fill_color),
		u16(derived_setup.elems[index].dz),
		u8(derived_setup.elems[index].dz_compressed),
		u8(derived_setup.elems[index].min_lod),
		i16x4(derived_setup.elems[index].factors));
#else
	return derived_setup.elems[index];
#endif
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,32 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_SCISSOR_STATE_H_
|
||||
#define LOAD_SCISSOR_STATE_H_
|
||||
|
||||
// Builds a ScissorState from the four integers stored at scissor_state.elems[index],
// passing them to the constructor in x, y, z, w order.
ScissorState load_scissor_state(uint index)
{
	ivec4 raw = scissor_state.elems[index];
	return ScissorState(raw[0], raw[1], raw[2], raw[3]);
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,31 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_SPAN_OFFSETS_H_
|
||||
#define LOAD_SPAN_OFFSETS_H_
|
||||
|
||||
// Returns entry `index` of the span_offsets buffer.
SpanInfoOffsets load_span_offsets(uint index)
{
	SpanInfoOffsets entry = span_offsets.elems[index];
	return entry;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,44 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_SPAN_SETUP_H_
|
||||
#define LOAD_SPAN_SETUP_H_
|
||||
|
||||
// Returns entry `index` of the span_setups buffer as a SpanSetup.
// With SMALL_TYPES the buffer element is returned as-is; otherwise xleft / xright,
// lodlength and valid_line are converted explicitly and the other fields pass through.
SpanSetup load_span_setup(uint index)
{
#if !SMALL_TYPES
	return SpanSetup(
		span_setups.elems[index].rgba,
		span_setups.elems[index].stzw,
		// Converted through uvec4 before narrowing to 16-bit components.
		u16x4(uvec4(span_setups.elems[index].xleft)),
		u16x4(uvec4(span_setups.elems[index].xright)),
		span_setups.elems[index].interpolation_base_x,
		span_setups.elems[index].start_x,
		span_setups.elems[index].end_x,
		i16(span_setups.elems[index].lodlength),
		u16(span_setups.elems[index].valid_line));
#else
	return span_setups.elems[index];
#endif
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,42 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_STATIC_RASTER_STATE_H_
|
||||
#define LOAD_STATIC_RASTER_STATE_H_
|
||||
|
||||
// Returns entry `index` of the static_raster_state buffer as a StaticRasterizationState.
// With SMALL_TYPES the buffer element is returned as-is; otherwise the combiner inputs
// are converted to u8x4 and the last two members are zero-filled.
StaticRasterizationState load_static_rasterization_state(uint index)
{
#if !SMALL_TYPES
	return StaticRasterizationState(
		u8x4(static_raster_state.elems[index].combiner_inputs_rgb0),
		u8x4(static_raster_state.elems[index].combiner_inputs_alpha0),
		u8x4(static_raster_state.elems[index].combiner_inputs_rgb1),
		u8x4(static_raster_state.elems[index].combiner_inputs_alpha1),
		static_raster_state.elems[index].flags,
		static_raster_state.elems[index].dither,
		0, 0);
#else
	return static_raster_state.elems[index];
#endif
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,49 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_TILE_INFO_H_
|
||||
#define LOAD_TILE_INFO_H_
|
||||
|
||||
// Returns entry `index` of the tile_infos buffer as a TileInfo.
// With SMALL_TYPES the buffer element is returned as-is; otherwise the trailing
// eight fields are converted to u8 and the first six pass through unchanged.
TileInfo load_tile_info(uint index)
{
#if !SMALL_TYPES
	return TileInfo(
		// Passed through without conversion.
		tile_infos.elems[index].slo,
		tile_infos.elems[index].shi,
		tile_infos.elems[index].tlo,
		tile_infos.elems[index].thi,
		tile_infos.elems[index].offset,
		tile_infos.elems[index].stride,
		// Converted to u8.
		u8(tile_infos.elems[index].fmt),
		u8(tile_infos.elems[index].size),
		u8(tile_infos.elems[index].palette),
		u8(tile_infos.elems[index].mask_s),
		u8(tile_infos.elems[index].shift_s),
		u8(tile_infos.elems[index].mask_t),
		u8(tile_infos.elems[index].shift_t),
		u8(tile_infos.elems[index].flags));
#else
	return tile_infos.elems[index];
#endif
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,46 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LOAD_TRIANGLE_SETUP_H_
|
||||
#define LOAD_TRIANGLE_SETUP_H_
|
||||
|
||||
// Reads element `index` of the triangle_setup buffer and returns it as a TriangleSetup.
TriangleSetup load_triangle_setup(uint index)
{
#if SMALL_TYPES
    // With SMALL_TYPES the stored element is returned as-is.
    return triangle_setup.elems[index];
#else
    // Otherwise the value is rebuilt through the TriangleSetup constructor:
    // yh/ym/yl go through i16(), flags/tile through u8(), the rest is passed unchanged.
    TriangleSetup setup = TriangleSetup(
        triangle_setup.elems[index].xh,
        triangle_setup.elems[index].xm,
        triangle_setup.elems[index].xl,
        i16(triangle_setup.elems[index].yh),
        i16(triangle_setup.elems[index].ym),
        triangle_setup.elems[index].dxhdy,
        triangle_setup.elems[index].dxmdy,
        triangle_setup.elems[index].dxldy,
        i16(triangle_setup.elems[index].yl),
        u8(triangle_setup.elems[index].flags),
        u8(triangle_setup.elems[index].tile));
    return setup;
#endif
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,70 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Workgroup width is supplied through specialization constant 0.
layout(local_size_x_id = 0) in;
// Multiplier applied in main() to each offset read from the UBO (default 256).
layout(constant_id = 1) const int PAGE_STRIDE = 256;

// Destination words; main() reads and writes this buffer.
layout(set = 0, binding = 0, std430) buffer RDRAM
{
    uint rdram[];
};

// Source words that main() copies into rdram[]; read-only.
layout(set = 0, binding = 1, std430) readonly buffer StagingRDRAM
{
    uint staging_rdram[];
};

// One mask word per rdram[] word; in main() a set bit keeps the existing rdram bit,
// a clear bit takes the bit from staging_rdram[]. Read-only.
layout(set = 0, binding = 2, std430) readonly buffer WriteMaskRDRAM
{
    uint writemask[];
};

// Offsets consumed by main(), four per uvec4, indexed by gl_WorkGroupID.x.
layout(set = 1, binding = 0, std140) uniform UBO
{
    uvec4 offsets[1024];
};
|
||||
|
||||
// Merges one staging word into rdram[] under control of the matching writemask[] word.
// Each workgroup picks its offset from the UBO (four offsets per uvec4), scales it by
// PAGE_STRIDE, and each invocation handles the word at its local invocation index.
void main()
{
    uint group = gl_WorkGroupID.x;
    uint word_index = offsets[group >> 2u][group & 3u];
    word_index *= PAGE_STRIDE;
    word_index += gl_LocalInvocationIndex;

    uint keep_mask = writemask[word_index];

    // Every bit is kept: nothing to copy.
    if (keep_mask == ~0u)
        return;

    uint incoming = staging_rdram[word_index];

    if (keep_mask == 0u)
    {
        // No bit is kept: plain overwrite, no read of rdram[] needed.
        rdram[word_index] = incoming;
    }
    else
    {
        // Mixed: keep the masked bits of the existing word, take the rest from staging.
        uint existing = rdram[word_index];
        rdram[word_index] = (existing & keep_mask) | (incoming & ~keep_mask);
    }
}
|
|
@ -0,0 +1,582 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef MEMORY_INTERFACING_H_
|
||||
#define MEMORY_INTERFACING_H_
|
||||
|
||||
#include "dither.h"
|
||||
#include "z_encode.h"
|
||||
#include "blender.h"
|
||||
#include "depth_test.h"
|
||||
#include "coverage.h"
|
||||
#include "fb_formats.h"
|
||||
|
||||
// Size used for address masking and slice strides below (specialization constant 0).
layout(constant_id = 0) const uint RDRAM_SIZE = 0;

// Packed specialization constant 7: bit 0 is the "incoherent" flag,
// the remaining bits hold log2 of the scaling factor.
layout(constant_id = 7) const int RDRAM_INCOHERENT_SCALING = 0;
const bool RDRAM_INCOHERENT = (RDRAM_INCOHERENT_SCALING & 1) != 0;
const int SCALING_LOG2 = RDRAM_INCOHERENT_SCALING >> 1;
const int SCALING_FACTOR = 1 << SCALING_LOG2;
// Incoherent with no scaling: write masks are stored per element by the store_vram_* functions.
const bool RDRAM_UNSCALED_WRITE_MASK = RDRAM_INCOHERENT && SCALING_LOG2 == 0;
// Incoherent with scaling: write masks are emitted by emit_scaled_write_masks().
const bool RDRAM_SCALED_WRITE_MASK = RDRAM_INCOHERENT && SCALING_LOG2 != 0;

// Index masks for 8-, 16- and 32-bit element views.
// NOTE(review): these are only valid wrap masks if RDRAM_SIZE is a power of two - confirm.
const uint RDRAM_MASK_8 = RDRAM_SIZE - 1u;
const uint RDRAM_MASK_16 = RDRAM_MASK_8 >> 1u;
const uint RDRAM_MASK_32 = RDRAM_MASK_8 >> 2u;

// Framebuffer format selector, compared against the FB_FMT_* values.
layout(constant_id = 1) const int FB_FMT = 0;
// When true, color and depth are kept in sync through the alias_*() functions.
layout(constant_id = 2) const bool FB_COLOR_DEPTH_ALIAS = false;

// Per-invocation color state; the dirty flag gates store_vram_color().
u8x4 current_color;
bool current_color_dirty;

// Per-invocation depth state; the dirty flag gates store_vram_depth().
u16 current_depth;
u8 current_dz;
bool current_depth_dirty;
|
||||
|
||||
// Loads the pixel at element `index` of slice `slice` into current_color,
// decoding it according to FB_FMT. Leaves current_color untouched for other formats.
void load_vram_color(uint index, uint slice)
{
    if (FB_FMT == FB_FMT_I4 || FB_FMT == FB_FMT_I8)
    {
        // 8-bit element: replicated into RGB, alpha comes from hidden_vram.
        index &= RDRAM_MASK_8;
        index += slice * RDRAM_SIZE;
        u8 intensity = u8(vram8.data[index ^ 3u]);
        current_color = u8x4(intensity, intensity, intensity, u8(hidden_vram.data[index >> 1]));
    }
    else if (FB_FMT == FB_FMT_RGBA5551)
    {
        // 16-bit element: three 5-bit channels expanded into the top bits of each byte;
        // alpha combines the hidden_vram value (shifted up by 5) with bit 0 of the word (as bit 7).
        index &= RDRAM_MASK_16;
        index += slice * (RDRAM_SIZE >> 1);
        uint texel = uint(vram16.data[index ^ 1u]);
        uvec3 rgb = uvec3(texel >> 8u, texel >> 3u, texel << 2u) & 0xf8u;
        current_color = u8x4(rgb, (u8(hidden_vram.data[index]) << U8_C(5)) | u8((texel & 1) << 7));
    }
    else if (FB_FMT == FB_FMT_IA88)
    {
        // 16-bit element: high byte replicated into RGB, low byte is alpha.
        index &= RDRAM_MASK_16;
        index += slice * (RDRAM_SIZE >> 1);
        uint texel = uint(vram16.data[index ^ 1u]);
        current_color = u8x4(u8x3(texel >> 8u), texel & 0xff);
    }
    else if (FB_FMT == FB_FMT_RGBA8888)
    {
        // 32-bit element: one byte per channel, R in the top byte.
        index &= RDRAM_MASK_32;
        index += slice * (RDRAM_SIZE >> 2);
        uint texel = vram32.data[index];
        current_color = u8x4((uvec4(texel) >> uvec4(24, 16, 8, 0)) & uvec4(0xff));
    }
}
|
||||
|
||||
// Derives current_depth / current_dz from current_color for the two 16-bit formats.
// Other formats leave the depth state untouched.
void alias_color_to_depth()
{
    if (FB_FMT == FB_FMT_RGBA5551)
    {
        current_dz = (current_color.a >> U8_C(3)) | (current_color.b & U8_C(8));

        // Repack the top five bits of R, G and B into one word.
        uint packed = ((current_color.r & 0xf8u) << 6u) |
                      ((current_color.g & 0xf8u) << 1u) |
                      ((current_color.b & 0xf8u) >> 4u);
        current_depth = u16(packed);
    }
    else if (FB_FMT == FB_FMT_IA88)
    {
        // Rebuild the 16-bit word from R (high byte) and A (low byte).
        uvec2 ia = current_color.ra;
        uint packed = (ia.x << 8u) | ia.y;
        // Bit 0 of the word replicated into two bits.
        uint low_pair = (packed & 1u) * 3u;
        current_depth = u16(packed >> 2u);
        current_dz = u8(((packed & 3u) << 2u) | low_pair);
    }
}
|
||||
|
||||
// Derives current_color from current_depth / current_dz for the two 16-bit formats
// and always marks the color as dirty.
void alias_depth_to_color()
{
    // Depth in the upper bits, dz in the low four.
    uint packed = (uint(current_depth) << 4u) | current_dz;

    if (FB_FMT == FB_FMT_RGBA5551)
    {
        current_color.r = u8((packed >> 10u) & 0xf8u);
        current_color.g = u8((packed >> 5u) & 0xf8u);
        current_color.b = u8((packed >> 0u) & 0xf8u);
        current_color.a = u8((packed & 7u) << 5u);
    }
    else if (FB_FMT == FB_FMT_IA88)
    {
        // Only R and A are updated for this format.
        current_color.r = u8((packed >> 10u) & 0xffu);
        current_color.a = u8((packed >> 2u) & 0xffu);
    }

    current_color_dirty = true;
}
|
||||
|
||||
// Loads the 16-bit depth element at `index` of slice `slice`:
// the upper 14 bits become current_depth, the low 2 bits form the upper part of
// current_dz, and the hidden_vram value supplies the rest.
void load_vram_depth(uint index, uint slice)
{
    index &= RDRAM_MASK_16;
    index += slice * (RDRAM_SIZE >> 1);

    u16 stored = u16(vram16.data[index ^ 1u]);
    current_dz = u8(hidden_vram.data[index]) | u8((stored & U16_C(3)) << U16_C(2));
    current_depth = stored >> U16_C(2);
}
|
||||
|
||||
// Writes current_color back to element `index` of slice `slice`, encoded per FB_FMT.
// Does nothing unless current_color_dirty is set.
//
// When RDRAM_UNSCALED_WRITE_MASK is enabled, each case additionally stores an all-ones
// value one RDRAM_SIZE further into the same buffer. The memoryBarrierBuffer() before
// that store ensures the mask readback does not read an invalid value from RDRAM:
// if the mask is seen, the valid RDRAM value is also coherent.
void store_vram_color(uint index, uint slice)
{
    //GENERIC_MESSAGE1(index);
    if (!current_color_dirty)
        return;

    switch (FB_FMT)
    {
    case FB_FMT_I4:
    {
        index &= RDRAM_MASK_8;
        index += slice * RDRAM_SIZE;

        // The visible byte is always written as zero for this format.
        vram8.data[index ^ 3u] = mem_u8(0);
        // hidden_vram has one entry per two bytes; only odd indices write it.
        if ((index & 1u) != 0u)
            hidden_vram.data[index >> 1u] = mem_u8(current_color.a);

        if (RDRAM_UNSCALED_WRITE_MASK)
        {
            memoryBarrierBuffer();
            vram8.data[(index ^ 3u) + RDRAM_SIZE] = mem_u8(0xff);
        }
        break;
    }

    case FB_FMT_I8:
    {
        index &= RDRAM_MASK_8;
        index += slice * RDRAM_SIZE;

        vram8.data[index ^ 3u] = mem_u8(current_color.r);
        // hidden_vram has one entry per two bytes; only odd indices write it.
        if ((index & 1u) != 0u)
            hidden_vram.data[index >> 1u] = mem_u8((current_color.r & 1) * 3);

        if (RDRAM_UNSCALED_WRITE_MASK)
        {
            memoryBarrierBuffer();
            vram8.data[(index ^ 3u) + RDRAM_SIZE] = mem_u8(0xff);
        }
        break;
    }

    case FB_FMT_RGBA5551:
    {
        index &= RDRAM_MASK_16;
        index += slice * (RDRAM_SIZE >> 1);

        uvec4 channels = uvec4(current_color);
        channels.rgb &= 0xf8u;
        // Top three bits of alpha: the highest goes into bit 0 of the word,
        // the lower two go to hidden_vram.
        uint cov = channels.w >> 5u;
        uint packed = (channels.x << 8u) | (channels.y << 3u) | (channels.z >> 2u) | (cov >> 2u);
        vram16.data[index ^ 1u] = mem_u16(packed);
        hidden_vram.data[index] = mem_u8(cov & U8_C(3));

        if (RDRAM_UNSCALED_WRITE_MASK)
        {
            memoryBarrierBuffer();
            vram16.data[(index ^ 1u) + (RDRAM_SIZE >> 1u)] = mem_u16(0xffff);
        }
        break;
    }

    case FB_FMT_IA88:
    {
        index &= RDRAM_MASK_16;
        index += slice * (RDRAM_SIZE >> 1);

        // R is the high byte, A the low byte.
        uvec2 ia = current_color.ra;
        uint packed = (ia.x << 8u) | ia.y;
        vram16.data[index ^ 1u] = mem_u16(packed);
        hidden_vram.data[index] = mem_u8((ia.y & 1) * 3);

        if (RDRAM_UNSCALED_WRITE_MASK)
        {
            memoryBarrierBuffer();
            vram16.data[(index ^ 1u) + (RDRAM_SIZE >> 1u)] = mem_u16(0xffff);
        }
        break;
    }

    case FB_FMT_RGBA8888:
    {
        index &= RDRAM_MASK_32;
        index += slice * (RDRAM_SIZE >> 2);

        uvec4 channels = current_color;
        uint packed = (channels.r << 24u) | (channels.g << 16u) | (channels.b << 8u) | (channels.a << 0u);
        vram32.data[index] = packed;
        // Two hidden_vram entries per 32-bit element, derived from bit 0 of G and of A.
        hidden_vram.data[2u * index] = mem_u8((current_color.g & 1) * 3);
        hidden_vram.data[2u * index + 1u] = mem_u8((current_color.a & 1) * 3);

        if (RDRAM_UNSCALED_WRITE_MASK)
        {
            memoryBarrierBuffer();
            vram32.data[index + (RDRAM_SIZE >> 2u)] = ~0u;
        }
        break;
    }
    }
}
|
||||
|
||||
// Writes current_depth / current_dz back to the 16-bit element at `index` of slice `slice`.
// Skipped entirely when color and depth alias each other, or when the depth is not dirty.
void store_vram_depth(uint index, uint slice)
{
    if (FB_COLOR_DEPTH_ALIAS)
        return;

    //GENERIC_MESSAGE1(index);
    if (!current_depth_dirty)
        return;

    index &= RDRAM_MASK_16;
    index += slice * (RDRAM_SIZE >> 1);

    // Depth in the upper 14 bits, upper part of dz in the low 2; the low 2 bits of dz go to hidden_vram.
    vram16.data[index ^ 1u] = mem_u16((current_depth << U16_C(2)) | (current_dz >> U16_C(2)));
    hidden_vram.data[index] = mem_u8(current_dz & U16_C(3));

    if (RDRAM_UNSCALED_WRITE_MASK)
    {
        // Need this memory barrier to ensure the mask readback does not read
        // an invalid value from RDRAM. If the mask is seen, the valid RDRAM value is
        // also coherent.
        memoryBarrierBuffer();
        vram16.data[(index ^ 1u) + (RDRAM_SIZE >> 1u)] = mem_u16(0xffff);
    }
}
|
||||
|
||||
// Color framebuffer element index of the current pixel; assigned in init_tile()
// and read by fill_color() to select the part of the fill word to use.
uint color_fb_index;
|
||||
|
||||
// Resets the dirty flags and, for coordinates inside the framebuffer, loads the current
// color and depth for the pixel. `coord`, `fb_width` and `fb_height` are in scaled pixels.
void init_tile(uvec2 coord, uint fb_width, uint fb_height, uint fb_addr_index, uint fb_depth_addr_index)
{
    current_color_dirty = false;
    current_depth_dirty = false;

    // Out-of-bounds invocations load nothing.
    if (!all(lessThan(coord, uvec2(fb_width, fb_height))))
        return;

    // The low bits of the scaled coordinate select the slice; the rest is the unscaled pixel.
    uvec2 slice2d = coord & (SCALING_FACTOR - 1);
    coord >>= SCALING_LOG2;
    uint slice = slice2d.y * SCALING_FACTOR + slice2d.x;

    uint pixel_offset = (fb_width >> SCALING_LOG2) * coord.y + coord.x;

    color_fb_index = fb_addr_index + pixel_offset;
    load_vram_color(color_fb_index, slice);
    load_vram_depth(fb_depth_addr_index + pixel_offset, slice);
}
|
||||
|
||||
// Merges write masks across pixels.
//
// A chunk of memory after the scaled RDRAM slices holds 2 bits per unscaled pixel:
// bit 0 for color, bit 1 for depth. The resolve stage only resolves a pixel and
// triggers a write if any sub-sample was marked as written.
//
// Masks are organized in 4x4 blocks of unscaled pixels (one 32-bit word per block)
// for locality, which keeps the number of loop iterations needed to merge them low.
void emit_scaled_write_masks(uvec2 unscaled_coord, uint unscaled_fb_width)
{
    uint blocks_per_row = (unscaled_fb_width + 3u) >> 2u;
    uint unscaled_block = (unscaled_coord.y >> 2u) * blocks_per_row + (unscaled_coord.x >> 2u);

    // Position of this pixel's bit pair inside the block word.
    uvec2 in_block = unscaled_coord & 3u;
    uint word = uint(current_color_dirty) + 2u * uint(current_depth_dirty);
    word <<= 2u * (in_block.x + in_block.y * 4u);

    // Mask words start right after SCALING_FACTOR^2 slices of 32-bit RDRAM.
    uint mask_index = SCALING_FACTOR * SCALING_FACTOR * (RDRAM_SIZE >> 2) + unscaled_block;

#if SUBGROUP
    // Invocations sharing a block OR their bits together and a single elected
    // invocation issues the atomic. This should only need one iteration.
    bool is_active = true;
    do
    {
        if (subgroupBroadcastFirst(unscaled_block) == unscaled_block)
        {
            uint merged = subgroupOr(word);
            if (subgroupElect())
                atomicOr(vram32.data[mask_index], merged);
            is_active = false;
        }
    } while (is_active);
#else
    // Just use atomics directly. With subgroup support, we can be a bit smarter about it.
    if (word != 0u)
        atomicOr(vram32.data[mask_index], word);
#endif
}
|
||||
|
||||
// For coordinates inside the framebuffer, writes back color and depth for the pixel and,
// when scaled write masks are enabled, records which of the two were written.
// `coord`, `fb_width` and `fb_height` are in scaled pixels.
void finish_tile(uvec2 coord, uint fb_width, uint fb_height, uint fb_addr_index, uint fb_depth_addr_index)
{
    // Out-of-bounds invocations store nothing.
    if (!all(lessThan(coord, uvec2(fb_width, fb_height))))
        return;

    uint unscaled_fb_width = fb_width >> SCALING_LOG2;

    // The low bits of the scaled coordinate select the slice; the rest is the unscaled pixel.
    uvec2 slice2d = coord & (SCALING_FACTOR - 1);
    coord >>= SCALING_LOG2;
    uint slice = slice2d.y * SCALING_FACTOR + slice2d.x;

    uint pixel_offset = unscaled_fb_width * coord.y + coord.x;

    store_vram_color(fb_addr_index + pixel_offset, slice);
    store_vram_depth(fb_depth_addr_index + pixel_offset, slice);

    if (RDRAM_SCALED_WRITE_MASK)
        emit_scaled_write_masks(coord, unscaled_fb_width);
}
|
||||
|
||||
// Returns the color currently held in current_color as seen by the blender, with the
// coverage (top three bits) in the alpha channel.
// Coverage is taken from current_color.a only when `image_read_en` is set; otherwise,
// and always for the I4 / I8 formats, it is the full value 0xe0.
u8x4 decode_memory_color(bool image_read_en)
{
    u8 memory_coverage = image_read_en ? (current_color.a & U8_C(0xe0)) : U8_C(0xe0);
    u8x3 color;

    if (FB_FMT == FB_FMT_I4)
    {
        color = u8x3(0);
        memory_coverage = U8_C(0xe0);
    }
    else if (FB_FMT == FB_FMT_I8)
    {
        color = current_color.rrr;
        memory_coverage = U8_C(0xe0);
    }
    else if (FB_FMT == FB_FMT_RGBA5551)
    {
        // Only the top five bits of each channel are kept.
        color = current_color.rgb & U8_C(0xf8);
    }
    else if (FB_FMT == FB_FMT_IA88)
    {
        color = current_color.rrr;
    }
    else if (FB_FMT == FB_FMT_RGBA8888)
    {
        color = current_color.rgb;
    }

    return u8x4(color, memory_coverage);
}
|
||||
|
||||
// Replaces current_color with `col` and marks it dirty.
// For the I4 format only RGB is replaced; the existing alpha is preserved.
void write_color(u8x4 col)
{
    if (FB_FMT != FB_FMT_I4)
        current_color = col;
    else
        current_color.rgb = col.rgb;

    current_color_dirty = true;
}
|
||||
|
||||
// Writes `word` to the current pixel, decoding it per FB_FMT.
// Formats other than I4, I8 and RGBA5551 write nothing. `primitive_index` is not used here.
void copy_pipeline(uint word, uint primitive_index)
{
    if (FB_FMT == FB_FMT_I4)
    {
        // Always stores zero, alpha included (bypasses write_color's alpha preservation).
        current_color = u8x4(0);
        current_color_dirty = true;
    }
    else if (FB_FMT == FB_FMT_I8)
    {
        // Only the low 8 bits of the word are used here; per the original author this is what
        // alpha testing needs, although alpha testing is probably not meant to be used
        // with an 8-bit framebuffer at all.
        word &= 0xffu;
        write_color(u8x4(word));
    }
    else if (FB_FMT == FB_FMT_RGBA5551)
    {
        // Expand 5/5/5/1 into the top bits of each byte; the alpha bit becomes 0xe0 or 0.
        uint r = (word >> 8) & 0xf8u;
        uint g = (word >> 3) & 0xf8u;
        uint b = (word << 2) & 0xf8u;
        uint a = (word & 1) * 0xe0u;
        write_color(u8x4(r, g, b, a));
    }

    if (FB_COLOR_DEPTH_ALIAS)
        alias_color_to_depth();
}
|
||||
|
||||
// Writes the fill word `col` to the current pixel. For formats narrower than 32 bits,
// color_fb_index selects which part of the 32-bit word applies to this pixel
// (even indices take the upper part). I4 writes nothing.
void fill_color(uint col)
{
    if (FB_FMT == FB_FMT_RGBA8888)
    {
        uint r = (col >> 24u) & 0xffu;
        uint g = (col >> 16u) & 0xffu;
        uint b = (col >> 8u) & 0xffu;
        uint a = (col >> 0u) & 0xffu;
        write_color(u8x4(r, g, b, a));
    }
    else if (FB_FMT == FB_FMT_RGBA5551)
    {
        // Pick the 16-bit half, then expand 5/5/5/1.
        col >>= ((color_fb_index & 1u) ^ 1u) * 16u;
        uint r = (col >> 8u) & 0xf8u;
        uint g = (col >> 3u) & 0xf8u;
        uint b = (col << 2u) & 0xf8u;
        uint a = (col & 1u) * 0xe0u;
        write_color(u8x4(r, g, b, a));
    }
    else if (FB_FMT == FB_FMT_IA88)
    {
        // Pick the 16-bit half: high byte is intensity, low byte is alpha.
        col >>= ((color_fb_index & 1u) ^ 1u) * 16u;
        col &= 0xffffu;
        uint intensity = (col >> 8u) & 0xffu;
        uint a = (col >> 0u) & 0xffu;
        write_color(u8x4(intensity, intensity, intensity, a));
    }
    else if (FB_FMT == FB_FMT_I8)
    {
        // Pick one of the four bytes.
        col >>= ((color_fb_index & 3u) ^ 3u) * 8u;
        col &= 0xffu;
        write_color(u8x4(col));
    }

    if (FB_COLOR_DEPTH_ALIAS)
        alias_color_to_depth();
}
|
||||
|
||||
// Runs the depth test, blending, dithering and coverage update for one shaded pixel,
// then updates current_color (and current_depth / current_dz when depth update is on).
// Nothing is written when the depth test fails, or when AA is enabled and coverage is zero.
// `x` and `y` are not used in this function.
void depth_blend(int x, int y, uint primitive_index, ShadedData shaded)
{
    // z_dith packs the depth value above a 9-bit dither value.
    int dith = shaded.z_dith & 0x1ff;
    int z = shaded.z_dith >> 9;
    int coverage_count = shaded.coverage_count;
    u8x4 combined = shaded.combined;
    u8 shade_alpha = shaded.shade_alpha;

    uint blend_state_index = uint(state_indices.elems[primitive_index].static_depth_tmem.y);
    DerivedSetup derived = load_derived_setup(primitive_index);
    DepthBlendState blend_state = load_depth_blend_state(blend_state_index);

    // Unpack the per-primitive flags.
    bool force_blend = (blend_state.flags & DEPTH_BLEND_FORCE_BLEND_BIT) != 0;
    bool z_compare = (blend_state.flags & DEPTH_BLEND_DEPTH_TEST_BIT) != 0;
    bool z_update = (blend_state.flags & DEPTH_BLEND_DEPTH_UPDATE_BIT) != 0;
    bool image_read_enable = (blend_state.flags & DEPTH_BLEND_IMAGE_READ_ENABLE_BIT) != 0;
    bool color_on_coverage = (blend_state.flags & DEPTH_BLEND_COLOR_ON_COVERAGE_BIT) != 0;
    bool blend_multicycle = (blend_state.flags & DEPTH_BLEND_MULTI_CYCLE_BIT) != 0;
    bool aa_enable = (blend_state.flags & DEPTH_BLEND_AA_BIT) != 0;
    bool dither_en = (blend_state.flags & DEPTH_BLEND_DITHER_ENABLE_BIT) != 0;

    u8x4 memory_color = decode_memory_color(image_read_enable);
    u8 memory_coverage = memory_color.a >> U8_C(5);

    // Filled in by depth_test().
    bool blend_en;
    bool coverage_wrap;
    u8x2 blend_shift;

    bool z_pass = depth_test(z, derived.dz, derived.dz_compressed,
                             current_depth, current_dz,
                             coverage_count, memory_coverage,
                             z_compare, blend_state.z_mode,
                             force_blend, aa_enable,
                             blend_en, coverage_wrap, blend_shift);

    GENERIC_MESSAGE3(combined.x, combined.y, combined.z);

    // Pixel tests.
    bool covered = !aa_enable || coverage_count != 0;
    if (!(z_pass && covered))
        return;

    // Blending. In multi-cycle mode the first pass feeds its result back in as the
    // pixel color and the second pass uses the second set of blend modes.
    BlendInputs blender_inputs =
        BlendInputs(combined, memory_color,
                    derived.fog_color, derived.blend_color, shade_alpha);

    u8x4 blend_modes = blend_state.blend_modes0;
    if (blend_multicycle)
    {
        blender_inputs.pixel_color.rgb =
            blender(blender_inputs,
                    blend_modes,
                    force_blend, blend_en, color_on_coverage, coverage_wrap, blend_shift, false);
        blend_modes = blend_state.blend_modes1;
    }

    u8x3 rgb = blender(blender_inputs,
                       blend_modes,
                       force_blend, blend_en, color_on_coverage, coverage_wrap, blend_shift, true);

    // Dither.
    if (dither_en)
        rgb = rgb_dither(rgb, dith);

    // Coverage blending.
    int new_coverage = blend_coverage(coverage_count, memory_coverage, blend_en, blend_state.coverage_mode);

    GENERIC_MESSAGE3(rgb.x, rgb.y, rgb.z);

    // Writeback: coverage lives in the top three bits of alpha.
    write_color(u8x4(rgb, new_coverage << 5));

    if (z_update)
    {
        // Z-writeback.
        current_depth = z_compress(z);
        current_dz = u8(derived.dz_compressed);
        current_depth_dirty = true;

        if (FB_COLOR_DEPTH_ALIAS)
            alias_depth_to_color();
    }
    else if (FB_COLOR_DEPTH_ALIAS)
    {
        alias_color_to_depth();
    }
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,71 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NOISE_H_
|
||||
#define NOISE_H_
|
||||
|
||||
// Current noise value: written by reseed_noise(), read by the noise_get_*() accessors.
u16 seeded_noise = U16_C(0);
|
||||
|
||||
// From: https://www.shadertoy.com/view/XlXcW4 with slight modifications.
// Hashes (x, y, primitive_offset) with three identical mixing rounds and stores
// the top 16 bits of the first component in seeded_noise.
void reseed_noise(uint x, uint y, uint primitive_offset)
{
    const uint NOISE_PRIME = 1103515245u;
    uvec3 state = uvec3(x, y, primitive_offset);

    for (int round = 0; round < 3; round++)
        state = ((state >> 8u) ^ state.yzx) * NOISE_PRIME;

    seeded_noise = u16(state.x >> 16u);
}
|
||||
|
||||
// Returns the low three noise bits placed at bits 6..8, with bit 5 always set.
i16 noise_get_combiner()
{
    u16 low3 = seeded_noise & U16_C(7u);
    return i16((low3 << U16_C(6u)) | U16_C(0x20u));
}
|
||||
|
||||
// Returns the low three noise bits as an int (0..7).
int noise_get_dither_alpha()
{
    u16 low3 = seeded_noise & U16_C(7u);
    return int(low3);
}
|
||||
|
||||
// Returns the low nine noise bits: 3 bits of noise for each of R, G and B.
int noise_get_dither_color()
{
    u16 low9 = seeded_noise & U16_C(0x1ff);
    return int(low9);
}
|
||||
|
||||
// Returns the low eight noise bits.
u8 noise_get_blend_threshold()
{
    u16 low8 = seeded_noise & U16_C(0xffu);
    return u8(low8);
}
|
||||
|
||||
// Returns three 6-bit values derived from the noise:
//   x = bits 0..5, y = bits 6..11, z = bits 12..14 (as the upper three) combined with bits 0..2.
uvec3 noise_get_full_gamma_dither()
{
    uint seed = seeded_noise;
    uint first = seed & 0x3f;
    uint second = (seed >> 6u) & 0x3f;
    uint third = ((seed >> 9u) & 0x38) | (seed & 7u);
    return uvec3(first, second, third);
}
|
||||
|
||||
// Returns noise bits 0, 1 and 2 as three separate 0/1 values.
uvec3 noise_get_partial_gamma_dither()
{
    uvec3 shifted = uvec3(seeded_noise) >> uvec3(0, 1, 2);
    return shifted & 1u;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,114 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef PERSPECTIVE_H_
|
||||
#define PERSPECTIVE_H_
|
||||
|
||||
// 64-entry lookup table used by perspective_get_lut(). Each entry is (base, slope):
// the result is base + ((slope * low 8 bits of the normalized input) >> 10),
// and the entry is selected by the next 6 bits of the normalized input.
const i16x2 perspective_table[64] = i16x2[](
    i16x2(0x4000, -252 * 4), i16x2(0x3f04, -244 * 4), i16x2(0x3e10, -238 * 4), i16x2(0x3d22, -230 * 4),
    i16x2(0x3c3c, -223 * 4), i16x2(0x3b5d, -218 * 4), i16x2(0x3a83, -210 * 4), i16x2(0x39b1, -205 * 4),
    i16x2(0x38e4, -200 * 4), i16x2(0x381c, -194 * 4), i16x2(0x375a, -189 * 4), i16x2(0x369d, -184 * 4),
    i16x2(0x35e5, -179 * 4), i16x2(0x3532, -175 * 4), i16x2(0x3483, -170 * 4), i16x2(0x33d9, -166 * 4),
    i16x2(0x3333, -162 * 4), i16x2(0x3291, -157 * 4), i16x2(0x31f4, -155 * 4), i16x2(0x3159, -150 * 4),
    i16x2(0x30c3, -147 * 4), i16x2(0x3030, -143 * 4), i16x2(0x2fa1, -140 * 4), i16x2(0x2f15, -137 * 4),
    i16x2(0x2e8c, -134 * 4), i16x2(0x2e06, -131 * 4), i16x2(0x2d83, -128 * 4), i16x2(0x2d03, -125 * 4),
    i16x2(0x2c86, -123 * 4), i16x2(0x2c0b, -120 * 4), i16x2(0x2b93, -117 * 4), i16x2(0x2b1e, -115 * 4),
    i16x2(0x2aab, -113 * 4), i16x2(0x2a3a, -110 * 4), i16x2(0x29cc, -108 * 4), i16x2(0x2960, -106 * 4),
    i16x2(0x28f6, -104 * 4), i16x2(0x288e, -102 * 4), i16x2(0x2828, -100 * 4), i16x2(0x27c4, -98 * 4),
    i16x2(0x2762, -96 * 4), i16x2(0x2702, -94 * 4), i16x2(0x26a4, -92 * 4), i16x2(0x2648, -91 * 4),
    i16x2(0x25ed, -89 * 4), i16x2(0x2594, -87 * 4), i16x2(0x253d, -86 * 4), i16x2(0x24e7, -85 * 4),
    i16x2(0x2492, -83 * 4), i16x2(0x243f, -81 * 4), i16x2(0x23ee, -80 * 4), i16x2(0x239e, -79 * 4),
    i16x2(0x234f, -77 * 4), i16x2(0x2302, -76 * 4), i16x2(0x22b6, -74 * 4), i16x2(0x226c, -74 * 4),
    i16x2(0x2222, -72 * 4), i16x2(0x21da, -71 * 4), i16x2(0x2193, -70 * 4), i16x2(0x214d, -69 * 4),
    i16x2(0x2108, -67 * 4), i16x2(0x20c5, -67 * 4), i16x2(0x2082, -65 * 4), i16x2(0x2041, -65 * 4)
);
|
||||
|
||||
// Looks up the reciprocal approximation for `w`.
// Returns (rcp, shift): `shift` is the left shift that normalizes `w` (capped at 14),
// and `rcp` is interpolated from perspective_table using the normalized value.
ivec2 perspective_get_lut(int w)
{
    int shift = min(14 - findMSB(w), 14);
    int normalized = (w << shift) & 0x3fff;

    // Upper 6 bits pick the table entry, lower 8 bits interpolate within it.
    int fraction = normalized & 0xff;
    ivec2 entry = ivec2(perspective_table[normalized >> 8]);

    int rcp = entry.x + ((entry.y * fraction) >> 10);
    return ivec2(rcp, shift);
}
|
||||
|
||||
// Non-perspective path: returns the first two components of `stw` unchanged; the third is ignored.
ivec2 no_perspective_divide(ivec3 stw)
{
    return ivec2(stw.x, stw.y);
}
|
||||
|
||||
// s16 divided by s1.15.
// Classic approximation of a (x * rcp) >> shift with a LUT to find rcp.
// Divides stw.xy by stw.z. `overflow` is set to true when a component saturates or when
// the third component is <= 0; it is never cleared here.
ivec2 perspective_divide(ivec3 stw, inout bool overflow)
{
    int w = stw.z;
    bool w_carry = w <= 0;
    w &= 0x7fff;

    ivec2 lut = perspective_get_lut(w);
    int rcp = lut.x;
    int shift = lut.y;
    ivec2 prod = stw.xy * rcp;

    // Bits of the product that must be all-zero or all-one for the result to be in range.
    int temp_mask = ((1 << 30) - 1) & -((1 << 29) >> shift);
    ivec2 out_of_bounds = prod & temp_mask;

    // Note: for shift != 14 the product itself is replaced by the shifted value,
    // and the sign checks below look at that shifted value.
    ivec2 temp;
    if (shift == 14)
    {
        temp = prod << 1;
    }
    else
    {
        prod = prod >> (13 - shift);
        temp = prod;
    }

    if (any(notEqual(out_of_bounds, ivec2(0))))
    {
        // Saturate each component whose guard bits are neither all-zero nor all-one.
        if (out_of_bounds.x != 0 && out_of_bounds.x != temp_mask)
        {
            temp.x = ((prod.x & (1 << 29)) == 0) ? 0x7fff : -0x8000;
            overflow = true;
        }

        if (out_of_bounds.y != 0 && out_of_bounds.y != temp_mask)
        {
            temp.y = ((prod.y & (1 << 29)) == 0) ? 0x7fff : -0x8000;
            overflow = true;
        }
    }

    if (w_carry)
    {
        temp = ivec2(0x7fff);
        overflow = true;
    }

    // Perspective divide produces a 17-bit signed coordinate, which is later clamped to 16-bit signed.
    // However, the LOD computation happens in 17 bits ...
    return clamp(temp, ivec2(-0x10000), ivec2(0xffff));
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,191 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "small_types.h"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y_id = 1) in;
|
||||
|
||||
#include "debug.h"
|
||||
#include "data_structures.h"
|
||||
|
||||
layout(set = 0, binding = 0, std430) readonly buffer TriangleSetupBuffer
|
||||
{
|
||||
TriangleSetupMem elems[];
|
||||
} triangle_setup;
|
||||
#include "load_triangle_setup.h"
|
||||
|
||||
layout(set = 0, binding = 1, std430) readonly buffer AttributeSetupBuffer
|
||||
{
|
||||
AttributeSetupMem elems[];
|
||||
} attribute_setup;
|
||||
#include "load_attribute_setup.h"
|
||||
|
||||
layout(set = 0, binding = 2, std430) readonly buffer DerivedSetupBuffer
|
||||
{
|
||||
DerivedSetupMem elems[];
|
||||
} derived_setup;
|
||||
#include "load_derived_setup.h"
|
||||
|
||||
layout(set = 0, binding = 3, std430) readonly buffer StaticRasterStateBuffer
|
||||
{
|
||||
StaticRasterizationStateMem elems[];
|
||||
} static_raster_state;
|
||||
#include "load_static_raster_state.h"
|
||||
|
||||
layout(set = 0, binding = 4, std430) readonly buffer StateIndicesBuffer
|
||||
{
|
||||
InstanceIndicesMem elems[];
|
||||
} state_indices;
|
||||
|
||||
layout(set = 0, binding = 5, std430) readonly buffer SpanInfoOffsetBuffer
|
||||
{
|
||||
SpanInfoOffsetsMem elems[];
|
||||
} span_offsets;
|
||||
#include "load_span_offsets.h"
|
||||
|
||||
layout(set = 0, binding = 6, std430) readonly buffer SpanSetups
|
||||
{
|
||||
SpanSetupMem elems[];
|
||||
} span_setups;
|
||||
#include "load_span_setup.h"
|
||||
|
||||
layout(set = 0, binding = 7, std430) readonly buffer TMEM16
|
||||
{
|
||||
TMEMInstance16Mem instances[];
|
||||
} tmem16;
|
||||
|
||||
layout(set = 0, binding = 7, std430) readonly buffer TMEM8
|
||||
{
|
||||
TMEMInstance8Mem instances[];
|
||||
} tmem8;
|
||||
|
||||
layout(set = 0, binding = 8, std430) readonly buffer TileInfoBuffer
|
||||
{
|
||||
TileInfoMem elems[];
|
||||
} tile_infos;
|
||||
#include "load_tile_info.h"
|
||||
|
||||
layout(set = 2, binding = 0, std140) uniform GlobalConstants
|
||||
{
|
||||
GlobalFBInfo fb_info;
|
||||
} global_constants;
|
||||
|
||||
layout(constant_id = 2) const int STATIC_STATE_FLAGS = 0;
|
||||
layout(constant_id = 3) const int COMBINER_INPUTS_RGB0 = 0;
|
||||
layout(constant_id = 4) const int COMBINER_INPUTS_ALPHA0 = 0;
|
||||
layout(constant_id = 5) const int COMBINER_INPUTS_RGB1 = 0;
|
||||
layout(constant_id = 6) const int COMBINER_INPUTS_ALPHA1 = 0;
|
||||
layout(constant_id = 7) const int DITHER_TEX_SIZE_TEX_FMT = 0;
|
||||
|
||||
const int COMBINER_INPUT_RGB0_MULADD = (COMBINER_INPUTS_RGB0 >> 0) & 0xff;
|
||||
const int COMBINER_INPUT_RGB0_MULSUB = (COMBINER_INPUTS_RGB0 >> 8) & 0xff;
|
||||
const int COMBINER_INPUT_RGB0_MUL = (COMBINER_INPUTS_RGB0 >> 16) & 0xff;
|
||||
const int COMBINER_INPUT_RGB0_ADD = (COMBINER_INPUTS_RGB0 >> 24) & 0xff;
|
||||
|
||||
const int COMBINER_INPUT_ALPHA0_MULADD = (COMBINER_INPUTS_ALPHA0 >> 0) & 0xff;
|
||||
const int COMBINER_INPUT_ALPHA0_MULSUB = (COMBINER_INPUTS_ALPHA0 >> 8) & 0xff;
|
||||
const int COMBINER_INPUT_ALPHA0_MUL = (COMBINER_INPUTS_ALPHA0 >> 16) & 0xff;
|
||||
const int COMBINER_INPUT_ALPHA0_ADD = (COMBINER_INPUTS_ALPHA0 >> 24) & 0xff;
|
||||
|
||||
const int COMBINER_INPUT_RGB1_MULADD = (COMBINER_INPUTS_RGB1 >> 0) & 0xff;
|
||||
const int COMBINER_INPUT_RGB1_MULSUB = (COMBINER_INPUTS_RGB1 >> 8) & 0xff;
|
||||
const int COMBINER_INPUT_RGB1_MUL = (COMBINER_INPUTS_RGB1 >> 16) & 0xff;
|
||||
const int COMBINER_INPUT_RGB1_ADD = (COMBINER_INPUTS_RGB1 >> 24) & 0xff;
|
||||
|
||||
const int COMBINER_INPUT_ALPHA1_MULADD = (COMBINER_INPUTS_ALPHA1 >> 0) & 0xff;
|
||||
const int COMBINER_INPUT_ALPHA1_MULSUB = (COMBINER_INPUTS_ALPHA1 >> 8) & 0xff;
|
||||
const int COMBINER_INPUT_ALPHA1_MUL = (COMBINER_INPUTS_ALPHA1 >> 16) & 0xff;
|
||||
const int COMBINER_INPUT_ALPHA1_ADD = (COMBINER_INPUTS_ALPHA1 >> 24) & 0xff;
|
||||
|
||||
const int DITHER = (DITHER_TEX_SIZE_TEX_FMT >> 0) & 0xff;
|
||||
const int TEX_SIZE = (DITHER_TEX_SIZE_TEX_FMT >> 8) & 0xff;
|
||||
const int TEX_FMT = (DITHER_TEX_SIZE_TEX_FMT >> 16) & 0xff;
|
||||
|
||||
#define RASTERIZER_SPEC_CONSTANT
|
||||
|
||||
#include "noise.h"
|
||||
#include "shading.h"
|
||||
|
||||
layout(set = 0, binding = 9, std430) writeonly buffer ColorBuffer
|
||||
{
|
||||
mem_u8x4 elems[];
|
||||
} color;
|
||||
|
||||
layout(set = 0, binding = 9, std430) writeonly buffer ColorBufferRaw
|
||||
{
|
||||
uint elems[];
|
||||
} raw_color;
|
||||
|
||||
layout(set = 0, binding = 10, std430) writeonly buffer DepthBuffer
|
||||
{
|
||||
int elems[];
|
||||
} depth;
|
||||
|
||||
layout(set = 0, binding = 11, std430) writeonly buffer ShadeAlpha
|
||||
{
|
||||
mem_u8 elems[];
|
||||
} shade_alpha;
|
||||
|
||||
layout(set = 0, binding = 12, std430) writeonly buffer Coverage
|
||||
{
|
||||
mem_i8 elems[];
|
||||
} coverage;
|
||||
|
||||
layout(set = 1, binding = 0, std430) readonly buffer TileWorkList
|
||||
{
|
||||
uvec4 elems[];
|
||||
} tile_work_list;
|
||||
|
||||
// Rasterizer entry point: shades one pixel of one binned tile and stores the
// result into the per-tile scratch buffers (color / depth / shade alpha / coverage).
// One invocation handles one pixel; one workgroup handles one tile_work_list entry.
void main()
{
	// Work item layout as consumed below: xy = tile coordinate, z = tile instance, w = primitive index.
	uvec4 work = tile_work_list.elems[gl_WorkGroupID.x];
	uint tile_instance = work.z;
	uint primitive_index = work.w;

	int x = int(work.x * gl_WorkGroupSize.x + gl_LocalInvocationID.x);
	int y = int(work.y * gl_WorkGroupSize.y + gl_LocalInvocationID.y);

	// Each tile instance owns a contiguous run of (workgroup size) entries in the output buffers.
	uint index = tile_instance * (gl_WorkGroupSize.x * gl_WorkGroupSize.y) + gl_LocalInvocationIndex;

	ShadedData shaded;

	// -1 is what gets stored when shade_pixel() rejects the pixel.
	i8 coverage_value = I8_C(-1);

	if (shade_pixel(x, y, primitive_index, shaded))
	{
		coverage_value = i8(shaded.coverage_count);

		if (coverage_value > I8_C(8))
		{
			// Values above 8 carry special flag bits rather than a coverage count.
			if ((coverage_value & COVERAGE_COPY_BIT) != 0)
			{
				// For copy pipe, we use a raw 32-bit word to represent the loaded texel.
				raw_color.elems[index] = shaded.z_dith;
			}
		}
		else
		{
			// Regular shaded pixel.
			// Workaround curious bug with glslang, need to cast manually to uvec4 first.
			color.elems[index] = mem_u8x4(uvec4(shaded.combined));
			shade_alpha.elems[index] = mem_u8(shaded.shade_alpha);
			depth.elems[index] = shaded.z_dith;
		}
	}

	// Coverage is always written, so every pixel of the tile gets a defined entry.
	coverage.elems[index] = mem_i8(coverage_value);
}
|
|
@ -0,0 +1,357 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SHADING_H_
|
||||
#define SHADING_H_
|
||||
|
||||
#ifdef RASTERIZER_SPEC_CONSTANT
|
||||
const int SCALING_LOG2 = (STATIC_STATE_FLAGS >> RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET) & 3;
|
||||
const int SCALING_FACTOR = 1 << SCALING_LOG2;
|
||||
#endif
|
||||
|
||||
#include "coverage.h"
|
||||
#include "interpolation.h"
|
||||
#include "perspective.h"
|
||||
#include "texture.h"
|
||||
#include "dither.h"
|
||||
#include "combiner.h"
|
||||
|
||||
// Shades a single pixel of a single primitive.
//
// x, y:            pixel coordinate. Compared against the span bounds scaled by SCALING_FACTOR;
//                  snapped down to a multiple of SCALING_FACTOR when the primitive has
//                  TRIANGLE_SETUP_DISABLE_UPSCALING_BIT set.
// primitive_index: index used for every per-primitive buffer load in this function.
// shaded:          output. Which members are written depends on the path taken:
//                    - copy path:   z_dith (raw texel from sample_texture_copy) and
//                                   coverage_count (COVERAGE_COPY_BIT).
//                    - fill path:   coverage_count (COVERAGE_FILL_BIT) only.
//                    - normal path: combined, z_dith, coverage_count and shade_alpha.
//
// Returns true when the pixel survives every test performed here, false when it is rejected
// (outside the primitive's Y range, invalid span line, outside the span in X, zero coverage,
// or failing the alpha test).
bool shade_pixel(int x, int y, uint primitive_index, out ShadedData shaded)
{
	// Early-out if the scanline is outside the primitive's (upscaled) Y range.
	// NOTE: this local intentionally has the same name as the span offsets buffer instance;
	// from here on "span_offsets" refers to the loaded struct.
	SpanInfoOffsets span_offsets = load_span_offsets(primitive_index);
	if ((y < (SCALING_FACTOR * span_offsets.ylo)) || (y > (span_offsets.yhi * SCALING_FACTOR + (SCALING_FACTOR - 1))))
		return false;

	uint setup_flags = uint(triangle_setup.elems[primitive_index].flags);
	if (SCALING_FACTOR > 1)
	{
		if ((setup_flags & TRIANGLE_SETUP_DISABLE_UPSCALING_BIT) != 0u)
		{
			// Snap to the top-left sample of the SCALING_FACTOR x SCALING_FACTOR block.
			x &= ~(SCALING_FACTOR - 1);
			y &= ~(SCALING_FACTOR - 1);
		}
	}

	SpanSetup span_setup = load_span_setup(SCALING_FACTOR * span_offsets.offset + (y - SCALING_FACTOR * span_offsets.ylo));
	if (span_setup.valid_line == U16_C(0))
		return false;

	uint setup_tile = uint(triangle_setup.elems[primitive_index].tile);
	AttributeSetup attr = load_attribute_setup(primitive_index);

	uvec4 states = uvec4(state_indices.elems[primitive_index].static_depth_tmem);
	uint static_state_index = states.x;
	uint tmem_instance_index = states.z;

	StaticRasterizationState static_state = load_static_rasterization_state(static_state_index);
	uint static_state_flags = static_state.flags;
	int static_state_dither = static_state.dither;
	u8x4 combiner_inputs_rgb0 = static_state.combiner_inputs_rgb0;
	u8x4 combiner_inputs_alpha0 = static_state.combiner_inputs_alpha0;
	u8x4 combiner_inputs_rgb1 = static_state.combiner_inputs_rgb1;
	u8x4 combiner_inputs_alpha1 = static_state.combiner_inputs_alpha1;

#ifdef RASTERIZER_SPEC_CONSTANT
	// When the pipeline was specialized, the specialization constants override the state
	// that was just loaded from the buffer.
	if ((STATIC_STATE_FLAGS & RASTERIZATION_USE_SPECIALIZATION_CONSTANT_BIT) != 0)
	{
		static_state_flags = STATIC_STATE_FLAGS;
		static_state_dither = DITHER;

		combiner_inputs_rgb0.x = u8(COMBINER_INPUT_RGB0_MULADD);
		combiner_inputs_rgb0.y = u8(COMBINER_INPUT_RGB0_MULSUB);
		combiner_inputs_rgb0.z = u8(COMBINER_INPUT_RGB0_MUL);
		combiner_inputs_rgb0.w = u8(COMBINER_INPUT_RGB0_ADD);

		combiner_inputs_alpha0.x = u8(COMBINER_INPUT_ALPHA0_MULADD);
		combiner_inputs_alpha0.y = u8(COMBINER_INPUT_ALPHA0_MULSUB);
		combiner_inputs_alpha0.z = u8(COMBINER_INPUT_ALPHA0_MUL);
		combiner_inputs_alpha0.w = u8(COMBINER_INPUT_ALPHA0_ADD);

		combiner_inputs_rgb1.x = u8(COMBINER_INPUT_RGB1_MULADD);
		combiner_inputs_rgb1.y = u8(COMBINER_INPUT_RGB1_MULSUB);
		combiner_inputs_rgb1.z = u8(COMBINER_INPUT_RGB1_MUL);
		combiner_inputs_rgb1.w = u8(COMBINER_INPUT_RGB1_ADD);

		combiner_inputs_alpha1.x = u8(COMBINER_INPUT_ALPHA1_MULADD);
		combiner_inputs_alpha1.y = u8(COMBINER_INPUT_ALPHA1_MULSUB);
		combiner_inputs_alpha1.z = u8(COMBINER_INPUT_ALPHA1_MUL);
		combiner_inputs_alpha1.w = u8(COMBINER_INPUT_ALPHA1_ADD);
	}
#endif

	// This is a great case for specialization constants.
	bool tlut = (static_state_flags & RASTERIZATION_TLUT_BIT) != 0;
	bool tlut_type = (static_state_flags & RASTERIZATION_TLUT_TYPE_BIT) != 0;
	bool sample_quad = (static_state_flags & RASTERIZATION_SAMPLE_MODE_BIT) != 0;
	bool cvg_times_alpha = (static_state_flags & RASTERIZATION_CVG_TIMES_ALPHA_BIT) != 0;
	bool alpha_cvg_select = (static_state_flags & RASTERIZATION_ALPHA_CVG_SELECT_BIT) != 0;
	bool perspective = (static_state_flags & RASTERIZATION_PERSPECTIVE_CORRECT_BIT) != 0;
	bool tex_lod_en = (static_state_flags & RASTERIZATION_TEX_LOD_ENABLE_BIT) != 0;
	bool sharpen_lod_en = (static_state_flags & RASTERIZATION_SHARPEN_LOD_ENABLE_BIT) != 0;
	bool detail_lod_en = (static_state_flags & RASTERIZATION_DETAIL_LOD_ENABLE_BIT) != 0;
	bool aa_enable = (static_state_flags & RASTERIZATION_AA_BIT) != 0;
	bool multi_cycle = (static_state_flags & RASTERIZATION_MULTI_CYCLE_BIT) != 0;
	bool interlace_en = (static_state_flags & RASTERIZATION_INTERLACE_FIELD_BIT) != 0;
	bool fill_en = (static_state_flags & RASTERIZATION_FILL_BIT) != 0;
	bool copy_en = (static_state_flags & RASTERIZATION_COPY_BIT) != 0;
	bool alpha_test = (static_state_flags & RASTERIZATION_ALPHA_TEST_BIT) != 0;
	bool alpha_test_dither = (static_state_flags & RASTERIZATION_ALPHA_TEST_DITHER_BIT) != 0;
	bool mid_texel = (static_state_flags & RASTERIZATION_SAMPLE_MID_TEXEL_BIT) != 0;
	bool uses_texel0 = (static_state_flags & RASTERIZATION_USES_TEXEL0_BIT) != 0;
	bool uses_texel1 = (static_state_flags & RASTERIZATION_USES_TEXEL1_BIT) != 0;
	bool uses_pipelined_texel1 = (static_state_flags & RASTERIZATION_USES_PIPELINED_TEXEL1_BIT) != 0;
	bool uses_lod = (static_state_flags & RASTERIZATION_USES_LOD_BIT) != 0;
	bool convert_one = (static_state_flags & RASTERIZATION_CONVERT_ONE_BIT) != 0;
	bool bilerp0 = (static_state_flags & RASTERIZATION_BILERP_0_BIT) != 0;
	bool bilerp1 = (static_state_flags & RASTERIZATION_BILERP_1_BIT) != 0;

	if ((static_state_flags & RASTERIZATION_NEED_NOISE_BIT) != 0)
		reseed_noise(x, y, primitive_index + global_constants.fb_info.base_primitive_index);

	bool flip = (setup_flags & TRIANGLE_SETUP_FLIP_BIT) != 0;

	if (copy_en)
	{
		// Copy path: no coverage or combiner work, only a raw texel fetch.
		bool valid = x >= span_setup.start_x && x <= span_setup.end_x;
		if (!valid)
			return false;

		ivec2 st;
		int s_offset;
		interpolate_st_copy(span_setup, attr.dstzw_dx, x, perspective, flip, st, s_offset);

		uint tile0 = uint(setup_tile) & 7u;
		uint tile_info_index0 = uint(state_indices.elems[primitive_index].tile_infos[tile0]);
		TileInfo tile_info0 = load_tile_info(tile_info_index0);
#ifdef RASTERIZER_SPEC_CONSTANT
		if ((STATIC_STATE_FLAGS & RASTERIZATION_USE_STATIC_TEXTURE_SIZE_FORMAT_BIT) != 0)
		{
			tile_info0.fmt = u8(TEX_FMT);
			tile_info0.size = u8(TEX_SIZE);
		}
#endif
		int texel0 = sample_texture_copy(tile_info0, tmem_instance_index, st, s_offset, tlut, tlut_type);
		// The raw texel word is handed back through z_dith in this mode.
		shaded.z_dith = texel0;
		shaded.coverage_count = U8_C(COVERAGE_COPY_BIT);

		if (alpha_test && global_constants.fb_info.fb_size == 2 && (texel0 & 1) == 0)
			return false;

		return true;
	}
	else if (fill_en)
	{
		// Fill path: only the coverage marker is produced; the pixel is valid if it lies within the span.
		shaded.coverage_count = U8_C(COVERAGE_FILL_BIT);
		return x >= span_setup.start_x && x <= span_setup.end_x;
	}

	// NOTE: from here on this local hides any outer declaration named "coverage".
	int coverage = compute_coverage(span_setup.xleft, span_setup.xright, x);

	// There is no way we can gain coverage here.
	// Reject work as fast as possible.
	if (coverage == 0)
		return false;

	int coverage_count = bitCount(coverage);

	// If we're not using AA, only the first coverage bit is relevant.
	if (!aa_enable && (coverage & 1) == 0)
		return false;

	DerivedSetup derived = load_derived_setup(primitive_index);

	int dx = x - span_setup.interpolation_base_x;
	int interpolation_direction = flip ? 1 : -1;

	// Interpolate attributes.
	u8x4 shade = interpolate_rgba(span_setup.rgba, attr.drgba_dx, attr.drgba_dy,
	                              dx, coverage);

	ivec2 st, st_dx, st_dy;
	int z;
	bool perspective_overflow = false;

	int tex_interpolation_direction = interpolation_direction;
	if (SCALING_FACTOR > 1 && uses_lod)
		if ((setup_flags & TRIANGLE_SETUP_NATIVE_LOD_BIT) != 0)
			tex_interpolation_direction *= SCALING_FACTOR;

	interpolate_stz(span_setup.stzw, attr.dstzw_dx, attr.dstzw_dy, dx, coverage, perspective, uses_lod,
	                tex_interpolation_direction, st, st_dx, st_dy, z, perspective_overflow);

	// Sample textures.
	uint tile0 = uint(setup_tile) & 7u;
	uint tile1 = (tile0 + 1) & 7u;
	uint max_level = uint(setup_tile) >> 3u;
	int min_lod = derived.min_lod;

	// Zero-initialized so the combiner never receives an undefined value when LOD is not in use.
	i16 lod_frac = I16_C(0);
	if (uses_lod)
	{
		compute_lod_2cycle(tile0, tile1, lod_frac, max_level, min_lod, st, st_dx, st_dy, perspective_overflow,
		                   tex_lod_en, sharpen_lod_en, detail_lod_en);
	}

	// Zero-initialized for the same reason: both texels are always forwarded to the combiner
	// (and texel0 may feed the texel1 path) even when the corresponding uses_* flag is clear.
	i16x4 texel0 = i16x4(0), texel1 = i16x4(0);

	if (uses_texel0)
	{
		uint tile_info_index0 = uint(state_indices.elems[primitive_index].tile_infos[tile0]);
		TileInfo tile_info0 = load_tile_info(tile_info_index0);
#ifdef RASTERIZER_SPEC_CONSTANT
		if ((STATIC_STATE_FLAGS & RASTERIZATION_USE_STATIC_TEXTURE_SIZE_FORMAT_BIT) != 0)
		{
			tile_info0.fmt = u8(TEX_FMT);
			tile_info0.size = u8(TEX_SIZE);
		}
#endif
		texel0 = sample_texture(tile_info0, tmem_instance_index, st, tlut, tlut_type, sample_quad, mid_texel, false, i16x4(0));
		if (!sample_quad && !bilerp0)
			texel0 = texture_convert_factors(texel0, derived.factors);
	}

	// A very awkward mechanism where we peek into the next pixel, or in some cases, the next scanline's first pixel.
	if (uses_pipelined_texel1)
	{
		bool valid_line = uint(span_setups.elems[SCALING_FACTOR * span_offsets.offset + (y - SCALING_FACTOR * span_offsets.ylo + 1)].valid_line) != 0u;
		bool long_span = span_setup.lodlength >= 8;
		bool end_span = x == (flip ? span_setup.end_x : span_setup.start_x);

		if (end_span && long_span && valid_line)
		{
			// Last pixel of the span: take the texture coordinate from the start of the next scanline.
			ivec3 stw = span_setups.elems[SCALING_FACTOR * span_offsets.offset + (y - SCALING_FACTOR * span_offsets.ylo + 1)].stzw.xyw >> 16;
			if (perspective)
			{
				bool st_overflow;
				st = perspective_divide(stw, st_overflow);
			}
			else
				st = no_perspective_divide(stw);
		}
		else
			st = interpolate_st_single(span_setup.stzw, attr.dstzw_dx, dx + interpolation_direction * SCALING_FACTOR, perspective);

		// The pipelined texel is sampled from the same tile as texel0.
		tile1 = tile0;
		uses_texel1 = true;
	}

	if (uses_texel1)
	{
		if (convert_one && !bilerp1)
		{
			texel1 = texture_convert_factors(texel0, derived.factors);
		}
		else
		{
			uint tile_info_index1 = uint(state_indices.elems[primitive_index].tile_infos[tile1]);
			TileInfo tile_info1 = load_tile_info(tile_info_index1);
#ifdef RASTERIZER_SPEC_CONSTANT
			if ((STATIC_STATE_FLAGS & RASTERIZATION_USE_STATIC_TEXTURE_SIZE_FORMAT_BIT) != 0)
			{
				tile_info1.fmt = u8(TEX_FMT);
				tile_info1.size = u8(TEX_SIZE);
			}
#endif
			texel1 = sample_texture(tile_info1, tmem_instance_index, st, tlut, tlut_type, sample_quad, mid_texel,
			                        convert_one, texel0);

			if (!sample_quad && !tlut && !bilerp1)
				texel1 = texture_convert_factors(texel1, derived.factors);
		}
	}

	int rgb_dith, alpha_dith;
	dither_coefficients(x, y >> int(interlace_en), static_state_dither >> 2, static_state_dither & 3, rgb_dith, alpha_dith);

	// Run combiner.
	u8x4 combined;
	u8 alpha_reference;
	if (multi_cycle)
	{
		CombinerInputs combined_inputs =
			CombinerInputs(derived.constant_muladd0, derived.constant_mulsub0, derived.constant_mul0, derived.constant_add0,
			               shade, u8x4(0), texel0, texel1, lod_frac, noise_get_combiner());

		combined_inputs.combined = combiner_cycle0(combined_inputs,
		                                           combiner_inputs_rgb0,
		                                           combiner_inputs_alpha0,
		                                           alpha_dith, coverage_count, cvg_times_alpha, alpha_cvg_select,
		                                           alpha_test, alpha_reference);

		combined_inputs.constant_muladd = derived.constant_muladd1;
		combined_inputs.constant_mulsub = derived.constant_mulsub1;
		combined_inputs.constant_mul = derived.constant_mul1;
		combined_inputs.constant_add = derived.constant_add1;

		// Pipelining, texel1 is promoted to texel0 in cycle1.
		// I don't think hardware ever intended for you to access texels in second cycle due to this nature.
		i16x4 tmp_texel = combined_inputs.texel0;
		combined_inputs.texel0 = combined_inputs.texel1;
		// Following the pipelining, texel1 should become texel0 of next pixel,
		// but let's not go there ...
		combined_inputs.texel1 = tmp_texel;

		combined = u8x4(combiner_cycle1(combined_inputs,
		                                combiner_inputs_rgb1,
		                                combiner_inputs_alpha1,
		                                alpha_dith, coverage_count, cvg_times_alpha, alpha_cvg_select));
	}
	else
	{
		CombinerInputs combined_inputs =
			CombinerInputs(derived.constant_muladd1, derived.constant_mulsub1, derived.constant_mul1, derived.constant_add1,
			               shade, u8x4(0), texel0, texel1, lod_frac, noise_get_combiner());

		combined = u8x4(combiner_cycle1(combined_inputs,
		                                combiner_inputs_rgb1,
		                                combiner_inputs_alpha1,
		                                alpha_dith, coverage_count, cvg_times_alpha, alpha_cvg_select));

		// In single-cycle mode the alpha test compares against the combined alpha.
		alpha_reference = combined.a;
	}

	// After combiner, color can be modified to 0 through alpha-to-cvg, so check for potential write_enable here.
	// If we're not using AA, the first coverage bit is used instead, coverage count is ignored.
	if (aa_enable && coverage_count == 0)
		return false;

	if (alpha_test)
	{
		u8 alpha_threshold;
		if (alpha_test_dither)
			alpha_threshold = noise_get_blend_threshold();
		else
			alpha_threshold = derived.blend_color.a;

		if (alpha_reference < alpha_threshold)
			return false;
	}

	shaded.combined = combined;
	// Depth and the RGB dither value share one word: dither occupies the low 9 bits.
	shaded.z_dith = (z << 9) | rgb_dith;
	shaded.coverage_count = u8(coverage_count);
	// Shade alpha needs to be passed separately since it might affect the blending stage.
	shaded.shade_alpha = u8(min(shade.a + alpha_dith, 0xff));
	return true;
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,126 @@
|
|||
{
|
||||
"include": [ "../../Granite/assets/shaders/inc" ],
|
||||
"shaders": [
|
||||
{
|
||||
"name": "tmem_update",
|
||||
"compute": true,
|
||||
"path": "tmem_update.comp"
|
||||
},
|
||||
{
|
||||
|
||||
"name": "span_setup",
|
||||
"compute": true,
|
||||
"path": "span_setup.comp"
|
||||
},
|
||||
{
|
||||
"name": "clear_indirect_buffer",
|
||||
"compute": true,
|
||||
"path": "clear_indirect_buffer.comp"
|
||||
},
|
||||
{
|
||||
"name": "tile_binning_combined",
|
||||
"compute": true,
|
||||
"path": "tile_binning_combined.comp",
|
||||
"variants": [
|
||||
{ "define": "SUBGROUP", "count": 2, "resolve": true },
|
||||
{ "define": "UBERSHADER", "count": 2, "resolve": true },
|
||||
{ "define": "SMALL_TYPES", "count": 2, "resolve": true }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "ubershader",
|
||||
"path": "ubershader.comp",
|
||||
"compute": true,
|
||||
"variants": [
|
||||
{ "define": "SUBGROUP", "count": 2, "resolve": true },
|
||||
{ "define": "SMALL_TYPES", "count": 2, "resolve": true }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "depth_blend",
|
||||
"path": "depth_blend.comp",
|
||||
"compute": true,
|
||||
"variants": [
|
||||
{ "define": "SUBGROUP", "count": 2, "resolve": true },
|
||||
{ "define": "SMALL_TYPES", "count": 2, "resolve": true }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "rasterizer",
|
||||
"path": "rasterizer.comp",
|
||||
"compute": true,
|
||||
"variants": [
|
||||
{ "define": "SMALL_TYPES", "count": 2, "resolve": true }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "fullscreen",
|
||||
"path": "fullscreen.vert"
|
||||
},
|
||||
{
|
||||
"name": "vi_scale",
|
||||
"path": "vi_scale.frag"
|
||||
},
|
||||
{
|
||||
"name": "vi_divot",
|
||||
"path": "vi_divot.frag",
|
||||
"variants": [
|
||||
{ "define": "FETCH_BUG", "count": 2 }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "vi_fetch",
|
||||
"path": "vi_fetch.frag",
|
||||
"variants": [
|
||||
{ "define": "FETCH_BUG", "count": 2 }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "vi_blend_fields",
|
||||
"path": "vi_blend_fields.frag"
|
||||
},
|
||||
{
|
||||
"name": "extract_vram",
|
||||
"path": "extract_vram.comp",
|
||||
"compute": true
|
||||
},
|
||||
{
|
||||
"name": "masked_rdram_resolve",
|
||||
"path": "masked_rdram_resolve.comp",
|
||||
"compute": true
|
||||
},
|
||||
{
|
||||
"name": "clear_write_mask",
|
||||
"path": "clear_write_mask.comp",
|
||||
"compute": true
|
||||
},
|
||||
{
|
||||
"name": "update_upscaled_domain_post",
|
||||
"path": "update_upscaled_domain_post.comp",
|
||||
"compute": true
|
||||
},
|
||||
{
|
||||
"name": "update_upscaled_domain_pre",
|
||||
"path": "update_upscaled_domain_pre.comp",
|
||||
"compute": true
|
||||
},
|
||||
{
|
||||
"name": "update_upscaled_domain_resolve",
|
||||
"path": "update_upscaled_domain_resolve.comp",
|
||||
"compute": true
|
||||
},
|
||||
{
|
||||
"name": "clear_super_sampled_write_mask",
|
||||
"path": "clear_super_sampled_write_mask.comp",
|
||||
"compute": true
|
||||
},
|
||||
{
|
||||
"name": "vi_deinterlace_vert",
|
||||
"path": "vi_deinterlace.vert"
|
||||
},
|
||||
{
|
||||
"name": "vi_deinterlace_frag",
|
||||
"path": "vi_deinterlace.frag"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Utility header to smooth over the difference between
|
||||
// 8/16-bit integer arithmetic vs. just 8/16-bit storage.
|
||||
|
||||
#ifndef SMALL_INTEGERS_H_
|
||||
#define SMALL_INTEGERS_H_
|
||||
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_shader_8bit_storage : require
|
||||
|
||||
#if SMALL_TYPES
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
|
||||
#define mem_u8 uint8_t
|
||||
#define mem_u16 uint16_t
|
||||
#define mem_u8x2 u8vec2
|
||||
#define mem_u16x2 u16vec2
|
||||
#define mem_u8x3 u8vec3
|
||||
#define mem_u16x3 u16vec3
|
||||
#define mem_u8x4 u8vec4
|
||||
#define mem_u16x4 u16vec4
|
||||
|
||||
#define mem_i8 int8_t
|
||||
#define mem_i16 int16_t
|
||||
#define mem_i8x2 i8vec2
|
||||
#define mem_i16x2 i16vec2
|
||||
#define mem_i8x3 i8vec3
|
||||
#define mem_i16x3 i16vec3
|
||||
#define mem_i8x4 i8vec4
|
||||
#define mem_i16x4 i16vec4
|
||||
|
||||
#define u8 uint8_t
|
||||
#define u16 uint16_t
|
||||
#define u8x2 u8vec2
|
||||
#define u16x2 u16vec2
|
||||
#define u8x3 u8vec3
|
||||
#define u16x3 u16vec3
|
||||
#define u8x4 u8vec4
|
||||
#define u16x4 u16vec4
|
||||
|
||||
#define i8 int8_t
|
||||
#define i16 int16_t
|
||||
#define i8x2 i8vec2
|
||||
#define i16x2 i16vec2
|
||||
#define i8x3 i8vec3
|
||||
#define i16x3 i16vec3
|
||||
#define i8x4 i8vec4
|
||||
#define i16x4 i16vec4
|
||||
|
||||
#define U8_C(x) uint8_t(x)
|
||||
#define I8_C(x) int8_t(x)
|
||||
#define U16_C(x) uint16_t(x)
|
||||
#define I16_C(x) int16_t(x)
|
||||
|
||||
#else
|
||||
|
||||
#define mem_u8 uint8_t
|
||||
#define mem_u16 uint16_t
|
||||
#define mem_u8x2 u8vec2
|
||||
#define mem_u16x2 u16vec2
|
||||
#define mem_u8x3 u8vec3
|
||||
#define mem_u16x3 u16vec3
|
||||
#define mem_u8x4 u8vec4
|
||||
#define mem_u16x4 u16vec4
|
||||
|
||||
#define mem_i8 int8_t
|
||||
#define mem_i16 int16_t
|
||||
#define mem_i8x2 i8vec2
|
||||
#define mem_i16x2 i16vec2
|
||||
#define mem_i8x3 i8vec3
|
||||
#define mem_i16x3 i16vec3
|
||||
#define mem_i8x4 i8vec4
|
||||
#define mem_i16x4 i16vec4
|
||||
|
||||
#define u8 int
|
||||
#define u16 int
|
||||
#define u8x2 ivec2
|
||||
#define u16x2 ivec2
|
||||
#define u8x3 ivec3
|
||||
#define u16x3 ivec3
|
||||
#define u8x4 ivec4
|
||||
#define u16x4 ivec4
|
||||
|
||||
#define i8 int
|
||||
#define i16 int
|
||||
#define i8x2 ivec2
|
||||
#define i16x2 ivec2
|
||||
#define i8x3 ivec3
|
||||
#define i16x3 ivec3
|
||||
#define i8x4 ivec4
|
||||
#define i16x4 ivec4
|
||||
|
||||
#define U8_C(x) int(x)
|
||||
#define I8_C(x) int(x)
|
||||
#define U16_C(x) int(x)
|
||||
#define I16_C(x) int(x)
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,227 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "small_types.h"
|
||||
#include "debug.h"
|
||||
|
||||
layout(local_size_x_id = 0) in;
|
||||
layout(constant_id = 1) const int SCALING_LOG2 = 0;
|
||||
const int SCALING_FACTOR = 1 << SCALING_LOG2;
|
||||
#include "data_structures.h"
|
||||
|
||||
layout(std430, set = 0, binding = 0) readonly buffer TriangleSetupBuffer
|
||||
{
|
||||
TriangleSetupMem elems[];
|
||||
} triangle_setup;
|
||||
#include "load_triangle_setup.h"
|
||||
|
||||
layout(std430, set = 0, binding = 1) readonly buffer AttributeSetupBuffer
|
||||
{
|
||||
AttributeSetupMem elems[];
|
||||
} attribute_setup;
|
||||
#include "load_attribute_setup.h"
|
||||
|
||||
layout(set = 0, binding = 2, std430) readonly buffer ScissorStateBuffer
|
||||
{
|
||||
ScissorStateMem elems[];
|
||||
} scissor_state;
|
||||
#include "load_scissor_state.h"
|
||||
|
||||
layout(std430, set = 0, binding = 3) writeonly buffer SpanSetups
|
||||
{
|
||||
SpanSetupMem elems[];
|
||||
} span_setups;
|
||||
#include "store_span_setup.h"
|
||||
|
||||
layout(set = 1, binding = 0) uniform utextureBuffer uInterpolationJobs;
|
||||
|
||||
const int SUBPIXELS = 4;
|
||||
const int SUBPIXELS_LOG2 = 2;
|
||||
|
||||
// Snaps each component of a fixed-point X value to a coarser grid: the low 12 bits
// are dropped, and a "sticky" 1 is ORed into the LSB of the result whenever any of
// the dropped bits was non-zero, so a discarded fraction is never silently lost.
// (Described upstream as converting a 16.16 signed value to 16.3, leaving
// 8 subpixels in the X direction after snapping.)
ivec4 quantize_x(ivec4 x)
{
	bvec4 dropped_fraction = notEqual(x & 0xfff, ivec4(0));
	return (x >> 12) | ivec4(dropped_fraction);
}
|
||||
|
||||
// Returns the smallest of the four components of v.
int min4(ivec4 v)
{
	int low_xz = min(v.x, v.z);
	int low_yw = min(v.y, v.w);
	return min(low_xz, low_yw);
}
|
||||
|
||||
// Returns the largest of the four components of v.
int max4(ivec4 v)
{
	int high_xz = max(v.x, v.z);
	int high_yw = max(v.y, v.w);
	return max(high_xz, high_yw);
}
|
||||
|
||||
// Advances a per-line derivative by dy upscaled lines.
// dy is split into whole groups of SCALING_FACTOR lines, which step by the full
// derivative, and the remaining lines, which step by the derivative shifted right
// by SCALING_LOG2. Returns the sum of both contributions.
ivec4 interpolate_snapped(ivec4 dvalue, int dy)
{
	int whole_steps = dy >> SCALING_LOG2;
	int partial_steps = dy & (SCALING_FACTOR - 1);

	ivec4 whole_contribution = whole_steps * dvalue;
	ivec4 partial_contribution = partial_steps * (dvalue >> SCALING_LOG2);
	return whole_contribution + partial_contribution;
}
|
||||
|
||||
// Entry point. Each invocation builds the SpanSetup record for one scaled
// scanline of one primitive and stores it at gl_GlobalInvocationID.x.
void main()
{
    // One job per workgroup: x = primitive index; y and z are scaled by
    // SCALING_FACTOR to give the first and last line covered by the job.
    ivec3 job = ivec3(texelFetch(uInterpolationJobs, int(gl_WorkGroupID.x)).xyz);
    int primitive_index = job.x;
    int first_line = job.y * SCALING_FACTOR;
    int last_line = job.z * SCALING_FACTOR + (SCALING_FACTOR - 1);

    int y = first_line + int(gl_LocalInvocationIndex);
    if (y > last_line)
        return;

    TriangleSetup setup = load_triangle_setup(primitive_index);
    AttributeSetup attr = load_attribute_setup(primitive_index);
    ScissorState scissor = load_scissor_state(primitive_index);

    bool flip = (setup.flags & TRIANGLE_SETUP_FLIP_BIT) != 0;
    bool interlace_en = (setup.flags & TRIANGLE_SETUP_INTERLACE_FIELD_BIT) != 0;
    bool keep_odd_field = (setup.flags & TRIANGLE_SETUP_INTERLACE_KEEP_ODD_BIT) != 0;
    bool do_offset = (setup.flags & TRIANGLE_SETUP_DO_OFFSET_BIT) != 0;
    bool skip_xfrac = (setup.flags & TRIANGLE_SETUP_SKIP_XFRAC_BIT) != 0;

    SpanSetup span_setup;

    // ---- Interpolate RGBA, STZW to their scanline. ----

    // For high-resolution interpolation, make sure we snap interpolation correctly at whole pixels,
    // and quantize derivatives in-between pixels.
    int attr_base_y = (int(setup.yh) >> 2) * SCALING_FACTOR;
    int dy = y - attr_base_y;

    int xh_at_line = setup.xh * SCALING_FACTOR + dy * (setup.dxhdy << 2);

    ivec4 drgba_diff = ivec4(0);
    ivec4 dstzw_diff = ivec4(0);

    // In do_offset mode, varyings are latched at last subpixel line instead of first (for some reason).
    if (do_offset)
    {
        xh_at_line += (SCALING_FACTOR * 3) * setup.dxhdy;

        ivec4 drgba_deh = attr.drgba_de & ~0x1ff;
        ivec4 drgba_dyh = attr.drgba_dy & ~0x1ff;
        drgba_diff = drgba_deh - (drgba_deh >> 2) - drgba_dyh + (drgba_dyh >> 2);

        ivec4 dstzw_deh = attr.dstzw_de & ~0x1ff;
        ivec4 dstzw_dyh = attr.dstzw_dy & ~0x1ff;
        dstzw_diff = dstzw_deh - (dstzw_deh >> 2) - dstzw_dyh + (dstzw_dyh >> 2);
    }

    span_setup.interpolation_base_x = xh_at_line >> 15;
    int xfrac = skip_xfrac ? 0 : ((xh_at_line >> 7) & 0xff);

    ivec4 rgba_at_line = attr.rgba + interpolate_snapped(attr.drgba_de, dy);
    ivec4 rgba_x_adjust = interpolate_snapped((attr.drgba_dx >> 8) & ~1, xfrac);
    span_setup.rgba = ((rgba_at_line & ~0x1ff) + drgba_diff - rgba_x_adjust) & ~0x3ff;

    ivec4 stzw_at_line = attr.stzw + interpolate_snapped(attr.dstzw_de, dy);
    ivec4 stzw_x_adjust = interpolate_snapped((attr.dstzw_dx >> 8) & ~1, xfrac);
    span_setup.stzw = ((stzw_at_line & ~0x1ff) + dstzw_diff - stzw_x_adjust) & ~0x3ff;

    // ---- Check Y dimension. ----
    int yh_base = (int(setup.yh) & ~(SUBPIXELS - 1)) * SCALING_FACTOR;
    int ym_base = int(setup.ym) * SCALING_FACTOR;

    // The four Y sub-samples belonging to this line.
    ivec4 y_subs = (y * SUBPIXELS) + ivec4(0, 1, 2, 3);
    int ylo = max(setup.yh, scissor.ylo) * SCALING_FACTOR;
    int yhi = min(setup.yl, scissor.yhi) * SCALING_FACTOR;

    // Non-zero where a sub-sample lies outside [ylo, yhi).
    uvec4 clip_y = uvec4(lessThan(y_subs, ivec4(ylo))) | uvec4(greaterThanEqual(y_subs, ivec4(yhi)));

    // ---- Interpolate X at all 4 Y-subpixels. ----
    ivec4 xh = setup.xh * SCALING_FACTOR + (y_subs - yh_base) * setup.dxhdy;
    ivec4 xm = setup.xm * SCALING_FACTOR + (y_subs - yh_base) * setup.dxmdy;
    ivec4 xl = setup.xl * SCALING_FACTOR + (y_subs - ym_base) * setup.dxldy;
    // Above the middle vertex the minor edge is XM rather than XL.
    xl = mix(xl, xm, lessThan(y_subs, ivec4(SCALING_FACTOR * setup.ym)));

    // If we have overflows, we can become sensitive to this in invalid_line check, where
    // checks that should pass fail, and vice versa.
    // Note that we shaved off one bit in triangle setup for upscaling purposes,
    // so this should be 28 bits normally.
    xh = bitfieldExtract(xh, 0, 27 + SCALING_LOG2);
    xl = bitfieldExtract(xl, 0, 27 + SCALING_LOG2);

    ivec4 xh_shifted = quantize_x(xh);
    ivec4 xl_shifted = quantize_x(xl);

    // The flip flag decides which edge is the left one.
    ivec4 xleft = flip ? xh_shifted : xl_shifted;
    ivec4 xright = flip ? xl_shifted : xh_shifted;

    bvec4 invalid_line = greaterThan(xleft >> 1, xright >> 1);

    ivec4 lo_scissor = ivec4(SCALING_FACTOR * (scissor.xlo << 1));
    ivec4 hi_scissor = ivec4(SCALING_FACTOR * (scissor.xhi << 1));

    // Evaluated on the unclamped edges: is every sub-sample fully outside the scissor?
    bool all_over = all(greaterThanEqual(min(xleft, xright), hi_scissor));
    bool all_under = all(lessThan(max(xleft, xright), lo_scissor));

    xleft = min(max(xleft, lo_scissor), hi_scissor);
    xright = min(max(xright, lo_scissor), hi_scissor);

    invalid_line = bvec4(uvec4(invalid_line) | clip_y);

    // Rejected sub-samples get an empty range (left = 0xffff, right = 0).
    xleft = mix(xleft, ivec4(0xffff), invalid_line);
    xright = mix(xright, ivec4(0), invalid_line);

    int start_x = min4(xleft) >> 3;
    int end_x = max4(xright) >> 3;

    span_setup.xleft = xleft;
    span_setup.xright = xright;
    span_setup.start_x = start_x;
    span_setup.end_x = end_x;
    span_setup.valid_line = int(!(all(invalid_line) || all_over || all_under));

    // With interlacing enabled, lines of the other field are dropped.
    if (interlace_en && (((y >> SCALING_LOG2) & 1) != int(keep_odd_field)))
        span_setup.valid_line = U16_C(0);

    span_setup.lodlength = int(flip ? (end_x - span_setup.interpolation_base_x) : (span_setup.interpolation_base_x - start_x));
    store_span_setup(gl_GlobalInvocationID.x, span_setup);
}
|
|
@ -0,0 +1,43 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef STORE_SPAN_SETUP_H_
|
||||
#define STORE_SPAN_SETUP_H_
|
||||
|
||||
// Writes one SpanSetup record to span_setups.elems[index].
// Without SMALL_TYPES each field is stored on its own and the narrow members
// (xleft, xright, lodlength, valid_line) go through the mem_* conversion
// helpers; with SMALL_TYPES the whole struct is stored in one assignment.
void store_span_setup(uint index, SpanSetup setup)
{
#if !SMALL_TYPES
    // Members stored without conversion.
    span_setups.elems[index].rgba = setup.rgba;
    span_setups.elems[index].stzw = setup.stzw;
    span_setups.elems[index].interpolation_base_x = setup.interpolation_base_x;
    span_setups.elems[index].start_x = setup.start_x;
    span_setups.elems[index].end_x = setup.end_x;

    // Members converted to their in-memory representation.
    span_setups.elems[index].xleft = mem_u16x4(uvec4(setup.xleft));
    span_setups.elems[index].xright = mem_u16x4(uvec4(setup.xright));
    span_setups.elems[index].lodlength = mem_i16(setup.lodlength);
    span_setups.elems[index].valid_line = mem_u16(setup.valid_line);
#else
    span_setups.elems[index] = setup;
#endif
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,905 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef TEXTURE_H_
|
||||
#define TEXTURE_H_
|
||||
|
||||
#include "data_structures.h"
|
||||
|
||||
// Texture format identifiers. sample_texture() compares these against
// tile.fmt to choose which texel fetch routine to use.
const int TEXTURE_FORMAT_RGBA = 0;
|
||||
const int TEXTURE_FORMAT_YUV = 1;
|
||||
const int TEXTURE_FORMAT_CI = 2;
|
||||
const int TEXTURE_FORMAT_IA = 3;
|
||||
const int TEXTURE_FORMAT_I = 4;
|
||||
|
||||
// Applies the tile's S wrap/mirror mask to a texel coordinate.
// A mask_s of 0 leaves s untouched. Otherwise s is reduced to its low mask_s
// bits; if mirroring is enabled and bit mask_s of s is set, those low bits
// are inverted first.
int texel_mask_s(TileInfo tile, int s)
{
    if (tile.mask_s == 0)
        return s;

    int wrap_bit = 1 << tile.mask_s;
    bool mirror = (tile.flags & TILE_INFO_MIRROR_S_BIT) != 0;

    // (s & wrap_bit) - 1 is -1 or wrap_bit - 1; max() turns that into 0 or the low-bit mask.
    if (mirror)
        s ^= max((s & wrap_bit) - 1, 0);

    return s & (wrap_bit - 1);
}
|
||||
|
||||
// Two-texel variant of texel_mask_s: masks s and s + 1 together and returns
// both results. Same rules apply: mask_s == 0 disables masking, and mirroring
// inverts the low bits of a coordinate whose bit mask_s is set.
ivec2 texel_mask_s_copy(TileInfo tile, int s)
{
    ivec2 pair = ivec2(s, s + 1);
    if (tile.mask_s == 0)
        return pair;

    int wrap_bit = 1 << tile.mask_s;
    bool mirror = (tile.flags & TILE_INFO_MIRROR_S_BIT) != 0;

    if (mirror)
        pair ^= max((pair & wrap_bit) - 1, 0);

    return pair & (wrap_bit - 1);
}
|
||||
|
||||
// Applies the tile's T wrap/mirror mask to a texel coordinate.
// A mask_t of 0 leaves t untouched. Otherwise t is reduced to its low mask_t
// bits; if mirroring is enabled and bit mask_t of t is set, those low bits
// are inverted first.
int texel_mask_t(TileInfo tile, int t)
{
    if (tile.mask_t == 0)
        return t;

    int wrap_bit = 1 << tile.mask_t;
    bool mirror = (tile.flags & TILE_INFO_MIRROR_T_BIT) != 0;

    // (t & wrap_bit) - 1 is -1 or wrap_bit - 1; max() turns that into 0 or the low-bit mask.
    if (mirror)
        t ^= max((t & wrap_bit) - 1, 0);

    return t & (wrap_bit - 1);
}
|
||||
|
||||
// Expands a 5:5:5:1 word into four 8-bit channels.
// R, G, B are taken from bits 15-11, 10-6 and 5-1 and widened to 8 bits by
// repeating their top three bits; alpha is 0xff when bit 0 is set, else 0.
i16x4 convert_rgba16(uint word)
{
    uvec3 rgb5 = uvec3(word >> 11u, word >> 6u, word >> 1u) & uvec3(31u);
    uvec3 rgb8 = (rgb5 << 3u) | (rgb5 >> 2u);
    uint alpha = ((word & 1u) != 0u) ? 0xffu : 0u;
    return i16x4(rgb8, alpha);
}
|
||||
|
||||
// Splits an intensity/alpha word: everything above bit 7 becomes the
// intensity (replicated to the first three channels), the low byte is alpha.
i16x4 convert_ia16(uint word)
{
    uvec2 ia = uvec2(word >> 8, word & 0xff);
    return i16x4(ia.xxx, ia.y);
}
|
||||
|
||||
// Fetches a 4-bit texel and widens it to 8 bits by duplicating the nibble.
// The same value is returned in all four channels.
i16x4 sample_texel_rgba4(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // Two texels share a byte; the byte address wraps at 0x1000.
    uint addr = (tile.offset + tile.stride * st.y + (st.x >> 1)) & 0xfff;
    // Address swizzle: XOR 4 on odd rows, then XOR 3.
    addr ^= (st.y & 1) << 2;
    addr ^= 3;

    uint raw = uint(tmem8.instances[tmem_instance].elems[addr]);
    // Even S reads the high nibble, odd S the low nibble.
    uint nibble = (raw >> ((~st.x & 1) * 4)) & 0xf;
    return i16x4(nibble | (nibble << 4));
}
|
||||
|
||||
// Fetches a 4-bit intensity/alpha texel: the top three bits are intensity,
// the lowest bit is alpha. Intensity is widened to 8 bits by bit repetition
// and written to the first three channels; alpha becomes 0 or 0xff.
i16x4 sample_texel_ia4(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // Two texels share a byte; the byte address wraps at 0x1000.
    uint addr = (tile.offset + tile.stride * st.y + (st.x >> 1)) & 0xfff;
    // Address swizzle: XOR 4 on odd rows, then XOR 3.
    addr ^= (st.y & 1) << 2;
    addr ^= 3;

    uint raw = uint(tmem8.instances[tmem_instance].elems[addr]);
    // Even S reads the high nibble, odd S the low nibble.
    uint texel = (raw >> ((~st.x & 1) * 4)) & 0xf;

    uint i3 = texel & 0xe;
    uint intensity = (i3 << 4) | (i3 << 1) | (i3 >> 2);
    uint alpha = (texel & 1) * 0xff;
    return i16x4(intensity, intensity, intensity, alpha);
}
|
||||
|
||||
// Fetches a 4-bit colour index without a palette lookup. The result is
// (pal << 4) | index, returned in all four channels.
i16x4 sample_texel_ci4(TileInfo tile, uint tmem_instance, uvec2 st, uint pal)
{
    // Two texels share a byte; the byte address wraps at 0x1000.
    uint addr = (tile.offset + tile.stride * st.y + (st.x >> 1)) & 0xfff;
    // Address swizzle: XOR 4 on odd rows, then XOR 3.
    addr ^= (st.y & 1) << 2;
    addr ^= 3;

    uint raw = uint(tmem8.instances[tmem_instance].elems[addr]);
    // Even S reads the high nibble, odd S the low nibble.
    uint ci = (raw >> ((~st.x & 1) * 4)) & 0xf;
    return i16x4(ci | (pal << 4));
}
|
||||
|
||||
// Fetches a 4-bit colour index, combines it with the palette number and
// resolves it through the lookup table stored from 16-bit element 0x400 on.
// lut_offset and addr_xor choose which of the four slots per index is read;
// tlut_type selects IA16 (true) or RGBA16 (false) decoding of the entry.
i16x4 sample_texel_ci4_tlut(TileInfo tile, uint tmem_instance, uvec2 st, uint pal, uint lut_offset, uint addr_xor, bool tlut_type)
{
    // Texel data is addressed within 0x800 bytes here.
    uint addr = (tile.offset + tile.stride * st.y + (st.x >> 1)) & 0x7ff;
    // Address swizzle: XOR 4 on odd rows, then XOR 3.
    addr ^= (st.y & 1) << 2;
    addr ^= 3;

    uint raw = uint(tmem8.instances[tmem_instance].elems[addr]);
    // Even S reads the high nibble, odd S the low nibble.
    uint ci = ((raw >> ((~st.x & 1) * 4)) & 0xf) | (pal << 4);

    // Four 16-bit slots per index.
    uint lut_entry = ((ci << 2) + lut_offset) ^ addr_xor;
    uint colour = uint(tmem16.instances[tmem_instance].elems[0x400 | lut_entry]);

    if (tlut_type)
        return convert_ia16(colour);
    return convert_rgba16(colour);
}
|
||||
|
||||
// Fetches an 8-bit colour index and resolves it through the lookup table
// stored from 16-bit element 0x400 on. lut_offset and addr_xor choose which
// of the four slots per index is read; tlut_type selects IA16 (true) or
// RGBA16 (false) decoding of the entry.
i16x4 sample_texel_ci8_tlut(TileInfo tile, uint tmem_instance, uvec2 st, uint lut_offset, uint addr_xor, bool tlut_type)
{
    // Texel data is addressed within 0x800 bytes here.
    uint addr = (tile.offset + tile.stride * st.y + st.x) & 0x7ff;
    // Address swizzle: XOR 4 on odd rows, then XOR 3.
    addr ^= (st.y & 1) << 2;
    addr ^= 3;

    uint ci = uint(tmem8.instances[tmem_instance].elems[addr]);

    // Four 16-bit slots per index.
    uint lut_entry = ((ci << 2) + lut_offset) ^ addr_xor;
    uint colour = uint(tmem16.instances[tmem_instance].elems[0x400 | lut_entry]);

    if (tlut_type)
        return convert_ia16(colour);
    return convert_rgba16(colour);
}
|
||||
|
||||
// Fetches one 16-bit word and returns (high byte, low byte, high byte, low byte).
i16x4 sample_texel_ci32(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // Two bytes per texel; the byte address wraps at 0x1000.
    uint byte_addr = (tile.offset + tile.stride * st.y + st.x * 2) & 0xfff;

    // 16-bit element index with swizzle: XOR 2 on odd rows, then XOR 1.
    uint slot = byte_addr >> 1;
    slot ^= (st.y & 1) << 1;
    slot ^= 1;

    uint texel = uint(tmem16.instances[tmem_instance].elems[slot]);
    i16x2 hi_lo = i16x2(texel >> 8, texel & 0xff);
    return hi_lo.xyxy;
}
|
||||
|
||||
// Fetches one 16-bit word, uses its upper byte as a colour index and resolves
// it through the lookup table stored from 16-bit element 0x400 on. lut_offset
// and addr_xor choose which of the four slots per index is read; tlut_type
// selects IA16 (true) or RGBA16 (false) decoding of the entry.
i16x4 sample_texel_ci32_tlut(TileInfo tile, uint tmem_instance, uvec2 st, uint lut_offset, uint addr_xor, bool tlut_type)
{
    // Two bytes per texel; texel data is addressed within 0x800 bytes here.
    uint byte_addr = (tile.offset + tile.stride * st.y + st.x * 2) & 0x7ff;

    // 16-bit element index with swizzle: XOR 2 on odd rows, then XOR 1.
    uint slot = byte_addr >> 1;
    slot ^= (st.y & 1) << 1;
    slot ^= 1;

    uint texel = uint(tmem16.instances[tmem_instance].elems[slot]);

    // (texel >> 8) * 4 written as a single shift followed by clearing the two low bits.
    uint lut_entry = (((texel >> 6) & ~3) + lut_offset) ^ addr_xor;
    uint colour = uint(tmem16.instances[tmem_instance].elems[0x400 | lut_entry]);

    if (tlut_type)
        return convert_ia16(colour);
    return convert_rgba16(colour);
}
|
||||
|
||||
// Fetches one byte and returns it unchanged in all four channels.
i16x4 sample_texel_rgba8(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // One byte per texel; the byte address wraps at 0x1000.
    uint addr = (tile.offset + tile.stride * st.y + st.x) & 0xfff;
    // Address swizzle: XOR 4 on odd rows, then XOR 3.
    addr ^= (st.y & 1) << 2;
    addr ^= 3;

    return i16x4(uint(tmem8.instances[tmem_instance].elems[addr]));
}
|
||||
|
||||
// Fetches an 8-bit intensity/alpha texel: the high nibble is intensity, the
// low nibble is alpha. Each is widened to 8 bits by duplicating the nibble;
// intensity fills the first three channels.
i16x4 sample_texel_ia8(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // One byte per texel; the byte address wraps at 0x1000.
    uint addr = (tile.offset + tile.stride * st.y + st.x) & 0xfff;
    // Address swizzle: XOR 4 on odd rows, then XOR 3.
    addr ^= (st.y & 1) << 2;
    addr ^= 3;

    uint texel = uint(tmem8.instances[tmem_instance].elems[addr]);

    uint i4 = texel >> 4;
    uint a4 = texel & 0xf;
    uint intensity = i4 | (i4 << 4);
    uint alpha = a4 | (a4 << 4);
    return i16x4(intensity, intensity, intensity, alpha);
}
|
||||
|
||||
// Fetches a YUV texel. Luma is one byte read at byte index | 0x800; chroma is
// a 16-bit word addressed by chroma_x, whose high byte is U and low byte is V.
// Returns (U - 0x80, V - 0x80, luma, luma).
i16x4 sample_texel_yuv16(TileInfo tile, uint tmem_instance, uvec2 st, uint chroma_x)
{
    uint row_base = tile.offset + tile.stride * st.y;
    uint odd_row = st.y & 1;

    // Luma: one byte per texel, with the usual byte swizzle.
    uint luma_slot = (row_base + st.x) & 0x7ff;
    luma_slot ^= odd_row << 2;
    luma_slot ^= 3;

    // Chroma: one 16-bit word per chroma_x, with the usual 16-bit swizzle.
    uint chroma_slot = ((row_base + chroma_x * 2) & 0x7ff) >> 1;
    chroma_slot ^= odd_row << 1;
    chroma_slot ^= 1;

    u8 luma = u8(tmem8.instances[tmem_instance].elems[luma_slot | 0x800]);
    u16 chroma = u16(tmem16.instances[tmem_instance].elems[chroma_slot]);

    u8 u = u8((chroma >> U16_C(8)) & U16_C(0xff));
    u8 v = u8((chroma >> U16_C(0)) & U16_C(0xff));

    // Chroma is re-centred around zero.
    return i16x4(i16(u) - I16_C(0x80), i16(v) - I16_C(0x80), luma, luma);
}
|
||||
|
||||
// Fetches one 16-bit word and decodes it with convert_rgba16().
i16x4 sample_texel_rgba16(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // Two bytes per texel; the byte address wraps at 0x1000.
    uint byte_addr = (tile.offset + tile.stride * st.y + st.x * 2) & 0xfff;

    // 16-bit element index with swizzle: XOR 2 on odd rows, then XOR 1.
    uint slot = byte_addr >> 1;
    slot ^= (st.y & 1) << 1;
    slot ^= 1;

    return convert_rgba16(uint(tmem16.instances[tmem_instance].elems[slot]));
}
|
||||
|
||||
// Fetches one 16-bit word and decodes it with convert_ia16().
i16x4 sample_texel_ia16(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // Two bytes per texel; the byte address wraps at 0x1000.
    uint byte_addr = (tile.offset + tile.stride * st.y + st.x * 2) & 0xfff;

    // 16-bit element index with swizzle: XOR 2 on odd rows, then XOR 1.
    uint slot = byte_addr >> 1;
    slot ^= (st.y & 1) << 1;
    slot ^= 1;

    return convert_ia16(uint(tmem16.instances[tmem_instance].elems[slot]));
}
|
||||
|
||||
// Fetches a 32-bit texel stored as two 16-bit words: one at the computed
// element index and one at that index | 0x400. The first word supplies
// channels x/y (high byte, low byte), the second supplies z/w.
i16x4 sample_texel_rgba32(TileInfo tile, uint tmem_instance, uvec2 st)
{
    // Each half is addressed within 0x800 bytes.
    uint byte_addr = (tile.offset + tile.stride * st.y + st.x * 2) & 0x7ff;

    // 16-bit element index with swizzle: XOR 2 on odd rows, then XOR 1.
    uint slot = byte_addr >> 1;
    slot ^= (st.y & 1) << 1;
    slot ^= 1;

    uint first_half = uint(tmem16.instances[tmem_instance].elems[slot]);
    uint second_half = uint(tmem16.instances[tmem_instance].elems[slot | 0x400]);

    return i16x4(first_half >> 8, first_half & 0xff, second_half >> 8, second_half & 0xff);
}
|
||||
|
||||
// Applies the tile shift to a texture coordinate, makes it relative to the
// tile origin `lo`, and optionally clamps it to the tile extent.
//   clamp_bit - when true, clamp the result to [0, extent]; when false only subtract lo
//   coord     - input coordinate, saturated to signed 16 bits first
//   lo, hi    - tile bounds; scaled by << 3 or compared against coord >> 3 below
//   shift     - below 11: right shift; otherwise a left shift by (16 - shift)
//               whose result is wrapped to signed 16 bits
int clamp_and_shift_coord(bool clamp_bit, int coord, int lo, int hi, int shift)
{
    // Clamp 17-bit coordinate to 16-bit coordinate here.
    int c = clamp(coord, -0x8000, 0x7fff);

    c = (shift < 11) ? (c >> shift) : ((c << (32 - shift)) >> 16);

    if (!clamp_bit)
        return c - (lo << 3);

    // At or beyond the upper bound: pin to the tile extent.
    if ((c >> 3) >= hi)
        return (((hi >> 2) - (lo >> 2)) & 0x3ff) << 5;

    // Otherwise make it tile-relative and stop at zero.
    return max(c - (lo << 3), 0);
}
|
||||
|
||||
// Applies the tile shift to a texture coordinate and makes it relative to the
// tile origin `lo` (subtracted as lo << 3). No clamping to the tile extent.
//   shift - below 11: right shift; otherwise a left shift by (16 - shift)
//           whose result is wrapped to signed 16 bits
int shift_coord(int coord, int lo, int shift)
{
    // Clamp 17-bit coordinate to 16-bit coordinate here.
    int c = clamp(coord, -0x8000, 0x7fff);

    c = (shift < 11) ? (c >> shift) : ((c << (32 - shift)) >> 16);

    return c - (lo << 3);
}
|
||||
|
||||
// The copy pipe reads 4x16 words.
// Returns one of those 16-bit words for texel position st + s_offset.
//   s_offset  - which word is requested; values below 2 take the replication
//               path when the tile is not 16bpp and no lookup table is used
//   tlut      - when true, the fetched index is resolved through the lookup
//               table stored from 16-bit element 0x400 on
//   tlut_type - accepted but not used by this function
int sample_texture_copy_word(TileInfo tile, uint tmem_instance, ivec2 st, int s_offset, bool tlut, bool tlut_type)
{
    // Texel size as a shift on the nibble address, capped at 16bpp.
    int s_shamt = min(int(tile.size), 2);
    // Size 3 texels and lookup-table mode only address the lower 0x400 words.
    int idx_mask = (tlut || int(tile.size) == 3) ? 0x3ff : 0x7ff;

    // For non-16bpp TMEM, the lower 32-bits are sampled based on direct 16-bit fetches. There are no shifts applied.
    bool replicate_8bpp = (s_offset < 2) && tile.size != 2 && !tlut;

    if (replicate_8bpp)
    {
        // The high word of 8-bpp replication is special in the sense that we sample 8-bpp correctly.
        // Sample the two possible words.
        st.x += 2 * s_offset;
        ivec2 s = texel_mask_s_copy(tile, st.x);
        int t = texel_mask_t(tile, st.y);

        uint row_base = tile.offset + tile.stride * t;
        uvec2 nibble_addr = (row_base * 2 + (s << s_shamt)) & 0x1fffu;
        nibble_addr ^= (t & 1u) * 8u;
        uvec2 word_index = (nibble_addr >> 2u) & idx_mask;

        int first_texel = int(tmem16.instances[tmem_instance].elems[word_index.x ^ 1]);
        int second_texel = int(tmem16.instances[tmem_instance].elems[word_index.y ^ 1]);

        if (tile.size == 1)
        {
            // 8bpp: pick the addressed byte of each word.
            first_texel >>= 8 - 4 * int(nibble_addr.x & 2);
            second_texel >>= 8 - 4 * int(nibble_addr.y & 2);
            first_texel &= 0xff;
            second_texel &= 0xff;
        }
        else if (tile.size == 0)
        {
            // 4bpp: pick the addressed nibble and widen it to a byte.
            first_texel >>= 12 - 4 * int(nibble_addr.x & 3u);
            second_texel >>= 12 - 4 * int(nibble_addr.y & 3u);
            first_texel = (first_texel & 0xf) * 0x11;
            second_texel = (second_texel & 0xf) * 0x11;
        }
        else
        {
            first_texel >>= 8;
            second_texel >>= 8;
        }

        return (first_texel << 8) | second_texel;
    }

    st.x += s_offset;
    int s = texel_mask_s(tile, st.x);
    int t = texel_mask_t(tile, st.y);

    uint row_base = tile.offset + tile.stride * t;
    uint nibble_addr = (row_base * 2 + (s << s_shamt)) & 0x1fffu;
    nibble_addr ^= (t & 1u) * 8u;
    uint word_index = (nibble_addr >> 2u) & idx_mask;

    int word = int(tmem16.instances[tmem_instance].elems[word_index ^ 1]);
    if (!tlut)
        return word;

    // Lookup-table mode: extract the colour index from the fetched word.
    if (tile.size == 0)
    {
        word >>= 12 - 4 * (nibble_addr & 3);
        word &= 0xf;
        word |= tile.palette << 4;
    }
    else
    {
        word >>= 8 - 4 * (nibble_addr & 2);
        word &= 0xff;
    }

    // Four table slots per index; s_offset picks the slot.
    word <<= 2;
    word += s_offset;
    return int(tmem16.instances[tmem_instance].elems[(word | 0x400) ^ 1]);
}
|
||||
|
||||
// Copy-path texel fetch. Shifts st into tile space, drops the 5 fractional
// bits, then returns data according to global_constants.fb_info.fb_size:
//   0     -> always 0
//   1     -> one byte: word (s_offset >> 1), high byte for even s_offset, low byte for odd
//   other -> the full 16-bit word for s_offset
int sample_texture_copy(TileInfo tile, uint tmem_instance, ivec2 st, int s_offset, bool tlut, bool tlut_type)
{
    ivec2 texel = ivec2(shift_coord(st.x, int(tile.slo), int(tile.shift_s)),
                        shift_coord(st.y, int(tile.tlo), int(tile.shift_t))) >> 5;

    if (global_constants.fb_info.fb_size == 0)
        return 0;

    if (global_constants.fb_info.fb_size == 1)
    {
        int word = sample_texture_copy_word(tile, tmem_instance, texel, s_offset >> 1, tlut, tlut_type);
        return (word >> (8 - 8 * (s_offset & 1))) & 0xff;
    }

    return sample_texture_copy_word(tile, tmem_instance, texel, s_offset, tlut, tlut_type);
}
|
||||
|
||||
// Three-tap interpolation over a texel quad with 5-bit fractions.
// If frac.x + frac.y < 32 the base texel is t00 and the weights are frac;
// otherwise the base is t11 and the weights are (32 - frac.y, 32 - frac.x).
// Result: base + (((t10 - base) * w.x + (t01 - base) * w.y + 16) >> 5).
i16x2 bilinear_3tap(i16x2 t00, i16x2 t10, i16x2 t01, i16x2 t11, ivec2 frac)
{
    bool upper = (frac.x + frac.y) >= 32;

    i16x2 origin = upper ? t11 : t00;
    i16x2 weights = i16x2(upper ? (32 - frac.yx) : frac);

    i16x2 delta = (t10 - origin) * weights.x + (t01 - origin) * weights.y;
    return ((delta + I16_C(0x10)) >> I16_C(5)) + origin;
}
|
||||
|
||||
// Samples and filters one texel for the given tile.
//   st           - coordinate before tile shift/clamp; after shifting, the low
//                  5 bits are the filter fraction
//   tlut         - fetch through the lookup table
//   tlut_type    - lookup-table entry decoding, forwarded to the *_tlut fetchers
//   sample_quad  - also fetch the (s1,t0) and (s0,t1) neighbours and filter
//   mid_texel    - if set and both fractions equal 0x10, average all four texels
//   convert_one  - combine the texels using factors taken from prev_cycle
//                  instead of the filter fractions
//   prev_cycle   - source of the 9-bit signed factors used by convert_one
// Texels that the chosen mode does not fetch are left unassigned, exactly as
// in the straightforward nested-switch formulation.
i16x4 sample_texture(TileInfo tile, uint tmem_instance, ivec2 st, bool tlut, bool tlut_type, bool sample_quad, bool mid_texel, bool convert_one,
                     i16x4 prev_cycle)
{
    bool clamp_s = (tile.flags & TILE_INFO_CLAMP_S_BIT) != 0;
    bool clamp_t = (tile.flags & TILE_INFO_CLAMP_T_BIT) != 0;
    st.x = clamp_and_shift_coord(clamp_s, st.x, int(tile.slo), int(tile.shi), int(tile.shift_s));
    st.y = clamp_and_shift_coord(clamp_t, st.y, int(tile.tlo), int(tile.thi), int(tile.shift_t));

    // Fractions are only kept when filtering across the quad.
    ivec2 frac = sample_quad ? (st & 31) : ivec2(0);
    st >>= 5;

    int s0 = texel_mask_s(tile, st.x);
    int s1 = texel_mask_s(tile, st.x + 1);
    int t0 = texel_mask_t(tile, st.y);
    int t1 = texel_mask_t(tile, st.y + 1);

    // Very specific weird logic going on with t0 and t1.
    int tdiff = max(t1 - t0, -255);
    t0 &= 0xff;
    t1 = t0 + tdiff;

    // Mid-texel averaging only applies exactly at the quad centre.
    mid_texel = mid_texel && frac.x == 0x10 && frac.y == 0x10;
    // True when the sample falls in the triangle anchored at (s1, t1); never in mid-texel mode.
    bool upper = !mid_texel && (frac.x + frac.y) >= 0x20;

    int fmt = int(tile.fmt);
    int texel_size = int(tile.size);
    bool yuv = fmt == TEXTURE_FORMAT_YUV;

    ivec2 st10 = ivec2(s1, t0);
    ivec2 st01 = ivec2(s0, t1);
    ivec2 st11 = ivec2(s1, t1);
    ivec2 base_st = upper ? st11 : ivec2(s0, t0);

    i16x4 tex_base, tex10, tex01, tex11;

    if (tlut)
    {
        bool lut_format = fmt == TEXTURE_FORMAT_RGBA || fmt == TEXTURE_FORMAT_CI ||
                          fmt == TEXTURE_FORMAT_IA || fmt == TEXTURE_FORMAT_I;
        if (lut_format)
        {
            // For TLUT, entries in the LUT are duplicated and we must make sure that we sample 3 different banks
            // when we look up the TLUT entry. In normal situations, this is irrelevant, but we're trying to be accurate here.
            uint addr_xor = upper ? 2 : 1;
            uint base_slot = upper ? 3 : 0;

            switch (texel_size)
            {
            case 0:
                tex_base = sample_texel_ci4_tlut(tile, tmem_instance, base_st, tile.palette, base_slot, addr_xor, tlut_type);
                if (sample_quad)
                {
                    tex10 = sample_texel_ci4_tlut(tile, tmem_instance, st10, tile.palette, 1, addr_xor, tlut_type);
                    tex01 = sample_texel_ci4_tlut(tile, tmem_instance, st01, tile.palette, 2, addr_xor, tlut_type);
                }
                if (mid_texel)
                    tex11 = sample_texel_ci4_tlut(tile, tmem_instance, st11, tile.palette, 3, addr_xor, tlut_type);
                break;

            case 1:
                tex_base = sample_texel_ci8_tlut(tile, tmem_instance, base_st, base_slot, addr_xor, tlut_type);
                if (sample_quad)
                {
                    tex10 = sample_texel_ci8_tlut(tile, tmem_instance, st10, 1, addr_xor, tlut_type);
                    tex01 = sample_texel_ci8_tlut(tile, tmem_instance, st01, 2, addr_xor, tlut_type);
                }
                if (mid_texel)
                    tex11 = sample_texel_ci8_tlut(tile, tmem_instance, st11, 3, addr_xor, tlut_type);
                break;

            default:
                tex_base = sample_texel_ci32_tlut(tile, tmem_instance, base_st, base_slot, addr_xor, tlut_type);
                if (sample_quad)
                {
                    tex10 = sample_texel_ci32_tlut(tile, tmem_instance, st10, 1, addr_xor, tlut_type);
                    tex01 = sample_texel_ci32_tlut(tile, tmem_instance, st01, 2, addr_xor, tlut_type);
                }
                if (mid_texel)
                    tex11 = sample_texel_ci32_tlut(tile, tmem_instance, st11, 3, addr_xor, tlut_type);
                break;
            }
        }
    }
    else
    {
        switch (fmt)
        {
        case TEXTURE_FORMAT_RGBA:
            switch (texel_size)
            {
            case 0:
                tex_base = sample_texel_rgba4(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_rgba4(tile, tmem_instance, st10);
                    tex01 = sample_texel_rgba4(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_rgba4(tile, tmem_instance, st11);
                break;

            case 1:
                tex_base = sample_texel_rgba8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_rgba8(tile, tmem_instance, st10);
                    tex01 = sample_texel_rgba8(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_rgba8(tile, tmem_instance, st11);
                break;

            case 2:
                tex_base = sample_texel_rgba16(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_rgba16(tile, tmem_instance, st10);
                    tex01 = sample_texel_rgba16(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_rgba16(tile, tmem_instance, st11);
                break;

            case 3:
                tex_base = sample_texel_rgba32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_rgba32(tile, tmem_instance, st10);
                    tex01 = sample_texel_rgba32(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_rgba32(tile, tmem_instance, st11);
                break;
            }
            break;

        case TEXTURE_FORMAT_YUV:
        {
            uint chroma_x0 = s0 >> 1;
            uint chroma_x1 = (s1 + (s1 - s0)) >> 1;

            // Only implement 16bpp for now. It's the only one that gives meaningful results.
            // Note: the base texel is always (s0, t0) here, and all four taps are fetched for a quad.
            tex_base = sample_texel_yuv16(tile, tmem_instance, ivec2(s0, t0), chroma_x0);
            if (sample_quad)
            {
                tex10 = sample_texel_yuv16(tile, tmem_instance, st10, chroma_x1);
                tex01 = sample_texel_yuv16(tile, tmem_instance, st01, chroma_x0);
                tex11 = sample_texel_yuv16(tile, tmem_instance, st11, chroma_x1);
            }
            break;
        }

        case TEXTURE_FORMAT_CI:
            switch (texel_size)
            {
            case 0:
                tex_base = sample_texel_ci4(tile, tmem_instance, base_st, tile.palette);
                if (sample_quad)
                {
                    tex10 = sample_texel_ci4(tile, tmem_instance, st10, tile.palette);
                    tex01 = sample_texel_ci4(tile, tmem_instance, st01, tile.palette);
                }
                if (mid_texel)
                    tex11 = sample_texel_ci4(tile, tmem_instance, st11, tile.palette);
                break;

            case 1:
                tex_base = sample_texel_rgba8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_rgba8(tile, tmem_instance, st10);
                    tex01 = sample_texel_rgba8(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_rgba8(tile, tmem_instance, st11);
                break;

            default:
                tex_base = sample_texel_ci32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_ci32(tile, tmem_instance, st10);
                    tex01 = sample_texel_ci32(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_ci32(tile, tmem_instance, st11);
                break;
            }
            break;

        case TEXTURE_FORMAT_IA:
            switch (texel_size)
            {
            case 0:
                tex_base = sample_texel_ia4(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_ia4(tile, tmem_instance, st10);
                    tex01 = sample_texel_ia4(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_ia4(tile, tmem_instance, st11);
                break;

            case 1:
                tex_base = sample_texel_ia8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_ia8(tile, tmem_instance, st10);
                    tex01 = sample_texel_ia8(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_ia8(tile, tmem_instance, st11);
                break;

            case 2:
                tex_base = sample_texel_ia16(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_ia16(tile, tmem_instance, st10);
                    tex01 = sample_texel_ia16(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_ia16(tile, tmem_instance, st11);
                break;

            case 3:
                tex_base = sample_texel_ci32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_ci32(tile, tmem_instance, st10);
                    tex01 = sample_texel_ci32(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_ci32(tile, tmem_instance, st11);
                break;
            }
            break;

        case TEXTURE_FORMAT_I:
            switch (texel_size)
            {
            case 0:
                tex_base = sample_texel_rgba4(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_rgba4(tile, tmem_instance, st10);
                    tex01 = sample_texel_rgba4(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_rgba4(tile, tmem_instance, st11);
                break;

            case 1:
                tex_base = sample_texel_rgba8(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_rgba8(tile, tmem_instance, st10);
                    tex01 = sample_texel_rgba8(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_rgba8(tile, tmem_instance, st11);
                break;

            default:
                tex_base = sample_texel_ci32(tile, tmem_instance, base_st);
                if (sample_quad)
                {
                    tex10 = sample_texel_ci32(tile, tmem_instance, st10);
                    tex01 = sample_texel_ci32(tile, tmem_instance, st01);
                }
                if (mid_texel)
                    tex11 = sample_texel_ci32(tile, tmem_instance, st11);
                break;
            }
            break;
        }
    }

    // ---- Combine the fetched texels. ----

    if (convert_one)
    {
        // Factors are the low 9 bits of prev_cycle, sign-extended.
        ivec4 prev_sext = bitfieldExtract(ivec4(prev_cycle), 0, 9);
        ivec2 factors = upper ? prev_sext.gr : prev_sext.rg;
        ivec4 converted = factors.r * (tex10 - tex_base) + factors.g * (tex01 - tex_base) + 0x80;
        return i16x4((converted >> 8) + prev_sext.b);
    }

    if (yuv)
    {
        if (!sample_quad)
            return tex_base;

        // Chroma is filtered with its own horizontal fraction; luma uses frac directly.
        int chroma_frac = ((s0 & 1) << 4) | (frac.x >> 1);
        i16x2 accum_chroma = bilinear_3tap(tex_base.xy, tex10.xy, tex01.xy, tex11.xy, ivec2(chroma_frac, frac.y));
        i16x2 accum_luma = bilinear_3tap(tex_base.zw, tex10.zw, tex01.zw, tex11.zw, frac);
        return i16x4(accum_chroma, accum_luma);
    }

    if (mid_texel)
        return (tex_base + tex01 + tex10 + tex11 + I16_C(2)) >> I16_C(2);

    // Three-tap filter around the chosen base texel.
    i16x2 weights = i16x2(upper ? (32 - frac.yx) : frac);
    i16x4 delta = (tex10 - tex_base) * weights.x + (tex01 - tex_base) * weights.y;
    return ((delta + I16_C(0x10)) >> I16_C(5)) + tex_base;
}
|
||||
|
||||
// Derives the LOD fraction and, when tex_lod_en is set, the pair of tile indices to sample.
//
// tile0 / tile1          in: base tile indices; out: adjusted tile indices (wrapped to 0..7).
// lod_frac               out: LOD fraction; 0xff is written for the "distant" cases.
// max_level              highest usable mip level; also the tile offset used when distant.
// min_lod                lower clamp applied to the delta when magnifying with sharpen/detail.
// st, st_dx, st_dy       texture coordinate at this sample and at the two neighbours used
//                        to estimate the per-axis deltas.
// perspective_overflow   forces the distant path without looking at the deltas.
// tex_lod_en             when false, tile0 / tile1 are left untouched.
// sharpen_tex_en,
// detail_tex_en          select the alternative LOD fraction / tile selection rules.
void compute_lod_2cycle(inout uint tile0, inout uint tile1, out i16 lod_frac, uint max_level, int min_lod,
                        ivec2 st, ivec2 st_dx, ivec2 st_dy,
                        bool perspective_overflow, bool tex_lod_en, bool sharpen_tex_en, bool detail_tex_en)
{
    bool is_magnified = false;
    bool is_distant = false;
    uint level_offset = 0;

    // True when neither sharpen nor detail texturing modifies the LOD fraction.
    bool plain_lod = !sharpen_tex_en && !detail_tex_en;

    if (perspective_overflow)
    {
        is_distant = true;
        lod_frac = i16(0xff);
    }
    else
    {
        // x ^ (x >> 31) behaves like abs(), except negative inputs come out one smaller.
        ivec2 delta_x = st_dx - st;
        delta_x ^= delta_x >> 31;
        ivec2 delta_y = st_dy - st;
        delta_y ^= delta_y >> 31;

        ivec2 delta_max = max(delta_x, delta_y);
        int max_d = max(delta_max.x, delta_max.y);

        if (max_d >= 0x4000)
        {
            is_distant = true;
            lod_frac = i16(0xff);
            level_offset = max_level;
        }
        else if (max_d < 32)
        {
            // LOD < 0, i.e. magnification.
            is_distant = max_level == 0u;
            is_magnified = true;

            if (plain_lod)
                lod_frac = i16(is_distant ? 0xff : 0);
            else
                lod_frac = i16((max(min_lod, max_d) << 3) + (sharpen_tex_en ? -0x100 : 0));
        }
        else
        {
            int mip_base = max(findMSB(max_d >> 5), 0);
            is_distant = mip_base >= max_level;

            if (is_distant && plain_lod)
            {
                lod_frac = i16(0xff);
            }
            else
            {
                lod_frac = i16(((max_d << 3) >> mip_base) & 0xff);
                level_offset = mip_base;
            }
        }
    }

    // Without LOD-based tile selection the tile indices stay as passed in.
    if (!tex_lod_en)
        return;

    if (is_distant)
        level_offset = max_level;

    if (detail_tex_en)
    {
        // tile1 must be derived first: it reads the not-yet-updated tile0.
        tile1 = (tile0 + level_offset + ((is_distant || is_magnified) ? 1 : 2)) & 7u;
        tile0 = (tile0 + level_offset + (is_magnified ? 0 : 1)) & 7u;
    }
    else
    {
        tile0 = (tile0 + level_offset) & 7u;
        bool reuse_tile0 = is_distant || (!sharpen_tex_en && is_magnified);
        tile1 = reuse_tile0 ? tile0 : ((tile0 + 1) & 7);
    }
}
|
||||
|
||||
// Applies the four conversion factors to a texel.
//
// Each channel of texel_in is first reduced to a sign-extended 9-bit value. The blue
// channel acts as the common base term: it is added to every output colour channel and
// returned unchanged as alpha. The red/green channels are scaled by the factors
// (x -> red from g, y/z -> green from r and g, w -> blue from r), rounded with +0x80
// and shifted down by 8.
i16x4 texture_convert_factors(i16x4 texel_in, i16x4 factors)
{
    ivec4 sext = bitfieldExtract(ivec4(texel_in), 0, 9);
    int base = sext.b;

    int red_term = (factors.x * sext.g + 0x80) >> 8;
    int green_term = (factors.y * sext.r + factors.z * sext.g + 0x80) >> 8;
    int blue_term = (factors.w * sext.r + 0x80) >> 8;

    return i16x4(base + red_term, base + green_term, base + blue_term, base);
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,274 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
// Consumes result from tile_binning_prepass.comp, bins at a finer resolution (8x8 or 16x16 blocks).
|
||||
#include "small_types.h"
|
||||
|
||||
#if SUBGROUP
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : require
|
||||
layout(local_size_x_id = 0) in;
|
||||
#else
|
||||
// Reasonable default. For AMD (64 threads), subgroups are definitely supported, so this won't be hit.
|
||||
layout(local_size_x = 32) in;
|
||||
#endif
|
||||
|
||||
#include "debug.h"
|
||||
#include "data_structures.h"
|
||||
#include "binning.h"
|
||||
|
||||
layout(constant_id = 1) const int TILE_WIDTH = 8;
|
||||
layout(constant_id = 2) const int TILE_HEIGHT = 8;
|
||||
layout(constant_id = 3) const int MAX_PRIMITIVES = 256;
|
||||
layout(constant_id = 4) const int MAX_WIDTH = 1024;
|
||||
layout(constant_id = 5) const int TILE_INSTANCE_STRIDE = 0x8000;
|
||||
layout(constant_id = 6) const int SCALE_FACTOR = 1;
|
||||
|
||||
const int TILE_BINNING_STRIDE = MAX_PRIMITIVES / 32;
|
||||
const int MAX_TILES_X = MAX_WIDTH / TILE_WIDTH;
|
||||
|
||||
layout(set = 0, binding = 0, std430) readonly buffer TriangleSetupBuffer
|
||||
{
|
||||
TriangleSetupMem elems[];
|
||||
} triangle_setup;
|
||||
#include "load_triangle_setup.h"
|
||||
|
||||
layout(set = 0, binding = 1, std430) readonly buffer ScissorStateBuffer
|
||||
{
|
||||
ScissorStateMem elems[];
|
||||
} scissor_state;
|
||||
#include "load_scissor_state.h"
|
||||
|
||||
layout(set = 0, binding = 2, std430) readonly buffer StateIndicesBuffer
|
||||
{
|
||||
InstanceIndicesMem elems[];
|
||||
} state_indices;
|
||||
|
||||
layout(std430, set = 0, binding = 3) writeonly buffer TileBitmask
|
||||
{
|
||||
uint binned_bitmask[];
|
||||
};
|
||||
|
||||
layout(std430, set = 0, binding = 4) writeonly buffer TileBitmaskCoarse
|
||||
{
|
||||
uint binned_bitmask_coarse[];
|
||||
};
|
||||
|
||||
#if !UBERSHADER
|
||||
layout(std430, set = 0, binding = 5) writeonly buffer TileInstanceOffset
|
||||
{
|
||||
uint elems[];
|
||||
} tile_instance_offsets;
|
||||
|
||||
layout(std430, set = 0, binding = 6) buffer IndirectBuffer
|
||||
{
|
||||
uvec4 elems[];
|
||||
} indirect_counts;
|
||||
|
||||
// This can actually be uint16_t, but AMD doesn't seem to support loading uint16_t in SMEM unit,
|
||||
// the memory traffic for this data structure is not relevant anyways.
|
||||
// One unit of per-tile rasterization work.
// The WorkList buffer in this shader stores each entry as a uvec4, and main() writes
// uvec4(tile.x, tile.y, instance_offset, primitive_index) -- the same field order as below.
struct TileRasterWork
|
||||
{
|
||||
uint tile_x, tile_y; // Tile coordinate, in units of tiles.
|
||||
uint tile_instance; // Tile instance slot allocated for this (tile, primitive) pair.
|
||||
uint primitive; // Index of the primitive to rasterize.
|
||||
};
|
||||
|
||||
layout(std430, set = 0, binding = 7) writeonly buffer WorkList
|
||||
{
|
||||
uvec4 elems[];
|
||||
} tile_raster_work;
|
||||
#endif
|
||||
|
||||
#if !UBERSHADER
|
||||
// Reserves one slot in the work list of the given variant and returns its index.
// The counter lives in indirect_counts.elems[variant_index].x.
uint allocate_work_offset(uint variant_index)
{
#if SUBGROUP
    // Merge the atomics of all currently active invocations into a single one.
    // The compiler would normally do this itself, but it may not be able to prove
    // that variant_index is uniform.
    uvec4 lanes = subgroupBallot(true);
    uint lane_total = subgroupBallotBitCount(lanes);

    uint first_slot = 0u;
    if (subgroupElect())
        first_slot = atomicAdd(indirect_counts.elems[variant_index].x, lane_total);

    // Every active lane takes the elected lane's base plus its own rank among the active lanes.
    return subgroupBroadcastFirst(first_slot) + subgroupBallotExclusiveBitCount(lanes);
#else
    return atomicAdd(indirect_counts.elems[variant_index].x, 1u);
#endif
}
|
||||
#endif
|
||||
|
||||
layout(push_constant, std430) uniform Registers
|
||||
{
|
||||
uvec2 resolution;
|
||||
int primitive_count;
|
||||
} fb_info;
|
||||
|
||||
#if !SUBGROUP
|
||||
shared uint merged_mask_shared;
|
||||
#endif
|
||||
|
||||
// Bins up to 32 primitives (one group of the binning bitmask) against the tiles covered
// by this workgroup.
//
// Pass 1 (coarse): the first 32 invocations each test one primitive against the whole
//                  region handled by the subgroup (or by the workgroup without subgroups).
// Pass 2 (fine):   every invocation owns one tile and re-tests only the primitives that
//                  survived pass 1.
// The per-tile result goes to binned_bitmask / binned_bitmask_coarse. Outside ubershader
// mode, tile instances and per-variant work items are allocated as well.
void main()
{
    const int TILES_X = 8;
    const int TILES_Y = int(gl_WorkGroupSize.x) >> 3;

    int group_index = int(gl_WorkGroupID.x);
    ivec2 meta_tile = ivec2(gl_WorkGroupID.yz);

#if SUBGROUP
    // Spec is unclear how gl_LocalInvocationIndex maps to gl_SubgroupInvocationID, so the index is synthesized.
    // Subgroups are known to be fully occupied thanks to VK_EXT_subgroup_size_control.
    int local_index = int(gl_SubgroupInvocationID);
    int SUBGROUP_TILES_Y = int(gl_SubgroupSize) >> 3;
    int row_offset = SUBGROUP_TILES_Y * int(gl_SubgroupID);
    int coarse_rows = SUBGROUP_TILES_Y;
#else
    int local_index = int(gl_LocalInvocationIndex);
    int row_offset = 0;
    int coarse_rows = TILES_Y;
#endif

    // Tile owned by this invocation: 8 tiles per row inside the meta tile.
    ivec2 tile = meta_tile * ivec2(TILES_X, TILES_Y) + ivec2(local_index & 7, (local_index >> 3) + row_offset);
    int linear_tile = tile.y * MAX_TILES_X + tile.x;

    ivec2 fb_size = ivec2(fb_info.resolution);

    // Pixel bounds (inclusive) of the region tested in the coarse pass.
    ivec2 base_coord_meta = meta_tile * ivec2(TILE_WIDTH * TILES_X, TILE_HEIGHT * TILES_Y);
    base_coord_meta.y += row_offset * TILE_HEIGHT;
    ivec2 end_coord_meta = min(base_coord_meta + ivec2(TILE_WIDTH * TILES_X, TILE_HEIGHT * coarse_rows), fb_size) - 1;

    // Pixel bounds (inclusive) of this invocation's own tile.
    ivec2 base_coord = tile * ivec2(TILE_WIDTH, TILE_HEIGHT);
    ivec2 end_coord = min(base_coord + ivec2(TILE_WIDTH, TILE_HEIGHT), fb_size) - 1;

    int primitive_count = fb_info.primitive_count;

#if !SUBGROUP
    if (local_index == 0)
        merged_mask_shared = 0u;
    barrier();
#endif

    // Coarse pass: one primitive per invocation for the first 32 invocations.
    bool binned = false;
    uint coarse_primitive = group_index * 32 + local_index;
    if (local_index < 32 && coarse_primitive < primitive_count)
    {
        ScissorState scissor = load_scissor_state(coarse_primitive);
        ivec2 scissor_lo = SCALE_FACTOR * (ivec2(scissor.xlo, scissor.ylo) >> 2);
        ivec2 scissor_hi = SCALE_FACTOR * (ivec2(scissor.xhi + 3, scissor.yhi + 3) >> 2) - 1;

        // NOTE(review): the lower bound is widened by one pixel here but not in the fine pass below.
        // That only makes the coarse test more conservative; confirm whether it is intentional.
        ivec2 clipped_base_coord = max(base_coord_meta, scissor_lo - 1);
        ivec2 clipped_end_coord = min(end_coord_meta, scissor_hi);

        TriangleSetup setup = load_triangle_setup(coarse_primitive);
        binned = bin_primitive(setup, clipped_base_coord, clipped_end_coord, SCALE_FACTOR);
    }

    // Gather the coarse results of all 32 primitives into a single mask.
#if SUBGROUP
    uint merged_mask = subgroupBallot(binned).x;
#else
    if (binned)
        atomicOr(merged_mask_shared, 1u << local_index);
    barrier();
    uint merged_mask = merged_mask_shared;
#endif

    // Fine pass: re-test each coarse survivor against this invocation's tile.
    uint binned_mask = 0u;
    while (merged_mask != 0u)
    {
        int candidate_bit = findLSB(merged_mask);
        merged_mask &= ~(1u << candidate_bit);
        uint fine_primitive = group_index * 32 + candidate_bit;

        ScissorState scissor = load_scissor_state(fine_primitive);
        ivec2 scissor_lo = SCALE_FACTOR * (ivec2(scissor.xlo, scissor.ylo) >> 2);
        ivec2 scissor_hi = SCALE_FACTOR * (ivec2(scissor.xhi + 3, scissor.yhi + 3) >> 2) - 1;
        ivec2 clipped_base_coord = max(base_coord, scissor_lo);
        ivec2 clipped_end_coord = min(end_coord, scissor_hi);

        TriangleSetup setup = load_triangle_setup(fine_primitive);
        if (bin_primitive(setup, clipped_base_coord, clipped_end_coord, SCALE_FACTOR))
            binned_mask |= 1u << candidate_bit;
    }

    binned_bitmask[linear_tile * TILE_BINNING_STRIDE + group_index] = binned_mask;

    // Keep this group's bit in the per-tile coarse mask in sync with the fine mask.
    if (binned_mask != 0u)
        atomicOr(binned_bitmask_coarse[linear_tile], 1u << group_index);
    else
        atomicAnd(binned_bitmask_coarse[linear_tile], ~(1u << group_index));

#if !UBERSHADER
    // One tile instance is needed per binned primitive.
    uint bit_count = uint(bitCount(binned_mask));
    uint instance_offset = 0u;

#if SUBGROUP
    if (subgroupAny(bit_count != 0u))
    {
        // Allocate tile instance space for all threads in the subgroup in one go.
        uint total_bit_count = subgroupAdd(bit_count);

        if (subgroupElect() && total_bit_count != 0u)
            instance_offset = atomicAdd(indirect_counts.elems[0].w, total_bit_count);

        instance_offset = subgroupBroadcastFirst(instance_offset);
        instance_offset += subgroupInclusiveAdd(bit_count) - bit_count;
    }
#else
    if (bit_count != 0u)
        instance_offset = atomicAdd(indirect_counts.elems[0].w, bit_count);
#endif

    if (bit_count != 0u)
        tile_instance_offsets.elems[linear_tile * TILE_BINNING_STRIDE + group_index] = instance_offset;

    // With subgroups, all lanes walk the union of the masks so they visit the primitives in the same order.
#if SUBGROUP
    uint variant_mask = subgroupOr(binned_mask);
#else
    uint variant_mask = binned_mask;
#endif

    while (variant_mask != 0u)
    {
        int work_bit = findLSB(variant_mask);
        variant_mask &= ~(1u << work_bit);
        int primitive_index = group_index * 32 + work_bit;

        if ((binned_mask & (1u << work_bit)) != 0u)
        {
            uint variant_index = uint(state_indices.elems[primitive_index].static_depth_tmem.x);
            uint work_offset = allocate_work_offset(variant_index);
            tile_raster_work.elems[work_offset + uint(TILE_INSTANCE_STRIDE) * variant_index] =
                uvec4(tile.x, tile.y, instance_offset, primitive_index);
            instance_offset++;
        }
    }
#endif
}
|
|
@ -0,0 +1,577 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "debug.h"
|
||||
#include "small_types.h"
|
||||
layout(local_size_x_id = 0) in;
|
||||
|
||||
layout(set = 0, binding = 0, std430) readonly buffer VRAM8Buffer
|
||||
{
|
||||
mem_u8 data[];
|
||||
} vram8;
|
||||
|
||||
layout(set = 0, binding = 0, std430) readonly buffer VRAM16Buffer
|
||||
{
|
||||
mem_u16 data[];
|
||||
} vram16;
|
||||
|
||||
layout(set = 0, binding = 0, std430) readonly buffer VRAM32Buffer
|
||||
{
|
||||
uint data[];
|
||||
} vram32;
|
||||
|
||||
layout(set = 0, binding = 1, std430) buffer TMEM16Buffer
|
||||
{
|
||||
mem_u16 data[2048];
|
||||
} tmem16;
|
||||
|
||||
struct TileInstance
|
||||
{
|
||||
mem_u16 data[2048];
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 2, std430) writeonly buffer TMEMInstances
|
||||
{
|
||||
TileInstance instances[];
|
||||
} tile_instances;
|
||||
|
||||
layout(push_constant, std430) uniform Registers
|
||||
{
|
||||
int num_uploads;
|
||||
} registers;
|
||||
|
||||
const int TEXTURE_FMT_RGBA = 0;
|
||||
const int TEXTURE_FMT_YUV = 1;
|
||||
const int TEXTURE_FMT_CI = 2;
|
||||
const int TEXTURE_FMT_IA = 3;
|
||||
const int TEXTURE_FMT_I = 4;
|
||||
|
||||
const int UPLOAD_MODE_TILE = 0;
|
||||
const int UPLOAD_MODE_TLUT = 1;
|
||||
const int UPLOAD_MODE_BLOCK = 2;
|
||||
|
||||
// Describes one texture upload replayed by this shader. An array of these is supplied
// through the UploadInfos uniform block and walked in order by main().
// The field notes below describe how the update_tmem_* functions in this file use each value.
struct UploadInfo
|
||||
{
|
||||
int width, height; // Upload extent: x >= width is skipped, rows are clamped to height - 1.
|
||||
float min_t_mod, max_t_mod; // Scale factors handed to compute_upload_t() to bound T for strided block uploads.
|
||||
|
||||
int vram_addr; // Base byte address of the source data.
|
||||
int vram_width; // Source row pitch in pixels; scaled by << (vram_size - 1) when forming the row address.
|
||||
int vram_size; // Source pixel size code; the code distinguishes values 1, 2 and 3.
|
||||
int vram_effective_width; // Width limit used by the TLUT path and the mirrored-write check.
|
||||
|
||||
int tmem_offset; // Byte offset of the upload in TMEM; masked and halved to get a 16-bit index.
|
||||
int tmem_stride_words; // Stride between rows in TMEM; 0 is handled as a special case.
|
||||
int tmem_size; // Tile pixel size code; 3 selects the 32-bit upload path.
|
||||
int tmem_fmt; // Tile format; compared against TEXTURE_FMT_YUV.
|
||||
|
||||
int mode; // One of the UPLOAD_MODE_* constants.
|
||||
float inv_tmem_stride_words; // Multiplier used to derive the row from a TMEM offset; presumably 1 / tmem_stride_words -- TODO confirm.
|
||||
int dxt; // Per-word T increment; T is computed as (word * dxt) >> 16.
|
||||
int padding; // Not read by this shader.
|
||||
};
|
||||
|
||||
layout(set = 1, binding = 0, std140) uniform UploadInfos
|
||||
{
|
||||
UploadInfo upload_info[256];
|
||||
};
|
||||
|
||||
bool tmem_dirty;
|
||||
uint current_tmem_value;
|
||||
|
||||
// Returns int((offset + 0.5) * inv_stride), i.e. a division of offset by a stride
// carried out as a multiplication with the stride's reciprocal.
// Still exact for all relevant inputs, and much faster than an integer divide.
int compute_upload_t(int offset, float inv_stride)
{
    float centered = float(offset) + 0.5;
    return int(centered * inv_stride);
}
|
||||
|
||||
// In 32bpp upload mode we read 64 bits and split the result over the lower and upper TMEM.
|
||||
// Replays one upload in 32bpp (or YUV) mode for a single 16-bit TMEM slot.
// In this mode 64 bits are read per iteration and the result is split over the lower and upper TMEM.
//
// The function works backwards ("scatter as gather"): starting from the TMEM slot it finds
// the source coordinate whose write would land there, returns early when no write reaches
// the slot, and otherwise stores the fetched value in current_tmem_value and sets tmem_dirty.
//
// info          upload being replayed.
// tmem16_index  16-bit slot index inside one TMEM half (caller masks it to 0x3ff).
// upper_tmem    true when the slot lives in the upper TMEM half.
// yuv           true for YUV tiles.
void update_tmem_32(UploadInfo info, int tmem16_index, bool upper_tmem, bool yuv)
{
    int first_index = (info.tmem_offset & 0x7ff) >> 1;
    int stride_words = info.tmem_stride_words;

    // Slot position relative to the start of the upload, wrapped to one TMEM half.
    int rel_index = (tmem16_index - first_index) & 0x3ff;
    int src_x, src_y;
    int swizzle_xor = 0;

    if (info.mode == UPLOAD_MODE_BLOCK)
    {
        int word_index = rel_index >> 1;
        // The two words of the pair this slot belongs to.
        int even_word = word_index & ~1;
        int odd_word = word_index | 1;

        if (stride_words == 0)
        {
            // Trivial case: T can be computed directly and only decides the X swizzle.
            // Other than that, it works like a simple 1D upload.
            // With an unusual DxT the word may still be written several times, or not at all.
            int even_t = (even_word * info.dxt) >> 16;
            int odd_t = (odd_word * info.dxt) >> 16;

            if (even_t == odd_t)
                swizzle_xor = (even_t & 1) << 1;
            else if ((odd_word ^ (odd_t & 1)) == word_index)
                swizzle_xor = (odd_t & 1) << 1;
            else if ((even_word ^ (even_t & 1)) == word_index)
                swizzle_xor = (even_t & 1) << 1;
            else
                return;
        }
        else
        {
            // Exhaustive search for the source word; needed to be completely correct.
            int min_t = compute_upload_t(even_word, info.min_t_mod);
            int max_t = compute_upload_t(odd_word, info.max_t_mod);

            // If t has a range, the solution to Y = (t = floor(X * dt / 2048)) * stride + X lies in:
            // Y - t_max * stride <= X <= Y - t_min * stride.
            int max_word_candidate = odd_word - stride_words * min_t;
            int min_word_candidate = even_word - stride_words * max_t;

            // The bounds on X constrain T further.
            min_t = max(min_t, (min_word_candidate * info.dxt) >> 16);
            max_t = min(max_t, (max_word_candidate * info.dxt) >> 16);

            bool found = false;
            for (int t = max_t; t >= min_t; t--)
            {
                // Check whether t solves the equation. Potentially two source words could write here.
                int x_even = even_word - stride_words * t;
                int x_odd = odd_word - stride_words * t;

                int t_even = (x_even * info.dxt) >> 16;
                int t_odd = (x_odd * info.dxt) >> 16;

                if (((x_odd + t_odd * stride_words) ^ (t_odd & 1)) == word_index)
                {
                    found = true;
                    rel_index = (x_odd << 1) + (rel_index & 1);
                    break;
                }
                else if (((x_even + t_even * stride_words) ^ (t_even & 1)) == word_index)
                {
                    found = true;
                    rel_index = (x_even << 1) + (rel_index & 1);
                    break;
                }
            }

            // The upload strided over this 64bpp word.
            if (!found)
                return;
        }

        src_x = rel_index;
        src_y = 0;
    }
    else if (stride_words == 0)
    {
        // With a TMEM stride of 0 the same line is replayed over and over;
        // the visible result is whatever happened at Y == height - 1.
        src_x = rel_index;
        src_y = info.height - 1;
    }
    else
    {
        src_y = compute_upload_t(rel_index, info.inv_tmem_stride_words);
        src_x = rel_index - src_y * stride_words;

        // If the stride is smaller than the width, the last line has to be unrolled.
        if (src_y >= info.height)
        {
            src_x += stride_words * (src_y - info.height + 1);
            src_y = info.height - 1;
        }
    }

    int last_line_x = src_x ^ ((src_y & 1) << 1);
    if (last_line_x >= info.width && src_y > 0)
    {
        // If the last line won't trigger a write, the previous line probably did.
        src_y--;
        src_x += stride_words;
    }

    // NOTE(review): stays unassigned when vram_size is outside 1..3 and yuv is false.
    int fetch_offset;

    src_x ^= ((src_y & 1) << 1) | swizzle_xor;

    if (info.vram_size == 3 || yuv)
    {
        fetch_offset = 4 * (src_x & ~1);
    }
    else if (info.vram_size == 2)
    {
        // In 16bpp VRAM mode the loader steps 4 pixels at a time (8 bytes), forming 2 complete pixels.
        // In 32bpp tile mode the X value is not shifted accordingly,
        // so texels [0, 1, ..., 4, 5, ...] etc. are written.
        if ((src_x & 2) != 0)
        {
            // Nothing is written on this line, but the previous line might have written here.
            if (src_y <= 0)
            {
                // These 2 words will never be written to.
                return;
            }

            src_y--;
            src_x += stride_words;
            src_x ^= 2;
        }
        fetch_offset = 2 * (src_x & ~1);
    }
    else if (info.vram_size == 1)
    {
        // 4 potential mirrors.
        for (int i = 0; i < 4 && src_y > 0 && (src_x & 6) != 0; i++)
        {
            src_y--;
            src_x += stride_words;
            src_x ^= 2;
        }

        // These 6 words will never be written to.
        if ((src_x & 6) != 0)
            return;

        fetch_offset = src_x & ~1;
    }

    if (src_x >= info.width)
        return;

    int line_addr = info.vram_addr + ((src_y * info.vram_width) << (info.vram_size - 1));

    // The loading pipeline reads 64 bits per iteration.
    int addr = line_addr + fetch_offset + 4 * (src_x & 1);

    uint word;
    if ((addr & 3) == 0)
    {
        word = uint(vram32.data[addr >> 2]);
    }
    else
    {
        // Unaligned: assemble the word byte by byte.
        word = (uint(vram8.data[addr ^ 3]) << 24) |
               (uint(vram8.data[(addr + 1) ^ 3]) << 16) |
               (uint(vram8.data[(addr + 2) ^ 3]) << 8) |
               uint(vram8.data[(addr + 3) ^ 3]);
    }

    if (yuv)
    {
        // Lower TMEM receives the interleaved UV samples (bytes 3 and 1), upper TMEM the Y samples (bytes 2 and 0).
        uint hi_shift = upper_tmem ? 16u : 24u;
        uint lo_shift = upper_tmem ? 0u : 8u;
        word = (((word >> hi_shift) & 0xffu) << 8u) | ((word >> lo_shift) & 0xffu);
    }
    else
    {
        // Lower TMEM takes the high 16 bits, upper TMEM the low 16 bits.
        word = (word >> (upper_tmem ? 0u : 16u)) & 0xffffu;
    }

    current_tmem_value = word;
    tmem_dirty = true;
}
|
||||
|
||||
// Replays one upload in the non-32bpp, non-YUV tile path for a single 16-bit TMEM slot.
//
// Like update_tmem_32(), this is scatter-as-gather: it locates the source texel whose write
// lands in the slot, returns early when no write reaches it, and otherwise stores the
// fetched 16-bit value in current_tmem_value and sets tmem_dirty.
//
// info          upload being replayed.
// tmem16_index  16-bit slot index into TMEM.
void update_tmem_16(UploadInfo info, int tmem16_index)
{
    int first_index = (info.tmem_offset & 0xfff) >> 1;
    int stride_words = info.tmem_stride_words;

    // Slot position relative to the start of the upload, wrapped to the TMEM size.
    int rel_index = (tmem16_index - first_index) & 0x7ff;
    int src_x, src_y;
    int swizzle_xor = 0;

    if (info.mode == UPLOAD_MODE_BLOCK)
    {
        int word_index = rel_index >> 2;

        if (stride_words == 0)
        {
            // Trivial case: T can be computed directly and only decides the X swizzle.
            // Other than that, it works like a simple 1D upload.
            int direct_t = (word_index * info.dxt) >> 16;
            swizzle_xor = (direct_t & 1) << 1;
        }
        else
        {
            // Exhaustive search for the source word; needed to be completely correct.
            int min_t = compute_upload_t(word_index, info.min_t_mod);
            int max_t = compute_upload_t(word_index, info.max_t_mod);

            // If t has a range, the solution to Y = (t = floor(X * dt / 2048)) * stride + X lies in:
            // Y - t_max * stride <= X <= Y - t_min * stride.
            int max_word_candidate = word_index - stride_words * min_t;
            int min_word_candidate = word_index - stride_words * max_t;

            // The bounds on X constrain T further.
            min_t = max(min_t, (min_word_candidate * info.dxt) >> 16);
            max_t = min(max_t, (max_word_candidate * info.dxt) >> 16);

            bool found = false;
            // NOTE(review): unlike the 32-bit variant this loop does not stop at the first hit,
            // so the smallest matching t wins. Kept as-is.
            for (int t = max_t; t >= min_t; t--)
            {
                // Check whether t solves the equation.
                int source_word = word_index - stride_words * t;
                int source_t = (source_word * info.dxt) >> 16;
                if (source_word + source_t * stride_words == word_index)
                {
                    found = true;
                    swizzle_xor = (source_t & 1) << 1;
                    rel_index = (source_word << 2) + (rel_index & 3);
                }
            }

            // The upload strided over this 64bpp word.
            if (!found)
                return;
        }

        src_x = rel_index;
        src_y = 0;
    }
    else if (stride_words == 0)
    {
        // With a TMEM stride of 0 the same line is replayed over and over;
        // the visible result is whatever happened at Y == height - 1.
        src_x = rel_index;
        src_y = info.height - 1;
    }
    else
    {
        src_y = compute_upload_t(rel_index, info.inv_tmem_stride_words);
        src_x = rel_index - src_y * stride_words;

        // If the stride is smaller than the width, the last line has to be unrolled.
        if (src_y >= info.height)
        {
            src_x += stride_words * (src_y - info.height + 1);
            src_y = info.height - 1;
        }
    }

    // Edge cases for when the tile pixel size does not match the texture image size.
    // Should not happen in normal applications. Because this is scatter-as-gather, the code
    // must work out whether the texel is never written (striding) or written several times,
    // in which case the last writer has to be found. The rules below were derived empirically.
    // NOTE(review): fetch_offset stays unassigned for size combinations not covered below.
    int fetch_offset;
    int size_delta = info.vram_size - info.tmem_size;

    if (size_delta == 0)
    {
        // Normal case: TMEM size aligns with VRAM size.
        fetch_offset = (src_x & ~3) * 2;
    }
    else if (size_delta == 1)
    {
        // TMEM is N bpp but VRAM is 2N bpp: writes are mirrored.
        // Select which half of the 2N bpp load is observed in TMEM.
        int aligned_x = src_x & ~3;
        fetch_offset = aligned_x * 4;
        if (aligned_x + 2 < (info.vram_effective_width >> (3 - info.vram_size)))
            fetch_offset += 8;
    }
    else if (info.tmem_size == 2 && info.vram_size == 1)
    {
        // In 8bpp VRAM mode the loader steps 8 pixels at a time (8 bytes), forming 4 complete pixels.
        // In 16bpp tile mode the X value is not shifted accordingly,
        // so texels [0, 1, 2, 3, ..., 8, 9, 10, 11] etc. are written.
        if ((src_x & 4) != 0)
        {
            // Nothing is written on this line, but the previous line might have written here.
            if ((stride_words & 4) == 0 || src_y <= 0)
            {
                // These 4 words will never be written to.
                return;
            }

            src_y--;
            src_x += stride_words;
        }
        fetch_offset = src_x & ~3;
    }

    if (src_x >= info.width)
        return;

    int line_addr = info.vram_addr + ((src_y * info.vram_width) << (info.vram_size - 1));
    src_x ^= ((src_y & 1) << 1) | swizzle_xor;

    // The loading pipeline reads 64 bits per iteration.
    int addr = line_addr + fetch_offset + 2 * (src_x & 3);

    uint word;
    if ((addr & 1) == 0)
        word = uint(vram16.data[(addr >> 1) ^ 1]);
    else
        word = (uint(vram8.data[addr ^ 3]) << 8) | uint(vram8.data[(addr + 1) ^ 3]);

    current_tmem_value = word;
    tmem_dirty = true;
}
|
||||
|
||||
// Replays one TLUT upload for a single 16-bit TMEM slot.
//
// The slot is mapped back to a source pixel offset according to the difference between
// the VRAM and tile pixel size codes. Slots that no write reaches, or whose source offset
// falls outside vram_effective_width, are left untouched. Otherwise the fetched 16-bit
// value is stored in current_tmem_value and tmem_dirty is set.
//
// info          upload being replayed.
// tmem16_index  16-bit slot index into TMEM.
void update_tmem_lut(UploadInfo info, int tmem16_index)
{
    int first_index = (info.tmem_offset & 0xfff) >> 1;
    int rel_index = (tmem16_index - first_index) & 0x7ff;
    int source_pixel;

    switch (info.vram_size - info.tmem_size)
    {
    case 2:
        source_pixel = (rel_index >> 2) << (info.vram_size - 2);
        break;

    case 1:
    {
        // Only the lower 4 slots of every group of 8 receive a write.
        if ((rel_index & 4) != 0)
            return;
        int shamt = info.tmem_size + (info.vram_size == 2 ? 2 : 0);
        source_pixel = (rel_index & ~7) >> shamt;
        break;
    }

    case 0:
    {
        // Only the lowest 4 slots of every group of 16 receive a write.
        if ((rel_index & 0xc) != 0)
            return;
        int shamt = info.tmem_size + (info.vram_size == 2 ? 2 : 0);
        source_pixel = (rel_index & ~3) >> shamt;
        break;
    }

    case -1:
        // Only the lowest 4 slots of every group of 32 receive a write.
        if ((rel_index & 0x1c) != 0)
            return;
        source_pixel = (rel_index >> info.tmem_size) & ~7;
        break;

    default:
        // Remaining size combinations; per the original note this is the
        // 4bpp tile / 32bpp VRAM case, which produces mirrored writes.
        source_pixel = (rel_index >> 2) * 4;
        if (source_pixel + 2 < info.vram_effective_width)
            source_pixel += 2;
        break;
    }

    // Every path rejects source offsets beyond the effective width.
    if (source_pixel >= info.vram_effective_width)
        return;

    int addr = info.vram_addr + (source_pixel << (info.vram_size - 1));

    // Odd behavior when TLUT uploads are unaligned.
    addr += 2 * (addr & 1) * (rel_index & 3);

    uint word;
    if ((addr & 1) == 0)
        word = uint(vram16.data[(addr >> 1) ^ 1]);
    else
        word = (uint(vram8.data[addr ^ 3]) << 8) | uint(vram8.data[(addr + 1) ^ 3]);

    current_tmem_value = word;
    tmem_dirty = true;
}
|
||||
|
||||
// One invocation per 16-bit TMEM word. Replays every queued upload in order,
// records the word's value before the first upload (instance 0) and after each
// upload i (instance i + 1), and writes the final value back to TMEM if any
// upload changed it.
void main()
{
    uint slot = gl_GlobalInvocationID.x;
    int tmem16_index = int(slot) ^ 1;
    bool upper_tmem = tmem16_index >= 0x400;

    tmem_dirty = false;
    current_tmem_value = uint(tmem16.data[slot]);
    tile_instances.instances[0].data[slot] = mem_u16(current_tmem_value);

    int num_uploads = registers.num_uploads;
    for (int i = 0; i < num_uploads; i++)
    {
        UploadInfo info = upload_info[i];
        bool yuv = info.tmem_fmt == TEXTURE_FMT_YUV;

        if (info.mode == UPLOAD_MODE_TLUT)
            update_tmem_lut(info, tmem16_index);
        else if (info.tmem_size == 3 || yuv)
            update_tmem_32(info, tmem16_index & 0x3ff, upper_tmem, yuv);
        else
            update_tmem_16(info, tmem16_index); // Not YUV here: the branch above takes every YUV upload.

        tile_instances.instances[i + 1].data[slot] = mem_u16(current_tmem_value);
    }

    if (tmem_dirty)
        tmem16.data[slot] = mem_u16(current_tmem_value);
}
|
|
@ -0,0 +1,103 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// RIP to any GPU which attempts to execute this monstrosity :)
|
||||
|
||||
#if SUBGROUP
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : require
|
||||
#endif
|
||||
#include "small_types.h"
|
||||
|
||||
layout(local_size_x_id = 3, local_size_y_id = 4) in;
|
||||
|
||||
#include "debug.h"
|
||||
#include "data_structures_buffers.h"
|
||||
|
||||
#include "noise.h"
|
||||
#include "memory_interfacing.h"
|
||||
#include "shading.h"
|
||||
|
||||
layout(push_constant, std430) uniform Registers
|
||||
{
|
||||
uint fb_addr_index;
|
||||
uint fb_depth_addr_index;
|
||||
uint fb_width;
|
||||
uint fb_height;
|
||||
uint group_mask;
|
||||
} registers;
|
||||
|
||||
layout(constant_id = 5) const int MAX_PRIMITIVES = 256;
|
||||
layout(constant_id = 6) const int MAX_WIDTH = 1024;
|
||||
|
||||
const int TILE_BINNING_STRIDE = MAX_PRIMITIVES / 32;
|
||||
const int MAX_TILES_X = MAX_WIDTH / int(gl_WorkGroupSize.x);
|
||||
|
||||
// Ubershader entry point: one invocation per pixel, one workgroup per tile.
// Walks the coarse binning mask of the tile (filtered by registers.group_mask),
// then the per-group primitive masks, and shades this pixel against every
// binned primitive in ascending bit order.
void main()
{
    int x = int(gl_GlobalInvocationID.x);
    int y = int(gl_GlobalInvocationID.y);

    int linear_tile = int(gl_WorkGroupID.x) + int(gl_WorkGroupID.y) * MAX_TILES_X;
    int linear_tile_base = linear_tile * TILE_BINNING_STRIDE;

    uint pending_groups = tile_binning_coarse.elems[linear_tile] & registers.group_mask;
    if (pending_groups == 0u)
        return;

    init_tile(gl_GlobalInvocationID.xy,
              registers.fb_width, registers.fb_height,
              registers.fb_addr_index, registers.fb_depth_addr_index);

    while (pending_groups != 0u)
    {
        // Pop the lowest set group bit.
        int mask_index = findLSB(pending_groups);
        pending_groups &= ~uint(1 << mask_index);

        uint pending_prims = tile_binning.elems[linear_tile_base + mask_index];
        while (pending_prims != 0u)
        {
            // Pop the lowest set primitive bit within this group of 32.
            int bit = findLSB(pending_prims);
            pending_prims &= ~uint(1 << bit);
            uint primitive_index = uint(bit + 32 * mask_index);

            ShadedData shaded;
            if (!shade_pixel(x, y, primitive_index, shaded))
                continue;

            bool is_fill = (shaded.coverage_count & COVERAGE_FILL_BIT) != 0;
            bool is_copy = (shaded.coverage_count & COVERAGE_COPY_BIT) != 0;

            if (is_fill)
                fill_color(derived_setup.elems[primitive_index].fill_color);
            else if (is_copy)
                copy_pipeline(shaded.z_dith, primitive_index);
            else
                depth_blend(x, y, primitive_index, shaded);
        }
    }

    finish_tile(gl_GlobalInvocationID.xy,
                registers.fb_width, registers.fb_height,
                registers.fb_addr_index, registers.fb_depth_addr_index);
}
|
|
@ -0,0 +1,119 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "small_types.h"
|
||||
#include "fb_formats.h"
|
||||
|
||||
layout(local_size_x_id = 3) in;
|
||||
|
||||
layout(constant_id = 0) const int RDRAM_SIZE = 8 * 1024 * 1024;
|
||||
const int RDRAM_MASK_8 = RDRAM_SIZE - 1;
|
||||
const int RDRAM_MASK_16 = RDRAM_MASK_8 >> 1;
|
||||
const int RDRAM_MASK_32 = RDRAM_MASK_8 >> 2;
|
||||
layout(constant_id = 1) const int FB_SIZE_LOG2 = 0;
|
||||
layout(constant_id = 2) const bool COLOR_DEPTH_ALIAS = false;
|
||||
layout(constant_id = 4) const int NUM_SAMPLES = 1;
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
uint num_pixels, fb_addr, fb_depth_addr;
|
||||
} registers;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer RDRAMSingleSampled8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram8;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer RDRAMSingleSampled16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram16;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer RDRAMSingleSampled32
|
||||
{
|
||||
uint elems[];
|
||||
} vram32;
|
||||
|
||||
layout(set = 0, binding = 2) buffer RDRAMUpscalingReference8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram_reference8;
|
||||
|
||||
layout(set = 0, binding = 2) buffer RDRAMUpscalingReference16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram_reference16;
|
||||
|
||||
layout(set = 0, binding = 2) buffer RDRAMUpscalingReference32
|
||||
{
|
||||
uint elems[];
|
||||
} vram_reference32;
|
||||
|
||||
// Copies one byte of single-sampled RDRAM into the reference buffer.
// The index is wrapped to the RDRAM size before use.
void copy_rdram_8(uint index)
{
    index &= RDRAM_MASK_8;
    uint value = uint(vram8.elems[index]);
    vram_reference8.elems[index] = uint8_t(value);
}
|
||||
|
||||
// Copies one 16-bit word of single-sampled RDRAM into the reference buffer.
// The index counts 16-bit words and is wrapped to the RDRAM size before use.
void copy_rdram_16(uint index)
{
    index &= RDRAM_MASK_16;
    uint value = uint(vram16.elems[index]);
    vram_reference16.elems[index] = uint16_t(value);
}
|
||||
|
||||
// Copies one 32-bit word of single-sampled RDRAM into the reference buffer.
// The index counts 32-bit words and is wrapped to the RDRAM size before use.
void copy_rdram_32(uint index)
{
    index &= RDRAM_MASK_32;
    uint value = vram32.elems[index];
    vram_reference32.elems[index] = value;
}
|
||||
|
||||
// One invocation per framebuffer pixel. Snapshots the pixel's colour element
// (width selected by FB_SIZE_LOG2) and, unless colour and depth alias, its
// 16-bit depth element into the reference buffers.
void main()
{
    uint pixel = gl_GlobalInvocationID.x;
    if (pixel >= registers.num_pixels)
        return;

    uint color_index = pixel + registers.fb_addr;
    uint depth_index = pixel + registers.fb_depth_addr;

    // FB_SIZE_LOG2 values other than 0, 1 and 2 copy no colour.
    if (FB_SIZE_LOG2 == 0)
        copy_rdram_8(color_index);
    else if (FB_SIZE_LOG2 == 1)
        copy_rdram_16(color_index);
    else if (FB_SIZE_LOG2 == 2)
        copy_rdram_32(color_index);

    if (!COLOR_DEPTH_ALIAS)
        copy_rdram_16(depth_index);
}
|
|
@ -0,0 +1,185 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "small_types.h"
|
||||
|
||||
layout(local_size_x_id = 3) in;
|
||||
|
||||
layout(constant_id = 0) const int RDRAM_SIZE = 8 * 1024 * 1024;
|
||||
const int RDRAM_MASK_8 = RDRAM_SIZE - 1;
|
||||
const int RDRAM_MASK_16 = RDRAM_MASK_8 >> 1;
|
||||
const int RDRAM_MASK_32 = RDRAM_MASK_8 >> 2;
|
||||
layout(constant_id = 1) const int FB_SIZE_LOG2 = 0;
|
||||
layout(constant_id = 2) const bool COLOR_DEPTH_ALIAS = false;
|
||||
layout(constant_id = 4) const int NUM_SAMPLES = 1;
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
uint num_pixels, fb_addr, fb_depth_addr;
|
||||
} registers;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer RDRAMSingleSampled8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram8;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer RDRAMSingleSampled16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram16;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer RDRAMSingleSampled32
|
||||
{
|
||||
uint elems[];
|
||||
} vram32;
|
||||
|
||||
layout(set = 0, binding = 1) readonly buffer RDRAMHiddenSingleSampled
|
||||
{
|
||||
uint8_t elems[];
|
||||
} hidden_vram;
|
||||
|
||||
layout(set = 0, binding = 2) buffer RDRAMUpscalingReference8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram_reference8;
|
||||
|
||||
layout(set = 0, binding = 2) buffer RDRAMUpscalingReference16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram_reference16;
|
||||
|
||||
layout(set = 0, binding = 2) buffer RDRAMUpscalingReference32
|
||||
{
|
||||
uint elems[];
|
||||
} vram_reference32;
|
||||
|
||||
layout(set = 0, binding = 3) buffer RDRAMUpscaling8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram_upscaled8;
|
||||
|
||||
layout(set = 0, binding = 3) buffer RDRAMUpscaling16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram_upscaled16;
|
||||
|
||||
layout(set = 0, binding = 3) buffer RDRAMUpscaling32
|
||||
{
|
||||
uint elems[];
|
||||
} vram_upscaled32;
|
||||
|
||||
layout(set = 0, binding = 4) buffer RDRAMHiddenUpscaling
|
||||
{
|
||||
uint8_t elems[];
|
||||
} hidden_vram_upscaled;
|
||||
|
||||
// Propagates a changed RDRAM byte into the upscaled domain.
// When the current byte differs from the reference copy, the new value is
// written to every sample plane of the upscaled buffer and the reference is
// updated. The matching hidden byte is written too, but only when the
// mirrored index (index XOR 3) is odd.
void update_rdram_8(uint index)
{
    index &= RDRAM_MASK_8;

    uint current = uint(vram8.elems[index]);
    if (current == uint(vram_reference8.elems[index]))
        return;

    uint mirrored_index = index ^ 3u;
    uint hidden_index = mirrored_index >> 1u;
    uint hidden_value = uint(hidden_vram.elems[hidden_index]);
    bool write_hidden = (mirrored_index & 1u) != 0u;

    for (int i = 0; i < NUM_SAMPLES; i++)
    {
        vram_upscaled8.elems[index + i * RDRAM_SIZE] = uint8_t(current);
        if (write_hidden)
            hidden_vram_upscaled.elems[hidden_index + i * (RDRAM_SIZE >> 1)] = uint8_t(hidden_value);
    }

    vram_reference8.elems[index] = uint8_t(current);
}
|
||||
|
||||
// Propagates a changed 16-bit RDRAM word into the upscaled domain.
// When the current word differs from the reference copy, the word and its
// hidden byte (at index XOR 1) are written to every sample plane and the
// reference is updated.
void update_rdram_16(uint index)
{
    index &= RDRAM_MASK_16;

    uint current = uint(vram16.elems[index]);
    if (current == uint(vram_reference16.elems[index]))
        return;

    uint mirrored_index = index ^ 1u;
    uint hidden_value = uint(hidden_vram.elems[mirrored_index]);

    for (int i = 0; i < NUM_SAMPLES; i++)
    {
        vram_upscaled16.elems[index + i * (RDRAM_SIZE >> 1)] = uint16_t(current);
        hidden_vram_upscaled.elems[mirrored_index + i * (RDRAM_SIZE >> 1)] = uint8_t(hidden_value);
    }

    vram_reference16.elems[index] = uint16_t(current);
}
|
||||
|
||||
// Propagates a changed 32-bit RDRAM word into the upscaled domain.
// When the current word differs from the reference copy, the word and its two
// hidden bytes (at 2 * index and 2 * index + 1) are written to every sample
// plane and the reference is updated.
void update_rdram_32(uint index)
{
    index &= RDRAM_MASK_32;

    uint current = vram32.elems[index];
    if (current == vram_reference32.elems[index])
        return;

    uint hidden_value0 = uint(hidden_vram.elems[2u * index]);
    uint hidden_value1 = uint(hidden_vram.elems[2u * index + 1u]);

    for (int i = 0; i < NUM_SAMPLES; i++)
    {
        vram_upscaled32.elems[index + i * (RDRAM_SIZE >> 2)] = current;
        hidden_vram_upscaled.elems[2u * index + i * (RDRAM_SIZE >> 1)] = uint8_t(hidden_value0);
        hidden_vram_upscaled.elems[2u * index + 1u + i * (RDRAM_SIZE >> 1)] = uint8_t(hidden_value1);
    }

    vram_reference32.elems[index] = current;
}
|
||||
|
||||
// One invocation per framebuffer pixel. Pushes any RDRAM change into the
// upscaled buffers: the colour element (width selected by FB_SIZE_LOG2) and,
// unless colour and depth alias, the 16-bit depth element.
void main()
{
    uint pixel = gl_GlobalInvocationID.x;
    if (pixel >= registers.num_pixels)
        return;

    uint color_index = pixel + registers.fb_addr;
    uint depth_index = pixel + registers.fb_depth_addr;

    // FB_SIZE_LOG2 values other than 0, 1 and 2 update no colour.
    if (FB_SIZE_LOG2 == 0)
        update_rdram_8(color_index);
    else if (FB_SIZE_LOG2 == 1)
        update_rdram_16(color_index);
    else if (FB_SIZE_LOG2 == 2)
        update_rdram_32(color_index);

    if (!COLOR_DEPTH_ALIAS)
        update_rdram_16(depth_index);
}
|
|
@ -0,0 +1,279 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "small_types.h"
|
||||
#include "fb_formats.h"
|
||||
|
||||
layout(local_size_x_id = 3) in;
|
||||
|
||||
layout(constant_id = 0) const int RDRAM_SIZE = 8 * 1024 * 1024;
|
||||
const int RDRAM_MASK_8 = RDRAM_SIZE - 1;
|
||||
const int RDRAM_MASK_16 = RDRAM_MASK_8 >> 1;
|
||||
const int RDRAM_MASK_32 = RDRAM_MASK_8 >> 2;
|
||||
layout(constant_id = 1) const int FB_SIZE_LOG2 = 0;
|
||||
layout(constant_id = 2) const bool COLOR_DEPTH_ALIAS = false;
|
||||
layout(constant_id = 4) const int NUM_SAMPLES = 1;
|
||||
layout(constant_id = 5) const bool DITHER = false;
|
||||
layout(constant_id = 6) const bool RDRAM_UNSCALED_WRITE_MASK = false;
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
uint num_pixels, fb_addr, fb_depth_addr, width, height;
|
||||
} registers;
|
||||
|
||||
layout(set = 0, binding = 0) writeonly buffer RDRAMSingleSampled8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram8;
|
||||
|
||||
layout(set = 0, binding = 0) writeonly buffer RDRAMSingleSampled16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram16;
|
||||
|
||||
layout(set = 0, binding = 0) writeonly buffer RDRAMSingleSampled32
|
||||
{
|
||||
uint elems[];
|
||||
} vram32;
|
||||
|
||||
layout(set = 0, binding = 2) writeonly buffer RDRAMUpscalingReference8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram_reference8;
|
||||
|
||||
layout(set = 0, binding = 2) writeonly buffer RDRAMUpscalingReference16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram_reference16;
|
||||
|
||||
layout(set = 0, binding = 2) writeonly buffer RDRAMUpscalingReference32
|
||||
{
|
||||
uint elems[];
|
||||
} vram_reference32;
|
||||
|
||||
layout(set = 0, binding = 3) readonly buffer RDRAMUpscaling8
|
||||
{
|
||||
uint8_t elems[];
|
||||
} vram_upscaled8;
|
||||
|
||||
layout(set = 0, binding = 3) readonly buffer RDRAMUpscaling16
|
||||
{
|
||||
uint16_t elems[];
|
||||
} vram_upscaled16;
|
||||
|
||||
layout(set = 0, binding = 3) readonly buffer RDRAMUpscaling32
|
||||
{
|
||||
uint elems[];
|
||||
} vram_upscaled32;
|
||||
|
||||
layout(set = 0, binding = 4) readonly buffer RDRAMHiddenUpscaling
|
||||
{
|
||||
uint8_t elems[];
|
||||
} hidden_vram_upscaled;
|
||||
|
||||
// Resolves one 8-bit pixel from the upscaled domain back to single-sampled RDRAM.
//
// index - pixel index; wrapped to the RDRAM size and XORed with 3 to form the
//         byte index used for all reads and writes below.
//
// The NUM_SAMPLES sample planes are averaged with round-to-nearest and the
// result is stored to both RDRAM and the reference buffer. When
// RDRAM_UNSCALED_WRITE_MASK is set, the byte RDRAM_SIZE past the pixel is
// additionally set to 0xff.
void copy_rdram_8(uint index)
{
    index &= RDRAM_MASK_8;
    index ^= 3u;

    uint r = 0u;
    for (int i = 0; i < NUM_SAMPLES; i++)
    {
        // Each sample lives in its own plane of RDRAM_SIZE bytes, matching the
        // layout written by the upscaling update pass and the per-sample strides
        // used by the 16-bit and 32-bit resolves. Without the stride, sample 0
        // would be summed NUM_SAMPLES times and the average would be a no-op.
        uint real_word = uint(vram_upscaled8.elems[index + i * RDRAM_SIZE]);
        r += real_word;
    }

    r = (r + (NUM_SAMPLES >> 1)) / NUM_SAMPLES;
    vram_reference8.elems[index] = uint8_t(r);
    vram8.elems[index] = uint8_t(r);

    if (RDRAM_UNSCALED_WRITE_MASK)
    {
        // Need this memory barrier to ensure the mask readback does not read
        // an invalid value from RDRAM. If the mask is seen, the valid RDRAM value is
        // also coherent.
        memoryBarrierBuffer();
        vram8.elems[index + RDRAM_SIZE] = mem_u8(0xff);
    }

    // Don't bother writing back hidden VRAM. It is not visible to host anyways, and coverage is meaningless when it's filtered.
    // If host later decides to modify the CPU memory, then the hidden VRAM values become complete bogus either way.
}
|
||||
|
||||
// Unpacks a 16-bit RGBA5551 word: R from bits 15..11, G from bits 10..6,
// B from bits 5..1 and A from bit 0.
uvec4 decode_rgba5551(uint word)
{
    uint red = (word >> 11u) & 0x1fu;
    uint green = (word >> 6u) & 0x1fu;
    uint blue = (word >> 1u) & 0x1fu;
    uint alpha = word & 1u;
    return uvec4(red, green, blue, alpha);
}
|
||||
|
||||
// Packs R, G, B and A into one word by shifting them left by 11, 6, 1 and 0
// bits and ORing the results. Components are not masked.
uint encode_rgba5551(uvec4 color)
{
    uint packed = color.a;
    packed |= color.b << 1u;
    packed |= color.g << 6u;
    packed |= color.r << 11u;
    return packed;
}
|
||||
|
||||
// 4x4 dither threshold table with entries in the range 0..7.
// copy_rdram_16 reads it as bayer_dither_lut[(y & 3) * 4 + (x & 3)], so each
// row of four values below corresponds to one value of (y & 3).
const uint bayer_dither_lut[16] = uint[](
|
||||
0, 4, 1, 5,
|
||||
4, 0, 5, 1,
|
||||
3, 7, 2, 6,
|
||||
7, 3, 6, 2);
|
||||
|
||||
// Resolves one 16-bit RGBA5551 pixel from the upscaled domain back to
// single-sampled RDRAM.
//
// index - pixel index; wrapped to the RDRAM size and XORed with 1 to form the
//         16-bit word index used for all reads and writes below.
// x, y  - pixel coordinates; only their low two bits are used, to pick the
//         dither threshold when DITHER is enabled.
//
// The per-channel sums over all NUM_SAMPLES planes are divided either with a
// dither offset or with round-to-nearest, re-packed, and stored to both RDRAM
// and the reference buffer. When RDRAM_UNSCALED_WRITE_MASK is set, the word
// (RDRAM_SIZE >> 1) past the pixel is additionally set to 0xffff.
void copy_rdram_16(uint index, uint x, uint y)
{
    index &= RDRAM_MASK_16;
    index ^= 1u;

    uvec4 sum = uvec4(0u);
    for (int i = 0; i < NUM_SAMPLES; i++)
        sum += decode_rgba5551(uint(vram_upscaled16.elems[index + i * (RDRAM_SIZE >> 1)]));

    uvec4 resolved;
    if (DITHER)
    {
        uint dither_value = bayer_dither_lut[(y & 3u) * 4u + (x & 3u)] * NUM_SAMPLES;
        resolved = (8u * sum + dither_value) / (8 * NUM_SAMPLES);
    }
    else
    {
        resolved = (sum + (NUM_SAMPLES >> 1)) / NUM_SAMPLES;
    }

    uint encoded = encode_rgba5551(resolved);
    vram16.elems[index] = uint16_t(encoded);
    vram_reference16.elems[index] = uint16_t(encoded);

    if (RDRAM_UNSCALED_WRITE_MASK)
    {
        // Need this memory barrier to ensure the mask readback does not read
        // an invalid value from RDRAM. If the mask is seen, the valid RDRAM value is
        // also coherent.
        memoryBarrierBuffer();
        vram16.elems[index + (RDRAM_SIZE >> 1u)] = mem_u16(0xffff);
    }

    // Don't bother writing back hidden VRAM. It is not visible to host anyways, and coverage is meaningless when it's filtered.
    // If host later decides to modify the CPU memory, then the hidden VRAM values become complete bogus either way.
}
|
||||
|
||||
// Copies the first sample of a 16-bit word from the upscaled domain back to
// single-sampled RDRAM and the reference buffer, without filtering.
//
// index - element index; wrapped to the RDRAM size and XORed with 1 to form
//         the 16-bit word index used for all reads and writes below.
void copy_rdram_16_single_sample(uint index)
{
    // Copies the first sample. We cannot meaningfully filter depth samples.
    // The first sample should overlap exactly with the single-sampled version.
    // Coverage clipping might slightly change the result, but shouldn't be different enough to break things.
    index &= RDRAM_MASK_16;
    index ^= 1u;

    uint first_sample = uint(vram_upscaled16.elems[index]);
    vram_reference16.elems[index] = uint16_t(first_sample);
    vram16.elems[index] = uint16_t(first_sample);

    if (RDRAM_UNSCALED_WRITE_MASK)
    {
        // Need this memory barrier to ensure the mask readback does not read
        // an invalid value from RDRAM. If the mask is seen, the valid RDRAM value is
        // also coherent.
        memoryBarrierBuffer();
        vram16.elems[index + (RDRAM_SIZE >> 1u)] = mem_u16(0xffff);
    }

    // Don't bother writing back hidden VRAM. It is not visible to host anyways, and coverage is meaningless when it's filtered.
    // If host later decides to modify the CPU memory, then the hidden VRAM values become complete bogus either way.
}
|
||||
|
||||
// Unpacks a 32-bit RGBA8 word: R from bits 31..24, G from bits 23..16,
// B from bits 15..8 and A from bits 7..0.
uvec4 decode_rgba8(uint word)
{
    uint red = (word >> 24u) & 0xffu;
    uint green = (word >> 16u) & 0xffu;
    uint blue = (word >> 8u) & 0xffu;
    uint alpha = word & 0xffu;
    return uvec4(red, green, blue, alpha);
}
|
||||
|
||||
// Packs R, G, B and A into one word by shifting them left by 24, 16, 8 and 0
// bits and ORing the results. Components are not masked.
uint encode_rgba8(uvec4 color)
{
    uint packed = color.a << 0u;
    packed |= color.b << 8u;
    packed |= color.g << 16u;
    packed |= color.r << 24u;
    return packed;
}
|
||||
|
||||
// Resolves one 32-bit RGBA8 pixel from the upscaled domain back to
// single-sampled RDRAM.
//
// index - pixel index in 32-bit words; wrapped to the RDRAM size before use.
//
// The per-channel sums over all NUM_SAMPLES planes are divided with
// round-to-nearest, re-packed, and stored to both RDRAM and the reference
// buffer. When RDRAM_UNSCALED_WRITE_MASK is set, the word (RDRAM_SIZE >> 2)
// past the pixel is additionally set to all ones.
void copy_rdram_32(uint index)
{
    index &= RDRAM_MASK_32;

    uvec4 sum = uvec4(0u);
    for (int i = 0; i < NUM_SAMPLES; i++)
        sum += decode_rgba8(vram_upscaled32.elems[index + i * (RDRAM_SIZE >> 2)]);

    uvec4 resolved = (sum + (NUM_SAMPLES >> 1)) / NUM_SAMPLES;
    uint encoded = encode_rgba8(resolved);
    vram32.elems[index] = encoded;
    vram_reference32.elems[index] = encoded;

    if (RDRAM_UNSCALED_WRITE_MASK)
    {
        // Need this memory barrier to ensure the mask readback does not read
        // an invalid value from RDRAM. If the mask is seen, the valid RDRAM value is
        // also coherent.
        memoryBarrierBuffer();
        vram32.elems[index + (RDRAM_SIZE >> 2u)] = ~0u;
    }

    // Don't bother writing back hidden VRAM. It is not visible to host anyways, and coverage is meaningless when it's filtered.
    // If host later decides to modify the CPU memory, then the hidden VRAM values become complete bogus either way.
}
|
||||
|
||||
// One invocation per framebuffer pixel. Reads the pixel's 2-bit write mask
// from the upscaled buffer and resolves colour and/or depth back to
// single-sampled RDRAM for whichever bits are set.
void main()
{
    uvec2 coord = gl_GlobalInvocationID.xy;
    if (coord.x >= registers.width)
        return;

    uint pixel = coord.y * registers.width + coord.x;
    uint color_index = pixel + registers.fb_addr;
    uint depth_index = pixel + registers.fb_depth_addr;

    // Each 32-bit mask word covers a 4x4 block of pixels with 2 bits per pixel.
    // The mask words are read from just past the last sample plane.
    uvec2 mask_coord = coord >> 2u;
    uint mask_stride = (registers.width + 3) >> 2u;
    uint mask_index = mask_coord.x + mask_coord.y * mask_stride;
    uint mask_word = vram_upscaled32.elems[NUM_SAMPLES * (RDRAM_SIZE >> 2) + mask_index];

    uint shamt = 2u * ((coord.x & 3u) + 4u * (coord.y & 3u));
    uint pixel_mask = mask_word >> shamt;
    bool color_write_mask = (pixel_mask & 1u) != 0u; // bit 0: colour was written
    bool depth_write_mask = (pixel_mask & 2u) != 0u; // bit 1: depth was written

    if (color_write_mask)
    {
        // FB_SIZE_LOG2 values other than 0, 1 and 2 resolve no colour.
        if (FB_SIZE_LOG2 == 0)
            copy_rdram_8(color_index);
        else if (FB_SIZE_LOG2 == 1)
            copy_rdram_16(color_index, coord.x, coord.y);
        else if (FB_SIZE_LOG2 == 2)
            copy_rdram_32(color_index);
    }

    if (depth_write_mask && !COLOR_DEPTH_ALIAS)
        copy_rdram_16_single_sample(depth_index);
}
|
|
@ -0,0 +1,33 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 0) uniform texture2D uImage;
|
||||
|
||||
// Fetches the texel under this fragment and outputs its RGB multiplied by its
// alpha, with the output alpha forced to zero.
void main()
{
    ivec2 texel_coord = ivec2(gl_FragCoord.xy);
    vec4 input_pixel = texelFetch(uImage, texel_coord, 0);

    // A persistent pixel does not propagate more than one frame.
    vec3 faded_rgb = input_pixel.rgb * input_pixel.a;
    FragColor = vec4(faded_rgb, 0.0);
}
|
|
@ -0,0 +1,60 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef VI_DEBUG_H_
|
||||
#define VI_DEBUG_H_
|
||||
|
||||
#if defined(DEBUG_ENABLE) && DEBUG_ENABLE
|
||||
#include "debug_channel.h"
|
||||
|
||||
// Debug message helpers. Each overload forwards to add_debug_message with
// code 0, the integer fragment coordinate as the identifier, and the source
// line followed by zero to three payload words.

// Message carrying only the source line.
void GENERIC_MESSAGE_(int line)
{
    uvec3 frag_id = uvec3(gl_FragCoord.xy, 0);
    add_debug_message(0, frag_id, line);
}

// Message carrying the source line and one payload word.
void GENERIC_MESSAGE_(int line, uint v)
{
    uvec3 frag_id = uvec3(gl_FragCoord.xy, 0);
    add_debug_message(0, frag_id, uvec2(line, v));
}

// Message carrying the source line and two payload words.
void GENERIC_MESSAGE_(int line, uvec2 v)
{
    uvec3 frag_id = uvec3(gl_FragCoord.xy, 0);
    add_debug_message(0, frag_id, uvec3(line, v));
}

// Message carrying the source line and three payload words.
void GENERIC_MESSAGE_(int line, uvec3 v)
{
    uvec3 frag_id = uvec3(gl_FragCoord.xy, 0);
    add_debug_message(0, frag_id, uvec4(line, v));
}
|
||||
|
||||
#define GENERIC_MESSAGE0() GENERIC_MESSAGE_(__LINE__)
|
||||
#define GENERIC_MESSAGE1(a) GENERIC_MESSAGE_(__LINE__, a)
|
||||
#define GENERIC_MESSAGE2(a, b) GENERIC_MESSAGE_(__LINE__, uvec2(a, b))
|
||||
#define GENERIC_MESSAGE3(a, b, c) GENERIC_MESSAGE_(__LINE__, uvec3(a, b, c))
|
||||
#else
|
||||
#define GENERIC_MESSAGE0()
|
||||
#define GENERIC_MESSAGE1(a)
|
||||
#define GENERIC_MESSAGE2(a, b)
|
||||
#define GENERIC_MESSAGE3(a, b, c)
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,31 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
layout(location = 0) in vec2 vUV;
|
||||
layout(set = 0, binding = 0) uniform sampler2D uSampler;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
|
||||
// Samples the bound texture at the interpolated UV, always at LOD 0, and
// outputs the result unchanged.
void main()
{
    vec4 sampled = textureLod(uSampler, vUV, 0.0);
    FragColor = sampled;
}
|
|
@ -0,0 +1,41 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
layout(location = 0) out vec2 vUV;
|
||||
|
||||
layout(push_constant) uniform UBO
|
||||
{
|
||||
float y_offset;
|
||||
} registers;
|
||||
|
||||
// Emits one oversized triangle, (-1,-1) / (-1,+3) / (+3,-1), which covers the
// whole [-1, 1] clip-space square, so no vertex buffer is needed.
// vUV maps clip space to [0, 1]; the push-constant y_offset is added to V only.
void main()
{
	vec2 corner;
	switch (gl_VertexIndex)
	{
	case 0:
		corner = vec2(-1.0, -1.0);
		break;
	case 1:
		corner = vec2(-1.0, +3.0);
		break;
	default:
		corner = vec2(+3.0, -1.0);
		break;
	}

	gl_Position = vec4(corner, 0.0, 1.0);
	vUV = vec2(corner.x * 0.5 + 0.5, corner.y * 0.5 + 0.5 + registers.y_offset);
}
|
|
@ -0,0 +1,92 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#include "vi_debug.h"
|
||||
|
||||
layout(location = 0) out uvec4 FragColor;
|
||||
#if defined(FETCH_BUG) && FETCH_BUG
|
||||
layout(location = 1) out uvec4 FragColorFetchBug;
|
||||
#endif
|
||||
|
||||
layout(set = 0, binding = 0) uniform mediump utexture2DArray uFetchCache;
|
||||
|
||||
// Exchanges the values of a and b.
void swap(inout uint a, inout uint b)
{
	uvec2 exchanged = uvec2(b, a);
	a = exchanged.x;
	b = exchanged.y;
}
|
||||
|
||||
// Returns the median (middle value) of the three inputs.
// Branch-free form: the median is the larger of min(left, center) and
// min(max(left, center), right).
uint median3(uint left, uint center, uint right)
{
	uint lower = min(left, center);
	uint upper = max(left, center);
	return max(lower, min(upper, right));
}
|
||||
|
||||
// Runs the divot filter on one array layer of uFetchCache.
// Three horizontally adjacent texels are read starting at pix (offsets 0, 1, 2).
// When the bitwise AND of their alpha values equals 7 the middle texel is
// returned untouched; otherwise each colour channel is replaced by the median
// of the three texels, and the middle texel's alpha is kept.
// NOTE(review): alpha appears to carry a 3-bit coverage value — confirm against the fetch stage.
uvec4 divot_layer(ivec2 pix, int layer)
{
	ivec3 coord = ivec3(pix, layer);
	uvec4 left = texelFetch(uFetchCache, coord, 0);
	uvec4 mid = texelFetchOffset(uFetchCache, coord, 0, ivec2(1, 0));
	uvec4 right = texelFetchOffset(uFetchCache, coord, 0, ivec2(2, 0));

	if ((left.a & mid.a & right.a) == 7u)
		return mid;

	// Median filter. TODO: Optimize with mid3?
	return uvec4(
		median3(left.r, mid.r, right.r),
		median3(left.g, mid.g, right.g),
		median3(left.b, mid.b, right.b),
		mid.a);
}

// Filters layer 0 into FragColor and, when FETCH_BUG is compiled in,
// layer 1 into FragColorFetchBug using exactly the same filter.
void main()
{
	ivec2 pix = ivec2(gl_FragCoord.xy);

	FragColor = divot_layer(pix, 0);
#if defined(FETCH_BUG) && FETCH_BUG
	FragColorFetchBug = divot_layer(pix, 1);
#endif
}
|
|
@ -0,0 +1,164 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
#include "small_types.h"
|
||||
#include "vi_status.h"
|
||||
#include "vi_debug.h"
|
||||
|
||||
layout(set = 0, binding = 0) uniform mediump utexture2D uAAInput;
|
||||
|
||||
layout(location = 0) out uvec4 FragColor;
|
||||
#if defined(FETCH_BUG) && FETCH_BUG
|
||||
layout(location = 1) out uvec4 FragColorFetchBug;
|
||||
#endif
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
ivec2 offset;
|
||||
} registers;
|
||||
|
||||
ivec2 pix;
|
||||
// Reads the uAAInput texel (mip 0) located `offset` texels away from the
// global `pix` coordinate that main() sets up.
uvec4 fetch_color_offset(ivec2 offset)
{
	ivec2 coord = pix + offset;
	return texelFetch(uAAInput, coord, 0);
}
|
||||
|
||||
// Folds one neighbour into the running per-channel extremes.
// Candidates whose alpha is not 7 are ignored. For accepted candidates the
// "second" bounds are updated first, using the previous lo/hi, and then lo/hi
// themselves are widened to include the candidate.
void check_neighbor(uvec4 candidate,
                    inout uvec3 lo, inout uvec3 hi,
                    inout uvec3 second_lo, inout uvec3 second_hi)
{
	if (candidate.a != 7u)
		return;

	uvec3 rgb = candidate.rgb;

	// These must read the old lo/hi, so they run before the update below.
	second_lo = min(second_lo, max(rgb, lo));
	second_hi = max(second_hi, min(rgb, hi));

	lo = min(rgb, lo);
	hi = max(rgb, hi);
}
|
||||
|
||||
// Per-channel dither vote: the neighbour at `offset`, reduced to its upper
// five bits, compared against `reference` and clamped to -1 / 0 / +1.
ivec3 dither_vote(ivec2 offset, ivec3 reference)
{
	ivec3 neighbor = ivec3(fetch_color_offset(offset).rgb >> 3u);
	return clamp(neighbor - reference, ivec3(-1), ivec3(1));
}

// Fetch stage with AA and dither filtering.
//  - alpha != 7: AA filter, blending the centre pixel against its neighbourhood.
//  - alpha == 7 and DITHER_ENABLE: dither filter over the 3x3 neighbourhood.
//  - otherwise: the centre pixel passes through.
// With FETCH_BUG compiled in, a second result is written to FragColorFetchBug
// in which the lower row of neighbours is replaced by left/right samples.
void main()
{
	pix = ivec2(gl_FragCoord.xy) + registers.offset;

	uvec4 mid_pixel = fetch_color_offset(ivec2(0));
	uvec3 center = mid_pixel.rgb;

	// AA-filter. If coverage is not full, we blend current pixel against background.
	uvec3 color;
#if defined(FETCH_BUG) && FETCH_BUG
	uvec3 color_bug;
#endif

	if (mid_pixel.a != 7u)
	{
		uvec3 lo = center;
		uvec3 hi = center;
		uvec3 second_lo = center;
		uvec3 second_hi = center;

		// Kept in locals because the fetch-bug path feeds them in a second time.
		uvec4 to_left = fetch_color_offset(ivec2(-2, 0));
		uvec4 to_right = fetch_color_offset(ivec2(+2, 0));

		// Somehow, we're supposed to find the second lowest and second highest neighbor.
		check_neighbor(fetch_color_offset(ivec2(-1, -1)), lo, hi, second_lo, second_hi);
		check_neighbor(fetch_color_offset(ivec2(+1, -1)), lo, hi, second_lo, second_hi);
		check_neighbor(to_left, lo, hi, second_lo, second_hi);
		check_neighbor(to_right, lo, hi, second_lo, second_hi);

#if defined(FETCH_BUG) && FETCH_BUG
		// In the fetch-bug state, we apparently do not read the lower values.
		// Instead, the lower values are treated as left and right.
		uvec3 lo_bug = lo;
		uvec3 hi_bug = hi;
		uvec3 second_lo_bug = second_lo;
		uvec3 second_hi_bug = second_hi;
#endif

		check_neighbor(fetch_color_offset(ivec2(-1, +1)), lo, hi, second_lo, second_hi);
		check_neighbor(fetch_color_offset(ivec2(+1, +1)), lo, hi, second_lo, second_hi);
#if defined(FETCH_BUG) && FETCH_BUG
		check_neighbor(to_left, lo_bug, hi_bug, second_lo_bug, second_hi_bug);
		check_neighbor(to_right, lo_bug, hi_bug, second_lo_bug, second_hi_bug);
		second_lo = mix(second_lo, lo, equal(center, lo));
		second_hi = mix(second_hi, hi, equal(center, hi));
		second_lo_bug = mix(second_lo_bug, lo_bug, equal(center, lo_bug));
		second_hi_bug = mix(second_hi_bug, hi_bug, equal(center, hi_bug));
#endif

		// Blend weight grows as alpha drops below 7; +4 rounds the >> 3.
		uint coeff = 7u - mid_pixel.a;
		uvec3 offset = second_lo + second_hi - (center << 1u);
		color = (center + (((offset * coeff) + 4u) >> 3u)) & 0xffu;

#if defined(FETCH_BUG) && FETCH_BUG
		uvec3 offset_bug = second_lo_bug + second_hi_bug - (center << 1u);
		color_bug = (center + (((offset_bug * coeff) + 4u) >> 3u)) & 0xffu;
#endif
	}
	else if (DITHER_ENABLE)
	{
		// Dither filter.
		ivec3 reference = ivec3(center >> 3u);
		ivec3 votes = ivec3(0);

		// Upper and middle rows (the centre texel itself contributes zero).
		for (int x = -1; x <= 1; x++)
		{
			votes += dither_vote(ivec2(x, -1), reference);
			votes += dither_vote(ivec2(x, 0), reference);
		}

#if defined(FETCH_BUG) && FETCH_BUG
		ivec3 votes_bug = votes;
#endif

		// Lower row.
		for (int x = -1; x <= 1; x++)
			votes += dither_vote(ivec2(x, 1), reference);
		color = (center & 0xf8u) + votes;

#if defined(FETCH_BUG) && FETCH_BUG
		// Fetch-bug variant: left/right are counted again instead of the lower row.
		votes_bug += dither_vote(ivec2(-1, 0), reference);
		votes_bug += dither_vote(ivec2(+1, 0), reference);
		color_bug = (center & 0xf8u) + votes_bug;
#endif
	}
	else
	{
		color = center;
#if defined(FETCH_BUG) && FETCH_BUG
		color_bug = center;
#endif
	}

	FragColor = uvec4(color, mid_pixel.a);
#if defined(FETCH_BUG) && FETCH_BUG
	FragColorFetchBug = uvec4(color_bug, mid_pixel.a);
#endif
}
|
|
@ -0,0 +1,127 @@
|
|||
#version 450
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
|
||||
#include "small_types.h"
|
||||
#include "vi_status.h"
|
||||
#include "vi_debug.h"
|
||||
#include "noise.h"
|
||||
|
||||
layout(set = 0, binding = 0) uniform mediump utexture2DArray uDivotOutput;
|
||||
layout(set = 1, binding = 0) uniform mediump utextureBuffer uGammaTable;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
|
||||
layout(push_constant, std430) uniform Registers
|
||||
{
|
||||
int x_base;
|
||||
int y_base;
|
||||
int h_offset;
|
||||
int v_offset;
|
||||
int x_add;
|
||||
int y_add;
|
||||
int frame_count;
|
||||
|
||||
int serrate_shift;
|
||||
int serrate_mask;
|
||||
int serrate_select;
|
||||
} registers;
|
||||
|
||||
// Integer lerp from a towards b with a 5-bit weight l (l / 32).
// +16 rounds the >> 5; the result is masked to 8 bits, which also discards
// the high bits produced by unsigned wrap-around when b < a.
uvec3 vi_lerp(uvec3 a, uvec3 b, uint l)
{
	uvec3 delta = b - a;
	uvec3 rounded = (delta * l + 16u) >> 5u;
	return (a + rounded) & 0xffu;
}
|
||||
|
||||
// Maps each colour channel through the uGammaTable buffer texture.
// With GAMMA_DITHER the table index becomes (color << 6) + noise + 256;
// without it the channel value is used as the index directly.
uvec3 integer_gamma(uvec3 color)
{
	if (GAMMA_DITHER)
		color = (color << 6) + noise_get_full_gamma_dither() + 256u;

	uint r = texelFetch(uGammaTable, int(color.r)).r;
	uint g = texelFetch(uGammaTable, int(color.g)).r;
	uint b = texelFetch(uGammaTable, int(color.b)).r;
	return uvec3(r, g, b);
}
|
||||
|
||||
layout(constant_id = 2) const bool FETCH_BUG = false;
|
||||
|
||||
// Scale stage: maps an output pixel to a fixed-point source position,
// optionally applies bilinear filtering (SCALE_AA) and gamma or gamma dither,
// and writes the normalised colour.
void main()
{
	ivec2 coord = ivec2(gl_FragCoord.xy) + ivec2(registers.h_offset, registers.v_offset);

	// Drop rows that do not match the selected serrate pattern, then collapse
	// the remaining rows by serrate_shift.
	if ((coord.y & registers.serrate_mask) != registers.serrate_select)
		discard;
	coord.y >>= registers.serrate_shift;

	if (GAMMA_DITHER)
		reseed_noise(coord.x, coord.y, registers.frame_count);

	// Source position with 10 fractional bits.
	ivec2 step_size = ivec2(registers.x_add, registers.y_add);
	ivec2 origin = ivec2(registers.x_base, registers.y_base);
	ivec2 source_pos = coord * step_size + origin;
	ivec2 base_coord = source_pos >> 10;

	uvec3 c00 = texelFetch(uDivotOutput, ivec3(base_coord, 0), 0).rgb;

	int bug_offset = 0;
	if (FETCH_BUG && coord.y != 0)
	{
		// This is super awkward.
		// Basically there seems to be some kind of issue where if we interpolate in Y,
		// we're going to get buggy output.
		// If we hit this case, the next line we filter against will come from the "buggy" array slice.
		// Why this makes sense, I have no idea.
		int prev_y = (source_pos.y - registers.y_add) >> 10;
		int next_y = (source_pos.y + registers.y_add) >> 10;
		if (base_coord.y == prev_y && base_coord.y != next_y)
			bug_offset = 1;
	}

	if (SCALE_AA)
	{
		// Top five fractional bits act as the lerp weights.
		int x_frac = (source_pos.x >> 5) & 31;
		int y_frac = (source_pos.y >> 5) & 31;

		ivec3 lower_row = ivec3(base_coord, bug_offset);
		uvec3 c10 = texelFetchOffset(uDivotOutput, ivec3(base_coord, 0), 0, ivec2(1, 0)).rgb;
		uvec3 c01 = texelFetchOffset(uDivotOutput, lower_row, 0, ivec2(0, 1)).rgb;
		uvec3 c11 = texelFetchOffset(uDivotOutput, lower_row, 0, ivec2(1)).rgb;

		// Vertical lerp on both columns first, then horizontal.
		uvec3 left_column = vi_lerp(c00, c01, y_frac);
		uvec3 right_column = vi_lerp(c10, c11, y_frac);
		c00 = vi_lerp(left_column, right_column, x_frac);
	}

	if (GAMMA_ENABLE)
		c00 = integer_gamma(c00);
	else if (GAMMA_DITHER)
		c00 = min(c00 + noise_get_partial_gamma_dither(), uvec3(0xff));

	FragColor = vec4(vec3(c00) / 255.0, 1.0);
}
|
|
@ -0,0 +1,48 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef VI_STATUS_H_
|
||||
#define VI_STATUS_H_
|
||||
|
||||
// Status word supplied as specialization constant 1; everything below is
// derived from it at pipeline-compile time.
layout(constant_id = 1) const int VI_STATUS = 0;

// Values of the two-bit pixel type field selected by VI_CONTROL_TYPE_MASK.
const int VI_CONTROL_TYPE_BLANK_BIT = 0;
const int VI_CONTROL_TYPE_RESERVED_BIT = 1;
const int VI_CONTROL_TYPE_RGBA5551_BIT = 2;
const int VI_CONTROL_TYPE_RGBA8888_BIT = 3;
const int VI_CONTROL_TYPE_MASK = 3;

// Single-bit flags.
const int VI_CONTROL_GAMMA_DITHER_ENABLE_BIT = 1 << 2;
const int VI_CONTROL_GAMMA_ENABLE_BIT = 1 << 3;
const int VI_CONTROL_DIVOT_ENABLE_BIT = 1 << 4;
const int VI_CONTROL_SERRATE_BIT = 1 << 6;
const int VI_CONTROL_DITHER_FILTER_ENABLE_BIT = 1 << 16;
// NOTE(review): the META bits look like renderer-side flags packed next to the
// hardware bits — confirm against the host code that builds VI_STATUS.
const int VI_CONTROL_META_AA_BIT = 1 << 17;
const int VI_CONTROL_META_SCALE_BIT = 1 << 18;

// Decoded pixel format.
const bool FMT_RGBA5551 = (VI_STATUS & VI_CONTROL_TYPE_MASK) == VI_CONTROL_TYPE_RGBA5551_BIT;
const bool FMT_RGBA8888 = (VI_STATUS & VI_CONTROL_TYPE_MASK) == VI_CONTROL_TYPE_RGBA8888_BIT;

// Decoded feature switches.
const bool DITHER_ENABLE = (VI_STATUS & VI_CONTROL_DITHER_FILTER_ENABLE_BIT) != 0;
const bool FETCH_AA = (VI_STATUS & VI_CONTROL_META_AA_BIT) != 0;
const bool SCALE_AA = (VI_STATUS & VI_CONTROL_META_SCALE_BIT) != 0;
const bool GAMMA_ENABLE = (VI_STATUS & VI_CONTROL_GAMMA_ENABLE_BIT) != 0;
const bool GAMMA_DITHER = (VI_STATUS & VI_CONTROL_GAMMA_DITHER_ENABLE_BIT) != 0;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,58 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef Z_ENCODE_H_
|
||||
#define Z_ENCODE_H_
|
||||
|
||||
// The Z compression is kind of clever, and uses inverted FP, with more precision close to 1.
|
||||
// The compressed Z result is 14 bits, and decompresses to 18-bit UNORM.
|
||||
// Expands a compressed depth value: the bits above bit 10 are the exponent,
// the low 11 bits the mantissa. The mantissa is shifted left by
// max(6 - exponent, 0) and added to a base that approaches 0x40000 as the
// exponent grows.
int z_decompress(u16 z_)
{
	int packed_z = int(z_);
	int exponent = packed_z >> 11;
	int mantissa = packed_z & 0x7ff;

	int base = 0x40000 - (0x40000 >> exponent);
	int scaled_mantissa = mantissa << max(6 - exponent, 0);
	return scaled_mantissa + base;
}
|
||||
|
||||
// Packs a depth value into exponent (bits 11 and up) and 11-bit mantissa.
// The exponent comes from the highest set bit of the inverted depth
// (0x3ffff - z, floored at 1) and is clamped to [0, 7].
u16 z_compress(int z)
{
	int inverted = max(0x3ffff - z, 1);
	int exponent = clamp(17 - findMSB(inverted), 0, 7);

	int mantissa = (z >> max(6 - exponent, 0)) & 0x7ff;
	return u16((exponent << 11) + mantissa);
}
|
||||
|
||||
// Expands a compressed dz exponent into the power of two it stands for.
int dz_decompress(int dz)
{
	int expanded = 1 << dz;
	return expanded;
}
|
||||
|
||||
// Compresses dz to the index of its highest set bit.
// findMSB returns -1 for zero, so the result is floored at 0.
int dz_compress(int dz)
{
	int msb = findMSB(dz);
	return max(msb, 0);
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,158 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "device.hpp"
|
||||
#include "rdp_common.hpp"
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
// Caller-tunable knobs for VideoInterface::scanout().
struct ScanoutOptions
{
	// Both default to 0.
	// NOTE(review): presumably 0 means "no cropping" / "no downscaling" — confirm in scanout().
	unsigned crop_overscan_pixels = 0u;
	unsigned downscale_steps = 0u;

	// Works around certain game bugs. Considered a hack if enabled.
	bool persist_frame_on_invalid_input = false;

	// To be equivalent to reference behavior where
	// pixels persist for an extra frame.
	// Not hardware accurate, but needed for weave interlace mode.
	bool blend_previous_frame = false;

	// Upscale deinterlacing deinterlaces by upscaling in Y, with an Y coordinate offset matching the field.
	// If disabled, weave interlacing is used.
	// Weave deinterlacing should *not* be used, except to run test suite!
	bool upscale_deinterlacing = true;

	// Per-feature VI switches; all are enabled by default.
	struct
	{
		bool aa = true;
		bool scale = true;
		bool serrate = true;
		bool dither_filter = true;
		bool divot_filter = true;
		bool gamma_dither = true;
	} vi;
};
|
||||
|
||||
struct VIScanoutBuffer
|
||||
{
|
||||
Vulkan::BufferHandle buffer;
|
||||
Vulkan::Fence fence;
|
||||
unsigned width = 0;
|
||||
unsigned height = 0;
|
||||
};
|
||||
|
||||
class Renderer;
|
||||
|
||||
class VideoInterface : public Vulkan::DebugChannelInterface
|
||||
{
|
||||
public:
|
||||
void set_device(Vulkan::Device *device);
|
||||
void set_renderer(Renderer *renderer);
|
||||
void set_vi_register(VIRegister reg, uint32_t value);
|
||||
|
||||
void set_rdram(const Vulkan::Buffer *rdram, size_t offset, size_t size);
|
||||
void set_hidden_rdram(const Vulkan::Buffer *hidden_rdram);
|
||||
|
||||
int resolve_shader_define(const char *name, const char *define) const;
|
||||
|
||||
Vulkan::ImageHandle scanout(VkImageLayout target_layout, const ScanoutOptions &options = {}, unsigned scale_factor = 1);
|
||||
void scanout_memory_range(unsigned &offset, unsigned &length) const;
|
||||
void set_shader_bank(const ShaderBank *bank);
|
||||
|
||||
private:
|
||||
Vulkan::Device *device = nullptr;
|
||||
Renderer *renderer = nullptr;
|
||||
uint32_t vi_registers[unsigned(VIRegister::Count)] = {};
|
||||
const Vulkan::Buffer *rdram = nullptr;
|
||||
const Vulkan::Buffer *hidden_rdram = nullptr;
|
||||
Vulkan::BufferHandle gamma_lut;
|
||||
Vulkan::BufferViewHandle gamma_lut_view;
|
||||
const ShaderBank *shader_bank = nullptr;
|
||||
|
||||
void init_gamma_table();
|
||||
bool previous_frame_blank = false;
|
||||
bool debug_channel = false;
|
||||
int filter_debug_channel_x = -1;
|
||||
int filter_debug_channel_y = -1;
|
||||
|
||||
void message(const std::string &tag, uint32_t code,
|
||||
uint32_t x, uint32_t y, uint32_t z,
|
||||
uint32_t num_words, const Vulkan::DebugChannelInterface::Word *words) override;
|
||||
|
||||
// Frame state.
|
||||
uint32_t frame_count = 0;
|
||||
uint32_t last_valid_frame_count = 0;
|
||||
Vulkan::ImageHandle prev_scanout_image;
|
||||
VkImageLayout prev_image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
size_t rdram_offset = 0;
|
||||
size_t rdram_size = 0;
|
||||
bool timestamp = false;
|
||||
|
||||
struct Registers
|
||||
{
|
||||
int x_start, y_start;
|
||||
int h_start, v_start;
|
||||
int h_end, v_end;
|
||||
int h_res, v_res;
|
||||
int x_add, y_add;
|
||||
int v_sync;
|
||||
int vi_width;
|
||||
int vi_offset;
|
||||
int max_x, max_y;
|
||||
int v_current_line;
|
||||
bool left_clamp, right_clamp;
|
||||
bool is_pal;
|
||||
uint32_t status;
|
||||
};
|
||||
Registers decode_vi_registers() const;
|
||||
Vulkan::ImageHandle vram_fetch_stage(const Registers ®isters,
|
||||
unsigned scaling_factor) const;
|
||||
Vulkan::ImageHandle aa_fetch_stage(Vulkan::CommandBuffer &cmd,
|
||||
Vulkan::Image &vram_image,
|
||||
const Registers ®isters,
|
||||
unsigned scaling_factor) const;
|
||||
Vulkan::ImageHandle divot_stage(Vulkan::CommandBuffer &cmd,
|
||||
Vulkan::Image &aa_image,
|
||||
const Registers ®isters,
|
||||
unsigned scaling_factor) const;
|
||||
Vulkan::ImageHandle scale_stage(Vulkan::CommandBuffer &cmd,
|
||||
Vulkan::Image &divot_image,
|
||||
Registers registers,
|
||||
unsigned scaling_factor,
|
||||
bool degenerate,
|
||||
const ScanoutOptions &options) const;
|
||||
Vulkan::ImageHandle downscale_stage(Vulkan::CommandBuffer &cmd,
|
||||
Vulkan::Image &scale_image,
|
||||
unsigned scaling_factor,
|
||||
unsigned downscale_factor) const;
|
||||
Vulkan::ImageHandle upscale_deinterlace(Vulkan::CommandBuffer &cmd,
|
||||
Vulkan::Image &scale_image,
|
||||
unsigned scaling_factor, bool field_select) const;
|
||||
static bool need_fetch_bug_emulation(const Registers ®, unsigned scaling_factor);
|
||||
};
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <queue>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <condition_variable>
|
||||
#include <utility>
|
||||
|
||||
#ifdef PARALLEL_RDP_SHADER_DIR
|
||||
#include "global_managers.hpp"
|
||||
#endif
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
// Single background thread that drains a queue of T work items.
//
// Executor must provide:
//   bool is_sentinel(const T &)    - true for the item that stops the loop
//   void perform_work(T &)         - called on the worker thread without any lock held
//   void notify_work_locked(T &)   - called on the worker thread with to_main_mutex held
//
// The destructor pushes a value-initialised T ({}), so Executor must treat
// that value as the sentinel.
template <typename T, typename Executor>
class WorkerThread
{
public:
	explicit WorkerThread(
#ifdef PARALLEL_RDP_SHADER_DIR
			Granite::Global::GlobalManagersHandle globals,
#endif
			Executor exec)
		: executor(std::move(exec))
#ifdef PARALLEL_RDP_SHADER_DIR
		, handles(std::move(globals))
#endif
	{
		// Started in the body so every other member is constructed first.
		thr = std::thread([this]() { main_loop(); });
	}

	~WorkerThread()
	{
		if (!thr.joinable())
			return;

		{
			// Queue the sentinel; the worker exits after draining earlier items.
			std::lock_guard<std::mutex> lock{to_thread_mutex};
			work_queue.push({});
			to_thread_cond.notify_one();
		}
		thr.join();
	}

	// Blocks the caller until cond() returns true. cond is evaluated with
	// to_main_mutex held, the same mutex under which notify_work_locked() runs.
	template <typename Cond>
	void wait(Cond &&cond)
	{
		std::unique_lock<std::mutex> lock{to_main_mutex};
		to_main_cond.wait(lock, std::forward<Cond>(cond));
	}

	// Hands one work item to the worker thread.
	void push(T &&t)
	{
		std::lock_guard<std::mutex> lock{to_thread_mutex};
		work_queue.push(std::move(t));
		to_thread_cond.notify_one();
	}

private:
	std::thread thr;
	// Guards work_queue (producer -> worker direction).
	std::mutex to_thread_mutex;
	std::condition_variable to_thread_cond;
	// Guards completion notifications (worker -> waiter direction).
	std::mutex to_main_mutex;
	std::condition_variable to_main_cond;
	std::queue<T> work_queue;
	Executor executor;

#ifdef PARALLEL_RDP_SHADER_DIR
	Granite::Global::GlobalManagersHandle handles;
#endif

	void main_loop()
	{
#ifdef PARALLEL_RDP_SHADER_DIR
		Granite::Global::set_thread_context(*handles);
		handles.reset();
#endif

		while (true)
		{
			T value;

			{
				// Sleep until an item is available, then take it out of the queue.
				std::unique_lock<std::mutex> lock{to_thread_mutex};
				while (work_queue.empty())
					to_thread_cond.wait(lock);
				value = std::move(work_queue.front());
				work_queue.pop();
			}

			if (executor.is_sentinel(value))
				return;

			executor.perform_work(value);

			// Publish completion under the waiter-side mutex.
			std::lock_guard<std::mutex> lock{to_main_mutex};
			executor.notify_work_locked(value);
			to_main_cond.notify_one();
		}
	}
};
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
/* Copyright (c) 2017-2020 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "aligned_alloc.hpp"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
namespace Util
|
||||
{
|
||||
// Allocates `size` bytes aligned to `boundary` bytes.
//
// Returns nullptr on failure. The result must be released with memalign_free(),
// because the backing allocator differs per platform:
//   - Windows:            _aligned_malloc
//   - C11 available:      aligned_alloc
//   - POSIX.1-2001/XSI 6: posix_memalign
//   - otherwise:          over-allocating malloc with the raw pointer stashed
//                         in the slot just before the returned address
// The manual fallback masks with ~(boundary - 1), so it expects `boundary` to
// be a power of two.
void *memalign_alloc(size_t boundary, size_t size)
{
#if defined(_WIN32)
    return _aligned_malloc(size, boundary);
#elif defined(_ISOC11_SOURCE)
    return aligned_alloc(boundary, size);
#elif (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600)
    void *ptr = nullptr;
    // posix_memalign() signals failure with a non-zero (positive) error
    // number, never a negative value, so compare against zero.
    if (posix_memalign(&ptr, boundary, size) != 0)
        return nullptr;
    return ptr;
#else
    // Align stuff ourselves. Kinda ugly, but will work anywhere.
    void **place;
    uintptr_t addr = 0;
    // Extra room: up to `boundary` bytes of slack plus one slot for the raw pointer.
    void *ptr = malloc(boundary + size + sizeof(uintptr_t));

    if (ptr == nullptr)
        return nullptr;

    // Step past the bookkeeping slot, then round down to the alignment boundary.
    addr = ((uintptr_t)ptr + sizeof(uintptr_t) + boundary) & ~(boundary - 1);
    place = (void **) addr;
    // Remember the malloc() result so memalign_free() can hand it back to free().
    place[-1] = ptr;

    return (void *) addr;
#endif
}
|
||||
|
||||
void *memalign_calloc(size_t boundary, size_t size)
|
||||
{
|
||||
void *ret = memalign_alloc(boundary, size);
|
||||
if (ret)
|
||||
memset(ret, 0, size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Releases a block obtained from memalign_alloc() / memalign_calloc().
// The branch taken here mirrors the allocator memalign_alloc() selects
// for the same platform macros.
void memalign_free(void *ptr)
{
#if defined(_WIN32)
    _aligned_free(ptr);
#elif defined(_ISOC11_SOURCE) || (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600)
    // aligned_alloc / posix_memalign blocks go straight back to free().
    free(ptr);
#else
    // Manual-alignment fallback: the pointer originally returned by malloc()
    // sits in the slot immediately before the aligned address.
    if (ptr == nullptr)
        return;

    free(static_cast<void **>(ptr)[-1]);
#endif
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
/* Copyright (c) 2017-2020 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdexcept>
|
||||
#include <new>
|
||||
|
||||
namespace Util
|
||||
{
|
||||
void *memalign_alloc(size_t boundary, size_t size);
|
||||
void *memalign_calloc(size_t boundary, size_t size);
|
||||
void memalign_free(void *ptr);
|
||||
|
||||
template <typename T>
|
||||
struct AlignedAllocation
|
||||
{
|
||||
static void *operator new(size_t size)
|
||||
{
|
||||
void *ret = ::Util::memalign_alloc(alignof(T), size);
|
||||
if (!ret) throw std::bad_alloc();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void *operator new[](size_t size)
|
||||
{
|
||||
void *ret = ::Util::memalign_alloc(alignof(T), size);
|
||||
if (!ret) throw std::bad_alloc();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void operator delete(void *ptr)
|
||||
{
|
||||
return ::Util::memalign_free(ptr);
|
||||
}
|
||||
|
||||
static void operator delete[](void *ptr)
|
||||
{
|
||||
return ::Util::memalign_free(ptr);
|
||||
}
|
||||
};
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/* Copyright (c) 2017-2020 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace Util
|
||||
{
|
||||
// Bit-scan helpers. On every supported compiler:
//   leading_zeroes(x)  - number of zero bits above the highest set bit, 32 when x == 0
//   trailing_zeroes(x) - number of zero bits below the lowest set bit, 32 when x == 0
//   trailing_ones(x)   - number of consecutive set bits starting at bit 0, 32 when x == ~0u
#ifdef __GNUC__
// __builtin_clz / __builtin_ctz are undefined for a zero argument, so each
// macro handles that input explicitly. Note that the argument is evaluated
// more than once.
#define leading_zeroes(x) ((x) == 0 ? 32 : __builtin_clz(x))
#define trailing_zeroes(x) ((x) == 0 ? 32 : __builtin_ctz(x))
// Guard the all-ones input: ~x is then zero, which __builtin_ctz must not see.
// Returning 32 matches the MSVC implementation below.
#define trailing_ones(x) ((~uint32_t(x)) == 0 ? 32 : __builtin_ctz(~uint32_t(x)))
#elif defined(_MSC_VER)
namespace Internal
{
// Count of leading zero bits; 32 when no bit is set.
static inline uint32_t clz(uint32_t x)
{
    unsigned long result;
    if (_BitScanReverse(&result, x))
        return 31 - result;
    else
        return 32;
}

// Count of trailing zero bits; 32 when no bit is set.
static inline uint32_t ctz(uint32_t x)
{
    unsigned long result;
    if (_BitScanForward(&result, x))
        return result;
    else
        return 32;
}
}

#define leading_zeroes(x) ::Util::Internal::clz(x)
#define trailing_zeroes(x) ::Util::Internal::ctz(x)
#define trailing_ones(x) ::Util::Internal::ctz(~uint32_t(x))
#else
#error "Implement me."
#endif
|
||||
|
||||
template <typename T>
|
||||
inline void for_each_bit(uint32_t value, const T &func)
|
||||
{
|
||||
while (value)
|
||||
{
|
||||
uint32_t bit = trailing_zeroes(value);
|
||||
func(bit);
|
||||
value &= ~(1u << bit);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void for_each_bit_range(uint32_t value, const T &func)
|
||||
{
|
||||
if (value == ~0u)
|
||||
{
|
||||
func(0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t bit_offset = 0;
|
||||
while (value)
|
||||
{
|
||||
uint32_t bit = trailing_zeroes(value);
|
||||
bit_offset += bit;
|
||||
value >>= bit;
|
||||
uint32_t range = trailing_ones(value);
|
||||
func(bit_offset, range);
|
||||
value &= ~((1u << range) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Rounds `v` up to a power of two; values that already are a power of two are
// returned unchanged. Arithmetic wraps in 32 bits, so 0 and anything above
// 0x80000000 yield 0.
inline uint32_t next_pow2(uint32_t v)
{
    // Smear the highest set bit of (v - 1) into every lower position...
    uint32_t filled = v - 1;
    for (uint32_t shift = 16; shift != 0; shift >>= 1)
        filled |= filled >> shift;

    // ...so that adding one carries into the next power of two.
    return filled + 1;
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/* Copyright (c) 2017-2020 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace Util
|
||||
{
|
||||
// Converts an enumerator to the value of its underlying integer type.
template <typename T>
constexpr auto ecast(T x) -> typename std::underlying_type<T>::type
{
    return static_cast<typename std::underlying_type<T>::type>(x);
}
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
/* Copyright (c) 2017-2020 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
namespace Util
|
||||
{
|
||||
using Hash = uint64_t;

// Incremental 64-bit hasher. Every step updates the state as
//   h = (h * 0x100000001b3) ^ value
// starting from 0xcbf29ce484222325 unless an explicit seed is supplied.
class Hasher
{
public:
    Hasher() = default;

    // Continue hashing from a previously obtained state.
    explicit Hasher(Hash h_)
        : h(h_)
    {
    }

    // Mixes in an array of T. `size` is given in bytes; any trailing bytes
    // that do not make up a whole T are ignored.
    template <typename T>
    inline void data(const T *data_, size_t size)
    {
        const size_t count = size / sizeof(T);
        const T *const end = data_ + count;
        for (const T *it = data_; it != end; ++it)
            h = (h * 0x100000001b3ull) ^ *it;
    }

    // The primitive mixing step; the other typed helpers below funnel into it.
    inline void u32(uint32_t value)
    {
        h = (h * 0x100000001b3ull) ^ value;
    }

    inline void s32(int32_t value)
    {
        u32(uint32_t(value));
    }

    // Mixes in the bit pattern of the float rather than its numeric value.
    inline void f32(float value)
    {
        union
        {
            float as_float;
            uint32_t as_bits;
        } pun;
        pun.as_float = value;
        u32(pun.as_bits);
    }

    // Low 32 bits first, then the high 32 bits.
    inline void u64(uint64_t value)
    {
        u32(uint32_t(value & 0xffffffffu));
        u32(uint32_t(value >> 32));
    }

    // Mixes in the address itself, not the pointed-to object.
    template <typename T>
    inline void pointer(T *ptr)
    {
        u64(reinterpret_cast<uintptr_t>(ptr));
    }

    // Mixes in a 0xff marker followed by each character (up to, not
    // including, the terminating NUL) as an unsigned byte.
    inline void string(const char *str)
    {
        u32(0xff);
        for (const char *p = str; *p != '\0'; ++p)
            u32(uint8_t(*p));
    }

    // Same scheme as the C-string overload: 0xff marker, then every byte.
    inline void string(const std::string &str)
    {
        u32(0xff);
        for (size_t i = 0; i < str.size(); i++)
            u32(uint8_t(str[i]));
    }

    // Current hash state.
    inline Hash get() const
    {
        return h;
    }

private:
    Hash h = 0xcbf29ce484222325ull;
};
|
||||
}
|
|
@ -0,0 +1,296 @@
|
|||
/* Copyright (c) 2017-2020 Hans-Kristian Arntzen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Util
|
||||
{
|
||||
// Plain (non-atomic) reference counter. A new counter starts at one reference.
class SingleThreadCounter
{
public:
    inline void add_ref()
    {
        ++count;
    }

    // Drops one reference; returns true when that was the last one.
    inline bool release()
    {
        count -= 1;
        return count == 0;
    }

private:
    size_t count = 1;
};
|
||||
|
||||
// Reference counter backed by std::atomic_size_t. A new counter starts at one
// reference.
class MultiThreadCounter
{
public:
    MultiThreadCounter()
    {
        count.store(1, std::memory_order_relaxed);
    }

    // Increment uses relaxed ordering.
    inline void add_ref()
    {
        count.fetch_add(1, std::memory_order_relaxed);
    }

    // Drops one reference with acquire-release ordering; returns true when the
    // value before the decrement was 1, i.e. this was the last reference.
    inline bool release()
    {
        return count.fetch_sub(1, std::memory_order_acq_rel) == 1;
    }

private:
    std::atomic_size_t count;
};
|
||||
|
||||
template <typename T>
|
||||
class IntrusivePtr;
|
||||
|
||||
template <typename T, typename Deleter = std::default_delete<T>, typename ReferenceOps = SingleThreadCounter>
|
||||
class IntrusivePtrEnabled
|
||||
{
|
||||
public:
|
||||
using IntrusivePtrType = IntrusivePtr<T>;
|
||||
using EnabledBase = T;
|
||||
using EnabledDeleter = Deleter;
|
||||
using EnabledReferenceOp = ReferenceOps;
|
||||
|
||||
void release_reference()
|
||||
{
|
||||
if (reference_count.release())
|
||||
Deleter()(static_cast<T *>(this));
|
||||
}
|
||||
|
||||
void add_reference()
|
||||
{
|
||||
reference_count.add_ref();
|
||||
}
|
||||
|
||||
IntrusivePtrEnabled() = default;
|
||||
|
||||
IntrusivePtrEnabled(const IntrusivePtrEnabled &) = delete;
|
||||
|
||||
void operator=(const IntrusivePtrEnabled &) = delete;
|
||||
|
||||
protected:
|
||||
Util::IntrusivePtr<T> reference_from_this();
|
||||
|
||||
private:
|
||||
ReferenceOps reference_count;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class IntrusivePtr
|
||||
{
|
||||
public:
|
||||
template <typename U>
|
||||
friend class IntrusivePtr;
|
||||
|
||||
IntrusivePtr() = default;
|
||||
|
||||
explicit IntrusivePtr(T *handle)
|
||||
: data(handle)
|
||||
{
|
||||
}
|
||||
|
||||
T &operator*()
|
||||
{
|
||||
return *data;
|
||||
}
|
||||
|
||||
const T &operator*() const
|
||||
{
|
||||
return *data;
|
||||
}
|
||||
|
||||
T *operator->()
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
const T *operator->() const
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
explicit operator bool() const
|
||||
{
|
||||
return data != nullptr;
|
||||
}
|
||||
|
||||
bool operator==(const IntrusivePtr &other) const
|
||||
{
|
||||
return data == other.data;
|
||||
}
|
||||
|
||||
bool operator!=(const IntrusivePtr &other) const
|
||||
{
|
||||
return data != other.data;
|
||||
}
|
||||
|
||||
T *get()
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
const T *get() const
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
using ReferenceBase = IntrusivePtrEnabled<
|
||||
typename T::EnabledBase,
|
||||
typename T::EnabledDeleter,
|
||||
typename T::EnabledReferenceOp>;
|
||||
|
||||
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
|
||||
// Also makes sure that the pointer type actually inherits from this type.
|
||||
if (data)
|
||||
static_cast<ReferenceBase *>(data)->release_reference();
|
||||
data = nullptr;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr &operator=(const IntrusivePtr<U> &other)
|
||||
{
|
||||
static_assert(std::is_base_of<T, U>::value,
|
||||
"Cannot safely assign downcasted intrusive pointers.");
|
||||
|
||||
using ReferenceBase = IntrusivePtrEnabled<
|
||||
typename T::EnabledBase,
|
||||
typename T::EnabledDeleter,
|
||||
typename T::EnabledReferenceOp>;
|
||||
|
||||
reset();
|
||||
data = static_cast<T *>(other.data);
|
||||
|
||||
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
|
||||
// Also makes sure that the pointer type actually inherits from this type.
|
||||
if (data)
|
||||
static_cast<ReferenceBase *>(data)->add_reference();
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntrusivePtr &operator=(const IntrusivePtr &other)
|
||||
{
|
||||
using ReferenceBase = IntrusivePtrEnabled<
|
||||
typename T::EnabledBase,
|
||||
typename T::EnabledDeleter,
|
||||
typename T::EnabledReferenceOp>;
|
||||
|
||||
if (this != &other)
|
||||
{
|
||||
reset();
|
||||
data = other.data;
|
||||
if (data)
|
||||
static_cast<ReferenceBase *>(data)->add_reference();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr(const IntrusivePtr<U> &other)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
|
||||
IntrusivePtr(const IntrusivePtr &other)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
|
||||
~IntrusivePtr()
|
||||
{
|
||||
reset();
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr &operator=(IntrusivePtr<U> &&other) noexcept
|
||||
{
|
||||
reset();
|
||||
data = other.data;
|
||||
other.data = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
IntrusivePtr &operator=(IntrusivePtr &&other) noexcept
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
reset();
|
||||
data = other.data;
|
||||
other.data = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr(IntrusivePtr<U> &&other) noexcept
|
||||
{
|
||||
*this = std::move(other);
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
IntrusivePtr(IntrusivePtr &&other) noexcept
|
||||
{
|
||||
*this = std::move(other);
|
||||
}
|
||||
|
||||
private:
|
||||
T *data = nullptr;
|
||||
};
|
||||
|
||||
template <typename T, typename Deleter, typename ReferenceOps>
|
||||
IntrusivePtr<T> IntrusivePtrEnabled<T, Deleter, ReferenceOps>::reference_from_this()
|
||||
{
|
||||
add_reference();
|
||||
return IntrusivePtr<T>(static_cast<T *>(this));
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
using DerivedIntrusivePtrType = IntrusivePtr<Derived>;
|
||||
|
||||
template <typename T, typename... P>
|
||||
DerivedIntrusivePtrType<T> make_handle(P &&... p)
|
||||
{
|
||||
return DerivedIntrusivePtrType<T>(new T(std::forward<P>(p)...));
|
||||
}
|
||||
|
||||
// Heap-allocates a Derived from the forwarded arguments and returns it through
// Base's intrusive pointer type.
template <typename Base, typename Derived, typename... P>
typename Base::IntrusivePtrType make_derived_handle(P &&... p)
{
    Derived *object = new Derived(std::forward<P>(p)...);
    return typename Base::IntrusivePtrType(object);
}
|
||||
|
||||
template <typename T>
|
||||
using ThreadSafeIntrusivePtrEnabled = IntrusivePtrEnabled<T, std::default_delete<T>, MultiThreadCounter>;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue