From c9d20426d3d56402a271b0c5dd58f1d4c5bc7ac9 Mon Sep 17 00:00:00 2001 From: James Groom Date: Thu, 27 Feb 2020 23:41:07 +0000 Subject: [PATCH] Cleanup HawkFile (#1865) --- BizHawk.Client.Common/RomLoader.cs | 5 +- .../SharpCompressArchiveFile.cs | 37 ++ .../SharpCompressArchiveHandler.cs | 87 --- .../SharpCompressDearchivalMethod.cs | 49 ++ BizHawk.Client.Common/XmlGame.cs | 4 +- .../Extensions/ToolExtensions.cs | 3 +- BizHawk.Client.EmuHawk/MainForm.FileLoader.cs | 4 +- BizHawk.Client.EmuHawk/Program.cs | 2 +- .../tools/HexEditor/HexEditor.cs | 6 +- BizHawk.Common/HawkFile.cs | 540 ------------------ .../HawkFile/HawkArchiveFileItem.cs | 27 + BizHawk.Common/HawkFile/HawkFile.cs | 312 ++++++++++ .../HawkFile/HawkFilePathAttribute.cs | 15 + .../HawkFile/IFileDearchivalMethod.cs | 11 + BizHawk.Common/HawkFile/IHawkArchiveFile.cs | 14 + 15 files changed, 475 insertions(+), 641 deletions(-) create mode 100644 BizHawk.Client.Common/SharpCompressArchiveFile.cs delete mode 100644 BizHawk.Client.Common/SharpCompressArchiveHandler.cs create mode 100644 BizHawk.Client.Common/SharpCompressDearchivalMethod.cs delete mode 100644 BizHawk.Common/HawkFile.cs create mode 100644 BizHawk.Common/HawkFile/HawkArchiveFileItem.cs create mode 100644 BizHawk.Common/HawkFile/HawkFile.cs create mode 100644 BizHawk.Common/HawkFile/HawkFilePathAttribute.cs create mode 100644 BizHawk.Common/HawkFile/IFileDearchivalMethod.cs create mode 100644 BizHawk.Common/HawkFile/IHawkArchiveFile.cs diff --git a/BizHawk.Client.Common/RomLoader.cs b/BizHawk.Client.Common/RomLoader.cs index 5bdd41b8df..fd85d7fa3b 100644 --- a/BizHawk.Client.Common/RomLoader.cs +++ b/BizHawk.Client.Common/RomLoader.cs @@ -259,7 +259,7 @@ namespace BizHawk.Client.Common return false; } - using var file = new HawkFile(); + using var file = new HawkFile(); // I'm almost certain that we'll see NREs unless Open or Parse is called, so I deprecated this ctor as a nag --yoshi // only try mounting a file if a filename was given if 
(!string.IsNullOrEmpty(path)) { @@ -965,9 +965,8 @@ namespace BizHawk.Client.Common case "83P": var ti83Bios = ((CoreFileProvider)nextComm.CoreFileProvider).GetFirmware("TI83", "Rom", true); var ti83BiosPath = ((CoreFileProvider)nextComm.CoreFileProvider).GetFirmwarePath("TI83", "Rom", true); - using (var ti83AsHawkFile = new HawkFile()) + using (var ti83AsHawkFile = new HawkFile(ti83BiosPath)) { - ti83AsHawkFile.Open(ti83BiosPath); var ti83BiosAsRom = new RomGame(ti83AsHawkFile); var ti83 = new TI83(nextComm, ti83BiosAsRom.GameInfo, ti83Bios, GetCoreSettings()); ti83.LinkPort.SendFileToCalc(File.OpenRead(path), false); diff --git a/BizHawk.Client.Common/SharpCompressArchiveFile.cs b/BizHawk.Client.Common/SharpCompressArchiveFile.cs new file mode 100644 index 0000000000..c5fef258bd --- /dev/null +++ b/BizHawk.Client.Common/SharpCompressArchiveFile.cs @@ -0,0 +1,37 @@ +#nullable enable + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +using BizHawk.Common; + +using SharpCompress.Archives; + +namespace BizHawk.Client.Common +{ + /// + public class SharpCompressArchiveFile : IHawkArchiveFile + { + private IArchive? _archive; + + private IEnumerable ArchiveFiles => (_archive ?? 
throw new ObjectDisposedException(nameof(SharpCompressArchiveFile))).Entries.Where(e => !e.IsDirectory); + + public SharpCompressArchiveFile(string path) => _archive = ArchiveFactory.Open(path); + + public void Dispose() + { + _archive?.Dispose(); + _archive = null; + } + + public void ExtractFile(int index, Stream stream) + { + using var entryStream = ArchiveFiles.ElementAt(index).OpenEntryStream(); + entryStream.CopyTo(stream); + } + + public List Scan() => ArchiveFiles.Select((e, i) => new HawkArchiveFileItem(e.Key.Replace('\\', '/'), e.Size, i, i)).ToList(); + } +} diff --git a/BizHawk.Client.Common/SharpCompressArchiveHandler.cs b/BizHawk.Client.Common/SharpCompressArchiveHandler.cs deleted file mode 100644 index b0340554c1..0000000000 --- a/BizHawk.Client.Common/SharpCompressArchiveHandler.cs +++ /dev/null @@ -1,87 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; - -using BizHawk.Common; - -using SharpCompress.Archives; -using SharpCompress.Common; - -namespace BizHawk.Client.Common -{ - /// - /// An ArchiveHandler implemented using SharpCompress from NuGet - /// - /// - /// Intended for Unix, which can't use SevenZipSharp, but later we might sacrifice whatever speed advantage that library has for the lower workload of one cross-platform library. 
- /// - /// - public class SharpCompressArchiveHandler : IHawkFileArchiveHandler - { - private IArchive _archive; - - public void Dispose() - { - _archive?.Dispose(); - _archive = null; - } - - /// - /// whitelist extensions, to avoid thrown exceptions - /// - public string[] ArchiveExtensions = { ".zip", ".gz", ".gzip", ".tar", ".rar", ".7z" }; - - public bool CheckSignature(string fileName, out int offset, out bool isExecutable) - { - offset = 0; - isExecutable = false; - - var pathExt = Path.GetExtension(fileName)?.ToLower(); - if (!ArchiveExtensions.Contains(pathExt)) - return false; - - try - { - using var arcTest = ArchiveFactory.Open(fileName); - switch (arcTest.Type) - { - case ArchiveType.Zip: - case ArchiveType.SevenZip: - return true; - } - } - catch (Exception) - { - // ignored - } - return false; - } - - public IHawkFileArchiveHandler Construct(string path) - { - var ret = new SharpCompressArchiveHandler(); - ret.Open(path); - return ret; - } - - private void Open(string path) => _archive = ArchiveFactory.Open(path); - - public List Scan() => - _archive.Entries.Where(e => !e.IsDirectory) - .Select((e, i) => new HawkFileArchiveItem - { - Name = HawkFile.Util_FixArchiveFilename(e.Key), - Size = e.Size, - Index = i, - ArchiveIndex = i - }) - .ToList(); - - public void ExtractFile(int index, Stream stream) - { - using var entryStream = _archive.Entries.Where(e => !e.IsDirectory).ElementAt(index).OpenEntryStream(); - entryStream.CopyTo(stream); - } - } -} \ No newline at end of file diff --git a/BizHawk.Client.Common/SharpCompressDearchivalMethod.cs b/BizHawk.Client.Common/SharpCompressDearchivalMethod.cs new file mode 100644 index 0000000000..83725b25df --- /dev/null +++ b/BizHawk.Client.Common/SharpCompressDearchivalMethod.cs @@ -0,0 +1,49 @@ +#nullable enable + +using System.IO; +using System.Linq; + +using BizHawk.Common; + +using SharpCompress.Archives; +using SharpCompress.Common; + +namespace BizHawk.Client.Common +{ + /// A dearchival method for 
implemented using SharpCompress from NuGet. + public class SharpCompressDearchivalMethod : IFileDearchivalMethod + { + private SharpCompressDearchivalMethod() {} + + public bool CheckSignature(string fileName, out int offset, out bool isExecutable) + { + offset = 0; + isExecutable = false; + + if (!ArchiveExtensions.Contains(Path.GetExtension(fileName).ToLowerInvariant())) return false; + + try + { + using var arcTest = ArchiveFactory.Open(fileName); + switch (arcTest.Type) + { + case ArchiveType.Zip: + case ArchiveType.SevenZip: + return true; + } + } + catch + { + // ignored + } + return false; + } + + public SharpCompressArchiveFile Construct(string path) => new SharpCompressArchiveFile(path); + + /// whitelist as to avoid exceptions + private static readonly string[] ArchiveExtensions = { ".zip", ".gz", ".gzip", ".tar", ".rar", ".7z" }; + + public static readonly SharpCompressDearchivalMethod Instance = new SharpCompressDearchivalMethod(); + } +} \ No newline at end of file diff --git a/BizHawk.Client.Common/XmlGame.cs b/BizHawk.Client.Common/XmlGame.cs index a9fb8b8969..235972f499 100644 --- a/BizHawk.Client.Common/XmlGame.cs +++ b/BizHawk.Client.Common/XmlGame.cs @@ -61,11 +61,11 @@ namespace BizHawk.Client.Common { if (originalIndex == null) { - originalIndex = f.GetBoundIndex(); + originalIndex = f.BoundIndex; } f.Unbind(); - f.BindArchiveMember(ai); + f.BindArchiveMember(ai.Value); data = f.GetStream().ReadAllBytes(); } else diff --git a/BizHawk.Client.EmuHawk/Extensions/ToolExtensions.cs b/BizHawk.Client.EmuHawk/Extensions/ToolExtensions.cs index c231f4821a..4a306a43f9 100644 --- a/BizHawk.Client.EmuHawk/Extensions/ToolExtensions.cs +++ b/BizHawk.Client.EmuHawk/Extensions/ToolExtensions.cs @@ -63,8 +63,7 @@ namespace BizHawk.Client.EmuHawk.ToolExtensions if (crazyStuff) { //TODO - use standard methods to split filename (hawkfile acquire?) - var hf = new HawkFile(); - hf.Parse(physicalPath); + var hf = new HawkFile(physicalPath ?? 
throw new Exception("this will probably never appear but I can't be bothered checking --yoshi"), delayIOAndDearchive: true); bool canExplore = File.Exists(hf.FullPathWithoutMember); if (canExplore) diff --git a/BizHawk.Client.EmuHawk/MainForm.FileLoader.cs b/BizHawk.Client.EmuHawk/MainForm.FileLoader.cs index 4685f85921..7b29bb9888 100644 --- a/BizHawk.Client.EmuHawk/MainForm.FileLoader.cs +++ b/BizHawk.Client.EmuHawk/MainForm.FileLoader.cs @@ -180,9 +180,9 @@ namespace BizHawk.Client.EmuHawk * relevant files should be extracted, but see the note below for * further details. */ - var archiveHandler = new SharpCompressArchiveHandler(); + var dearchivalMethod = SharpCompressDearchivalMethod.Instance; - if (string.IsNullOrEmpty(archive) && archiveHandler.CheckSignature(file, out _, out _)) + if (string.IsNullOrEmpty(archive) && dearchivalMethod.CheckSignature(file, out _, out _)) { sortedFiles[LoadOrdering.Rom].Add(fileInformation); } diff --git a/BizHawk.Client.EmuHawk/Program.cs b/BizHawk.Client.EmuHawk/Program.cs index 75ed9e0ddf..fecf52ba7c 100644 --- a/BizHawk.Client.EmuHawk/Program.cs +++ b/BizHawk.Client.EmuHawk/Program.cs @@ -101,7 +101,7 @@ namespace BizHawk.Client.EmuHawk TempFileManager.Start(); - HawkFile.ArchiveHandlerFactory = new SharpCompressArchiveHandler(); + HawkFile.DearchivalMethod = SharpCompressDearchivalMethod.Instance; string cmdConfigFile = ArgParser.GetCmdConfigFile(args); if (cmdConfigFile != null) PathManager.SetDefaultIniPath(cmdConfigFile); diff --git a/BizHawk.Client.EmuHawk/tools/HexEditor/HexEditor.cs b/BizHawk.Client.EmuHawk/tools/HexEditor/HexEditor.cs index bd7da76e47..1bd730a814 100644 --- a/BizHawk.Client.EmuHawk/tools/HexEditor/HexEditor.cs +++ b/BizHawk.Client.EmuHawk/tools/HexEditor/HexEditor.cs @@ -410,8 +410,7 @@ namespace BizHawk.Client.EmuHawk return false; } - using var file = new HawkFile(); - file.Open(path); + using var file = new HawkFile(path); if (!file.Exists) { @@ -429,8 +428,7 @@ namespace BizHawk.Client.EmuHawk 
return new byte[] { 0xFF }; } - using var file = new HawkFile(); - file.Open(path); + using var file = new HawkFile(path); if (!file.Exists) { diff --git a/BizHawk.Common/HawkFile.cs b/BizHawk.Common/HawkFile.cs deleted file mode 100644 index 296ea5d888..0000000000 --- a/BizHawk.Common/HawkFile.cs +++ /dev/null @@ -1,540 +0,0 @@ -#nullable disable - -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; - -using BizHawk.Common.StringExtensions; - -// the HawkFile class is excessively engineered with the IHawkFileArchiveHandler to decouple the archive handling from the basic file handling. -// This is so we could drop in an unmanaged dearchiver library optionally later as a performance optimization without ruining the portability of the code. -// Also, we want to be able to use HawkFiles in BizHawk.Common without bringing in a large 7-zip dependency -namespace BizHawk.Common -{ - // TODO: - // split into "bind" and "open (the bound thing)" - // scan archive to flatten interior directories down to a path (maintain our own archive item list) - - /// - /// Bridge between HawkFile and the frontend's implementation of archive management - /// - public interface IHawkFileArchiveHandler : IDisposable - { - // TODO - could this receive a hawkfile itself? possibly handy, in very clever scenarios of mounting fake files - bool CheckSignature(string fileName, out int offset, out bool isExecutable); - - List Scan(); - - IHawkFileArchiveHandler Construct(string path); - - void ExtractFile(int index, Stream stream); - } - - /// - /// HawkFile allows a variety of objects (actual files, archive members) to be treated as normal filesystem objects to be opened, closed, and read. - /// It can understand paths in 'canonical' format which includes /path/to/archive.zip|member.rom as well as /path/to/file.rom - /// When opening an archive, it won't always be clear automatically which member should actually be used. 
- /// Therefore there is a concept of 'binding' where a HawkFile attaches itself to an archive member which is the file that it will actually be using. - /// - public sealed class HawkFile : IDisposable - { - private bool _exists; - private bool _rootExists; - private string _rootPath; - private string _memberPath; - private Stream _rootStream, _boundStream; - private IHawkFileArchiveHandler _extractor; - private bool _isArchive; - private List _archiveItems; - private int? _boundIndex; - - public HawkFile() { } - - /// - /// Set this with an instance which can construct archive handlers as necessary for archive handling. - /// - public static IHawkFileArchiveHandler ArchiveHandlerFactory { get; set; } - - /// - /// Gets a value indicating whether a bound file exists. if there is no bound file, it can't exist. - /// NOTE: this isn't set until the file is Opened. Not too great... - /// - public bool Exists => _exists; - - /// - /// Gets the directory containing the root - /// - public string Directory => Path.GetDirectoryName(_rootPath); - - /// - /// Gets a value indicating whether this instance is bound - /// - public bool IsBound => _boundStream != null; - - /// - /// returns the complete canonical full path ("c:\path\to\archive|member") of the bound file - /// - public string CanonicalFullPath => MakeCanonicalName(_rootPath, _memberPath); - - /// - /// returns the complete canonical name ("archive|member") of the bound file - /// - public string CanonicalName => MakeCanonicalName(Path.GetFileName(_rootPath), _memberPath); - - /// - /// returns the virtual name of the bound file (disregarding the archive). - /// Useful as a basic content identifier. 
- /// - public string Name => GetBoundNameFromCanonical(MakeCanonicalName(_rootPath, _memberPath)); - - /// - /// returns the complete full path of the bound file, excluding the archive member portion - /// - public string FullPathWithoutMember => _rootPath; - - /// - /// returns the member path part of the bound file - /// - public string ArchiveMemberPath => _memberPath; - - /// - /// returns the extension of Name - /// - public string Extension => Path.GetExtension(Name).ToUpper(); - - /// - /// Indicates whether this file is an archive - /// - public bool IsArchive => _isArchive; - - /// - /// Indicates whether the file is an archive member (IsArchive && IsBound[to member]) - /// - public bool IsArchiveMember => IsArchive && IsBound; - - /// is - public IList ArchiveItems => IsArchive - ? _archiveItems - : throw new InvalidOperationException("Cant get archive items from non-archive"); - - /// a stream for the currently bound file - /// no stream bound (haven't called or overload) - public Stream GetStream() - { - if (_boundStream == null) - { - throw new InvalidOperationException($"{nameof(HawkFile)}: Can't call {nameof(GetStream)}() before you've successfully bound something!"); - } - - return _boundStream; - } - - public int? 
GetBoundIndex() - { - return _boundIndex; - } - - /// - /// Utility: Uses full HawkFile processing to determine whether a file exists at the provided path - /// - public static bool ExistsAt(string path) - { - using var file = new HawkFile(path); - return file.Exists; - } - - /// reads all the contents of the file at - /// could not find - public static byte[] ReadAllBytes(string path) - { - using var file = new HawkFile(path); - if (!file.Exists) - { - throw new FileNotFoundException(path); - } - - using Stream stream = file.GetStream(); - using var ms = new MemoryStream((int)stream.Length); - stream.CopyTo(ms); - return ms.GetBuffer(); - } - - /// - /// attempts to read all the content from the file - /// - public byte[] ReadAllBytes() - { - using Stream stream = GetStream(); - var ms = new MemoryStream((int)stream.Length); - stream.CopyTo(ms); - return ms.GetBuffer(); - } - - /// - /// these extensions won't even be tried as archives (removes spurious archive detects since some of the signatures are pretty damn weak) - /// - public string[] NonArchiveExtensions = { ".smc", ".sfc", ".dll" }; - - /// - /// Parses the given filename to create an un-opened HawkFile with some information available about its path constitution - /// - public void Parse(string path) - { - bool isArchivePath = IsCanonicalArchivePath(path); - if (isArchivePath) - { - var parts = path.Split('|'); - path = parts[0]; - _memberPath = parts[1]; - - // we're gonna assume, on parsing, that this is - _isArchive = true; - } - _rootPath = path; - } - - /// Opens the file at . This may take a while if the file is an archive, as it may be accessed and scanned. 
- /// already opened via , this method, or - public void Open(string path) - { - if (_rootPath != null) - { - throw new InvalidOperationException($"Don't reopen a {nameof(HawkFile)}."); - } - - string autobind = null; - bool isArchivePath = IsCanonicalArchivePath(path); - if (isArchivePath) - { - var parts = path.Split('|'); - path = parts[0]; - autobind = parts[1]; - } - - var fi = new FileInfo(path); - - _rootExists = fi.Exists; - if (fi.Exists == false) - { - return; - } - - _rootPath = path; - _exists = true; - - AnalyzeArchive(path); - if (_extractor == null) - { - _rootStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read); - // we could autobind here, but i dont want to - // bind it later with the desired extensions. - } - - if (autobind == null) - { - // non-archive files can be automatically bound this way - if (!isArchivePath) - { - BindRoot(); - } - } - else - { - autobind = autobind.ToUpperInvariant(); - if (_extractor != null) - { - var scanResults = _extractor.Scan(); - for (int i = 0; i < scanResults.Count; i++) - { - if (scanResults[i].Name.ToUpperInvariant() == autobind) - { - BindArchiveMember(i); - return; - } - } - } - - _exists = false; - } - } - - /// - /// Makes a new HawkFile based on the provided path. - /// - public HawkFile(string path) - { - Open(path); - } - - /// - /// binds the specified ArchiveItem which you should have gotten by interrogating an archive hawkfile - /// - public HawkFile BindArchiveMember(HawkFileArchiveItem item) - { - return BindArchiveMember(item.Index); - } - - /// - /// finds an ArchiveItem with the specified name (path) within the archive; returns null if it doesnt exist - /// - public HawkFileArchiveItem FindArchiveMember(string name) - { - return ArchiveItems.FirstOrDefault(ai => ai.Name == name); - } - - /// - /// binds a path within the archive; returns null if that path didnt exist. 
- /// - public HawkFile BindArchiveMember(string name) - { - var ai = FindArchiveMember(name); - if (ai == null) - { - return null; - } - - return BindArchiveMember(ai); - } - - /// binds the selected archive index - /// stream already bound - public HawkFile BindArchiveMember(int index) - { - if (!_rootExists) - { - return this; - } - - if (_boundStream != null) - { - throw new InvalidOperationException("stream already bound!"); - } - - _boundStream = new MemoryStream(); - int archiveIndex = _archiveItems[index].ArchiveIndex; - _extractor.ExtractFile(archiveIndex, _boundStream); - _boundStream.Position = 0; - _memberPath = _archiveItems[index].Name; // TODO - maybe go through our own list of names? maybe not, its indexes dont match.. - Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}"); - _boundIndex = archiveIndex; - return this; - } - - /// - /// Removes any existing binding - /// - public void Unbind() - { - if (_boundStream != null && _boundStream != _rootStream) - { - _boundStream.Close(); - } - - _boundStream = null; - _memberPath = null; - _boundIndex = null; - } - - /// - /// causes the root to be bound (in the case of non-archive files) - /// - private void BindRoot() - { - _boundStream = _rootStream; - Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}"); - } - - /// - /// Binds the first item in the archive (or the file itself). Supposing that there is anything in the archive. - /// - public HawkFile BindFirst() - { - BindFirstOf(); - return this; - } - - /// - /// binds one of the supplied extensions if there is only one match in the archive - /// - public HawkFile BindSoleItemOf(params string[] extensions) - { - return BindByExtensionCore(false, extensions); - } - - /// - /// Binds the first item in the archive (or the file itself) if the extension matches one of the supplied templates. 
- /// You probably should not use use BindSoleItemOf or the archive chooser instead - /// - public HawkFile BindFirstOf(params string[] extensions) - { - return BindByExtensionCore(true, extensions); - } - - /// stream already bound - private HawkFile BindByExtensionCore(bool first, params string[] extensions) - { - if (!_rootExists) - { - return this; - } - - if (_boundStream != null) - { - throw new InvalidOperationException("stream already bound!"); - } - - if (_extractor == null) - { - // open uncompressed file - var extension = Path.GetExtension(_rootPath).Substring(1).ToUpperInvariant(); - if (extensions.Length == 0 || extension.In(extensions)) - { - BindRoot(); - } - - return this; - } - - var candidates = new List(); - for (int i = 0; i < _archiveItems.Count; i++) - { - var e = _archiveItems[i]; - var extension = Path.GetExtension(e.Name).ToUpperInvariant(); - extension = extension.TrimStart('.'); - if (extensions.Length == 0 || extension.In(extensions)) - { - if (first) - { - BindArchiveMember(i); - return this; - } - - candidates.Add(i); - } - } - - if (candidates.Count == 1) - { - BindArchiveMember(candidates[0]); - } - - return this; - } - - private void ScanArchive() - { - _archiveItems = _extractor.Scan(); - } - - private void AnalyzeArchive(string path) - { - // no archive handler == no analysis - if (ArchiveHandlerFactory == null) - { - return; - } - - int offset; - bool isExecutable; - - var pathExt = Path.GetExtension(path).ToLower(); - if (NonArchiveExtensions.Contains(pathExt)) - { - return; - } - - if (ArchiveHandlerFactory.CheckSignature(path, out offset, out isExecutable)) - { - _extractor = ArchiveHandlerFactory.Construct(path); - try - { - ScanArchive(); - _isArchive = true; - } - catch - { - _extractor.Dispose(); - _extractor = null; - _archiveItems = null; - } - } - } - - public void Dispose() - { - Unbind(); - - _extractor?.Dispose(); - _rootStream?.Dispose(); - - _extractor = null; - _rootStream = null; - } - - /// - /// is the supplied 
path a canonical name including an archive? - /// - private static bool IsCanonicalArchivePath(string path) - { - return path.IndexOf('|') != -1; - } - - /// - /// Repairs paths from an archive which contain offensive characters - /// - public static string Util_FixArchiveFilename(string fn) - { - return fn.Replace('\\', '/'); - } - - /// - /// converts a canonical name to a bound name (the bound part, whether or not it is an archive) - /// - static string GetBoundNameFromCanonical(string canonical) - { - var parts = canonical.Split('|'); - return parts[parts.Length - 1]; - } - - /// - /// makes a canonical name from two parts - /// - string MakeCanonicalName(string root, string member) - { - if (member == null) - { - return root; - } - - return $"{root}|{member}"; - } - } - - /// - /// Members returned by IHawkFileArchiveHandler - /// - public class HawkFileArchiveItem - { - /// - /// Gets or sets the member name - /// - public string Name { get; set; } - - /// - /// Gets or sets the size of member file - /// - public long Size { get; set; } - - /// - /// Gets or sets the index of this archive item - /// - public int Index { get; set; } - - /// - /// Gets or sets the index WITHIN THE ARCHIVE (for internal tracking by a IHawkFileArchiveHandler) of the member - /// - public int ArchiveIndex { get; set; } - } -} - \ No newline at end of file diff --git a/BizHawk.Common/HawkFile/HawkArchiveFileItem.cs b/BizHawk.Common/HawkFile/HawkArchiveFileItem.cs new file mode 100644 index 0000000000..58ad999ca9 --- /dev/null +++ b/BizHawk.Common/HawkFile/HawkArchiveFileItem.cs @@ -0,0 +1,27 @@ +namespace BizHawk.Common +{ + /// Used by to represent archive members. 
+ public readonly struct HawkArchiveFileItem + { + /// the index of the member within the archive, not to be confused with + /// this is for implementations to use internally + public readonly int ArchiveIndex; + + /// the index of this archive item + public readonly int Index; + + /// the member name + public readonly string Name; + + /// the size of member file + public readonly long Size; + + public HawkArchiveFileItem(string name, long size, int index, int archiveIndex) + { + Name = name; + Size = size; + Index = index; + ArchiveIndex = archiveIndex; + } + } +} diff --git a/BizHawk.Common/HawkFile/HawkFile.cs b/BizHawk.Common/HawkFile/HawkFile.cs new file mode 100644 index 0000000000..6477a15b9e --- /dev/null +++ b/BizHawk.Common/HawkFile/HawkFile.cs @@ -0,0 +1,312 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +using BizHawk.Common.StringExtensions; + +namespace BizHawk.Common +{ + /// + /// This class can represent a variety of file-like objects—"regular" files on disk, archive members, SMB shares(?)—encapsulating them so any may be opened/read/closed like files on disk.
+ /// When opening an archive, it won't always be clear automatically which member should actually be used. + /// Therefore, we define the concept of "binding": the attaches itself to an archive member, which is the file that it will actually be using.
+ /// We also define a simple extension to the Unix path format using '|': /path/to/file.rom is readable, but so is /path/to/archive.zip|member.rom. + /// Strings formatted this way are annotated [HawkFilePath]. + ///
+ /// + /// This class is defensively designed around to allow swapping out implementations (for speed) without adding any dependencies to this project.
+ /// TODO split into "bind" and "open <the bound thing>"
+ /// TODO scan archive to flatten interior directories down to a path (maintain our own archive item list) + ///
+ public sealed class HawkFile : IDisposable + { + private List? _archiveItems; + + private Stream? _boundStream; + + private IHawkArchiveFile? _extractor; + + private bool _rootExists; + + private Stream? _rootStream; + + /// These file extensions are assumed to not be archives (with default value, mitigates high false positive rate caused by weak archive detection signatures). + public IReadOnlyCollection NonArchiveExtensions = CommonNonArchiveExtensions; + + /// is + public IList ArchiveItems => (IsArchive ? _archiveItems : null) ?? throw new InvalidOperationException("Can't get archive items from non-archive"); + + /// the member path part of the bound file + public string? ArchiveMemberPath { get; private set; } + + public int? BoundIndex { get; private set; } + + /// returns the complete canonical full path ("c:\path\to\archive|member") of the bound file + [HawkFilePath] + public string? CanonicalFullPath => MakeCanonicalName(SafeFullPathWithoutMember, ArchiveMemberPath); + + /// returns the complete canonical name ("archive|member") of the bound file + [HawkFilePath] + public string? CanonicalName => MakeCanonicalName(Path.GetFileName(SafeFullPathWithoutMember), ArchiveMemberPath); + + /// Gets the directory containing the root + public string? Directory => Path.GetDirectoryName(SafeFullPathWithoutMember); + + /// iff a file is bound and the bound file exists + /// NOTE: this isn't set until the file is Opened. Not too great... + public bool Exists { get; private set; } + + /// returns the extension of Name + public string? Extension => Path.GetExtension(Name).ToUpperInvariant(); + + /// returns the complete full path of the bound file, excluding the archive member portion + /// assigned in and , but if neither is called may be and cause NREs + public string? 
FullPathWithoutMember { get; private set; }
+
+		/// <summary>Set by <see cref="Open"/> once the dearchiver has scanned the file successfully, and by <see cref="Parse"/> when the path contains '|'.</summary>
+		public bool IsArchive { get; private set; }
+
+		/// <summary>Indicates whether the file is an archive member (IsArchive &amp;&amp; IsBound[to member])</summary>
+		public bool IsArchiveMember => IsArchive && IsBound;
+
+		/// <summary>Gets a value indicating whether this instance is bound</summary>
+		public bool IsBound => _boundStream != null;
+
+		/// <summary>returns the virtual name of the bound file (disregarding the archive). Useful as a basic content identifier.</summary>
+		public string Name => ArchiveMemberPath ?? SafeFullPathWithoutMember;
+
+		/// <summary><see cref="FullPathWithoutMember"/>, or an exception if it was never assigned (only possible via the deprecated no-arg ctor).</summary>
+		private string SafeFullPathWithoutMember => FullPathWithoutMember ?? throw new NullReferenceException($"this is related to the deprecated no-arg ctor, {nameof(FullPathWithoutMember)} is only assigned in {nameof(Open)}/{nameof(Parse)}");
+
+		[Obsolete]
+		public HawkFile() {}
+
+		/// <summary>Makes a new HawkFile based on the provided path.</summary>
+		/// <param name="path">path to a file, or to an archive member in the <c>archive|member</c> format</param>
+		/// <param name="delayIOAndDearchive">If <see langword="true"/>, <see cref="Parse"/> will be called instead of <see cref="Open"/>.</param>
+		public HawkFile([HawkFilePath] string path, bool delayIOAndDearchive = false)
+		{
+			if (delayIOAndDearchive) Parse(path);
+			else Open(path);
+		}
+
+		/// <summary>binds the specified ArchiveItem which you should have gotten by interrogating an archive hawkfile</summary>
+		public HawkFile? BindArchiveMember(HawkArchiveFileItem item) => BindArchiveMember(item.Index);
+
+		/// <summary>binds the selected archive index</summary>
+		/// <param name="index">index into the scanned item list (not the archive's own index)</param>
+		/// <returns>this instance; nothing is bound if the root file doesn't exist</returns>
+		/// <exception cref="InvalidOperationException">stream already bound, or the file is not an archive</exception>
+		public HawkFile? BindArchiveMember(int index)
+		{
+			if (!_rootExists) return this;
+			if (_boundStream != null) throw new InvalidOperationException("stream already bound!");
+			if (_archiveItems == null || _extractor == null) throw new InvalidOperationException("not an archive");
+
+			var archiveIndex = _archiveItems[index].ArchiveIndex;
+			// the member is extracted fully into memory, then rewound so readers start at the beginning
+			_boundStream = new MemoryStream();
+			_extractor.ExtractFile(archiveIndex, _boundStream);
+			_boundStream.Position = 0;
+			ArchiveMemberPath = _archiveItems[index].Name; // TODO - maybe go through our own list of names? maybe not, its indices don't match...
+#if DEBUG
+			Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}");
+#endif
+			BoundIndex = archiveIndex;
+			return this;
+		}
+
+		/// <summary>binds a path within the archive; returns null if that path didnt exist.</summary>
+		public HawkFile? BindArchiveMember(string? name)
+		{
+			var ai = FindArchiveMember(name);
+			return ai == null ? null : BindArchiveMember(ai.Value);
+		}
+
+		/// <summary>Shared implementation of <see cref="BindFirstOf"/> and <see cref="BindSoleItemOf"/>.</summary>
+		/// <param name="first">when <see langword="true"/> the first match is bound; otherwise a member is bound only if it is the sole match</param>
+		/// <param name="extensions">extensions without the leading '.'; an empty list matches anything</param>
+		/// <returns>this instance, whether or not anything was bound</returns>
+		/// <exception cref="InvalidOperationException">stream already bound</exception>
+		private HawkFile? BindByExtensionCore(bool first, params string[] extensions)
+		{
+			if (!_rootExists) return this;
+			if (_boundStream != null) throw new InvalidOperationException("stream already bound!");
+
+			if (_archiveItems == null || _extractor == null)
+			{
+				// open uncompressed file
+				if (extensions.Length == 0
+					|| ExtensionWithoutDot(SafeFullPathWithoutMember).In(extensions))
+				{
+					BindRoot();
+				}
+			}
+			else if (extensions.Length != 0)
+			{
+				var candidates = _archiveItems.Where(item => ExtensionWithoutDot(item.Name).In(extensions)).ToList();
+				if (candidates.Count != 0 && first || candidates.Count == 1) BindArchiveMember(candidates[0].Index);
+			}
+			else if (first ? _archiveItems.Count != 0 : _archiveItems.Count == 1)
+			{
+				// no filter: take member 0, but never index into an empty archive
+				BindArchiveMember(0);
+			}
+
+			return this;
+		}
+
+		/// <summary>Binds the first item in the archive (or the file itself), assuming that there is anything in the archive.</summary>
+		public HawkFile? BindFirst() => BindFirstOf();
+
+		/// <summary>Binds the first item in the archive (or the file itself) if the extension matches one of the supplied templates.</summary>
+		/// <remarks>You probably should use <see cref="BindSoleItemOf"/> or the archive chooser instead.</remarks>
+		public HawkFile? BindFirstOf(params string[] extensions) => BindByExtensionCore(true, extensions);
+
+		/// <summary>causes the root to be bound (in the case of non-archive files)</summary>
+		private void BindRoot()
+		{
+			_boundStream = _rootStream;
+#if DEBUG
+			Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}");
+#endif
+		}
+
+		/// <summary>binds one of the supplied extensions if there is only one match in the archive</summary>
+		public HawkFile? BindSoleItemOf(params string[] extensions) => BindByExtensionCore(false, extensions);
+
+		/// <summary>Removes any binding, then disposes the archive handler and the root stream.</summary>
+		public void Dispose()
+		{
+			Unbind();
+			_extractor?.Dispose();
+			_extractor = null;
+			_rootStream?.Dispose();
+			_rootStream = null;
+		}
+
+		/// <summary>finds an ArchiveItem with the specified name (path) within the archive; returns null if it doesnt exist</summary>
+		public HawkArchiveFileItem? FindArchiveMember(string? name)
+		{
+			// FirstOrDefault would produce default(HawkArchiveFileItem) instead of null for a value type,
+			// which callers would mistake for a real match, so search by hand
+			foreach (var ai in ArchiveItems)
+			{
+				if (ai.Name == name) return ai;
+			}
+			return null;
+		}
+
+		/// <returns>a stream for the currently bound file</returns>
+		/// <exception cref="InvalidOperationException">no stream bound (haven't called <see cref="BindArchiveMember(int)"/> or overload)</exception>
+		public Stream GetStream() => _boundStream ?? throw new InvalidOperationException($"{nameof(HawkFile)}: Can't call {nameof(GetStream)}() before you've successfully bound something!");
+
+		/// <summary>Opens the file at <paramref name="path"/>. This may take a while if the file is an archive, as it may be accessed and scanned.</summary>
+		/// <exception cref="InvalidOperationException">already opened via <see cref="HawkFile(string, bool)"/>, this method, or <see cref="Parse"/></exception>
+		public void Open([HawkFilePath] string path)
+		{
+			if (FullPathWithoutMember != null) throw new InvalidOperationException($"Don't reopen a {nameof(HawkFile)}.");
+
+			string? autobind = null;
+			var split = SplitArchiveMemberPath(path);
+			if (split != null) (path, autobind) = split.Value;
+			_rootExists = new FileInfo(path).Exists;
+			if (!_rootExists) return;
+			FullPathWithoutMember = path;
+			Exists = true;
+
+			if (DearchivalMethod != null
+				&& !NonArchiveExtensions.Contains(Path.GetExtension(path).ToLowerInvariant())
+				&& DearchivalMethod.CheckSignature(path, out _, out _))
+			{
+				_extractor = DearchivalMethod.Construct(path);
+				try
+				{
+					_archiveItems = _extractor.Scan();
+					IsArchive = true;
+				}
+				catch
+				{
+					// deliberate best-effort: a file that can't be scanned is treated as a regular file below
+					_archiveItems = null;
+					_extractor.Dispose();
+					_extractor = null;
+				}
+			}
+			if (_extractor == null)
+			{
+				_rootStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
+				// we could autobind here, but i dont want to
+				// bind it later with the desired extensions.
+			}
+
+			if (autobind == null)
+			{
+				// non-archive files can be automatically bound this way
+				BindRoot();
+				return;
+			}
+
+			// reuse the list from the scan above (BindArchiveMember indexes into it anyway) instead of scanning again
+			var scanResults = _archiveItems;
+			if (_extractor != null && scanResults != null)
+			{
+				for (int i = 0, l = scanResults.Count; i < l; i++)
+				{
+					if (string.Equals(scanResults[i].Name, autobind, StringComparison.InvariantCultureIgnoreCase))
+					{
+						BindArchiveMember(i);
+						return;
+					}
+				}
+			}
+
+			// a member was requested but couldn't be found (or the file isn't an archive)
+			Exists = false;
+		}
+
+		/// <summary>Populates only the fields whose value is contained in <paramref name="path"/>, without touching the filesystem or any archive.</summary>
+		public void Parse([HawkFilePath] string path)
+		{
+			var split = SplitArchiveMemberPath(path);
+			if (split != null)
+			{
+				(path, ArchiveMemberPath) = split.Value;
+				IsArchive = true; // we'll assume that the '|' is only used for archives
+			}
+			FullPathWithoutMember = path;
+		}
+
+		/// <summary>attempts to read all the content from the file</summary>
+		/// <remarks>Reads from the bound stream's current position and disposes that stream afterwards.</remarks>
+		/// <exception cref="InvalidOperationException">nothing is bound, see <see cref="GetStream"/></exception>
+		public byte[] ReadAllBytes()
+		{
+			using var stream = GetStream();
+			using var ms = new MemoryStream((int) stream.Length);
+			stream.CopyTo(ms);
+			var buffer = ms.GetBuffer();
+			// the backing buffer is only the right size when exactly Length bytes were copied; otherwise trim it
+			return buffer.Length == ms.Length ? buffer : ms.ToArray();
+		}
+
+		/// <summary>Removes any existing binding</summary>
+		public void Unbind()
+		{
+			// the root stream is owned by this instance and closed in Dispose, so only close extracted member streams here
+			if (_boundStream != _rootStream) _boundStream?.Close();
+			_boundStream = null;
+			ArchiveMemberPath = null;
+			BoundIndex = null;
+		}
+
+		/// <summary>Set this with an instance which can construct archive handlers as necessary for archive handling.</summary>
+		public static IFileDearchivalMethod<IHawkArchiveFile>? DearchivalMethod;
+
+		private static readonly IReadOnlyCollection<string> CommonNonArchiveExtensions = new[] { ".smc", ".sfc", ".dll" };
+
+		/// <summary>Utility: Uses full HawkFile processing to determine whether a file exists at the provided path</summary>
+		public static bool ExistsAt(string path)
+		{
+			using var file = new HawkFile(path);
+			return file.Exists;
+		}
+
+		/// <returns>the extension of <paramref name="path"/> without its leading '.', or an empty string if it has none</returns>
+		private static string ExtensionWithoutDot(string path)
+		{
+			var ext = Path.GetExtension(path);
+			return ext.Length == 0 ? ext : ext.Substring(1);
+		}
+
+		/// <returns><paramref name="root"/> alone, or <c>root|member</c> when <paramref name="member"/> is given</returns>
+		[return: HawkFilePath]
+		private static string MakeCanonicalName(string root, string? member) => member == null ? root : $"{root}|{member}";
+
+		/// <summary>reads all the contents of the file at <paramref name="path"/></summary>
+		/// <exception cref="FileNotFoundException">could not find <paramref name="path"/></exception>
+		public static byte[] ReadAllBytes(string path)
+		{
+			using var file = new HawkFile(path);
+			return file.Exists ? file.ReadAllBytes() : throw new FileNotFoundException(path);
+		}
+
+		/// <returns>path / member path pair iff <paramref name="path"/> contains '|', otherwise <see langword="null"/></returns>
+		private static (string, string)? SplitArchiveMemberPath([HawkFilePath] string path)
+		{
+			var i = path.LastIndexOf('|');
+#if DEBUG
+			if (path.IndexOf('|') != i) Console.WriteLine($"{nameof(HawkFile)} path contains multiple '|'");
+#endif
+			return i == -1 ? ((string, string)?) null : (path.Substring(0, i), path.Substring(i + 1));
+		}
+	}
+}
diff --git a/BizHawk.Common/HawkFile/HawkFilePathAttribute.cs b/BizHawk.Common/HawkFile/HawkFilePathAttribute.cs
new file mode 100644
index 0000000000..4c8ae11d68
--- /dev/null
+++ b/BizHawk.Common/HawkFile/HawkFilePathAttribute.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace BizHawk.Common
+{
+	/// Indicates that a string value is formatted as a path, with an extension to the format: paths followed by '|' and then a relative path represent a member of an archive file.
+	///
+	/// The archive's path may be absolute or relative. If the path doesn't specify a member (it's a regular path), it obviously may also be absolute or relative.
+ /// The last '|' is the separator if multiple appear in the path, but the behaviour of such paths generally is undefined. Warnings may be printed on Debug builds.
+ /// Paths are still OS-dependent. C:\path\to\file and C:\path\to\archive|member are valid on Windows, /path/to/file and /path/to/archive|member are valid everywhere else.
+ /// This attribute is for humans.
+ /// TODO how are local (\\?\C:\file.txt) and remote (\\?\UNC\Server\Share\file.txt) UNCs treated by WinForms, and are we able to handle at least the valid ones? --yoshi + ///
+ [AttributeUsage(AttributeTargets.Property | AttributeTargets.Parameter | AttributeTargets.ReturnValue)] + public sealed class HawkFilePathAttribute : Attribute {} +} diff --git a/BizHawk.Common/HawkFile/IFileDearchivalMethod.cs b/BizHawk.Common/HawkFile/IFileDearchivalMethod.cs new file mode 100644 index 0000000000..ce2857b4b5 --- /dev/null +++ b/BizHawk.Common/HawkFile/IFileDearchivalMethod.cs @@ -0,0 +1,11 @@ +namespace BizHawk.Common +{ + /// Used by to delegate archive management. + public interface IFileDearchivalMethod where T : IHawkArchiveFile + { + /// TODO could this receive a itself? possibly handy, in very clever scenarios of mounting fake files + bool CheckSignature(string fileName, out int offset, out bool isExecutable); + + T Construct(string path); + } +} diff --git a/BizHawk.Common/HawkFile/IHawkArchiveFile.cs b/BizHawk.Common/HawkFile/IHawkArchiveFile.cs new file mode 100644 index 0000000000..c5d2080246 --- /dev/null +++ b/BizHawk.Common/HawkFile/IHawkArchiveFile.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.IO; + +namespace BizHawk.Common +{ + /// + public interface IHawkArchiveFile : IDisposable + { + void ExtractFile(int index, Stream stream); + + List Scan(); + } +}