Cleanup HawkFile (#1865)

This commit is contained in:
James Groom 2020-02-27 23:41:07 +00:00 committed by GitHub
parent 5f5bf639c2
commit c9d20426d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 475 additions and 641 deletions

View File

@ -259,7 +259,7 @@ namespace BizHawk.Client.Common
return false;
}
using var file = new HawkFile();
using var file = new HawkFile(); // I'm almost certain that we'll see NREs unless Open or Parse is called, so I deprecated this ctor as a nag --yoshi
// only try mounting a file if a filename was given
if (!string.IsNullOrEmpty(path))
{
@ -965,9 +965,8 @@ namespace BizHawk.Client.Common
case "83P":
var ti83Bios = ((CoreFileProvider)nextComm.CoreFileProvider).GetFirmware("TI83", "Rom", true);
var ti83BiosPath = ((CoreFileProvider)nextComm.CoreFileProvider).GetFirmwarePath("TI83", "Rom", true);
using (var ti83AsHawkFile = new HawkFile())
using (var ti83AsHawkFile = new HawkFile(ti83BiosPath))
{
ti83AsHawkFile.Open(ti83BiosPath);
var ti83BiosAsRom = new RomGame(ti83AsHawkFile);
var ti83 = new TI83(nextComm, ti83BiosAsRom.GameInfo, ti83Bios, GetCoreSettings<TI83>());
ti83.LinkPort.SendFileToCalc(File.OpenRead(path), false);

View File

@ -0,0 +1,37 @@
#nullable enable
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using BizHawk.Common;
using SharpCompress.Archives;
namespace BizHawk.Client.Common
{
/// <summary>An <see cref="IHawkArchiveFile"/> which reads its members through a SharpCompress <see cref="IArchive"/>.</summary>
/// <seealso cref="SharpCompressDearchivalMethod"/>
public class SharpCompressArchiveFile : IHawkArchiveFile
{
	/// <remarks><see langword="null"/> once <see cref="Dispose"/> has run</remarks>
	private IArchive? _archive;

	/// <summary>The archive's entries with directories filtered out.</summary>
	/// <exception cref="ObjectDisposedException">this instance has already been disposed</exception>
	private IEnumerable<IArchiveEntry> ArchiveFiles
	{
		get
		{
			if (_archive == null) throw new ObjectDisposedException(nameof(SharpCompressArchiveFile));
			return _archive.Entries.Where(entry => !entry.IsDirectory);
		}
	}

	/// <summary>Opens the archive at <paramref name="path"/> via <see cref="ArchiveFactory"/>.</summary>
	public SharpCompressArchiveFile(string path)
	{
		_archive = ArchiveFactory.Open(path);
	}

	/// <summary>Disposes the underlying archive, if it hasn't been already.</summary>
	public void Dispose()
	{
		if (_archive == null) return;
		_archive.Dispose();
		_archive = null;
	}

	/// <summary>Copies the contents of the <paramref name="index"/>-th non-directory entry into <paramref name="stream"/>.</summary>
	public void ExtractFile(int index, Stream stream)
	{
		var entry = ArchiveFiles.ElementAt(index);
		using var entryStream = entry.OpenEntryStream();
		entryStream.CopyTo(stream);
	}

	/// <summary>Lists every non-directory entry, with backslashes in names replaced by forward slashes.</summary>
	/// <returns>one item per entry; both indices of each item are its position in this listing</returns>
	public List<HawkArchiveFileItem> Scan()
	{
		var items = new List<HawkArchiveFileItem>();
		var position = 0;
		foreach (var entry in ArchiveFiles)
		{
			items.Add(new HawkArchiveFileItem(entry.Key.Replace('\\', '/'), entry.Size, position, position));
			position++;
		}
		return items;
	}
}
}

View File

@ -1,87 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using BizHawk.Common;
using SharpCompress.Archives;
using SharpCompress.Common;
namespace BizHawk.Client.Common
{
/// <summary>
/// An <see cref="IHawkFileArchiveHandler">ArchiveHandler</see> implemented using SharpCompress from NuGet
/// </summary>
/// <remarks>
/// Intended for Unix, which can't use SevenZipSharp, but later we might sacrifice whatever speed advantage that library has for the lower workload of one cross-platform library.
/// </remarks>
/// <seealso cref="SevenZipSharpArchiveHandler"/>
public class SharpCompressArchiveHandler : IHawkFileArchiveHandler
{
private IArchive _archive;
public void Dispose()
{
_archive?.Dispose();
_archive = null;
}
/// <summary>
/// whitelist extensions, to avoid thrown exceptions
/// </summary>
public string[] ArchiveExtensions = { ".zip", ".gz", ".gzip", ".tar", ".rar", ".7z" };
public bool CheckSignature(string fileName, out int offset, out bool isExecutable)
{
offset = 0;
isExecutable = false;
var pathExt = Path.GetExtension(fileName)?.ToLower();
if (!ArchiveExtensions.Contains(pathExt))
return false;
try
{
using var arcTest = ArchiveFactory.Open(fileName);
switch (arcTest.Type)
{
case ArchiveType.Zip:
case ArchiveType.SevenZip:
return true;
}
}
catch (Exception)
{
// ignored
}
return false;
}
public IHawkFileArchiveHandler Construct(string path)
{
var ret = new SharpCompressArchiveHandler();
ret.Open(path);
return ret;
}
private void Open(string path) => _archive = ArchiveFactory.Open(path);
public List<HawkFileArchiveItem> Scan() =>
_archive.Entries.Where(e => !e.IsDirectory)
.Select((e, i) => new HawkFileArchiveItem
{
Name = HawkFile.Util_FixArchiveFilename(e.Key),
Size = e.Size,
Index = i,
ArchiveIndex = i
})
.ToList();
public void ExtractFile(int index, Stream stream)
{
using var entryStream = _archive.Entries.Where(e => !e.IsDirectory).ElementAt(index).OpenEntryStream();
entryStream.CopyTo(stream);
}
}
}

View File

@ -0,0 +1,49 @@
#nullable enable
using System.IO;
using System.Linq;
using BizHawk.Common;
using SharpCompress.Archives;
using SharpCompress.Common;
namespace BizHawk.Client.Common
{
/// <summary>The <see cref="IFileDearchivalMethod">dearchival method</see> that lets <see cref="HawkFile"/> read archives through <c>SharpCompress</c> (from NuGet).</summary>
/// <remarks>The constructor is private; use the shared <see cref="Instance"/>.</remarks>
public class SharpCompressDearchivalMethod : IFileDearchivalMethod<SharpCompressArchiveFile>
{
	private SharpCompressDearchivalMethod() {}

	/// <summary>Decides whether the file at <paramref name="fileName"/> should be handled as an archive.</summary>
	/// <param name="offset">always set to <c>0</c></param>
	/// <param name="isExecutable">always set to <see langword="false"/></param>
	/// <returns><see langword="true"/> iff the extension is whitelisted and SharpCompress reports the file as a Zip or 7-Zip archive</returns>
	public bool CheckSignature(string fileName, out int offset, out bool isExecutable)
	{
		offset = 0;
		isExecutable = false;

		var extension = Path.GetExtension(fileName).ToLowerInvariant();
		if (!ArchiveExtensions.Contains(extension)) return false;

		try
		{
			using var probe = ArchiveFactory.Open(fileName);
			var detectedType = probe.Type;
			return detectedType == ArchiveType.Zip || detectedType == ArchiveType.SevenZip;
		}
		catch
		{
			// deliberately best-effort: anything that can't be opened and inspected is simply reported as "not an archive"
			return false;
		}
	}

	/// <summary>Opens the archive at <paramref name="path"/>.</summary>
	public SharpCompressArchiveFile Construct(string path)
	{
		return new SharpCompressArchiveFile(path);
	}

	/// <remarks>only files with one of these extensions are handed to SharpCompress, as to avoid exceptions</remarks>
	private static readonly string[] ArchiveExtensions = { ".zip", ".gz", ".gzip", ".tar", ".rar", ".7z" };

	/// <summary>The single shared instance of this class.</summary>
	public static readonly SharpCompressDearchivalMethod Instance = new SharpCompressDearchivalMethod();
}
}

View File

@ -61,11 +61,11 @@ namespace BizHawk.Client.Common
{
if (originalIndex == null)
{
originalIndex = f.GetBoundIndex();
originalIndex = f.BoundIndex;
}
f.Unbind();
f.BindArchiveMember(ai);
f.BindArchiveMember(ai.Value);
data = f.GetStream().ReadAllBytes();
}
else

View File

@ -63,8 +63,7 @@ namespace BizHawk.Client.EmuHawk.ToolExtensions
if (crazyStuff)
{
//TODO - use standard methods to split filename (hawkfile acquire?)
var hf = new HawkFile();
hf.Parse(physicalPath);
var hf = new HawkFile(physicalPath ?? throw new Exception("this will probably never appear but I can't be bothered checking --yoshi"), delayIOAndDearchive: true);
bool canExplore = File.Exists(hf.FullPathWithoutMember);
if (canExplore)

View File

@ -180,9 +180,9 @@ namespace BizHawk.Client.EmuHawk
* relevant files should be extracted, but see the note below for
* further details.
*/
var archiveHandler = new SharpCompressArchiveHandler();
var dearchivalMethod = SharpCompressDearchivalMethod.Instance;
if (string.IsNullOrEmpty(archive) && archiveHandler.CheckSignature(file, out _, out _))
if (string.IsNullOrEmpty(archive) && dearchivalMethod.CheckSignature(file, out _, out _))
{
sortedFiles[LoadOrdering.Rom].Add(fileInformation);
}

View File

@ -101,7 +101,7 @@ namespace BizHawk.Client.EmuHawk
TempFileManager.Start();
HawkFile.ArchiveHandlerFactory = new SharpCompressArchiveHandler();
HawkFile.DearchivalMethod = SharpCompressDearchivalMethod.Instance;
string cmdConfigFile = ArgParser.GetCmdConfigFile(args);
if (cmdConfigFile != null) PathManager.SetDefaultIniPath(cmdConfigFile);

View File

@ -410,8 +410,7 @@ namespace BizHawk.Client.EmuHawk
return false;
}
using var file = new HawkFile();
file.Open(path);
using var file = new HawkFile(path);
if (!file.Exists)
{
@ -429,8 +428,7 @@ namespace BizHawk.Client.EmuHawk
return new byte[] { 0xFF };
}
using var file = new HawkFile();
file.Open(path);
using var file = new HawkFile(path);
if (!file.Exists)
{

View File

@ -1,540 +0,0 @@
#nullable disable
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using BizHawk.Common.StringExtensions;
// the HawkFile class is excessively engineered with the IHawkFileArchiveHandler to decouple the archive handling from the basic file handling.
// This is so we could drop in an unmanaged dearchiver library optionally later as a performance optimization without ruining the portability of the code.
// Also, we want to be able to use HawkFiles in BizHawk.Common without bringing in a large 7-zip dependency
namespace BizHawk.Common
{
// TODO:
// split into "bind" and "open (the bound thing)"
// scan archive to flatten interior directories down to a path (maintain our own archive item list)
/// <summary>
/// Bridge between HawkFile and the frontend's implementation of archive management
/// </summary>
public interface IHawkFileArchiveHandler : IDisposable
{
// TODO - could this receive a hawkfile itself? possibly handy, in very clever scenarios of mounting fake files
bool CheckSignature(string fileName, out int offset, out bool isExecutable);
List<HawkFileArchiveItem> Scan();
IHawkFileArchiveHandler Construct(string path);
void ExtractFile(int index, Stream stream);
}
/// <summary>
/// HawkFile allows a variety of objects (actual files, archive members) to be treated as normal filesystem objects to be opened, closed, and read.
/// It can understand paths in 'canonical' format which includes /path/to/archive.zip|member.rom as well as /path/to/file.rom
/// When opening an archive, it won't always be clear automatically which member should actually be used.
/// Therefore there is a concept of 'binding' where a HawkFile attaches itself to an archive member which is the file that it will actually be using.
/// </summary>
public sealed class HawkFile : IDisposable
{
private bool _exists;
private bool _rootExists;
private string _rootPath;
private string _memberPath;
private Stream _rootStream, _boundStream;
private IHawkFileArchiveHandler _extractor;
private bool _isArchive;
private List<HawkFileArchiveItem> _archiveItems;
private int? _boundIndex;
public HawkFile() { }
/// <summary>
/// Set this with an instance which can construct archive handlers as necessary for archive handling.
/// </summary>
public static IHawkFileArchiveHandler ArchiveHandlerFactory { get; set; }
/// <summary>
/// Gets a value indicating whether a bound file exists. if there is no bound file, it can't exist.
/// NOTE: this isn't set until the file is Opened. Not too great...
/// </summary>
public bool Exists => _exists;
/// <summary>
/// Gets the directory containing the root
/// </summary>
public string Directory => Path.GetDirectoryName(_rootPath);
/// <summary>
/// Gets a value indicating whether this instance is bound
/// </summary>
public bool IsBound => _boundStream != null;
/// <summary>
/// returns the complete canonical full path ("c:\path\to\archive|member") of the bound file
/// </summary>
public string CanonicalFullPath => MakeCanonicalName(_rootPath, _memberPath);
/// <summary>
/// returns the complete canonical name ("archive|member") of the bound file
/// </summary>
public string CanonicalName => MakeCanonicalName(Path.GetFileName(_rootPath), _memberPath);
/// <summary>
/// returns the virtual name of the bound file (disregarding the archive).
/// Useful as a basic content identifier.
/// </summary>
public string Name => GetBoundNameFromCanonical(MakeCanonicalName(_rootPath, _memberPath));
/// <summary>
/// returns the complete full path of the bound file, excluding the archive member portion
/// </summary>
public string FullPathWithoutMember => _rootPath;
/// <summary>
/// returns the member path part of the bound file
/// </summary>
public string ArchiveMemberPath => _memberPath;
/// <summary>
/// returns the extension of Name
/// </summary>
public string Extension => Path.GetExtension(Name).ToUpper();
/// <summary>
/// Indicates whether this file is an archive
/// </summary>
public bool IsArchive => _isArchive;
/// <summary>
/// Indicates whether the file is an archive member (IsArchive && IsBound[to member])
/// </summary>
public bool IsArchiveMember => IsArchive && IsBound;
/// <exception cref="InvalidOperationException"><see cref="IsArchive"/> is <see langword="false"/></exception>
public IList<HawkFileArchiveItem> ArchiveItems => IsArchive
? _archiveItems
: throw new InvalidOperationException("Cant get archive items from non-archive");
/// <returns>a stream for the currently bound file</returns>
/// <exception cref="InvalidOperationException">no stream bound (haven't called <see cref="BindArchiveMember(int)"/> or overload)</exception>
public Stream GetStream()
{
if (_boundStream == null)
{
throw new InvalidOperationException($"{nameof(HawkFile)}: Can't call {nameof(GetStream)}() before you've successfully bound something!");
}
return _boundStream;
}
public int? GetBoundIndex()
{
return _boundIndex;
}
/// <summary>
/// Utility: Uses full HawkFile processing to determine whether a file exists at the provided path
/// </summary>
public static bool ExistsAt(string path)
{
using var file = new HawkFile(path);
return file.Exists;
}
/// <summary>reads all the contents of the file at <paramref name="path"/></summary>
/// <exception cref="FileNotFoundException">could not find <paramref name="path"/></exception>
public static byte[] ReadAllBytes(string path)
{
using var file = new HawkFile(path);
if (!file.Exists)
{
throw new FileNotFoundException(path);
}
using Stream stream = file.GetStream();
using var ms = new MemoryStream((int)stream.Length);
stream.CopyTo(ms);
return ms.GetBuffer();
}
/// <summary>
/// Reads the bound stream, from its current position to its end, into a byte array.
/// </summary>
/// <remarks>The bound stream is disposed once it has been read, as before.</remarks>
/// <returns>an array containing exactly the bytes that were read</returns>
/// <exception cref="InvalidOperationException">nothing is bound (thrown by <see cref="GetStream"/>)</exception>
public byte[] ReadAllBytes()
{
	using Stream stream = GetStream();
	using var ms = new MemoryStream((int)stream.Length);
	stream.CopyTo(ms);
	// GetBuffer() exposes the whole backing array, which is longer than the data whenever the stream wasn't read from position 0;
	// only hand it out (avoiding a copy) when it is exactly full, otherwise return a right-sized copy
	var buffer = ms.GetBuffer();
	return ms.Length == buffer.Length ? buffer : ms.ToArray();
}
/// <summary>
/// these extensions won't even be tried as archives (removes spurious archive detects since some of the signatures are pretty damn weak)
/// </summary>
public string[] NonArchiveExtensions = { ".smc", ".sfc", ".dll" };
/// <summary>
/// Parses the given filename to create an un-opened HawkFile with some information available about its path constitution
/// </summary>
public void Parse(string path)
{
bool isArchivePath = IsCanonicalArchivePath(path);
if (isArchivePath)
{
var parts = path.Split('|');
path = parts[0];
_memberPath = parts[1];
// we're gonna assume, on parsing, that this is
_isArchive = true;
}
_rootPath = path;
}
/// <summary>Opens the file at <paramref name="path"/>. This may take a while if the file is an archive, as it may be accessed and scanned.</summary>
/// <exception cref="InvalidOperationException">already opened via <see cref="HawkFile(string)"/>, this method, or <see cref="Parse"/></exception>
public void Open(string path)
{
if (_rootPath != null)
{
throw new InvalidOperationException($"Don't reopen a {nameof(HawkFile)}.");
}
string autobind = null;
bool isArchivePath = IsCanonicalArchivePath(path);
if (isArchivePath)
{
var parts = path.Split('|');
path = parts[0];
autobind = parts[1];
}
var fi = new FileInfo(path);
_rootExists = fi.Exists;
if (fi.Exists == false)
{
return;
}
_rootPath = path;
_exists = true;
AnalyzeArchive(path);
if (_extractor == null)
{
_rootStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
// we could autobind here, but i dont want to
// bind it later with the desired extensions.
}
if (autobind == null)
{
// non-archive files can be automatically bound this way
if (!isArchivePath)
{
BindRoot();
}
}
else
{
autobind = autobind.ToUpperInvariant();
if (_extractor != null)
{
var scanResults = _extractor.Scan();
for (int i = 0; i < scanResults.Count; i++)
{
if (scanResults[i].Name.ToUpperInvariant() == autobind)
{
BindArchiveMember(i);
return;
}
}
}
_exists = false;
}
}
/// <summary>
/// Makes a new HawkFile based on the provided path.
/// </summary>
public HawkFile(string path)
{
Open(path);
}
/// <summary>
/// binds the specified ArchiveItem which you should have gotten by interrogating an archive hawkfile
/// </summary>
public HawkFile BindArchiveMember(HawkFileArchiveItem item)
{
return BindArchiveMember(item.Index);
}
/// <summary>
/// finds an ArchiveItem with the specified name (path) within the archive; returns null if it doesnt exist
/// </summary>
public HawkFileArchiveItem FindArchiveMember(string name)
{
return ArchiveItems.FirstOrDefault(ai => ai.Name == name);
}
/// <summary>
/// binds a path within the archive; returns null if that path didnt exist.
/// </summary>
public HawkFile BindArchiveMember(string name)
{
var ai = FindArchiveMember(name);
if (ai == null)
{
return null;
}
return BindArchiveMember(ai);
}
/// <summary>binds the selected archive index</summary>
/// <exception cref="InvalidOperationException">stream already bound</exception>
public HawkFile BindArchiveMember(int index)
{
if (!_rootExists)
{
return this;
}
if (_boundStream != null)
{
throw new InvalidOperationException("stream already bound!");
}
_boundStream = new MemoryStream();
int archiveIndex = _archiveItems[index].ArchiveIndex;
_extractor.ExtractFile(archiveIndex, _boundStream);
_boundStream.Position = 0;
_memberPath = _archiveItems[index].Name; // TODO - maybe go through our own list of names? maybe not, its indexes dont match..
Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}");
_boundIndex = archiveIndex;
return this;
}
/// <summary>
/// Removes any existing binding
/// </summary>
public void Unbind()
{
if (_boundStream != null && _boundStream != _rootStream)
{
_boundStream.Close();
}
_boundStream = null;
_memberPath = null;
_boundIndex = null;
}
/// <summary>
/// causes the root to be bound (in the case of non-archive files)
/// </summary>
private void BindRoot()
{
_boundStream = _rootStream;
Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}");
}
/// <summary>
/// Binds the first item in the archive (or the file itself). Supposing that there is anything in the archive.
/// </summary>
public HawkFile BindFirst()
{
BindFirstOf();
return this;
}
/// <summary>
/// binds one of the supplied extensions if there is only one match in the archive
/// </summary>
public HawkFile BindSoleItemOf(params string[] extensions)
{
return BindByExtensionCore(false, extensions);
}
/// <summary>
/// Binds the first item in the archive (or the file itself) if the extension matches one of the supplied templates.
/// You probably should not use use BindSoleItemOf or the archive chooser instead
/// </summary>
public HawkFile BindFirstOf(params string[] extensions)
{
return BindByExtensionCore(true, extensions);
}
/// <exception cref="InvalidOperationException">stream already bound</exception>
private HawkFile BindByExtensionCore(bool first, params string[] extensions)
{
if (!_rootExists)
{
return this;
}
if (_boundStream != null)
{
throw new InvalidOperationException("stream already bound!");
}
if (_extractor == null)
{
// open uncompressed file
var extension = Path.GetExtension(_rootPath).Substring(1).ToUpperInvariant();
if (extensions.Length == 0 || extension.In(extensions))
{
BindRoot();
}
return this;
}
var candidates = new List<int>();
for (int i = 0; i < _archiveItems.Count; i++)
{
var e = _archiveItems[i];
var extension = Path.GetExtension(e.Name).ToUpperInvariant();
extension = extension.TrimStart('.');
if (extensions.Length == 0 || extension.In(extensions))
{
if (first)
{
BindArchiveMember(i);
return this;
}
candidates.Add(i);
}
}
if (candidates.Count == 1)
{
BindArchiveMember(candidates[0]);
}
return this;
}
private void ScanArchive()
{
_archiveItems = _extractor.Scan();
}
private void AnalyzeArchive(string path)
{
// no archive handler == no analysis
if (ArchiveHandlerFactory == null)
{
return;
}
int offset;
bool isExecutable;
var pathExt = Path.GetExtension(path).ToLower();
if (NonArchiveExtensions.Contains(pathExt))
{
return;
}
if (ArchiveHandlerFactory.CheckSignature(path, out offset, out isExecutable))
{
_extractor = ArchiveHandlerFactory.Construct(path);
try
{
ScanArchive();
_isArchive = true;
}
catch
{
_extractor.Dispose();
_extractor = null;
_archiveItems = null;
}
}
}
public void Dispose()
{
Unbind();
_extractor?.Dispose();
_rootStream?.Dispose();
_extractor = null;
_rootStream = null;
}
/// <summary>
/// is the supplied path a canonical name including an archive?
/// </summary>
private static bool IsCanonicalArchivePath(string path)
{
return path.IndexOf('|') != -1;
}
/// <summary>
/// Repairs paths from an archive which contain offensive characters
/// </summary>
public static string Util_FixArchiveFilename(string fn)
{
return fn.Replace('\\', '/');
}
/// <summary>
/// converts a canonical name to a bound name (the bound part, whether or not it is an archive)
/// </summary>
static string GetBoundNameFromCanonical(string canonical)
{
var parts = canonical.Split('|');
return parts[parts.Length - 1];
}
/// <summary>
/// makes a canonical name from two parts
/// </summary>
string MakeCanonicalName(string root, string member)
{
if (member == null)
{
return root;
}
return $"{root}|{member}";
}
}
/// <summary>
/// Members returned by IHawkFileArchiveHandler
/// </summary>
public class HawkFileArchiveItem
{
/// <summary>
/// Gets or sets the member name
/// </summary>
public string Name { get; set; }
/// <summary>
/// Gets or sets the size of member file
/// </summary>
public long Size { get; set; }
/// <summary>
/// Gets or sets the index of this archive item
/// </summary>
public int Index { get; set; }
/// <summary>
/// Gets or sets the index WITHIN THE ARCHIVE (for internal tracking by a IHawkFileArchiveHandler) of the member
/// </summary>
public int ArchiveIndex { get; set; }
}
}

View File

@ -0,0 +1,27 @@
namespace BizHawk.Common
{
/// <summary>An immutable description of a single archive member, as handed out by <see cref="IHawkArchiveFile"/>.</summary>
public readonly struct HawkArchiveFileItem
{
	/// <value>the member's index within the archive itself, which is not the same thing as <see cref="Index"/></value>
	/// <remarks>meant for internal use by <see cref="IFileDearchivalMethod"/> implementations</remarks>
	public readonly int ArchiveIndex;

	/// <value>the index of this archive item</value>
	public readonly int Index;

	/// <value>the name of the member</value>
	public readonly string Name;

	/// <value>the size of the member file</value>
	public readonly long Size;

	/// <summary>Stores each argument in the field of the same name.</summary>
	public HawkArchiveFileItem(string name, long size, int index, int archiveIndex)
		=> (Name, Size, Index, ArchiveIndex) = (name, size, index, archiveIndex);
}
}

View File

@ -0,0 +1,312 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using BizHawk.Common.StringExtensions;
namespace BizHawk.Common
{
/// <summary>
/// This class can represent a variety of file-like objects—"regular" files on disk, archive members, SMB shares(?)—encapsulating them so any may be opened/read/closed like files on disk.<br/>
/// When opening an archive, it won't always be clear automatically which member should actually be used.
/// Therefore, we define the concept of "binding": the <see cref="HawkFile"/> attaches itself to an archive member, which is the file that it will actually be using.<br/>
/// We also define a simple extension to the Unix path format using <c>'|'</c>: <c>/path/to/file.rom</c> is readable, but so is <c>/path/to/archive.zip|member.rom</c>.
/// Strings formatted this way are annotated <see cref="HawkFilePathAttribute">[HawkFilePath]</see>.
/// </summary>
/// <remarks>
/// This class is defensively designed around <see cref="IFileDearchivalMethod"/> to allow swapping out implementations (for speed) without adding any dependencies to this project.<br/>
/// TODO split into "bind" and "open &lt;the bound thing>"<br/>
/// TODO scan archive to flatten interior directories down to a path (maintain our own archive item list)
/// </remarks>
public sealed class HawkFile : IDisposable
{
private List<HawkArchiveFileItem>? _archiveItems;
private Stream? _boundStream;
private IHawkArchiveFile? _extractor;
private bool _rootExists;
private Stream? _rootStream;
/// <summary>These file extensions are assumed to not be archives (with default value, mitigates high false positive rate caused by weak archive detection signatures).</summary>
public IReadOnlyCollection<string> NonArchiveExtensions = CommonNonArchiveExtensions;
/// <exception cref="InvalidOperationException"><see cref="IsArchive"/> is <see langword="false"/></exception>
public IList<HawkArchiveFileItem> ArchiveItems => (IsArchive ? _archiveItems : null) ?? throw new InvalidOperationException("Can't get archive items from non-archive");
/// <value>the member path part of the bound file</value>
public string? ArchiveMemberPath { get; private set; }
public int? BoundIndex { get; private set; }
/// <summary>returns the complete canonical full path ("c:\path\to\archive|member") of the bound file</summary>
[HawkFilePath]
public string? CanonicalFullPath => MakeCanonicalName(SafeFullPathWithoutMember, ArchiveMemberPath);
/// <summary>returns the complete canonical name ("archive|member") of the bound file</summary>
[HawkFilePath]
public string? CanonicalName => MakeCanonicalName(Path.GetFileName(SafeFullPathWithoutMember), ArchiveMemberPath);
/// <summary>Gets the directory containing the root</summary>
public string? Directory => Path.GetDirectoryName(SafeFullPathWithoutMember);
/// <value><see langword="true"/> once <see cref="Open"/> has found the root file on disk; set back to <see langword="false"/> when <see cref="Open"/> was given a member path (after <c>'|'</c>) that it could not bind</value>
/// <remarks>NOTE: this isn't set until the file is <see cref="Open">Opened</see>. Not too great...</remarks>
public bool Exists { get; private set; }
/// <summary>returns the extension of Name</summary>
public string? Extension => Path.GetExtension(Name).ToUpperInvariant();
/// <value>returns the complete full path of the bound file, excluding the archive member portion</value>
/// <remarks>assigned in <see cref="Open"/> and <see cref="Parse"/>, but if neither is called may be <see langword="null"/> and cause NREs</remarks>
public string? FullPathWithoutMember { get; private set; }
public bool IsArchive { get; private set; }
/// <summary>Indicates whether the file is an archive member (IsArchive && IsBound[to member])</summary>
public bool IsArchiveMember => IsArchive && IsBound;
/// <summary>Gets a value indicating whether this instance is bound</summary>
public bool IsBound => _boundStream != null;
/// <summary>returns the virtual name of the bound file (disregarding the archive). Useful as a basic content identifier.</summary>
public string Name => ArchiveMemberPath ?? SafeFullPathWithoutMember;
private string SafeFullPathWithoutMember => FullPathWithoutMember ?? throw new NullReferenceException($"this is related to the deprecated no-arg ctor, {nameof(FullPathWithoutMember)} is only assigned in {nameof(Open)}/{nameof(Parse)}");
/// <summary>Creates an instance without any path.</summary>
/// <remarks>
/// Until <see cref="Open"/> or <see cref="Parse"/> is called, <see cref="FullPathWithoutMember"/> is <see langword="null"/>,
/// so the properties derived from it (<see cref="Name"/>, <see cref="Directory"/>, <see cref="CanonicalName"/>, <see cref="CanonicalFullPath"/>) throw.
/// </remarks>
[Obsolete]
public HawkFile() {}
/// <summary>Creates a <see cref="HawkFile"/> for the given path.</summary>
/// <param name="path">the path to hand to <see cref="Open"/> or <see cref="Parse"/></param>
/// <param name="delayIOAndDearchive">when <see langword="true"/>, <see cref="Parse"/> is called instead of <see cref="Open"/></param>
public HawkFile([HawkFilePath] string path, bool delayIOAndDearchive = false)
{
	if (!delayIOAndDearchive)
	{
		Open(path);
		return;
	}
	Parse(path);
}
/// <summary>Binds the given archive item, which you should have obtained by interrogating an archive <see cref="HawkFile"/>.</summary>
/// <returns>whatever <see cref="BindArchiveMember(int)"/> returns for the item's <see cref="HawkArchiveFileItem.Index"/></returns>
public HawkFile? BindArchiveMember(HawkArchiveFileItem item)
{
	var listIndex = item.Index;
	return BindArchiveMember(listIndex);
}
/// <summary>Extracts the archive member at <paramref name="index"/> (a position in the scanned item list) into memory and binds it.</summary>
/// <returns>this instance; nothing is bound if the root file doesn't exist</returns>
/// <exception cref="InvalidOperationException">stream already bound, or this file was not opened as an archive</exception>
public HawkFile? BindArchiveMember(int index)
{
	if (!_rootExists) return this;
	if (_boundStream != null) throw new InvalidOperationException("stream already bound!");
	if (_archiveItems == null || _extractor == null) throw new InvalidOperationException("not an archive");

	var item = _archiveItems[index];

	// Extract into a local first: if extraction throws, this instance must not be left "bound" to an empty or partial stream
	// (which would make IsBound true and every later bind attempt fail with "stream already bound!").
	var extracted = new MemoryStream();
	try
	{
		_extractor.ExtractFile(item.ArchiveIndex, extracted);
	}
	catch
	{
		extracted.Dispose();
		throw;
	}
	extracted.Position = 0;

	_boundStream = extracted;
	ArchiveMemberPath = item.Name; // TODO - maybe go through our own list of names? maybe not, its indices don't match...
#if DEBUG
	Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}");
#endif
	BoundIndex = item.ArchiveIndex;
	return this;
}
/// <summary>Looks up <paramref name="name"/> with <see cref="FindArchiveMember"/> and binds the member it finds.</summary>
/// <returns><see langword="null"/> when the lookup yields nothing, otherwise the result of binding the found item</returns>
public HawkFile? BindArchiveMember(string? name)
{
	var found = FindArchiveMember(name);
	if (found.HasValue) return BindArchiveMember(found.Value);
	return null;
}
/// <summary>Shared implementation of <see cref="BindFirstOf"/> and <see cref="BindSoleItemOf"/>.</summary>
/// <param name="first">
/// <see langword="true"/> to bind the first matching archive member;
/// <see langword="false"/> to bind only when exactly one member matches
/// </param>
/// <param name="extensions">acceptable extensions, written without the leading <c>'.'</c>; when empty, anything is acceptable</param>
/// <returns>this instance, whether or not something was bound</returns>
/// <exception cref="InvalidOperationException">stream already bound</exception>
private HawkFile? BindByExtensionCore(bool first, params string[] extensions)
{
	if (!_rootExists) return this;
	if (_boundStream != null) throw new InvalidOperationException("stream already bound!");

	// TrimStart('.') instead of Substring(1): Path.GetExtension returns "" for a name without an extension, and "".Substring(1) throws
	bool HasAcceptedExtension(string path) => Path.GetExtension(path).TrimStart('.').In(extensions);

	if (_archiveItems == null || _extractor == null)
	{
		// open uncompressed file
		if (extensions.Length == 0 || HasAcceptedExtension(SafeFullPathWithoutMember)) BindRoot();
		return this;
	}

	if (extensions.Length != 0)
	{
		var candidates = _archiveItems.Where(item => HasAcceptedExtension(item.Name)).ToList();
		if ((first && candidates.Count != 0) || candidates.Count == 1) BindArchiveMember(candidates[0].Index);
	}
	else if (_archiveItems.Count == 1 || (first && _archiveItems.Count != 0))
	{
		// the Count check under `first` keeps an empty archive from being indexed at 0
		BindArchiveMember(0);
	}
	return this;
}
/// <summary>Binds the first item in the archive (or the file itself), provided there is anything to bind.</summary>
/// <returns>the result of <see cref="BindFirstOf"/> called with no extensions</returns>
public HawkFile? BindFirst()
{
	return BindFirstOf();
}
/// <summary>Binds the first item in the archive (or the file itself) whose extension is one of <paramref name="extensions"/>.</summary>
/// <remarks>You probably should use <see cref="BindSoleItemOf"/> or the archive chooser instead.</remarks>
public HawkFile? BindFirstOf(params string[] extensions)
{
	const bool bindFirstMatch = true;
	return BindByExtensionCore(bindFirstMatch, extensions);
}
/// <summary>Makes the root stream the bound stream (this is how non-archive files get bound).</summary>
/// <remarks>The root stream is assigned as-is, so if no root stream has been opened, this instance stays unbound.</remarks>
private void BindRoot()
{
_boundStream = _rootStream;
#if DEBUG
// debug builds log the canonical path of what is now bound
Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}");
#endif
}
/// <summary>Binds the archive member whose extension is one of <paramref name="extensions"/>, but only if exactly one member matches.</summary>
public HawkFile? BindSoleItemOf(params string[] extensions)
{
	const bool bindFirstMatch = false;
	return BindByExtensionCore(bindFirstMatch, extensions);
}
/// <summary>Removes the current binding, then disposes and forgets the archive extractor and the root stream (each only if present).</summary>
public void Dispose()
{
	Unbind();
	if (_extractor != null)
	{
		_extractor.Dispose();
		_extractor = null;
	}
	if (_rootStream != null)
	{
		_rootStream.Dispose();
		_rootStream = null;
	}
}
/// <summary>Finds the archive item whose name (path within the archive) equals <paramref name="name"/>.</summary>
/// <returns>the first matching item, or <see langword="null"/> if no item has that name</returns>
/// <exception cref="InvalidOperationException">the archive's items aren't available (thrown by <see cref="ArchiveItems"/>)</exception>
public HawkArchiveFileItem? FindArchiveMember(string? name)
{
	// HawkArchiveFileItem is a struct, so FirstOrDefault() would produce default(HawkArchiveFileItem) on a miss, never null;
	// callers would then see a "found" item with Index 0. Search explicitly so that a miss really is null.
	foreach (var item in ArchiveItems)
	{
		if (item.Name == name) return item;
	}
	return null;
}
/// <summary>Gets the stream of whatever is currently bound.</summary>
/// <returns>the bound stream itself (not a copy)</returns>
/// <exception cref="InvalidOperationException">no stream bound (haven't called <see cref="BindArchiveMember(int)"/> or overload)</exception>
public Stream GetStream()
{
	if (_boundStream == null)
	{
		throw new InvalidOperationException($"{nameof(HawkFile)}: Can't call {nameof(GetStream)}() before you've successfully bound something!");
	}
	return _boundStream;
}
/// <summary>Opens the file at <paramref name="path"/>. This may take a while if the file is an archive, as it may be accessed and scanned.</summary>
/// <remarks>
/// If the root file doesn't exist, this returns without setting anything else and <see cref="Exists"/> stays <see langword="false"/>.<br/>
/// When <paramref name="path"/> has no member part, the root stream is bound (which binds nothing for an archive, since no root stream is opened for it).<br/>
/// When <paramref name="path"/> has a member part, the archive member with that name (compared ignoring case) is bound; if there is no such member, or the file isn't an archive, <see cref="Exists"/> is set back to <see langword="false"/>.
/// </remarks>
/// <exception cref="InvalidOperationException">already opened via <see cref="HawkFile(string, bool)"/>, this method, or <see cref="Parse"/></exception>
public void Open([HawkFilePath] string path)
{
	if (FullPathWithoutMember != null) throw new InvalidOperationException($"Don't reopen a {nameof(HawkFile)}.");

	string? autobind = null;
	var split = SplitArchiveMemberPath(path);
	if (split != null) (path, autobind) = split.Value;

	_rootExists = new FileInfo(path).Exists;
	if (!_rootExists) return;
	FullPathWithoutMember = path;
	Exists = true;

	if (DearchivalMethod != null
		&& !NonArchiveExtensions.Contains(Path.GetExtension(path).ToLowerInvariant())
		&& DearchivalMethod.CheckSignature(path, out _, out _))
	{
		_extractor = DearchivalMethod.Construct(path);
		try
		{
			_archiveItems = _extractor.Scan();
			IsArchive = true;
		}
		catch
		{
			// scanning failed: fall back to treating the file as a plain file
			_archiveItems = null;
			_extractor.Dispose();
			_extractor = null;
		}
	}
	if (_extractor == null)
	{
		_rootStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
		// we could autobind here, but i dont want to
		// bind it later with the desired extensions.
	}

	if (autobind == null)
	{
		// non-archive files can be automatically bound this way
		BindRoot();
		return;
	}

	if (_extractor != null && _archiveItems != null)
	{
		// search the list from the scan above instead of scanning the archive again:
		// it is the same list BindArchiveMember indexes into, so the index found here is guaranteed to line up
		for (int i = 0, l = _archiveItems.Count; i < l; i++)
		{
			if (string.Equals(_archiveItems[i].Name, autobind, StringComparison.InvariantCultureIgnoreCase))
			{
				BindArchiveMember(i);
				return;
			}
		}
	}
	Exists = false;
}
/// <summary>
/// Populates only those fields whose value is contained in <paramref name="path"/> itself:
/// <c>FullPathWithoutMember</c> and, when the path contains <c>'|'</c>, <c>ArchiveMemberPath</c> and <c>IsArchive</c>.
/// </summary>
/// <param name="path">a regular path, or an archive path followed by <c>'|'</c> and a member name</param>
public void Parse([HawkFilePath] string path)
{
    var pathAndMember = SplitArchiveMemberPath(path);
    if (pathAndMember.HasValue)
    {
        var (archivePath, memberPath) = pathAndMember.Value;
        path = archivePath;
        ArchiveMemberPath = memberPath;
        // a '|' is assumed to appear only in paths that point into archives
        IsArchive = true;
    }
    FullPathWithoutMember = path;
}
/// <summary>Attempts to read all the content of the currently bound stream.</summary>
/// <remarks>
/// The stream returned by <see cref="GetStream"/> is wrapped in a <see langword="using"/> declaration, so it is disposed when this method returns.
/// </remarks>
/// <returns>an array holding exactly the bytes that were copied from the stream, with no unused trailing bytes</returns>
/// <exception cref="InvalidOperationException">no stream bound (thrown by <see cref="GetStream"/>)</exception>
public byte[] ReadAllBytes()
{
    using var stream = GetStream();
    using var ms = new MemoryStream((int) stream.Length);
    stream.CopyTo(ms);
    // GetBuffer() exposes the whole internal buffer, which is longer than the data when fewer (or more)
    // bytes than stream.Length were copied; only hand it out directly when it is exactly filled.
    var buffer = ms.GetBuffer();
    return buffer.Length == ms.Length ? buffer : ms.ToArray();
}
/// <summary>
/// Removes any existing binding: closes the bound stream unless it is the root stream,
/// then clears the bound stream, <c>ArchiveMemberPath</c> and <c>BoundIndex</c>.
/// </summary>
public void Unbind()
{
    if (_boundStream != null && _boundStream != _rootStream)
    {
        _boundStream.Close();
    }
    _boundStream = null;
    ArchiveMemberPath = null;
    BoundIndex = null;
}
/// <summary>Set this with an instance which can construct archive handlers as necessary for archive handling.</summary>
/// <remarks>While this is <see langword="null"/>, <see cref="Open"/> skips its archive signature check and never constructs an archive handler.</remarks>
public static IFileDearchivalMethod<IHawkArchiveFile>? DearchivalMethod;
/// <summary>File extensions (lower-case, with leading dot) commonly seen on files that are not archives.</summary>
/// <remarks>NOTE(review): presumably the default value of <c>NonArchiveExtensions</c>, which <see cref="Open"/> consults before checking for an archive signature — confirm.</remarks>
private static readonly IReadOnlyCollection<string> CommonNonArchiveExtensions = new[] { ".smc", ".sfc", ".dll" };
/// <summary>Utility: constructs a temporary <see cref="HawkFile"/> for <paramref name="path"/> and reports its <c>Exists</c> value.</summary>
/// <param name="path">path handed to <see cref="HawkFile(string)"/></param>
/// <returns>the value of <c>Exists</c> on the temporary instance, which is disposed before returning</returns>
public static bool ExistsAt(string path)
{
    using (var probe = new HawkFile(path))
    {
        return probe.Exists;
    }
}
/// <summary>Joins an archive path and a member name with <c>'|'</c>.</summary>
/// <param name="root">path of the file itself</param>
/// <param name="member">member name, or <see langword="null"/> when there is none</param>
/// <returns><paramref name="root"/> unchanged when <paramref name="member"/> is <see langword="null"/>, otherwise <c>root|member</c></returns>
[return: HawkFilePath]
private static string MakeCanonicalName(string root, string? member)
{
    if (member == null)
    {
        return root;
    }
    return $"{root}|{member}";
}
/// <summary>Reads all the contents of the file at <paramref name="path"/> through a temporary <see cref="HawkFile"/>.</summary>
/// <param name="path">path handed to <see cref="HawkFile(string)"/></param>
/// <returns>the result of the instance method <see cref="ReadAllBytes()"/></returns>
/// <exception cref="FileNotFoundException">the temporary <see cref="HawkFile"/> reports that <paramref name="path"/> doesn't exist</exception>
public static byte[] ReadAllBytes(string path)
{
    using (var file = new HawkFile(path))
    {
        if (!file.Exists)
        {
            throw new FileNotFoundException(path);
        }
        return file.ReadAllBytes();
    }
}
/// <summary>Splits <paramref name="path"/> at its last <c>'|'</c> into the part before and the part after it.</summary>
/// <remarks>On Debug builds, a message is written to the console when <paramref name="path"/> contains more than one <c>'|'</c>.</remarks>
/// <returns>path / member path pair iff <paramref name="path"/> contains <c>'|'</c>, <see langword="null"/> otherwise</returns>
private static (string, string)? SplitArchiveMemberPath([HawkFilePath] string path)
{
    var separatorIndex = path.LastIndexOf('|');
#if DEBUG
    if (path.IndexOf('|') != separatorIndex) Console.WriteLine($"{nameof(HawkFile)} path contains multiple '|'");
#endif
    if (separatorIndex < 0)
    {
        return null;
    }
    var archivePath = path.Substring(0, separatorIndex);
    var memberPath = path.Substring(separatorIndex + 1);
    return (archivePath, memberPath);
}
}
}

View File

@ -0,0 +1,15 @@
using System;
namespace BizHawk.Common
{
/// <summary>Indicates that a string value is formatted as a path, with an extension to the format: a path followed by <c>'|'</c> and then a relative path represents a member of an archive file.</summary>
/// <remarks>
/// The archive's path may be absolute or relative. If the path doesn't specify a member (it's a regular path), it may likewise be absolute or relative.<br/>
/// If multiple <c>'|'</c> appear in the path, the last one is the separator, but the behaviour of such paths is generally undefined. Warnings may be printed on Debug builds.<br/>
/// Paths are still OS-dependent. <c>C:\path\to\file</c> and <c>C:\path\to\archive|member</c> are valid on Windows, <c>/path/to/file</c> and <c>/path/to/archive|member</c> are valid everywhere else.<br/>
/// This attribute is for humans: it is a marker which declares no members of its own. It can be applied to properties, parameters and return values.<br/>
/// TODO how are local (<c>\\?\C:\file.txt</c>) and remote (<c>\\?\UNC\Server\Share\file.txt</c>) UNCs treated by WinForms, and are we able to handle at least the valid ones? --yoshi
/// </remarks>
[AttributeUsage(AttributeTargets.Property | AttributeTargets.Parameter | AttributeTargets.ReturnValue)]
public sealed class HawkFilePathAttribute : Attribute {}
}

View File

@ -0,0 +1,11 @@
namespace BizHawk.Common
{
/// <summary>Used by <see cref="HawkFile"/> to delegate archive management.</summary>
/// <typeparam name="T">the archive handler type returned by <see cref="Construct"/></typeparam>
public interface IFileDearchivalMethod<out T> where T : IHawkArchiveFile
{
/// <summary>Checks whether the file at <paramref name="fileName"/> should be handled as an archive. <c>HawkFile.Open</c> calls <see cref="Construct"/> only after this returned <see langword="true"/>.</summary>
/// <param name="fileName">path of the file to check</param>
/// <param name="offset">NOTE(review): meaning not visible here; presumably the position of the archive signature within the file — confirm against implementations</param>
/// <param name="isExecutable">NOTE(review): presumably whether the file is an executable (e.g. a self-extracting archive) — confirm against implementations</param>
/// <remarks>TODO could this receive a <see cref="HawkFile"/> itself? possibly handy, in very clever scenarios of mounting fake files</remarks>
bool CheckSignature(string fileName, out int offset, out bool isExecutable);
/// <summary>Returns an archive handler for the file at <paramref name="path"/>.</summary>
/// <param name="path">path of the archive file</param>
/// <returns>a handler which the caller is expected to dispose; <c>HawkFile.Open</c> disposes it itself when scanning fails</returns>
T Construct(string path);
}
}

View File

@ -0,0 +1,14 @@
using System;
using System.Collections.Generic;
using System.IO;
namespace BizHawk.Common
{
/// <summary>Handler for an archive file, as returned by <see cref="IFileDearchivalMethod{T}.Construct"/>.</summary>
/// <seealso cref="IFileDearchivalMethod{T}"/>
public interface IHawkArchiveFile : IDisposable
{
/// <summary>NOTE(review): presumably writes the content of the archive member at <paramref name="index"/> to <paramref name="stream"/> — confirm against implementations.</summary>
/// <param name="index">index of the member; presumably a position in the list returned by <see cref="Scan"/> — confirm</param>
/// <param name="stream">stream receiving the member's content</param>
void ExtractFile(int index, Stream stream);
/// <summary>Returns the items found in the archive (presumably one per member file — confirm against implementations).</summary>
/// <returns>a list in which <c>HawkFile.Open</c> looks members up by <c>Name</c> and then binds them by their position</returns>
List<HawkArchiveFileItem> Scan();
}
}