using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using BizHawk.Common.StringExtensions;
namespace BizHawk.Common
{
/// <summary>
/// This class can represent a variety of file-like objects—"regular" files on disk, archive members, SMB shares(?)—encapsulating them so any may be opened/read/closed like files on disk.<br/>
/// When opening an archive, it won't always be clear automatically which member should actually be used.
/// Therefore, we define the concept of "binding": the <see cref="HawkFile"/> attaches itself to an archive member, which is the file that it will actually be using.<br/>
/// We also define a simple extension to the Unix path format using '|': <c>/path/to/file.rom</c> is readable, but so is <c>/path/to/archive.zip|member.rom</c>.
/// Strings formatted this way are annotated <c>[HawkFilePath]</c>.
/// </summary>
/// <remarks>
/// This class is defensively designed to allow swapping out implementations (for speed) without adding any dependencies to this project.<br/>
/// TODO split into "bind" and "open &lt;the bound thing&gt;"<br/>
/// TODO scan archive to flatten interior directories down to a path (maintain our own archive item list)
/// </remarks>
public sealed class HawkFile : IDisposable
{
/// <summary>archive listing produced by scanning in <see cref="Open"/>; <see langword="null"/> when the root was not (successfully) scanned as an archive</summary>
private List? _archiveItems;
/// <summary>the stream handed out by <see cref="GetStream"/>: either <see cref="_rootStream"/> (see <see cref="BindRoot"/>) or an in-memory copy of an extracted archive member</summary>
private Stream? _boundStream;
/// <summary>archive handler constructed via <see cref="DearchivalMethod"/> in <see cref="Open"/>; <see langword="null"/> for non-archives and after <see cref="Dispose"/></summary>
private IHawkArchiveFile? _extractor;
/// <summary>whether a file existed at the root path when <see cref="Open"/> checked</summary>
private bool _rootExists;
/// <summary>read-only stream over the root file; only opened by <see cref="Open"/> when no archive handler is in use</summary>
private Stream? _rootStream;
/// <summary>These file extensions are assumed to not be archives (with default value, mitigates high false positive rate caused by weak archive detection signatures).</summary>
/// <remarks><see cref="Open"/> looks up the lowercased extension including its leading dot, so entries need that form (e.g. <c>".smc"</c>).</remarks>
public IReadOnlyCollection NonArchiveExtensions = CommonNonArchiveExtensions;
/// <summary>the members found when the archive was scanned</summary>
/// <exception cref="InvalidOperationException"><see cref="IsArchive"/> is <see langword="false"/>, or no archive listing is available</exception>
public IList ArchiveItems => (IsArchive ? _archiveItems : null) ?? throw new InvalidOperationException("Can't get archive items from non-archive");
/// <summary>the member path part of the bound file</summary>
/// <remarks>assigned when an archive member is bound and by <see cref="Parse"/>; cleared by <see cref="Unbind"/></remarks>
public string? ArchiveMemberPath { get; private set; }
/// <summary>the <c>ArchiveIndex</c> of the bound archive member; <see langword="null"/> when no archive member is bound</summary>
public int? BoundIndex { get; private set; }
/// <summary>returns the complete canonical full path ("c:\path\to\archive|member") of the bound file</summary>
[HawkFilePath]
public string? CanonicalFullPath => MakeCanonicalName(SafeFullPathWithoutMember, ArchiveMemberPath);
/// <summary>returns the complete canonical name ("archive|member") of the bound file</summary>
[HawkFilePath]
public string? CanonicalName => MakeCanonicalName(Path.GetFileName(SafeFullPathWithoutMember), ArchiveMemberPath);
/// <summary>Gets the directory containing the root</summary>
public string? Directory => Path.GetDirectoryName(SafeFullPathWithoutMember);
/// <summary><see langword="true"/> iff a file is bound and the bound file exists</summary>
/// <remarks>
/// NOTE: this isn't set until the file is Opened. Not too great...<br/>
/// <see cref="Open"/> sets it once the root file is found, and resets it when a requested archive member cannot be found.
/// </remarks>
public bool Exists { get; private set; }
/// <summary>returns the extension of <see cref="Name"/>, converted to upper case</summary>
public string? Extension => Path.GetExtension(Name).ToUpperInvariant();
/// <summary>returns the complete full path of the bound file, excluding the archive member portion</summary>
/// <remarks>assigned in <see cref="Open"/> (only when the root file exists) and <see cref="Parse"/>; if neither has assigned it, it is <see langword="null"/> and members relying on it throw</remarks>
public string? FullPathWithoutMember { get; private set; }
/// <summary>whether the root is treated as an archive: set by <see cref="Open"/> after a successful scan, or assumed by <see cref="Parse"/> when the path contains '|'</summary>
public bool IsArchive { get; private set; }
/// <summary>Indicates whether the file is an archive member (<c>IsArchive &amp;&amp; IsBound</c>[to member])</summary>
public bool IsArchiveMember => IsArchive && IsBound;
/// <summary>Gets a value indicating whether this instance is bound</summary>
public bool IsBound => _boundStream != null;
/// <summary>returns the virtual name of the bound file (disregarding the archive). Useful as a basic content identifier.</summary>
/// <remarks>this is <see cref="ArchiveMemberPath"/> when that is set, otherwise the full path of the root</remarks>
public string Name => ArchiveMemberPath ?? SafeFullPathWithoutMember;
/// <summary><see cref="FullPathWithoutMember"/>, but throwing instead of returning <see langword="null"/></summary>
/// <exception cref="NullReferenceException"><see cref="FullPathWithoutMember"/> has not been assigned</exception>
private string SafeFullPathWithoutMember => FullPathWithoutMember ?? throw new NullReferenceException($"this is related to the deprecated no-arg ctor, {nameof(FullPathWithoutMember)} is only assigned in {nameof(Open)}/{nameof(Parse)}");
/// <summary>Creates an instance which has neither been opened nor parsed; deprecated.</summary>
[Obsolete]
public HawkFile() {}
/// <summary>Makes a new HawkFile based on the provided path.</summary>
/// <param name="path">path of the file, optionally followed by '|' and an archive member path</param>
/// <param name="delayIOAndDearchive">If <see langword="true"/>, <see cref="Parse"/> will be called instead of <see cref="Open"/>.</param>
public HawkFile([HawkFilePath] string path, bool delayIOAndDearchive = false)
{
	if (!delayIOAndDearchive)
	{
		Open(path);
	}
	else
	{
		Parse(path);
	}
}
/// <summary>Binds the specified archive item, which you should have gotten by interrogating an archive hawkfile.</summary>
/// <param name="item">the item to bind; only its <c>Index</c> is used</param>
/// <returns>the result of binding by that index</returns>
public HawkFile? BindArchiveMember(HawkArchiveFileItem item)
{
	var memberIndex = item.Index;
	return BindArchiveMember(memberIndex);
}
/// <summary>Binds the archive member at the selected index of the archive listing, extracting it into memory.</summary>
/// <param name="index">position of the member within the scanned archive listing</param>
/// <returns>this instance; when the root file does not exist nothing is bound and this instance is returned unchanged</returns>
/// <exception cref="InvalidOperationException">a stream is already bound, or the root is not an archive</exception>
public HawkFile? BindArchiveMember(int index)
{
	if (!_rootExists) return this;
	if (_boundStream != null) throw new InvalidOperationException("stream already bound!");
	if (_archiveItems == null || _extractor == null) throw new InvalidOperationException("not an archive");
	var item = _archiveItems[index];
	// extract into a local first: if extraction throws, this instance must not be left looking bound to an empty/partial stream
	var extracted = new MemoryStream();
	try
	{
		_extractor.ExtractFile(item.ArchiveIndex, extracted);
	}
	catch
	{
		extracted.Dispose();
		throw;
	}
	extracted.Position = 0;
	_boundStream = extracted;
	ArchiveMemberPath = item.Name; // TODO - maybe go through our own list of names? maybe not, its indices don't match...
#if DEBUG
	Console.WriteLine($"{nameof(HawkFile)} bound {CanonicalFullPath}");
#endif
	// NOTE(review): this records the item's ArchiveIndex, not the listing position passed in as `index`
	BoundIndex = item.ArchiveIndex;
	return this;
}
/// <summary>Binds the archive member with the given path within the archive.</summary>
/// <param name="name">member path to look up via <see cref="FindArchiveMember"/></param>
/// <returns><see langword="null"/> when <see cref="FindArchiveMember"/> reports no such member, otherwise the result of binding it</returns>
public HawkFile? BindArchiveMember(string? name)
{
	var found = FindArchiveMember(name);
	if (found == null)
	{
		return null;
	}
	return BindArchiveMember(found.Value);
}
/// <summary>Shared implementation of <see cref="BindFirstOf"/> and <see cref="BindSoleItemOf"/>.</summary>
/// <param name="first">when <see langword="true"/>, the first suitable item is bound; when <see langword="false"/>, an item is only bound if it is the sole suitable one</param>
/// <param name="extensions">acceptable extensions, without the leading dot; when empty, any extension is acceptable</param>
/// <returns>this instance, whether or not anything was bound (check <see cref="IsBound"/>)</returns>
/// <exception cref="InvalidOperationException">stream already bound</exception>
private HawkFile? BindByExtensionCore(bool first, params string[] extensions)
{
	// Path.GetExtension yields "" for names without an extension, so the leading '.' may only be dropped when it is present
	static string ExtensionWithoutDot(string path)
	{
		var ext = Path.GetExtension(path);
		return string.IsNullOrEmpty(ext) ? string.Empty : ext.Substring(1);
	}
	if (!_rootExists) return this;
	if (_boundStream != null) throw new InvalidOperationException("stream already bound!");
	if (_archiveItems == null || _extractor == null)
	{
		// open uncompressed file
		if (extensions.Length == 0
			|| ExtensionWithoutDot(SafeFullPathWithoutMember).In(extensions))
		{
			BindRoot();
		}
		return this;
	}
	if (extensions.Length != 0)
	{
		var candidates = _archiveItems.Where(item => ExtensionWithoutDot(item.Name).In(extensions)).ToList();
		if ((first && candidates.Count != 0) || candidates.Count == 1) BindArchiveMember(candidates[0].Index);
	}
	else if (_archiveItems.Count == 1 || (first && _archiveItems.Count != 0))
	{
		// an empty archive has nothing to bind, so it is left unbound instead of indexing out of range
		BindArchiveMember(0);
	}
	return this;
}
/// <summary>Binds the first item in the archive (or the file itself), assuming that there is anything in the archive.</summary>
/// <returns>the result of <see cref="BindFirstOf"/> called without any extensions</returns>
public HawkFile? BindFirst()
{
	return BindFirstOf();
}
/// <summary>Binds the first item in the archive (or the file itself) if the extension matches one of the supplied templates.</summary>
/// <remarks>When several members match, the first one is taken; <see cref="BindSoleItemOf"/> only binds when exactly one matches.</remarks>
/// <param name="extensions">acceptable extensions; when none are given, any extension is acceptable</param>
public HawkFile? BindFirstOf(params string[] extensions)
{
	const bool takeFirstMatch = true;
	return BindByExtensionCore(takeFirstMatch, extensions);
}
/// <summary>Makes the root stream the bound stream (for non-archive files).</summary>
/// <remarks>When no root stream has been opened, the bound stream becomes <see langword="null"/>, i.e. nothing is bound.</remarks>
private void BindRoot()
{
	_boundStream = _rootStream;
#if DEBUG
	Console.WriteLine(nameof(HawkFile) + " bound " + CanonicalFullPath);
#endif
}
/// <summary>Binds the item matching one of the supplied extensions, if there is only one such match in the archive.</summary>
/// <param name="extensions">acceptable extensions; when none are given, any extension is acceptable</param>
public HawkFile? BindSoleItemOf(params string[] extensions)
{
	const bool takeFirstMatch = false;
	return BindByExtensionCore(takeFirstMatch, extensions);
}
/// <summary>Removes any binding, then releases the archive handler and the root stream.</summary>
public void Dispose()
{
	Unbind();
	if (_extractor != null)
	{
		_extractor.Dispose();
		_extractor = null;
	}
	if (_rootStream != null)
	{
		_rootStream.Dispose();
		_rootStream = null;
	}
}
/// <summary>Finds the archive item with the specified name (path) within the archive.</summary>
/// <param name="name">member path to look for (compared with <c>==</c>)</param>
/// <returns>the first item with that name, or <see langword="null"/> if it doesn't exist</returns>
/// <exception cref="InvalidOperationException">thrown by <see cref="ArchiveItems"/> when this is not an archive</exception>
public HawkArchiveFileItem? FindArchiveMember(string? name)
{
	// Deliberately not FirstOrDefault: for a value-type item that yields default(T) rather than null when nothing matches,
	// and wrapping that default in the nullable return type would make "not found" indistinguishable from a real item.
	foreach (var item in ArchiveItems)
	{
		if (item.Name == name) return item;
	}
	return null;
}
/// <summary>Gets the stream for the currently bound file.</summary>
/// <returns>the bound stream; it remains owned by this instance</returns>
/// <exception cref="InvalidOperationException">no stream bound (nothing has been successfully bound yet)</exception>
public Stream GetStream()
{
	if (_boundStream == null)
	{
		throw new InvalidOperationException($"{nameof(HawkFile)}: Can't call {nameof(GetStream)}() before you've successfully bound something!");
	}
	return _boundStream;
}
/// <summary>Opens the file at <paramref name="path"/>. This may take a while if the file is an archive, as it may be accessed and scanned.</summary>
/// <param name="path">path of the file; may be followed by '|' and the path of an archive member, which is then bound automatically if it can be found</param>
/// <remarks>
/// If no file exists at the root path, nothing else happens (<see cref="Exists"/> stays <see langword="false"/> and <see cref="FullPathWithoutMember"/> stays unassigned).<br/>
/// If a member was requested but could not be found (or the file is not an archive), <see cref="Exists"/> is reset to <see langword="false"/>.
/// </remarks>
/// <exception cref="InvalidOperationException">already opened via the constructor, this method, or <see cref="Parse"/></exception>
public void Open([HawkFilePath] string path)
{
	if (FullPathWithoutMember != null) throw new InvalidOperationException($"Don't reopen a {nameof(HawkFile)}.");
	string? autobind = null;
	var split = SplitArchiveMemberPath(path);
	if (split != null) (path, autobind) = split.Value;
	_rootExists = new FileInfo(path).Exists;
	if (!_rootExists) return;
	FullPathWithoutMember = path;
	Exists = true;
	if (DearchivalMethod != null
		&& !NonArchiveExtensions.Contains(Path.GetExtension(path).ToLowerInvariant())
		&& DearchivalMethod.CheckSignature(path, out _, out _))
	{
		_extractor = DearchivalMethod.Construct(path);
		try
		{
			_archiveItems = _extractor.Scan();
			IsArchive = true;
		}
		catch
		{
			// best effort: a file which passed the signature check but can't be scanned is treated as a regular file below
			_archiveItems = null;
			_extractor.Dispose();
			_extractor = null;
		}
	}
	if (_extractor == null)
	{
		_rootStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
		// we could autobind here, but i don't want to
		// bind it later with the desired extensions.
	}
	if (autobind == null)
	{
		// non-archive files can be automatically bound this way
		BindRoot();
		return;
	}
	// Search the listing obtained above rather than scanning the archive a second time;
	// it is also the list which BindArchiveMember(int) indexes into.
	if (_archiveItems != null)
	{
		for (var i = 0; i < _archiveItems.Count; i++)
		{
			if (string.Equals(_archiveItems[i].Name, autobind, StringComparison.InvariantCultureIgnoreCase))
			{
				BindArchiveMember(i);
				return;
			}
		}
	}
	// the requested member wasn't found (or this isn't an archive at all)
	Exists = false;
}
/// <summary>Populates only those fields whose values are contained in <paramref name="path"/> itself, without opening anything.</summary>
/// <param name="path">path of the file, optionally followed by '|' and an archive member path</param>
/// <remarks>Assigns <see cref="FullPathWithoutMember"/>; when the path contains '|', also assigns <see cref="ArchiveMemberPath"/> and sets <see cref="IsArchive"/>.</remarks>
public void Parse([HawkFilePath] string path)
{
	var parts = SplitArchiveMemberPath(path);
	if (parts == null)
	{
		FullPathWithoutMember = path;
		return;
	}
	var (rootPath, memberPath) = parts.Value;
	ArchiveMemberPath = memberPath;
	IsArchive = true; // we'll assume that the '|' is only used for archives
	FullPathWithoutMember = rootPath;
}
/// <summary>Attempts to read all the content from the bound stream, starting at its current position.</summary>
/// <returns>exactly the bytes which were read</returns>
/// <remarks>NOTE(review): the bound stream is disposed when this method returns, so a later <see cref="GetStream"/> hands out a disposed stream.</remarks>
/// <exception cref="InvalidOperationException">no stream bound (thrown by <see cref="GetStream"/>)</exception>
public byte[] ReadAllBytes()
{
	using var stream = GetStream();
	using var ms = new MemoryStream((int) stream.Length);
	stream.CopyTo(ms);
	// GetBuffer exposes the whole backing array, which is only the data itself when it was filled exactly
	// (not the case if the stream wasn't at position 0, or its length differed from what was reported)
	var buffer = ms.GetBuffer();
	return buffer.Length == ms.Length ? buffer : ms.ToArray();
}
/// <summary>Removes any existing binding, and clears <see cref="ArchiveMemberPath"/> and <see cref="BoundIndex"/>.</summary>
public void Unbind()
{
	var bound = _boundStream;
	// the root stream is not closed here; Dispose takes care of it
	if (bound != null && !ReferenceEquals(bound, _rootStream))
	{
		bound.Close();
	}
	_boundStream = null;
	ArchiveMemberPath = null;
	BoundIndex = null;
}
/// <summary>Set this with an instance which can construct archive handlers as necessary for archive handling.</summary>
/// <remarks>While this is <see langword="null"/>, <see cref="Open"/> does not attempt archive detection.</remarks>
public static IFileDearchivalMethod? DearchivalMethod;
/// <summary>default value of <see cref="NonArchiveExtensions"/></summary>
private static readonly IReadOnlyCollection CommonNonArchiveExtensions = new[] { ".smc", ".sfc", ".dll" };
/// <summary>Utility: Uses full HawkFile processing to determine whether a file exists at the provided path.</summary>
/// <param name="path">path to open with a temporary <see cref="HawkFile"/></param>
/// <returns>the value of <see cref="Exists"/> after opening</returns>
public static bool ExistsAt(string path)
{
	using (var probe = new HawkFile(path))
	{
		return probe.Exists;
	}
}
/// <summary>Joins a root path and an optional archive member path with '|'.</summary>
/// <returns><paramref name="root"/> alone when <paramref name="member"/> is <see langword="null"/>, otherwise <c>root|member</c></returns>
[return: HawkFilePath]
private static string MakeCanonicalName(string root, string? member)
{
	if (member == null)
	{
		return root;
	}
	return root + "|" + member;
}
/// <summary>Reads all the contents of the file at <paramref name="path"/>.</summary>
/// <param name="path">path to open with a temporary <see cref="HawkFile"/></param>
/// <exception cref="FileNotFoundException">could not find <paramref name="path"/></exception>
public static byte[] ReadAllBytes(string path)
{
	using (var file = new HawkFile(path))
	{
		if (!file.Exists)
		{
			throw new FileNotFoundException(path);
		}
		return file.ReadAllBytes();
	}
}
/// <summary>Splits a path at its last '|' into the root path and the archive member path.</summary>
/// <returns>path / member path pair iff <paramref name="path"/> contains '|', otherwise <see langword="null"/></returns>
private static (string, string)? SplitArchiveMemberPath([HawkFilePath] string path)
{
	var separatorIndex = path.LastIndexOf('|');
#if DEBUG
	if (path.IndexOf('|') != separatorIndex) Console.WriteLine($"{nameof(HawkFile)} path contains multiple '|'");
#endif
	if (separatorIndex == -1)
	{
		return null;
	}
	var rootPart = path.Substring(0, separatorIndex);
	var memberPart = path.Substring(separatorIndex + 1);
	return (rootPart, memberPart);
}
}
}