2013-10-25 00:59:34 +00:00
using System ;
using System.Collections.Generic ;
using System.IO ;
using System.Linq ;
2014-07-03 15:05:02 +00:00
using BizHawk.Common.StringExtensions ;
2014-02-04 21:15:33 +00:00
// The HawkFile class is excessively engineered, using IHawkFileArchiveHandler to decouple the archive handling from the basic file handling.
// This is so we could optionally drop in an unmanaged dearchiver library later as a performance optimization without ruining the portability of the code.
// Also, we want to be able to use HawkFiles in BizHawk.Common without bringing in a large 7-zip dependency.
2013-10-27 22:07:40 +00:00
namespace BizHawk.Common
2013-10-25 00:59:34 +00:00
{
2014-02-04 21:15:33 +00:00
// TODO:
// split into "bind" and "open (the bound thing)"
// scan archive to flatten interior directories down to a path (maintain our own archive item list)
/// <summary>
/// Contract implemented by the frontend so that HawkFile can recognize, list and extract
/// archives without depending on a particular archive library.
/// </summary>
public interface IHawkFileArchiveHandler : IDisposable
{
    // TODO - could this receive a hawkfile itself? possibly handy, in very clever scenarios of mounting fake files

    /// <summary>
    /// Reports whether the file at <paramref name="fileName"/> is recognized as an archive.
    /// HawkFile only constructs a handler for the file when this returns true.
    /// </summary>
    /// <param name="fileName">path of the file to inspect</param>
    /// <param name="offset">handler-defined output; HawkFile does not consume it</param>
    /// <param name="isExecutable">handler-defined output; HawkFile does not consume it</param>
    bool CheckSignature(string fileName, out int offset, out bool isExecutable);

    /// <summary>
    /// Lists the members of the archive this handler was constructed for.
    /// </summary>
    List<HawkFileArchiveItem> Scan();

    /// <summary>
    /// Creates a handler for the archive at <paramref name="path"/>.
    /// HawkFile calls this on the shared factory instance.
    /// </summary>
    IHawkFileArchiveHandler Construct(string path);

    /// <summary>
    /// Extracts a member into <paramref name="stream"/>.
    /// </summary>
    /// <param name="index">HawkFile passes the member's <see cref="HawkFileArchiveItem.ArchiveIndex"/> here</param>
    /// <param name="stream">destination stream; HawkFile rewinds it to position 0 afterwards</param>
    void ExtractFile(int index, Stream stream);
}
2013-10-25 00:59:34 +00:00
2013-10-27 21:29:18 +00:00
/// <summary>
/// HawkFile allows a variety of objects (actual files, archive members) to be treated as normal filesystem objects to be opened, closed, and read.
/// It can understand paths in 'canonical' format which includes /path/to/archive.zip|member.rom as well as /path/to/file.rom
/// When opening an archive, it won't always be clear automatically which member should actually be used.
/// Therefore there is a concept of 'binding' where a HawkFile attaches itself to an archive member which is the file that it will actually be using.
/// </summary>
2013-10-27 17:07:37 +00:00
public sealed class HawkFile : IDisposable
2013-10-25 00:59:34 +00:00
{
2014-02-04 21:15:33 +00:00
// Set to true once Open finds the root file; reset to false when a member named in a canonical path cannot be bound.
private bool _exists ;
// Whether the root file was present on disk when Open ran.
private bool _rootExists ;
// Path of the root file (the archive, or the plain file). Stays null until Open finds the file.
private string _rootPath ;
// Name of the bound archive member; null when nothing is bound or when the root itself is bound.
private string _memberPath ;
// _rootStream: FileStream over the root file, opened only when the root was not recognized as an archive.
// _boundStream: the stream handed out by GetStream (either _rootStream or an in-memory copy of an archive member).
private Stream _rootStream , _boundStream ;
// Archive handler for the root file; null when the root is not an archive.
private IHawkFileArchiveHandler _extractor ;
// Members reported by _extractor.Scan().
private List < HawkFileArchiveItem > _archiveItems ;
// ArchiveIndex of the currently bound member; null when no archive member is bound.
private int? _boundIndex ;
2013-10-27 21:29:18 +00:00
2014-02-04 21:15:33 +00:00
/// <summary>
/// Creates an unopened HawkFile. Call <see cref="Open"/> to attach it to a path.
/// </summary>
public HawkFile()
{
}
2013-10-25 00:59:34 +00:00
2013-10-27 21:29:18 +00:00
/// <summary>
/// Gets or sets the shared handler that HawkFile asks to recognize archives and to construct
/// a per-file handler for them. While this is null, files are never analyzed as archives.
/// </summary>
public static IHawkFileArchiveHandler ArchiveHandlerFactory
{
    get;
    set;
}
2013-10-25 00:59:34 +00:00
/// <summary>
/// Gets a value indicating whether the file this instance refers to exists.
/// It is true once the root file has been found, and false again if a member named in a
/// canonical "archive|member" path could not be bound.
/// </summary>
public bool Exists
{
    get
    {
        return _exists;
    }
}
2013-10-25 00:59:34 +00:00
/// <summary>
/// Gets the directory that contains the root file.
/// </summary>
public string Directory
{
    get
    {
        string containingDirectory = Path.GetDirectoryName(_rootPath);
        return containingDirectory;
    }
}
2013-10-25 00:59:34 +00:00
/// <summary>
/// Gets a value indicating whether a stream is currently bound to this instance.
/// </summary>
public bool IsBound
{
    get
    {
        if (_boundStream == null)
        {
            return false;
        }

        return true;
    }
}
2013-10-25 00:59:34 +00:00
/// <summary>
/// Gets the complete canonical full path ("c:\path\to\archive|member") of the bound file.
/// When no archive member is bound this is just the root path.
/// </summary>
public string CanonicalFullPath
{
    get
    {
        string fullPath = MakeCanonicalName(_rootPath, _memberPath);
        return fullPath;
    }
}
2013-10-25 00:59:34 +00:00
/// <summary>
/// Gets the complete canonical name ("archive|member") of the bound file,
/// i.e. the canonical path with the root's directory stripped.
/// </summary>
public string CanonicalName
{
    get
    {
        string rootFileName = Path.GetFileName(_rootPath);
        return MakeCanonicalName(rootFileName, _memberPath);
    }
}
2013-10-25 00:59:34 +00:00
/// <summary>
/// Gets the virtual name of the bound file (disregarding the archive): the member name when an
/// archive member is bound, otherwise the root path.
/// </summary>
public string Name
{
    get
    {
        string canonical = MakeCanonicalName(_rootPath, _memberPath);
        return GetBoundNameFromCanonical(canonical);
    }
}
2013-10-25 00:59:34 +00:00
/// <summary>
/// Gets the extension of <see cref="Name"/>, upper-cased and including the leading dot
/// (for example ".ROM"); empty when the name has no extension.
/// </summary>
public string Extension
{
    get
    {
        // Invariant casing, like the rest of this class: a culture-sensitive ToUpper()
        // would turn ".bin" into ".BİN" under a Turkish locale and break extension matching.
        return Path.GetExtension(Name).ToUpperInvariant();
    }
}
/// <summary>
/// Gets a value indicating whether the root file was recognized as an archive
/// (that is, whether an archive handler is attached).
/// </summary>
public bool IsArchive
{
    get
    {
        bool hasExtractor = _extractor != null;
        return hasExtractor;
    }
}
2013-10-25 00:59:34 +00:00
2014-02-04 21:15:33 +00:00
/// <summary>
/// Gets the members found when the archive was scanned.
/// </summary>
/// <exception cref="InvalidOperationException">this file is not an archive</exception>
public IList<HawkFileArchiveItem> ArchiveItems
{
    get
    {
        if (IsArchive)
        {
            return _archiveItems;
        }

        throw new InvalidOperationException("Cant get archive items from non-archive");
    }
}
/// <summary>
/// Returns the stream of the currently bound file.
/// </summary>
/// <exception cref="InvalidOperationException">nothing has been bound yet</exception>
public Stream GetStream()
{
    if (_boundStream != null)
    {
        return _boundStream;
    }

    throw new InvalidOperationException("HawkFile: Can't call GetStream() before youve successfully bound something!");
}
2014-02-04 21:15:33 +00:00
/// <summary>
/// Returns the archive index of the bound member, or null when no archive member is bound.
/// </summary>
public int? GetBoundIndex()
{
    int? boundIndex = _boundIndex;
    return boundIndex;
}
2013-10-25 00:59:34 +00:00
2014-02-04 21:15:33 +00:00
/// <summary>
/// Utility: uses full HawkFile processing (including "archive|member" paths) to determine
/// whether a file exists at the provided path.
/// </summary>
public static bool ExistsAt(string path)
{
    var probe = new HawkFile(path);
    try
    {
        return probe.Exists;
    }
    finally
    {
        // release whatever streams/handlers the probe opened
        probe.Dispose();
    }
}
2013-10-25 00:59:34 +00:00
2014-02-04 21:15:33 +00:00
/// <summary>
/// Utility: attempts to read all the content from the provided path
/// (a plain path or a canonical "archive|member" path).
/// </summary>
/// <param name="path">path to read from</param>
/// <returns>the bytes of the bound file, from the stream's current position to its end</returns>
/// <exception cref="FileNotFoundException">nothing exists at <paramref name="path"/></exception>
/// <exception cref="InvalidOperationException">the file exists but nothing could be bound automatically</exception>
public static byte[] ReadAllBytes(string path)
{
    using (var file = new HawkFile(path))
    {
        if (!file.Exists)
        {
            throw new FileNotFoundException(path);
        }

        // The stream belongs to the HawkFile and is released when 'file' is disposed,
        // so it is not wrapped in its own using block here.
        Stream stream = file.GetStream();

        // stream.Length is only a capacity hint for the buffer.
        using (var ms = new MemoryStream((int)stream.Length))
        {
            stream.CopyTo(ms);

            // ToArray returns exactly the bytes written. GetBuffer exposes the whole backing
            // array, which is only correct while capacity happens to equal the bytes copied.
            return ms.ToArray();
        }
    }
}
/// <summary>
/// Extensions (without the leading dot) that will never be tried as archives. This removes
/// spurious archive detections, since some of the archive signatures are pretty weak.
/// Must be set before <see cref="Open"/> is called to have any effect.
/// </summary>
public string[] NonArchiveExtensions = new string[0];
2013-10-25 00:59:34 +00:00
/// <summary>
/// Attaches this instance to <paramref name="path"/>, which is either a plain file path or a
/// canonical "archive|member" path.
/// A plain file is bound immediately. An archive named without a member is left unbound so the
/// caller can choose a member. For a canonical path the named member is bound (the name is matched
/// case-insensitively), and <see cref="Exists"/> becomes false if it cannot be found.
/// If the root file does not exist the method returns without attaching to anything.
/// </summary>
/// <param name="path">plain or canonical path to open</param>
/// <exception cref="InvalidOperationException">this instance has already been opened</exception>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is null</exception>
public void Open(string path)
{
    if (_rootPath != null)
    {
        throw new InvalidOperationException("Don't reopen a HawkFile.");
    }

    if (path == null)
    {
        throw new ArgumentNullException("path");
    }

    // Split a canonical path into the root file and the member to bind automatically.
    string autobind = null;
    if (IsCanonicalArchivePath(path))
    {
        var parts = path.Split('|');
        path = parts[0];
        autobind = parts[1];
    }

    _rootExists = new FileInfo(path).Exists;
    if (!_rootExists)
    {
        return;
    }

    _rootPath = path;
    _exists = true;

    AnalyzeArchive(path);
    if (_extractor == null)
    {
        _rootStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);

        // we could autobind here, but i dont want to
        // bind it later with the desired extensions.
    }

    if (autobind == null)
    {
        // No member was requested: non-archive files are bound automatically this way.
        // For an archive there is no root stream, so this leaves the file unbound.
        BindRoot();
        return;
    }

    if (_extractor != null)
    {
        // Search the list that BindArchiveMember(int) indexes into, rather than re-scanning
        // the archive and assuming the second scan returns members in the same order.
        for (int i = 0; i < _archiveItems.Count; i++)
        {
            if (string.Equals(_archiveItems[i].Name, autobind, StringComparison.OrdinalIgnoreCase))
            {
                BindArchiveMember(i);
                return;
            }
        }
    }

    // A member was requested but the root is not an archive, or it has no such member.
    _exists = false;
}
/// <summary>
/// Creates a HawkFile and immediately opens the provided path.
/// </summary>
/// <param name="path">plain or canonical ("archive|member") path, as accepted by Open</param>
public HawkFile(string path)
{
    this.Open(path);
}
/// <summary>
/// Binds the specified archive item, which you should have gotten by interrogating an archive HawkFile.
/// </summary>
/// <param name="item">a member of this archive</param>
/// <returns>this instance</returns>
/// <exception cref="ArgumentNullException"><paramref name="item"/> is null</exception>
/// <exception cref="InvalidOperationException">this file is not an archive, or a stream is already bound</exception>
/// <exception cref="ArgumentException"><paramref name="item"/> is not a member of this archive</exception>
public HawkFile BindArchiveMember(HawkFileArchiveItem item)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (!_rootExists)
    {
        return this;
    }

    if (_archiveItems == null)
    {
        throw new InvalidOperationException("Cant bind an archive member of a non-archive");
    }

    // BindArchiveMember(int) expects a position in our own item list, not the index inside the
    // archive. The two differ whenever the scan does not list every archive entry, so translate
    // the item's ArchiveIndex back to its list position instead of passing it through.
    int listIndex = _archiveItems.FindIndex(candidate => candidate.ArchiveIndex == item.ArchiveIndex);
    if (listIndex < 0)
    {
        throw new ArgumentException("The item does not belong to this archive", "item");
    }

    return BindArchiveMember(listIndex);
}
/// <summary>
/// Finds the archive item with the specified name (path) within the archive.
/// The comparison is exact (case-sensitive).
/// </summary>
/// <returns>the first matching item, or null if there is none</returns>
/// <exception cref="InvalidOperationException">this file is not an archive</exception>
public HawkFileArchiveItem FindArchiveMember(string name)
{
    foreach (HawkFileArchiveItem candidate in ArchiveItems)
    {
        if (candidate.Name == name)
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Binds a path within the archive.
/// </summary>
/// <returns>this instance, or null if no member has that path</returns>
public HawkFile BindArchiveMember(string name)
{
    HawkFileArchiveItem member = FindArchiveMember(name);
    return member == null ? null : BindArchiveMember(member);
}
/// <summary>
/// Binds the archive member at the given position in <see cref="ArchiveItems"/> by extracting it
/// into memory. Does nothing when the root file does not exist.
/// </summary>
/// <param name="index">position of the member in the scanned item list</param>
/// <returns>this instance</returns>
/// <exception cref="InvalidOperationException">a stream is already bound, or this file is not an archive</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is outside the item list</exception>
public HawkFile BindArchiveMember(int index)
{
    if (!_rootExists)
    {
        return this;
    }

    if (_boundStream != null)
    {
        throw new InvalidOperationException("stream already bound!");
    }

    if (_extractor == null || _archiveItems == null)
    {
        throw new InvalidOperationException("Cant bind an archive member of a non-archive");
    }

    HawkFileArchiveItem item = _archiveItems[index];

    // Extract into a local stream and publish it only on success. If extraction throws,
    // the HawkFile must not be left "bound" to an empty or partial stream.
    var extracted = new MemoryStream();
    try
    {
        _extractor.ExtractFile(item.ArchiveIndex, extracted);
    }
    catch
    {
        extracted.Dispose();
        throw;
    }

    extracted.Position = 0;
    _boundStream = extracted;
    _memberPath = item.Name; // TODO - maybe go through our own list of names? maybe not, its indexes dont match..
    Console.WriteLine("HawkFile bound " + CanonicalFullPath);
    _boundIndex = item.ArchiveIndex;
    return this;
}
/// <summary>
/// Removes any existing binding. A bound archive-member stream is closed; the root stream
/// is left open because it is owned separately and released in Dispose.
/// </summary>
public void Unbind()
{
    bool ownsBoundStream = _boundStream != null && _boundStream != _rootStream;
    if (ownsBoundStream)
    {
        _boundStream.Close();
    }

    _boundIndex = null;
    _memberPath = null;
    _boundStream = null;
}
/// <summary>
/// Binds the root stream itself (the case of non-archive files) and logs the binding to the console.
/// </summary>
private void BindRoot()
{
    _boundStream = _rootStream;

    string message = "HawkFile bound " + CanonicalFullPath;
    Console.WriteLine(message);
}
/// <summary>
/// Binds the first item in the archive (or the file itself), whatever its extension,
/// supposing that there is anything in the archive.
/// </summary>
/// <returns>this instance</returns>
public HawkFile BindFirst()
{
    // no extension filter: the first candidate wins
    BindByExtensionCore(true);
    return this;
}
/// <summary>
/// Binds the member matching one of the supplied extensions, but only if exactly one member
/// of the archive matches.
/// </summary>
/// <returns>this instance</returns>
public HawkFile BindSoleItemOf(params string[] extensions)
{
    HawkFile bound = BindByExtensionCore(false, extensions);
    return bound;
}
/// <summary>
/// Binds the first item in the archive (or the file itself) whose extension matches one of the
/// supplied templates. You probably should not use this; use BindSoleItemOf or the archive
/// chooser instead.
/// </summary>
/// <returns>this instance</returns>
public HawkFile BindFirstOf(params string[] extensions)
{
    HawkFile bound = BindByExtensionCore(true, extensions);
    return bound;
}
2014-02-04 21:15:33 +00:00
/// <summary>
/// Shared implementation of BindFirstOf / BindSoleItemOf.
/// For a non-archive the root is bound when its extension matches. For an archive, either the
/// first matching member is bound, or the only matching member when exactly one matches.
/// An empty <paramref name="extensions"/> list matches everything.
/// </summary>
/// <param name="first">true to bind the first match; false to bind only a sole match</param>
/// <param name="extensions">extensions to accept; the candidate is upper-cased with the dot removed before it is compared</param>
/// <returns>this instance (possibly still unbound)</returns>
/// <exception cref="InvalidOperationException">a stream is already bound</exception>
private HawkFile BindByExtensionCore(bool first, params string[] extensions)
{
    if (!_rootExists)
    {
        return this;
    }

    if (_boundStream != null)
    {
        throw new InvalidOperationException("stream already bound!");
    }

    if (_extractor == null)
    {
        // open uncompressed file
        // TrimStart instead of Substring(1): an extensionless file yields "" from GetExtension,
        // and "".Substring(1) throws ArgumentOutOfRangeException.
        var rootExtension = Path.GetExtension(_rootPath).TrimStart('.').ToUpperInvariant();
        if (extensions.Length == 0 || rootExtension.In(extensions))
        {
            BindRoot();
        }

        return this;
    }

    var candidates = new List<int>();
    for (int i = 0; i < _archiveItems.Count; i++)
    {
        var memberExtension = Path.GetExtension(_archiveItems[i].Name).TrimStart('.').ToUpperInvariant();
        if (extensions.Length == 0 || memberExtension.In(extensions))
        {
            if (first)
            {
                BindArchiveMember(i);
                return this;
            }

            candidates.Add(i);
        }
    }

    // sole-item mode: bind only when the choice is unambiguous
    if (candidates.Count == 1)
    {
        BindArchiveMember(candidates[0]);
    }

    return this;
}
2014-02-04 21:15:33 +00:00
/// <summary>
/// Asks the archive handler for the archive's members and caches the resulting list.
/// </summary>
private void ScanArchive()
{
    List<HawkFileArchiveItem> scanned = _extractor.Scan();
    _archiveItems = scanned;
}
/// <summary>
/// Determines whether the file at <paramref name="path"/> is an archive and, if so, attaches a
/// handler and scans its members. Leaves the instance as a plain file when no handler factory is
/// installed, when the extension is listed in <see cref="NonArchiveExtensions"/>, when the
/// signature check fails, or when scanning the archive throws.
/// </summary>
/// <param name="path">path of the root file</param>
private void AnalyzeArchive(string path)
{
    // no archive handler == no analysis
    if (ArchiveHandlerFactory == null)
    {
        return;
    }

    // Computed once, and with TrimStart rather than Substring(1), which throws on files
    // without an extension. The comparison is ordinal so it does not depend on the culture.
    string extension = Path.GetExtension(path).TrimStart('.');
    if (NonArchiveExtensions.Any(ext => string.Equals(ext, extension, StringComparison.OrdinalIgnoreCase)))
    {
        return;
    }

    int offset;
    bool isExecutable;
    if (!ArchiveHandlerFactory.CheckSignature(path, out offset, out isExecutable))
    {
        return;
    }

    _extractor = ArchiveHandlerFactory.Construct(path);
    try
    {
        ScanArchive();
    }
    catch
    {
        // Deliberate best effort: a file whose signature matched but which cannot be scanned
        // is treated as a regular file instead of failing the open.
        if (_extractor != null)
        {
            _extractor.Dispose();
        }

        _extractor = null;
        _archiveItems = null;
    }
}
/// <summary>
/// Releases the binding, the archive handler and the root stream. Safe to call more than once,
/// since every reference is cleared after it is disposed.
/// </summary>
public void Dispose()
{
    Unbind();

    IHawkFileArchiveHandler extractor = _extractor;
    if (extractor != null)
    {
        extractor.Dispose();
    }

    Stream rootStream = _rootStream;
    if (rootStream != null)
    {
        rootStream.Dispose();
    }

    _extractor = null;
    _rootStream = null;
}
2013-10-27 21:29:18 +00:00
/// <summary>
/// Tells whether the supplied path is a canonical name that includes an archive member,
/// i.e. whether it contains the '|' separator.
/// </summary>
static bool IsCanonicalArchivePath(string path)
{
    int separatorPosition = path.IndexOf('|');
    return separatorPosition >= 0;
}
/// <summary>
/// Repairs paths from an archive which contain offensive characters:
/// every backslash is replaced with a forward slash.
/// </summary>
public static string Util_FixArchiveFilename(string fn)
{
    string[] segments = fn.Split('\\');
    return string.Join("/", segments);
}
/// <summary>
/// Converts a canonical name to a bound name: the part after the last '|',
/// or the whole name when it contains no '|' (i.e. it is not an archive path).
/// </summary>
static string GetBoundNameFromCanonical(string canonical)
{
    int lastSeparator = canonical.LastIndexOf('|');
    if (lastSeparator < 0)
    {
        return canonical;
    }

    return canonical.Substring(lastSeparator + 1);
}
/// <summary>
/// Makes a canonical name from two parts: "root|member", or just the root when there is no member.
/// </summary>
string MakeCanonicalName(string root, string member)
{
    return member == null
        ? root
        : string.Concat(root, "|", member);
}
}
2013-10-27 22:07:40 +00:00
/// <summary>
/// Describes one member of an archive, as returned by IHawkFileArchiveHandler.Scan.
/// </summary>
public class HawkFileArchiveItem
{
    /// <summary>
    /// Gets or sets the index WITHIN THE ARCHIVE of the member
    /// (for internal tracking by an IHawkFileArchiveHandler).
    /// </summary>
    public int ArchiveIndex { get; set; }

    /// <summary>
    /// Gets or sets the index of this archive item.
    /// </summary>
    public int Index { get; set; }

    /// <summary>
    /// Gets or sets the member name.
    /// </summary>
    public string Name { get; set; }

    /// <summary>
    /// Gets or sets the size of the member file.
    /// </summary>
    public long Size { get; set; }
}
2014-02-04 21:15:33 +00:00
}
2013-10-27 22:07:40 +00:00