rough draft of file ID system. not ready to be used just yet, but I think it can go the distance. I'll have to hook it up later.

This commit is contained in:
zeromus 2014-12-03 06:14:59 +00:00
parent 78209348dd
commit 26c35ca7b8
4 changed files with 560 additions and 2 deletions

View File

@ -126,7 +126,7 @@ namespace BizHawk.Client.Common
// N64 roms are in one of the following formats:
// .Z64 = No swapping
// .N64 = Word Swapped
// .V64 = Bytse Swapped
// .V64 = Byte Swapped
// File extension does not always match the format
int size = source.Length;

View File

@ -68,6 +68,12 @@ namespace BizHawk.Common
this[key].Add(val);
}
/// <summary>
/// adds every element of <paramref name="val"/> to the collection stored under <paramref name="key"/>
/// </summary>
public void Add(K key, L val)
{
    foreach (var item in val)
    {
        //the indexer is looked up once per element, so nothing is touched when val is empty
        this[key].Add(item);
    }
}
/// <summary>reports whether the underlying dictionary field has an entry for <paramref name="key"/></summary>
public bool ContainsKey(K key) { return dictionary.ContainsKey(key); }
//explicit non-generic IEnumerable implementation; simply forwards to this type's own GetEnumerator()
IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); }

View File

@ -223,7 +223,7 @@
<DependentUpon>Atari7800.cs</DependentUpon>
</Compile>
<Compile Include="Consoles\Atari\7800\Atari7800.IInputPollable.cs">
<DependentUpon>Atari7800.cs</DependentUpon>
<DependentUpon>Atari7800.cs</DependentUpon>
</Compile>
<Compile Include="Consoles\Atari\7800\Atari7800.IMemoryDomains.cs">
<DependentUpon>Atari7800.cs</DependentUpon>
@ -548,6 +548,7 @@
<Compile Include="CPUs\Z80\Registers.cs" />
<Compile Include="CPUs\Z80\Tables.cs" />
<Compile Include="CPUs\Z80\Z80A.cs" />
<Compile Include="FileID.cs" />
<Compile Include="LibRetro.cs" />
<Compile Include="LibRetroEmulator.cs" />
<Compile Include="Properties\svnrev.cs" />

View File

@ -0,0 +1,551 @@
using System;
using System.IO;
using System.Collections.Generic;
using BizHawk.Common;
//HOW TO USE
//We don't expect anyone to use this fully yet. It's just over-engineered for future use.
//For now, just use it when you truly don't know what to do with a file.
//This system depends heavily on the provided extension. We're not going to exhaustively try every format all the time. If someone loads a cue which is named .sfc, we can't cope with that.
//However, common mistakes will be handled on an as-needed basis.
//TODO - check for archives too? Further, check archive contents (probably just based on filename)?
//TODO - parameter to enable checks vs. firmware, game databases
//TODO (in client) - costly hashes could happen only once the file type is known (and a hash for that filetype could be used)
namespace BizHawk.Emulation.Cores
{
/// <summary>
/// The kinds of file the identifier can report.
/// Ideally each member is one file type; for now some merely resemble a console, and a core is expected to
/// work out which concrete formats it can parse. Finer distinctions between near-identical formats are
/// reported through ExtraInfo on the FileIDResult rather than through extra members here.
/// Members are implicitly numbered in declaration order, so None is 0.
/// </summary>
public enum FileIDType
{
    None,

    //dont think this makes sense. shouldnt the multiple options be returned?
    Multiple,

    //an unknown disc
    Disc,

    PSX,
    PSP,

    Saturn,
    MegaCD,

    PCE,
    SGX,
    TurboCD,

    INES,
    FDS,
    UNIF,

    SFC,
    N64,

    GB,
    GBC,
    GBA,
    NDS,

    COL,

    SG,
    SMS,
    GG,
    S32X,

    //http://en.wikibooks.org/wiki/Genesis_Programming#ROM_header
    SMD,

    WS,
    WSC,
    NGC,

    C64,

    INT,

    A26,
    A52,
    A78,
    LNX,

    PSF,
}
/// <summary>
/// A single candidate answer from the identifier: a file type, how confident we are in it,
/// and any extra facts that were cheap to collect along the way.
/// </summary>
public class FileIDResult
{
    /// <summary>
    /// creates an empty result (type None, confidence 0)
    /// </summary>
    public FileIDResult()
    {
    }

    /// <summary>
    /// creates a result of the given type with confidence 0
    /// </summary>
    public FileIDResult(FileIDType type)
        : this(type, 0)
    {
    }

    /// <summary>
    /// creates a result of the given type with the given confidence
    /// </summary>
    public FileIDResult(FileIDType type, int confidence)
    {
        this.FileIDType = type;
        this.Confidence = confidence;
    }

    /// <summary>
    /// a percentage between 0 and 100 assessing the confidence of this result
    /// </summary>
    public int Confidence;

    /// <summary>
    /// the identified type; None when nothing was identified
    /// </summary>
    public FileIDType FileIDType;

    /// <summary>
    /// extra information which could be easily gotten during the file ID (region, suspected homebrew, CRC invalid, etc.)
    /// </summary>
    public Dictionary<string, object> ExtraInfo = new Dictionary<string, object>();
}
/// <summary>
/// The list of candidate results produced by one identification pass.
/// </summary>
public class FileIDResults : List<FileIDResult>
{
    /// <summary>
    /// creates an empty result list
    /// </summary>
    public FileIDResults() { }

    /// <summary>
    /// creates a result list holding a single item
    /// </summary>
    public FileIDResults(FileIDResult item)
    {
        base.Add(item);
    }

    /// <summary>
    /// Orders the results by confidence, most confident first, so that element 0 is the best guess.
    /// (Consumers such as FileID.IdentifySimple read element 0 as the winner and compare it with
    /// element 1 to detect a tie; an ascending order would hand them the least likely candidate.)
    /// The underlying List sort is not stable, so results with equal confidence come out in no particular order.
    /// </summary>
    public new void Sort()
    {
        //note the swapped operands: y compared against x yields a descending order
        base.Sort((x, y) => y.Confidence.CompareTo(x.Confidence));
    }

    /// <summary>
    /// indicates whether the client should try again after mounting the disc image for further inspection
    /// </summary>
    public bool ShouldTryDisc;
}
public class FileID
{
/// <summary>
/// parameters for an Identify job.
/// When Disc is set, Identify takes the disc codepath and does not look at the extension or stream.
/// </summary>
public class IdentifyParams
{
/// <summary>
/// The extension of the original file (with or without the .)
/// Identify strips leading dots and upper-cases it before use. May be null, in which case no extension-based testers run.
/// </summary>
public string Extension;
/// <summary>
/// a seekable stream which can be used.
/// The format testers set Position on it freely and do not restore it afterwards.
/// </summary>
public Stream SeekableStream;
/// <summary>
/// the file in question mounted as a disc (null when the file has not been mounted)
/// </summary>
public DiscSystem.Disc Disc;
}
/// <summary>
/// working state for one Identify call; this is what gets handed to each format tester
/// </summary>
class IdentifyJob
{
//copied from IdentifyParams.SeekableStream
public Stream Stream;
//the extension after Identify has stripped leading dots and upper-cased it; stays null when none was supplied
public string Extension;
//copied from IdentifyParams.Disc
public DiscSystem.Disc Disc;
}
/// <summary>
/// performs wise heuristics to identify a file.
/// this will attempt to return early if a confident result can be produced.
/// </summary>
/// <param name="p">the file to identify: a mounted disc, or an extension plus a seekable stream</param>
/// <returns>
/// the candidate results, sorted via FileIDResults.Sort.
/// The list is empty (with ShouldTryDisc set) for a CUE file, and empty when the extension is missing or unknown.
/// </returns>
/// <exception cref="ArgumentNullException">p is null</exception>
public FileIDResults Identify(IdentifyParams p)
{
    if (p == null)
        throw new ArgumentNullException("p");

    IdentifyJob job = new IdentifyJob()
    {
        Stream = p.SeekableStream,
        Disc = p.Disc
    };

    //if we have a disc, that's a separate codepath
    if (job.Disc != null)
        return IdentifyDisc(job);

    FileIDResults ret = new FileIDResults();

    string ext = p.Extension;
    if (ext != null)
    {
        //normalize to the form used for the ExtensionHandlers keys.
        //the invariant culture is required here: under e.g. a Turkish locale, ToUpper() turns "bin" into "BİN", which matches nothing
        ext = ext.TrimStart('.').ToUpperInvariant();
        job.Extension = ext;
    }

    //a cue sheet tells us nothing by itself; the client has to mount it and come back with a disc
    if (job.Extension == "CUE")
    {
        ret.ShouldTryDisc = true;
        return ret;
    }

    if (job.Extension != null)
    {
        //first test everything associated with this extension
        ExtensionInfo handler;
        if (ExtensionHandlers.TryGetValue(job.Extension, out handler))
        {
            foreach (var del in handler.Testers)
            {
                var fidr = del(job);

                //a tester with no opinion normally returns an empty (None) result, but tolerate null as well
                //so that an unfinished tester can't take the whole identification down
                if (fidr == null || fidr.FileIDType == FileIDType.None)
                    continue;

                ret.Add(fidr);
            }

            //add a low confidence result just based on extension, if it doesnt exist
            if (ret.Find((x) => x.FileIDType == handler.DefaultForExtension) == null)
                ret.Add(new FileIDResult(handler.DefaultForExtension, 5));
        }
    }

    //a single sort once everything has been collected is sufficient
    ret.Sort();

    //if we didnt find anything high confidence, try all the testers (TODO)

    return ret;
}
/// <summary>
/// performs wise heuristics to identify a file (simple version).
/// Runs Identify and boils the result list down to one answer:
/// Disc when the file needs to be mounted first, None when there are no candidates,
/// Multiple when the first two candidates have equal confidence, and otherwise the type of the first candidate.
/// </summary>
public FileIDType IdentifySimple(IdentifyParams p)
{
    FileIDResults results = Identify(p);

    if (results.ShouldTryDisc)
    {
        return FileIDType.Disc;
    }

    switch (results.Count)
    {
        case 0:
            return FileIDType.None;

        case 1:
            return results[0].FileIDType;

        default:
            //Identify sorts the list before handing it back; entry 0 is taken as the leading candidate
            bool tied = results[0].Confidence == results[1].Confidence;
            return tied ? FileIDType.Multiple : results[0].FileIDType;
    }
}
/// <summary>
/// identifies a mounted disc by asking the disc for its detected type and mapping that onto a FileIDType.
/// Always returns a list with exactly one entry; for unrecognized discs that entry is an empty (None) result.
/// </summary>
FileIDResults IdentifyDisc(IdentifyJob job)
{
    //DiscSystem could use some newer approaches from this file (instead of parsing ISO filesystem... maybe?)
    var detected = job.Disc.DetectDiscType();

    FileIDResult single;
    if (detected == DiscSystem.DiscType.SegaSaturn)
    {
        single = new FileIDResult(FileIDType.Saturn, 100);
    }
    else if (detected == DiscSystem.DiscType.SonyPSP)
    {
        single = new FileIDResult(FileIDType.PSP, 100);
    }
    else if (detected == DiscSystem.DiscType.SonyPSX)
    {
        single = new FileIDResult(FileIDType.PSX, 100);
    }
    else if (detected == DiscSystem.DiscType.MegaCD)
    {
        single = new FileIDResult(FileIDType.MegaCD, 100);
    }
    else if (detected == DiscSystem.DiscType.TurboCD)
    {
        //note the much lower confidence for this one
        single = new FileIDResult(FileIDType.TurboCD, 5);
    }
    else
    {
        //UnknownCDFS, UnknownFormat and anything else
        single = new FileIDResult();
    }

    return new FileIDResults(single);
}
/// <summary>
/// describes a magic string expected at a fixed position in a file (consumed by CheckMagicOne)
/// </summary>
class SimpleMagicRecord
{
//position of the first byte to compare; CheckMagicOne adds its own offset argument on top of this
public int Offset;
//the expected bytes, one per char: each char is compared against a single byte read from the stream
public string Key;
//how many leading chars of Key to compare; -1 (the default) means the whole Key
public int Length = -1;
}
//some of these (NES, UNIF for instance) should be lower confidence probably...
//if you change some of the Length arguments for longer keys, please make notes about why
//
//WARNING about escapes in these keys: a C# \x escape swallows up to FOUR hex digits, so "\x01A..." is the single
//char U+001A and "\x01CD..." is the single char U+01CD. Whenever a control byte is followed by text that could be
//read as a hex digit (0-9, A-F, a-f), write it as \u0001 (always exactly four digits) instead.
//The long \xNN\xNN... runs below are fine because each escape is terminated by the next backslash.
static class SimpleMagics
{
    public static SimpleMagicRecord INES = new SimpleMagicRecord { Offset = 0, Key = "NES" };
    public static SimpleMagicRecord UNIF = new SimpleMagicRecord { Offset = 0, Key = "UNIF" };
    //byte 0x01 followed by the text (written with \u for consistency with the other keys; same value as before)
    public static SimpleMagicRecord FDS = new SimpleMagicRecord { Offset = 0, Key = "\u0001*NINTENDO-HVC*" };

    //the GBA nintendo logo.. we'll only use 16 bytes of it but theyre all here, for reference
    //we cant expect these roms to be normally sized, but we may be able to find other features of the header to use for extra checks
    public static SimpleMagicRecord GBA = new SimpleMagicRecord { Offset = 4, Length = 16, Key = "\x24\xFF\xAE\x51\x69\x9A\xA2\x21\x3D\x84\x82\x0A\x84\xE4\x09\xAD\x11\x24\x8B\x98\xC0\x81\x7F\x21\xA3\x52\xBE\x19\x93\x09\xCE\x20\x10\x46\x4A\x4A\xF8\x27\x31\xEC\x58\xC7\xE8\x33\x82\xE3\xCE\xBF\x85\xF4\xDF\x94\xCE\x4B\x09\xC1\x94\x56\x8A\xC0\x13\x72\xA7\xFC\x9F\x84\x4D\x73\xA3\xCA\x9A\x61\x58\x97\xA3\x27\xFC\x03\x98\x76\x23\x1D\xC7\x61\x03\x04\xAE\x56\xBF\x38\x84\x00\x40\xA7\x0E\xFD\xFF\x52\xFE\x03\x6F\x95\x30\xF1\x97\xFB\xC0\x85\x60\xD6\x80\x25\xA9\x63\xBE\x03\x01\x4E\x38\xE2\xF9\xA2\x34\xFF\xBB\x3E\x03\x44\x78\x00\x90\xCB\x88\x11\x3A\x94\x65\xC0\x7C\x63\x87\xF0\x3C\xAF\xD6\x25\xE4\x8B\x38\x0A\xAC\x72\x21\xD4\xF8\x07" };
    public static SimpleMagicRecord NDS = new SimpleMagicRecord { Offset = 0xC0, Length = 16, Key = "\x24\xFF\xAE\x51\x69\x9A\xA2\x21\x3D\x84\x82\x0A\x84\xE4\x09\xAD\x11\x24\x8B\x98\xC0\x81\x7F\x21\xA3\x52\xBE\x19\x93\x09\xCE\x20\x10\x46\x4A\x4A\xF8\x27\x31\xEC\x58\xC7\xE8\x33\x82\xE3\xCE\xBF\x85\xF4\xDF\x94\xCE\x4B\x09\xC1\x94\x56\x8A\xC0\x13\x72\xA7\xFC\x9F\x84\x4D\x73\xA3\xCA\x9A\x61\x58\x97\xA3\x27\xFC\x03\x98\x76\x23\x1D\xC7\x61\x03\x04\xAE\x56\xBF\x38\x84\x00\x40\xA7\x0E\xFD\xFF\x52\xFE\x03\x6F\x95\x30\xF1\x97\xFB\xC0\x85\x60\xD6\x80\x25\xA9\x63\xBE\x03\x01\x4E\x38\xE2\xF9\xA2\x34\xFF\xBB\x3E\x03\x44\x78\x00\x90\xCB\x88\x11\x3A\x94\x65\xC0\x7C\x63\x87\xF0\x3C\xAF\xD6\x25\xE4\x8B\x38\x0A\xAC\x72\x21\xD4\xF8\x07" };

    public static SimpleMagicRecord GB = new SimpleMagicRecord { Offset=0x104, Length = 16, Key = "\xCE\xED\x66\x66\xCC\x0D\x00\x0B\x03\x73\x00\x83\x00\x0C\x00\x0D\x00\x08\x11\x1F\x88\x89\x00\x0E\xDC\xCC\x6E\xE6\xDD\xDD\xD9\x99\xBB\xBB\x67\x63\x6E\x0E\xEC\xCC\xDD\xDC\x99\x9F\xBB\xB9\x33\x3E" };

    public static SimpleMagicRecord S32X = new SimpleMagicRecord { Offset = 0x100, Key = "SEGA 32X" };

    public static SimpleMagicRecord SEGAGENESIS = new SimpleMagicRecord { Offset = 0x100, Key = "SEGA GENESIS" };
    public static SimpleMagicRecord SEGAMEGADRIVE = new SimpleMagicRecord { Offset = 0x100, Key = "SEGA MEGA DRIVE" };
    public static SimpleMagicRecord SEGASATURN = new SimpleMagicRecord { Offset = 0, Key = "SEGA SEGASATURN" };
    public static SimpleMagicRecord SEGADISCSYSTEM = new SimpleMagicRecord { Offset = 0, Key = "SEGADISCSYSTEM" };

    public static SimpleMagicRecord PSX = new SimpleMagicRecord { Offset = 0x24E0, Key = " Licensed by Sony Computer Entertainment" };

    //start of a CDFS volume descriptor: type byte 0x01, "CD001", version byte 0x01, a zero byte, then the system id.
    //(previously written as "\x01CD001\x01\0x00PSP GAME", which decoded to U+01CD,"001",0x01,NUL,"x00PSP GAME" and could never match)
    public static SimpleMagicRecord PSP = new SimpleMagicRecord { Offset = 0x8000, Key = "\u0001CD001\u0001\u0000PSP GAME" };

    //https://sites.google.com/site/atari7800wiki/a78-header
    //byte 0x01 followed by the text.
    //(previously written as "\x01ATARI7800", which decoded to U+001A,"TARI7800" and could never match)
    public static SimpleMagicRecord A78 = new SimpleMagicRecord { Offset = 0, Key = "\u0001ATARI7800" };

    //could be at various offsets?
    public static SimpleMagicRecord TMR_SEGA = new SimpleMagicRecord { Offset = 0x7FF0, Key = "TMR SEGA" };
}
/// <summary>
/// what we know about one file extension: the type to assume by default, and the testers that can examine the content
/// </summary>
class ExtensionInfo
{
    //the type reported (by Identify, at low confidence) when no tester produced it
    public FileIDType DefaultForExtension;

    //content testers to run for this extension; empty when there are none
    public List<FormatTester> Testers;

    /// <summary>
    /// creates the info with zero or one tester; pass null for tester when nothing can examine the content yet
    /// </summary>
    public ExtensionInfo(FileIDType defaultForExtension, FormatTester tester)
    {
        DefaultForExtension = defaultForExtension;
        Testers = new List<FormatTester>(1);

        if (tester == null)
        {
            return;
        }

        Testers.Add(tester);
    }
}
/// <summary>
/// testers to try for each extension, along with a default for the extension.
/// Keys are upper case and carry no leading dot, which is the form Identify normalizes extensions to before the lookup.
/// </summary>
static Dictionary<string, ExtensionInfo> ExtensionHandlers = new Dictionary<string, ExtensionInfo>
{
    { "NES",  new ExtensionInfo(FileIDType.INES, Test_INES) },
    { "FDS",  new ExtensionInfo(FileIDType.FDS,  j => Test_Simple(j, FileIDType.FDS, SimpleMagics.FDS)) },
    { "GBA",  new ExtensionInfo(FileIDType.GBA,  j => Test_Simple(j, FileIDType.GBA, SimpleMagics.GBA)) },
    { "NDS",  new ExtensionInfo(FileIDType.NDS,  j => Test_Simple(j, FileIDType.NDS, SimpleMagics.NDS)) },
    { "UNF",  new ExtensionInfo(FileIDType.UNIF, Test_UNIF) },
    { "UNIF", new ExtensionInfo(FileIDType.UNIF, Test_UNIF) },
    { "GB",   new ExtensionInfo(FileIDType.GB,   Test_GB_GBC) },
    { "GBC",  new ExtensionInfo(FileIDType.GBC,  Test_GB_GBC) },
    { "N64",  new ExtensionInfo(FileIDType.N64,  Test_N64) },
    { "Z64",  new ExtensionInfo(FileIDType.N64,  Test_N64) },
    { "V64",  new ExtensionInfo(FileIDType.N64,  Test_N64) },
    { "A78",  new ExtensionInfo(FileIDType.A78,  Test_A78) },
    { "SMS",  new ExtensionInfo(FileIDType.SMS,  Test_SMS) },

    { "BIN",  new ExtensionInfo(FileIDType.Multiple, Test_BIN_ISO) },
    { "ISO",  new ExtensionInfo(FileIDType.Multiple, Test_BIN_ISO) },

    { "JAD",  new ExtensionInfo(FileIDType.Multiple, Test_JAD_JAC) },
    { "JAC",  new ExtensionInfo(FileIDType.Multiple, Test_JAD_JAC) },

    //royal mess
    { "MD",   new ExtensionInfo(FileIDType.SMD, null) },
    { "SMD",  new ExtensionInfo(FileIDType.SMD, null) },
    { "GEN",  new ExtensionInfo(FileIDType.SMD, null) },

    //nothing yet...
    { "PSF",  new ExtensionInfo(FileIDType.PSF, null) },
    { "INT",  new ExtensionInfo(FileIDType.INT, null) },
    { "SFC",  new ExtensionInfo(FileIDType.SFC, null) },
    { "SMC",  new ExtensionInfo(FileIDType.SFC, null) },
    { "LNX",  new ExtensionInfo(FileIDType.LNX, null) },
    { "SG",   new ExtensionInfo(FileIDType.SG,  null) },
    { "SGX",  new ExtensionInfo(FileIDType.SGX, null) },
    { "COL",  new ExtensionInfo(FileIDType.COL, null) },
    { "A52",  new ExtensionInfo(FileIDType.A52, null) },
    { "A26",  new ExtensionInfo(FileIDType.A26, null) },
    { "PCE",  new ExtensionInfo(FileIDType.PCE, null) },
    { "GG",   new ExtensionInfo(FileIDType.GG,  null) },
    { "WS",   new ExtensionInfo(FileIDType.WS,  null) },
    { "WSC",  new ExtensionInfo(FileIDType.WSC, null) },
    { "NGC",  new ExtensionInfo(FileIDType.NGC, null) },

    { "32X",  new ExtensionInfo(FileIDType.S32X, j => Test_Simple(j, FileIDType.S32X, SimpleMagics.S32X)) },

    //various C64 formats.. can we distinguish between these?
    { "PRG",  new ExtensionInfo(FileIDType.C64, null) },
    { "D64",  new ExtensionInfo(FileIDType.C64, null) },
    { "T64",  new ExtensionInfo(FileIDType.C64, null) },
    { "G64",  new ExtensionInfo(FileIDType.C64, null) },
    { "CRT",  new ExtensionInfo(FileIDType.C64, null) },
    { "NIB",  new ExtensionInfo(FileIDType.C64, null) }, //not supported yet

    //for now
    { "ROM",  new ExtensionInfo(FileIDType.Multiple, null) }, //could be MSX too
};
/// <summary>
/// a content test for one format: examines the job and returns its verdict.
/// Identify drops any verdict whose FileIDType is None, which is how a tester says "no opinion".
/// </summary>
delegate FileIDResult FormatTester(IdentifyJob job);
/// <summary>
/// checks for the magic string (bytewise ASCII check) at the record's address.
/// With no extra offsets the record is checked as-is; otherwise each offset is added to the record's own
/// offset in turn, and the first match wins.
/// </summary>
/// <returns>true if the magic was found at any of the candidate positions</returns>
static bool CheckMagic(Stream stream, SimpleMagicRecord rec, params int[] offsets)
{
    if (offsets.Length == 0)
    {
        return CheckMagicOne(stream, rec, 0);
    }

    for (int i = 0; i < offsets.Length; i++)
    {
        if (CheckMagicOne(stream, rec, offsets[i]))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// compares the stream content at (rec.Offset + offset) against the record's key, one byte per key char.
/// Leaves the stream positioned wherever the comparison stopped.
/// </summary>
/// <returns>true if every compared byte matched; false on the first mismatch or if the stream ends early</returns>
static bool CheckMagicOne(Stream stream, SimpleMagicRecord rec, int offset)
{
    stream.Position = rec.Offset + offset;

    string expected = rec.Key;

    //a Length of -1 means "compare the whole key"
    int count = (rec.Length == -1) ? expected.Length : rec.Length;

    int index = 0;
    while (index < count)
    {
        int actual = stream.ReadByte();

        //ReadByte reports -1 once the stream is exhausted
        if (actual == -1 || actual != expected[index])
        {
            return false;
        }

        index++;
    }

    return true;
}
/// <summary>
/// moves the stream to the given absolute offset and reads the byte there
/// </summary>
/// <returns>the byte value, or -1 if the offset is at or past the end of the stream</returns>
static int ReadByte(Stream stream, int ofs)
{
    stream.Position = ofs;
    int value = stream.ReadByte();
    return value;
}
/// <summary>
/// tests for an INES file: requires the INES magic at the start of the stream, then rates the file by its size
/// </summary>
/// <returns>an empty result without the magic; otherwise INES at confidence 100, or 50 when the size looks wrong</returns>
static FileIDResult Test_INES(IdentifyJob job)
{
    bool hasMagic = CheckMagic(job.Stream, SimpleMagics.INES);
    if (hasMagic)
    {
        //an INES file should be a multiple of 8k, with the 16 byte header.
        //if it isnt.. this is fishy.
        const int headerSize = 16;
        const int bankSize = 8 * 1024;

        long payload = job.Stream.Length - headerSize;
        bool wholeBanks = (payload & (bankSize - 1)) == 0;

        return new FileIDResult(FileIDType.INES, wholeBanks ? 100 : 50);
    }

    return new FileIDResult();
}
/// <summary>
/// the simplest possible tester: the file is of the given type if, and only if, the given magic is present
/// </summary>
/// <param name="job">the job whose stream is examined</param>
/// <param name="type">the type to report when the magic is found</param>
/// <param name="magic">the magic to look for</param>
/// <returns>the given type at confidence 100 when the magic matches; otherwise an empty (None) result</returns>
static FileIDResult Test_Simple(IdentifyJob job, FileIDType type, SimpleMagicRecord magic)
{
    //(the result object that used to be allocated up front here was never used)
    if (CheckMagic(job.Stream, magic))
        return new FileIDResult(type, 100);
    else
        return new FileIDResult();
}
/// <summary>
/// tests for a UNIF file, going by nothing but the UNIF magic at the start of the stream
/// </summary>
/// <returns>UNIF at confidence 100 when the magic is present; otherwise an empty (None) result</returns>
static FileIDResult Test_UNIF(IdentifyJob job)
{
    if (CheckMagic(job.Stream, SimpleMagics.UNIF))
    {
        //TODO - simple parser (for starters, check for a known chunk being next, see http://wiki.nesdev.com/w/index.php/UNIF)
        return new FileIDResult(FileIDType.UNIF, 100);
    }

    return new FileIDResult();
}
/// <summary>
/// tests for a GB or GBC rom: requires the GB magic, then uses the header byte at 0x143 to tell the two apart
/// </summary>
/// <returns>
/// an empty (None) result without the magic; otherwise confidence 100, typed GBC when bit 7 of the byte at 0x143 is set and GB when it is not
/// </returns>
static FileIDResult Test_GB_GBC(IdentifyJob job)
{
    if (!CheckMagic(job.Stream, SimpleMagics.GB))
        return new FileIDResult();

    var ret = new FileIDResult(FileIDType.GB, 100);

    //ReadByte returns -1 when the file ends before 0x143 (only 16 bytes at 0x104 are needed for the magic to pass).
    //-1 has every bit set, so without the explicit check a truncated file would be reported as GBC.
    int type = ReadByte(job.Stream, 0x143);
    if (type != -1 && (type & 0x80) != 0)
        ret.FileIDType = FileIDType.GBC;

    //could check cart type and rom size for extra info if necessary

    return ret;
}
/// <summary>
/// placeholder tester for SMS roms: performs no checks yet and always returns an empty (None) result,
/// which Identify discards (leaving only the low-confidence guess based on the extension)
/// </summary>
static FileIDResult Test_SMS(IdentifyJob job)
{
//http://www.smspower.org/Development/ROMHeader
//actually, not sure how to handle this yet
return new FileIDResult();
}
/// <summary>
/// "tests" for an N64 rom. The content is not examined at all yet; the result is a low-confidence (5) N64
/// with the byte ordering implied by the extension recorded in ExtraInfo.
/// </summary>
static FileIDResult Test_N64(IdentifyJob job)
{
    // .Z64 = No swapping
    // .N64 = Word Swapped
    // .V64 = Byte Swapped

    //not sure how to check for these yet...
    var result = new FileIDResult(FileIDType.N64, 5);

    switch (job.Extension)
    {
        case "V64":
            result.ExtraInfo["byteswap"] = true;
            break;

        case "N64":
            result.ExtraInfo["wordswap"] = true;
            break;
    }

    return result;
}
/// <summary>
/// tests for an A78 file. A file whose size is a whole number of KB plus 128 bytes may carry a header,
/// in which case the A78 magic is looked for at the start of the stream.
/// </summary>
/// <returns>A78 at confidence 100 when a header with the magic is found; otherwise an empty (None) result</returns>
static FileIDResult Test_A78(IdentifyJob job)
{
    //kept as a long: there is no reason to truncate the stream length
    long len = job.Stream.Length;

    //we may have a header to analyze
    if (len % 1024 == 128)
    {
        //the result used to be constructed here and then thrown away (missing return), so this tester never reported a match
        if (CheckMagic(job.Stream, SimpleMagics.A78))
            return new FileIDResult(FileIDType.A78, 100);
    }

    //headerless files (len % 1024 == 0) and oddly sized files: nothing to go on yet
    return new FileIDResult();
}
/// <summary>
/// tests a .BIN or .ISO file, which could be a cartridge rom or a raw disc image.
/// Checks, in order: SMD header words (BIN only), then the PSX, PSP, Saturn and MegaCD magics.
/// </summary>
/// <returns>
/// the first match (confidence 90 or 95); if nothing matches, a confidence 1 guess:
/// Disc for an ISO, Multiple for anything else
/// </returns>
static FileIDResult Test_BIN_ISO(IdentifyJob job)
{
    //ok, this is complicated.
    //there are lots of mislabeled bins/isos so lets just treat them the same (mostly)
    //if the BIN cant be recognized, but it is small, it is more likely some other rom BIN than a disc (turbocd or other)

    if (job.Extension == "BIN")
    {
        //first we can check for SMD magic words.
        //since this extension is ambiguous, we can't be completely sure about it. but it's almost surely accurate
        if (CheckMagic(job.Stream, SimpleMagics.SEGAGENESIS))
        {
            var ret = new FileIDResult(FileIDType.SMD, 95);
            ret.ExtraInfo["type"] = "genesis";
            return ret;
        }
        if (CheckMagic(job.Stream, SimpleMagics.SEGAMEGADRIVE))
        {
            var ret = new FileIDResult(FileIDType.SMD, 95);
            ret.ExtraInfo["type"] = "megadrive";
            //this return was missing, so a MEGA DRIVE rom fell through to the disc checks below and its result was lost
            return ret;
        }
    }

    //well... guess it's a disc.
    //since it's just a bin, we dont need the user to provide a DiscSystem disc.
    //lets just analyze this as best we can.

    //for PSX, we have a magic word to look for.
    //it's at 0x24E0 with a mode2 (2352 byte) track 1.
    //what if its 2048 byte?
    //i found a ".iso" which was actually 2352 byte sectors..
    //found a hilarious ".bin.iso" which was actually 2352 byte sectors
    //so, I think it's possible that every valid PSX disc is mode2 in the track 1
    if (CheckMagic(job.Stream, SimpleMagics.PSX))
    {
        var ret = new FileIDResult(FileIDType.PSX, 95);
        //this is an unreliable way to get a PSX game!
        ret.ExtraInfo["unreliable"] = true;
        return ret;
    }

    //it's not proven that this is reliable. this is actually part of the mode1 CDFS header. perhaps it's mobile?
    //if it's mobile, we'll need to mount it as an ISO file here via discsystem
    if (CheckMagic(job.Stream, SimpleMagics.PSP))
        return new FileIDResult(FileIDType.PSP, 95);

    //if this was an ISO, we might discover the magic word at offset 0...
    //if it was a mode2/2352 bin, we might discover it at offset 16 (after the sync)
    //(a match at the position typical for the extension scores 95, a match at the other position 90)
    if (CheckMagic(job.Stream, SimpleMagics.SEGASATURN, 0))
        return new FileIDResult(FileIDType.Saturn, job.Extension == "ISO" ? 95 : 90);
    if (CheckMagic(job.Stream, SimpleMagics.SEGASATURN, 16))
        return new FileIDResult(FileIDType.Saturn, job.Extension == "BIN" ? 95 : 90);
    if (CheckMagic(job.Stream, SimpleMagics.SEGADISCSYSTEM, 0))
        return new FileIDResult(FileIDType.MegaCD, job.Extension == "ISO" ? 95 : 90);
    if (CheckMagic(job.Stream, SimpleMagics.SEGADISCSYSTEM, 16))
        return new FileIDResult(FileIDType.MegaCD, job.Extension == "BIN" ? 95 : 90);

    if (job.Extension == "ISO")
        return new FileIDResult(FileIDType.Disc, 1);
    else
        return new FileIDResult(FileIDType.Multiple, 1);
}
/// <summary>
/// placeholder tester for JAD/JAC files: performs no checks yet
/// </summary>
/// <returns>
/// an empty (None) result, like every other tester with no opinion.
/// (This used to return null, which Identify dereferences straight away, so identifying any .JAD or .JAC file crashed.)
/// </returns>
static FileIDResult Test_JAD_JAC(IdentifyJob job)
{
    //TBD
    //just mount it as a disc and send it through the disc checker?
    return new FileIDResult();
}
}
}