diff --git a/BizHawk.Client.Common/RomGame.cs b/BizHawk.Client.Common/RomGame.cs
index 4bf3b6ec15..8c526cb2fd 100644
--- a/BizHawk.Client.Common/RomGame.cs
+++ b/BizHawk.Client.Common/RomGame.cs
@@ -126,7 +126,7 @@ namespace BizHawk.Client.Common
 			// N64 roms are in one of the following formats:
 			// .Z64 = No swapping
 			// .N64 = Word Swapped
-			// .V64 = Bytse Swapped
+			// .V64 = Byte Swapped
 			// File extension does not always match the format
 
 			int size = source.Length;
diff --git a/BizHawk.Common/CustomCollections.cs b/BizHawk.Common/CustomCollections.cs
index 537b685720..2aeee81b32 100644
--- a/BizHawk.Common/CustomCollections.cs
+++ b/BizHawk.Common/CustomCollections.cs
@@ -68,6 +68,12 @@ namespace BizHawk.Common
 			this[key].Add(val);
 		}
 
+		public void Add(K key, L val)
+		{
+			foreach (var v in val)
+				this[key].Add(v);
+		}
+
 		public bool ContainsKey(K key) { return dictionary.ContainsKey(key); }
 
 		IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); }
diff --git a/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj b/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj
index b88a55cb5e..216854d14b 100644
--- a/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj
+++ b/BizHawk.Emulation.Cores/BizHawk.Emulation.Cores.csproj
@@ -223,7 +223,7 @@
 Atari7800.cs
- Atari7800.cs
+ Atari7800.cs
 Atari7800.cs
@@ -548,6 +548,7 @@
+
diff --git a/BizHawk.Emulation.Cores/FileID.cs b/BizHawk.Emulation.Cores/FileID.cs
new file mode 100644
index 0000000000..06db1731a4
--- /dev/null
+++ b/BizHawk.Emulation.Cores/FileID.cs
@@ -0,0 +1,553 @@
+using System;
+using System.IO;
+using System.Collections.Generic;
+
+using BizHawk.Common;
+
+//HOW TO USE
+//we dont expect anyone to use this fully yet. It's just over-engineered for future use.
+//for now, just use it when you truly dont know what to do with a file.
+//This system depends heavily on the provided extension. We're not going to exhaustively try every format all the time. If someone loads a cue which is named .sfc, we cant cope with that.
+//However, common mistakes will be handled, on an as-needed basis.
+
+//TODO - check for archives too? further, check archive contents (probably just based on filename)?
+//TODO - parameter to enable checks vs firmware, game databases
+//TODO (in client) - costly hashes could happen only once the file type is known (and a hash for that filetype could be used)
+
+namespace BizHawk.Emulation.Cores
+{
+	/// <summary>
+	/// Each of these should ideally represent a single file type.
+	/// However for now they just may resemble a console, and a core would know how to parse some set of those after making its own determination.
+	/// If formats are very similar but with small differences, and that determination can be made, then it will be in the ExtraInfo in the FileIDResult
+	/// </summary>
+	public enum FileIDType
+	{
+		None,
+		Multiple, //dont think this makes sense. shouldnt the multiple options be returned?
+
+		Disc, //an unknown disc
+		PSX, PSP,
+		Saturn, MegaCD,
+
+		PCE, SGX, TurboCD,
+		INES, FDS, UNIF,
+		SFC, N64,
+		GB, GBC, GBA, NDS,
+		COL,
+		SG, SMS, GG, S32X,
+		SMD, //http://en.wikibooks.org/wiki/Genesis_Programming#ROM_header
+
+		WS, WSC, NGC,
+
+		C64,
+		INT,
+		A26, A52, A78, LNX,
+
+		PSF,
+	}
+
+	public class FileIDResult
+	{
+		public FileIDResult()
+		{
+		}
+
+		public FileIDResult(FileIDType type, int confidence)
+		{
+			FileIDType = type;
+			Confidence = confidence;
+		}
+
+		public FileIDResult(FileIDType type)
+		{
+			FileIDType = type;
+		}
+
+		/// <summary>
+		/// a percentage between 0 and 100 assessing the confidence of this result
+		/// </summary>
+		public int Confidence;
+
+		/// <summary>
+		/// the file type this result proposes (None when a tester did not recognize the file)
+		/// </summary>
+		public FileIDType FileIDType;
+
+		/// <summary>
+		/// extra information which could be easily gotten during the file ID (region, suspected homebrew, CRC invalid, etc.)
+		/// </summary>
+		public Dictionary<string, object> ExtraInfo = new Dictionary<string, object>();
+	}
+
+	public class FileIDResults : List<FileIDResult>
+	{
+		public FileIDResults() { }
+		public FileIDResults(FileIDResult item)
+		{
+			base.Add(item);
+		}
+		public new void Sort()
+		{
+			//highest confidence first: IdentifySimple treats element 0 as the best candidate
+			base.Sort((x, y) => y.Confidence.CompareTo(x.Confidence));
+		}
+
+		/// <summary>
+		/// indicates whether the client should try again after mounting the disc image for further inspection
+		/// </summary>
+		public bool ShouldTryDisc;
+	}
+
+	public class FileID
+	{
+		/// <summary>
+		/// parameters for an Identify job
+		/// </summary>
+		public class IdentifyParams
+		{
+			/// <summary>
+			/// The extension of the original file (with or without the .)
+			/// </summary>
+			public string Extension;
+
+			/// <summary>
+			/// a seekable stream which can be used
+			/// </summary>
+			public Stream SeekableStream;
+
+			/// <summary>
+			/// the file in question mounted as a disc
+			/// </summary>
+			public DiscSystem.Disc Disc;
+		}
+
+		class IdentifyJob
+		{
+			public Stream Stream;
+			public string Extension;
+			public DiscSystem.Disc Disc;
+		}
+
+		/// <summary>
+		/// performs wise heuristics to identify a file.
+		/// this will attempt to return early if a confident result can be produced.
+		/// </summary>
+		public FileIDResults Identify(IdentifyParams p)
+		{
+			IdentifyJob job = new IdentifyJob() {
+				Stream = p.SeekableStream,
+				Disc = p.Disc
+			};
+
+			//if we have a disc, that's a separate codepath
+			if (job.Disc != null)
+				return IdentifyDisc(job);
+
+			FileIDResults ret = new FileIDResults();
+
+			string ext = p.Extension;
+			if(ext != null)
+			{
+				ext = ext.TrimStart('.').ToUpperInvariant();
+				job.Extension = ext;
+			}
+
+			if (job.Extension == "CUE")
+			{
+				ret.ShouldTryDisc = true;
+				return ret;
+			}
+
+			if(job.Extension != null)
+			{
+				//first test everything associated with this extension
+				ExtensionInfo handler = null;
+				if (ExtensionHandlers.TryGetValue(ext, out handler))
+				{
+					foreach (var del in handler.Testers)
+					{
+						var fidr = del(job);
+						if (fidr == null || fidr.FileIDType == FileIDType.None)
+							continue;
+						ret.Add(fidr);
+					}
+
+					ret.Sort();
+
+					//add a low confidence result just based on extension, if it doesnt exist
+					if(ret.Find( (x) => x.FileIDType == handler.DefaultForExtension) == null)
+					{
+						var fidr = new FileIDResult(handler.DefaultForExtension, 5);
+						ret.Add(fidr);
+					}
+				}
+			}
+
+			ret.Sort();
+
+			//if we didnt find anything high confidence, try all the testers (TODO)
+
+			return ret;
+		}
+
+		/// <summary>
+		/// performs wise heuristics to identify a file (simple version)
+		/// </summary>
+		public FileIDType IdentifySimple(IdentifyParams p)
+		{
+			var ret = Identify(p);
+			if (ret.ShouldTryDisc)
+				return FileIDType.Disc;
+			if (ret.Count == 0)
+				return FileIDType.None;
+			else if(ret.Count == 1)
+				return ret[0].FileIDType;
+			else if (ret[0].Confidence == ret[1].Confidence)
+				return FileIDType.Multiple;
+			else return ret[0].FileIDType;
+		}
+
+		FileIDResults IdentifyDisc(IdentifyJob job)
+		{
+			//DiscSystem could use some newer approaches from this file (instead of parsing ISO filesystem... maybe?)
+			switch (job.Disc.DetectDiscType())
+			{
+				case DiscSystem.DiscType.SegaSaturn:
+					return new FileIDResults(new FileIDResult(FileIDType.Saturn, 100));
+
+				case DiscSystem.DiscType.SonyPSP:
+					return new FileIDResults(new FileIDResult(FileIDType.PSP, 100));
+
+				case DiscSystem.DiscType.SonyPSX:
+					return new FileIDResults(new FileIDResult(FileIDType.PSX, 100));
+
+				case DiscSystem.DiscType.MegaCD:
+					return new FileIDResults(new FileIDResult(FileIDType.MegaCD, 100));
+
+				case DiscSystem.DiscType.TurboCD:
+					return new FileIDResults(new FileIDResult(FileIDType.TurboCD, 5));
+
+				case DiscSystem.DiscType.UnknownCDFS:
+				case DiscSystem.DiscType.UnknownFormat:
+				default:
+					return new FileIDResults(new FileIDResult());
+			}
+		}
+
+		class SimpleMagicRecord
+		{
+			public int Offset;
+			public string Key;
+			public int Length = -1;
+		}
+
+		//some of these (NES, UNIF for instance) should be lower confidence probably...
+		//if you change some of the Length arguments for longer keys, please make notes about why
+		static class SimpleMagics
+		{
+			public static SimpleMagicRecord INES = new SimpleMagicRecord { Offset = 0, Key = "NES" };
+			public static SimpleMagicRecord UNIF = new SimpleMagicRecord { Offset = 0, Key = "UNIF" };
+
+			public static SimpleMagicRecord FDS = new SimpleMagicRecord { Offset = 0, Key = "\x01*NINTENDO-HVC*" };
+
+			//the GBA nintendo logo.. we'll only use 16 bytes of it but theyre all here, for reference
+			//we cant expect these roms to be normally sized, but we may be able to find other features of the header to use for extra checks
+			public static SimpleMagicRecord GBA = new SimpleMagicRecord { Offset = 4, Length = 16, Key = "\x24\xFF\xAE\x51\x69\x9A\xA2\x21\x3D\x84\x82\x0A\x84\xE4\x09\xAD\x11\x24\x8B\x98\xC0\x81\x7F\x21\xA3\x52\xBE\x19\x93\x09\xCE\x20\x10\x46\x4A\x4A\xF8\x27\x31\xEC\x58\xC7\xE8\x33\x82\xE3\xCE\xBF\x85\xF4\xDF\x94\xCE\x4B\x09\xC1\x94\x56\x8A\xC0\x13\x72\xA7\xFC\x9F\x84\x4D\x73\xA3\xCA\x9A\x61\x58\x97\xA3\x27\xFC\x03\x98\x76\x23\x1D\xC7\x61\x03\x04\xAE\x56\xBF\x38\x84\x00\x40\xA7\x0E\xFD\xFF\x52\xFE\x03\x6F\x95\x30\xF1\x97\xFB\xC0\x85\x60\xD6\x80\x25\xA9\x63\xBE\x03\x01\x4E\x38\xE2\xF9\xA2\x34\xFF\xBB\x3E\x03\x44\x78\x00\x90\xCB\x88\x11\x3A\x94\x65\xC0\x7C\x63\x87\xF0\x3C\xAF\xD6\x25\xE4\x8B\x38\x0A\xAC\x72\x21\xD4\xF8\x07" };
+			public static SimpleMagicRecord NDS = new SimpleMagicRecord { Offset = 0xC0, Length = 16, Key = "\x24\xFF\xAE\x51\x69\x9A\xA2\x21\x3D\x84\x82\x0A\x84\xE4\x09\xAD\x11\x24\x8B\x98\xC0\x81\x7F\x21\xA3\x52\xBE\x19\x93\x09\xCE\x20\x10\x46\x4A\x4A\xF8\x27\x31\xEC\x58\xC7\xE8\x33\x82\xE3\xCE\xBF\x85\xF4\xDF\x94\xCE\x4B\x09\xC1\x94\x56\x8A\xC0\x13\x72\xA7\xFC\x9F\x84\x4D\x73\xA3\xCA\x9A\x61\x58\x97\xA3\x27\xFC\x03\x98\x76\x23\x1D\xC7\x61\x03\x04\xAE\x56\xBF\x38\x84\x00\x40\xA7\x0E\xFD\xFF\x52\xFE\x03\x6F\x95\x30\xF1\x97\xFB\xC0\x85\x60\xD6\x80\x25\xA9\x63\xBE\x03\x01\x4E\x38\xE2\xF9\xA2\x34\xFF\xBB\x3E\x03\x44\x78\x00\x90\xCB\x88\x11\x3A\x94\x65\xC0\x7C\x63\x87\xF0\x3C\xAF\xD6\x25\xE4\x8B\x38\x0A\xAC\x72\x21\xD4\xF8\x07" };
+
+			public static SimpleMagicRecord GB = new SimpleMagicRecord { Offset=0x104, Length = 16, Key = "\xCE\xED\x66\x66\xCC\x0D\x00\x0B\x03\x73\x00\x83\x00\x0C\x00\x0D\x00\x08\x11\x1F\x88\x89\x00\x0E\xDC\xCC\x6E\xE6\xDD\xDD\xD9\x99\xBB\xBB\x67\x63\x6E\x0E\xEC\xCC\xDD\xDC\x99\x9F\xBB\xB9\x33\x3E" };
+
+			public static SimpleMagicRecord S32X = new SimpleMagicRecord { Offset = 0x100, Key = "SEGA 32X" };
+
+			public static SimpleMagicRecord SEGAGENESIS = new SimpleMagicRecord { Offset = 0x100, Key = "SEGA GENESIS" };
+			public static SimpleMagicRecord SEGAMEGADRIVE = new SimpleMagicRecord { Offset = 0x100, Key = "SEGA MEGA DRIVE" };
+			public static SimpleMagicRecord SEGASATURN = new SimpleMagicRecord { Offset = 0, Key = "SEGA SEGASATURN" };
+			public static SimpleMagicRecord SEGADISCSYSTEM = new SimpleMagicRecord { Offset = 0, Key = "SEGADISCSYSTEM" };
+
+			//NOTE: \x greedily consumes up to 4 hex digits ("\x01CD" is the single char U+01CD), so use fixed-width \u escapes when hex-looking text follows
+			public static SimpleMagicRecord PSX = new SimpleMagicRecord { Offset = 0x24E0, Key = " Licensed by Sony Computer Entertainment" };
+			public static SimpleMagicRecord PSP = new SimpleMagicRecord { Offset = 0x8000, Key = "\u0001CD001\u0001\u0000PSP GAME" };
+
+			//https://sites.google.com/site/atari7800wiki/a78-header
+			public static SimpleMagicRecord A78 = new SimpleMagicRecord { Offset = 0, Key = "\u0001ATARI7800" };
+
+			//could be at various offsets?
+			public static SimpleMagicRecord TMR_SEGA = new SimpleMagicRecord { Offset = 0x7FF0, Key = "TMR SEGA" };
+		}
+
+		class ExtensionInfo
+		{
+			public ExtensionInfo(FileIDType defaultForExtension, FormatTester tester)
+			{
+				Testers = new List<FormatTester>(1);
+				if(tester != null)
+					Testers.Add(tester);
+				DefaultForExtension = defaultForExtension;
+			}
+
+			public FileIDType DefaultForExtension;
+			public List<FormatTester> Testers;
+		}
+
+		/// <summary>
+		/// testers to try for each extension, along with a default for the extension
+		/// </summary>
+		static Dictionary<string, ExtensionInfo> ExtensionHandlers = new Dictionary<string, ExtensionInfo> {
+			{ "NES", new ExtensionInfo(FileIDType.INES, Test_INES ) },
+			{ "FDS", new ExtensionInfo(FileIDType.FDS, (j)=>Test_Simple(j,FileIDType.FDS,SimpleMagics.FDS) ) },
+			{ "GBA", new ExtensionInfo(FileIDType.GBA, (j)=>Test_Simple(j,FileIDType.GBA,SimpleMagics.GBA) ) },
+			{ "NDS", new ExtensionInfo(FileIDType.NDS, (j)=>Test_Simple(j,FileIDType.NDS,SimpleMagics.NDS) ) },
+			{ "UNF", new ExtensionInfo(FileIDType.UNIF, Test_UNIF ) },
+			{ "UNIF", new ExtensionInfo(FileIDType.UNIF, Test_UNIF ) },
+			{ "GB", new ExtensionInfo(FileIDType.GB, Test_GB_GBC ) },
+			{ "GBC", new ExtensionInfo(FileIDType.GBC, Test_GB_GBC ) },
+			{ "N64", new ExtensionInfo(FileIDType.N64, Test_N64 ) },
+			{ "Z64", new ExtensionInfo(FileIDType.N64, Test_N64 ) },
+			{ "V64", new ExtensionInfo(FileIDType.N64, Test_N64 ) },
+			{ "A78", new ExtensionInfo(FileIDType.A78, Test_A78 ) },
+			{ "SMS", new ExtensionInfo(FileIDType.SMS, Test_SMS ) },
+
+			{ "BIN", new ExtensionInfo(FileIDType.Multiple, Test_BIN_ISO ) },
+			{ "ISO", new ExtensionInfo(FileIDType.Multiple, Test_BIN_ISO ) },
+
+			{ "JAD", new ExtensionInfo(FileIDType.Multiple, Test_JAD_JAC ) },
+			{ "JAC", new ExtensionInfo(FileIDType.Multiple, Test_JAD_JAC ) },
+
+			//royal mess
+			{ "MD", new ExtensionInfo(FileIDType.SMD, null ) },
+			{ "SMD", new ExtensionInfo(FileIDType.SMD, null ) },
+			{ "GEN", new ExtensionInfo(FileIDType.SMD, null ) },
+
+			//nothing yet...
+			{ "PSF", new ExtensionInfo(FileIDType.PSF, null) },
+			{ "INT", new ExtensionInfo(FileIDType.INT, null) },
+			{ "SFC", new ExtensionInfo(FileIDType.SFC, null) },
+			{ "SMC", new ExtensionInfo(FileIDType.SFC, null) },
+			{ "LNX", new ExtensionInfo(FileIDType.LNX, null ) },
+			{ "SG", new ExtensionInfo(FileIDType.SG, null ) },
+			{ "SGX", new ExtensionInfo(FileIDType.SGX, null ) },
+			{ "COL", new ExtensionInfo(FileIDType.COL, null ) },
+			{ "A52", new ExtensionInfo(FileIDType.A52, null ) },
+			{ "A26", new ExtensionInfo(FileIDType.A26, null ) },
+			{ "PCE", new ExtensionInfo(FileIDType.PCE, null ) },
+			{ "GG", new ExtensionInfo(FileIDType.GG, null ) },
+			{ "WS", new ExtensionInfo(FileIDType.WS, null ) },
+			{ "WSC", new ExtensionInfo(FileIDType.WSC, null ) },
+			{ "NGC", new ExtensionInfo(FileIDType.NGC, null ) },
+			{ "32X", new ExtensionInfo(FileIDType.S32X, (j)=>Test_Simple(j,FileIDType.S32X,SimpleMagics.S32X) ) },
+
+			//various C64 formats.. can we distinguish between these?
+			{ "PRG", new ExtensionInfo(FileIDType.C64, null ) },
+			{ "D64", new ExtensionInfo(FileIDType.C64, null ) },
+			{ "T64", new ExtensionInfo(FileIDType.C64, null ) },
+			{ "G64", new ExtensionInfo(FileIDType.C64, null ) },
+			{ "CRT", new ExtensionInfo(FileIDType.C64, null ) },
+			{ "NIB", new ExtensionInfo(FileIDType.C64, null ) }, //not supported yet
+
+			//for now
+			{ "ROM", new ExtensionInfo(FileIDType.Multiple, null ) }, //could be MSX too
+		};
+
+		delegate FileIDResult FormatTester(IdentifyJob job);
+
+		/// <summary>
+		/// checks for the magic string (bytewise ASCII check) at the given address
+		/// </summary>
+		static bool CheckMagic(Stream stream, SimpleMagicRecord rec, params int[] offsets)
+		{
+			if (offsets.Length == 0)
+				return CheckMagicOne(stream, rec, 0);
+			else foreach (int n in offsets)
+				if (CheckMagicOne(stream, rec, n))
+					return true;
+			return false;
+		}
+
+		static bool CheckMagicOne(Stream stream, SimpleMagicRecord rec, int offset)
+		{
+			stream.Position = rec.Offset + offset;
+			string key = rec.Key;
+			int len = rec.Length;
+			if (len == -1)
+				len = key.Length;
+			for (int i = 0; i < len; i++)
+			{
+				int n = stream.ReadByte();
+				if (n == -1) return false;
+				if (n != key[i])
+					return false;
+			}
+			return true;
+		}
+
+		static int ReadByte(Stream stream, int ofs)
+		{
+			stream.Position = ofs;
+			return stream.ReadByte();
+		}
+
+		static FileIDResult Test_INES(IdentifyJob job)
+		{
+			if (!CheckMagic(job.Stream, SimpleMagics.INES))
+				return new FileIDResult();
+
+			var ret = new FileIDResult(FileIDType.INES, 100);
+
+			//an INES file should be a multiple of 8k, with the 16 byte header.
+			//if it isnt.. this is fishy.
+			if (((job.Stream.Length - 16) & (8 * 1024 - 1)) != 0)
+				ret.Confidence = 50;
+
+			return ret;
+		}
+
+		static FileIDResult Test_Simple(IdentifyJob job, FileIDType type, SimpleMagicRecord magic)
+		{
+			if (CheckMagic(job.Stream, magic))
+				return new FileIDResult(type, 100);
+			else
+				return new FileIDResult();
+		}
+
+		static FileIDResult Test_UNIF(IdentifyJob job)
+		{
+			if (!CheckMagic(job.Stream, SimpleMagics.UNIF))
+				return new FileIDResult();
+
+			//TODO - simple parser (for starters, check for a known chunk being next, see http://wiki.nesdev.com/w/index.php/UNIF)
+
+			var ret = new FileIDResult(FileIDType.UNIF, 100);
+
+			return ret;
+		}
+
+		static FileIDResult Test_GB_GBC(IdentifyJob job)
+		{
+			if (!CheckMagic(job.Stream, SimpleMagics.GB))
+				return new FileIDResult();
+
+			var ret = new FileIDResult(FileIDType.GB, 100);
+			int type = ReadByte(job.Stream, 0x143);
+			if ((type & 0x80) != 0)
+				ret.FileIDType = FileIDType.GBC;
+
+			//could check cart type and rom size for extra info if necessary
+
+			return ret;
+		}
+
+		static FileIDResult Test_SMS(IdentifyJob job)
+		{
+			//http://www.smspower.org/Development/ROMHeader
+
+			//actually, not sure how to handle this yet
+			return new FileIDResult();
+		}
+
+		static FileIDResult Test_N64(IdentifyJob job)
+		{
+			// .Z64 = No swapping
+			// .N64 = Word Swapped
+			// .V64 = Byte Swapped
+
+			//not sure how to check for these yet...
+			var ret = new FileIDResult(FileIDType.N64, 5);
+			if (job.Extension == "V64") ret.ExtraInfo["byteswap"] = true;
+			if (job.Extension == "N64") ret.ExtraInfo["wordswap"] = true;
+			return ret;
+		}
+
+		static FileIDResult Test_A78(IdentifyJob job)
+		{
+			long len = job.Stream.Length;
+
+			//we may have a header to analyze
+			if (len % 1024 == 128)
+			{
+				if (CheckMagic(job.Stream, SimpleMagics.A78))
+					return new FileIDResult(FileIDType.A78, 100);
+			}
+			else if (len % 1024 == 0)
+			{
+			}
+			else { }
+
+			return new FileIDResult();
+		}
+
+		static FileIDResult Test_BIN_ISO(IdentifyJob job)
+		{
+			//ok, this is complicated.
+			//there are lots of mislabeled bins/isos so lets just treat them the same (mostly)
+			//if the BIN cant be recognized, but it is small, it is more likely some other rom BIN than a disc (turbocd or other)
+
+			if (job.Extension == "BIN")
+			{
+				//first we can check for SMD magic words.
+				//since this extension is ambiguous, we can't be completely sure about it. but it's almost surely accurate
+				if (CheckMagic(job.Stream, SimpleMagics.SEGAGENESIS))
+				{
+					var ret = new FileIDResult(FileIDType.SMD, 95);
+					ret.ExtraInfo["type"] = "genesis";
+					return ret;
+				}
+
+				if (CheckMagic(job.Stream, SimpleMagics.SEGAMEGADRIVE))
+				{
+					var ret = new FileIDResult(FileIDType.SMD, 95);
+					ret.ExtraInfo["type"] = "megadrive";
+					return ret;
+				}
+			}
+
+			//well... guess it's a disc.
+			//since it's just a bin, we dont need the user to provide a DiscSystem disc.
+			//lets just analyze this as best we can.
+
+			//for PSX, we have a magic word to look for.
+			//it's at 0x24E0 with a mode2 (2352 byte) track 1.
+			//what if its 2048 byte?
+			//i found a ".iso" which was actually 2352 byte sectors..
+			//found a hilarious ".bin.iso" which was actually 2352 byte sectors
+			//so, I think it's possible that every valid PSX disc is mode2 in the track 1
+			if (CheckMagic(job.Stream, SimpleMagics.PSX))
+			{
+				var ret = new FileIDResult(FileIDType.PSX, 95);
+				//this is an unreliable way to get a PSX game!
+				ret.ExtraInfo["unreliable"] = true;
+				return ret;
+			}
+
+			//it's not proven that this is reliable. this is actually part of the mode1 CDFS header. perhaps it's mobile?
+			//if it's mobile, we'll need to mount it as an ISO file here via discsystem
+			if (CheckMagic(job.Stream, SimpleMagics.PSP))
+				return new FileIDResult(FileIDType.PSP, 95);
+
+			//if this was an ISO, we might discover the magic word at offset 0...
+			//if it was a mode2/2352 bin, we might discover it at offset 16 (after the sync)
+			if(CheckMagic(job.Stream, SimpleMagics.SEGASATURN,0))
+				return new FileIDResult(FileIDType.Saturn, job.Extension == "ISO" ? 95 : 90);
+			if (CheckMagic(job.Stream, SimpleMagics.SEGASATURN, 16))
+				return new FileIDResult(FileIDType.Saturn, job.Extension == "BIN" ? 95 : 90);
+			if (CheckMagic(job.Stream, SimpleMagics.SEGADISCSYSTEM, 0))
+				return new FileIDResult(FileIDType.MegaCD, job.Extension == "ISO" ? 95 : 90);
+			if (CheckMagic(job.Stream, SimpleMagics.SEGADISCSYSTEM, 16))
+				return new FileIDResult(FileIDType.MegaCD, job.Extension == "BIN" ? 95 : 90);
+
+			if (job.Extension == "ISO")
+				return new FileIDResult(FileIDType.Disc, 1);
+			else
+				return new FileIDResult(FileIDType.Multiple, 1);
+		}
+
+		static FileIDResult Test_JAD_JAC(IdentifyJob job)
+		{
+			//TBD
+			//just mount it as a disc and send it through the disc checker?
+			//until then report "not recognized" (never null: Identify reads FileIDType off every tester result)
+			return new FileIDResult();
+		}
+
+	}
+}
\ No newline at end of file