add PSX disc hashing code and batch runner

This commit is contained in:
zeromus 2015-07-08 18:47:48 -05:00
parent dafc0bb3e8
commit aad1e0608c
2 changed files with 124 additions and 82 deletions

View File

@ -12,51 +12,6 @@ namespace BizHawk.Client.DBMan
{
class DiscHash
{
public class CRC32
{
// Lookup table for speed.
private static readonly uint[] CRC32Table;
static CRC32()
{
CRC32Table = new uint[256];
for (uint i = 0; i < 256; ++i)
{
uint crc = i;
for (int j = 8; j > 0; --j)
{
if ((crc & 1) == 1)
crc = ((crc >> 1) ^ 0xEDB88320);
else
crc >>= 1;
}
CRC32Table[i] = crc;
}
}
uint current = 0xFFFFFFFF;
public void Add(byte[] data, int offset, int size)
{
for (int i = 0; i < size; i++)
{
byte b = data[offset + i];
current = (((Result) >> 8) ^ CRC32Table[b ^ ((Result) & 0xFF)]);
}
}
byte[] smallbuf = new byte[8];
public void Add(int data)
{
smallbuf[0] = (byte)((data) & 0xFF);
smallbuf[1] = (byte)((data >> 8) & 0xFF);
smallbuf[2] = (byte)((data >> 16) & 0xFF);
smallbuf[3] = (byte)((data >> 24) & 0xFF);
Add(smallbuf, 0, 4);
}
public uint Result { get { return current ^ 0xFFFFFFFF; } }
}
Job job;
public void Run(string[] args)
{
@ -116,66 +71,42 @@ namespace BizHawk.Client.DBMan
Dictionary<uint, string> FoundHashes = new Dictionary<uint, string>();
object olock = new object();
int ctr = 0;
var todo = FindExtensionsRecurse(indir, ".CUE");
int progress = 0;
//loop over games
//loop over games (parallel doesnt work well when reading tons of data over the network, as we are here to do the complete redump hash)
var po = new ParallelOptions();
po.MaxDegreeOfParallelism = Environment.ProcessorCount - 1;
//po.MaxDegreeOfParallelism = 1;
//po.MaxDegreeOfParallelism = Environment.ProcessorCount - 1;
po.MaxDegreeOfParallelism = 1;
Parallel.ForEach(todo, po, (fiCue) =>
{
string name = Path.GetFileNameWithoutExtension(fiCue);
//if (!name.Contains("Arc the Lad II"))
// return;
CRC32 crc = new CRC32();
byte[] buffer2352 = new byte[2352];
//now look for the cue file
using (var disc = Disc.LoadAutomagic(fiCue))
{
var dsr = new DiscSectorReader(disc);
var hasher = new DiscHasher(disc);
//generate a hash with our own custom approach
//basically, the TOC and a few early sectors completely
//note: be sure to hash the leadout track, "A Ressha de Ikou 4" differs only by the track 1 / disc length between 1.0 and 1.1
//crc.Add((int)disc.TOC.Session1Format);
//crc.Add(disc.TOC.FirstRecordedTrackNumber);
//crc.Add(disc.TOC.LastRecordedTrackNumber);
//for (int i = 1; i <= 100; i++)
//{
// crc.Add((int)disc.TOC.TOCItems[i].Control);
// crc.Add(disc.TOC.TOCItems[i].Exists ? 1 : 0);
// crc.Add((int)disc.TOC.TOCItems[i].LBATimestamp.Sector);
//}
//"Arc the Lad II (J) 1.0 and 1.1 conflict up to 25 sectors (so use 26)
//Tekken 3 (Europe) (Alt) and Tekken 3 (Europe) conflict in track 2 and 3 unfortunately, not sure what to do about this yet
for (int i = 0; i < 26; i++)
{
dsr.ReadLBA_2352(i, buffer2352, 0);
crc.Add(buffer2352, 0, 2352);
}
uint bizHashId = hasher.Calculate_PSX_BizIDHash();
uint redumpHash = hasher.Calculate_PSX_RedumpHash();
lock (olock)
{
progress++;
Console.WriteLine("{0}/{1} [{2:X8}] {3}", progress, todo.Count, crc.Result, Path.GetFileNameWithoutExtension(fiCue));
outf.WriteLine("[{0:X8}] {1}", crc.Result, name);
if (FoundHashes.ContainsKey(crc.Result))
Console.WriteLine("{0}/{1} [{2:X8}] {3}", progress, todo.Count, bizHashId, Path.GetFileNameWithoutExtension(fiCue));
outf.WriteLine("bizhash:{0:X8} datahash:{1:X8} //{2}", bizHashId, redumpHash, name);
if (FoundHashes.ContainsKey(bizHashId))
{
Console.WriteLine("--> COLLISION WITH: {0}", FoundHashes[crc.Result]);
outf.WriteLine("--> COLLISION WITH: {0}", FoundHashes[crc.Result]);
Console.WriteLine("--> COLLISION WITH: {0}", FoundHashes[bizHashId]);
outf.WriteLine("--> COLLISION WITH: {0}", FoundHashes[bizHashId]);
}
else
FoundHashes[crc.Result] = name;
FoundHashes[bizHashId] = name;
Console.Out.Flush();
outf.Flush();
}
}

View File

@ -13,6 +13,117 @@ namespace BizHawk.Emulation.DiscSystem
Disc disc;
class SpecialCRC32
{
private static readonly uint[] CRC32Table;
static SpecialCRC32()
{
CRC32Table = new uint[256];
for (uint i = 0; i < 256; ++i)
{
uint crc = i;
for (int j = 8; j > 0; --j)
{
if ((crc & 1) == 1)
crc = ((crc >> 1) ^ 0xEDB88320);
else
crc >>= 1;
}
CRC32Table[i] = crc;
}
}
uint current = 0xFFFFFFFF;
public unsafe void Add(byte[] data, int offset, int size)
{
if (offset + size > data.Length)
throw new ArgumentOutOfRangeException();
if(offset<0)
throw new ArgumentOutOfRangeException();
fixed(byte* pData = data)
for (int i = 0; i < size; i++)
{
byte b = pData[offset + i];
current = CRC32Table[(current ^ b) & 0xFF] ^ (current>>8);
}
}
byte[] smallbuf = new byte[8];
public void Add(int data)
{
smallbuf[0] = (byte)((data) & 0xFF);
smallbuf[1] = (byte)((data >> 8) & 0xFF);
smallbuf[2] = (byte)((data >> 16) & 0xFF);
smallbuf[3] = (byte)((data >> 24) & 0xFF);
Add(smallbuf, 0, 4);
}
public uint Result { get { return current ^ 0xFFFFFFFF; } }
}
/// <summary>
/// calculates the hash for quick PSX Disc identification
/// </summary>
public uint Calculate_PSX_BizIDHash()
{
//notes about the hash:
//"Arc the Lad II (J) 1.0 and 1.1 conflict up to 25 sectors (so use 26)
//Tekken 3 (Europe) (Alt) and Tekken 3 (Europe) conflict in track 2 and 3 unfortunately, not sure what to do about this yet
//the TOC isn't needed!
//but it will help detect dumps with mangled TOCs which are all too common
//
//a special CRC32 is used to help us match redump's DB elsewhere
SpecialCRC32 crc = new SpecialCRC32();
byte[] buffer2352 = new byte[2352];
var dsr = new DiscSectorReader(disc);
dsr.Policy.DeterministicClearBuffer = false; //live dangerously
//hash the TOC
crc.Add((int)disc.TOC.Session1Format);
crc.Add(disc.TOC.FirstRecordedTrackNumber);
crc.Add(disc.TOC.LastRecordedTrackNumber);
for (int i = 1; i <= 100; i++)
{
crc.Add((int)disc.TOC.TOCItems[i].Control);
crc.Add(disc.TOC.TOCItems[i].Exists ? 1 : 0);
crc.Add((int)disc.TOC.TOCItems[i].LBATimestamp.Sector);
}
//hash first 26 sectors
for (int i = 0; i < 26; i++)
{
dsr.ReadLBA_2352(i, buffer2352, 0);
crc.Add(buffer2352, 0, 2352);
}
return crc.Result;
}
/// <summary>
/// calculates the complete disc hash for matching to a redump
/// </summary>
public uint Calculate_PSX_RedumpHash()
{
//a special CRC32 is used to help us match redump's DB
SpecialCRC32 crc = new SpecialCRC32();
byte[] buffer2352 = new byte[2352];
var dsr = new DiscSectorReader(disc);
dsr.Policy.DeterministicClearBuffer = false; //live dangerously
//read all sectors for redump hash
for (int i = 0; i < disc.Session1.LeadoutLBA; i++)
{
dsr.ReadLBA_2352(i, buffer2352, 0);
crc.Add(buffer2352, 0, 2352);
}
return crc.Result;
}
// gets an identifying hash. hashes the first 512 sectors of
// the first data track on the disc.
//TODO - this is a very platform-specific thing. hashing the TOC may be faster and be just as effective. so, rename it appropriately