Add tests for decompressing/extracting archives

This commit is contained in:
YoshiRulz 2021-01-22 06:15:08 +10:00
parent acc187008f
commit 73a780674d
No known key found for this signature in database
GPG Key ID: C4DE31C245353FB7
14 changed files with 116 additions and 0 deletions

View File

@ -24,6 +24,8 @@ namespace BizHawk.Client.Common
/// <summary>Opens the archive located at <paramref name="path"/> using default-constructed options.</summary>
/// <param name="path">path handed to <c>ArchiveFactory.Open</c></param>
public SharpCompressArchiveFile(string path)
{
	_archive = ArchiveFactory.Open(path, new());
}
/// <summary>Opens an archive from <paramref name="fileStream"/> using default-constructed options.</summary>
/// <param name="fileStream">stream handed to <c>ArchiveFactory.Open</c></param>
public SharpCompressArchiveFile(Stream fileStream)
{
	_archive = ArchiveFactory.Open(fileStream, new());
}
public void Dispose()
{
if (_archive == null) throw new ObjectDisposedException(nameof(SharpCompressArchiveFile));

View File

@ -49,8 +49,40 @@ namespace BizHawk.Client.Common
return false;
}
/// <summary>Checks whether <paramref name="fileStream"/> appears to hold an archive that SharpCompress can open.</summary>
/// <param name="fileStream">stream to inspect; must be readable and seekable, otherwise <see langword="false"/> is returned</param>
/// <param name="filenameHint">only used in the message logged when a suspected false-positive .tar is rejected</param>
/// <returns>
/// <see langword="true"/> if SharpCompress opens the stream as a non-tar archive, or as a tar archive which carries a <c>ustar</c> magic value;
/// <see langword="false"/> otherwise (including when opening throws)
/// </returns>
public bool CheckSignature(Stream fileStream, string? filenameHint)
{
	const int TarMagicOffset = 0x101;
	const int TarMagicLength = 8;

	if (!fileStream.CanRead || !fileStream.CanSeek) return false;

	try
	{
		using var arcTest = ArchiveFactory.Open(fileStream); // should throw for non-archives
		if (arcTest.Type != ArchiveType.Tar) return true; // not expecting false positives from anything but .tar for now
	}
	catch
	{
		return false;
	}

	// as above, SharpCompress seems to overzealously flag files it thinks are in the original .tar format, so we'll check for false positives
	if (fileStream.Length < 512) return false;

	// looking for magic bytes
	var seekPos = fileStream.Position;
	var buffer = new byte[TarMagicLength];
	var filled = 0;
	try
	{
		fileStream.Seek(TarMagicOffset, SeekOrigin.Begin);
		// Stream.Read is allowed to return fewer bytes than requested, so keep reading until the buffer is full or the stream ends
		while (filled < buffer.Length)
		{
			var read = fileStream.Read(buffer, filled, buffer.Length - filled);
			if (read <= 0) break;
			filled += read;
		}
	}
	finally
	{
		// put the seek pos back even if seeking/reading threw
		fileStream.Seek(seekPos, SeekOrigin.Begin);
	}

	// a short read leaves trailing zeroes in the buffer, which must not be mistaken for the NUL at the end of the GNU magic value
	if (filled == buffer.Length)
	{
		var s = buffer.BytesToHexString();
		if (s == "7573746172003030" || s == "7573746172202000") return true; // "ustar\000" (libarchive's bsdtar) or "ustar  \0" (GNU Tar)
	}

	Console.WriteLine($"SharpCompress identified file in stream as original .tar format, probably a false positive, ignoring. Filename hint: {filenameHint}");
	return false;
}
/// <summary>Creates a <see cref="SharpCompressArchiveFile"/> for the archive at <paramref name="path"/>.</summary>
public SharpCompressArchiveFile Construct(string path)
{
	return new SharpCompressArchiveFile(path);
}
/// <summary>Creates a <see cref="SharpCompressArchiveFile"/> which reads the archive from <paramref name="fileStream"/>.</summary>
public SharpCompressArchiveFile Construct(Stream fileStream)
{
	return new SharpCompressArchiveFile(fileStream);
}
/// <summary>Statically-held instance of this dearchival method, created once when the type is initialised.</summary>
public static readonly SharpCompressDearchivalMethod Instance = new();
public IReadOnlyCollection<string> AllowedArchiveExtensions { get; } = new[]

View File

@ -1,4 +1,5 @@
using System.Collections.Generic;
using System.IO;
namespace BizHawk.Common
{
@ -8,8 +9,14 @@ namespace BizHawk.Common
/// <remarks>TODO could this receive a <see cref="HawkFile"/> itself? possibly handy, in very clever scenarios of mounting fake files</remarks>
bool CheckSignature(string fileName, out int offset, out bool isExecutable);
/// <summary>Checks whether the contents of <paramref name="fileStream"/> look like an archive that this dearchival method can open.</summary>
/// <param name="fileStream">stream whose contents are inspected</param>
/// <param name="filenameHint">optional name associated with the stream; NOTE(review): presumably informational only (e.g. for log messages) — confirm against each implementation</param>
/// <returns><see langword="true"/> if the stream was recognised as an archive</returns>
/// <remarks>for now, only used in tests</remarks>
bool CheckSignature(Stream fileStream, string? filenameHint = null);
/// <summary>NOTE(review): presumably the file extensions which this dearchival method treats as archives — confirm format (with/without leading dot) against implementations</summary>
IReadOnlyCollection<string> AllowedArchiveExtensions { get; }
/// <summary>Creates a <typeparamref name="T"/> for the archive at <paramref name="path"/>.</summary>
T Construct(string path);
/// <summary>Creates a <typeparamref name="T"/> which reads the archive from <paramref name="fileStream"/>.</summary>
/// <remarks>for now, only used in tests</remarks>
T Construct(Stream fileStream);
}
}

View File

@ -14,4 +14,7 @@
<PackageReference Include="JunitXml.TestLogger" Version="2.1.78" PrivateAssets="all" />
<ProjectReference Include="$(ProjectDir)../BizHawk.Client.Common/BizHawk.Client.Common.csproj" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="data/**/*" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,53 @@
using System;
using System.IO;
using System.Linq;
using BizHawk.Client.Common;
using BizHawk.Common.BufferExtensions;
using BizHawk.Common.IOExtensions;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace BizHawk.Tests.Client.Common.Dearchive
{
/// <summary>Checks that each embedded sample archive is detected, enumerated, and extracted to the same bytes as the uncompressed ROM.</summary>
[TestClass]
public class DearchivalTests
{
	/// <summary>group name passed to <see cref="EmbeddedData.GetStream"/> for every file used by these tests</summary>
	private const string EMBED_GROUP = "dearchive";

	/// <summary>sample archives (each expected to contain exactly 1 file) and whether the SharpCompress-based method should be tested against them</summary>
	private static readonly (string Filename, bool HasSharpCompressSupport)[] TestCases = {
		("m3_scy_change.7z", true),
		("m3_scy_change.gb.gz", true),
		("m3_scy_change.rar", true),
		("m3_scy_change.bsdtar.tar", true),
		("m3_scy_change.gnutar.tar", true),
		("m3_scy_change.zip", true),
	};

	/// <summary>lazily-loaded contents of the uncompressed ROM which every sample archive should extract to</summary>
	private readonly Lazy<byte[]> _rom = new(() =>
	{
		using var romStream = EmbeddedData.GetStream(EMBED_GROUP, "m3_scy_change.gb");
		return romStream.ReadAllBytes();
	});

	private byte[] Rom => _rom.Value;

	/// <summary>Guards against the reference ROM itself being wrong or corrupted.</summary>
	[TestMethod]
	public void SanityCheck() => Assert.AreEqual("70DCA8E791878BDD32426391E4233EA52B47CDD1", Rom.HashSHA1());

	/// <summary>Runs signature detection, enumeration, and extraction through <see cref="SharpCompressDearchivalMethod"/> for every supported sample.</summary>
	[TestMethod]
	public void TestSharpCompress()
	{
		var sc = SharpCompressDearchivalMethod.Instance;
		foreach (var filename in TestCases.Where(testCase => testCase.HasSharpCompressSupport)
			.Select(testCase => testCase.Filename))
		{
			using var archive = EmbeddedData.GetStream(EMBED_GROUP, filename);
			Assert.IsTrue(sc.CheckSignature(archive, filename), $"{filename} is an archive, but wasn't detected as such"); // puts the seek pos of the Stream param back where it was (in this case at the start), but that may not always be true
			using var af = sc.Construct(archive);
			var items = af.Scan();
			Assert.IsNotNull(items, $"{filename} contains 1 file, but it couldn't be enumerated correctly");
			Assert.AreEqual(1, items!.Count, $"{filename} contains 1 file, but was detected as containing {items.Count} files");
			using MemoryStream ms = new((int) items[0].Size);
			af.ExtractFile(items[0].ArchiveIndex, ms);
			// after extraction the stream's position is at the end, so reading from it yields nothing; ToArray() copies everything written regardless of position
			CollectionAssert.AreEqual(Rom, ms.ToArray(), $"the file extracted from {filename} doesn't match the uncompressed file");
		}
	}
}
}

View File

@ -0,0 +1,17 @@
using System;
using System.IO;
using System.Reflection;
namespace BizHawk.Tests
{
/// <summary>Looks up files embedded as manifest resources in the assembly which declares this class.</summary>
public static class EmbeddedData
{
	private static readonly Assembly ContainingAssembly = typeof(EmbeddedData).Assembly;

	/// <summary>Opens the embedded resource named <c>BizHawk.Tests.data.{group}.{embedPath}</c>.</summary>
	/// <param name="group">middle segment of the resource name</param>
	/// <param name="embedPath">final segment(s) of the resource name</param>
	/// <returns>a stream over the resource's contents</returns>
	/// <exception cref="InvalidOperationException">no resource with that name exists in the assembly</exception>
	public static Stream GetStream(string group, string embedPath)
	{
		var resourceName = $"BizHawk.Tests.data.{group}.{embedPath}";
		var resourceStream = ContainingAssembly.GetManifestResourceStream(resourceName);
		if (resourceStream is null)
		{
			throw new InvalidOperationException($"Could not find the embedded resource {resourceName}");
		}
		return resourceStream;
	}
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
All files in this directory are derived from `m3_scy_change.gb`, an open-source test ROM from the [Mealybug Tearoom Tests](https://github.com/mattcurrie/mealybug-tearoom-tests) (specifically, the copy from [this bundle](https://github.com/mattcurrie/mealybug-tearoom-tests/blob/bc3430a7c1f5d394764f37a189b85d8ce4bb3c4e/mealybug-tearoom-tests.zip)).
The test suite is made available under the MIT (Expat) license, and the full terms of the license can be found [here](https://github.com/mattcurrie/mealybug-tearoom-tests/blob/bc3430a7c1f5d394764f37a189b85d8ce4bb3c4e/LICENSE).