WIA: Reuse groups when writing
This is useful for the way Dolphin scrubs Wii discs. The encrypted data is what gets zeroed out, but this zeroed-out data then gets decrypted before being stored, and the resulting data does not compress well. However, each block of decrypted scrubbed data is identical given the same encryption key, and there's nothing stopping us from making multiple group entries point to the same offset in the file, so we only have to store one copy of this data per partition.

For reference, wit zeroes out the decrypted data instead, but Dolphin's WIA writer can't do this because it currently doesn't know which parts of the disc are scrubbed.

This is also useful for things such as storing Datel discs full of 0x55 blocks (representing unreadable blocks) without compression enabled.
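As a rough illustration of the reuse idea, here is a minimal, self-contained sketch (hypothetical names and simplified types, not the actual Dolphin code): before writing a group whose bytes are all one value, look it up in a map of previously written groups, and on a hit emit a group entry that points at the existing data instead of storing it again.

// Minimal sketch of group reuse; hypothetical names, simplified types.
#include <cstdint>
#include <map>
#include <optional>
#include <utility>
#include <vector>

struct GroupEntry
{
  uint64_t data_offset;  // where this group's data starts in the output file
  uint32_t data_size;    // number of bytes stored for it
};

// Constant-fill groups are keyed by (size, fill byte).
using ReuseKey = std::pair<uint64_t, uint8_t>;

std::optional<ReuseKey> MakeReuseKey(const std::vector<uint8_t>& group)
{
  if (group.empty())
    return std::nullopt;
  for (const uint8_t b : group)
  {
    if (b != group.front())
      return std::nullopt;  // not a constant-fill group, so don't try to reuse it
  }
  return ReuseKey{group.size(), group.front()};
}

GroupEntry WriteGroup(const std::vector<uint8_t>& group, uint64_t* file_offset,
                      std::map<ReuseKey, GroupEntry>* reusable_groups)
{
  const std::optional<ReuseKey> key = MakeReuseKey(group);

  // A hit means an identical group was already written; the new group entry
  // can simply point at the same offset in the file.
  if (key)
  {
    const auto it = reusable_groups->find(*key);
    if (it != reusable_groups->end())
      return it->second;
  }

  GroupEntry entry{*file_offset, static_cast<uint32_t>(group.size())};
  *file_offset += group.size();  // the real writer compresses and writes here

  if (key)
    reusable_groups->emplace(*key, entry);

  return entry;
}

In the actual commit the key (ReuseID) also records the partition key and whether the data is decrypted, since identical scrubbed blocks only decrypt to identical bytes under the same encryption key.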
parent 40e46aee57, commit e5b9e1ba1f
@@ -730,7 +730,8 @@ bool WIAFileReader::PurgeCompressor::Start()
   return true;
 }
 
-bool WIAFileReader::PurgeCompressor::AddPrecedingDataOnlyForPurgeHashing(const u8* data, size_t size)
+bool WIAFileReader::PurgeCompressor::AddPrecedingDataOnlyForPurgeHashing(const u8* data,
+                                                                         size_t size)
 {
   mbedtls_sha1_update_ret(&m_sha1_context, data, size);
   return true;
@@ -1361,10 +1362,28 @@ WIAFileReader::ConversionResult WIAFileReader::SetUpDataEntriesForWriting(
   return ConversionResult::Success;
 }
 
+bool WIAFileReader::TryReuseGroup(std::vector<GroupEntry>* group_entries, size_t* groups_written,
+                                  std::map<ReuseID, GroupEntry>* reusable_groups,
+                                  std::optional<ReuseID> reuse_id)
+{
+  if (!reuse_id)
+    return false;
+
+  const auto it = reusable_groups->find(*reuse_id);
+  if (it != reusable_groups->end())
+  {
+    (*group_entries)[*groups_written] = it->second;
+    ++*groups_written;
+  }
+
+  return it != reusable_groups->end();
+}
+
 WIAFileReader::ConversionResult WIAFileReader::CompressAndWriteGroup(
     File::IOFile* file, u64* bytes_written, std::vector<GroupEntry>* group_entries,
     size_t* groups_written, Compressor* compressor, bool compressed_exception_lists,
-    const std::vector<u8>& exception_lists, const std::vector<u8>& main_data)
+    const std::vector<u8>& exception_lists, const std::vector<u8>& main_data,
+    std::map<ReuseID, GroupEntry>* reusable_groups, std::optional<ReuseID> reuse_id)
 {
   const auto all_zero = [](const std::vector<u8>& data) {
     return std::all_of(data.begin(), data.end(), [](u8 x) { return x == 0; });
@@ -1377,6 +1396,9 @@ WIAFileReader::ConversionResult WIAFileReader::CompressAndWriteGroup(
     return ConversionResult::Success;
   }
 
+  if (TryReuseGroup(group_entries, groups_written, reusable_groups, reuse_id))
+    return ConversionResult::Success;
+
   const u64 data_offset = *bytes_written;
 
   if (compressor)
@@ -1450,6 +1472,9 @@ WIAFileReader::ConversionResult WIAFileReader::CompressAndWriteGroup(
   group_entry.data_size = Common::swap32(static_cast<u32>(*bytes_written - data_offset));
   ++*groups_written;
 
+  if (reuse_id)
+    reusable_groups->emplace(*reuse_id, group_entry);
+
   if (!PadTo4(file, bytes_written))
     return ConversionResult::WriteFailed;
 
@@ -1566,6 +1591,11 @@ WIAFileReader::ConvertToWIA(BlobReader* infile, const VolumeDisc* infile_volume,
       return ConversionResult::ReadFailed;
     // We intentionally do not increment bytes_read here, since these bytes will be read again
 
+  const auto all_same = [](const std::vector<u8>& data) {
+    const u8 first_byte = data.front();
+    return std::all_of(data.begin(), data.end(), [first_byte](u8 x) { return x == first_byte; });
+  };
+
   using WiiBlockData = std::array<u8, VolumeWii::BLOCK_DATA_SIZE>;
 
   std::vector<u8> buffer;
@@ -1573,6 +1603,8 @@ WIAFileReader::ConvertToWIA(BlobReader* infile, const VolumeDisc* infile_volume,
   std::vector<WiiBlockData> decryption_buffer;
   std::vector<VolumeWii::HashBlock> hash_buffer;
 
+  std::map<ReuseID, GroupEntry> reusable_groups;
+
   if (!partition_entries.empty())
   {
     decryption_buffer.resize(VolumeWii::BLOCKS_PER_GROUP);
@@ -1617,98 +1649,119 @@ WIAFileReader::ConvertToWIA(BlobReader* infile, const VolumeDisc* infile_volume,
         return ConversionResult::ReadFailed;
       bytes_read += bytes_to_read;
 
-      std::vector<std::vector<HashExceptionEntry>> exception_lists(exception_lists_per_chunk);
-
-      for (u64 j = 0; j < groups; ++j)
-      {
-        const u64 offset_of_group = j * VolumeWii::GROUP_TOTAL_SIZE;
-        const u64 write_offset_of_group = j * VolumeWii::GROUP_DATA_SIZE;
-
-        const u64 blocks_in_this_group =
-            std::min<u64>(VolumeWii::BLOCKS_PER_GROUP, blocks - j * VolumeWii::BLOCKS_PER_GROUP);
-
-        for (u32 k = 0; k < VolumeWii::BLOCKS_PER_GROUP; ++k)
-        {
-          if (k < blocks_in_this_group)
-          {
-            const u64 offset_of_block = offset_of_group + k * VolumeWii::BLOCK_TOTAL_SIZE;
-            VolumeWii::DecryptBlockData(buffer.data() + offset_of_block,
-                                        decryption_buffer[k].data(), &aes_context);
-          }
-          else
-          {
-            decryption_buffer[k].fill(0);
-          }
-        }
-
-        VolumeWii::HashGroup(decryption_buffer.data(), hash_buffer.data());
-
-        for (u64 k = 0; k < blocks_in_this_group; ++k)
-        {
-          const u64 offset_of_block = offset_of_group + k * VolumeWii::BLOCK_TOTAL_SIZE;
-          const u64 hash_offset_of_block = k * VolumeWii::BLOCK_HEADER_SIZE;
-
-          VolumeWii::HashBlock hashes;
-          VolumeWii::DecryptBlockHashes(buffer.data() + offset_of_block, &hashes, &aes_context);
-
-          const auto compare_hash = [&](size_t offset_in_block) {
-            ASSERT(offset_in_block + sizeof(SHA1) <= VolumeWii::BLOCK_HEADER_SIZE);
-
-            const u8* desired_hash = reinterpret_cast<u8*>(&hashes) + offset_in_block;
-            const u8* computed_hash = reinterpret_cast<u8*>(&hash_buffer[k]) + offset_in_block;
-
-            if (!std::equal(desired_hash, desired_hash + sizeof(SHA1), computed_hash))
-            {
-              const u64 hash_offset = hash_offset_of_block + offset_in_block;
-              ASSERT(hash_offset <= std::numeric_limits<u16>::max());
-
-              HashExceptionEntry& exception = exception_lists[j].emplace_back();
-              exception.offset = static_cast<u16>(Common::swap16(hash_offset));
-              std::memcpy(exception.hash.data(), desired_hash, sizeof(SHA1));
-            }
-          };
-
-          const auto compare_hashes = [&compare_hash](size_t offset, size_t size) {
-            for (size_t l = 0; l < size; l += sizeof(SHA1))
-              // The std::min is to ensure that we don't go beyond the end of HashBlock with
-              // padding_2, which is 32 bytes long (not divisible by sizeof(SHA1), which is 20).
-              compare_hash(offset + std::min(l, size - sizeof(SHA1)));
-          };
-
-          using HashBlock = VolumeWii::HashBlock;
-          compare_hashes(offsetof(HashBlock, h0), sizeof(HashBlock::h0));
-          compare_hashes(offsetof(HashBlock, padding_0), sizeof(HashBlock::padding_0));
-          compare_hashes(offsetof(HashBlock, h1), sizeof(HashBlock::h1));
-          compare_hashes(offsetof(HashBlock, padding_1), sizeof(HashBlock::padding_1));
-          compare_hashes(offsetof(HashBlock, h2), sizeof(HashBlock::h2));
-          compare_hashes(offsetof(HashBlock, padding_2), sizeof(HashBlock::padding_2));
-        }
-
-        for (u64 k = 0; k < blocks_in_this_group; ++k)
-        {
-          std::memcpy(buffer.data() + write_offset_of_group + k * VolumeWii::BLOCK_DATA_SIZE,
-                      decryption_buffer[k].data(), VolumeWii::BLOCK_DATA_SIZE);
-        }
-      }
-
-      exceptions_buffer.clear();
-      for (const std::vector<HashExceptionEntry>& exception_list : exception_lists)
-      {
-        const u16 exceptions = Common::swap16(static_cast<u16>(exception_list.size()));
-        PushBack(&exceptions_buffer, exceptions);
-        for (const HashExceptionEntry& exception : exception_list)
-          PushBack(&exceptions_buffer, exception);
-      }
-
-      buffer.resize(bytes_to_write);
-
-      const ConversionResult write_result = CompressAndWriteGroup(
-          outfile, &bytes_written, &group_entries, &groups_written, compressor.get(),
-          compressed_exception_lists, exceptions_buffer, buffer);
-
-      if (write_result != ConversionResult::Success)
-        return write_result;
+      const auto create_reuse_id = [&partition_entry, bytes_to_write](u8 value, bool decrypted) {
+        return ReuseID{&partition_entry.partition_key, bytes_to_write, decrypted, value};
+      };
+
+      std::optional<ReuseID> reuse_id;
+
+      // Set this group as reusable if the encrypted data is all_same
+      if (all_same(buffer))
+        reuse_id = create_reuse_id(buffer.front(), false);
+
+      if (!TryReuseGroup(&group_entries, &groups_written, &reusable_groups, reuse_id))
+      {
+        std::vector<std::vector<HashExceptionEntry>> exception_lists(exception_lists_per_chunk);
+
+        for (u64 j = 0; j < groups; ++j)
+        {
+          const u64 offset_of_group = j * VolumeWii::GROUP_TOTAL_SIZE;
+          const u64 write_offset_of_group = j * VolumeWii::GROUP_DATA_SIZE;
+
+          const u64 blocks_in_this_group = std::min<u64>(
+              VolumeWii::BLOCKS_PER_GROUP, blocks - j * VolumeWii::BLOCKS_PER_GROUP);
+
+          for (u32 k = 0; k < VolumeWii::BLOCKS_PER_GROUP; ++k)
+          {
+            if (k < blocks_in_this_group)
+            {
+              const u64 offset_of_block = offset_of_group + k * VolumeWii::BLOCK_TOTAL_SIZE;
+              VolumeWii::DecryptBlockData(buffer.data() + offset_of_block,
+                                          decryption_buffer[k].data(), &aes_context);
+            }
+            else
+            {
+              decryption_buffer[k].fill(0);
+            }
+          }
+
+          VolumeWii::HashGroup(decryption_buffer.data(), hash_buffer.data());
+
+          for (u64 k = 0; k < blocks_in_this_group; ++k)
+          {
+            const u64 offset_of_block = offset_of_group + k * VolumeWii::BLOCK_TOTAL_SIZE;
+            const u64 hash_offset_of_block = k * VolumeWii::BLOCK_HEADER_SIZE;
+
+            VolumeWii::HashBlock hashes;
+            VolumeWii::DecryptBlockHashes(buffer.data() + offset_of_block, &hashes, &aes_context);
+
+            const auto compare_hash = [&](size_t offset_in_block) {
+              ASSERT(offset_in_block + sizeof(SHA1) <= VolumeWii::BLOCK_HEADER_SIZE);
+
+              const u8* desired_hash = reinterpret_cast<u8*>(&hashes) + offset_in_block;
+              const u8* computed_hash = reinterpret_cast<u8*>(&hash_buffer[k]) + offset_in_block;
+
+              if (!std::equal(desired_hash, desired_hash + sizeof(SHA1), computed_hash))
+              {
+                const u64 hash_offset = hash_offset_of_block + offset_in_block;
+                ASSERT(hash_offset <= std::numeric_limits<u16>::max());
+
+                HashExceptionEntry& exception = exception_lists[j].emplace_back();
+                exception.offset = static_cast<u16>(Common::swap16(hash_offset));
+                std::memcpy(exception.hash.data(), desired_hash, sizeof(SHA1));
+              }
+            };
+
+            const auto compare_hashes = [&compare_hash](size_t offset, size_t size) {
+              for (size_t l = 0; l < size; l += sizeof(SHA1))
+                // The std::min is to ensure that we don't go beyond the end of HashBlock with
+                // padding_2, which is 32 bytes long (not divisible by sizeof(SHA1), which is 20).
+                compare_hash(offset + std::min(l, size - sizeof(SHA1)));
+            };
+
+            using HashBlock = VolumeWii::HashBlock;
+            compare_hashes(offsetof(HashBlock, h0), sizeof(HashBlock::h0));
+            compare_hashes(offsetof(HashBlock, padding_0), sizeof(HashBlock::padding_0));
+            compare_hashes(offsetof(HashBlock, h1), sizeof(HashBlock::h1));
+            compare_hashes(offsetof(HashBlock, padding_1), sizeof(HashBlock::padding_1));
+            compare_hashes(offsetof(HashBlock, h2), sizeof(HashBlock::h2));
+            compare_hashes(offsetof(HashBlock, padding_2), sizeof(HashBlock::padding_2));
+          }
+
+          for (u64 k = 0; k < blocks_in_this_group; ++k)
+          {
+            std::memcpy(buffer.data() + write_offset_of_group + k * VolumeWii::BLOCK_DATA_SIZE,
+                        decryption_buffer[k].data(), VolumeWii::BLOCK_DATA_SIZE);
+          }
+        }
+
+        bool have_exceptions = false;
+
+        exceptions_buffer.clear();
+        for (const std::vector<HashExceptionEntry>& exception_list : exception_lists)
+        {
+          const u16 exceptions = Common::swap16(static_cast<u16>(exception_list.size()));
+          PushBack(&exceptions_buffer, exceptions);
+          for (const HashExceptionEntry& exception : exception_list)
+            PushBack(&exceptions_buffer, exception);
+          if (!exception_list.empty())
+            have_exceptions = true;
+        }
+
+        buffer.resize(bytes_to_write);
+
+        // Set this group as reusable if it lacks exceptions and the decrypted data is all_same
+        if (!reuse_id && !have_exceptions && all_same(buffer))
+          reuse_id = create_reuse_id(buffer.front(), true);
+      }
+
+      const ConversionResult write_result = CompressAndWriteGroup(
+          outfile, &bytes_written, &group_entries, &groups_written, compressor.get(),
+          compressed_exception_lists, exceptions_buffer, buffer, &reusable_groups, reuse_id);
+
+      if (write_result != ConversionResult::Success)
+        return write_result;
 
       if (!run_callback())
         return ConversionResult::Canceled;
     }
@@ -1742,9 +1795,13 @@ WIAFileReader::ConvertToWIA(BlobReader* infile, const VolumeDisc* infile_volume,
        return ConversionResult::ReadFailed;
      bytes_read += bytes_to_read;
 
+     std::optional<ReuseID> reuse_id;
+     if (all_same(buffer))
+       reuse_id = ReuseID{nullptr, bytes_to_read, false, buffer.front()};
+
      const ConversionResult write_result = CompressAndWriteGroup(
          outfile, &bytes_written, &group_entries, &groups_written, compressor.get(),
-         compressed_exception_lists, exceptions_buffer, buffer);
+         compressed_exception_lists, exceptions_buffer, buffer, &reusable_groups, reuse_id);
 
      if (write_result != ConversionResult::Success)
        return write_result;
@@ -8,6 +8,7 @@
 #include <limits>
+#include <map>
 #include <memory>
 #include <optional>
 #include <utility>
 
 #include <bzlib.h>
@@ -389,6 +390,33 @@ private:
 
   static u32 LZMA2DictionarySize(u8 p);
 
+  struct ReuseID
+  {
+    bool operator==(const ReuseID& other) const
+    {
+      return std::tie(partition_key, data_size, decrypted, value) ==
+             std::tie(other.partition_key, other.data_size, other.decrypted, other.value);
+    }
+    bool operator<(const ReuseID& other) const
+    {
+      return std::tie(partition_key, data_size, decrypted, value) <
+             std::tie(other.partition_key, other.data_size, other.decrypted, other.value);
+    }
+    bool operator>(const ReuseID& other) const
+    {
+      return std::tie(partition_key, data_size, decrypted, value) >
+             std::tie(other.partition_key, other.data_size, other.decrypted, other.value);
+    }
+    bool operator!=(const ReuseID& other) const { return !operator==(other); }
+    bool operator>=(const ReuseID& other) const { return !operator<(other); }
+    bool operator<=(const ReuseID& other) const { return !operator>(other); }
+
+    const WiiKey* partition_key;
+    u64 data_size;
+    bool decrypted;
+    u8 value;
+  };
+
   static bool PadTo4(File::IOFile* file, u64* bytes_written);
   static void AddRawDataEntry(u64 offset, u64 size, int chunk_size, u32* total_groups,
                               std::vector<RawDataEntry>* raw_data_entries,
@@ -402,12 +430,14 @@ private:
                                                  std::vector<PartitionEntry>* partition_entries,
                                                  std::vector<RawDataEntry>* raw_data_entries,
                                                  std::vector<DataEntry>* data_entries);
-  static ConversionResult CompressAndWriteGroup(File::IOFile* file, u64* bytes_written,
-                                                std::vector<GroupEntry>* group_entries,
-                                                size_t* groups_written, Compressor* compressor,
-                                                bool compressed_exception_lists,
-                                                const std::vector<u8>& exception_lists,
-                                                const std::vector<u8>& main_data);
+  static bool TryReuseGroup(std::vector<GroupEntry>* group_entries, size_t* groups_written,
+                            std::map<ReuseID, GroupEntry>* reusable_groups,
+                            std::optional<ReuseID> reuse_id);
+  static ConversionResult CompressAndWriteGroup(
+      File::IOFile* file, u64* bytes_written, std::vector<GroupEntry>* group_entries,
+      size_t* groups_written, Compressor* compressor, bool compressed_exception_lists,
+      const std::vector<u8>& exception_lists, const std::vector<u8>& main_data,
+      std::map<ReuseID, GroupEntry>* reusable_groups, std::optional<ReuseID> reuse_id);
   static ConversionResult CompressAndWrite(File::IOFile* file, u64* bytes_written,
                                            Compressor* compressor, const u8* data, size_t size,
                                            size_t* size_out);