From c0ffadb56e0e2f3b6906d2de2b57e06440c3fdd0 Mon Sep 17 00:00:00 2001 From: emoose Date: Sun, 19 Jan 2020 21:55:57 +0000 Subject: [PATCH] [VFS] Check hash of STFS hash tables (& cache them) when reading, fallback if invalid. This'll try salvaging any corrupt packages loaded in: normally we find the block numbers belonging to a file by reading them from the hash table. It seems there are some packages out there (e.g. the Mass Effect 2 demo) that have corrupt hash tables though, so using the block numbers from there just results in a crash. By verifying the hash of each hash table we can detect if this is the case, and if so we can try just using current_block_number + 1 instead of using any invalid block number (we also let the user know about the corrupt table in the log file). In LIVE/PIRS packages this should hopefully let us get the correct data, since files are usually stored inside consecutive blocks in those package types. It's doubtful that it'd help much with CON ones though, since those are pretty much a living filesystem. The older & more used a CON package is, the more likely its blocks are to be fragmented throughout the file... Reading from the hash table is the only way to properly read data from these; using current_block + 1 likely won't help much (we'd be going wxPirs-mode, in a way :P) CON packages do have something that might help with this though: redundant hash blocks, where each hash table is actually made up of two blocks. Maybe in the future we can find a way to automatically use the secondary block whenever the primary one is invalid. 
--- .../vfs/devices/stfs_container_device.cc | 103 +++++++++++++----- src/xenia/vfs/devices/stfs_container_device.h | 25 ++++- 2 files changed, 99 insertions(+), 29 deletions(-) diff --git a/src/xenia/vfs/devices/stfs_container_device.cc b/src/xenia/vfs/devices/stfs_container_device.cc index 537d1a9b4..1e90acc2a 100644 --- a/src/xenia/vfs/devices/stfs_container_device.cc +++ b/src/xenia/vfs/devices/stfs_container_device.cc @@ -13,6 +13,7 @@ #include #include +#include "third_party/crypto/TinySHA1.hpp" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/vfs/devices/stfs_container_entry.h" @@ -566,6 +567,10 @@ StfsContainerDevice::Error StfsContainerDevice::ReadSTFS() { table_block_index = block_hash.level0_next_block(); } + // At this point we've read in all the data we need from the hash tables + // Let's free some mem by clearing the cache we made. + cached_tables_.clear(); + if (all_entries.size() > 0) { return Error::kSuccess; } @@ -640,7 +645,7 @@ size_t StfsContainerDevice::STFSDataBlockToBackingHashBlockOffset( StfsHashEntry StfsContainerDevice::STFSGetLevelNHashEntry( const uint8_t* map_ptr, uint32_t block_index, uint32_t level, - bool secondary_block) { + uint8_t* hash_in_out, bool secondary_block) { uint32_t record = block_index; for (uint32_t i = 0; i < level; i++) { record = record / kSTFSDataBlocksPerHashLevel[0]; @@ -654,10 +659,48 @@ StfsHashEntry StfsContainerDevice::STFSGetLevelNHashEntry( hash_offset += bytes_per_sector(); // read from this tables secondary block } - const uint8_t* hash_data = map_ptr + hash_offset; + bool invalid_table = std::find(invalid_tables_.begin(), invalid_tables_.end(), + hash_offset) != invalid_tables_.end(); - auto* entry = (StfsHashEntry*)(hash_data + record * 0x18); - return *entry; + if (!cached_tables_.count(hash_offset)) { + // Cache the table in memory, since it's likely to be needed again + auto hash_data = (const StfsHashTable*)(map_ptr + hash_offset); + cached_tables_[hash_offset] = 
*hash_data; + + // If hash is provided we'll try comparing it to the hash of this table + if (hash_in_out && !invalid_table) { + sha1::SHA1 sha; + sha.processBytes(hash_data, 0x1000); + + uint8_t digest[0x14]; + sha.finalize(digest); + if (memcmp(digest, hash_in_out, 0x14)) { + XELOGW( + "STFSGetLevelNHashEntry: level %d hash table at 0x%llX " + "is corrupt (hash mismatch)!", + level, hash_offset); + invalid_table = true; + invalid_tables_.push_back(hash_offset); + } + } + } + + if (invalid_table) { + // If table is corrupt there's no use reading invalid data, lets try + // salvaging things by providing next block as block + 1, should work fine + // for LIVE/PIRS packages hopefully. + StfsHashEntry entry = {0}; + entry.level0_next_block(block_index + 1); + return entry; + } + + StfsHashTable& hash_table = cached_tables_[hash_offset]; + + auto& entry = hash_table.entries[record]; + if (hash_in_out) { + memcpy(hash_in_out, entry.sha1, 0x14); + } + return entry; } StfsHashEntry StfsContainerDevice::STFSGetLevel0HashEntry( @@ -668,36 +711,42 @@ StfsHashEntry StfsContainerDevice::STFSGetLevel0HashEntry( use_secondary_block = true; } + // Copy our top hash table hash into the buffer... + uint8_t hash[0x14]; + memcpy(hash, header_.metadata.stfs_volume_descriptor.root_hash, 0x14); + // Check upper hash table levels to find which table (primary/secondary) to // use. 
- // We should be able to skip this if it's a read-only package, since the hash - // tables in those only use one block - if (!header_.metadata.stfs_volume_descriptor.flags.read_only_format) { - auto num_blocks = - header_.metadata.stfs_volume_descriptor.allocated_block_count; - if (num_blocks >= kSTFSDataBlocksPerHashLevel[1]) { - // Get the L2 entry for the block - auto l2_entry = - STFSGetLevelNHashEntry(map_ptr, block_index, 2, use_secondary_block); - use_secondary_block = false; - if (l2_entry.levelN_activeindex()) { - use_secondary_block = true; - } - } + // We used to always skip this if package is read-only, but it seems there's + // a lot of LIVE/PIRS packages with corrupt hash tables out there. + // Checking the hash table hashes is the only way to detect (and then + // possibly salvage) these. + auto num_blocks = + header_.metadata.stfs_volume_descriptor.allocated_block_count; - if (num_blocks >= kSTFSDataBlocksPerHashLevel[0]) { - // Get the L1 entry for this block - auto l1_entry = - STFSGetLevelNHashEntry(map_ptr, block_index, 1, use_secondary_block); - use_secondary_block = false; - if (l1_entry.levelN_activeindex()) { - use_secondary_block = true; - } + if (num_blocks >= kSTFSDataBlocksPerHashLevel[1]) { + // Get the L2 entry for the block + auto l2_entry = STFSGetLevelNHashEntry(map_ptr, block_index, 2, hash, + use_secondary_block); + use_secondary_block = false; + if (l2_entry.levelN_activeindex()) { + use_secondary_block = true; } } - return STFSGetLevelNHashEntry(map_ptr, block_index, 0, use_secondary_block); + if (num_blocks >= kSTFSDataBlocksPerHashLevel[0]) { + // Get the L1 entry for this block + auto l1_entry = STFSGetLevelNHashEntry(map_ptr, block_index, 1, hash, + use_secondary_block); + use_secondary_block = false; + if (l1_entry.levelN_activeindex()) { + use_secondary_block = true; + } + } + + return STFSGetLevelNHashEntry(map_ptr, block_index, 0, hash, + use_secondary_block); } uint32_t StfsContainerDevice::ReadMagic(const 
std::wstring& path) { diff --git a/src/xenia/vfs/devices/stfs_container_device.h b/src/xenia/vfs/devices/stfs_container_device.h index 0dcf1284e..c4c6ec58d 100644 --- a/src/xenia/vfs/devices/stfs_container_device.h +++ b/src/xenia/vfs/devices/stfs_container_device.h @@ -32,8 +32,11 @@ struct StfsVolumeDescriptor { uint8_t version; union { struct { - uint8_t read_only_format : 1; - uint8_t root_active_index : 1; + uint8_t read_only_format : 1; // if set, only uses a single backing-block + // per hash table (no resiliency), + // otherwise uses two + uint8_t root_active_index : 1; // if set, uses secondary backing-block + // for the highest-level hash table uint8_t directory_overallocated : 1; uint8_t directory_index_bounds_valid : 1; }; @@ -70,6 +73,11 @@ struct StfsHashEntry { // If this is a level0 entry, this points to the next block in the chain uint32_t level0_next_block() { return info3 | (info2 << 8) | (info1 << 16); } + void level0_next_block(uint32_t value) { + info3 = static_cast(value & 0xFF); + info2 = static_cast((value >> 8) & 0xFF); + info1 = static_cast((value >> 16) & 0xFF); + } // If this is level 1 or 2, this says whether the hash table this entry refers // to is using the secondary block or not @@ -79,6 +87,13 @@ struct StfsHashEntry { }; static_assert_size(StfsHashEntry, 0x18); +struct StfsHashTable { + StfsHashEntry entries[0xAA]; + xe::be num_blocks; // "committed" blocks + uint8_t padding[12]; +}; +static_assert_size(StfsHashTable, 0x1000); + /* SVOD */ struct SvodDeviceDescriptor { uint8_t descriptor_length; @@ -489,6 +504,7 @@ class StfsContainerDevice : public Device { StfsHashEntry STFSGetLevelNHashEntry(const uint8_t* map_ptr, uint32_t block_index, uint32_t level, + uint8_t* hash_in_out = nullptr, bool secondary_block = false); StfsHashEntry STFSGetLevel0HashEntry(const uint8_t* map_ptr, @@ -503,6 +519,11 @@ class StfsContainerDevice : public Device { uint32_t blocks_per_hash_table_ = 1; uint32_t block_step_[2] = {0xAB, 0x718F}; + // Any 
STFS hash tables that we read from will be cached here, since it's + // likely that they'll be needed again + std::map cached_tables_; + std::vector invalid_tables_; + size_t base_offset_; size_t magic_offset_; std::unique_ptr root_entry_;