[VFS] Check hash of STFS hash tables (& cache them) when reading, fallback if invalid.

This'll try salvaging any corrupt packages loaded in: normally we find the block numbers belonging to a file by reading them from the hash table.
It seems there are some packages out there (e.g. the Mass Effect 2 demo) that have corrupt hash tables though, so using the block numbers from there just results in a crash.
By verifying the hash of each hash table we can detect if this is the case, and if so we can try just using current_block_number + 1 instead of using any invalid block number.
(we also let the user know about the corrupt table in the log file)

In LIVE/PIRS packages this should hopefully let us get the correct data, since files are usually stored inside consecutive blocks in those package types.
It's doubtful that it'd help much with CON ones though, since those are pretty much a living filesystem.
The older & more used a CON package is, the more likely blocks will be fragmented throughout the file...
Reading from the hash table is the only way to properly read data from these; using current_block + 1 likely won't help much (we'd be going wxPirs-mode, in a way :P)

CON packages do have something that might help with this though: redundant hash blocks, where each hash table is actually made up of two blocks.
Maybe in the future we can find a way to automatically use the secondary block whenever the primary one is invalid.
This commit is contained in:
emoose 2020-01-19 21:55:57 +00:00
parent db515ac4c1
commit 17324c3081
2 changed files with 99 additions and 29 deletions

View File

@ -13,6 +13,7 @@
#include <queue>
#include <vector>
#include "third_party/crypto/TinySHA1.hpp"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/vfs/devices/stfs_container_entry.h"
@ -566,6 +567,10 @@ StfsContainerDevice::Error StfsContainerDevice::ReadSTFS() {
table_block_index = block_hash.level0_next_block();
}
// At this point we've read in all the data we need from the hash tables
// Let's free some mem by clearing the cache we made.
cached_tables_.clear();
if (all_entries.size() > 0) {
return Error::kSuccess;
}
@ -640,7 +645,7 @@ size_t StfsContainerDevice::STFSDataBlockToBackingHashBlockOffset(
StfsHashEntry StfsContainerDevice::STFSGetLevelNHashEntry(
const uint8_t* map_ptr, uint32_t block_index, uint32_t level,
bool secondary_block) {
uint8_t* hash_in_out, bool secondary_block) {
uint32_t record = block_index;
for (uint32_t i = 0; i < level; i++) {
record = record / kSTFSDataBlocksPerHashLevel[0];
@ -654,10 +659,48 @@ StfsHashEntry StfsContainerDevice::STFSGetLevelNHashEntry(
hash_offset += bytes_per_sector(); // read from this tables secondary block
}
const uint8_t* hash_data = map_ptr + hash_offset;
bool invalid_table = std::find(invalid_tables_.begin(), invalid_tables_.end(),
hash_offset) != invalid_tables_.end();
auto* entry = (StfsHashEntry*)(hash_data + record * 0x18);
return *entry;
if (!cached_tables_.count(hash_offset)) {
// Cache the table in memory, since it's likely to be needed again
auto hash_data = (const StfsHashTable*)(map_ptr + hash_offset);
cached_tables_[hash_offset] = *hash_data;
// If hash is provided we'll try comparing it to the hash of this table
if (hash_in_out && !invalid_table) {
sha1::SHA1 sha;
sha.processBytes(hash_data, 0x1000);
uint8_t digest[0x14];
sha.finalize(digest);
if (memcmp(digest, hash_in_out, 0x14)) {
XELOGW(
"STFSGetLevelNHashEntry: level %d hash table at 0x%llX "
"is corrupt (hash mismatch)!",
level, hash_offset);
invalid_table = true;
invalid_tables_.push_back(hash_offset);
}
}
}
if (invalid_table) {
// If table is corrupt there's no use reading invalid data, lets try
// salvaging things by providing next block as block + 1, should work fine
// for LIVE/PIRS packages hopefully.
StfsHashEntry entry = {0};
entry.level0_next_block(block_index + 1);
return entry;
}
StfsHashTable& hash_table = cached_tables_[hash_offset];
auto& entry = hash_table.entries[record];
if (hash_in_out) {
memcpy(hash_in_out, entry.sha1, 0x14);
}
return entry;
}
StfsHashEntry StfsContainerDevice::STFSGetLevel0HashEntry(
@ -668,18 +711,24 @@ StfsHashEntry StfsContainerDevice::STFSGetLevel0HashEntry(
use_secondary_block = true;
}
// Copy our top hash table hash into the buffer...
uint8_t hash[0x14];
memcpy(hash, header_.metadata.stfs_volume_descriptor.root_hash, 0x14);
// Check upper hash table levels to find which table (primary/secondary) to
// use.
// We should be able to skip this if it's a read-only package, since the hash
// tables in those only use one block
if (!header_.metadata.stfs_volume_descriptor.flags.read_only_format) {
// We used to always skip this if package is read-only, but it seems there's
// a lot of LIVE/PIRS packages with corrupt hash tables out there.
// Checking the hash table hashes is the only way to detect (and then
// possibly salvage) these.
auto num_blocks =
header_.metadata.stfs_volume_descriptor.allocated_block_count;
if (num_blocks >= kSTFSDataBlocksPerHashLevel[1]) {
// Get the L2 entry for the block
auto l2_entry =
STFSGetLevelNHashEntry(map_ptr, block_index, 2, use_secondary_block);
auto l2_entry = STFSGetLevelNHashEntry(map_ptr, block_index, 2, hash,
use_secondary_block);
use_secondary_block = false;
if (l2_entry.levelN_activeindex()) {
use_secondary_block = true;
@ -688,16 +737,16 @@ StfsHashEntry StfsContainerDevice::STFSGetLevel0HashEntry(
if (num_blocks >= kSTFSDataBlocksPerHashLevel[0]) {
// Get the L1 entry for this block
auto l1_entry =
STFSGetLevelNHashEntry(map_ptr, block_index, 1, use_secondary_block);
auto l1_entry = STFSGetLevelNHashEntry(map_ptr, block_index, 1, hash,
use_secondary_block);
use_secondary_block = false;
if (l1_entry.levelN_activeindex()) {
use_secondary_block = true;
}
}
}
return STFSGetLevelNHashEntry(map_ptr, block_index, 0, use_secondary_block);
return STFSGetLevelNHashEntry(map_ptr, block_index, 0, hash,
use_secondary_block);
}
uint32_t StfsContainerDevice::ReadMagic(const std::wstring& path) {

View File

@ -32,8 +32,11 @@ struct StfsVolumeDescriptor {
uint8_t version;
union {
struct {
uint8_t read_only_format : 1;
uint8_t root_active_index : 1;
uint8_t read_only_format : 1; // if set, only uses a single backing-block
// per hash table (no resiliency),
// otherwise uses two
uint8_t root_active_index : 1; // if set, uses secondary backing-block
// for the highest-level hash table
uint8_t directory_overallocated : 1;
uint8_t directory_index_bounds_valid : 1;
};
@ -70,6 +73,11 @@ struct StfsHashEntry {
// If this is a level0 entry, this points to the next block in the chain
uint32_t level0_next_block() {
  // Reassemble the 24-bit block number: info1 is the most significant byte,
  // info3 the least significant.
  const auto high = info1 << 16;
  const auto mid = info2 << 8;
  return high | mid | info3;
}
// Setter counterpart of level0_next_block(): stores the low 24 bits of
// |value| with the most significant byte in info1 and the least significant
// byte in info3. Bits above 23 are discarded.
void level0_next_block(uint32_t value) {
  const uint32_t low24 = value & 0xFFFFFF;
  info1 = static_cast<uint8_t>(low24 >> 16);
  info2 = static_cast<uint8_t>(low24 >> 8);
  info3 = static_cast<uint8_t>(low24);
}
// If this is level 1 or 2, this says whether the hash table this entry refers
// to is using the secondary block or not
@ -79,6 +87,13 @@ struct StfsHashEntry {
};
static_assert_size(StfsHashEntry, 0x18);
// Layout of one STFS hash table block. The 0xAA (170) entries of 0x18 bytes
// each occupy 0xFF0 bytes; the 4-byte block count and 12 bytes of padding
// bring the total to the 0x1000 bytes asserted below.
struct StfsHashTable {
// One hash entry per record covered by this table.
StfsHashEntry entries[0xAA];
// NOTE(review): xe::be presumably marks this as stored big-endian - confirm.
xe::be<uint32_t> num_blocks; // "committed" blocks
// Unused tail bytes that pad the struct out to a full 0x1000-byte block.
uint8_t padding[12];
};
// Guards the layout: the struct must stay exactly 0x1000 bytes.
static_assert_size(StfsHashTable, 0x1000);
/* SVOD */
struct SvodDeviceDescriptor {
uint8_t descriptor_length;
@ -489,6 +504,7 @@ class StfsContainerDevice : public Device {
StfsHashEntry STFSGetLevelNHashEntry(const uint8_t* map_ptr,
uint32_t block_index, uint32_t level,
uint8_t* hash_in_out = nullptr,
bool secondary_block = false);
StfsHashEntry STFSGetLevel0HashEntry(const uint8_t* map_ptr,
@ -503,6 +519,11 @@ class StfsContainerDevice : public Device {
uint32_t blocks_per_hash_table_ = 1;
uint32_t block_step_[2] = {0xAB, 0x718F};
// Any STFS hash tables that we read from will be cached here, since it's
// likely that they'll be needed again
std::map<size_t, StfsHashTable> cached_tables_;
std::vector<size_t> invalid_tables_;
size_t base_offset_;
size_t magic_offset_;
std::unique_ptr<Entry> root_entry_;