3rdparty: Add CCC v2.1

This is the symbol table parser that I'm replacing the existing ELF
symbol table parser with. It supports STABS symbols in .mdebug sections
as well as ELF symbols and SNDLL symbols.

It includes its own symbol database, and an AST which facilitates
debugging tools that let the user inspect complex data structures with
full type information.

More information is provided in the included readme.
This commit is contained in:
chaoticgd 2024-08-26 18:08:33 +01:00 committed by Ty
parent b43e05a8fc
commit 87b03fdc28
39 changed files with 9306 additions and 0 deletions

41
3rdparty/ccc/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,41 @@
# Build script for the vendored Chaos Compiler Collection (CCC) symbol table
# parser. It defines a single static library target named `ccc`.
cmake_minimum_required(VERSION 3.14)

# Every source listed below is C++, so only the C++ toolchain is enabled.
project(ccc LANGUAGES CXX)

# Directory-scoped defaults picked up by the target defined below: ISO C++20
# with compiler-specific extensions disabled.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

add_library(ccc STATIC
	src/ccc/ast.cpp
	src/ccc/ast.h
	src/ccc/elf.cpp
	src/ccc/elf.h
	src/ccc/elf_symtab.cpp
	src/ccc/elf_symtab.h
	src/ccc/importer_flags.cpp
	src/ccc/importer_flags.h
	src/ccc/mdebug_analysis.cpp
	src/ccc/mdebug_analysis.h
	src/ccc/mdebug_importer.cpp
	src/ccc/mdebug_importer.h
	src/ccc/mdebug_section.cpp
	src/ccc/mdebug_section.h
	src/ccc/mdebug_symbols.cpp
	src/ccc/mdebug_symbols.h
	src/ccc/sndll.cpp
	src/ccc/sndll.h
	src/ccc/stabs.cpp
	src/ccc/stabs.h
	src/ccc/stabs_to_ast.cpp
	src/ccc/stabs_to_ast.h
	src/ccc/symbol_database.cpp
	src/ccc/symbol_database.h
	src/ccc/symbol_file.cpp
	src/ccc/symbol_file.h
	src/ccc/symbol_table.cpp
	src/ccc/symbol_table.h
	src/ccc/util.cpp
	src/ccc/util.h
)

# The public headers themselves need C++20 (ast.h uses default member
# initializers on bit-fields), so the requirement is attached to the target
# and propagated to everything that links against it. The CMAKE_CXX_STANDARD
# variable above only affects targets created in this directory.
target_compile_features(ccc PUBLIC cxx_std_20)

# `src` is the include root, so headers are reachable as "ccc/<name>.h".
target_include_directories(ccc PUBLIC src)

37
3rdparty/ccc/README.md vendored Normal file
View File

@ -0,0 +1,37 @@
# Chaos Compiler Collection
This code was originally developed in the following repository and was copied
into PCSX2 by the author:
- [https://github.com/chaoticgd/ccc](https://github.com/chaoticgd/ccc)
The original repository includes additional resources that are not present in the PCSX2 repository.
## Documentation
### DWARF (.debug) Section
- [DWARF Debugging Information Format](https://dwarfstd.org/doc/dwarf_1_1_0.pdf)
### MIPS Debug (.mdebug) Section
- [Third Eye Software and the MIPS symbol table (Peter Rowell)](http://datahedron.com/mips.html)
- [MIPS Mdebug Debugging Information (David Anderson, 1996)](https://www.prevanders.net/Mdebug.ps)
- MIPS Assembly Language Programmer's Guide, Symbol Table Chapter (Silicon Graphics, 1992)
- Tru64 UNIX Object File and Symbol Table Format Specification, Symbol Table Chapter
- `mdebugread.c` from gdb (reading)
- `ecoff.c` from gas (writing)
- `include/coff/sym.h` from binutils (headers)
### MIPS EABI
- [MIPS EABI](https://sourceware.org/legacy-ml/binutils/2003-06/msg00436.html)
### STABS
- [The "stabs" representation of debugging information (Julia Menapace, Jim Kingdon, and David MacKenzie, 1992-???)](https://sourceware.org/gdb/onlinedocs/stabs.html)
- `stabs.c` from binutils (reading)
- `stabsread.c` from gdb (reading)
- `dbxread.c` from gdb (reading)
- `dbxout.c` from gcc (writing)
- `stab.def` from gcc (symbol codes)

75
3rdparty/ccc/ccc.vcxproj vendored Normal file
View File

@ -0,0 +1,75 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- MSBuild project that builds the vendored CCC sources under src\ccc as a static library. -->
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(SolutionDir)common\vsprops\BaseProjectConfig.props" />
<Import Project="$(SolutionDir)common\vsprops\WinSDK.props" />
<PropertyGroup Label="Globals">
<ProjectGuid>{2589F8CE-EA77-4B73-911E-64074569795B}</ProjectGuid>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<!-- Configurations whose name contains "Clang" build with the ClangCL toolset; all others use the default toolset. -->
<PlatformToolset Condition="!$(Configuration.Contains(Clang))">$(DefaultPlatformToolset)</PlatformToolset>
<PlatformToolset Condition="$(Configuration.Contains(Clang))">ClangCL</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
<!-- Whole program optimization is enabled only for configurations whose name contains "Release". -->
<WholeProgramOptimization Condition="$(Configuration.Contains(Release))">true</WholeProgramOptimization>
<UseDebugLibraries Condition="$(Configuration.Contains(Debug))">true</UseDebugLibraries>
<UseDebugLibraries Condition="!$(Configuration.Contains(Debug))">false</UseDebugLibraries>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<!-- Shared property sheets. The code generation sheet is picked by configuration name (Debug, Devel or Release). -->
<ImportGroup Label="PropertySheets">
<Import Project="..\DefaultProjectRootDir.props" />
<Import Project="..\3rdparty.props" />
<Import Condition="$(Configuration.Contains(Debug))" Project="..\..\common\vsprops\CodeGen_Debug.props" />
<Import Condition="$(Configuration.Contains(Devel))" Project="..\..\common\vsprops\CodeGen_Devel.props" />
<Import Condition="$(Configuration.Contains(Release))" Project="..\..\common\vsprops\CodeGen_Release.props" />
<Import Condition="!$(Configuration.Contains(Release))" Project="..\..\common\vsprops\IncrementalLinking.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<!-- Header files. This list mirrors the .h entries in CMakeLists.txt. -->
<ItemGroup>
<ClInclude Include="src\ccc\ast.h" />
<ClInclude Include="src\ccc\elf.h" />
<ClInclude Include="src\ccc\elf_symtab.h" />
<ClInclude Include="src\ccc\importer_flags.h" />
<ClInclude Include="src\ccc\mdebug_analysis.h" />
<ClInclude Include="src\ccc\mdebug_importer.h" />
<ClInclude Include="src\ccc\mdebug_section.h" />
<ClInclude Include="src\ccc\mdebug_symbols.h" />
<ClInclude Include="src\ccc\sndll.h" />
<ClInclude Include="src\ccc\stabs.h" />
<ClInclude Include="src\ccc\stabs_to_ast.h" />
<ClInclude Include="src\ccc\symbol_database.h" />
<ClInclude Include="src\ccc\symbol_file.h" />
<ClInclude Include="src\ccc\symbol_table.h" />
<ClInclude Include="src\ccc\util.h" />
</ItemGroup>
<!-- Translation units. This list mirrors the .cpp entries in CMakeLists.txt. -->
<ItemGroup>
<ClCompile Include="src\ccc\ast.cpp" />
<ClCompile Include="src\ccc\elf.cpp" />
<ClCompile Include="src\ccc\elf_symtab.cpp" />
<ClCompile Include="src\ccc\importer_flags.cpp" />
<ClCompile Include="src\ccc\mdebug_analysis.cpp" />
<ClCompile Include="src\ccc\mdebug_importer.cpp" />
<ClCompile Include="src\ccc\mdebug_section.cpp" />
<ClCompile Include="src\ccc\mdebug_symbols.cpp" />
<ClCompile Include="src\ccc\sndll.cpp" />
<ClCompile Include="src\ccc\stabs.cpp" />
<ClCompile Include="src\ccc\stabs_to_ast.cpp" />
<ClCompile Include="src\ccc\symbol_database.cpp" />
<ClCompile Include="src\ccc\symbol_file.cpp" />
<ClCompile Include="src\ccc\symbol_table.cpp" />
<ClCompile Include="src\ccc\util.cpp" />
</ItemGroup>
<!-- Compiler settings: all warnings are turned off, the src directory is added to the include path, and the sources are compiled as C++20. -->
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>TurnOffAllWarnings</WarningLevel>
<AdditionalIncludeDirectories>$(ProjectDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<LanguageStandard>stdcpp20</LanguageStandard>
</ClCompile>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>

111
3rdparty/ccc/ccc.vcxproj.filters vendored Normal file
View File

@ -0,0 +1,111 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Filter assignments for ccc.vcxproj: every header is placed under "Header Files" and every .cpp file under "Source Files". -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Filter definitions. "Resource Files" is declared, but no item in this file is assigned to it. -->
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<!-- Headers. -->
<ItemGroup>
<ClInclude Include="src\ccc\ast.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\elf.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\elf_symtab.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\importer_flags.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\mdebug_analysis.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\mdebug_importer.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\mdebug_section.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\mdebug_symbols.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\sndll.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\stabs.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\stabs_to_ast.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\symbol_database.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\symbol_file.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\symbol_table.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="src\ccc\util.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<!-- Translation units. -->
<ItemGroup>
<ClCompile Include="src\ccc\ast.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\elf.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\elf_symtab.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\importer_flags.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\mdebug_analysis.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\mdebug_importer.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\mdebug_section.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\mdebug_symbols.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\sndll.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\stabs.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\stabs_to_ast.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\symbol_database.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\symbol_file.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\symbol_table.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\ccc\util.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

562
3rdparty/ccc/src/ccc/ast.cpp vendored Normal file
View File

@ -0,0 +1,562 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "ast.h"
#include "importer_flags.h"
#include "symbol_database.h"
namespace ccc::ast {
static bool compare_nodes_and_merge(
CompareResult& dest, const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database);
static bool try_to_match_wobbly_typedefs(
const Node& node_lhs, const Node& node_rhs, const SymbolDatabase& database);
// Store the access specifier on this node, unless the NO_ACCESS_SPECIFIERS
// importer flag is set, in which case the node is left untouched.
void Node::set_access_specifier(AccessSpecifier specifier, u32 importer_flags)
{
	const bool discard_specifiers = (importer_flags & NO_ACCESS_SPECIFIERS) != 0;
	if(discard_specifiers) {
		return;
	}
	access_specifier = specifier;
}
// Follow a chain of type names to the node they ultimately refer to.
//
// Starting at this node, each TYPE_NAME node is replaced by the type of the
// data type symbol it references. The walk stops when a node that is not a
// type name is reached, when a lookup fails or yields a symbol without a type,
// or after max_depth hops. Returns the last node reached together with the
// last symbol that was followed (nullptr if no hop was made).
std::pair<Node*, DataType*> Node::physical_type(SymbolDatabase& database, s32 max_depth)
{
	Node* current = this;
	DataType* owner = nullptr;
	s32 hops_left = max_depth;
	while(hops_left > 0 && current->descriptor == TYPE_NAME) {
		DataType* candidate = database.data_types.symbol_from_handle(current->as<TypeName>().data_type_handle);
		Node* resolved = candidate ? candidate->type() : nullptr;
		if(!resolved) {
			break;
		}
		current = resolved;
		owner = candidate;
		hops_left--;
	}
	return std::pair<Node*, DataType*>(current, owner);
}
std::pair<const Node*, const DataType*> Node::physical_type(const SymbolDatabase& database, s32 max_depth) const
{
return const_cast<Node*>(this)->physical_type(const_cast<SymbolDatabase&>(database), max_depth);
}
// Map a member function modifier to its lowercase name. A value that matches
// none of the enumerators yields an empty string.
const char* member_function_modifier_to_string(MemberFunctionModifier modifier)
{
	const char* text = "";
	switch(modifier) {
		case MemberFunctionModifier::NONE:
			text = "none";
			break;
		case MemberFunctionModifier::STATIC:
			text = "static";
			break;
		case MemberFunctionModifier::VIRTUAL:
			text = "virtual";
			break;
	}
	return text;
}
bool StructOrUnion::flatten_fields(
std::vector<FlatField>& output,
const DataType* symbol,
const SymbolDatabase& database,
bool skip_statics,
s32 base_offset,
s32 max_fields,
s32 max_depth) const
{
if(max_depth == 0) {
return false;
}
for(const std::unique_ptr<Node>& type_name : base_classes) {
if(type_name->descriptor != TYPE_NAME) {
continue;
}
s32 new_base_offset = base_offset + type_name->offset_bytes;
DataTypeHandle handle = type_name->as<TypeName>().data_type_handle;
const DataType* base_class_symbol = database.data_types.symbol_from_handle(handle);
if(!base_class_symbol || !base_class_symbol->type() || base_class_symbol->type()->descriptor != STRUCT_OR_UNION) {
continue;
}
const StructOrUnion& base_class = base_class_symbol->type()->as<StructOrUnion>();
if(!base_class.flatten_fields(output, base_class_symbol, database, skip_statics, new_base_offset, max_fields, max_depth - 1)) {
return false;
}
}
for(const std::unique_ptr<Node>& field : fields) {
if(skip_statics && field->storage_class == STORAGE_CLASS_STATIC) {
continue;
}
if((s32) output.size() >= max_fields) {
return false;
}
FlatField& flat = output.emplace_back();
flat.node = field.get();
flat.symbol = symbol;
flat.base_offset = base_offset;
}
return true;
}
// Map a type name source to its string form. A value that matches none of the
// enumerators yields an empty string.
const char* type_name_source_to_string(TypeNameSource source)
{
	const char* text = "";
	switch(source) {
		case TypeNameSource::REFERENCE:
			text = "reference";
			break;
		case TypeNameSource::CROSS_REFERENCE:
			text = "cross_reference";
			break;
		case TypeNameSource::UNNAMED_THIS:
			text = "this";
			break;
	}
	return text;
}
// Map a forward declared type kind to the matching keyword. A value that
// matches none of the enumerators yields an empty string.
const char* forward_declared_type_to_string(ForwardDeclaredType type)
{
	const char* keyword = "";
	switch(type) {
		case ForwardDeclaredType::STRUCT:
			keyword = "struct";
			break;
		case ForwardDeclaredType::UNION:
			keyword = "union";
			break;
		case ForwardDeclaredType::ENUM:
			keyword = "enum";
			break;
	}
	return keyword;
}
// Return the referenced data type handle, or a default-constructed handle if
// this type name is marked as forward declared.
DataTypeHandle TypeName::data_type_handle_unless_forward_declared() const
{
	if(is_forward_declared) {
		return DataTypeHandle();
	}
	return data_type_handle;
}
// Compare two AST nodes and everything beneath them.
//
// The result starts out as MATCHES_NO_SWAP. A mismatch found directly on these
// two nodes is reported by returning the corresponding CompareFailReason
// (presumably converted into a "differs" CompareResult; the conversion is
// declared outside this file, confirm there). Child nodes are compared through
// compare_nodes_and_merge, which folds each child's outcome into `result` and
// returns true as soon as the combined outcome is DIFFERS, at which point the
// accumulated result is returned immediately.
//
// database: may be null. It is only forwarded to compare_nodes_and_merge.
// check_intrusive_fields: when true, the storage class, name, offset_bytes,
// size_bits and is_const of the two nodes themselves are compared as well.
// Children are always compared with this enabled (see compare_nodes_and_merge).
CompareResult compare_nodes(
const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database, bool check_intrusive_fields)
{
CompareResult result = CompareResultType::MATCHES_NO_SWAP;
// Nodes of different kinds can never match, and the switch below relies on
// both sides having the same descriptor.
if(node_lhs.descriptor != node_rhs.descriptor) {
return CompareFailReason::DESCRIPTOR;
}
if(check_intrusive_fields) {
if(node_lhs.storage_class != node_rhs.storage_class) {
// In some cases we can determine that a type was typedef'd for C
// translation units, but not for C++ translation units, so we need
// to add a special case for that here.
if(node_lhs.storage_class == STORAGE_CLASS_TYPEDEF && node_rhs.storage_class == STORAGE_CLASS_NONE) {
result = CompareResultType::MATCHES_FAVOUR_LHS;
} else if(node_lhs.storage_class == STORAGE_CLASS_NONE && node_rhs.storage_class == STORAGE_CLASS_TYPEDEF) {
result = CompareResultType::MATCHES_FAVOUR_RHS;
} else {
return CompareFailReason::STORAGE_CLASS;
}
}
// Vtable pointers and constructors can sometimes contain type numbers
// that are different between translation units, so we don't want to
// compare them.
bool is_vtable_pointer = node_lhs.is_vtable_pointer && node_rhs.is_vtable_pointer;
bool is_numbered_constructor = node_lhs.name.starts_with("$_") && node_rhs.name.starts_with("$_");
if(node_lhs.name != node_rhs.name && !is_vtable_pointer && !is_numbered_constructor) {
return CompareFailReason::NAME;
}
if(node_lhs.offset_bytes != node_rhs.offset_bytes) {
return CompareFailReason::RELATIVE_OFFSET_BYTES;
}
if(node_lhs.size_bits != node_rhs.size_bits) {
return CompareFailReason::SIZE_BITS;
}
if(node_lhs.is_const != node_rhs.is_const) {
return CompareFailReason::CONSTNESS;
}
}
// Kind-specific comparison. The order of the checks inside each case decides
// which fail reason gets reported when more than one thing differs.
switch(node_lhs.descriptor) {
case ARRAY: {
const auto [lhs, rhs] = Node::as<Array>(node_lhs, node_rhs);
// The element type is checked before the element count.
if(compare_nodes_and_merge(result, *lhs.element_type.get(), *rhs.element_type.get(), database)) {
return result;
}
if(lhs.element_count != rhs.element_count) {
return CompareFailReason::ARRAY_ELEMENT_COUNT;
}
break;
}
case BITFIELD: {
const auto [lhs, rhs] = Node::as<BitField>(node_lhs, node_rhs);
if(lhs.bitfield_offset_bits != rhs.bitfield_offset_bits) {
return CompareFailReason::BITFIELD_OFFSET_BITS;
}
if(compare_nodes_and_merge(result, *lhs.underlying_type.get(), *rhs.underlying_type.get(), database)) {
return result;
}
break;
}
case BUILTIN: {
const auto [lhs, rhs] = Node::as<BuiltIn>(node_lhs, node_rhs);
if(lhs.bclass != rhs.bclass) {
return CompareFailReason::BUILTIN_CLASS;
}
break;
}
case ENUM: {
const auto [lhs, rhs] = Node::as<Enum>(node_lhs, node_rhs);
// Compares the full (value, name) lists, so order matters too.
if(lhs.constants != rhs.constants) {
return CompareFailReason::ENUM_CONSTANTS;
}
break;
}
case ERROR_NODE: {
// Nothing beyond the common checks above is compared for error nodes.
break;
}
case FUNCTION: {
const auto [lhs, rhs] = Node::as<Function>(node_lhs, node_rhs);
if(lhs.return_type.has_value() != rhs.return_type.has_value()) {
return CompareFailReason::FUNCTION_RETURN_TYPE_HAS_VALUE;
}
if(lhs.return_type.has_value()) {
if(compare_nodes_and_merge(result, *lhs.return_type->get(), *rhs.return_type->get(), database)) {
return result;
}
}
// Parameter lists are compared element by element when both sides have
// one. Exactly one side having a list counts as a difference, while
// neither side having one is fine.
if(lhs.parameters.has_value() && rhs.parameters.has_value()) {
if(lhs.parameters->size() != rhs.parameters->size()) {
return CompareFailReason::FUNCTION_PARAMAETER_COUNT;
}
for(size_t i = 0; i < lhs.parameters->size(); i++) {
if(compare_nodes_and_merge(result, *(*lhs.parameters)[i].get(), *(*rhs.parameters)[i].get(), database)) {
return result;
}
}
} else if(lhs.parameters.has_value() != rhs.parameters.has_value()) {
return CompareFailReason::FUNCTION_PARAMETERS_HAS_VALUE;
}
if(lhs.modifier != rhs.modifier) {
return CompareFailReason::FUNCTION_MODIFIER;
}
break;
}
case POINTER_OR_REFERENCE: {
const auto [lhs, rhs] = Node::as<PointerOrReference>(node_lhs, node_rhs);
// A pointer versus a reference is reported as a descriptor mismatch.
if(lhs.is_pointer != rhs.is_pointer) {
return CompareFailReason::DESCRIPTOR;
}
if(compare_nodes_and_merge(result, *lhs.value_type.get(), *rhs.value_type.get(), database)) {
return result;
}
break;
}
case POINTER_TO_DATA_MEMBER: {
const auto [lhs, rhs] = Node::as<PointerToDataMember>(node_lhs, node_rhs);
if(compare_nodes_and_merge(result, *lhs.class_type.get(), *rhs.class_type.get(), database)) {
return result;
}
if(compare_nodes_and_merge(result, *lhs.member_type.get(), *rhs.member_type.get(), database)) {
return result;
}
break;
}
case STRUCT_OR_UNION: {
const auto [lhs, rhs] = Node::as<StructOrUnion>(node_lhs, node_rhs);
// A struct versus a union is reported as a descriptor mismatch.
if(lhs.is_struct != rhs.is_struct) {
return CompareFailReason::DESCRIPTOR;
}
// Base classes, fields and member functions are each compared pairwise
// by position, after checking that the counts agree.
if(lhs.base_classes.size() != rhs.base_classes.size()) {
return CompareFailReason::BASE_CLASS_COUNT;
}
for(size_t i = 0; i < lhs.base_classes.size(); i++) {
if(compare_nodes_and_merge(result, *lhs.base_classes[i].get(), *rhs.base_classes[i].get(), database)) {
return result;
}
}
if(lhs.fields.size() != rhs.fields.size()) {
return CompareFailReason::FIELDS_SIZE;
}
for(size_t i = 0; i < lhs.fields.size(); i++) {
if(compare_nodes_and_merge(result, *lhs.fields[i].get(), *rhs.fields[i].get(), database)) {
return result;
}
}
if(lhs.member_functions.size() != rhs.member_functions.size()) {
return CompareFailReason::MEMBER_FUNCTION_COUNT;
}
for(size_t i = 0; i < lhs.member_functions.size(); i++) {
if(compare_nodes_and_merge(result, *lhs.member_functions[i].get(), *rhs.member_functions[i].get(), database)) {
return result;
}
}
break;
}
case TYPE_NAME: {
const auto [lhs, rhs] = Node::as<TypeName>(node_lhs, node_rhs);
// Don't check the source so that REFERENCE and CROSS_REFERENCE are
// treated as the same.
if(lhs.data_type_handle != rhs.data_type_handle) {
return CompareFailReason::TYPE_NAME;
}
// Either both sides carry unresolved STABS information with the same
// type name, or neither side carries any.
const TypeName::UnresolvedStabs* lhs_unresolved_stabs = lhs.unresolved_stabs.get();
const TypeName::UnresolvedStabs* rhs_unresolved_stabs = rhs.unresolved_stabs.get();
if(lhs_unresolved_stabs && rhs_unresolved_stabs) {
if(lhs_unresolved_stabs->type_name != rhs_unresolved_stabs->type_name) {
return CompareFailReason::TYPE_NAME;
}
} else if(lhs_unresolved_stabs || rhs_unresolved_stabs) {
return CompareFailReason::TYPE_NAME;
}
break;
}
}
return result;
}
// Compare a pair of child nodes (including their intrusive fields) and fold
// the outcome into `dest`, the running result for the parent comparison.
//
// If the children differ and a database is available, a second attempt is
// made to match them as a typedef on one side against the plain type on the
// other; success turns the outcome into a match that favours the side holding
// the type name.
//
// Returns true if the combined result is DIFFERS, i.e. the caller should stop.
static bool compare_nodes_and_merge(
	CompareResult& dest, const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database)
{
	CompareResult inner = compare_nodes(node_lhs, node_rhs, database, true);

	if(database && inner.type == CompareResultType::DIFFERS) {
		if(try_to_match_wobbly_typedefs(node_lhs, node_rhs, *database)) {
			inner.type = CompareResultType::MATCHES_FAVOUR_LHS;
		} else if(try_to_match_wobbly_typedefs(node_rhs, node_lhs, *database)) {
			inner.type = CompareResultType::MATCHES_FAVOUR_RHS;
		}
	}

	if(dest.type != inner.type) {
		// True if at least one of the two results has the given type. Since the
		// two types differ here, at most one of them can.
		const auto either_is = [&](CompareResultType type) {
			return dest.type == type || inner.type == type;
		};

		if(either_is(CompareResultType::DIFFERS)) {
			// If any of the inner types differ, the outer type does too.
			dest.type = CompareResultType::DIFFERS;
		} else if(either_is(CompareResultType::MATCHES_CONFUSED)) {
			// Propagate confusion.
			dest.type = CompareResultType::MATCHES_CONFUSED;
		} else if(either_is(CompareResultType::MATCHES_FAVOUR_LHS) && either_is(CompareResultType::MATCHES_FAVOUR_RHS)) {
			// One result favours the LHS node and the other favours the RHS
			// node, so we are confused.
			dest.type = CompareResultType::MATCHES_CONFUSED;
		} else if(either_is(CompareResultType::MATCHES_FAVOUR_LHS)) {
			// One result favours the LHS node and the other is neutral, so go
			// with the LHS node.
			dest.type = CompareResultType::MATCHES_FAVOUR_LHS;
		} else if(either_is(CompareResultType::MATCHES_FAVOUR_RHS)) {
			// One result favours the RHS node and the other is neutral, so go
			// with the RHS node.
			dest.type = CompareResultType::MATCHES_FAVOUR_RHS;
		}
	}

	// Keep the first fail reason that was recorded.
	if(dest.fail_reason == CompareFailReason::NONE) {
		dest.fail_reason = inner.fail_reason;
	}

	return dest.type == CompareResultType::DIFFERS;
}
static bool try_to_match_wobbly_typedefs(
const Node& type_name_node, const Node& raw_node, const SymbolDatabase& database)
{
// Detect if one side has a typedef when the other just has the plain type.
// This was previously a common reason why type deduplication would fail.
if(type_name_node.descriptor != TYPE_NAME) {
return false;
}
const TypeName& type_name = type_name_node.as<TypeName>();
if(const TypeName::UnresolvedStabs* unresolved_stabs = type_name.unresolved_stabs.get()) {
if(unresolved_stabs->referenced_file_handle == (u32) -1 || !unresolved_stabs->stabs_type_number.valid()) {
return false;
}
const SourceFile* source_file =
database.source_files.symbol_from_handle(unresolved_stabs->referenced_file_handle);
CCC_ASSERT(source_file);
auto handle = source_file->stabs_type_number_to_handle.find(unresolved_stabs->stabs_type_number);
if(handle != source_file->stabs_type_number_to_handle.end()) {
const DataType* referenced_type = database.data_types.symbol_from_handle(handle->second);
CCC_ASSERT(referenced_type && referenced_type->type());
// Don't compare 'intrusive' fields e.g. the offset.
CompareResult new_result = compare_nodes(*referenced_type->type(), raw_node, &database, false);
if(new_result.type != CompareResultType::DIFFERS) {
return true;
}
}
}
return false;
}
// Map a comparison fail reason to a short human readable description. A value
// that matches none of the enumerators yields an empty string.
const char* compare_fail_reason_to_string(CompareFailReason reason)
{
	const char* text = "";
	switch(reason) {
		case CompareFailReason::NONE: text = "error"; break;
		case CompareFailReason::DESCRIPTOR: text = "descriptor"; break;
		case CompareFailReason::STORAGE_CLASS: text = "storage class"; break;
		case CompareFailReason::NAME: text = "name"; break;
		case CompareFailReason::RELATIVE_OFFSET_BYTES: text = "relative offset"; break;
		case CompareFailReason::ABSOLUTE_OFFSET_BYTES: text = "absolute offset"; break;
		case CompareFailReason::BITFIELD_OFFSET_BITS: text = "bitfield offset"; break;
		case CompareFailReason::SIZE_BITS: text = "size"; break;
		case CompareFailReason::CONSTNESS: text = "constness"; break;
		case CompareFailReason::ARRAY_ELEMENT_COUNT: text = "array element count"; break;
		case CompareFailReason::BUILTIN_CLASS: text = "builtin class"; break;
		case CompareFailReason::FUNCTION_RETURN_TYPE_HAS_VALUE: text = "function return type has value"; break;
		case CompareFailReason::FUNCTION_PARAMAETER_COUNT: text = "function paramaeter count"; break;
		case CompareFailReason::FUNCTION_PARAMETERS_HAS_VALUE: text = "function parameter"; break;
		case CompareFailReason::FUNCTION_MODIFIER: text = "function modifier"; break;
		case CompareFailReason::ENUM_CONSTANTS: text = "enum constant"; break;
		case CompareFailReason::BASE_CLASS_COUNT: text = "base class count"; break;
		case CompareFailReason::FIELDS_SIZE: text = "fields size"; break;
		case CompareFailReason::MEMBER_FUNCTION_COUNT: text = "member function count"; break;
		case CompareFailReason::VTABLE_GLOBAL: text = "vtable global"; break;
		case CompareFailReason::TYPE_NAME: text = "type name"; break;
		case CompareFailReason::VARIABLE_CLASS: text = "variable class"; break;
		case CompareFailReason::VARIABLE_TYPE: text = "variable type"; break;
		case CompareFailReason::VARIABLE_STORAGE: text = "variable storage"; break;
		case CompareFailReason::VARIABLE_BLOCK: text = "variable block"; break;
	}
	return text;
}
const char* node_type_to_string(const Node& node)
{
switch(node.descriptor) {
case ARRAY: return "array";
case BITFIELD: return "bitfield";
case BUILTIN: return "builtin";
case ENUM: return "enum";
case ERROR_NODE: return "error";
case FUNCTION: return "function";
case POINTER_OR_REFERENCE: {
const PointerOrReference& pointer_or_reference = node.as<PointerOrReference>();
if(pointer_or_reference.is_pointer) {
return "pointer";
} else {
return "reference";
}
}
case POINTER_TO_DATA_MEMBER: return "pointer_to_data_member";
case STRUCT_OR_UNION: {
const StructOrUnion& struct_or_union = node.as<StructOrUnion>();
if(struct_or_union.is_struct) {
return "struct";
} else {
return "union";
}
}
case TYPE_NAME: return "type_name";
}
return "";
}
// Map a storage class to its lowercase name. A value that matches none of the
// enumerators yields an empty string.
const char* storage_class_to_string(StorageClass storage_class)
{
	const char* text = "";
	switch(storage_class) {
		case STORAGE_CLASS_NONE: text = "none"; break;
		case STORAGE_CLASS_TYPEDEF: text = "typedef"; break;
		case STORAGE_CLASS_EXTERN: text = "extern"; break;
		case STORAGE_CLASS_STATIC: text = "static"; break;
		case STORAGE_CLASS_AUTO: text = "auto"; break;
		case STORAGE_CLASS_REGISTER: text = "register"; break;
	}
	return text;
}
// Map an access specifier to the matching keyword. A value that matches none
// of the enumerators yields an empty string.
const char* access_specifier_to_string(AccessSpecifier specifier)
{
	const char* keyword = "";
	switch(specifier) {
		case AS_PUBLIC:
			keyword = "public";
			break;
		case AS_PROTECTED:
			keyword = "protected";
			break;
		case AS_PRIVATE:
			keyword = "private";
			break;
	}
	return keyword;
}
// Describe a built-in type class in words. A value that matches none of the
// enumerators yields an empty string.
const char* builtin_class_to_string(BuiltInClass bclass)
{
	const char* description = "";
	switch(bclass) {
		case BuiltInClass::VOID_TYPE: description = "void"; break;
		case BuiltInClass::UNSIGNED_8: description = "8-bit unsigned integer"; break;
		case BuiltInClass::SIGNED_8: description = "8-bit signed integer"; break;
		case BuiltInClass::UNQUALIFIED_8: description = "8-bit integer"; break;
		case BuiltInClass::BOOL_8: description = "8-bit boolean"; break;
		case BuiltInClass::UNSIGNED_16: description = "16-bit unsigned integer"; break;
		case BuiltInClass::SIGNED_16: description = "16-bit signed integer"; break;
		case BuiltInClass::UNSIGNED_32: description = "32-bit unsigned integer"; break;
		case BuiltInClass::SIGNED_32: description = "32-bit signed integer"; break;
		case BuiltInClass::FLOAT_32: description = "32-bit floating point"; break;
		case BuiltInClass::UNSIGNED_64: description = "64-bit unsigned integer"; break;
		case BuiltInClass::SIGNED_64: description = "64-bit signed integer"; break;
		case BuiltInClass::FLOAT_64: description = "64-bit floating point"; break;
		case BuiltInClass::UNSIGNED_128: description = "128-bit unsigned integer"; break;
		case BuiltInClass::SIGNED_128: description = "128-bit signed integer"; break;
		case BuiltInClass::UNQUALIFIED_128: description = "128-bit integer"; break;
		case BuiltInClass::FLOAT_128: description = "128-bit floating point"; break;
	}
	return description;
}
// Size in bytes of a built-in type class. The void class, and any value that
// matches none of the enumerators, has size 0.
s32 builtin_class_size(BuiltInClass bclass)
{
	switch(bclass) {
		case BuiltInClass::VOID_TYPE:
			return 0;
		case BuiltInClass::UNSIGNED_8:
		case BuiltInClass::SIGNED_8:
		case BuiltInClass::UNQUALIFIED_8:
		case BuiltInClass::BOOL_8:
			return 1;
		case BuiltInClass::UNSIGNED_16:
		case BuiltInClass::SIGNED_16:
			return 2;
		case BuiltInClass::UNSIGNED_32:
		case BuiltInClass::SIGNED_32:
		case BuiltInClass::FLOAT_32:
			return 4;
		case BuiltInClass::UNSIGNED_64:
		case BuiltInClass::SIGNED_64:
		case BuiltInClass::FLOAT_64:
			return 8;
		case BuiltInClass::UNSIGNED_128:
		case BuiltInClass::SIGNED_128:
		case BuiltInClass::UNQUALIFIED_128:
		case BuiltInClass::FLOAT_128:
			return 16;
	}
	return 0;
}
}

377
3rdparty/ccc/src/ccc/ast.h vendored Normal file
View File

@ -0,0 +1,377 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "symbol_database.h"
namespace ccc::ast {
// Identifies the concrete kind of a Node. The value is stored in
// Node::descriptor, and each node struct exposes its own value as a static
// DESCRIPTOR constant which Node::as<SubType>() asserts against before
// downcasting.
enum NodeDescriptor : u8 {
ARRAY,
BITFIELD,
BUILTIN,
ENUM,
ERROR_NODE,
FUNCTION,
POINTER_OR_REFERENCE,
POINTER_TO_DATA_MEMBER,
STRUCT_OR_UNION,
TYPE_NAME
};
// C++ access specifier of a node. The values are stored in the 2-bit
// Node::access_specifier bit-field, so they must stay within the range 0 to 3.
enum AccessSpecifier {
AS_PUBLIC = 0,
AS_PROTECTED = 1,
AS_PRIVATE = 2
};
// To add a new type of node:
// 1. Add it to the NodeDescriptor enum.
// 2. Create a struct for it.
// 3. Add support for it in for_each_node.
// 4. Add support for it in compute_size_bytes_recursive.
// 5. Add support for it in compare_nodes.
// 6. Add support for it in node_type_to_string.
// 7. Add support for it in CppPrinter::ast_node.
// 8. Add support for it in write_json.
// 9. Add support for it in refine_node.
// Base type of every node in the AST. The concrete kind is identified by
// `descriptor`, which each derived struct passes to this constructor; use
// as<SubType>() to downcast once the kind is known.
struct Node {
// Kind of this node. Fixed at construction time.
const NodeDescriptor descriptor;
// Single-bit flags, packed as bit-fields. Default member initializers on
// bit-fields require C++20.
u8 is_const : 1 = false;
u8 is_volatile : 1 = false;
u8 is_virtual_base_class : 1 = false;
u8 is_vtable_pointer : 1 = false;
u8 is_constructor_or_destructor : 1 = false;
u8 is_special_member_function : 1 = false;
u8 is_operator_member_function : 1 = false;
u8 cannot_compute_size : 1 = false;
// Holds a storage class value (STORAGE_CLASS_*) in 4 bits.
u8 storage_class : 4 = STORAGE_CLASS_NONE;
// Holds an AccessSpecifier value in 2 bits. See set_access_specifier.
u8 access_specifier : 2 = AS_PUBLIC;
// Defaults to -1. NOTE(review): presumably filled in by
// compute_size_bytes_recursive, which is not part of this file - confirm.
s32 size_bytes = -1;
// If the name isn't populated for a given node, the name from the last
// ancestor to have one should be used i.e. when processing the tree you
// should pass the name down.
std::string name;
s32 offset_bytes = -1; // Offset relative to start of last inline struct/union.
s32 size_bits = -1; // Size stored in the .mdebug symbol table, may not be set.
Node(NodeDescriptor d) : descriptor(d) {}
Node(const Node& rhs) = default;
// Virtual so that derived nodes can be destroyed through a Node pointer
// (derived structs own their children as std::unique_ptr<Node>).
virtual ~Node() {}
// Downcast to SubType. Asserts that the descriptor matches SubType::DESCRIPTOR.
template <typename SubType>
SubType& as() {
CCC_ASSERT(descriptor == SubType::DESCRIPTOR);
return *static_cast<SubType*>(this);
}
// Const version of the downcast above.
template <typename SubType>
const SubType& as() const {
CCC_ASSERT(descriptor == SubType::DESCRIPTOR);
return *static_cast<const SubType*>(this);
}
// Downcast two nodes to the same SubType at once, asserting that both have
// the matching descriptor. The result is a pair of const references.
template <typename SubType>
static std::pair<const SubType&, const SubType&> as(const Node& lhs, const Node& rhs) {
CCC_ASSERT(lhs.descriptor == SubType::DESCRIPTOR && rhs.descriptor == SubType::DESCRIPTOR);
return std::pair<const SubType&, const SubType&>(static_cast<const SubType&>(lhs), static_cast<const SubType&>(rhs));
}
// Sets access_specifier unless the NO_ACCESS_SPECIFIERS bit is set in
// importer_flags.
void set_access_specifier(AccessSpecifier specifier, u32 importer_flags);
// If this node is a type name, repeatedly resolve it to the type it's
// referencing, otherwise return (this, nullptr).
std::pair<Node*, DataType*> physical_type(SymbolDatabase& database, s32 max_depth = 100);
std::pair<const Node*, const DataType*> physical_type(const SymbolDatabase& database, s32 max_depth = 100) const;
};
// An array type: an owned element type plus an element count, which defaults
// to -1.
struct Array : Node {
std::unique_ptr<Node> element_type;
s32 element_count = -1;
Array() : Node(DESCRIPTOR) {}
static const constexpr NodeDescriptor DESCRIPTOR = ARRAY;
};
// A bit-field member: a bit offset (defaulting to -1) plus the owned
// underlying type.
struct BitField : Node {
s32 bitfield_offset_bits = -1; // Offset relative to the last byte (not the position of the underlying type!).
std::unique_ptr<Node> underlying_type;
BitField() : Node(DESCRIPTOR) {}
static const constexpr NodeDescriptor DESCRIPTOR = BITFIELD;
};
// The classes of built-in type, grouped below by width in bits.
// builtin_class_to_string gives a description of each class and
// builtin_class_size gives its size in bytes.
enum class BuiltInClass {
VOID_TYPE,
UNSIGNED_8, SIGNED_8, UNQUALIFIED_8, BOOL_8,
UNSIGNED_16, SIGNED_16,
UNSIGNED_32, SIGNED_32, FLOAT_32,
UNSIGNED_64, SIGNED_64, FLOAT_64,
UNSIGNED_128, SIGNED_128, UNQUALIFIED_128, FLOAT_128
};
// A built-in type, described by its BuiltInClass (void by default).
struct BuiltIn : Node {
BuiltInClass bclass = BuiltInClass::VOID_TYPE;
BuiltIn() : Node(DESCRIPTOR) {}
static const constexpr NodeDescriptor DESCRIPTOR = BUILTIN;
};
// An enumeration: an ordered list of (value, name) pairs.
struct Enum : Node {
std::vector<std::pair<s32, std::string>> constants;
Enum() : Node(DESCRIPTOR) {}
static const constexpr NodeDescriptor DESCRIPTOR = ENUM;
};
struct Error : Node {
std::string message;
Error() : Node(ERROR_NODE) {}
static const constexpr NodeDescriptor DESCRIPTOR = ERROR_NODE;
};
// Modifier that can be attached to a member function (see Function::modifier).
enum class MemberFunctionModifier {
NONE,
STATIC,
VIRTUAL
};
// Convert a member function modifier to a string. Defined elsewhere.
const char* member_function_modifier_to_string(MemberFunctionModifier modifier);
// AST node describing a function type. Both the return type and the parameter
// list are optional and may be absent.
struct Function : Node {
	static constexpr NodeDescriptor DESCRIPTOR = FUNCTION;
	
	std::optional<std::unique_ptr<Node>> return_type;
	std::optional<std::vector<std::unique_ptr<Node>>> parameters;
	MemberFunctionModifier modifier = MemberFunctionModifier::NONE;
	// Defaults to -1.
	s32 vtable_index = -1;
	// Filled in by fill_in_pointers_to_member_function_definitions.
	FunctionHandle definition_handle;
	
	Function()
		: Node(DESCRIPTOR)
	{
	}
};
// AST node describing either a pointer or a reference to another type.
struct PointerOrReference : Node {
	static constexpr NodeDescriptor DESCRIPTOR = POINTER_OR_REFERENCE;
	
	// True for a pointer (the default), false for a reference.
	bool is_pointer = true;
	// The type being pointed to or referenced.
	std::unique_ptr<Node> value_type;
	
	PointerOrReference()
		: Node(DESCRIPTOR)
	{
	}
};
// AST node describing a pointer to a data member of a class.
struct PointerToDataMember : Node {
	static constexpr NodeDescriptor DESCRIPTOR = POINTER_TO_DATA_MEMBER;
	
	// The class the member belongs to.
	std::unique_ptr<Node> class_type;
	// The type of the member itself.
	std::unique_ptr<Node> member_type;
	
	PointerToDataMember()
		: Node(DESCRIPTOR)
	{
	}
};
// AST node describing a struct or a union, including its base classes, fields
// and member functions.
struct StructOrUnion : Node {
// True for a struct (the default), false for a union.
bool is_struct = true;
std::vector<std::unique_ptr<Node>> base_classes;
std::vector<std::unique_ptr<Node>> fields;
std::vector<std::unique_ptr<Node>> member_functions;
StructOrUnion() : Node(DESCRIPTOR) {}
static const constexpr NodeDescriptor DESCRIPTOR = STRUCT_OR_UNION;
// A single entry in the list produced by flatten_fields.
struct FlatField {
// The field itself.
const Node* node;
// The symbol that owns the node.
const DataType* symbol;
// Offset of the innermost enclosing base class in the object.
s32 base_offset = 0;
};
// Generate a flat list of all the fields in this class as well as all the
// base classes recursively, but only until the max_fields or max_depth
// limits are reached. Return true if all the fields were enumerated.
bool flatten_fields(
std::vector<FlatField>& output,
const DataType* symbol,
const SymbolDatabase& database,
bool skip_statics,
s32 base_offset = 0,
s32 max_fields = 100000,
s32 max_depth = 100) const;
};
// Records what kind of symbol table construct a TypeName node was created
// from.
enum class TypeNameSource : u8 {
REFERENCE, // A STABS type reference.
CROSS_REFERENCE, // A STABS cross reference.
UNNAMED_THIS // A this parameter (or return type) referencing an unnamed type.
};
// Convert a type name source to a string. Defined elsewhere.
const char* type_name_source_to_string(TypeNameSource source);
// The kind of type named by a forward declaration (see
// TypeName::UnresolvedStabs::type).
enum class ForwardDeclaredType {
STRUCT,
UNION,
ENUM // Should be illegal but STABS supports cross references to enums so it's here.
};
// Convert a forward declared type kind to a string. Defined elsewhere.
const char* forward_declared_type_to_string(ForwardDeclaredType type);
// AST node that refers to another data type by handle rather than containing
// its definition.
struct TypeName : Node {
	static constexpr NodeDescriptor DESCRIPTOR = TYPE_NAME;
	
	// Information from the STABS symbol that this type name was built from.
	struct UnresolvedStabs {
		std::string type_name;
		SourceFileHandle referenced_file_handle;
		StabsTypeNumber stabs_type_number;
		std::optional<ForwardDeclaredType> type;
	};
	
	DataTypeHandle data_type_handle;
	TypeNameSource source = TypeNameSource::REFERENCE;
	bool is_forward_declared = false;
	std::unique_ptr<UnresolvedStabs> unresolved_stabs;
	
	TypeName()
		: Node(DESCRIPTOR)
	{
	}
	
	// Defined elsewhere.
	DataTypeHandle data_type_handle_unless_forward_declared() const;
};
// The overall outcome of comparing two AST nodes with compare_nodes.
enum class CompareResultType {
MATCHES_NO_SWAP, // Both lhs and rhs are identical.
MATCHES_CONFUSED, // Both lhs and rhs are almost identical, and we don't know which is better.
MATCHES_FAVOUR_LHS, // Both lhs and rhs are almost identical, but lhs is better.
MATCHES_FAVOUR_RHS, // Both lhs and rhs are almost identical, but rhs is better.
DIFFERS, // The two nodes differ substantially.
};
// Identifies which property caused two AST nodes to compare as different.
// NONE is used when the comparison did not fail (see CompareResult).
// NOTE(review): FUNCTION_PARAMAETER_COUNT is misspelt, but renaming it would
// break any code that refers to the enumerator, so it is left as is.
enum class CompareFailReason {
NONE,
DESCRIPTOR,
STORAGE_CLASS,
NAME,
RELATIVE_OFFSET_BYTES,
ABSOLUTE_OFFSET_BYTES,
BITFIELD_OFFSET_BITS,
SIZE_BITS,
CONSTNESS,
ARRAY_ELEMENT_COUNT,
BUILTIN_CLASS,
FUNCTION_RETURN_TYPE_HAS_VALUE,
FUNCTION_PARAMAETER_COUNT,
FUNCTION_PARAMETERS_HAS_VALUE,
FUNCTION_MODIFIER,
ENUM_CONSTANTS,
BASE_CLASS_COUNT,
FIELDS_SIZE,
MEMBER_FUNCTION_COUNT,
VTABLE_GLOBAL,
TYPE_NAME,
VARIABLE_CLASS,
VARIABLE_TYPE,
VARIABLE_STORAGE,
VARIABLE_BLOCK
};
// The result of comparing two AST nodes. Implicitly constructible from either
// a result type (in which case there is no fail reason) or a fail reason (in
// which case the result type is DIFFERS).
struct CompareResult {
	CompareResultType type;
	CompareFailReason fail_reason;
	
	CompareResult(CompareResultType type)
		: type(type)
		, fail_reason(CompareFailReason::NONE)
	{
	}
	
	CompareResult(CompareFailReason reason)
		: type(CompareResultType::DIFFERS)
		, fail_reason(reason)
	{
	}
};
// Compare two AST nodes and their children recursively. This will only check
// fields that will be equal for two versions of the same type from different
// translation units.
CompareResult compare_nodes(const Node& lhs, const Node& rhs, const SymbolDatabase* database, bool check_intrusive_fields);
// String conversion helpers for the enums and node types declared in this
// header. All of these are defined elsewhere.
const char* compare_fail_reason_to_string(CompareFailReason reason);
const char* node_type_to_string(const Node& node);
const char* storage_class_to_string(StorageClass storage_class);
const char* access_specifier_to_string(AccessSpecifier specifier);
const char* builtin_class_to_string(BuiltInClass bclass);
// Size of a built-in type. Defined elsewhere.
// NOTE(review): the unit (bytes or bits) is not visible from here; confirm
// against the definition.
s32 builtin_class_size(BuiltInClass bclass);
// Controls whether for_each_node calls the callback on a node before
// (preorder) or after (postorder) visiting its children.
enum TraversalOrder {
PREORDER_TRAVERSAL,
POSTORDER_TRAVERSAL
};
// Value returned by for_each_node callbacks. During a preorder traversal,
// returning DONT_EXPLORE_CHILDREN skips the children of the current node.
enum ExplorationMode {
EXPLORE_CHILDREN,
DONT_EXPLORE_CHILDREN
};
// Call the callback on the given node and, recursively, on all of its
// children.
//
// For PREORDER_TRAVERSAL the callback is run on a node before its children,
// and if it returns DONT_EXPLORE_CHILDREN the children of that node are
// skipped. For POSTORDER_TRAVERSAL the callback is run after the children have
// been visited and its return value is ignored.
//
// Child pointers are dereferenced without null checks, so every child pointer
// that is visited must be non-null. The callback is passed by value to each
// recursive call.
template <typename ThisNode, typename Callback>
void for_each_node(ThisNode& node, TraversalOrder order, Callback callback)
{
// In preorder mode the callback decides whether to descend any further.
if(order == PREORDER_TRAVERSAL && callback(node) == DONT_EXPLORE_CHILDREN) {
return;
}
switch(node.descriptor) {
case ARRAY: {
auto& array = node.template as<Array>();
for_each_node(*array.element_type.get(), order, callback);
break;
}
case BITFIELD: {
auto& bitfield = node.template as<BitField>();
for_each_node(*bitfield.underlying_type.get(), order, callback);
break;
}
// Built-ins, enums and error nodes have no children.
case BUILTIN: {
break;
}
case ENUM: {
break;
}
case ERROR_NODE: {
break;
}
case FUNCTION: {
auto& func = node.template as<Function>();
// The return type and the parameters are both optional.
if(func.return_type.has_value()) {
for_each_node(*func.return_type->get(), order, callback);
}
if(func.parameters.has_value()) {
for(auto& child : *func.parameters) {
for_each_node(*child.get(), order, callback);
}
}
break;
}
case POINTER_OR_REFERENCE: {
auto& pointer_or_reference = node.template as<PointerOrReference>();
for_each_node(*pointer_or_reference.value_type.get(), order, callback);
break;
}
case POINTER_TO_DATA_MEMBER: {
auto& pointer = node.template as<PointerToDataMember>();
for_each_node(*pointer.class_type.get(), order, callback);
for_each_node(*pointer.member_type.get(), order, callback);
break;
}
case STRUCT_OR_UNION: {
auto& struct_or_union = node.template as<StructOrUnion>();
// Visit the base classes, then the fields, then the member functions.
for(auto& child : struct_or_union.base_classes) {
for_each_node(*child.get(), order, callback);
}
for(auto& child : struct_or_union.fields) {
for_each_node(*child.get(), order, callback);
}
for(auto& child : struct_or_union.member_functions) {
for_each_node(*child.get(), order, callback);
}
break;
}
// Type names are not followed to the type they refer to.
case TYPE_NAME: {
break;
}
}
if(order == POSTORDER_TRAVERSAL) {
callback(node);
}
}
}

125
3rdparty/ccc/src/ccc/elf.cpp vendored Normal file
View File

@ -0,0 +1,125 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "elf.h"
namespace ccc {
// Parse the ident header, file header, section headers and program headers of
// a 32-bit ELF file. The image is moved into the returned ElfFile.
//
// Returns an error if any of the headers or section names lie outside the
// image, if the magic number is wrong, or if the file is not a 32-bit ELF.
Result<ElfFile> ElfFile::parse(std::vector<u8> image)
{
	ElfFile elf;
	elf.image = std::move(image);
	
	const ElfIdentHeader* ident = get_packed<ElfIdentHeader>(elf.image, 0);
	CCC_CHECK(ident, "ELF ident header out of range.");
	CCC_CHECK(ident->magic == CCC_FOURCC("\x7f\x45\x4c\x46"), "Not an ELF file.");
	CCC_CHECK(ident->e_class == ElfIdentClass::B32, "Wrong ELF class (not 32 bit).");
	
	// The file header immediately follows the ident header.
	const ElfFileHeader* header = get_packed<ElfFileHeader>(elf.image, sizeof(ElfIdentHeader));
	CCC_CHECK(header, "ELF file header out of range.");
	elf.file_header = *header;
	
	// The section names are stored in the section indexed by shstrndx.
	const ElfSectionHeader* shstr_section_header = get_packed<ElfSectionHeader>(elf.image, header->shoff + header->shstrndx * sizeof(ElfSectionHeader));
	CCC_CHECK(shstr_section_header, "ELF section name header out of range.");
	
	for(u32 i = 0; i < header->shnum; i++) {
		u64 header_offset = header->shoff + i * sizeof(ElfSectionHeader);
		const ElfSectionHeader* section_header = get_packed<ElfSectionHeader>(elf.image, header_offset);
		CCC_CHECK(section_header, "ELF section header out of range.");
		
		const char* name = get_string(elf.image, shstr_section_header->offset + section_header->name);
		// Validate the result of the string lookup itself. Constructing a
		// std::string from a null pointer below would be undefined behaviour.
		CCC_CHECK(name, "ELF section name out of range.");
		
		ElfSection& section = elf.sections.emplace_back();
		section.name = name;
		section.header = *section_header;
	}
	
	for(u32 i = 0; i < header->phnum; i++) {
		u64 header_offset = header->phoff + i * sizeof(ElfProgramHeader);
		const ElfProgramHeader* program_header = get_packed<ElfProgramHeader>(elf.image, header_offset);
		CCC_CHECK(program_header, "ELF program header out of range.");
		
		elf.segments.emplace_back(*program_header);
	}
	
	return elf;
}
// Create a section symbol in the database for every section header that was
// parsed from the ELF file, and record the size of each section on it.
Result<void> ElfFile::create_section_symbols(
	SymbolDatabase& database, const SymbolGroup& group) const
{
	for(const ElfSection& elf_section : sections) {
		const ElfSectionHeader& section_header = elf_section.header;
		
		Address section_address = Address::non_zero(section_header.addr);
		Result<Section*> created = database.sections.create_symbol(
			elf_section.name, section_address, group.source, group.module_symbol);
		CCC_RETURN_IF_ERROR(created);
		
		(*created)->set_size(section_header.size);
	}
	
	return Result<void>();
}
// Find the first section with the given name, or return nullptr if there is no
// such section.
const ElfSection* ElfFile::lookup_section(const char* name) const
{
	for(size_t index = 0; index < sections.size(); index++) {
		const ElfSection& candidate = sections[index];
		if(candidate.name == name) {
			return &candidate;
		}
	}
	
	return nullptr;
}
// Translate an offset into the ELF file to a virtual address using the first
// segment whose file range contains the offset. Returns std::nullopt if no
// segment contains it.
std::optional<u32> ElfFile::file_offset_to_virtual_address(u32 file_offset) const
{
	for(const ElfProgramHeader& header : segments) {
		const bool before_segment = file_offset < header.offset;
		const bool past_segment = file_offset >= header.offset + header.filesz;
		if(before_segment || past_segment) {
			continue;
		}
		
		return header.vaddr + file_offset - header.offset;
	}
	
	return std::nullopt;
}
// Find the program header of the segment containing the entry point. If more
// than one segment contains it, the one that appears last in the program
// header table is returned. Returns nullptr if no segment contains it.
const ElfProgramHeader* ElfFile::entry_point_segment() const
{
	// Scan backwards so that the first hit is the last matching segment.
	for(size_t remaining = segments.size(); remaining > 0; remaining--) {
		const ccc::ElfProgramHeader& candidate = segments[remaining - 1];
		
		const bool entry_not_before = file_header.entry >= candidate.vaddr;
		const bool entry_not_after = file_header.entry < candidate.vaddr + candidate.filesz;
		if(entry_not_before && entry_not_after) {
			return &candidate;
		}
	}
	
	return nullptr;
}
// Retrieve a view of the part of the ELF image that is mapped to the given
// range of virtual addresses. The whole range must lie inside the file-backed
// part of a single segment and inside the image, otherwise an error is
// returned.
Result<std::span<const u8>> ElfFile::get_virtual(u32 address, u32 size) const
{
	const u32 end_address = address + size;
	// If the end address wrapped around, no segment can contain the range.
	const bool range_wraps = end_address < address;
	
	if(!range_wraps) {
		for(const ElfProgramHeader& header : segments) {
			const bool starts_inside = address >= header.vaddr;
			const bool ends_inside = end_address <= header.vaddr + header.filesz;
			if(!starts_inside || !ends_inside) {
				continue;
			}
			
			const size_t first = header.offset + (address - header.vaddr);
			const size_t last = first + size;
			if(first > image.size() || last > image.size()) {
				continue;
			}
			
			return std::span<const u8>(image.data() + first, image.data() + last);
		}
	}
	
	return CCC_FAILURE("No ELF segment for address range 0x%x to 0x%x.", address, end_address);
}
// Copy size bytes starting at the given virtual address out of the ELF image
// and into dest. Fails without writing anything if get_virtual fails for the
// range.
Result<void> ElfFile::copy_virtual(u8* dest, u32 address, u32 size) const
{
	Result<std::span<const u8>> source = get_virtual(address, size);
	CCC_RETURN_IF_ERROR(source);
	
	const u8* source_bytes = source->data();
	memcpy(dest, source_bytes, size);
	
	return Result<void>();
}
}

156
3rdparty/ccc/src/ccc/elf.h vendored Normal file
View File

@ -0,0 +1,156 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "symbol_database.h"
namespace ccc {
// The class byte of the ELF ident header. ElfFile::parse only accepts B32.
enum class ElfIdentClass : u8 {
B32 = 0x1,
B64 = 0x2
};
// The identification header read from offset zero of an ELF file. The comment
// before each field is its offset from the start of the file.
CCC_PACKED_STRUCT(ElfIdentHeader,
/* 0x0 */ u32 magic; // 7f 45 4c 46
/* 0x4 */ ElfIdentClass e_class;
/* 0x5 */ u8 endianess;
/* 0x6 */ u8 version;
/* 0x7 */ u8 os_abi;
/* 0x8 */ u8 abi_version;
/* 0x9 */ u8 pad[7];
)
// Values of the type field of the ELF file header.
enum class ElfFileType : u16 {
NONE = 0x00,
REL = 0x01,
EXEC = 0x02,
DYN = 0x03,
CORE = 0x04,
LOOS = 0xfe00,
HIOS = 0xfeff,
LOPROC = 0xff00,
HIPROC = 0xffff
};
// Values of the machine field of the ELF file header. Only MIPS is listed.
enum class ElfMachine : u16 {
MIPS = 0x08
};
// The remainder of the 32-bit ELF file header. ElfFile::parse reads it from
// directly after the ident header, and the offset comments are relative to the
// start of the file.
CCC_PACKED_STRUCT(ElfFileHeader,
/* 0x10 */ ElfFileType type;
/* 0x12 */ ElfMachine machine;
/* 0x14 */ u32 version;
/* 0x18 */ u32 entry;
/* 0x1c */ u32 phoff;
/* 0x20 */ u32 shoff;
/* 0x24 */ u32 flags;
/* 0x28 */ u16 ehsize;
/* 0x2a */ u16 phentsize;
/* 0x2c */ u16 phnum;
/* 0x2e */ u16 shentsize;
/* 0x30 */ u16 shnum;
/* 0x32 */ u16 shstrndx;
)
// Values of the type field of an ELF section header.
enum class ElfSectionType : u32 {
NULL_SECTION = 0x0,
PROGBITS = 0x1,
SYMTAB = 0x2,
STRTAB = 0x3,
RELA = 0x4,
HASH = 0x5,
DYNAMIC = 0x6,
NOTE = 0x7,
NOBITS = 0x8,
REL = 0x9,
SHLIB = 0xa,
DYNSYM = 0xb,
INIT_ARRAY = 0xe,
FINI_ARRAY = 0xf,
PREINIT_ARRAY = 0x10,
GROUP = 0x11,
SYMTAB_SHNDX = 0x12,
NUM = 0x13,
LOOS = 0x60000000,
MIPS_DEBUG = 0x70000005
};
// A 32-bit ELF section header. The comment before each field is its offset
// within the header. ElfFile::parse uses the name field as an offset into the
// section name string table.
CCC_PACKED_STRUCT(ElfSectionHeader,
/* 0x00 */ u32 name;
/* 0x04 */ ElfSectionType type;
/* 0x08 */ u32 flags;
/* 0x0c */ u32 addr;
/* 0x10 */ u32 offset;
/* 0x14 */ u32 size;
/* 0x18 */ u32 link;
/* 0x1c */ u32 info;
/* 0x20 */ u32 addralign;
/* 0x24 */ u32 entsize;
)
// A section header paired with the name that ElfFile::parse looked up for it.
struct ElfSection {
std::string name;
ElfSectionHeader header;
};
// A 32-bit ELF program header, describing a single segment. The comment before
// each field is its offset within the header.
CCC_PACKED_STRUCT(ElfProgramHeader,
/* 0x00 */ u32 type;
/* 0x04 */ u32 offset;
/* 0x08 */ u32 vaddr;
/* 0x0c */ u32 paddr;
/* 0x10 */ u32 filesz;
/* 0x14 */ u32 memsz;
/* 0x18 */ u32 flags;
/* 0x1c */ u32 align;
)
// An ELF file that has been loaded into memory, along with the headers that
// were parsed out of it.
struct ElfFile {
	ElfFileHeader file_header;
	std::vector<u8> image;
	std::vector<ElfSection> sections;
	std::vector<ElfProgramHeader> segments;
	
	// Parse the ELF file header, section headers and program headers.
	static Result<ElfFile> parse(std::vector<u8> image);
	
	// Create a section object for each section header in the ELF file.
	Result<void> create_section_symbols(SymbolDatabase& database, const SymbolGroup& group) const;
	
	// Find a section by name. Returns nullptr if no section matches.
	const ElfSection* lookup_section(const char* name) const;
	
	// Translate an offset into the file to a virtual address. Returns
	// std::nullopt if no segment contains the offset.
	std::optional<u32> file_offset_to_virtual_address(u32 file_offset) const;
	
	// Find the program header for the segment that contains the entry point.
	const ElfProgramHeader* entry_point_segment() const;
	
	// Retrieve a block of data in an ELF file given its address and size.
	Result<std::span<const u8>> get_virtual(u32 address, u32 size) const;
	
	// Copy a block of data in an ELF file to the destination buffer given its
	// address and size.
	Result<void> copy_virtual(u8* dest, u32 address, u32 size) const;
	
	// Retrieve an object of type T from an ELF file given its address.
	// NOTE(review): the object is read through a cast pointer into the image,
	// so this assumes that T can be read at an arbitrary alignment.
	template <typename T>
	Result<T> get_object_virtual(u32 address) const
	{
		Result<std::span<const u8>> result = get_virtual(address, sizeof(T));
		CCC_RETURN_IF_ERROR(result);
		
		return *(T*) result->data();
	}
	
	// Retrieve an array of objects of type T from an ELF file given its
	// address and element count. Fails if the size of the array in bytes
	// cannot be represented as a u32.
	template <typename T>
	Result<std::span<const T>> get_array_virtual(u32 address, u32 element_count) const
	{
		// Calculate the size using 64-bit arithmetic and reject anything that
		// wouldn't fit in the u32 size parameter of get_virtual. Otherwise the
		// size would be silently truncated and the span returned would
		// contain fewer elements than were asked for.
		u64 size_bytes = (u64) element_count * sizeof(T);
		CCC_CHECK(size_bytes <= 0xffffffffull,
			"Array of %u elements at address 0x%x is too large.",
			element_count, address);
		
		Result<std::span<const u8>> result = get_virtual(address, (u32) size_bytes);
		CCC_RETURN_IF_ERROR(result);
		
		return std::span<const T>((T*) result->data(), (T*) (result->data() + result->size()));
	}
};
}

213
3rdparty/ccc/src/ccc/elf_symtab.cpp vendored Normal file
View File

@ -0,0 +1,213 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "elf_symtab.h"
#include "importer_flags.h"
namespace ccc::elf {
// Symbol binding, stored in the upper four bits of Symbol::info.
enum class SymbolBind : u8 {
LOCAL = 0,
GLOBAL = 1,
WEAK = 2,
NUM = 3,
GNU_UNIQUE = 10
};
// Symbol type, stored in the lower four bits of Symbol::info.
enum class SymbolType : u8 {
NOTYPE = 0,
OBJECT = 1,
FUNC = 2,
SECTION = 3,
FILE = 4,
COMMON = 5,
TLS = 6,
NUM = 7,
GNU_IFUNC = 10
};
// Symbol visibility, stored in the lower two bits of Symbol::other.
enum class SymbolVisibility {
DEFAULT = 0,
INTERNAL = 1,
HIDDEN = 2,
PROTECTED = 3
};
// A single entry in a 32-bit ELF symbol table. The comment before each field
// is its offset within the entry. The name field is used as an offset into the
// associated string table.
CCC_PACKED_STRUCT(Symbol,
/* 0x0 */ u32 name;
/* 0x4 */ u32 value;
/* 0x8 */ u32 size;
/* 0xc */ u8 info;
/* 0xd */ u8 other;
/* 0xe */ u16 shndx;
// Accessors that unpack the bit fields stored in info and other.
SymbolType type() const { return (SymbolType) (info & 0xf); }
SymbolBind bind() const { return (SymbolBind) (info >> 4); }
SymbolVisibility visibility() const { return (SymbolVisibility) (other & 0x3); }
)
// Helpers for print_symbol_table, defined at the bottom of this file.
static const char* symbol_bind_to_string(SymbolBind bind);
static const char* symbol_type_to_string(SymbolType type);
static const char* symbol_visibility_to_string(SymbolVisibility visibility);
// Import the symbols from an ELF symbol table into the symbol database.
//
// symtab is the raw contents of the symbol table and strtab is the raw
// contents of the string table containing the symbol names. Symbols with a
// value of zero or with non-default visibility are skipped, as are symbols at
// an address where a function, global variable or local variable already
// starts (unless DONT_DEDUPLICATE_SYMBOLS is set). Returns an error if a
// symbol name lies outside the string table or if a symbol can't be created.
Result<void> import_symbols(
SymbolDatabase& database,
const SymbolGroup& group,
std::span<const u8> symtab,
std::span<const u8> strtab,
u32 importer_flags,
DemanglerFunctions demangler)
{
// Any trailing bytes that don't make up a whole symbol are ignored.
for(u32 i = 0; i < symtab.size() / sizeof(Symbol); i++) {
const Symbol* symbol = get_packed<Symbol>(symtab, i * sizeof(Symbol));
CCC_ASSERT(symbol);
// A value of zero is treated as the symbol not having an address.
// NOTE(review): this relies on a default constructed Address being
// invalid; confirm against the definition of Address.
Address address;
if(symbol->value != 0) {
address = symbol->value;
}
if(!address.valid() || symbol->visibility() != SymbolVisibility::DEFAULT) {
continue;
}
// Skip the symbol if something already starts at the same address.
if(!(importer_flags & DONT_DEDUPLICATE_SYMBOLS)) {
if(database.functions.first_handle_from_starting_address(address).valid()) {
continue;
}
if(database.global_variables.first_handle_from_starting_address(address).valid()) {
continue;
}
if(database.local_variables.first_handle_from_starting_address(address).valid()) {
continue;
}
}
const char* string = get_string(strtab, symbol->name);
CCC_CHECK(string, "Symbol string out of range.");
switch(symbol->type()) {
case SymbolType::NOTYPE: {
Result<Label*> label = database.labels.create_symbol(
string, group.source, group.module_symbol, address, importer_flags, demangler);
CCC_RETURN_IF_ERROR(label);
// These symbols get emitted at the same addresses as functions
// and aren't extremely useful, so we want to mark them to
// prevent them from possibly being used as function names.
(*label)->is_junk =
(*label)->name() == "__gnu_compiled_c" ||
(*label)->name() == "__gnu_compiled_cplusplus" ||
(*label)->name() == "gcc2_compiled.";
break;
}
case SymbolType::OBJECT: {
// Objects with a size become global variables, while objects
// without a size become labels.
if(symbol->size != 0) {
Result<GlobalVariable*> global_variable = database.global_variables.create_symbol(
string, group.source, group.module_symbol, address, importer_flags, demangler);
CCC_RETURN_IF_ERROR(global_variable);
if(*global_variable) {
(*global_variable)->set_size(symbol->size);
}
} else {
Result<Label*> label = database.labels.create_symbol(
string, group.source, group.module_symbol, address, importer_flags, demangler);
CCC_RETURN_IF_ERROR(label);
}
break;
}
case SymbolType::FUNC: {
Result<Function*> function = database.functions.create_symbol(
string, group.source, group.module_symbol, address, importer_flags, demangler);
CCC_RETURN_IF_ERROR(function);
if(*function) {
(*function)->set_size(symbol->size);
}
break;
}
case SymbolType::FILE: {
Result<SourceFile*> source_file = database.source_files.create_symbol(
string, group.source, group.module_symbol);
CCC_RETURN_IF_ERROR(source_file);
break;
}
// All other types of symbols are ignored.
default: {}
}
}
return Result<void>();
}
// Print out the contents of an ELF symbol table, with one line per symbol.
// Returns an error if a symbol name lies outside the string table.
Result<void> print_symbol_table(FILE* out, std::span<const u8> symtab, std::span<const u8> strtab)
{
	fprintf(out, "ELF SYMBOLS:\n");
	fprintf(out, " Num: Value Size Type Bind Vis Ndx Name\n");
	
	u32 index = 0;
	while(index < symtab.size() / sizeof(Symbol)) {
		const Symbol* entry = get_packed<Symbol>(symtab, index * sizeof(Symbol));
		CCC_ASSERT(entry);
		
		const char* type_string = symbol_type_to_string(entry->type());
		const char* bind_string = symbol_bind_to_string(entry->bind());
		const char* visibility_string = symbol_visibility_to_string(entry->visibility());
		
		const char* name_string = get_string(strtab, entry->name);
		CCC_CHECK(name_string, "Symbol string out of range.");
		
		fprintf(out, "%6u: %08x %5u %-7s %-7s %-7s %3u %s\n",
			index, entry->value, entry->size,
			type_string, bind_string, visibility_string,
			entry->shndx, name_string);
		
		index++;
	}
	
	return Result<void>();
}
// Convert a symbol binding to a string for printing. Returns "ERROR" for
// values that don't correspond to an enumerator.
static const char* symbol_bind_to_string(SymbolBind bind)
{
	const char* text = "ERROR";
	switch(bind) {
		case SymbolBind::LOCAL: text = "LOCAL"; break;
		case SymbolBind::GLOBAL: text = "GLOBAL"; break;
		case SymbolBind::WEAK: text = "WEAK"; break;
		case SymbolBind::NUM: text = "NUM"; break;
		case SymbolBind::GNU_UNIQUE: text = "GNU_UNIQUE"; break;
	}
	return text;
}
// Convert a symbol type to a string for printing. Returns "ERROR" for values
// that don't correspond to an enumerator.
static const char* symbol_type_to_string(SymbolType type)
{
	const char* text = "ERROR";
	switch(type) {
		case SymbolType::NOTYPE: text = "NOTYPE"; break;
		case SymbolType::OBJECT: text = "OBJECT"; break;
		case SymbolType::FUNC: text = "FUNC"; break;
		case SymbolType::SECTION: text = "SECTION"; break;
		case SymbolType::FILE: text = "FILE"; break;
		case SymbolType::COMMON: text = "COMMON"; break;
		case SymbolType::TLS: text = "TLS"; break;
		case SymbolType::NUM: text = "NUM"; break;
		case SymbolType::GNU_IFUNC: text = "GNU_IFUNC"; break;
	}
	return text;
}
// Convert a symbol visibility to a string for printing. Returns "ERROR" for
// values that don't correspond to an enumerator.
static const char* symbol_visibility_to_string(SymbolVisibility visibility)
{
	const char* text = "ERROR";
	switch(visibility) {
		case SymbolVisibility::DEFAULT: text = "DEFAULT"; break;
		case SymbolVisibility::INTERNAL: text = "INTERNAL"; break;
		case SymbolVisibility::HIDDEN: text = "HIDDEN"; break;
		case SymbolVisibility::PROTECTED: text = "PROTECTED"; break;
	}
	return text;
}
}

20
3rdparty/ccc/src/ccc/elf_symtab.h vendored Normal file
View File

@ -0,0 +1,20 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "symbol_database.h"
namespace ccc::elf {
// Import the symbols from an ELF symbol table into the symbol database. symtab
// is the raw contents of the symbol table and strtab is the raw contents of
// the string table containing the symbol names.
Result<void> import_symbols(
SymbolDatabase& database,
const SymbolGroup& group,
std::span<const u8> symtab,
std::span<const u8> strtab,
u32 importer_flags,
DemanglerFunctions demangler);
// Print out the contents of an ELF symbol table, with one line per symbol.
Result<void> print_symbol_table(FILE* out, std::span<const u8> symtab, std::span<const u8> strtab);
}

95
3rdparty/ccc/src/ccc/importer_flags.cpp vendored Normal file
View File

@ -0,0 +1,95 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "importer_flags.h"
namespace ccc {
// Table of all the importer flags that can be passed on the command line. Each
// entry consists of the flag value, the command line argument that enables it,
// and the lines of help text that print_importer_flags_help prints for it.
const std::vector<ImporterFlagInfo> IMPORTER_FLAGS = {
	{DEMANGLE_PARAMETERS, "--demangle-parameters", {
		"Include parameters in demangled function names."
	}},
	{DEMANGLE_RETURN_TYPE, "--demangle-return-type", {
		"Include return types at the end of demangled",
		"function names if they're available."
	}},
	{DONT_DEDUPLICATE_SYMBOLS, "--dont-deduplicate-symbols", {
		"Do not deduplicate matching symbols from",
		"different symbol tables. This option has no",
		"effect on data types."
	}},
	{DONT_DEDUPLICATE_TYPES, "--dont-deduplicate-types", {
		"Do not deduplicate data types from different",
		"translation units."
	}},
	{DONT_DEMANGLE_NAMES, "--dont-demangle-names", {
		"Do not demangle function names, global variable",
		"names, or overloaded operator names."
	}},
	{INCLUDE_GENERATED_MEMBER_FUNCTIONS, "--include-generated-functions", {
		"Output member functions that were likely",
		"automatically generated by the compiler."
	}},
	{NO_ACCESS_SPECIFIERS, "--no-access-specifiers", {
		"Do not print access specifiers."
	}},
	{NO_MEMBER_FUNCTIONS, "--no-member-functions", {
		"Do not print member functions."
	}},
	{NO_OPTIMIZED_OUT_FUNCTIONS, "--no-optimized-out-functions", {
		"Discard functions that were optimized out."
	}},
	{STRICT_PARSING, "--strict", {
		"Make more types of errors fatal."
	}},
	{TYPEDEF_ALL_ENUMS, "--typedef-all-enums", {
		"Force all emitted C++ enums to be defined using",
		"a typedef. With STABS, it is not always possible",
		"to determine if an enum was like this in the",
		"original source code, so this option should be",
		"useful for reverse engineering C projects."
	}},
	{TYPEDEF_ALL_STRUCTS, "--typedef-all-structs", {
		"Force all emitted C++ structure types to be",
		"defined using a typedef."
	}},
	{TYPEDEF_ALL_UNIONS, "--typedef-all-unions", {
		"Force all emitted C++ union types to be defined",
		"using a typedef."
	}},
	{UNIQUE_FUNCTIONS, "--unique-functions", {
		"If multiple identical .mdebug function symbols",
		"are present, find the one that seems to have",
		"actually been included in the linked binary, and",
		"remove the addresses from all the rest. Using",
		"this importer flag in combination with",
		"--no-optimized-out-functions will remove these",
		"duplicate function symbols entirely."
	}}
};
// Look up the importer flag that is enabled by the given command line
// argument. Returns NO_IMPORTER_FLAGS if the argument doesn't match any entry
// in the IMPORTER_FLAGS table.
u32 parse_importer_flag(const char* argument)
{
	u32 parsed_flag = NO_IMPORTER_FLAGS;
	
	for(size_t index = 0; index < IMPORTER_FLAGS.size(); index++) {
		const ImporterFlagInfo& info = IMPORTER_FLAGS[index];
		if(strcmp(info.argument, argument) != 0) {
			continue;
		}
		
		parsed_flag = info.flag;
		break;
	}
	
	return parsed_flag;
}
// Print the command line argument and the help text of every importer flag in
// the IMPORTER_FLAGS table, with a blank line before each flag.
void print_importer_flags_help(FILE* out)
{
for(const ImporterFlagInfo& flag : IMPORTER_FLAGS) {
fprintf(out, "\n");
// The argument is left-justified and padded to 29 characters, and is
// followed directly by the first line of help text.
fprintf(out, " %-29s ", flag.argument);
for(size_t i = 0; i < flag.help_text.size(); i++) {
// Prefix printed before every line of help text except the first.
// NOTE(review): presumably this is meant to line continuation lines up
// with the first line of help text; confirm that its width matches
// the width of the argument column printed above.
if(i > 0) {
fprintf(out, " ");
}
fprintf(out, "%s\n", flag.help_text[i]);
}
}
}
}

39
3rdparty/ccc/src/ccc/importer_flags.h vendored Normal file
View File

@ -0,0 +1,39 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "util.h"
namespace ccc {
// Bit flags that control how symbols are imported. Multiple flags are combined
// into a u32 bit mask. See the IMPORTER_FLAGS table for a description of each
// flag.
enum ImporterFlags {
NO_IMPORTER_FLAGS = 0,
DEMANGLE_PARAMETERS = (1 << 0),
DEMANGLE_RETURN_TYPE = (1 << 1),
DONT_DEDUPLICATE_SYMBOLS = (1 << 2),
DONT_DEDUPLICATE_TYPES = (1 << 3),
DONT_DEMANGLE_NAMES = (1 << 4),
INCLUDE_GENERATED_MEMBER_FUNCTIONS = (1 << 5),
NO_ACCESS_SPECIFIERS = (1 << 6),
NO_MEMBER_FUNCTIONS = (1 << 7),
NO_OPTIMIZED_OUT_FUNCTIONS = (1 << 8),
STRICT_PARSING = (1 << 9),
TYPEDEF_ALL_ENUMS = (1 << 10),
TYPEDEF_ALL_STRUCTS = (1 << 11),
TYPEDEF_ALL_UNIONS = (1 << 12),
UNIQUE_FUNCTIONS = (1 << 13)
};
// An entry in the IMPORTER_FLAGS table.
struct ImporterFlagInfo {
// The flag being described.
ImporterFlags flag;
// The command line argument that enables the flag.
const char* argument;
// The help text for the flag, split up into lines.
std::vector<const char*> help_text;
};
// Table of all the importer flags, defined in importer_flags.cpp.
extern const std::vector<ImporterFlagInfo> IMPORTER_FLAGS;
// Convert a command line argument to an importer flag. Returns
// NO_IMPORTER_FLAGS if the argument isn't recognised.
u32 parse_importer_flag(const char* argument);
// Print the arguments and help text of all the importer flags.
void print_importer_flags_help(FILE* out);
}

349
3rdparty/ccc/src/ccc/mdebug_analysis.cpp vendored Normal file
View File

@ -0,0 +1,349 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "mdebug_analysis.h"
#include "stabs_to_ast.h"
namespace ccc::mdebug {
// Handler for STAB magic symbols. The argument is ignored and the function
// always succeeds.
Result<void> LocalSymbolTableAnalyser::stab_magic(const char* magic)
{
return Result<void>();
}
// Handler for source file symbols. If no relative path is pending, the command
// line path of the current source file becomes the next relative path. Both
// arguments are ignored.
Result<void> LocalSymbolTableAnalyser::source_file(const char* path, Address text_address)
{
	const bool path_already_pending = !m_next_relative_path.empty();
	if(path_already_pending) {
		return Result<void>();
	}
	
	m_next_relative_path = m_source_file.command_line_path;
	return Result<void>();
}
// Handler for data type symbols. Converts the STABS type to an AST node, and
// then either creates a new data type symbol for it unconditionally (if the
// DONT_DEDUPLICATE_TYPES flag is set) or hands it to the database, which
// creates one only if the type is unique.
Result<void> LocalSymbolTableAnalyser::data_type(const ParsedSymbol& symbol)
{
Result<std::unique_ptr<ast::Node>> node = stabs_type_to_ast(
*symbol.name_colon_type.type.get(), nullptr, m_stabs_to_ast_state, 0, false, false);
CCC_RETURN_IF_ERROR(node);
if(symbol.is_typedef && (*node)->descriptor == ast::STRUCT_OR_UNION) {
ast::StructOrUnion& struct_or_union = (*node)->as<ast::StructOrUnion>();
const std::string& name = symbol.name_colon_type.name;
StabsTypeNumber type_number = symbol.name_colon_type.type->type_number;
fix_recursively_emitted_structures(struct_or_union, name, type_number, m_stabs_to_ast_state.file_handle);
}
// Work out if one of the TYPEDEF_ALL_* importer flags applies to this type.
bool is_struct = (*node)->descriptor == ast::STRUCT_OR_UNION && (*node)->as<ast::StructOrUnion>().is_struct;
bool force_typedef =
((m_context.importer_flags & TYPEDEF_ALL_ENUMS) && (*node)->descriptor == ast::ENUM) ||
((m_context.importer_flags & TYPEDEF_ALL_STRUCTS) && (*node)->descriptor == ast::STRUCT_OR_UNION && is_struct) ||
((m_context.importer_flags & TYPEDEF_ALL_UNIONS) && (*node)->descriptor == ast::STRUCT_OR_UNION && !is_struct);
// The name on the left of the comparison is replaced by the empty string.
(*node)->name = (symbol.name_colon_type.name == " ") ? "" : symbol.name_colon_type.name;
if(symbol.is_typedef || force_typedef) {
(*node)->storage_class = STORAGE_CLASS_TYPEDEF;
}
const char* name = (*node)->name.c_str();
StabsTypeNumber number = symbol.name_colon_type.type->type_number;
if(m_context.importer_flags & DONT_DEDUPLICATE_TYPES) {
Result<DataType*> data_type = m_database.data_types.create_symbol(
name, m_context.group.source, m_context.group.module_symbol);
CCC_RETURN_IF_ERROR(data_type);
// Record which symbol the STABS type number maps to for this file.
m_source_file.stabs_type_number_to_handle[number] = (*data_type)->handle();
(*data_type)->set_type(std::move(*node));
(*data_type)->files = {m_source_file.handle()};
} else {
Result<ccc::DataType*> type = m_database.create_data_type_if_unique(
std::move(*node), number, name, m_source_file, m_context.group);
CCC_RETURN_IF_ERROR(type);
}
return Result<void>();
}
// Handler for global variable symbols. Creates a global variable symbol in the
// database, remembers its handle, converts its STABS type to an AST node and
// fills in its storage class, type and storage location.
Result<void> LocalSymbolTableAnalyser::global_variable(
const char* mangled_name, Address address, const StabsType& type, bool is_static, GlobalStorageLocation location)
{
Result<GlobalVariable*> global = m_database.global_variables.create_symbol(
mangled_name, m_context.group.source, m_context.group.module_symbol, address, m_context.importer_flags, m_context.demangler);
CCC_RETURN_IF_ERROR(global);
CCC_ASSERT(*global);
m_global_variables.emplace_back((*global)->handle());
// Note that if the type conversion fails, the symbol that was created above
// is left in the database without a type.
Result<std::unique_ptr<ast::Node>> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, false);
CCC_RETURN_IF_ERROR(node);
if(is_static) {
(*global)->storage_class = STORAGE_CLASS_STATIC;
}
(*global)->set_type(std::move(*node));
(*global)->storage.location = location;
return Result<void>();
}
// Handler for sub source file symbols. While at the beginning of a function
// the path is recorded as a sub source file of that function, otherwise it is
// stored as the next relative path.
Result<void> LocalSymbolTableAnalyser::sub_source_file(const char* path, Address text_address)
{
	const bool at_function_beginning = m_current_function && m_state == IN_FUNCTION_BEGINNING;
	if(!at_function_beginning) {
		m_next_relative_path = path;
		return Result<void>();
	}
	
	Function::SubSourceFile& entry = m_current_function->sub_source_files.emplace_back();
	entry.address = text_address;
	entry.relative_path = path;
	
	return Result<void>();
}
// Handler for procedure symbols. Creates a new function unless the current
// function already has the same mangled name, then fills in the storage class
// and, if a procedure descriptor was provided, the stack frame size.
Result<void> LocalSymbolTableAnalyser::procedure(
	const char* mangled_name, Address address, const ProcedureDescriptor* procedure_descriptor, bool is_static)
{
	const bool is_new_function =
		!m_current_function ||
		strcmp(mangled_name, m_current_function->mangled_name().c_str()) != 0;
	
	if(is_new_function) {
		Result<void> create_result = create_function(mangled_name, address);
		CCC_RETURN_IF_ERROR(create_result);
	}
	
	if(is_static) {
		m_current_function->storage_class = STORAGE_CLASS_STATIC;
	}
	
	if(procedure_descriptor) {
		m_current_function->stack_frame_size = procedure_descriptor->frame_size;
	}
	
	return Result<void>();
}
// Handler for label symbols. Labels that have a valid address, appear while
// there is a current function and have a name beginning with a dollar sign are
// recorded as line number information for that function. All other labels are
// ignored.
Result<void> LocalSymbolTableAnalyser::label(const char* label, Address address, s32 line_number)
{
	const bool is_line_number_label = address.valid() && m_current_function && label[0] == '$';
	if(!is_line_number_label) {
		return Result<void>();
	}
	
	Function::LineNumberPair& entry = m_current_function->line_numbers.emplace_back();
	entry.address = address;
	entry.line_number = line_number;
	
	return Result<void>();
}
// Handler for END TEXT symbols. If the analyser is at the beginning of a
// function, the size of that function is filled in and the state advances to
// IN_FUNCTION_END. In any other state the symbol is ignored.
Result<void> LocalSymbolTableAnalyser::text_end(const char* name, s32 function_size)
{
	if(m_state != IN_FUNCTION_BEGINNING) {
		return Result<void>();
	}
	
	CCC_CHECK(m_current_function, "END TEXT symbol outside of function.");
	m_current_function->set_size(function_size);
	m_state = IN_FUNCTION_END;
	
	return Result<void>();
}
// Handler for function symbols. Creates a new function unless the current
// function already has the same mangled name, in which case its address may be
// corrected (see below). The return type is then converted to an AST node and
// set as the type of the current function.
Result<void> LocalSymbolTableAnalyser::function(const char* mangled_name, const StabsType& return_type, Address address)
{
if(!m_current_function || strcmp(mangled_name, m_current_function->mangled_name().c_str()) != 0) {
Result<void> result = create_function(mangled_name, address);
CCC_RETURN_IF_ERROR(result);
} else {
// For MTV Music Maker 2, the addresses for static functions stored in
// the PROC symbols are relative to the translation unit, while the
// addresses stored in the FUN symbol are absolute. This is the only
// game I've found that seems to have this problem, but since in all
// other cases it seems all these addresses are all absolute, I may as
// well add in a hack here to deal with it.
bool no_module_base_address = m_context.group.module_symbol && m_context.group.module_symbol->address().get_or_zero() == 0;
bool new_address_greater = address.valid() && address > m_current_function->address();
if(no_module_base_address && new_address_greater) {
m_database.functions.move_symbol(m_current_function->handle(), address);
}
}
Result<std::unique_ptr<ast::Node>> node = stabs_type_to_ast(return_type, nullptr, m_stabs_to_ast_state, 0, true, true);
CCC_RETURN_IF_ERROR(node);
m_current_function->set_type(std::move(*node));
return Result<void>();
}
// Finish processing the current function, if there is one. The parameter and
// local variables that have been collected are handed over to the function,
// and all of the per-function state is reset so that the analyser is no longer
// inside a function.
Result<void> LocalSymbolTableAnalyser::function_end()
{
if(m_current_function) {
m_current_function->set_parameter_variables(std::move(m_current_parameter_variables), m_database);
m_current_function->set_local_variables(std::move(m_current_local_variables), m_database);
}
m_current_function = nullptr;
// Replace the lists, which may have been moved from above, with empty ones.
m_current_parameter_variables = std::vector<ParameterVariableHandle>();
m_current_local_variables = std::vector<LocalVariableHandle>();
m_blocks.clear();
m_pending_local_variables.clear();
m_state = NOT_IN_FUNCTION;
return Result<void>();
}
// Handler for parameter symbols. Creates a parameter variable symbol, adds it
// to the list of parameters for the current function, and fills in its type
// and storage. Returns an error if there is no current function.
Result<void> LocalSymbolTableAnalyser::parameter(
const char* name, const StabsType& type, bool is_stack, s32 value, bool is_by_reference)
{
CCC_CHECK(m_current_function, "Parameter symbol before first func/proc symbol.");
Result<ParameterVariable*> parameter_variable = m_database.parameter_variables.create_symbol(
name, m_context.group.source, m_context.group.module_symbol);
CCC_RETURN_IF_ERROR(parameter_variable);
m_current_parameter_variables.emplace_back((*parameter_variable)->handle());
Result<std::unique_ptr<ast::Node>> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, true);
CCC_RETURN_IF_ERROR(node);
(*parameter_variable)->set_type(std::move(*node));
// The value is a stack pointer offset for parameters stored on the stack, and
// a dbx register number for parameters stored in registers.
if(is_stack) {
StackStorage& stack_storage = (*parameter_variable)->storage.emplace<StackStorage>();
stack_storage.stack_pointer_offset = value;
} else {
RegisterStorage& register_storage = (*parameter_variable)->storage.emplace<RegisterStorage>();
register_storage.dbx_register_number = value;
register_storage.is_by_reference = is_by_reference;
}
return Result<void>();
}
// Handler for local variable symbols. Creates a local variable symbol, adds it
// to both the list of local variables for the current function and the list
// of pending local variables, and fills in its type and storage based on the
// symbol descriptor. Local variables that appear outside of a function are
// silently ignored.
Result<void> LocalSymbolTableAnalyser::local_variable(
const char* name, const StabsType& type, u32 value, StabsSymbolDescriptor desc, SymbolClass sclass)
{
if(!m_current_function) {
return Result<void>();
}
// Only static local variables are given an address. For all other local
// variables the value is a register number or a stack offset (see below).
Address address = (desc == StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE) ? value : Address();
Result<LocalVariable*> local_variable = m_database.local_variables.create_symbol(
name, address, m_context.group.source, m_context.group.module_symbol);
CCC_RETURN_IF_ERROR(local_variable);
m_current_local_variables.emplace_back((*local_variable)->handle());
m_pending_local_variables.emplace_back((*local_variable)->handle());
Result<std::unique_ptr<ast::Node>> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, false);
CCC_RETURN_IF_ERROR(node);
if(desc == StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE) {
GlobalStorage& global_storage = (*local_variable)->storage.emplace<GlobalStorage>();
std::optional<GlobalStorageLocation> location_opt =
symbol_class_to_global_variable_location(sclass);
CCC_CHECK(location_opt.has_value(),
"Invalid static local variable location %s.",
symbol_class(sclass));
global_storage.location = *location_opt;
(*node)->storage_class = STORAGE_CLASS_STATIC;
} else if(desc == StabsSymbolDescriptor::REGISTER_VARIABLE) {
RegisterStorage& register_storage = (*local_variable)->storage.emplace<RegisterStorage>();
register_storage.dbx_register_number = (s32) value;
} else if(desc == StabsSymbolDescriptor::LOCAL_VARIABLE) {
StackStorage& stack_storage = (*local_variable)->storage.emplace<StackStorage>();
stack_storage.stack_pointer_offset = (s32) value;
} else {
// Note that by this point the symbol has already been created and added to
// the lists above.
return CCC_FAILURE("LocalSymbolTableAnalyser::local_variable() called with bad symbol descriptor.");
}
(*local_variable)->set_type(std::move(*node));
return Result<void>();
}
// Handle the start of a block. All local variables seen since the previous
// LBRAC have the low end of their live range set to the address of the source
// file plus begin_offset, and are then pushed onto the stack of open blocks so
// that the matching RBRAC can fill in the high end.
Result<void> LocalSymbolTableAnalyser::lbrac(s32 begin_offset)
{
	const auto block_begin = m_source_file.address().value + begin_offset;
	
	for (LocalVariableHandle handle : m_pending_local_variables) {
		LocalVariable* variable = m_database.local_variables.symbol_from_handle(handle);
		if (variable == nullptr) {
			continue;
		}
		variable->live_range.low = block_begin;
	}
	
	m_blocks.emplace_back(std::move(m_pending_local_variables));
	m_pending_local_variables = {};
	
	return Result<void>();
}
// Handle the end of a block. The local variables belonging to the innermost
// open block have the high end of their live range set to the address of the
// source file plus end_offset, and the block is popped. Fails if there is no
// open block.
Result<void> LocalSymbolTableAnalyser::rbrac(s32 end_offset)
{
	CCC_CHECK(!m_blocks.empty(), "RBRAC symbol without a matching LBRAC symbol.");
	
	const auto block_end = m_source_file.address().value + end_offset;
	
	for (LocalVariableHandle handle : m_blocks.back()) {
		LocalVariable* variable = m_database.local_variables.symbol_from_handle(handle);
		if (variable == nullptr) {
			continue;
		}
		variable->live_range.high = block_end;
	}
	
	m_blocks.pop_back();
	
	return Result<void>();
}
// Called once all the symbols of a source file have been processed. Closes
// the last function if it is still open and hands the collected function and
// global variable handles over to the source file symbol. Fails if the symbol
// table ended while the analyser was in the IN_FUNCTION_BEGINNING state.
Result<void> LocalSymbolTableAnalyser::finish()
{
	CCC_CHECK(m_state != IN_FUNCTION_BEGINNING,
		"Unexpected end of symbol table for '%s'.", m_source_file.name().c_str());
	
	if (m_current_function != nullptr) {
		Result<void> end_result = function_end();
		CCC_RETURN_IF_ERROR(end_result);
	}
	
	m_source_file.set_functions(std::move(m_functions), m_database);
	m_source_file.set_global_variables(std::move(m_global_variables), m_database);
	
	return Result<void>();
}
// Create a new function symbol and make it the current function. If another
// function is still open it is closed first.
//
// mangled_name  Name of the function as it appears in the symbol table.
// address       Address to create the function symbol with.
Result<void> LocalSymbolTableAnalyser::create_function(const char* mangled_name, Address address)
{
	if (m_current_function != nullptr) {
		Result<void> end_result = function_end();
		CCC_RETURN_IF_ERROR(end_result);
	}
	
	Result<Function*> created = m_database.functions.create_symbol(
		mangled_name,
		m_context.group.source,
		m_context.group.module_symbol,
		address,
		m_context.importer_flags,
		m_context.demangler);
	CCC_RETURN_IF_ERROR(created);
	CCC_ASSERT(*created);
	
	m_current_function = *created;
	m_functions.emplace_back(m_current_function->handle());
	m_state = IN_FUNCTION_BEGINNING;
	
	// Apply the pending relative path, unless the function's current relative
	// path already equals the command line path of the source file.
	const bool have_pending_path = !m_next_relative_path.empty();
	if (have_pending_path && m_current_function->relative_path != m_source_file.command_line_path) {
		m_current_function->relative_path = m_next_relative_path;
	}
	
	return Result<void>();
}
// Translate an mdebug symbol class into the equivalent global storage
// location. Symbol classes that have no equivalent produce an empty optional.
std::optional<GlobalStorageLocation> symbol_class_to_global_variable_location(SymbolClass symbol_class)
{
	switch (symbol_class) {
		case SymbolClass::NIL: return GlobalStorageLocation::NIL;
		case SymbolClass::DATA: return GlobalStorageLocation::DATA;
		case SymbolClass::BSS: return GlobalStorageLocation::BSS;
		case SymbolClass::ABS: return GlobalStorageLocation::ABS;
		case SymbolClass::SDATA: return GlobalStorageLocation::SDATA;
		case SymbolClass::SBSS: return GlobalStorageLocation::SBSS;
		case SymbolClass::RDATA: return GlobalStorageLocation::RDATA;
		case SymbolClass::COMMON: return GlobalStorageLocation::COMMON;
		case SymbolClass::SCOMMON: return GlobalStorageLocation::SCOMMON;
		case SymbolClass::SUNDEFINED: return GlobalStorageLocation::SUNDEFINED;
		default: break;
	}
	return std::nullopt;
}
}

99
3rdparty/ccc/src/ccc/mdebug_analysis.h vendored Normal file
View File

@ -0,0 +1,99 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "importer_flags.h"
#include "mdebug_section.h"
#include "mdebug_symbols.h"
#include "stabs.h"
#include "stabs_to_ast.h"
#include "symbol_database.h"
namespace ccc::mdebug {
// State that is shared between all the files of a single import and that is
// passed by const reference to the per-file analysis code.
struct AnalysisContext {
// Reader used to enumerate and parse the files of the symbol table.
const mdebug::SymbolTableReader* reader = nullptr;
// Optional map of external symbols representing functions, keyed by the
// value field of the symbol.
const std::map<u32, const mdebug::Symbol*>* external_functions = nullptr;
// Optional map of external symbols representing global variables, keyed by
// the name of the symbol.
const std::map<std::string, const mdebug::Symbol*>* external_globals = nullptr;
// Provides the source and module passed along when creating symbols, and is
// used to test whether an existing symbol belongs to this import.
SymbolGroup group;
// Bitwise combination of importer flags.
u32 importer_flags = NO_IMPORTER_FLAGS;
// Demangler callbacks passed along when creating function symbols.
DemanglerFunctions demangler;
};
// Consumes the parsed symbols of a single source file one at a time and
// creates the corresponding symbols (functions, parameters, local variables,
// ...) in the symbol database.
class LocalSymbolTableAnalyser {
public:
// All four arguments are stored as references, so they must outlive the
// analyser.
LocalSymbolTableAnalyser(SymbolDatabase& database, const StabsToAstState& stabs_to_ast_state, const AnalysisContext& context, SourceFile& source_file)
: m_database(database)
, m_context(context)
, m_stabs_to_ast_state(stabs_to_ast_state)
, m_source_file(source_file) {}
// Functions for processing individual symbols.
//
// In most cases these symbols will appear in the following order:
// PROC TEXT
// ... line numbers ... ($LM<N>)
// END TEXT
// LABEL TEXT FUN
// ... parameters ...
// ... blocks ... (... local variables ... LBRAC ... subblocks ... RBRAC)
// NIL NIL FUN
//
// For some compiler versions the symbols can appear in this order:
// LABEL TEXT FUN
// ... parameters ...
// first line number ($LM1)
// PROC TEXT
// ... line numbers ... ($LM<N>)
// END TEXT
// ... blocks ... (... local variables ... LBRAC ... subblocks ... RBRAC)
Result<void> stab_magic(const char* magic);
Result<void> source_file(const char* path, Address text_address);
Result<void> data_type(const ParsedSymbol& symbol);
Result<void> global_variable(
const char* mangled_name, Address address, const StabsType& type, bool is_static, GlobalStorageLocation location);
Result<void> sub_source_file(const char* name, Address text_address);
Result<void> procedure(
const char* mangled_name, Address address, const ProcedureDescriptor* procedure_descriptor, bool is_static);
Result<void> label(const char* label, Address address, s32 line_number);
Result<void> text_end(const char* name, s32 function_size);
Result<void> function(const char* mangled_name, const StabsType& return_type, Address address);
// Closes the current function, if any, and resets the per-function state.
Result<void> function_end();
Result<void> parameter(
const char* name, const StabsType& type, bool is_stack, s32 value, bool is_by_reference);
Result<void> local_variable(
const char* name, const StabsType& type, u32 value, StabsSymbolDescriptor desc, SymbolClass sclass);
// Begin/end a block. The offsets are relative to the address of the source
// file and are used to fill in the live ranges of local variables.
Result<void> lbrac(s32 begin_offset);
Result<void> rbrac(s32 end_offset);
// Must be called after the last symbol of the source file has been processed.
Result<void> finish();
// Creates a new function symbol and makes it the current function.
Result<void> create_function(const char* mangled_name, Address address);
protected:
enum AnalysisState {
NOT_IN_FUNCTION,
IN_FUNCTION_BEGINNING,
IN_FUNCTION_END
};
SymbolDatabase& m_database;
const AnalysisContext& m_context;
const StabsToAstState& m_stabs_to_ast_state;
AnalysisState m_state = NOT_IN_FUNCTION;
SourceFile& m_source_file;
// Handles of the symbols created so far, handed over to the source file
// symbol by finish().
std::vector<FunctionHandle> m_functions;
std::vector<GlobalVariableHandle> m_global_variables;
// The function currently being processed, or nullptr if there isn't one.
Function* m_current_function = nullptr;
// Handles collected for the current function, handed over to it by
// function_end().
std::vector<ParameterVariableHandle> m_current_parameter_variables;
std::vector<LocalVariableHandle> m_current_local_variables;
// Stack of currently open blocks, each holding the local variables that were
// pending when its LBRAC symbol was processed.
std::vector<std::vector<LocalVariableHandle>> m_blocks;
// Local variables created since the last LBRAC symbol.
std::vector<LocalVariableHandle> m_pending_local_variables;
// If not empty, used by create_function() as the relative path of newly
// created functions.
std::string m_next_relative_path;
};
// Convert an mdebug symbol class to a global storage location. Returns an
// empty optional if the symbol class has no corresponding storage location.
std::optional<GlobalStorageLocation> symbol_class_to_global_variable_location(SymbolClass symbol_class);
};

668
3rdparty/ccc/src/ccc/mdebug_importer.cpp vendored Normal file
View File

@ -0,0 +1,668 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "mdebug_importer.h"
namespace ccc::mdebug {
// Forward declarations of the analysis passes that are local to this file.
// They are all run from import_files, except for resolve_type_name which is a
// helper of resolve_type_names.
static Result<void> resolve_type_names(
SymbolDatabase& database, const SymbolGroup& group, u32 importer_flags);
static Result<void> resolve_type_name(
ast::TypeName& type_name,
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags);
static void compute_size_bytes(ast::Node& node, SymbolDatabase& database);
static void detect_duplicate_functions(SymbolDatabase& database, const SymbolGroup& group);
static void detect_fake_functions(SymbolDatabase& database, const std::map<u32, const mdebug::Symbol*>& external_functions, const SymbolGroup& group);
static void destroy_optimized_out_functions(
SymbolDatabase& database, const SymbolGroup& group);
// Entry point of the importer: parse the symbol table found at section_offset
// inside the ELF file and import all of its symbols into the database.
//
// database        Database the symbols are added to.
// elf             Contents of the ELF file.
// section_offset  Offset used to initialise the symbol table reader.
// group           Source/module that the new symbols are associated with.
// importer_flags  Bitwise combination of importer flags.
// demangler       Demangler callbacks.
// interrupt       Optional flag that is polled between files; when it
//                 becomes true the import is aborted with an error.
Result<void> import_symbol_table(
	SymbolDatabase& database,
	std::span<const u8> elf,
	s32 section_offset,
	const SymbolGroup& group,
	u32 importer_flags,
	const DemanglerFunctions& demangler,
	const std::atomic_bool* interrupt)
{
	SymbolTableReader reader;
	Result<void> init_result = reader.init(elf, section_offset);
	CCC_RETURN_IF_ERROR(init_result);
	
	Result<std::vector<mdebug::Symbol>> externals = reader.parse_external_symbols();
	CCC_RETURN_IF_ERROR(externals);
	
	// The addresses of the global variables aren't present in the local symbol
	// table, so here we extract them from the external table. In addition, for
	// some games we need to cross reference the function symbols in the local
	// symbol table with the entries in the external symbol table.
	//
	// The maps store pointers into the externals vector, which stays alive
	// until after import_files returns.
	std::map<u32, const mdebug::Symbol*> functions_by_address;
	std::map<std::string, const mdebug::Symbol*> globals_by_name;
	for (const mdebug::Symbol& external : *externals) {
		const bool is_function = external.symbol_type == mdebug::SymbolType::PROC;
		if (is_function) {
			functions_by_address[external.value] = &external;
		}
		
		const bool is_global = external.symbol_type == mdebug::SymbolType::GLOBAL;
		if (is_global && external.symbol_class != mdebug::SymbolClass::UNDEFINED) {
			globals_by_name[external.string] = &external;
		}
	}
	
	// Bundle together some unchanging state to pass to import_files.
	AnalysisContext context;
	context.reader = &reader;
	context.external_functions = &functions_by_address;
	context.external_globals = &globals_by_name;
	context.group = group;
	context.importer_flags = importer_flags;
	context.demangler = demangler;
	
	Result<void> import_result = import_files(database, context, interrupt);
	CCC_RETURN_IF_ERROR(import_result);
	
	return Result<void>();
}
// Import every file of the symbol table and then run the analysis passes that
// need to see all the imported symbols at once.
//
// database   Database the symbols are added to.
// context    Shared state of the import; context.reader must be set.
// interrupt  Optional flag that is checked before each file is imported.
Result<void> import_files(SymbolDatabase& database, const AnalysisContext& context, const std::atomic_bool* interrupt)
{
	Result<s32> file_count = context.reader->file_count();
	CCC_RETURN_IF_ERROR(file_count);
	
	for (s32 file_index = 0; file_index < *file_count; file_index++) {
		if (interrupt != nullptr && interrupt->load()) {
			return CCC_FAILURE("Operation interrupted by user.");
		}
		
		Result<mdebug::File> file = context.reader->parse_file(file_index);
		CCC_RETURN_IF_ERROR(file);
		
		Result<void> import_result = import_file(database, *file, context);
		CCC_RETURN_IF_ERROR(import_result);
	}
	
	// The files field may be modified by further analysis passes, so we
	// need to save this information here.
	for (DataType& data_type : database.data_types) {
		if (!context.group.is_in_group(data_type)) {
			continue;
		}
		if (data_type.files.size() != 1) {
			continue;
		}
		data_type.only_defined_in_single_translation_unit = true;
	}
	
	// Lookup data types and store data type handles in type names.
	Result<void> type_name_result = resolve_type_names(database, context.group, context.importer_flags);
	CCC_RETURN_IF_ERROR(type_name_result);
	
	// Compute the size in bytes of all the AST nodes.
	database.for_each_symbol([&](ccc::Symbol& symbol) {
		if (!context.group.is_in_group(symbol)) {
			return;
		}
		if (!symbol.type()) {
			return;
		}
		compute_size_bytes(*symbol.type(), database);
	});
	
	// Propagate the size information to the global variable symbols.
	for (GlobalVariable& global_variable : database.global_variables) {
		const ast::Node* type = global_variable.type();
		if (type != nullptr && type->size_bytes > -1) {
			global_variable.set_size((u32) type->size_bytes);
		}
	}
	
	// Propagate the size information to the static local variable symbols.
	// Static locals are the ones that use global storage.
	for (LocalVariable& local_variable : database.local_variables) {
		if (!std::holds_alternative<GlobalStorage>(local_variable.storage)) {
			continue;
		}
		const ast::Node* type = local_variable.type();
		if (type != nullptr && type->size_bytes > -1) {
			local_variable.set_size((u32) type->size_bytes);
		}
	}
	
	// Some games (e.g. Jet X2O) have multiple function symbols across different
	// translation units with the same name and address.
	const bool unique_functions = (context.importer_flags & UNIQUE_FUNCTIONS) != 0;
	if (unique_functions) {
		detect_duplicate_functions(database, context.group);
	}
	
	// If multiple functions appear at the same address, discard the addresses
	// of all of them except the real one.
	if (context.external_functions != nullptr) {
		detect_fake_functions(database, *context.external_functions, context.group);
	}
	
	// Remove functions with no address. If there are any such functions, this
	// will invalidate all pointers to symbols.
	const bool remove_optimized_out = (context.importer_flags & NO_OPTIMIZED_OUT_FUNCTIONS) != 0;
	if (remove_optimized_out) {
		destroy_optimized_out_functions(database, context.group);
	}
	
	return Result<void>();
}
// Import the symbols of a single file from the symbol table.
//
// The symbols are parsed, a source file symbol is created for the file, and
// each parsed symbol is then dispatched to the matching method of a
// LocalSymbolTableAnalyser, which creates the symbols in the database. The
// first error that is encountered aborts the import of the file.
Result<void> import_file(SymbolDatabase& database, const mdebug::File& input, const AnalysisContext& context)
{
// Parse the stab strings into a data structure that's vaguely
// one-to-one with the text-based representation.
u32 importer_flags_for_this_file = context.importer_flags;
Result<std::vector<ParsedSymbol>> symbols = parse_symbols(input.symbols, importer_flags_for_this_file);
CCC_RETURN_IF_ERROR(symbols);
// In stabs, types can be referenced by their number from other stabs,
// so here we build a map of type numbers to the parsed types.
std::map<StabsTypeNumber, const StabsType*> stabs_types;
for(const ParsedSymbol& symbol : *symbols) {
if(symbol.type == ParsedSymbolType::NAME_COLON_TYPE) {
symbol.name_colon_type.type->enumerate_numbered_types(stabs_types);
}
}
Result<SourceFile*> source_file = database.source_files.create_symbol(
input.full_path, input.address, context.group.source, context.group.module_symbol);
CCC_RETURN_IF_ERROR(source_file);
(*source_file)->working_dir = input.working_dir;
(*source_file)->command_line_path = input.command_line_path;
// Sometimes the INFO symbols contain information about what toolchain
// version was used for building the executable.
for(const mdebug::Symbol& symbol : input.symbols) {
if(symbol.symbol_class == mdebug::SymbolClass::INFO && strcmp(symbol.string, "@stabs") != 0) {
(*source_file)->toolchain_version_info.emplace(symbol.string);
}
}
// State needed for converting STABS types to AST nodes. It stores a pointer
// to the stabs_types map above, so it must not outlive this function.
StabsToAstState stabs_to_ast_state;
stabs_to_ast_state.file_handle = (*source_file)->handle().value;
stabs_to_ast_state.stabs_types = &stabs_types;
stabs_to_ast_state.importer_flags = importer_flags_for_this_file;
stabs_to_ast_state.demangler = context.demangler;
// Convert the parsed stabs symbols to a more standard C AST.
LocalSymbolTableAnalyser analyser(database, stabs_to_ast_state, context, **source_file);
for(const ParsedSymbol& symbol : *symbols) {
// Symbols flagged as duplicates by the parser are not imported.
if(symbol.duplicate) {
continue;
}
switch(symbol.type) {
case ParsedSymbolType::NAME_COLON_TYPE: {
switch(symbol.name_colon_type.descriptor) {
case StabsSymbolDescriptor::LOCAL_FUNCTION:
case StabsSymbolDescriptor::GLOBAL_FUNCTION: {
const char* name = symbol.name_colon_type.name.c_str();
const StabsType& type = *symbol.name_colon_type.type.get();
Result<void> result = analyser.function(name, type, symbol.raw->value);
CCC_RETURN_IF_ERROR(result);
break;
}
case StabsSymbolDescriptor::REFERENCE_PARAMETER_A:
case StabsSymbolDescriptor::REGISTER_PARAMETER:
case StabsSymbolDescriptor::VALUE_PARAMETER:
case StabsSymbolDescriptor::REFERENCE_PARAMETER_V: {
const char* name = symbol.name_colon_type.name.c_str();
const StabsType& type = *symbol.name_colon_type.type.get();
// Only value parameters are treated as being stored on the stack, all
// the other kinds are treated as being stored in a register.
bool is_stack_variable = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::VALUE_PARAMETER;
bool is_by_reference = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_A
|| symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_V;
Result<void> result = analyser.parameter(name, type, is_stack_variable, symbol.raw->value, is_by_reference);
CCC_RETURN_IF_ERROR(result);
break;
}
case StabsSymbolDescriptor::REGISTER_VARIABLE:
case StabsSymbolDescriptor::LOCAL_VARIABLE:
case StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE: {
const char* name = symbol.name_colon_type.name.c_str();
const StabsType& type = *symbol.name_colon_type.type.get();
Result<void> result = analyser.local_variable(
name, type, symbol.raw->value, symbol.name_colon_type.descriptor, symbol.raw->symbol_class);
CCC_RETURN_IF_ERROR(result);
break;
}
case StabsSymbolDescriptor::GLOBAL_VARIABLE:
case StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE: {
const char* name = symbol.name_colon_type.name.c_str();
// The address stays at (u32) -1 if the variable is a non-static global
// that cannot be found in the external symbol table.
u32 address = -1;
std::optional<GlobalStorageLocation> location =
symbol_class_to_global_variable_location(symbol.raw->symbol_class);
if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::GLOBAL_VARIABLE) {
// The address for non-static global variables is
// only stored in the external symbol table (and
// the ELF symbol table), so we pull that
// information in here.
if(context.external_globals) {
auto global_symbol = context.external_globals->find(symbol.name_colon_type.name);
if(global_symbol != context.external_globals->end()) {
address = (u32) global_symbol->second->value;
location = symbol_class_to_global_variable_location(global_symbol->second->symbol_class);
}
}
} else {
// And for static global variables it's just stored
// in the local symbol table.
address = (u32) symbol.raw->value;
}
CCC_CHECK(location.has_value(), "Invalid global variable location.")
const StabsType& type = *symbol.name_colon_type.type.get();
bool is_static = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE;
Result<void> result = analyser.global_variable(name, address, type, is_static, *location);
CCC_RETURN_IF_ERROR(result);
break;
}
case StabsSymbolDescriptor::TYPE_NAME:
case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG: {
Result<void> result = analyser.data_type(symbol);
CCC_RETURN_IF_ERROR(result);
break;
}
}
break;
}
case ParsedSymbolType::SOURCE_FILE: {
Result<void> result = analyser.source_file(symbol.raw->string, symbol.raw->value);
CCC_RETURN_IF_ERROR(result);
break;
}
case ParsedSymbolType::SUB_SOURCE_FILE: {
Result<void> result = analyser.sub_source_file(symbol.raw->string, symbol.raw->value);
CCC_RETURN_IF_ERROR(result);
break;
}
case ParsedSymbolType::LBRAC: {
Result<void> result = analyser.lbrac(symbol.raw->value);
CCC_RETURN_IF_ERROR(result);
break;
}
case ParsedSymbolType::RBRAC: {
Result<void> result = analyser.rbrac(symbol.raw->value);
CCC_RETURN_IF_ERROR(result);
break;
}
case ParsedSymbolType::FUNCTION_END: {
Result<void> result = analyser.function_end();
CCC_RETURN_IF_ERROR(result);
break;
}
case ParsedSymbolType::NON_STABS: {
// Of the symbols that aren't stabs, only the ones with the TEXT symbol
// class are processed: procedures, labels and end markers.
if(symbol.raw->symbol_class == mdebug::SymbolClass::TEXT) {
if(symbol.raw->symbol_type == mdebug::SymbolType::PROC) {
Result<void> result = analyser.procedure(symbol.raw->string, symbol.raw->value, symbol.raw->procedure_descriptor, false);
CCC_RETURN_IF_ERROR(result);
} else if(symbol.raw->symbol_type == mdebug::SymbolType::STATICPROC) {
Result<void> result = analyser.procedure(symbol.raw->string, symbol.raw->value, symbol.raw->procedure_descriptor, true);
CCC_RETURN_IF_ERROR(result);
} else if(symbol.raw->symbol_type == mdebug::SymbolType::LABEL) {
Result<void> result = analyser.label(symbol.raw->string, symbol.raw->value, symbol.raw->index);
CCC_RETURN_IF_ERROR(result);
} else if(symbol.raw->symbol_type == mdebug::SymbolType::END) {
Result<void> result = analyser.text_end(symbol.raw->string, symbol.raw->value);
CCC_RETURN_IF_ERROR(result);
}
}
break;
}
}
}
// Close the last function and hand the collected handles to the source file.
Result<void> result = analyser.finish();
CCC_RETURN_IF_ERROR(result);
return Result<void>();
}
// Resolve every type name node in the types of all the symbols that belong to
// the group. All the type names are visited even if resolving one of them
// fails; if there were failures, the result of the last one is returned.
static Result<void> resolve_type_names(
	SymbolDatabase& database, const SymbolGroup& group, u32 importer_flags)
{
	Result<void> overall_result;
	
	auto resolve_node = [&](ast::Node& node) {
		if (node.descriptor == ast::TYPE_NAME) {
			Result<void> node_result = resolve_type_name(node.as<ast::TypeName>(), database, group, importer_flags);
			if (!node_result.success()) {
				overall_result = std::move(node_result);
			}
		}
		return ast::EXPLORE_CHILDREN;
	};
	
	database.for_each_symbol([&](ccc::Symbol& symbol) {
		if (!group.is_in_group(symbol)) {
			return;
		}
		if (!symbol.type()) {
			return;
		}
		ast::for_each_node(*symbol.type(), ast::PREORDER_TRAVERSAL, resolve_node);
	});
	
	return overall_result;
}
// Try to find the data type that a type name node refers to and store its
// handle in the node. The following strategies are tried in order:
//
//  1. Lookup by STABS type number in the referenced source file.
//  2. Lookup by name among the data types in the group.
//  3. Creation of a new forward declared struct, union or enum.
//
// If all of them fail an error is returned if STRICT_PARSING is set, otherwise
// a warning is emitted and the type name is left unresolved.
static Result<void> resolve_type_name(
	ast::TypeName& type_name,
	SymbolDatabase& database,
	const SymbolGroup& group,
	u32 importer_flags)
{
	ast::TypeName::UnresolvedStabs* unresolved = type_name.unresolved_stabs.get();
	if (unresolved == nullptr) {
		// Nothing to resolve.
		return Result<void>();
	}
	
	// Lookup the type by its STABS type number. This path ensures that the
	// correct type is found even if multiple types have the same name.
	const bool has_referenced_file = unresolved->referenced_file_handle != (u32) -1;
	if (has_referenced_file && unresolved->stabs_type_number.valid()) {
		const SourceFile* source_file = database.source_files.symbol_from_handle(unresolved->referenced_file_handle);
		CCC_ASSERT(source_file);
		
		auto found = source_file->stabs_type_number_to_handle.find(unresolved->stabs_type_number);
		if (found != source_file->stabs_type_number_to_handle.end()) {
			type_name.data_type_handle = found->second.value;
			type_name.is_forward_declared = false;
			type_name.unresolved_stabs.reset();
			return Result<void>();
		}
	}
	
	// Looking up the type by its STABS type number failed, so look for it by
	// its name instead. This happens when a type is forward declared but not
	// defined in a given translation unit.
	if (!unresolved->type_name.empty()) {
		for (auto& candidate : database.data_types.handles_from_name(unresolved->type_name)) {
			DataType* data_type = database.data_types.symbol_from_handle(candidate.second);
			if (data_type == nullptr || !group.is_in_group(*data_type)) {
				continue;
			}
			type_name.data_type_handle = candidate.second.value;
			type_name.is_forward_declared = true;
			type_name.unresolved_stabs.reset();
			return Result<void>();
		}
	}
	
	// If this branch is taken it means the type name was probably from an
	// automatically generated member function of a nested struct trying to
	// reference the struct (for the this parameter). We shouldn't create a
	// forward declared type in this case.
	if (type_name.source == ast::TypeNameSource::UNNAMED_THIS) {
		return Result<void>();
	}
	
	// Type lookup failed. This happens when a type is forward declared in a
	// translation unit with symbols but is not defined in one. We haven't
	// already created a forward declared type, so we create one now.
	std::unique_ptr<ast::Node> placeholder;
	if (unresolved->type.has_value()) {
		const auto kind = *unresolved->type;
		if (kind == ast::ForwardDeclaredType::STRUCT || kind == ast::ForwardDeclaredType::UNION) {
			std::unique_ptr<ast::StructOrUnion> struct_or_union = std::make_unique<ast::StructOrUnion>();
			struct_or_union->is_struct = kind == ast::ForwardDeclaredType::STRUCT;
			placeholder = std::move(struct_or_union);
		} else if (kind == ast::ForwardDeclaredType::ENUM) {
			placeholder = std::make_unique<ast::Enum>();
		}
	}
	
	if (placeholder) {
		Result<DataType*> created = database.data_types.create_symbol(
			unresolved->type_name, group.source, group.module_symbol);
		CCC_RETURN_IF_ERROR(created);
		DataType& forward_declared_type = **created;
		
		forward_declared_type.set_type(std::move(placeholder));
		forward_declared_type.not_defined_in_any_translation_unit = true;
		
		type_name.data_type_handle = forward_declared_type.handle().value;
		type_name.is_forward_declared = true;
		type_name.unresolved_stabs.reset();
		return Result<void>();
	}
	
	// Nothing worked, so report the problem. Whether this is fatal depends on
	// the importer flags.
	const char* error_message = "Unresolved %s type name '%s' with STABS type number (%d,%d).";
	if (!(importer_flags & STRICT_PARSING)) {
		CCC_WARN(error_message,
			ast::type_name_source_to_string(type_name.source),
			type_name.unresolved_stabs->type_name.c_str(),
			type_name.unresolved_stabs->stabs_type_number.file,
			type_name.unresolved_stabs->stabs_type_number.type);
		return Result<void>();
	}
	
	return CCC_FAILURE(error_message,
		ast::type_name_source_to_string(type_name.source),
		type_name.unresolved_stabs->type_name.c_str(),
		type_name.unresolved_stabs->stabs_type_number.file,
		type_name.unresolved_stabs->stabs_type_number.type);
}
// Fill in the size_bytes field of the given node and of all the nodes below
// it, where the size can be determined. Nodes are visited in post-order, so
// children are processed before their parents.
//
// The cannot_compute_size flag is set on a node while it is being processed
// and is only cleared again if a size was found. This marks nodes without a
// known size as processed, and stops type names that refer back to a type
// that is currently being processed from recursing forever.
static void compute_size_bytes(ast::Node& node, SymbolDatabase& database)
{
	for_each_node(node, ast::POSTORDER_TRAVERSAL, [&](ast::Node& current) {
		// Skip nodes that have already been processed.
		const bool already_processed = current.size_bytes > -1 || current.cannot_compute_size;
		if (already_processed) {
			return ast::EXPLORE_CHILDREN;
		}
		
		// Can't compute size recursively.
		current.cannot_compute_size = true;
		
		switch (current.descriptor) {
			case ast::ARRAY: {
				ast::Array& array = current.as<ast::Array>();
				if (array.element_type->size_bytes > -1) {
					array.size_bytes = array.element_type->size_bytes * array.element_count;
				}
				break;
			}
			case ast::BUILTIN: {
				ast::BuiltIn& built_in = current.as<ast::BuiltIn>();
				built_in.size_bytes = builtin_class_size(built_in.bclass);
				break;
			}
			case ast::ENUM:
			case ast::POINTER_OR_REFERENCE: {
				// Enums and pointers are always given a size of 4 bytes.
				current.size_bytes = 4;
				break;
			}
			case ast::STRUCT_OR_UNION: {
				current.size_bytes = current.size_bits / 8;
				break;
			}
			case ast::TYPE_NAME: {
				ast::TypeName& type_name = current.as<ast::TypeName>();
				DataType* resolved_type = database.data_types.symbol_from_handle(type_name.data_type_handle_unless_forward_declared());
				if (resolved_type == nullptr) {
					break;
				}
				
				ast::Node* resolved_node = resolved_type->type();
				CCC_ASSERT(resolved_node);
				
				// The referenced type may not have been processed yet, in
				// which case it has to be processed first.
				const bool needs_processing = resolved_node->size_bytes < 0 && !resolved_node->cannot_compute_size;
				if (needs_processing) {
					compute_size_bytes(*resolved_node, database);
				}
				type_name.size_bytes = resolved_node->size_bytes;
				break;
			}
			case ast::BITFIELD:
			case ast::FUNCTION:
			case ast::ERROR_NODE:
			case ast::POINTER_TO_DATA_MEMBER: {
				// No size is computed for these nodes.
				break;
			}
		}
		
		if (current.size_bytes > -1) {
			current.cannot_compute_size = false;
		}
		
		return ast::EXPLORE_CHILDREN;
	});
}
// Find functions from the group that have the same mangled name and the same
// address as another function, and discard the addresses of all but one of
// them. Some games have such duplicate symbols spread across multiple
// translation units.
//
// database  Database containing the functions and source files to check.
// group     Only functions that belong to this group are considered.
static void detect_duplicate_functions(SymbolDatabase& database, const SymbolGroup& group)
{
	for (Function& test_function : database.functions) {
		// Skip functions that have no address or that aren't part of the group
		// being imported. Either condition on its own is enough to rule the
		// function out, which is also how detect_fake_functions filters.
		if (!test_function.address().valid() || !group.is_in_group(test_function)) {
			continue;
		}
		
		// Find cases where there are two or more functions at the same address.
		auto functions_with_same_address = database.functions.handles_from_starting_address(test_function.address());
		if (functions_with_same_address.begin() == functions_with_same_address.end()) {
			continue;
		}
		if (++functions_with_same_address.begin() == functions_with_same_address.end()) {
			continue;
		}
		
		// Try to figure out the address of the translation unit which the
		// version of the function that actually ended up in the linked binary
		// comes from. We can't just check which source file the symbol comes
		// from because it may be present in multiple.
		u32 source_file_address = UINT32_MAX;
		for (SourceFile& source_file : database.source_files) {
			if (source_file.address() < test_function.address()) {
				source_file_address = std::min(source_file.address().value, source_file_address);
			}
		}
		if (source_file_address == UINT32_MAX) {
			// No source file starts before the function.
			continue;
		}
		
		// Remove the addresses from all the matching symbols from other
		// translation units. The candidate with the smallest offset from the
		// source file address is kept, every other candidate is collected as
		// a duplicate.
		std::vector<FunctionHandle> duplicate_functions;
		FunctionHandle best_handle;
		u32 best_offset = UINT32_MAX;
		for (const auto& [address, handle] : functions_with_same_address) {
			ccc::Function* function = database.functions.symbol_from_handle(handle);
			if (!function || !group.is_in_group(*function) || function->mangled_name() != test_function.mangled_name()) {
				continue;
			}
			
			if (address - source_file_address < best_offset) {
				// A better candidate was found, so the previous best candidate
				// (if there was one) becomes a duplicate.
				if (best_handle.valid()) {
					duplicate_functions.emplace_back(best_handle);
				}
				best_handle = function->handle();
				best_offset = address - source_file_address;
			} else {
				duplicate_functions.emplace_back(function->handle());
			}
		}
		
		// The addresses are only discarded once the loop above is done, so
		// that the range being iterated over isn't modified during iteration.
		for (FunctionHandle duplicate_function : duplicate_functions) {
			database.functions.move_symbol(duplicate_function, Address());
		}
	}
}
// Find cases where multiple function symbols from the group share an address
// and discard the address of each one that doesn't match the external symbol
// table, i.e. where there is no external function symbol at that address or
// its name differs from the mangled name of the function.
//
// Only the first 10 discarded functions are reported individually, followed
// by a single summary warning.
static void detect_fake_functions(SymbolDatabase& database, const std::map<u32, const mdebug::Symbol*>& external_functions, const SymbolGroup& group)
{
	s32 discarded_count = 0;
	
	for (Function& function : database.functions) {
		if (!(function.address().valid() && group.is_in_group(function))) {
			continue;
		}
		
		// Only addresses shared by two or more functions are of interest.
		auto functions_with_same_address = database.functions.handles_from_starting_address(function.address());
		auto iterator = functions_with_same_address.begin();
		if (iterator == functions_with_same_address.end()) {
			continue;
		}
		++iterator;
		if (iterator == functions_with_same_address.end()) {
			continue;
		}
		
		// Cross-reference with the external symbol table to determine if this
		// is the real function.
		auto external_function = external_functions.find(function.address().value);
		const bool is_real_function = external_function != external_functions.end()
			&& strcmp(function.mangled_name().c_str(), external_function->second->string) == 0;
		if (is_real_function) {
			continue;
		}
		
		database.functions.move_symbol(function.handle(), Address());
		
		if (discarded_count < 10) {
			CCC_WARN("Discarding address of function symbol '%s' as it is probably incorrect.", function.mangled_name().c_str());
		} else if (discarded_count == 10) {
			CCC_WARN("Discarding more addresses of function symbols.");
		}
		discarded_count++;
	}
}
// Destroy all the functions from the group that don't have a valid address.
// If at least one function is destroyed, all pointers to symbols in the
// database are invalidated.
static void destroy_optimized_out_functions(
	SymbolDatabase& database, const SymbolGroup& group)
{
	bool any_marked = false;
	
	for (Function& function : database.functions) {
		if (!group.is_in_group(function)) {
			continue;
		}
		if (function.address().valid()) {
			continue;
		}
		function.mark_for_destruction();
		any_marked = true;
	}
	
	if (!any_marked) {
		return;
	}
	
	// This will invalidate all pointers to symbols in the database.
	database.destroy_marked_symbols();
}
// Fill in pointers from member function declarations to the corresponding
// definitions.
//
// The qualified name of each function is split into a type name and a
// function name, and the struct/union data types with that type name are
// searched for a member function declaration with that function name. The
// first match gets the handle of the function stored in it, and the function
// is flagged with is_member_function_ish.
void fill_in_pointers_to_member_function_definitions(SymbolDatabase& database)
{
	for (Function& function : database.functions) {
		const std::string& qualified_name = function.name();
		
		// Note that find_last_of looks for any of the characters passed in,
		// so this is the index of the last colon in the name, i.e. the second
		// colon of the last "::" separator.
		const std::string::size_type last_colon_pos = qualified_name.find_last_of("::");
		if (last_colon_pos == std::string::npos || last_colon_pos < 2) {
			continue;
		}
		
		const std::string function_name = qualified_name.substr(last_colon_pos + 1);
		
		// Extract the innermost type name, which is either preceded by another
		// separator or starts at the beginning of the qualified name.
		//
		// This won't work for some template types.
		const std::string::size_type previous_colon_pos = qualified_name.find_last_of("::", last_colon_pos - 2);
		std::string type_name;
		if (previous_colon_pos == std::string::npos) {
			type_name = qualified_name.substr(0, last_colon_pos - 1);
		} else {
			type_name = qualified_name.substr(previous_colon_pos + 1, last_colon_pos - previous_colon_pos - 2);
		}
		
		for (const auto& candidate : database.data_types.handles_from_name(type_name)) {
			DataType* data_type = database.data_types.symbol_from_handle(candidate.second);
			const bool is_struct_or_union = data_type
				&& data_type->type()
				&& data_type->type()->descriptor == ast::STRUCT_OR_UNION;
			if (!is_struct_or_union) {
				continue;
			}
			
			ast::StructOrUnion& struct_or_union = data_type->type()->as<ast::StructOrUnion>();
			for (std::unique_ptr<ast::Node>& declaration : struct_or_union.member_functions) {
				if (declaration->name != function_name) {
					continue;
				}
				declaration->as<ast::Function>().definition_handle = function.handle().value;
				function.is_member_function_ish = true;
				break;
			}
			
			// Stop looking at other types with the same name once the
			// function has been flagged.
			if (function.is_member_function_ish) {
				break;
			}
		}
	}
}
}

31
3rdparty/ccc/src/ccc/mdebug_importer.h vendored Normal file
View File

@ -0,0 +1,31 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include <atomic>
#include "mdebug_analysis.h"
#include "mdebug_section.h"
#include "symbol_database.h"
namespace ccc::mdebug {
// Perform all the main analysis passes on the mdebug symbol table and convert
// it to a set of C++ ASTs.
//
// elf is the raw image containing the section and section_offset is the offset
// of the .mdebug section within it. The resulting symbols are stored in
// database.
// NOTE(review): interrupt presumably lets another thread request cancellation
// of the import; confirm against the implementation.
Result<void> import_symbol_table(
SymbolDatabase& database,
std::span<const u8> elf,
s32 section_offset,
const SymbolGroup& group,
u32 importer_flags,
const DemanglerFunctions& demangler,
const std::atomic_bool* interrupt);
// Import the files described by the given analysis context into the database.
Result<void> import_files(SymbolDatabase& database, const AnalysisContext& context, const std::atomic_bool* interrupt);
// Import a single parsed mdebug file (translation unit) into the database.
Result<void> import_file(SymbolDatabase& database, const mdebug::File& input, const AnalysisContext& context);
// Try to add pointers from member function declarations to their definitions
// using a heuristic based on the qualified name of each function.
void fill_in_pointers_to_member_function_definitions(SymbolDatabase& database);
}

474
3rdparty/ccc/src/ccc/mdebug_section.cpp vendored Normal file
View File

@ -0,0 +1,474 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "mdebug_section.h"
namespace ccc::mdebug {
// MIPS debug symbol table headers.
// See include/coff/sym.h from GNU binutils for more information.

// The symbolic header located at the very start of the .mdebug section. It
// stores the file offset and element count (or byte size) of every table that
// makes up the symbol table.
CCC_PACKED_STRUCT(SymbolicHeader,
	/* 0x00 */ s16 magic;
	/* 0x02 */ s16 version_stamp;
	/* 0x04 */ s32 line_number_count;
	/* 0x08 */ s32 line_numbers_size_bytes;
	/* 0x0c */ s32 line_numbers_offset;
	/* 0x10 */ s32 dense_numbers_count;
	/* 0x14 */ s32 dense_numbers_offset;
	/* 0x18 */ s32 procedure_descriptor_count;
	/* 0x1c */ s32 procedure_descriptors_offset;
	/* 0x20 */ s32 local_symbol_count;
	/* 0x24 */ s32 local_symbols_offset;
	/* 0x28 */ s32 optimization_symbols_count;
	/* 0x2c */ s32 optimization_symbols_offset;
	/* 0x30 */ s32 auxiliary_symbol_count;
	/* 0x34 */ s32 auxiliary_symbols_offset;
	/* 0x38 */ s32 local_strings_size_bytes;
	/* 0x3c */ s32 local_strings_offset;
	/* 0x40 */ s32 external_strings_size_bytes;
	/* 0x44 */ s32 external_strings_offset;
	/* 0x48 */ s32 file_descriptor_count;
	/* 0x4c */ s32 file_descriptors_offset;
	/* 0x50 */ s32 relative_file_descriptor_count;
	/* 0x54 */ s32 relative_file_descriptors_offset;
	/* 0x58 */ s32 external_symbols_count;
	/* 0x5c */ s32 external_symbols_offset;
)
// The size of this structure is used to work out where the first table should
// begin, so make sure it matches the on-disk layout like the other headers.
static_assert(sizeof(SymbolicHeader) == 0x60);
// On-disk file descriptor. There is one of these for each entry in the file
// descriptor table referenced by the symbolic header. The offsets and indices
// stored here are relative to the tables referenced by the symbolic header.
CCC_PACKED_STRUCT(FileDescriptor,
/* 0x00 */ u32 address;
/* 0x04 */ s32 file_path_string_offset;
/* 0x08 */ s32 strings_offset;
/* 0x0c */ s32 cb_ss;
/* 0x10 */ s32 isym_base;
/* 0x14 */ s32 symbol_count;
/* 0x18 */ s32 line_number_entry_index_base;
/* 0x1c */ s32 cline;
/* 0x20 */ s32 optimization_entry_index_base;
/* 0x24 */ s32 copt;
/* 0x28 */ u16 ipd_first;
/* 0x2a */ u16 procedure_descriptor_count;
/* 0x2c */ s32 iaux_base;
/* 0x30 */ s32 caux;
/* 0x34 */ s32 rfd_base;
/* 0x38 */ s32 crfd;
/* 0x3c */ u32 lang : 5;
/* 0x3c */ u32 f_merge : 1;
/* 0x3c */ u32 f_readin : 1;
/* 0x3c */ u32 f_big_endian : 1;
/* 0x3c */ u32 reserved_1 : 22;
/* 0x40 */ s32 line_number_offset;
/* 0x44 */ s32 cb_line;
)
// Guard against the layout drifting from the on-disk format.
static_assert(sizeof(FileDescriptor) == 0x48);
// On-disk symbol record. iss is an offset into a string table, while the
// symbol type (st), symbol class (sc) and index are packed into a single
// 32-bit bitfield.
CCC_PACKED_STRUCT(SymbolHeader,
/* 0x0 */ u32 iss;
/* 0x4 */ u32 value;
/* 0x8 */ u32 st : 6;
/* 0x8 */ u32 sc : 5;
/* 0x8 */ u32 reserved : 1;
/* 0x8 */ u32 index : 20;
)
// Guard against the layout drifting from the on-disk format.
static_assert(sizeof(SymbolHeader) == 0xc);
// On-disk external symbol record: a regular symbol header prefixed with some
// flags and a file descriptor index (ifd).
CCC_PACKED_STRUCT(ExternalSymbolHeader,
/* 0x0 */ u16 flags;
/* 0x2 */ s16 ifd;
/* 0x4 */ SymbolHeader symbol;
)
// Guard against the layout drifting from the on-disk format.
static_assert(sizeof(ExternalSymbolHeader) == 0x10);
// Forward declarations of the helpers private to this translation unit.
// Print a single line describing the given symbol.
static void print_symbol(FILE* out, const Symbol& symbol);
// Print every field of a procedure descriptor, one per line.
static void print_procedure_descriptor(FILE* out, const ProcedureDescriptor& procedure_descriptor);
// Work out how far the offsets in the symbolic header are from where the
// section is actually located in the file.
static Result<s32> get_corruption_fixing_fudge_offset(s32 section_offset, const SymbolicHeader& hdrr);
// Convert an on-disk symbol header to a Symbol, looking up its string.
static Result<Symbol> get_symbol(const SymbolHeader& header, std::span<const u8> elf, s32 strings_offset);
// Prepare the reader for parsing the .mdebug section stored at section_offset
// within the elf buffer. The symbolic header is located and validated, and the
// fudge offset used to correct for moved sections is calculated.
//
// Returns an error if the header lies outside of the buffer, if the magic
// number is wrong, or if the fudge offset could not be determined. The reader
// is only marked as ready if all of these steps succeed.
Result<void> SymbolTableReader::init(std::span<const u8> elf, s32 section_offset)
{
	// Invalidate any previous state first so that a failed re-initialisation
	// cannot leave the reader marked as ready while m_hdrr is null or points
	// to a header that was never validated.
	m_ready = false;
	
	m_elf = elf;
	m_section_offset = section_offset;
	
	m_hdrr = get_packed<SymbolicHeader>(m_elf, m_section_offset);
	CCC_CHECK(m_hdrr != nullptr, "MIPS debug section header out of bounds.");
	CCC_CHECK(m_hdrr->magic == 0x7009, "Invalid symbolic header.");
	
	Result<s32> fudge_offset = get_corruption_fixing_fudge_offset(m_section_offset, *m_hdrr);
	CCC_RETURN_IF_ERROR(fudge_offset);
	m_fudge_offset = *fudge_offset;
	
	m_ready = true;
	
	return Result<void>();
}
// Number of file descriptors recorded in the symbolic header. The reader must
// have been successfully initialised first.
s32 SymbolTableReader::file_count() const
{
	CCC_ASSERT(m_ready);
	
	const s32 descriptor_count = m_hdrr->file_descriptor_count;
	return descriptor_count;
}
// Parse the file descriptor at the given index along with all of its local
// symbols and procedure descriptors.
//
// Returns an error if the file descriptor, a symbol header or a procedure
// descriptor lies outside of the buffer, if the descriptor is flagged as big
// endian, if a symbol could not be read, or if a procedure descriptor refers
// to a symbol index that is out of range.
//
// The returned symbols hold pointers into the buffer passed to init.
Result<File> SymbolTableReader::parse_file(s32 index) const
{
CCC_ASSERT(m_ready);
File file;
// All offsets read from the headers are corrected by the fudge offset.
u64 fd_offset = m_hdrr->file_descriptors_offset + index * sizeof(FileDescriptor);
const FileDescriptor* fd_header = get_packed<FileDescriptor>(m_elf, fd_offset + m_fudge_offset);
CCC_CHECK(fd_header != nullptr, "MIPS debug file descriptor out of bounds.");
CCC_CHECK(fd_header->f_big_endian == 0, "Not little endian or bad file descriptor table.");
file.address = fd_header->address;
// The path string is optional: if it cannot be read the path is left empty.
s32 rel_raw_path_offset = fd_header->strings_offset + fd_header->file_path_string_offset;
s32 raw_path_offset = m_hdrr->local_strings_offset + rel_raw_path_offset + m_fudge_offset;
const char* command_line_path = get_string(m_elf, raw_path_offset);
if(command_line_path) {
file.command_line_path = command_line_path;
}
// Parse local symbols.
for(s64 j = 0; j < fd_header->symbol_count; j++) {
u64 rel_symbol_offset = (fd_header->isym_base + j) * sizeof(SymbolHeader);
u64 symbol_offset = m_hdrr->local_symbols_offset + rel_symbol_offset + m_fudge_offset;
const SymbolHeader* symbol_header = get_packed<SymbolHeader>(m_elf, symbol_offset);
CCC_CHECK(symbol_header != nullptr, "Symbol header out of bounds.");
s32 strings_offset = m_hdrr->local_strings_offset + fd_header->strings_offset + m_fudge_offset;
Result<Symbol> sym = get_symbol(*symbol_header, m_elf, strings_offset);
CCC_RETURN_IF_ERROR(sym);
// If this N_SO symbol is the one that names the file, and the symbol parsed
// just before it is also an N_SO symbol, use the string of that previous
// symbol as the working directory.
bool string_offset_equal = (s32) symbol_header->iss == fd_header->file_path_string_offset;
if(file.working_dir.empty() && string_offset_equal && sym->is_stabs() && sym->code() == N_SO && file.symbols.size() > 2) {
const Symbol& working_dir = file.symbols.back();
if(working_dir.is_stabs() && working_dir.code() == N_SO) {
file.working_dir = working_dir.string;
}
}
file.symbols.emplace_back(std::move(*sym));
}
// Parse procedure descriptors.
for(s64 i = 0; i < fd_header->procedure_descriptor_count; i++) {
u64 rel_procedure_offset = (fd_header->ipd_first + i) * sizeof(ProcedureDescriptor);
u64 procedure_offset = m_hdrr->procedure_descriptors_offset + rel_procedure_offset + m_fudge_offset;
const ProcedureDescriptor* procedure_descriptor = get_packed<ProcedureDescriptor>(m_elf, procedure_offset);
CCC_CHECK(procedure_descriptor != nullptr, "Procedure descriptor out of bounds.");
// Attach the descriptor to the symbol it refers to.
CCC_CHECK(procedure_descriptor->symbol_index < file.symbols.size(), "Symbol index out of bounds.");
file.symbols[procedure_descriptor->symbol_index].procedure_descriptor = procedure_descriptor;
}
file.full_path = merge_paths(file.working_dir, file.command_line_path);
return file;
}
// Parse all the symbols in the external symbol table.
//
// Returns an error if an external symbol header lies outside of the buffer or
// if a symbol could not be read. The strings of these symbols are looked up
// relative to the external strings table rather than the local one.
Result<std::vector<Symbol>> SymbolTableReader::parse_external_symbols() const
{
CCC_ASSERT(m_ready);
std::vector<Symbol> external_symbols;
for(s64 i = 0; i < m_hdrr->external_symbols_count; i++) {
// The offset from the header is corrected by the fudge offset.
u64 sym_offset = m_hdrr->external_symbols_offset + i * sizeof(ExternalSymbolHeader);
const ExternalSymbolHeader* external_header = get_packed<ExternalSymbolHeader>(m_elf, sym_offset + m_fudge_offset);
CCC_CHECK(external_header != nullptr, "External header out of bounds.");
Result<Symbol> sym = get_symbol(external_header->symbol, m_elf, m_hdrr->external_strings_offset + m_fudge_offset);
CCC_RETURN_IF_ERROR(sym);
external_symbols.emplace_back(std::move(*sym));
}
return external_symbols;
}
// Print the contents of the symbolic header as a table with one row for each
// of the tables it references, listing the offset, size in bytes and element
// count where they are known.
//
// The offsets printed are the raw values from the header: the fudge offset is
// not applied. Where the header only stores a count, the size is calculated by
// multiplying the count by the size of an element.
void SymbolTableReader::print_header(FILE* dest) const
{
CCC_ASSERT(m_ready);
fprintf(dest, "Symbolic Header, magic = %hx, vstamp = %hx:\n",
(u16) m_hdrr->magic,
(u16) m_hdrr->version_stamp);
fprintf(dest, "\n");
fprintf(dest, " Offset Size (Bytes) Count\n");
fprintf(dest, " ------ ------------ -----\n");
fprintf(dest, " Line Numbers 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->line_numbers_offset,
(u32) m_hdrr->line_numbers_size_bytes,
m_hdrr->line_number_count);
// The element size is hard-coded as 8 bytes here.
fprintf(dest, " Dense Numbers 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->dense_numbers_offset,
(u32) m_hdrr->dense_numbers_count * 8,
m_hdrr->dense_numbers_count);
fprintf(dest, " Procedure Descriptors 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->procedure_descriptors_offset,
(u32) m_hdrr->procedure_descriptor_count * (u32) sizeof(ProcedureDescriptor),
m_hdrr->procedure_descriptor_count);
fprintf(dest, " Local Symbols 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->local_symbols_offset,
(u32) m_hdrr->local_symbol_count * (u32) sizeof(SymbolHeader),
m_hdrr->local_symbol_count);
// No size is printed for the optimization symbols.
fprintf(dest, " Optimization Symbols 0x%-8x " "- " "%-8d\n",
(u32) m_hdrr->optimization_symbols_offset,
m_hdrr->optimization_symbols_count);
// The element size is hard-coded as 4 bytes here.
fprintf(dest, " Auxiliary Symbols 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->auxiliary_symbols_offset,
(u32) m_hdrr->auxiliary_symbol_count * 4,
m_hdrr->auxiliary_symbol_count);
// The string tables have a size in bytes but no element count.
fprintf(dest, " Local Strings 0x%-8x " "0x%-8x " "-\n",
(u32) m_hdrr->local_strings_offset,
(u32) m_hdrr->local_strings_size_bytes);
fprintf(dest, " External Strings 0x%-8x " "0x%-8x " "-\n",
(u32) m_hdrr->external_strings_offset,
(u32) m_hdrr->external_strings_size_bytes);
fprintf(dest, " File Descriptors 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->file_descriptors_offset,
(u32) m_hdrr->file_descriptor_count * (u32) sizeof(FileDescriptor),
m_hdrr->file_descriptor_count);
// The element size is hard-coded as 4 bytes here.
fprintf(dest, " Relative File Descriptors 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->relative_file_descriptors_offset,
(u32) m_hdrr->relative_file_descriptor_count * 4,
m_hdrr->relative_file_descriptor_count);
// The hard-coded 16 matches the size asserted for ExternalSymbolHeader.
fprintf(dest, " External Symbols 0x%-8x " "0x%-8x " "%-8d\n",
(u32) m_hdrr->external_symbols_offset,
(u32) m_hdrr->external_symbols_count * 16,
m_hdrr->external_symbols_count);
}
// Print the symbols stored in the symbol table.
//
// If print_locals or print_procedure_descriptors is set, every file is parsed
// and listed. Symbols that have a procedure descriptor attached are always
// printed in that case, while all other local symbols are only printed if
// print_locals is set. The procedure descriptors themselves are only printed
// if print_procedure_descriptors is set. If print_externals is set the
// external symbols are listed afterwards.
//
// Returns an error if a file or the external symbols could not be parsed.
Result<void> SymbolTableReader::print_symbols(FILE* out, bool print_locals, bool print_procedure_descriptors, bool print_externals) const
{
	const bool wants_file_symbols = print_locals || print_procedure_descriptors;
	if(wants_file_symbols) {
		const s32 total_files = file_count();
		for(s32 file_index = 0; file_index < total_files; file_index++) {
			Result<File> parsed_file = parse_file(file_index);
			CCC_RETURN_IF_ERROR(parsed_file);
			
			fprintf(out, "FILE %s:\n", parsed_file->command_line_path.c_str());
			
			for(const Symbol& symbol : parsed_file->symbols) {
				const bool has_descriptor = symbol.procedure_descriptor != nullptr;
				
				if(print_locals || has_descriptor) {
					print_symbol(out, symbol);
				}
				
				if(print_procedure_descriptors && has_descriptor) {
					print_procedure_descriptor(out, *symbol.procedure_descriptor);
				}
			}
		}
	}
	
	if(print_externals) {
		// The heading is printed before parsing, so it appears even if
		// parsing the external symbols fails.
		fprintf(out, "EXTERNAL SYMBOLS:\n");
		
		Result<std::vector<Symbol>> externals = parse_external_symbols();
		CCC_RETURN_IF_ERROR(externals);
		
		for(const Symbol& external : *externals) {
			print_symbol(out, external);
		}
	}
	
	return Result<void>();
}
static void print_symbol(FILE* out, const Symbol& symbol)
{
fprintf(out, " %8x ", symbol.value);
const char* symbol_type_str = symbol_type(symbol.symbol_type);
if(symbol_type_str) {
fprintf(out, "%-11s ", symbol_type_str);
} else {
fprintf(out, "ST(%7u) ", (u32) symbol.symbol_type);
}
const char* symbol_class_str = symbol_class(symbol.symbol_class);
if(symbol_class_str) {
fprintf(out, "%-4s ", symbol_class_str);
} else if ((u32) symbol.symbol_class == 0) {
fprintf(out, " ");
} else {
fprintf(out, "SC(%4u) ", (u32) symbol.symbol_class);
}
if(symbol.is_stabs()) {
fprintf(out, "%-8s ", stabs_code_to_string(symbol.code()));
} else {
fprintf(out, "SI(%4u) ", symbol.index);
}
fprintf(out, "%s\n", symbol.string);
}
// Print all the fields of a procedure descriptor, one per line, in the order
// they are stored in the structure. The address and the register masks are
// printed in hexadecimal, everything else is printed in decimal.
static void print_procedure_descriptor(FILE* out, const ProcedureDescriptor& procedure_descriptor)
{
fprintf(out, " Address 0x%08x\n", procedure_descriptor.address);
fprintf(out, " Symbol Index %d\n", procedure_descriptor.symbol_index);
fprintf(out, " Line Number Entry Index %d\n", procedure_descriptor.line_number_entry_index);
fprintf(out, " Saved Register Mask 0x%08x\n", procedure_descriptor.saved_register_mask);
fprintf(out, " Saved Register Offset %d\n", procedure_descriptor.saved_register_offset);
fprintf(out, " Optimization Entry Index %d\n", procedure_descriptor.optimization_entry_index);
fprintf(out, " Saved Float Register Mask 0x%08x\n", procedure_descriptor.saved_float_register_mask);
fprintf(out, " Saved Float Register Offset %d\n", procedure_descriptor.saved_float_register_offset);
fprintf(out, " Frame Size %d\n", procedure_descriptor.frame_size);
fprintf(out, " Frame Pointer Register %hd\n", procedure_descriptor.frame_pointer_register);
fprintf(out, " Return PC Register %hd\n", procedure_descriptor.return_pc_register);
fprintf(out, " Line Number Low %d\n", procedure_descriptor.line_number_low);
fprintf(out, " Line Number High %d\n", procedure_descriptor.line_number_high);
fprintf(out, " Line Number Offset %d\n", procedure_descriptor.line_number_offset);
}
// Calculate the amount by which all the file offsets stored in the symbol
// table have to be adjusted to account for the section having been moved.
//
// Returns an error if none of the table offsets in the header are positive. A
// warning is emitted if the resulting offset is non-zero.
static Result<s32> get_corruption_fixing_fudge_offset(s32 section_offset, const SymbolicHeader& hdrr)
{
	// GCC will always put the first part of the symbol table right after the
	// header, so if the header says it's somewhere else we know the section has
	// probably been moved without updating its contents.
	const s32 table_offsets[] = {
		hdrr.line_numbers_offset,
		hdrr.dense_numbers_offset,
		hdrr.procedure_descriptors_offset,
		hdrr.local_symbols_offset,
		hdrr.optimization_symbols_offset,
		hdrr.auxiliary_symbols_offset,
		hdrr.local_strings_offset,
		hdrr.external_strings_offset,
		hdrr.file_descriptors_offset,
		hdrr.relative_file_descriptors_offset,
		hdrr.external_symbols_offset
	};
	
	// Find the lowest offset, skipping tables whose offset is not positive.
	s32 right_after_header = INT32_MAX;
	for(const s32 table_offset : table_offsets) {
		if(table_offset > 0) {
			right_after_header = std::min(table_offset, right_after_header);
		}
	}
	
	CCC_CHECK(right_after_header >= 0 && right_after_header < INT32_MAX, "Invalid symbolic header.");
	
	// Figure out how much we need to adjust all the file offsets by.
	s32 fudge_offset = section_offset - (right_after_header - sizeof(SymbolicHeader));
	if(fudge_offset != 0) {
		CCC_WARN("The .mdebug section was moved without updating its contents. Adjusting file offsets by %d bytes.", fudge_offset);
	}
	
	return fudge_offset;
}
// Build a Symbol from an on-disk symbol header. The string of the symbol is
// looked up at strings_offset plus the iss field of the header.
//
// Returns an error if the string could not be read, or if the symbol is a
// STABS symbol with a code that isn't recognised.
static Result<Symbol> get_symbol(const SymbolHeader& header, std::span<const u8> elf, s32 strings_offset)
{
	const char* name = get_string(elf, strings_offset + header.iss);
	CCC_CHECK(name, "Symbol has invalid string.");
	
	Symbol result;
	result.string = name;
	result.value = header.value;
	result.symbol_type = (SymbolType) header.st;
	result.symbol_class = (SymbolClass) header.sc;
	result.index = header.index;
	
	// Reject STABS codes that have no name.
	if(result.is_stabs()) {
		CCC_CHECK(stabs_code_to_string(result.code()) != nullptr, "Bad stabs symbol code '%x'.", result.code());
	}
	
	return result;
}
// Convert a symbol type to a string for display purposes. Returns a null
// pointer if the value passed doesn't match any of the enumerators.
const char* symbol_type(SymbolType type)
{
switch(type) {
case SymbolType::NIL: return "NIL";
case SymbolType::GLOBAL: return "GLOBAL";
case SymbolType::STATIC: return "STATIC";
case SymbolType::PARAM: return "PARAM";
case SymbolType::LOCAL: return "LOCAL";
case SymbolType::LABEL: return "LABEL";
case SymbolType::PROC: return "PROC";
case SymbolType::BLOCK: return "BLOCK";
case SymbolType::END: return "END";
case SymbolType::MEMBER: return "MEMBER";
case SymbolType::TYPEDEF: return "TYPEDEF";
case SymbolType::FILE_SYMBOL: return "FILE";
case SymbolType::STATICPROC: return "STATICPROC";
case SymbolType::CONSTANT: return "CONSTANT";
}
// Reached for values read from a file that don't match any enumerator.
return nullptr;
}
// Convert a symbol class to a string for display purposes. Returns a null
// pointer if the value passed doesn't match any of the enumerators.
const char* symbol_class(SymbolClass symbol_class)
{
switch(symbol_class) {
case SymbolClass::NIL: return "NIL";
case SymbolClass::TEXT: return "TEXT";
case SymbolClass::DATA: return "DATA";
case SymbolClass::BSS: return "BSS";
case SymbolClass::REGISTER: return "REGISTER";
case SymbolClass::ABS: return "ABS";
case SymbolClass::UNDEFINED: return "UNDEFINED";
case SymbolClass::LOCAL: return "LOCAL";
case SymbolClass::BITS: return "BITS";
case SymbolClass::DBX: return "DBX";
case SymbolClass::REG_IMAGE: return "REG_IMAGE";
case SymbolClass::INFO: return "INFO";
case SymbolClass::USER_STRUCT: return "USER_STRUCT";
case SymbolClass::SDATA: return "SDATA";
case SymbolClass::SBSS: return "SBSS";
case SymbolClass::RDATA: return "RDATA";
case SymbolClass::VAR: return "VAR";
case SymbolClass::COMMON: return "COMMON";
case SymbolClass::SCOMMON: return "SCOMMON";
case SymbolClass::VAR_REGISTER: return "VAR_REGISTER";
case SymbolClass::VARIANT: return "VARIANT";
case SymbolClass::SUNDEFINED: return "SUNDEFINED";
case SymbolClass::INIT: return "INIT";
case SymbolClass::BASED_VAR: return "BASED_VAR";
case SymbolClass::XDATA: return "XDATA";
case SymbolClass::PDATA: return "PDATA";
case SymbolClass::FINI: return "FINI";
case SymbolClass::NONGP: return "NONGP";
}
// Reached for values read from a file that don't match any enumerator.
return nullptr;
}
// Convert a STABS code to its name, without the "N_" prefix. Returns a null
// pointer if the value passed doesn't match any of the enumerators, which
// get_symbol relies on to reject symbols with unrecognised codes.
const char* stabs_code_to_string(StabsCode code)
{
switch(code) {
case STAB: return "STAB";
case N_GSYM: return "GSYM";
case N_FNAME: return "FNAME";
case N_FUN: return "FUN";
case N_STSYM: return "STSYM";
case N_LCSYM: return "LCSYM";
case N_MAIN: return "MAIN";
case N_PC: return "PC";
case N_NSYMS: return "NSYMS";
case N_NOMAP: return "NOMAP";
case N_OBJ: return "OBJ";
case N_OPT: return "OPT";
case N_RSYM: return "RSYM";
case N_M2C: return "M2C";
case N_SLINE: return "SLINE";
case N_DSLINE: return "DSLINE";
case N_BSLINE: return "BSLINE";
case N_EFD: return "EFD";
case N_EHDECL: return "EHDECL";
case N_CATCH: return "CATCH";
case N_SSYM: return "SSYM";
case N_SO: return "SO";
case N_LSYM: return "LSYM";
case N_BINCL: return "BINCL";
case N_SOL: return "SOL";
case N_PSYM: return "PSYM";
case N_EINCL: return "EINCL";
case N_ENTRY: return "ENTRY";
case N_LBRAC: return "LBRAC";
case N_EXCL: return "EXCL";
case N_SCOPE: return "SCOPE";
case N_RBRAC: return "RBRAC";
case N_BCOMM: return "BCOMM";
case N_ECOMM: return "ECOMM";
case N_ECOML: return "ECOML";
case N_NBTEXT: return "NBTEXT";
case N_NBDATA: return "NBDATA";
case N_NBBSS: return "NBBSS";
case N_NBSTS: return "NBSTS";
case N_NBLCS: return "NBLCS";
case N_LENG: return "LENG";
}
// Reached for values read from a file that don't match any enumerator.
return nullptr;
}
}

176
3rdparty/ccc/src/ccc/mdebug_section.h vendored Normal file
View File

@ -0,0 +1,176 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "util.h"
namespace ccc::mdebug {
// The layout of the symbolic header is private to mdebug_section.cpp, so it is
// only forward declared here.
struct SymbolicHeader;
// The type of a symbol, as stored in the st field of a symbol header. Note
// that the values 12 and 13 have no enumerators.
enum class SymbolType : u32 {
NIL = 0,
GLOBAL = 1,
STATIC = 2,
PARAM = 3,
LOCAL = 4,
LABEL = 5,
PROC = 6,
BLOCK = 7,
END = 8,
MEMBER = 9,
TYPEDEF = 10,
FILE_SYMBOL = 11,
STATICPROC = 14,
CONSTANT = 15
};
// The storage class of a symbol, as stored in the sc field of a symbol header.
enum class SymbolClass : u32 {
NIL = 0,
TEXT = 1,
DATA = 2,
BSS = 3,
REGISTER = 4,
ABS = 5,
UNDEFINED = 6,
LOCAL = 7,
BITS = 8,
DBX = 9,
REG_IMAGE = 10,
INFO = 11,
USER_STRUCT = 12,
SDATA = 13,
SBSS = 14,
RDATA = 15,
VAR = 16,
COMMON = 17,
SCOMMON = 18,
VAR_REGISTER = 19,
VARIANT = 20,
SUNDEFINED = 21,
INIT = 22,
BASED_VAR = 23,
XDATA = 24,
PDATA = 25,
FINI = 26,
NONGP = 27
};
// The code of a STABS symbol, which Symbol::code extracts from the index field
// of symbols for which Symbol::is_stabs returns true.
// See stab.def from gcc for documentation on what all these are.
enum StabsCode {
STAB = 0x00,
N_GSYM = 0x20,
N_FNAME = 0x22,
N_FUN = 0x24,
N_STSYM = 0x26,
N_LCSYM = 0x28,
N_MAIN = 0x2a,
N_PC = 0x30,
N_NSYMS = 0x32,
N_NOMAP = 0x34,
N_OBJ = 0x38,
N_OPT = 0x3c,
N_RSYM = 0x40,
N_M2C = 0x42,
N_SLINE = 0x44,
N_DSLINE = 0x46,
N_BSLINE = 0x48,
N_EFD = 0x4a,
N_EHDECL = 0x50,
N_CATCH = 0x54,
N_SSYM = 0x60,
N_SO = 0x64,
N_LSYM = 0x80,
N_BINCL = 0x82,
N_SOL = 0x84,
N_PSYM = 0xa0,
N_EINCL = 0xa2,
N_ENTRY = 0xa4,
N_LBRAC = 0xc0,
N_EXCL = 0xc2,
N_SCOPE = 0xc4,
N_RBRAC = 0xe0,
N_BCOMM = 0xe2,
N_ECOMM = 0xe4,
N_ECOML = 0xe8,
N_NBTEXT = 0xf0,
N_NBDATA = 0xf2,
N_NBBSS = 0xf4,
N_NBSTS = 0xf6,
N_NBLCS = 0xf8,
N_LENG = 0xfe
};
// On-disk procedure descriptor. The symbol_index field identifies the symbol
// that the descriptor belongs to within the list of symbols parsed for a file.
CCC_PACKED_STRUCT(ProcedureDescriptor,
/* 0x00 */ u32 address;
/* 0x04 */ u32 symbol_index;
/* 0x08 */ s32 line_number_entry_index;
/* 0x0c */ s32 saved_register_mask;
/* 0x10 */ s32 saved_register_offset;
/* 0x14 */ s32 optimization_entry_index;
/* 0x18 */ s32 saved_float_register_mask;
/* 0x1c */ s32 saved_float_register_offset;
/* 0x20 */ s32 frame_size;
/* 0x24 */ s16 frame_pointer_register;
/* 0x26 */ s16 return_pc_register;
/* 0x28 */ s32 line_number_low;
/* 0x2c */ s32 line_number_high;
/* 0x30 */ u32 line_number_offset;
)
// Guard against the layout drifting from the on-disk format.
static_assert(sizeof(ProcedureDescriptor) == 0x34);
struct Symbol {
u32 value;
SymbolType symbol_type;
SymbolClass symbol_class;
u32 index;
const char* string;
const ProcedureDescriptor* procedure_descriptor = nullptr;
bool is_stabs() const {
return (index & 0xfff00) == 0x8f300;
}
StabsCode code() const {
return (StabsCode) (index - 0x8f300);
}
};
// The result of parsing a single file descriptor along with its symbols.
struct File {
std::vector<Symbol> symbols; // The local symbols, in the order they are stored.
u32 address = 0; // The address field of the file descriptor.
std::string working_dir; // The working directory of gcc.
std::string command_line_path; // The source file path passed on the command line to gcc.
std::string full_path; // The full combined path.
};
// Reads the contents of a .mdebug symbol table out of a buffer. The init
// function must succeed before any of the other member functions are called.
// The reader does not own the buffer, so the buffer must outlive both the
// reader and the symbols it returns.
class SymbolTableReader {
public:
	// Locate and validate the symbolic header stored at section_offset.
	Result<void> init(std::span<const u8> elf, s32 section_offset);
	
	// The number of file descriptors in the symbol table.
	s32 file_count() const;
	// Parse the file descriptor at the given index and all of its symbols.
	Result<File> parse_file(s32 index) const;
	// Parse the external symbol table.
	Result<std::vector<Symbol>> parse_external_symbols() const;
	
	// Print the contents of the symbolic header.
	void print_header(FILE* out) const;
	// Print the local symbols, procedure descriptors and/or external symbols.
	Result<void> print_symbols(FILE* out, bool print_locals, bool print_procedure_descriptors, bool print_externals) const;

protected:
	// Set once init has succeeded.
	bool m_ready = false;
	
	std::span<const u8> m_elf;
	s32 m_section_offset = 0;
	
	// If the .mdebug section was moved without updating its contents all the
	// absolute file offsets stored within will be incorrect by a fixed amount.
	s32 m_fudge_offset = 0;
	
	// Points into m_elf. Initialised to null so that a reader which was never
	// set up doesn't hold an indeterminate pointer.
	const SymbolicHeader* m_hdrr = nullptr;
};
// Convert the enumerations above to strings. All of these return a null
// pointer if the value passed doesn't match an enumerator.
const char* symbol_type(SymbolType type);
const char* symbol_class(SymbolClass symbol_class);
const char* stabs_code_to_string(StabsCode code);
}

220
3rdparty/ccc/src/ccc/mdebug_symbols.cpp vendored Normal file
View File

@ -0,0 +1,220 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "mdebug_symbols.h"
#include "importer_flags.h"
namespace ccc::mdebug {
// Flag symbols that describe a type already described by another symbol, and
// work out which type symbols should be treated as typedefs.
static void mark_duplicate_symbols(std::vector<ParsedSymbol>& symbols);
// Convert the raw symbols of a file into a list of parsed symbols, parsing the
// strings of STABS symbols that contain type information and joining together
// strings that have been split over multiple symbols.
//
// The returned parsed symbols hold pointers to the elements of input, so input
// must outlive the result.
//
// If a truncated stab is encountered the STRICT_PARSING flag is cleared from
// importer_flags, which is why it is passed by reference.
//
// Returns an error if a stab fails to parse for a reason other than
// truncation, if STRICT_PARSING is set and there is unknown data at the end of
// a stab, or if an empty string is encountered while a continuation is
// pending.
Result<std::vector<ParsedSymbol>> parse_symbols(const std::vector<mdebug::Symbol>& input, u32& importer_flags)
{
std::vector<ParsedSymbol> output;
// Accumulates the pieces of a string that has been split up.
std::string prefix;
for(const mdebug::Symbol& symbol : input) {
if(symbol.is_stabs()) {
switch(symbol.code()) {
case mdebug::N_GSYM: // Global variable
case mdebug::N_FUN: // Function
case mdebug::N_STSYM: // Data section static global variable
case mdebug::N_LCSYM: // BSS section static global variable
case mdebug::N_RSYM: // Register variable
case mdebug::N_LSYM: // Automatic variable or type definition
case mdebug::N_PSYM: { // Parameter variable
// Some STABS symbols are split between multiple strings.
if(symbol.string[0] != '\0') {
// A trailing backslash means the string continues in the next symbol,
// so stash this piece without the backslash.
if(symbol.string[strlen(symbol.string) - 1] == '\\') {
prefix += std::string(symbol.string, symbol.string + strlen(symbol.string) - 1);
} else {
std::string merged_string;
const char* string;
if(!prefix.empty()) {
merged_string = prefix + symbol.string;
string = merged_string.c_str();
prefix.clear();
} else {
string = symbol.string;
}
// Note that this local shadows the input parameter. The parser advances
// it, so afterwards it points to whatever was left unparsed.
const char* input = string;
Result<StabsSymbol> parse_result = parse_stabs_symbol(input);
if(parse_result.success()) {
if(*input != '\0') {
if(importer_flags & STRICT_PARSING) {
return CCC_FAILURE("Unknown data '%s' at the end of the '%s' stab.", input, parse_result->name.c_str());
} else {
CCC_WARN("Unknown data '%s' at the end of the '%s' stab.", input, parse_result->name.c_str());
}
}
ParsedSymbol& parsed = output.emplace_back();
parsed.type = ParsedSymbolType::NAME_COLON_TYPE;
parsed.raw = &symbol;
parsed.name_colon_type = std::move(*parse_result);
} else if(parse_result.error().message == STAB_TRUNCATED_ERROR_MESSAGE) {
// Symbol truncated due to a GCC bug. Report a
// warning and try to tolerate further faults
// caused as a result of this.
CCC_WARN("%s Symbol string: %s", STAB_TRUNCATED_ERROR_MESSAGE, string);
importer_flags &= ~STRICT_PARSING;
} else {
return CCC_FAILURE("%s Symbol string: %s",
parse_result.error().message.c_str(), string);
}
}
} else {
// An empty string can't be the continuation of a split string.
CCC_CHECK(prefix.empty(), "Invalid STABS continuation.");
// An N_FUN symbol with an empty string marks the end of a function.
if(symbol.code() == mdebug::N_FUN) {
ParsedSymbol& func_end = output.emplace_back();
func_end.type = ParsedSymbolType::FUNCTION_END;
func_end.raw = &symbol;
}
}
break;
}
case mdebug::N_SOL: { // Sub-source file
ParsedSymbol& sub = output.emplace_back();
sub.type = ParsedSymbolType::SUB_SOURCE_FILE;
sub.raw = &symbol;
break;
}
case mdebug::N_LBRAC: { // Begin block
ParsedSymbol& begin_block = output.emplace_back();
begin_block.type = ParsedSymbolType::LBRAC;
begin_block.raw = &symbol;
break;
}
case mdebug::N_RBRAC: { // End block
ParsedSymbol& end_block = output.emplace_back();
end_block.type = ParsedSymbolType::RBRAC;
end_block.raw = &symbol;
break;
}
case mdebug::N_SO: { // Source filename
ParsedSymbol& so_symbol = output.emplace_back();
so_symbol.type = ParsedSymbolType::SOURCE_FILE;
so_symbol.raw = &symbol;
break;
}
// These are silently ignored.
case mdebug::STAB:
case mdebug::N_OPT:
case mdebug::N_BINCL:
case mdebug::N_EINCL: {
break;
}
// These are not handled, so a warning is printed and they are skipped.
case mdebug::N_FNAME:
case mdebug::N_MAIN:
case mdebug::N_PC:
case mdebug::N_NSYMS:
case mdebug::N_NOMAP:
case mdebug::N_OBJ:
case mdebug::N_M2C:
case mdebug::N_SLINE:
case mdebug::N_DSLINE:
case mdebug::N_BSLINE:
case mdebug::N_EFD:
case mdebug::N_EHDECL:
case mdebug::N_CATCH:
case mdebug::N_SSYM:
case mdebug::N_ENTRY:
case mdebug::N_EXCL:
case mdebug::N_SCOPE:
case mdebug::N_BCOMM:
case mdebug::N_ECOMM:
case mdebug::N_ECOML:
case mdebug::N_NBTEXT:
case mdebug::N_NBDATA:
case mdebug::N_NBBSS:
case mdebug::N_NBSTS:
case mdebug::N_NBLCS:
case mdebug::N_LENG: {
CCC_WARN("Unhandled N_%s symbol: %s", mdebug::stabs_code_to_string(symbol.code()), symbol.string);
break;
}
}
} else {
// Symbols that aren't STABS symbols are passed through as they are.
ParsedSymbol& non_stabs_symbol = output.emplace_back();
non_stabs_symbol.type = ParsedSymbolType::NON_STABS;
non_stabs_symbol.raw = &symbol;
}
}
mark_duplicate_symbols(output);
return output;
}
// Set the duplicate and is_typedef flags on the symbols passed in. This is
// done in three passes: first a map from type numbers to the symbols that
// define them is built, then the initial value of is_typedef is set for every
// symbol, and finally type symbols that merely refer to a type defined by
// another symbol are flagged as duplicates.
static void mark_duplicate_symbols(std::vector<ParsedSymbol>& symbols)
{
// Only symbols that have both a valid type number and a descriptor are
// recorded. Since emplace is used, the first symbol to define a given type
// number is the one that is kept.
std::map<StabsTypeNumber, size_t> stabs_type_number_to_symbol;
for(size_t i = 0; i < symbols.size(); i++) {
ParsedSymbol& symbol = symbols[i];
if(symbol.type == ParsedSymbolType::NAME_COLON_TYPE) {
StabsType& type = *symbol.name_colon_type.type;
if(type.type_number.valid() && type.descriptor.has_value()) {
stabs_type_number_to_symbol.emplace(type.type_number, i);
}
}
}
// Type names are treated as typedefs to begin with, unless they're enums.
for(ParsedSymbol& symbol : symbols) {
symbol.is_typedef =
symbol.type == ParsedSymbolType::NAME_COLON_TYPE &&
symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME &&
symbol.name_colon_type.type->descriptor != StabsTypeDescriptor::ENUM;
}
for(size_t i = 0; i < symbols.size(); i++) {
ParsedSymbol& symbol = symbols[i];
if(symbol.type != ParsedSymbolType::NAME_COLON_TYPE) {
continue;
}
bool is_type =
symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME ||
symbol.name_colon_type.descriptor == StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG;
if(!is_type) {
continue;
}
StabsType& type = *symbol.name_colon_type.type;
// The symbol has a type number but no body of its own.
if(!type.descriptor.has_value()) {
auto referenced_index = stabs_type_number_to_symbol.find(type.type_number);
if(referenced_index != stabs_type_number_to_symbol.end()) {
ParsedSymbol& referenced = symbols[referenced_index->second];
if(referenced.name_colon_type.name == symbol.name_colon_type.name) {
// symbol: "Struct:T(1,1)=s1;"
// referenced: "Struct:t(1,1)"
symbol.duplicate = true;
}
}
}
// The symbol is defined as a reference to a type from another symbol.
if(type.descriptor.has_value() && type.descriptor == StabsTypeDescriptor::TYPE_REFERENCE) {
auto referenced_index = stabs_type_number_to_symbol.find(type.as<StabsTypeReferenceType>().type->type_number);
if(referenced_index != stabs_type_number_to_symbol.end() && referenced_index->second != i) {
ParsedSymbol& referenced = symbols[referenced_index->second];
// The referenced type has a placeholder name, so it takes its name from
// the symbol that refers to it. Note that this makes the names equal, so
// the condition below will be true as well.
if(referenced.name_colon_type.name == " ") {
// referenced: " :T(1,1)=e;"
// symbol: "ErraticEnum:t(1,2)=(1,1)"
referenced.name_colon_type.name = symbol.name_colon_type.name;
referenced.is_typedef = true;
symbol.duplicate = true;
}
if(referenced.name_colon_type.name == symbol.name_colon_type.name) {
// referenced: "NamedTypedefedStruct:T(1,1)=s1;"
// symbol: "NamedTypedefedStruct:t(1,2)=(1,1)"
referenced.is_typedef = true;
symbol.duplicate = true;
}
}
}
}
}
}

32
3rdparty/ccc/src/ccc/mdebug_symbols.h vendored Normal file
View File

@ -0,0 +1,32 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "util.h"
#include "stabs.h"
#include "mdebug_section.h"
namespace ccc::mdebug {
// The kinds of symbol that parse_symbols can produce.
enum class ParsedSymbolType {
NAME_COLON_TYPE, // A STABS symbol whose string was parsed.
SOURCE_FILE, // An N_SO symbol.
SUB_SOURCE_FILE, // An N_SOL symbol.
LBRAC, // An N_LBRAC symbol, marking the beginning of a block.
RBRAC, // An N_RBRAC symbol, marking the end of a block.
FUNCTION_END, // An N_FUN symbol with an empty string.
NON_STABS // A symbol that isn't a STABS symbol.
};
// A symbol produced by parse_symbols.
struct ParsedSymbol {
ParsedSymbolType type;
// Points to the raw symbol in the list that was passed to parse_symbols.
const mdebug::Symbol* raw;
// Only filled in for symbols of type NAME_COLON_TYPE.
StabsSymbol name_colon_type;
// Set if the type is already described by another symbol.
bool duplicate = false;
bool is_typedef = false;
};
// Parse the strings of the STABS symbols in the input list and filter out the
// symbols that aren't needed. The parsed symbols point to elements of input,
// and importer_flags may be modified (see the implementation for details).
Result<std::vector<ParsedSymbol>> parse_symbols(const std::vector<mdebug::Symbol>& input, u32& importer_flags);
}

191
3rdparty/ccc/src/ccc/sndll.cpp vendored Normal file
View File

@ -0,0 +1,191 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "sndll.h"
#include "importer_flags.h"
namespace ccc {
// The part of the SNDLL header that is shared between all the versions of the
// format. The upper byte of the magic field encodes the version as a
// character. The meaning of the fields named unknown has not been determined.
CCC_PACKED_STRUCT(SNDLLHeaderCommon,
/* 0x00 */ u32 magic;
/* 0x04 */ u32 relocations;
/* 0x08 */ u32 relocation_count;
/* 0x0c */ u32 symbols;
/* 0x10 */ u32 symbol_count;
/* 0x14 */ u32 elf_path;
/* 0x18 */ u32 load_func;
/* 0x1c */ u32 unload_func;
/* 0x20 */ u32 unknown_20;
/* 0x24 */ u32 unknown_24;
/* 0x28 */ u32 unknown_28;
/* 0x2c */ u32 file_size;
/* 0x30 */ u32 unknown_30;
)
// The header of a version 1 SNDLL file, which consists of just the common
// fields.
CCC_PACKED_STRUCT(SNDLLHeaderV1,
/* 0x00 */ SNDLLHeaderCommon common;
)
// The header of a version 2 SNDLL file, which adds two fields of unknown
// purpose after the common fields.
CCC_PACKED_STRUCT(SNDLLHeaderV2,
/* 0x00 */ SNDLLHeaderCommon common;
/* 0x34 */ u32 unknown_34;
/* 0x38 */ u32 unknown_38;
)
// An entry in the relocation table. The meaning of the fields has not been
// determined, and the structure is not used by the code visible here.
CCC_PACKED_STRUCT(SNDLLRelocation,
/* 0x0 */ u32 unknown_0;
/* 0x4 */ u32 unknown_4;
/* 0x8 */ u32 unknown_8;
)
// An entry in the symbol table. The string field is zero if the symbol has no
// name. The address of the SNDLL, if it has one, is subtracted from it before
// it is used as an offset into the image.
CCC_PACKED_STRUCT(SNDLLSymbolHeader,
/* 0x0 */ u32 string;
/* 0x4 */ u32 value;
/* 0x8 */ u8 unknown_8;
/* 0x9 */ u8 unknown_9;
/* 0xa */ SNDLLSymbolType type;
/* 0xb */ u8 processed;
)
// Forward declarations of the helpers private to this translation unit.
// Parse the parts of an SNDLL file that are common to all of the versions.
static Result<SNDLLFile> parse_sndll_common(
std::span<const u8> image, Address address, SNDLLType type, const SNDLLHeaderCommon& common, SNDLLVersion version);
// Convert an SNDLL symbol type to a string.
static const char* sndll_symbol_type_to_string(SNDLLSymbolType type);
// Parse an SNDLL file, or an SNDLL section from an ELF file, from a buffer.
// The version of the format is determined from the upper byte of the magic
// number and the header is parsed accordingly.
//
// Returns an error if the image is too small to contain the magic number or
// the header, if the magic number is wrong, or if the version is unknown.
Result<SNDLLFile> parse_sndll_file(std::span<const u8> image, Address address, SNDLLType type)
{
	// The image may be smaller than the magic number, in which case a null
	// pointer is returned, so make sure that we don't dereference it.
	const u32* magic = get_packed<u32>(image, 0);
	CCC_CHECK(magic, "SNDLL %s too small to contain magic number.", address.valid() ? "section" : "file");
	CCC_CHECK((*magic & 0xffffff) == CCC_FOURCC("SNR\00"), "Not a SNDLL %s.", address.valid() ? "section" : "file");
	
	char version = *magic >> 24;
	switch(version) {
		case '1': {
			const SNDLLHeaderV1* header = get_packed<SNDLLHeaderV1>(image, 0);
			CCC_CHECK(header, "File too small to contain SNDLL V1 header.");
			return parse_sndll_common(image, address, type, header->common, SNDLL_V1);
		}
		case '2': {
			const SNDLLHeaderV2* header = get_packed<SNDLLHeaderV2>(image, 0);
			CCC_CHECK(header, "File too small to contain SNDLL V2 header.");
			return parse_sndll_common(image, address, type, header->common, SNDLL_V2);
		}
	}
	
	return CCC_FAILURE("Unknown SNDLL version '%c'.", version);
}
// Parse the ELF path and the symbol table of an SNDLL file using the fields
// of the header that are shared between all the versions of the format.
//
// The symbol table offset and the string offsets have the address of the
// SNDLL, if it has one, subtracted from them before they're used to index
// into the image.
//
// Returns an error if the symbol count is unreasonably high or if a symbol
// header lies outside of the image. Symbols with missing or unreadable names
// are kept, with their string left empty.
static Result<SNDLLFile> parse_sndll_common(
	std::span<const u8> image, Address address, SNDLLType type, const SNDLLHeaderCommon& common, SNDLLVersion version)
{
	SNDLLFile sndll;
	
	sndll.address = address;
	sndll.type = type;
	sndll.version = version;
	
	if(common.elf_path) {
		const char* elf_path = get_string(image, common.elf_path);
		if(elf_path) {
			sndll.elf_path = elf_path;
		}
	}
	
	// Limit the amount of memory that a corrupted file can make us allocate.
	CCC_CHECK(common.symbol_count < (32 * 1024 * 1024) / sizeof(SNDLLSymbol), "SNDLL symbol count is too high.");
	sndll.symbols.reserve(common.symbol_count);
	
	for(u32 i = 0; i < common.symbol_count; i++) {
		u32 symbol_offset = common.symbols - address.get_or_zero() + i * sizeof(SNDLLSymbolHeader);
		const SNDLLSymbolHeader* symbol_header = get_packed<SNDLLSymbolHeader>(image, symbol_offset);
		CCC_CHECK(symbol_header, "SNDLL symbol out of range.");
		
		const char* string = nullptr;
		if(symbol_header->string) {
			string = get_string(image, symbol_header->string - address.get_or_zero());
		}
		
		SNDLLSymbol& symbol = sndll.symbols.emplace_back();
		symbol.type = symbol_header->type;
		symbol.value = symbol_header->value;
		// The string pointer will be null if the symbol has no name or if the
		// name couldn't be read. Constructing a string from a null pointer is
		// undefined behaviour, so in that case leave the string empty.
		if(string) {
			symbol.string = string;
		}
	}
	
	return sndll;
}
// Add the symbols from a parsed SNDLL file to the symbol database.
//
// Symbols with a zero value or an empty name are skipped. For dynamic
// libraries, the value of every non-absolute symbol is offset by the address of
// the SNDLL file. Unless DONT_DEDUPLICATE_SYMBOLS is set, a symbol is also
// skipped if a function, global variable or local variable already starts at
// its address. What remains becomes a function if it lands in a section
// containing code, a global variable if it lands in a section containing data,
// and a label otherwise.
//
// Returns the first error reported while creating a symbol, if any.
Result<void> import_sndll_symbols(
	SymbolDatabase& database,
	const SNDLLFile& sndll,
	const SymbolGroup& group,
	u32 importer_flags,
	DemanglerFunctions demangler)
{
	const bool deduplicate = !(importer_flags & DONT_DEDUPLICATE_SYMBOLS);
	
	for(const SNDLLSymbol& entry : sndll.symbols) {
		const bool importable = entry.value != 0 && !entry.string.empty();
		if(!importable) {
			continue;
		}
		
		u32 address = entry.value;
		const bool is_relative = entry.type != SNDLL_ABSOLUTE;
		if(is_relative && sndll.type == SNDLLType::DYNAMIC_LIBRARY) {
			address += sndll.address.get_or_zero();
		}
		
		if(deduplicate) {
			const bool already_present =
				database.functions.first_handle_from_starting_address(address).valid() ||
				database.global_variables.first_handle_from_starting_address(address).valid() ||
				database.local_variables.first_handle_from_starting_address(address).valid();
			if(already_present) {
				continue;
			}
		}
		
		const Section* section = database.sections.symbol_overlapping_address(address);
		if(section && section->contains_code()) {
			Result<Function*> function = database.functions.create_symbol(
				entry.string, group.source, group.module_symbol, address, importer_flags, demangler);
			CCC_RETURN_IF_ERROR(function);
		} else if(section && section->contains_data()) {
			Result<GlobalVariable*> global_variable = database.global_variables.create_symbol(
				entry.string, group.source, group.module_symbol, address, importer_flags, demangler);
			CCC_RETURN_IF_ERROR(global_variable);
		} else {
			// Not inside of a known code or data section.
			Result<Label*> label = database.labels.create_symbol(
				entry.string, group.source, group.module_symbol, address, importer_flags, demangler);
			CCC_RETURN_IF_ERROR(label);
		}
	}
	
	return Result<void>();
}
// Write a heading followed by one line per symbol (type, value in hex, name)
// to the given stream. Symbols without a name are shown as "(no string)".
void print_sndll_symbols(FILE* out, const SNDLLFile& sndll)
{
	fprintf(out, "SNDLL SYMBOLS:\n");
	for(const SNDLLSymbol& entry : sndll.symbols) {
		const char* name = "(no string)";
		if(!entry.string.empty()) {
			name = entry.string.c_str();
		}
		fprintf(out, "%8s %08x %s\n", sndll_symbol_type_to_string(entry.type), entry.value, name);
	}
}
// Map a symbol type to its upper case name for printing. Values that are not
// one of the known enumerators produce "invalid".
static const char* sndll_symbol_type_to_string(SNDLLSymbolType type)
{
	const char* name = "invalid";
	switch(type) {
		case SNDLL_NIL:      name = "NIL";      break;
		case SNDLL_EXTERNAL: name = "EXTERNAL"; break;
		case SNDLL_RELATIVE: name = "RELATIVE"; break;
		case SNDLL_WEAK:     name = "WEAK";     break;
		case SNDLL_ABSOLUTE: name = "ABSOLUTE"; break;
	}
	return name;
}
}

55
3rdparty/ccc/src/ccc/sndll.h vendored Normal file
View File

@ -0,0 +1,55 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "symbol_database.h"
namespace ccc {
// The kind of SNDLL image being handled. import_sndll_symbols only offsets
// symbol values by the file's address for DYNAMIC_LIBRARY.
enum class SNDLLType {
SNDATA_SECTION,
DYNAMIC_LIBRARY
};
// Header version, chosen by parse_sndll_file from the version character stored
// in the top byte of the magic word ('1' or '2').
enum SNDLLVersion {
SNDLL_V1,
SNDLL_V2
};
// The type of a symbol. The underlying type is a single byte so the enum can
// be used directly as a field of the packed symbol header.
enum SNDLLSymbolType : u8 {
SNDLL_NIL = 0, // I think this is just so that the first real symbol has an index of 1.
SNDLL_EXTERNAL = 1, // Symbol with an empty value, to be filled in from another module.
SNDLL_RELATIVE = 2, // Global symbol, value is relative to the start of the SNDLL file.
SNDLL_WEAK = 3, // Weak symbol, value is relative to the start of the SNDLL file.
SNDLL_ABSOLUTE = 4 // Global symbol, value is an absolute address.
};
// A single symbol read from an SNDLL symbol table.
struct SNDLLSymbol {
SNDLLSymbolType type = SNDLL_NIL;
// Raw value from the symbol header. How it should be interpreted depends on
// the type (see SNDLLSymbolType).
u32 value = 0;
// The symbol's name. The importer and printer treat an empty string as "no
// name".
std::string string;
};
// The result of parsing an SNDLL image with parse_sndll_file.
struct SNDLLFile {
// The address that was passed to the parser. May be invalid, in which case
// pointers in the image were treated as file offsets.
Address address;
SNDLLType type;
SNDLLVersion version;
// Left empty if the header has no ELF path or the path couldn't be read.
std::string elf_path;
std::vector<SNDLLSymbol> symbols;
};
// If a valid address is passed, the pointers in the header will be treated as
// addresses, otherwise they will be treated as file offsets.
Result<SNDLLFile> parse_sndll_file(std::span<const u8> image, Address address, SNDLLType type);
Result<void> import_sndll_symbols(
SymbolDatabase& database,
const SNDLLFile& sndll,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler);
void print_sndll_symbols(FILE* out, const SNDLLFile& sndll);
}

835
3rdparty/ccc/src/ccc/stabs.cpp vendored Normal file
View File

@ -0,0 +1,835 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "stabs.h"
namespace ccc {
#define STABS_DEBUG(...) //__VA_ARGS__
#define STABS_DEBUG_PRINTF(...) STABS_DEBUG(printf(__VA_ARGS__);)
static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor);
static Result<std::unique_ptr<StabsType>> parse_stabs_type(const char*& input);
static Result<std::vector<StabsStructOrUnionType::Field>> parse_field_list(const char*& input);
static Result<std::vector<StabsStructOrUnionType::MemberFunctionSet>> parse_member_functions(const char*& input);
static Result<StabsStructOrUnionType::Visibility> parse_visibility_character(const char*& input);
STABS_DEBUG(static void print_field(const StabsStructOrUnionType::Field& field);)
// Error text returned by parse_dodgy_stabs_identifier when the input ends
// before the terminator character is found.
const char* STAB_TRUNCATED_ERROR_MESSAGE =
"STABS symbol truncated. This was probably caused by a GCC bug. "
"Other symbols from the same translation unit may also be invalid.";
// Parse a whole STABS symbol string of the form "name:<descriptor><type>".
//
// input is a cursor into a null terminated string and is advanced past
// everything that gets consumed. On success the returned symbol owns the parsed
// type tree, with the root type flagged via is_root.
//
// Fails if the name is truncated, the symbol descriptor is not recognised, or
// the type cannot be parsed.
Result<StabsSymbol> parse_stabs_symbol(const char*& input)
{
STABS_DEBUG_PRINTF("PARSING %s\n", input);
StabsSymbol symbol;
Result<std::string> name = parse_dodgy_stabs_identifier(input, ':');
CCC_RETURN_IF_ERROR(name);
symbol.name = *name;
CCC_EXPECT_CHAR(input, ':', "identifier");
CCC_CHECK(*input != '\0', "Unexpected end of input.");
// If the type follows the colon directly there is no descriptor character to
// consume, and the symbol is treated as a local variable.
if((*input >= '0' && *input <= '9') || *input == '(') {
symbol.descriptor = StabsSymbolDescriptor::LOCAL_VARIABLE;
} else {
char symbol_descriptor = *(input++);
CCC_CHECK(symbol_descriptor != '\0', "Failed to parse symbol descriptor.");
symbol.descriptor = (StabsSymbolDescriptor) symbol_descriptor;
}
CCC_CHECK(validate_symbol_descriptor(symbol.descriptor),
"Invalid symbol descriptor '%c'.",
(char) symbol.descriptor);
CCC_CHECK(*input != '\0', "Unexpected end of input.");
// Skip a 't' that may follow the symbol descriptor.
if(*input == 't') {
input++;
}
auto type = parse_top_level_stabs_type(input);
CCC_RETURN_IF_ERROR(type);
// Handle nested functions.
bool is_function =
symbol.descriptor == StabsSymbolDescriptor::LOCAL_FUNCTION ||
symbol.descriptor == StabsSymbolDescriptor::GLOBAL_FUNCTION;
if(is_function && input[0] == ',') {
// The two comma separated names in the suffix are skipped over, not stored.
input++;
while(*input != ',' && *input != '\0') input++; // enclosing function
CCC_EXPECT_CHAR(input, ',', "nested function suffix");
while(*input != ',' && *input != '\0') input++; // function
}
symbol.type = std::move(*type);
// Make sure that variable names aren't used as type names e.g. the STABS
// symbol "somevar:P123=*456" may be referenced by the type number 123, but
// the type name is not "somevar".
bool is_type = symbol.descriptor == StabsSymbolDescriptor::TYPE_NAME
|| symbol.descriptor == StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG;
if(is_type) {
symbol.type->name = symbol.name;
}
symbol.type->is_typedef = symbol.descriptor == StabsSymbolDescriptor::TYPE_NAME;
symbol.type->is_root = true;
return symbol;
}
// Check that a symbol descriptor, which may have been produced by casting an
// arbitrary character from the input, is one of the known enumerators.
static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor)
{
	switch(descriptor) {
		case StabsSymbolDescriptor::LOCAL_VARIABLE:
		case StabsSymbolDescriptor::REFERENCE_PARAMETER_A:
		case StabsSymbolDescriptor::LOCAL_FUNCTION:
		case StabsSymbolDescriptor::GLOBAL_FUNCTION:
		case StabsSymbolDescriptor::GLOBAL_VARIABLE:
		case StabsSymbolDescriptor::REGISTER_PARAMETER:
		case StabsSymbolDescriptor::VALUE_PARAMETER:
		case StabsSymbolDescriptor::REGISTER_VARIABLE:
		case StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE:
		case StabsSymbolDescriptor::TYPE_NAME:
		case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG:
		case StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE:
		case StabsSymbolDescriptor::REFERENCE_PARAMETER_V:
			return true;
		default:
			return false;
	}
}
// Parse a type along with the suffixes that are only looked for at the top
// level: the "~%" first base class suffix on structs and the ";l" live range
// suffix.
//
// input is a cursor into a null terminated string and is advanced past
// everything that gets consumed.
Result<std::unique_ptr<StabsType>> parse_top_level_stabs_type(const char*& input)
{
Result<std::unique_ptr<StabsType>> type = parse_stabs_type(input);
CCC_RETURN_IF_ERROR(type);
// Handle first base class suffixes.
if((*type)->descriptor == StabsTypeDescriptor::STRUCT && input[0] == '~' && input[1] == '%') {
input += 2;
Result<std::unique_ptr<StabsType>> first_base_class = parse_stabs_type(input);
CCC_RETURN_IF_ERROR(first_base_class);
(*type)->as<StabsStructType>().first_base_class = std::move(*first_base_class);
CCC_EXPECT_CHAR(input, ';', "first base class suffix");
}
// Handle extra live range information.
// The suffix has the form ";l(#<start>,#<end>)". Both numbers are validated
// and consumed, but their values are discarded.
if(input[0] == ';' && input[1] == 'l') {
input += 2;
CCC_EXPECT_CHAR(input, '(', "live range suffix");
CCC_EXPECT_CHAR(input, '#', "live range suffix");
std::optional<s32> start = parse_number_s32(input);
CCC_CHECK(start.has_value(), "Failed to parse live range suffix.");
CCC_EXPECT_CHAR(input, ',', "live range suffix");
CCC_EXPECT_CHAR(input, '#', "live range suffix");
std::optional<s32> end = parse_number_s32(input);
CCC_CHECK(end.has_value(), "Failed to parse live range suffix.");
CCC_EXPECT_CHAR(input, ')', "live range suffix");
}
return type;
}
// Recursively parse a single STABS type.
//
// A type is either a bare type number, which refers to a type defined
// elsewhere, or an optional type number followed by '=' and a definition. A
// definition starts with a type descriptor character that selects one of the
// cases below. If the definition starts with another type number instead, it is
// a type reference.
//
// input is a cursor into a null terminated string and is advanced past
// everything that gets consumed.
//
// Returns the root of the parsed type tree, or an error if the input is
// malformed or ends unexpectedly.
static Result<std::unique_ptr<StabsType>> parse_stabs_type(const char*& input)
{
	StabsTypeNumber type_number;
	
	CCC_CHECK(*input != '\0', "Unexpected end of input.");
	
	if(*input == '(') {
		// This file has type numbers made up of two pieces: an include file
		// index and a type number.
		
		input++;
		
		std::optional<s32> file_index = parse_number_s32(input);
		CCC_CHECK(file_index.has_value(), "Failed to parse type number (file index).");
		
		CCC_EXPECT_CHAR(input, ',', "type number");
		
		std::optional<s32> type_index = parse_number_s32(input);
		CCC_CHECK(type_index.has_value(), "Failed to parse type number (type index).");
		
		CCC_EXPECT_CHAR(input, ')', "type number");
		
		type_number.file = *file_index;
		type_number.type = *type_index;
		
		// No definition follows, so this is just a reference to a type by number.
		if(*input != '=') {
			return std::make_unique<StabsType>(type_number);
		}
		input++;
	} else if(*input >= '0' && *input <= '9') {
		// This file has type numbers which are just a single number. This is
		// the more common case for games.
		
		std::optional<s32> type_index = parse_number_s32(input);
		CCC_CHECK(type_index.has_value(), "Failed to parse type number.");
		type_number.type = *type_index;
		
		// No definition follows, so this is just a reference to a type by number.
		if(*input != '=') {
			return std::make_unique<StabsType>(type_number);
		}
		input++;
	}
	
	CCC_CHECK(*input != '\0', "Unexpected end of input.");
	
	StabsTypeDescriptor descriptor;
	if((*input >= '0' && *input <= '9') || *input == '(') {
		// The definition is itself a type number. Nothing is consumed here, the
		// recursive call below parses the number.
		descriptor = StabsTypeDescriptor::TYPE_REFERENCE;
	} else {
		char descriptor_char = *(input++);
		CCC_CHECK(descriptor_char != '\0', "Failed to parse type descriptor.");
		descriptor = (StabsTypeDescriptor) descriptor_char;
	}
	
	std::unique_ptr<StabsType> out_type;
	
	switch(descriptor) {
		case StabsTypeDescriptor::TYPE_REFERENCE: { // 0..9
			auto type_reference = std::make_unique<StabsTypeReferenceType>(type_number);
			
			auto type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(type);
			type_reference->type = std::move(*type);
			
			out_type = std::move(type_reference);
			break;
		}
		case StabsTypeDescriptor::ARRAY: { // a
			auto array = std::make_unique<StabsArrayType>(type_number);
			
			auto index_type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(index_type);
			array->index_type = std::move(*index_type);
			
			auto element_type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(element_type);
			array->element_type = std::move(*element_type);
			
			out_type = std::move(array);
			break;
		}
		case StabsTypeDescriptor::ENUM: { // e
			auto enum_type = std::make_unique<StabsEnumType>(type_number);
			STABS_DEBUG_PRINTF("enum {\n");
			// Each entry is "name:value,". If the input ends early the identifier
			// parser fails, so this loop cannot run off the end of the string.
			while(*input != ';') {
				std::optional<std::string> name = parse_stabs_identifier(input, ':');
				CCC_CHECK(name.has_value(), "Failed to parse enum field name.");
				
				CCC_EXPECT_CHAR(input, ':', "enum");
				
				std::optional<s32> value = parse_number_s32(input);
				CCC_CHECK(value.has_value(), "Failed to parse enum value.");
				
				enum_type->fields.emplace_back(*value, std::move(*name));
				
				CCC_EXPECT_CHAR(input, ',', "enum");
			}
			input++;
			STABS_DEBUG_PRINTF("}\n");
			
			out_type = std::move(enum_type);
			break;
		}
		case StabsTypeDescriptor::FUNCTION: { // f
			auto function = std::make_unique<StabsFunctionType>(type_number);
			
			auto return_type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(return_type);
			function->return_type = std::move(*return_type);
			
			out_type = std::move(function);
			break;
		}
		case StabsTypeDescriptor::VOLATILE_QUALIFIER: { // B
			auto volatile_qualifier = std::make_unique<StabsVolatileQualifierType>(type_number);
			
			auto type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(type);
			volatile_qualifier->type = std::move(*type);
			
			out_type = std::move(volatile_qualifier);
			break;
		}
		case StabsTypeDescriptor::CONST_QUALIFIER: { // k
			auto const_qualifier = std::make_unique<StabsConstQualifierType>(type_number);
			
			auto type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(type);
			const_qualifier->type = std::move(*type);
			
			out_type = std::move(const_qualifier);
			break;
		}
		case StabsTypeDescriptor::RANGE: { // r
			auto range = std::make_unique<StabsRangeType>(type_number);
			
			auto type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(type);
			range->type = std::move(*type);
			
			CCC_EXPECT_CHAR(input, ';', "range type descriptor");
			
			// The bounds are kept as strings rather than being converted to
			// numbers here.
			std::optional<std::string> low = parse_stabs_identifier(input, ';');
			CCC_CHECK(low.has_value(), "Failed to parse low part of range.");
			CCC_EXPECT_CHAR(input, ';', "low range value");
			
			std::optional<std::string> high = parse_stabs_identifier(input, ';');
			CCC_CHECK(high.has_value(), "Failed to parse high part of range.");
			CCC_EXPECT_CHAR(input, ';', "high range value");
			
			range->low = std::move(*low);
			range->high = std::move(*high);
			
			out_type = std::move(range);
			break;
		}
		case StabsTypeDescriptor::STRUCT: { // s
			auto struct_type = std::make_unique<StabsStructType>(type_number);
			STABS_DEBUG_PRINTF("struct {\n");
			
			std::optional<s64> struct_size = parse_number_s64(input);
			CCC_CHECK(struct_size.has_value(), "Failed to parse struct size.");
			struct_type->size = *struct_size;
			
			// An optional base class section: "!<count>," followed by one
			// "<virtual><visibility><offset>,<type>;" entry per base class.
			if(*input == '!') {
				input++;
				std::optional<s32> base_class_count = parse_number_s32(input);
				CCC_CHECK(base_class_count.has_value(), "Failed to parse base class count.");
				
				CCC_EXPECT_CHAR(input, ',', "base class section");
				
				for(s64 i = 0; i < *base_class_count; i++) {
					StabsStructOrUnionType::BaseClass base_class;
					
					char is_virtual = *(input++);
					switch(is_virtual) {
						case '0': base_class.is_virtual = false; break;
						case '1': base_class.is_virtual = true; break;
						default: return CCC_FAILURE("Failed to parse base class (virtual character).");
					}
					
					Result<StabsStructOrUnionType::Visibility> visibility = parse_visibility_character(input);
					CCC_RETURN_IF_ERROR(visibility);
					base_class.visibility = *visibility;
					
					std::optional<s32> offset = parse_number_s32(input);
					CCC_CHECK(offset.has_value(), "Failed to parse base class offset.");
					base_class.offset = (s32) *offset;
					
					CCC_EXPECT_CHAR(input, ',', "base class section");
					
					auto base_class_type = parse_stabs_type(input);
					CCC_RETURN_IF_ERROR(base_class_type);
					base_class.type = std::move(*base_class_type);
					
					CCC_EXPECT_CHAR(input, ';', "base class section");
					struct_type->base_classes.emplace_back(std::move(base_class));
				}
			}
			
			auto fields = parse_field_list(input);
			CCC_RETURN_IF_ERROR(fields);
			struct_type->fields = std::move(*fields);
			
			auto member_functions = parse_member_functions(input);
			CCC_RETURN_IF_ERROR(member_functions);
			struct_type->member_functions = std::move(*member_functions);
			
			STABS_DEBUG_PRINTF("}\n");
			
			out_type = std::move(struct_type);
			break;
		}
		case StabsTypeDescriptor::UNION: { // u
			auto union_type = std::make_unique<StabsUnionType>(type_number);
			STABS_DEBUG_PRINTF("union {\n");
			
			std::optional<s64> union_size = parse_number_s64(input);
			CCC_CHECK(union_size.has_value(), "Failed to parse union size.");
			union_type->size = *union_size;
			
			auto fields = parse_field_list(input);
			CCC_RETURN_IF_ERROR(fields);
			union_type->fields = std::move(*fields);
			
			auto member_functions = parse_member_functions(input);
			CCC_RETURN_IF_ERROR(member_functions);
			union_type->member_functions = std::move(*member_functions);
			
			STABS_DEBUG_PRINTF("}\n");
			
			out_type = std::move(union_type);
			break;
		}
		case StabsTypeDescriptor::CROSS_REFERENCE: { // x
			auto cross_reference = std::make_unique<StabsCrossReferenceType>(type_number);
			
			char cross_reference_type = *(input++);
			CCC_CHECK(cross_reference_type != '\0', "Failed to parse cross reference type.");
			
			switch(cross_reference_type) {
				case 'e': cross_reference->type = ast::ForwardDeclaredType::ENUM; break;
				case 's': cross_reference->type = ast::ForwardDeclaredType::STRUCT; break;
				case 'u': cross_reference->type = ast::ForwardDeclaredType::UNION; break;
				default:
					// Report the character that was read. The enum stored in
					// cross_reference->type has not been assigned on this path.
					return CCC_FAILURE("Invalid cross reference type '%c'.", cross_reference_type);
			}
			
			Result<std::string> identifier = parse_dodgy_stabs_identifier(input, ':');
			CCC_RETURN_IF_ERROR(identifier);
			cross_reference->identifier = std::move(*identifier);
			
			cross_reference->name = cross_reference->identifier;
			CCC_EXPECT_CHAR(input, ':', "cross reference");
			
			out_type = std::move(cross_reference);
			break;
		}
		case StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { // R
			auto fp_builtin = std::make_unique<StabsFloatingPointBuiltInType>(type_number);
			
			std::optional<s32> fpclass = parse_number_s32(input);
			CCC_CHECK(fpclass.has_value(), "Failed to parse floating point built-in class.");
			fp_builtin->fpclass = *fpclass;
			
			CCC_EXPECT_CHAR(input, ';', "floating point builtin");
			
			std::optional<s32> bytes = parse_number_s32(input);
			CCC_CHECK(bytes.has_value(), "Failed to parse floating point built-in.");
			fp_builtin->bytes = *bytes;
			
			CCC_EXPECT_CHAR(input, ';', "floating point builtin");
			
			// The third number has to be present, but its value isn't stored.
			std::optional<s32> value = parse_number_s32(input);
			CCC_CHECK(value.has_value(), "Failed to parse floating point built-in.");
			
			CCC_EXPECT_CHAR(input, ';', "floating point builtin");
			
			out_type = std::move(fp_builtin);
			break;
		}
		case StabsTypeDescriptor::METHOD: { // #
			auto method = std::make_unique<StabsMethodType>(type_number);
			
			if(*input == '#') {
				// Short form: "##<return type>;" with no class type or parameters.
				input++;
				
				auto return_type = parse_stabs_type(input);
				CCC_RETURN_IF_ERROR(return_type);
				method->return_type = std::move(*return_type);
				
				if(*input == ';') {
					input++;
				}
			} else {
				// Long form: "#<class type>,<return type>[,<parameter type>]...;".
				auto class_type = parse_stabs_type(input);
				CCC_RETURN_IF_ERROR(class_type);
				method->class_type = std::move(*class_type);
				
				CCC_EXPECT_CHAR(input, ',', "method");
				
				auto return_type = parse_stabs_type(input);
				CCC_RETURN_IF_ERROR(return_type);
				method->return_type = std::move(*return_type);
				
				while(*input != '\0') {
					if(*input == ';') {
						input++;
						break;
					}
					
					CCC_EXPECT_CHAR(input, ',', "method");
					
					auto parameter_type = parse_stabs_type(input);
					CCC_RETURN_IF_ERROR(parameter_type);
					method->parameter_types.emplace_back(std::move(*parameter_type));
				}
			}
			
			out_type = std::move(method);
			break;
		}
		case StabsTypeDescriptor::REFERENCE: { // &
			auto reference = std::make_unique<StabsReferenceType>(type_number);
			
			auto value_type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(value_type);
			reference->value_type = std::move(*value_type);
			
			out_type = std::move(reference);
			break;
		}
		case StabsTypeDescriptor::POINTER: { // *
			auto pointer = std::make_unique<StabsPointerType>(type_number);
			
			auto value_type = parse_stabs_type(input);
			CCC_RETURN_IF_ERROR(value_type);
			pointer->value_type = std::move(*value_type);
			
			out_type = std::move(pointer);
			break;
		}
		case StabsTypeDescriptor::TYPE_ATTRIBUTE: { // @
			// The '@' descriptor is shared by two constructs. If a type number
			// follows it is a pointer to a data member, otherwise it is a type
			// attribute, of which only the size attribute is supported.
			if((*input >= '0' && *input <= '9') || *input == '(') {
				auto member_pointer = std::make_unique<StabsPointerToDataMemberType>(type_number);
				
				auto class_type = parse_stabs_type(input);
				CCC_RETURN_IF_ERROR(class_type);
				member_pointer->class_type = std::move(*class_type);
				
				CCC_EXPECT_CHAR(input, ',', "pointer to non-static data member");
				
				auto member_type = parse_stabs_type(input);
				CCC_RETURN_IF_ERROR(member_type);
				member_pointer->member_type = std::move(*member_type);
				
				out_type = std::move(member_pointer);
			} else {
				auto type_attribute = std::make_unique<StabsSizeTypeAttributeType>(type_number);
				CCC_CHECK(*input == 's', "Weird value following '@' type descriptor.");
				input++;
				
				std::optional<s64> size_bits = parse_number_s64(input);
				CCC_CHECK(size_bits.has_value(), "Failed to parse type attribute.")
				type_attribute->size_bits = *size_bits;
				CCC_EXPECT_CHAR(input, ';', "type attribute");
				
				auto type = parse_stabs_type(input);
				CCC_RETURN_IF_ERROR(type);
				type_attribute->type = std::move(*type);
				
				out_type = std::move(type_attribute);
			}
			break;
		}
		case StabsTypeDescriptor::BUILTIN: { // -
			auto built_in = std::make_unique<StabsBuiltInType>(type_number);
			
			std::optional<s64> type_id = parse_number_s64(input);
			CCC_CHECK(type_id.has_value(), "Failed to parse built-in.");
			built_in->type_id = *type_id;
			
			CCC_EXPECT_CHAR(input, ';', "builtin");
			
			out_type = std::move(built_in);
			break;
		}
		default: {
			return CCC_FAILURE(
				"Invalid type descriptor '%c' (%02x).",
				(u32) descriptor, (u32) descriptor);
		}
	}
	
	return out_type;
}
// Parse the list of fields in a struct or union.
//
// Parsing stops after a ';' that appears where a field would start (the ';' is
// consumed), when the start of the member function list is detected (the cursor
// is rewound to the start of that entry), or when the end of the input is
// reached.
//
// input is a cursor into a null terminated string and is advanced past
// everything that gets consumed.
static Result<std::vector<StabsStructOrUnionType::Field>> parse_field_list(const char*& input)
{
std::vector<StabsStructOrUnionType::Field> fields;
while(*input != '\0') {
if(*input == ';') {
input++;
break;
}
// Remember where this entry starts in case it turns out not to be a field.
const char* before_field = input;
StabsStructOrUnionType::Field field;
Result<std::string> name = parse_dodgy_stabs_identifier(input, ':');
CCC_RETURN_IF_ERROR(name);
field.name = std::move(*name);
CCC_EXPECT_CHAR(input, ':', "identifier");
// An optional "/<visibility>" prefix before the type.
if(*input == '/') {
input++;
Result<StabsStructOrUnionType::Visibility> visibility = parse_visibility_character(input);
CCC_RETURN_IF_ERROR(visibility);
field.visibility = *visibility;
}
// A second ':' means this entry has the form "name::", which is what
// parse_member_functions expects a member function set to start with, so
// rewind and let the caller hand over to it.
if(*input == ':') {
input = before_field;
break;
}
auto type = parse_stabs_type(input);
CCC_RETURN_IF_ERROR(type);
field.type = std::move(*type);
if(field.name.size() >= 1 && field.name[0] == '$') {
// Virtual function table pointers and virtual base class pointers.
CCC_EXPECT_CHAR(input, ',', "field type");
std::optional<s32> offset_bits = parse_number_s32(input);
CCC_CHECK(offset_bits.has_value(), "Failed to parse field offset.");
field.offset_bits = *offset_bits;
CCC_EXPECT_CHAR(input, ';', "field offset");
} else if(*input == ':') {
// Static fields.
input++;
field.is_static = true;
std::optional<std::string> type_name = parse_stabs_identifier(input, ';');
CCC_CHECK(type_name.has_value(), "Failed to parse static field type name.");
field.type_name = std::move(*type_name);
CCC_EXPECT_CHAR(input, ';', "identifier");
} else if(*input == ',') {
// Normal fields.
input++;
std::optional<s32> offset_bits = parse_number_s32(input);
CCC_CHECK(offset_bits.has_value(), "Failed to parse field offset.");
field.offset_bits = *offset_bits;
CCC_EXPECT_CHAR(input, ',', "field offset");
std::optional<s32> size_bits = parse_number_s32(input);
CCC_CHECK(size_bits.has_value(), "Failed to parse field size.");
field.size_bits = *size_bits;
CCC_EXPECT_CHAR(input, ';', "field size");
} else {
return CCC_FAILURE("Expected ':' or ',', got '%c' (%hhx).", *input, *input);
}
STABS_DEBUG(print_field(field);)
fields.emplace_back(std::move(field));
}
return fields;
}
// Parse the list of member functions that follows the field list of a struct
// or union.
//
// The list is made up of sets, one per function name, each starting with
// "name::" and containing one entry per overload. Both the list and each set
// are ended by a ';', or by the end of the input.
//
// input is a cursor into a null terminated string and is advanced past
// everything that gets consumed.
static Result<std::vector<StabsStructOrUnionType::MemberFunctionSet>> parse_member_functions(const char*& input)
{
// Check for if the next character is from an enclosing field list. If this
// is the case, the next character will be ',' for normal fields and ':' for
// static fields (see above).
if(*input == ',' || *input == ':') {
return std::vector<StabsStructOrUnionType::MemberFunctionSet>();
}
std::vector<StabsStructOrUnionType::MemberFunctionSet> member_functions;
while(*input != '\0') {
if(*input == ';') {
input++;
break;
}
StabsStructOrUnionType::MemberFunctionSet member_function_set;
std::optional<std::string> name = parse_stabs_identifier(input, ':');
CCC_CHECK(name.has_value(), "Failed to parse member function name.");
member_function_set.name = std::move(*name);
CCC_EXPECT_CHAR(input, ':', "member function");
CCC_EXPECT_CHAR(input, ':', "member function");
// One iteration per overload in this set.
while(*input != '\0') {
if(*input == ';') {
input++;
break;
}
StabsStructOrUnionType::MemberFunction function;
auto type = parse_stabs_type(input);
CCC_RETURN_IF_ERROR(type);
function.type = std::move(*type);
CCC_EXPECT_CHAR(input, ':', "member function");
// The identifier has to be present, but it isn't stored anywhere.
std::optional<std::string> identifier = parse_stabs_identifier(input, ';');
CCC_CHECK(identifier.has_value(), "Invalid member function identifier.");
CCC_EXPECT_CHAR(input, ';', "member function");
Result<StabsStructOrUnionType::Visibility> visibility = parse_visibility_character(input);
CCC_RETURN_IF_ERROR(visibility);
function.visibility = *visibility;
// 'A' to 'D' encode the const and volatile qualifiers. For '?' and '.' the
// two flags are left at whatever their defaults are.
char modifiers = *(input++);
CCC_CHECK(modifiers != '\0', "Failed to parse member function modifiers.");
switch(modifiers) {
case 'A':
function.is_const = false;
function.is_volatile = false;
break;
case 'B':
function.is_const = true;
function.is_volatile = false;
break;
case 'C':
function.is_const = false;
function.is_volatile = true;
break;
case 'D':
function.is_const = true;
function.is_volatile = true;
break;
case '?':
case '.':
break;
default:
return CCC_FAILURE("Invalid member function modifiers.");
}
char flag = *(input++);
CCC_CHECK(flag != '\0', "Failed to parse member function type.");
switch(flag) {
case '.': { // normal member function
function.modifier = ast::MemberFunctionModifier::NONE;
break;
}
case '?': { // static member function
function.modifier = ast::MemberFunctionModifier::STATIC;
break;
}
case '*': { // virtual member function
std::optional<s32> vtable_index = parse_number_s32(input);
CCC_CHECK(vtable_index.has_value(), "Failed to parse vtable index.");
function.vtable_index = *vtable_index;
CCC_EXPECT_CHAR(input, ';', "virtual member function");
auto virtual_type = parse_stabs_type(input);
CCC_RETURN_IF_ERROR(virtual_type);
function.virtual_type = std::move(*virtual_type);
CCC_EXPECT_CHAR(input, ';', "virtual member function");
function.modifier = ast::MemberFunctionModifier::VIRTUAL;
break;
}
default:
return CCC_FAILURE("Invalid member function type.");
}
member_function_set.overloads.emplace_back(std::move(function));
}
STABS_DEBUG_PRINTF("member func: %s\n", member_function_set.name.c_str());
member_functions.emplace_back(std::move(member_function_set));
}
return member_functions;
}
// Consume one character and translate it into a visibility. The cursor is
// advanced by one even if the character turns out to be invalid.
static Result<StabsStructOrUnionType::Visibility> parse_visibility_character(const char*& input)
{
	const char code = *input;
	input++;
	
	if(code == '0') {
		return StabsStructOrUnionType::Visibility::PRIVATE;
	}
	if(code == '1') {
		return StabsStructOrUnionType::Visibility::PROTECTED;
	}
	if(code == '2') {
		return StabsStructOrUnionType::Visibility::PUBLIC;
	}
	if(code == '9') {
		return StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT;
	}
	
	return CCC_FAILURE("Failed to parse visibility character.");
}
std::optional<s32> parse_number_s32(const char*& input)
{
char* end;
s64 value = strtoll(input, &end, 10);
if(end == input) {
return std::nullopt;
}
input = end;
return (s32) value;
}
std::optional<s64> parse_number_s64(const char*& input)
{
char* end;
s64 value = strtoll(input, &end, 10);
if(end == input) {
return std::nullopt;
}
input = end;
return value;
}
// Read characters up to, but not including, the first occurrence of the
// terminator. On success the cursor is left pointing at the terminator. If the
// end of the string is reached first, an empty optional is returned and the
// cursor is left pointing at the null terminator.
std::optional<std::string> parse_stabs_identifier(const char*& input, char terminator)
{
	const char* const start = input;
	while(*input != '\0' && *input != terminator) {
		input++;
	}
	if(*input == '\0') {
		return std::nullopt;
	}
	return std::string(start, input);
}
// The complexity here is because the input may contain an unescaped namespace
// separator '::' even if the field terminator is supposed to be a colon, as
// well as the raw contents of character literals. See test/ccc/stabs_tests.cpp
// for some examples.
//
// Reads characters up to, but not including, the first terminator that is
// outside of all template argument lists and character literals. On success the
// cursor is left pointing at the terminator. If the end of the string is
// reached first, the truncated symbol error is returned.
Result<std::string> parse_dodgy_stabs_identifier(const char*& input, char terminator)
{
	const char* begin = input;
	s32 template_depth = 0;
	
	for(; *input != '\0'; input++) {
		// Skip past character literals.
		if(*input == '\'') {
			input++;
			if(*input == '\'') {
				input++; // Handle character literals containing a single quote.
			}
			while(*input != '\'' && *input != '\0') {
				input++;
			}
			if(*input == '\0') {
				break;
			}
			input++;
			// If the string ends directly after the closing quote, stop here.
			// Otherwise the loop increment would step over the null terminator
			// and the loop condition would read past the end of the string.
			if(*input == '\0') {
				break;
			}
		}
		
		// Keep track of the template depth so we know when to expect the
		// terminator character.
		if(*input == '<') {
			template_depth++;
		}
		if(*input == '>') {
			template_depth--;
		}
		
		if(*input == terminator && template_depth == 0) {
			return std::string(begin, input);
		}
	}
	
	return CCC_FAILURE(STAB_TRUNCATED_ERROR_MESSAGE);
}
// Debug helper that prints a field's offset and size, in bytes and then in
// bits, followed by its name. It is wrapped in STABS_DEBUG, so it is only
// compiled when that macro is changed to expand to its arguments.
STABS_DEBUG(
static void print_field(const StabsStructOrUnionType::Field& field)
{
printf("\t%04x %04x %04x %04x %s\n", field.offset_bits / 8, field.size_bits / 8, field.offset_bits, field.size_bits, field.name.c_str());
}
)
// Map a visibility to a lower case string. Any value that isn't handled
// explicitly below produces "none".
const char* stabs_field_visibility_to_string(StabsStructOrUnionType::Visibility visibility)
{
	const char* text = "none";
	switch(visibility) {
		case StabsStructOrUnionType::Visibility::PRIVATE:
			text = "private";
			break;
		case StabsStructOrUnionType::Visibility::PROTECTED:
			text = "protected";
			break;
		case StabsStructOrUnionType::Visibility::PUBLIC:
			text = "public";
			break;
		case StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT:
			text = "public_optimizedout";
			break;
		default:
			break;
	}
	return text;
}
}

379
3rdparty/ccc/src/ccc/stabs.h vendored Normal file
View File

@ -0,0 +1,379 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "ast.h"
#include "util.h"
namespace ccc {
// The character that follows the colon in a STABS symbol string. Each
// enumerator's value is the character itself, so the parser converts with a
// plain cast and then validates the result.
enum class StabsSymbolDescriptor : u8 {
// Assigned by parse_stabs_symbol when the type follows the colon directly
// (a digit or '('), in which case no descriptor character is consumed.
LOCAL_VARIABLE = '_',
REFERENCE_PARAMETER_A = 'a',
LOCAL_FUNCTION = 'f',
GLOBAL_FUNCTION = 'F',
GLOBAL_VARIABLE = 'G',
REGISTER_PARAMETER = 'P',
VALUE_PARAMETER = 'p',
REGISTER_VARIABLE = 'r',
STATIC_GLOBAL_VARIABLE = 'S',
TYPE_NAME = 't',
ENUM_STRUCT_OR_TYPE_TAG = 'T',
STATIC_LOCAL_VARIABLE = 'V',
REFERENCE_PARAMETER_V = 'v'
};
struct StabsType;
// A parsed STABS symbol, as produced by parse_stabs_symbol.
struct StabsSymbol {
StabsSymbolDescriptor descriptor;
// The text before the first top-level colon in the symbol string.
std::string name;
// Root of the parsed type tree.
std::unique_ptr<StabsType> type;
};
Result<StabsSymbol> parse_stabs_symbol(const char*& input);
// Identifies the kind of a type definition. Most enumerators are equal to the
// descriptor character that introduces the definition in the input. The two
// with values of 0xee and above are not read from the input as characters.
enum class StabsTypeDescriptor : u8 {
TYPE_REFERENCE = 0xef, // '0'..'9','('
ARRAY = 'a',
ENUM = 'e',
FUNCTION = 'f',
CONST_QUALIFIER = 'k',
RANGE = 'r',
STRUCT = 's',
UNION = 'u',
CROSS_REFERENCE = 'x',
VOLATILE_QUALIFIER = 'B',
FLOATING_POINT_BUILTIN = 'R',
METHOD = '#',
REFERENCE = '&',
POINTER = '*',
TYPE_ATTRIBUTE = '@',
POINTER_TO_DATA_MEMBER = 0xee, // also '@'
BUILTIN = '-'
};
struct StabsBaseClass;
struct StabsField;
struct StabsMemberFunctionSet;
// e.g. for "123=*456" 123 would be the type_number, the type descriptor would
// be of type POINTER and StabsPointerType::value_type would point to a type
// with type_number = 456.
//
// This is the base class of all the nodes in a parsed type tree. A node with
// no descriptor is a bare reference to a type number defined elsewhere.
struct StabsType {
StabsTypeNumber type_number;
// The name field is only populated for root types and cross references.
std::optional<std::string> name;
bool is_typedef = false;
bool is_root = false;
std::optional<StabsTypeDescriptor> descriptor;
StabsType(StabsTypeNumber n) : type_number(n) {}
StabsType(StabsTypeDescriptor d) : descriptor(d) {}
StabsType(StabsTypeNumber n, StabsTypeDescriptor d) : type_number(n), descriptor(d) {}
virtual ~StabsType() {}
// Downcast to a concrete node type. The assertion checks that this node's
// descriptor matches the DESCRIPTOR constant of the requested type.
template <typename SubType>
SubType& as()
{
CCC_ASSERT(descriptor == SubType::DESCRIPTOR);
return *static_cast<SubType*>(this);
}
template <typename SubType>
const SubType& as() const
{
CCC_ASSERT(descriptor == SubType::DESCRIPTOR);
return *static_cast<const SubType*>(this);
}
// Record this node in the output map if it has both a valid type number and
// a descriptor. Subclasses that own child types override this to also visit
// their children.
virtual void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const
{
if(type_number.valid() && descriptor.has_value()) {
output.emplace(type_number, this);
}
}
};
// A type whose definition is just another type, e.g. "123=456".
struct StabsTypeReferenceType : StabsType {
// The type being referred to.
std::unique_ptr<StabsType> type;
StabsTypeReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_REFERENCE;
// Visits this node and then the referenced type.
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
type->enumerate_numbered_types(output);
}
};
// An array type ('a'), made up of an index type followed by an element type.
struct StabsArrayType : StabsType {
std::unique_ptr<StabsType> index_type;
std::unique_ptr<StabsType> element_type;
StabsArrayType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::ARRAY;
// Visits this node, then the index type, then the element type.
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
index_type->enumerate_numbered_types(output);
element_type->enumerate_numbered_types(output);
}
};
// An enum type ('e'). It has no child types, so the base class implementation
// of enumerate_numbered_types is used as is.
struct StabsEnumType : StabsType {
// The enumerators as (value, name) pairs, in the order they were parsed.
std::vector<std::pair<s32, std::string>> fields;
StabsEnumType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::ENUM;
};
struct StabsFunctionType : StabsType {
std::unique_ptr<StabsType> return_type;
StabsFunctionType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::FUNCTION;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
return_type->enumerate_numbered_types(output);
}
};
struct StabsVolatileQualifierType : StabsType {
std::unique_ptr<StabsType> type;
StabsVolatileQualifierType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::VOLATILE_QUALIFIER;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
type->enumerate_numbered_types(output);
}
};
struct StabsConstQualifierType : StabsType {
std::unique_ptr<StabsType> type;
StabsConstQualifierType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::CONST_QUALIFIER;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
type->enumerate_numbered_types(output);
}
};
struct StabsRangeType : StabsType {
std::unique_ptr<StabsType> type;
std::string low;
std::string high; // Some compilers wrote out a wrapped around value here for zero (or variable?) length arrays.
StabsRangeType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::RANGE;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
type->enumerate_numbered_types(output);
}
};
// Shared representation of struct and union types: base classes, fields and
// member functions. The concrete descriptor (STRUCT or UNION) is supplied by
// the derived type through the constructor.
struct StabsStructOrUnionType : StabsType {
	enum class Visibility : u8 {
		NONE,
		PRIVATE,
		PROTECTED,
		PUBLIC,
		PUBLIC_OPTIMIZED_OUT
	};
	
	// One entry of the base class list.
	struct BaseClass {
		bool is_virtual;
		Visibility visibility;
		s32 offset = -1;
		std::unique_ptr<StabsType> type;
	};
	
	// One data member. Offset and size are both expressed in bits.
	struct Field {
		std::string name;
		Visibility visibility = Visibility::NONE;
		std::unique_ptr<StabsType> type;
		bool is_static = false;
		s32 offset_bits = 0;
		s32 size_bits = 0;
		std::string type_name;
	};
	
	// A single overload of a member function.
	struct MemberFunction {
		std::unique_ptr<StabsType> type;
		// May be null, see enumerate_numbered_types.
		std::unique_ptr<StabsType> virtual_type;
		Visibility visibility;
		bool is_const = false;
		bool is_volatile = false;
		ast::MemberFunctionModifier modifier = ast::MemberFunctionModifier::NONE;
		s32 vtable_index = -1;
	};
	
	// All the overloads that share a single name.
	struct MemberFunctionSet {
		std::string name;
		std::vector<MemberFunction> overloads;
	};
	
	s64 size = -1;
	std::vector<BaseClass> base_classes;
	std::vector<Field> fields;
	std::vector<MemberFunctionSet> member_functions;
	// May be null, see enumerate_numbered_types.
	std::unique_ptr<StabsType> first_base_class;
	
	StabsStructOrUnionType(StabsTypeNumber n, StabsTypeDescriptor d) : StabsType(n, d) {}
	
	// Records this type, then recurses through the base classes, the fields,
	// the member function types (plus their virtual types where present) and
	// finally the first base class if there is one.
	void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
	{
		StabsType::enumerate_numbered_types(output);
		
		for(const auto& base : base_classes) {
			base.type->enumerate_numbered_types(output);
		}
		
		for(const auto& member : fields) {
			member.type->enumerate_numbered_types(output);
		}
		
		for(const auto& overload_set : member_functions) {
			for(const auto& overload : overload_set.overloads) {
				overload.type->enumerate_numbered_types(output);
				if(overload.virtual_type) {
					overload.virtual_type->enumerate_numbered_types(output);
				}
			}
		}
		
		if(first_base_class) {
			first_base_class->enumerate_numbered_types(output);
		}
	}
};
// A struct type: StabsStructOrUnionType tagged with the STRUCT descriptor.
struct StabsStructType : StabsStructOrUnionType {
	static constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::STRUCT;
	
	StabsStructType(StabsTypeNumber n) : StabsStructOrUnionType(n, DESCRIPTOR) {}
};
// A union type: StabsStructOrUnionType tagged with the UNION descriptor.
struct StabsUnionType : StabsStructOrUnionType {
	static constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::UNION;
	
	StabsUnionType(StabsTypeNumber n) : StabsStructOrUnionType(n, DESCRIPTOR) {}
};
// A cross reference to a type by identifier (descriptor CROSS_REFERENCE). It
// owns no child types, so the base class enumerate_numbered_types is
// sufficient.
struct StabsCrossReferenceType : StabsType {
	static constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::CROSS_REFERENCE;
	
	// The kind of type that was forward declared.
	ast::ForwardDeclaredType type;
	// The name of the type being referred to.
	std::string identifier;
	
	StabsCrossReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
};
// A floating point built-in type (descriptor FLOATING_POINT_BUILTIN). Both
// fields remain at -1 until they are assigned.
struct StabsFloatingPointBuiltInType : StabsType {
	static constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::FLOATING_POINT_BUILTIN;
	
	s32 fpclass = -1;
	// Size of the type in bytes.
	s32 bytes = -1;
	
	StabsFloatingPointBuiltInType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
};
// A method type (descriptor METHOD): a return type, an optional class type
// and a list of parameter types.
struct StabsMethodType : StabsType {
	static constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::METHOD;
	
	std::unique_ptr<StabsType> return_type;
	std::optional<std::unique_ptr<StabsType>> class_type;
	std::vector<std::unique_ptr<StabsType>> parameter_types;
	
	StabsMethodType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
	
	// Records this type, then the return type, then the class type if the
	// optional is populated, then each parameter type in order.
	void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
	{
		StabsType::enumerate_numbered_types(output);
		
		return_type->enumerate_numbered_types(output);
		
		if(class_type) {
			const std::unique_ptr<StabsType>& owner = *class_type;
			owner->enumerate_numbered_types(output);
		}
		
		for(const auto& parameter : parameter_types) {
			parameter->enumerate_numbered_types(output);
		}
	}
};
struct StabsReferenceType : StabsType {
std::unique_ptr<StabsType> value_type;
StabsReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::REFERENCE;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
value_type->enumerate_numbered_types(output);
}
};
struct StabsPointerType : StabsType {
std::unique_ptr<StabsType> value_type;
StabsPointerType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
value_type->enumerate_numbered_types(output);
}
};
struct StabsSizeTypeAttributeType : StabsType {
s64 size_bits = -1;
std::unique_ptr<StabsType> type;
StabsSizeTypeAttributeType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_ATTRIBUTE;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
type->enumerate_numbered_types(output);
}
};
struct StabsPointerToDataMemberType : StabsType {
std::unique_ptr<StabsType> class_type;
std::unique_ptr<StabsType> member_type;
StabsPointerToDataMemberType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER_TO_DATA_MEMBER;
void enumerate_numbered_types(std::map<StabsTypeNumber, const StabsType*>& output) const override
{
StabsType::enumerate_numbered_types(output);
class_type->enumerate_numbered_types(output);
member_type->enumerate_numbered_types(output);
}
};
// A built-in type identified by a numeric id (descriptor BUILTIN). It owns no
// child types, so the base class enumerate_numbered_types is sufficient.
struct StabsBuiltInType : StabsType {
	static constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::BUILTIN;
	
	// Identifier of the built-in, -1 until it is assigned.
	s64 type_id = -1;
	
	StabsBuiltInType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {}
};
// Declarations for the STABS type parser. The definitions are not in this
// header.
// NOTE(review): the parse functions all take the input pointer by reference,
// presumably so that it can be advanced past the characters that were
// consumed -- confirm against the definitions.

// Message string that is defined in another translation unit; judging by the
// name it is reported when a STABS string ends prematurely (TODO confirm).
extern const char* STAB_TRUNCATED_ERROR_MESSAGE;
// Parse a complete type from a STABS string, returning either the root of the
// parsed type tree or an error.
Result<std::unique_ptr<StabsType>> parse_top_level_stabs_type(const char*& input);
// Parse a number as a 32-bit or 64-bit signed integer respectively. The result
// is an optional, presumably empty when no number could be parsed.
std::optional<s32> parse_number_s32(const char*& input);
std::optional<s64> parse_number_s64(const char*& input);
// Parse an identifier that is delimited by the given terminator character.
std::optional<std::string> parse_stabs_identifier(const char*& input, char terminator);
// Variant of the above that reports failure through a Result rather than an
// optional.
Result<std::string> parse_dodgy_stabs_identifier(const char*& input, char terminator);
// Get a string that describes the given visibility value.
const char* stabs_field_visibility_to_string(StabsStructOrUnionType::Visibility visibility);
}

834
3rdparty/ccc/src/ccc/stabs_to_ast.cpp vendored Normal file
View File

@ -0,0 +1,834 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "stabs_to_ast.h"
#include "importer_flags.h"
// Debug tracing for the STABS to AST conversion. As written, the replacement
// list of AST_DEBUG is commented out, so it expands to nothing and every
// AST_DEBUG_PRINTF call is compiled out. Delete the "//" to enable the trace
// output.
#define AST_DEBUG(...) //__VA_ARGS__
#define AST_DEBUG_PRINTF(...) AST_DEBUG(printf(__VA_ARGS__);)
namespace ccc {
// The name and classification of a member function, as produced by
// check_member_function and copied onto the AST nodes of its overloads.
struct MemberFunctionInfo {
	// The name to assign to the AST node.
	std::string name;
	// Whether the function is a constructor or a destructor.
	bool is_constructor_or_destructor{false};
	// Whether the function is a constructor, a destructor or operator=.
	bool is_special_member_function{false};
	bool is_operator_member_function{false};
};
// Forward declarations of the file-local helpers that are defined below
// stabs_type_to_ast.

// Checks if a type is indistinguishable from void or a pointer to void.
static bool is_void_like(const StabsType& type);
// Maps the bounds of a range type to a built-in class.
static Result<ast::BuiltInClass> classify_range(const StabsRangeType& type);
// Converts a single struct or union field to an AST node.
static Result<std::unique_ptr<ast::Node>> field_to_ast(
const StabsStructOrUnionType::Field& field,
const StabsType& enclosing_struct,
const StabsToAstState& state,
s32 depth);
// Determines whether or not a field should be treated as a bitfield.
static Result<bool> detect_bitfield(const StabsStructOrUnionType::Field& field, const StabsToAstState& state);
// Converts all of the member functions of a struct or union to AST nodes.
static Result<std::vector<std::unique_ptr<ast::Node>>> member_functions_to_ast(
const StabsStructOrUnionType& type, const StabsToAstState& state, s32 depth);
// Works out the name to use for a member function and classifies it.
static MemberFunctionInfo check_member_function(
const std::string& mangled_name,
std::string_view type_name_no_template_args,
const DemanglerFunctions& demangler,
u32 importer_flags);
// Recursively convert a parsed STABS type into an AST node.
//
// type:                 The STABS type to convert.
// enclosing_struct:     The struct or union currently being converted, or
//                       nullptr. Used together with force_substitute to detect
//                       a type that refers back to its enclosing struct.
// state:                Supplies the importer flags, the source file handle
//                       and the map used to look up types by type number.
// depth:                The current recursion depth. Conversion is abandoned
//                       once this exceeds 200.
// substitute_type_name: If true, a named type is emitted as a TypeName node
//                       instead of being expanded inline.
// force_substitute:     If true, a type with the same type number as the
//                       enclosing struct is emitted as an UNNAMED_THIS
//                       TypeName node instead of being expanded again.
//
// Returns the converted node, or an error. For the recursion limit and for
// failed type number lookups an error is only returned if STRICT_PARSING is
// set, otherwise a warning is emitted and an ast::Error node is returned.
Result<std::unique_ptr<ast::Node>> stabs_type_to_ast(
	const StabsType& type,
	const StabsType* enclosing_struct,
	const StabsToAstState& state,
	s32 depth,
	bool substitute_type_name,
	bool force_substitute)
{
	AST_DEBUG_PRINTF("%-*stype desc=%hhx '%c' num=(%d,%d) name=%s\n",
		depth * 4, "",
		type.descriptor.has_value() ? (u8) *type.descriptor : 'X',
		(type.descriptor.has_value() && isprint((u8) *type.descriptor)) ? (u8) *type.descriptor : '!',
		type.type_number.file, type.type_number.type,
		type.name.has_value() ? type.name->c_str() : "");
	
	if(depth > 200) {
		const char* error_message = "Call depth greater than 200 in stabs_type_to_ast, probably infinite recursion.";
		if(state.importer_flags & STRICT_PARSING) {
			return CCC_FAILURE(error_message);
		} else {
			CCC_WARN(error_message);
			
			auto error = std::make_unique<ast::Error>();
			error->message = error_message;
			return std::unique_ptr<ast::Node>(std::move(error));
		}
	}
	
	// This makes sure that types are replaced with their type name in cases
	// where that would be more appropriate.
	if(type.name.has_value()) {
		bool try_substitute = depth > 0 && (type.is_root
			|| type.descriptor == StabsTypeDescriptor::RANGE
			|| type.descriptor == StabsTypeDescriptor::BUILTIN);
		// GCC emits anonymous enums with a name of " " since apparently some
		// debuggers can't handle zero-length names.
		bool is_name_empty = type.name == "" || type.name == " ";
		// Cross references will be handled below.
		bool is_cross_reference = type.descriptor == StabsTypeDescriptor::CROSS_REFERENCE;
		bool is_void = is_void_like(type);
		if((substitute_type_name || try_substitute) && !is_name_empty && !is_cross_reference && !is_void) {
			auto type_name = std::make_unique<ast::TypeName>();
			type_name->source = ast::TypeNameSource::REFERENCE;
			type_name->unresolved_stabs = std::make_unique<ast::TypeName::UnresolvedStabs>();
			type_name->unresolved_stabs->type_name = *type.name;
			type_name->unresolved_stabs->referenced_file_handle = state.file_handle;
			type_name->unresolved_stabs->stabs_type_number = type.type_number;
			return std::unique_ptr<ast::Node>(std::move(type_name));
		}
	}
	
	// This prevents infinite recursion when an automatically generated member
	// function references an unnamed type.
	bool can_compare_type_numbers = type.type_number.valid() && enclosing_struct && enclosing_struct->type_number.valid();
	if(force_substitute && can_compare_type_numbers && type.type_number == enclosing_struct->type_number) {
		// It's probably a this parameter (or return type) for an unnamed type.
		auto type_name = std::make_unique<ast::TypeName>();
		type_name->source = ast::TypeNameSource::UNNAMED_THIS;
		type_name->unresolved_stabs = std::make_unique<ast::TypeName::UnresolvedStabs>();
		type_name->unresolved_stabs->type_name = enclosing_struct->name.has_value() ? *enclosing_struct->name : "";
		type_name->unresolved_stabs->referenced_file_handle = state.file_handle;
		type_name->unresolved_stabs->stabs_type_number = type.type_number;
		return std::unique_ptr<ast::Node>(std::move(type_name));
	}
	
	if(!type.descriptor.has_value()) {
		// The definition of the type has been defined previously, so we have to
		// look it up by its type number.
		CCC_CHECK(type.type_number.valid(), "Cannot lookup type (type is anonymous).");
		auto stabs_type = state.stabs_types->find(type.type_number);
		if(stabs_type == state.stabs_types->end()) {
			std::string error_message = "Failed to lookup STABS type by its type number ("
				+ std::to_string(type.type_number.file) + "," + std::to_string(type.type_number.type) + ").";
			if(state.importer_flags & STRICT_PARSING) {
				return CCC_FAILURE("%s", error_message.c_str());
			} else {
				CCC_WARN("%s", error_message.c_str());
				std::unique_ptr<ast::Error> error = std::make_unique<ast::Error>();
				error->message = std::move(error_message);
				return std::unique_ptr<ast::Node>(std::move(error));
			}
		}
		return stabs_type_to_ast(
			*stabs_type->second,
			enclosing_struct,
			state,
			depth + 1,
			substitute_type_name,
			force_substitute);
	}
	
	std::unique_ptr<ast::Node> result;
	
	switch(*type.descriptor) {
		case StabsTypeDescriptor::TYPE_REFERENCE: {
			const auto& stabs_type_ref = type.as<StabsTypeReferenceType>();
			if(!type.type_number.valid() || !stabs_type_ref.type->type_number.valid() || stabs_type_ref.type->type_number != type.type_number) {
				auto node = stabs_type_to_ast(
					*stabs_type_ref.type,
					enclosing_struct,
					state,
					depth + 1,
					substitute_type_name,
					force_substitute);
				CCC_RETURN_IF_ERROR(node);
				result = std::move(*node);
			} else {
				// I still don't know why in STABS void is a reference to
				// itself, maybe because I'm not a philosopher.
				auto builtin = std::make_unique<ast::BuiltIn>();
				builtin->bclass = ast::BuiltInClass::VOID_TYPE;
				result = std::move(builtin);
			}
			break;
		}
		case StabsTypeDescriptor::ARRAY: {
			auto array = std::make_unique<ast::Array>();
			const auto& stabs_array = type.as<StabsArrayType>();
			
			auto element_node = stabs_type_to_ast(
				*stabs_array.element_type,
				enclosing_struct,
				state,
				depth + 1,
				true,
				force_substitute);
			CCC_RETURN_IF_ERROR(element_node);
			array->element_type = std::move(*element_node);
			
			// The element count is derived from the bounds of the index type,
			// which must be a range that starts at zero.
			const StabsRangeType& index = stabs_array.index_type->as<StabsRangeType>();
			
			char* end = nullptr;
			
			const char* low = index.low.c_str();
			s64 low_value = strtoll(low, &end, 10);
			CCC_CHECK(end != low, "Failed to parse low part of range as integer.");
			CCC_CHECK(low_value == 0, "Invalid index type for array.");
			
			const char* high = index.high.c_str();
			s64 high_value = strtoll(high, &end, 10);
			CCC_CHECK(end != high, "Failed to parse high part of range as integer.");
			
			if(high_value == 4294967295) {
				// Some compilers wrote out a wrapped around value here.
				array->element_count = 0;
			} else {
				array->element_count = (s32) high_value + 1;
			}
			
			result = std::move(array);
			break;
		}
		case StabsTypeDescriptor::ENUM: {
			auto inline_enum = std::make_unique<ast::Enum>();
			const auto& stabs_enum = type.as<StabsEnumType>();
			inline_enum->constants = stabs_enum.fields;
			result = std::move(inline_enum);
			break;
		}
		case StabsTypeDescriptor::FUNCTION: {
			auto function = std::make_unique<ast::Function>();
			
			auto node = stabs_type_to_ast(
				*type.as<StabsFunctionType>().return_type,
				enclosing_struct,
				state,
				depth + 1,
				true,
				force_substitute);
			CCC_RETURN_IF_ERROR(node);
			function->return_type = std::move(*node);
			
			result = std::move(function);
			break;
		}
		case StabsTypeDescriptor::VOLATILE_QUALIFIER: {
			const auto& volatile_qualifier = type.as<StabsVolatileQualifierType>();
			
			auto node = stabs_type_to_ast(
				*volatile_qualifier.type.get(),
				enclosing_struct,
				state,
				depth + 1,
				substitute_type_name,
				force_substitute);
			CCC_RETURN_IF_ERROR(node);
			result = std::move(*node);
			
			result->is_volatile = true;
			break;
		}
		case StabsTypeDescriptor::CONST_QUALIFIER: {
			const auto& const_qualifier = type.as<StabsConstQualifierType>();
			
			auto node = stabs_type_to_ast(
				*const_qualifier.type.get(),
				enclosing_struct,
				state,
				depth + 1,
				substitute_type_name,
				force_substitute);
			// Propagate the failure instead of dereferencing a failed result.
			CCC_RETURN_IF_ERROR(node);
			result = std::move(*node);
			
			result->is_const = true;
			break;
		}
		case StabsTypeDescriptor::RANGE: {
			auto builtin = std::make_unique<ast::BuiltIn>();
			Result<ast::BuiltInClass> bclass = classify_range(type.as<StabsRangeType>());
			CCC_RETURN_IF_ERROR(bclass);
			builtin->bclass = *bclass;
			result = std::move(builtin);
			break;
		}
		case StabsTypeDescriptor::STRUCT:
		case StabsTypeDescriptor::UNION: {
			const StabsStructOrUnionType* stabs_struct_or_union;
			if(type.descriptor == StabsTypeDescriptor::STRUCT) {
				stabs_struct_or_union = &type.as<StabsStructType>();
			} else {
				stabs_struct_or_union = &type.as<StabsUnionType>();
			}
			
			auto struct_or_union = std::make_unique<ast::StructOrUnion>();
			struct_or_union->is_struct = type.descriptor == StabsTypeDescriptor::STRUCT;
			struct_or_union->size_bits = (s32) stabs_struct_or_union->size * 8;
			
			// From here on the current type is passed down as the enclosing
			// struct.
			for(const StabsStructOrUnionType::BaseClass& stabs_base_class : stabs_struct_or_union->base_classes) {
				auto base_class = stabs_type_to_ast(
					*stabs_base_class.type,
					&type,
					state,
					depth + 1,
					true,
					force_substitute);
				CCC_RETURN_IF_ERROR(base_class);
				
				(*base_class)->offset_bytes = stabs_base_class.offset;
				(*base_class)->set_access_specifier(stabs_field_visibility_to_access_specifier(stabs_base_class.visibility), state.importer_flags);
				
				if(stabs_base_class.is_virtual) {
					(*base_class)->is_virtual_base_class = true;
				}
				
				struct_or_union->base_classes.emplace_back(std::move(*base_class));
			}
			
			AST_DEBUG_PRINTF("%-*s beginfields\n", depth * 4, "");
			for(const StabsStructOrUnionType::Field& field : stabs_struct_or_union->fields) {
				auto node = field_to_ast(field, type, state, depth);
				CCC_RETURN_IF_ERROR(node);
				struct_or_union->fields.emplace_back(std::move(*node));
			}
			AST_DEBUG_PRINTF("%-*s endfields\n", depth * 4, "");
			
			AST_DEBUG_PRINTF("%-*s beginmemberfuncs\n", depth * 4, "");
			Result<std::vector<std::unique_ptr<ast::Node>>> member_functions =
				member_functions_to_ast(*stabs_struct_or_union, state, depth);
			CCC_RETURN_IF_ERROR(member_functions);
			struct_or_union->member_functions = std::move(*member_functions);
			AST_DEBUG_PRINTF("%-*s endmemberfuncs\n", depth * 4, "");
			
			result = std::move(struct_or_union);
			break;
		}
		case StabsTypeDescriptor::CROSS_REFERENCE: {
			const auto& cross_reference = type.as<StabsCrossReferenceType>();
			auto type_name = std::make_unique<ast::TypeName>();
			type_name->source = ast::TypeNameSource::CROSS_REFERENCE;
			type_name->unresolved_stabs = std::make_unique<ast::TypeName::UnresolvedStabs>();
			type_name->unresolved_stabs->type_name = cross_reference.identifier;
			type_name->unresolved_stabs->type = cross_reference.type;
			result = std::move(type_name);
			break;
		}
		case ccc::StabsTypeDescriptor::FLOATING_POINT_BUILTIN: {
			const auto& fp_builtin = type.as<StabsFloatingPointBuiltInType>();
			auto builtin = std::make_unique<ast::BuiltIn>();
			// The built-in class is picked based on the size in bytes alone.
			switch(fp_builtin.bytes) {
				case 1: builtin->bclass = ast::BuiltInClass::UNSIGNED_8; break;
				case 2: builtin->bclass = ast::BuiltInClass::UNSIGNED_16; break;
				case 4: builtin->bclass = ast::BuiltInClass::UNSIGNED_32; break;
				case 8: builtin->bclass = ast::BuiltInClass::UNSIGNED_64; break;
				case 16: builtin->bclass = ast::BuiltInClass::UNSIGNED_128; break;
				default: builtin->bclass = ast::BuiltInClass::UNSIGNED_8; break;
			}
			result = std::move(builtin);
			break;
		}
		case StabsTypeDescriptor::METHOD: {
			const auto& stabs_method = type.as<StabsMethodType>();
			auto function = std::make_unique<ast::Function>();
			
			auto return_node = stabs_type_to_ast(
				*stabs_method.return_type.get(),
				enclosing_struct,
				state,
				depth + 1,
				true,
				true);
			CCC_RETURN_IF_ERROR(return_node);
			function->return_type = std::move(*return_node);
			
			function->parameters.emplace();
			for(const std::unique_ptr<StabsType>& parameter_type : stabs_method.parameter_types) {
				auto parameter_node = stabs_type_to_ast(
					*parameter_type,
					enclosing_struct,
					state,
					depth + 1,
					true,
					true);
				CCC_RETURN_IF_ERROR(parameter_node);
				function->parameters->emplace_back(std::move(*parameter_node));
			}
			
			result = std::move(function);
			break;
		}
		case StabsTypeDescriptor::POINTER: {
			auto pointer = std::make_unique<ast::PointerOrReference>();
			pointer->is_pointer = true;
			
			auto value_node = stabs_type_to_ast(
				*type.as<StabsPointerType>().value_type,
				enclosing_struct,
				state,
				depth + 1,
				true,
				force_substitute);
			CCC_RETURN_IF_ERROR(value_node);
			pointer->value_type = std::move(*value_node);
			
			result = std::move(pointer);
			break;
		}
		case StabsTypeDescriptor::REFERENCE: {
			auto reference = std::make_unique<ast::PointerOrReference>();
			reference->is_pointer = false;
			
			auto value_node = stabs_type_to_ast(
				*type.as<StabsReferenceType>().value_type,
				enclosing_struct,
				state,
				depth + 1,
				true,
				force_substitute);
			CCC_RETURN_IF_ERROR(value_node);
			reference->value_type = std::move(*value_node);
			
			result = std::move(reference);
			break;
		}
		case StabsTypeDescriptor::TYPE_ATTRIBUTE: {
			const auto& stabs_type_attribute = type.as<StabsSizeTypeAttributeType>();
			
			auto node = stabs_type_to_ast(
				*stabs_type_attribute.type,
				enclosing_struct,
				state,
				depth + 1,
				substitute_type_name,
				force_substitute);
			CCC_RETURN_IF_ERROR(node);
			result = std::move(*node);
			
			result->size_bits = (s32) stabs_type_attribute.size_bits;
			break;
		}
		case StabsTypeDescriptor::POINTER_TO_DATA_MEMBER: {
			const auto& stabs_member_pointer = type.as<StabsPointerToDataMemberType>();
			auto member_pointer = std::make_unique<ast::PointerToDataMember>();
			
			auto class_node = stabs_type_to_ast(
				*stabs_member_pointer.class_type.get(),
				enclosing_struct,
				state,
				depth + 1,
				true,
				true);
			CCC_RETURN_IF_ERROR(class_node);
			member_pointer->class_type = std::move(*class_node);
			
			auto member_node = stabs_type_to_ast(
				*stabs_member_pointer.member_type.get(),
				enclosing_struct,
				state,
				depth + 1,
				true,
				true);
			CCC_RETURN_IF_ERROR(member_node);
			member_pointer->member_type = std::move(*member_node);
			
			result = std::move(member_pointer);
			break;
		}
		case StabsTypeDescriptor::BUILTIN: {
			// The only built-in type id that is recognised is 16, which is
			// mapped to an 8-bit bool.
			CCC_CHECK(type.as<StabsBuiltInType>().type_id == 16,
				"Unknown built-in type!");
			auto builtin = std::make_unique<ast::BuiltIn>();
			builtin->bclass = ast::BuiltInClass::BOOL_8;
			result = std::move(builtin);
			break;
		}
	}
	
	CCC_CHECK(result, "Result of stabs_type_to_ast call is nullptr.");
	return result;
}
static bool is_void_like(const StabsType& type)
{
// Unfortunately, a common case seems to be that various types (most
// commonly __builtin_va_list) are indistinguishable from void or void*, so
// we have to output them as a void built-in.
if(type.descriptor.has_value()) {
switch(*type.descriptor) {
case StabsTypeDescriptor::POINTER: {
return is_void_like(*type.as<StabsPointerType>().value_type.get());
}
case StabsTypeDescriptor::TYPE_REFERENCE: {
return type.as<StabsTypeReferenceType>().type->type_number == type.type_number;
}
default: {
break;
}
}
}
return false;
}
// Determine which built-in class a range type represents from its bounds.
//
// The bounds are first compared, as strings, against a table of special cases
// and values that are too large to easily store in a 64-bit integer. If none
// of those match, both bounds are parsed as integers (as octal if they start
// with a zero, otherwise as decimal) and compared against a second table.
// Returns an error if a bound cannot be parsed or if no table entry matches.
static Result<ast::BuiltInClass> classify_range(const StabsRangeType& type)
{
	const char* low = type.low.c_str();
	const char* high = type.high.c_str();
	
	// Handle some special cases and values that are too large to easily store
	// in a 64-bit integer.
	static const struct { const char* low; const char* high; ast::BuiltInClass classification; } strings[] = {
		{"4", "0", ast::BuiltInClass::FLOAT_32},
		{"000000000000000000000000", "001777777777777777777777", ast::BuiltInClass::UNSIGNED_64},
		{"00000000000000000000000000000000000000000000", "00000000000000000000001777777777777777777777", ast::BuiltInClass::UNSIGNED_64},
		{"0000000000000", "01777777777777777777777", ast::BuiltInClass::UNSIGNED_64}, // IOP
		{"0", "18446744073709551615", ast::BuiltInClass::UNSIGNED_64},
		{"001000000000000000000000", "000777777777777777777777", ast::BuiltInClass::SIGNED_64},
		{"00000000000000000000001000000000000000000000", "00000000000000000000000777777777777777777777", ast::BuiltInClass::SIGNED_64},
		{"01000000000000000000000", "0777777777777777777777", ast::BuiltInClass::SIGNED_64}, // IOP
		{"-9223372036854775808", "9223372036854775807", ast::BuiltInClass::SIGNED_64},
		{"8", "0", ast::BuiltInClass::FLOAT_64},
		{"00000000000000000000000000000000000000000000", "03777777777777777777777777777777777777777777", ast::BuiltInClass::UNSIGNED_128},
		{"02000000000000000000000000000000000000000000", "01777777777777777777777777777777777777777777", ast::BuiltInClass::SIGNED_128},
		{"000000000000000000000000", "0377777777777777777777777777777777", ast::BuiltInClass::UNQUALIFIED_128},
		{"16", "0", ast::BuiltInClass::FLOAT_128},
		{"0", "-1", ast::BuiltInClass::UNQUALIFIED_128} // Old homebrew toolchain
	};
	
	for(const auto& entry : strings) {
		const bool low_matches = strcmp(entry.low, low) == 0;
		const bool high_matches = strcmp(entry.high, high) == 0;
		if(low_matches && high_matches) {
			return entry.classification;
		}
	}
	
	// For smaller values we actually parse the bounds as integers.
	const int low_base = (low[0] == '0') ? 8 : 10;
	const int high_base = (high[0] == '0') ? 8 : 10;
	
	char* end = nullptr;
	
	s64 low_value = strtoll(low, &end, low_base);
	CCC_CHECK(end != low, "Failed to parse low part of range as integer.");
	
	s64 high_value = strtoll(high, &end, high_base);
	CCC_CHECK(end != high, "Failed to parse high part of range as integer.");
	
	static const struct { s64 low; s64 high; ast::BuiltInClass classification; } integers[] = {
		{0, 255, ast::BuiltInClass::UNSIGNED_8},
		{-128, 127, ast::BuiltInClass::SIGNED_8},
		{0, 127, ast::BuiltInClass::UNQUALIFIED_8},
		{0, 65535, ast::BuiltInClass::UNSIGNED_16},
		{-32768, 32767, ast::BuiltInClass::SIGNED_16},
		{0, 4294967295, ast::BuiltInClass::UNSIGNED_32},
		{-2147483648, 2147483647, ast::BuiltInClass::SIGNED_32},
	};
	
	for(const auto& entry : integers) {
		// The low bound is accepted with either sign.
		if(entry.high != high_value) {
			continue;
		}
		if(entry.low == low_value || entry.low == -low_value) {
			return entry.classification;
		}
	}
	
	return CCC_FAILURE("Failed to classify range.");
}
// Convert a single field of a struct or union to an AST node.
//
// If the field is detected to be a bitfield then the converted type is wrapped
// in an ast::BitField node, otherwise the converted type node is itself
// annotated with the name, offset, size and access specifier of the field.
static Result<std::unique_ptr<ast::Node>> field_to_ast(
	const StabsStructOrUnionType::Field& field,
	const StabsType& enclosing_struct,
	const StabsToAstState& state,
	s32 depth)
{
	AST_DEBUG_PRINTF("%-*s field %s\n", depth * 4, "", field.name.c_str());
	
	Result<bool> is_bitfield = detect_bitfield(field, state);
	CCC_RETURN_IF_ERROR(is_bitfield);
	
	// The type of the field is converted in the same way in both cases.
	Result<std::unique_ptr<ast::Node>> type_node = stabs_type_to_ast(
		*field.type,
		&enclosing_struct,
		state,
		depth + 1,
		true,
		false);
	CCC_RETURN_IF_ERROR(type_node);
	
	if(!*is_bitfield) {
		// Process a normal field.
		ast::Node& node = **type_node;
		node.name = field.name;
		node.offset_bytes = field.offset_bits / 8;
		node.size_bits = field.size_bits;
		node.set_access_specifier(stabs_field_visibility_to_access_specifier(field.visibility), state.importer_flags);
		
		const bool has_vtable_pointer_name =
			field.name.starts_with("$vf")
			|| field.name.starts_with("_vptr$")
			|| field.name.starts_with("_vptr.");
		if(has_vtable_pointer_name) {
			node.is_vtable_pointer = true;
		}
		
		if(field.is_static) {
			node.storage_class = STORAGE_CLASS_STATIC;
		}
		
		return type_node;
	}
	
	// Process bitfields. A name consisting of a single space is treated as
	// being empty.
	auto bitfield = std::make_unique<ast::BitField>();
	if(field.name == " ") {
		bitfield->name = "";
	} else {
		bitfield->name = field.name;
	}
	bitfield->offset_bytes = field.offset_bits / 8;
	bitfield->size_bits = field.size_bits;
	bitfield->underlying_type = std::move(*type_node);
	bitfield->bitfield_offset_bits = field.offset_bits % 8;
	bitfield->set_access_specifier(stabs_field_visibility_to_access_specifier(field.visibility), state.importer_flags);
	
	return std::unique_ptr<ast::Node>(std::move(bitfield));
}
// Determine whether or not a field is a bitfield.
//
// The type of the field is resolved through type number lookups, type
// references and const/volatile qualifiers, and then the size of the resulting
// type is compared to the size of the field. The field is reported to be a
// bitfield if the two sizes differ. False is returned whenever the type cannot
// be resolved or its size cannot be determined.
static Result<bool> detect_bitfield(const StabsStructOrUnionType::Field& field, const StabsToAstState& state)
{
	// Static fields can't be bitfields.
	if(field.is_static) {
		return false;
	}
	
	// Resolve type references. The number of steps is capped at 50 to prevent
	// an infinite loop if there's a cycle (fatal frame).
	const StabsType* resolved = field.type.get();
	bool reached_underlying_type = false;
	for(s32 step = 0; step < 50 && !reached_underlying_type; step++) {
		if(!resolved->descriptor.has_value()) {
			// Only a type number is available, so look up the definition.
			if(!resolved->type_number.valid()) {
				return false;
			}
			
			auto definition = state.stabs_types->find(resolved->type_number);
			if(definition == state.stabs_types->end() || definition->second == resolved) {
				return false;
			}
			
			resolved = definition->second;
			continue;
		}
		
		switch(*resolved->descriptor) {
			case StabsTypeDescriptor::TYPE_REFERENCE: {
				resolved = resolved->as<StabsTypeReferenceType>().type.get();
				break;
			}
			case StabsTypeDescriptor::CONST_QUALIFIER: {
				resolved = resolved->as<StabsConstQualifierType>().type.get();
				break;
			}
			case StabsTypeDescriptor::VOLATILE_QUALIFIER: {
				resolved = resolved->as<StabsVolatileQualifierType>().type.get();
				break;
			}
			default: {
				reached_underlying_type = true;
				break;
			}
		}
	}
	
	if(!reached_underlying_type) {
		return false;
	}
	
	// Determine the size of the underlying type.
	s32 underlying_size_bits = 0;
	switch(*resolved->descriptor) {
		case ccc::StabsTypeDescriptor::RANGE: {
			Result<ast::BuiltInClass> range_class = classify_range(resolved->as<StabsRangeType>());
			CCC_RETURN_IF_ERROR(range_class);
			underlying_size_bits = builtin_class_size(*range_class) * 8;
			break;
		}
		case ccc::StabsTypeDescriptor::CROSS_REFERENCE: {
			// Forward declared enums are taken to be 32 bits in size.
			if(resolved->as<StabsCrossReferenceType>().type != ast::ForwardDeclaredType::ENUM) {
				return false;
			}
			underlying_size_bits = 32;
			break;
		}
		case ccc::StabsTypeDescriptor::TYPE_ATTRIBUTE: {
			underlying_size_bits = (s32) resolved->as<StabsSizeTypeAttributeType>().size_bits;
			break;
		}
		case ccc::StabsTypeDescriptor::BUILTIN: {
			underlying_size_bits = 8; // bool
			break;
		}
		default: {
			return false;
		}
	}
	
	if(underlying_size_bits == 0) {
		return false;
	}
	
	return field.size_bits != underlying_size_bits;
}
// Convert all of the member functions of a struct or union to AST nodes.
//
// An empty list is returned if the NO_MEMBER_FUNCTIONS flag is set. An empty
// list is also returned if every member function set is a special member
// function and the INCLUDE_GENERATED_MEMBER_FUNCTIONS flag is not set.
static Result<std::vector<std::unique_ptr<ast::Node>>> member_functions_to_ast(
	const StabsStructOrUnionType& type, const StabsToAstState& state, s32 depth)
{
	if(state.importer_flags & NO_MEMBER_FUNCTIONS) {
		return std::vector<std::unique_ptr<ast::Node>>();
	}
	
	// The name of the type up to the first '<', used for recognising named
	// constructors and destructors.
	std::string_view type_name_no_template_args;
	if(type.name.has_value()) {
		const std::string& full_name = *type.name;
		const auto template_args_begin = full_name.find("<");
		type_name_no_template_args = std::string_view(full_name).substr(0, template_args_begin);
	}
	
	std::vector<std::unique_ptr<ast::Node>> output;
	bool found_regular_function = false;
	
	for(const auto& overload_set : type.member_functions) {
		const MemberFunctionInfo info = check_member_function(
			overload_set.name, type_name_no_template_args, state.demangler, state.importer_flags);
		
		if(!info.is_special_member_function) {
			found_regular_function = true;
		}
		
		for(const auto& overload : overload_set.overloads) {
			auto converted = stabs_type_to_ast(
				*overload.type,
				&type,
				state,
				depth + 1,
				true,
				true);
			CCC_RETURN_IF_ERROR(converted);
			
			ast::Node& node = **converted;
			node.is_constructor_or_destructor = info.is_constructor_or_destructor;
			node.is_special_member_function = info.is_special_member_function;
			node.is_operator_member_function = info.is_operator_member_function;
			node.name = info.name;
			node.set_access_specifier(stabs_field_visibility_to_access_specifier(overload.visibility), state.importer_flags);
			
			// The modifier and vtable index only exist on function nodes.
			if(node.descriptor == ast::FUNCTION) {
				ast::Function& function = node.as<ast::Function>();
				function.modifier = overload.modifier;
				function.vtable_index = overload.vtable_index;
			}
			
			output.emplace_back(std::move(*converted));
		}
	}
	
	const bool include_generated = (state.importer_flags & INCLUDE_GENERATED_MEMBER_FUNCTIONS) != 0;
	if(!found_regular_function && !include_generated) {
		return std::vector<std::unique_ptr<ast::Node>>();
	}
	
	return output;
}
// Work out the name to use for a member function, and classify it as a
// constructor/destructor and/or a special member function.
//
// mangled_name:               The name of the function from the symbol table.
// type_name_no_template_args: The name of the enclosing type with any
//                             template arguments removed. May be empty.
// demangler:                  Provides the optional cplus_demangle_opname
//                             callback.
// importer_flags:             Demangling is skipped if DONT_DEMANGLE_NAMES is
//                             set.
static MemberFunctionInfo check_member_function(
	const std::string& mangled_name,
	std::string_view type_name_no_template_args,
	const DemanglerFunctions& demangler,
	u32 importer_flags)
{
	MemberFunctionInfo info;
	
	// Some compiler versions output gcc opnames for overloaded operators
	// instead of their proper names.
	const bool should_demangle = (importer_flags & DONT_DEMANGLE_NAMES) == 0;
	if(should_demangle && demangler.cplus_demangle_opname) {
		char* opname = demangler.cplus_demangle_opname(mangled_name.c_str(), 0);
		if(opname) {
			info.name = opname;
			free(reinterpret_cast<void*>(opname));
		}
	}
	
	// Fall back to the mangled name if nothing came out of the demangler.
	if(info.name.empty()) {
		info.name = mangled_name;
	}
	
	const bool is_constructor =
		info.name == "__ct" // Takes a parameter to decide whether or not to construct virtual base classes.
		|| info.name == "__comp_ctor" // Constructs virtual base classes.
		|| info.name == "__base_ctor" // Does not construct virtual base classes.
		|| (!type_name_no_template_args.empty() && info.name == type_name_no_template_args); // Named constructor.
	
	const bool is_destructor =
		info.name == "__dt" // Takes parameters to decide whether or not to construct virtual base classes and/or delete the object.
		|| info.name == "__comp_dtor" // Destructs virtual base classes.
		|| info.name == "__base_dtor" // Does not construct virtual base classes.
		|| info.name == "__deleting_dtor" // Destructs virtual base classes then deletes the entire object.
		|| (!info.name.empty()
			&& info.name[0] == '~'
			&& std::string_view(info.name).substr(1) == type_name_no_template_args); // Named destructor.
	
	// Names that start with "$_" are classified as constructors/destructors
	// too.
	info.is_constructor_or_destructor = is_constructor || is_destructor || info.name.starts_with("$_");
	info.is_special_member_function = info.is_constructor_or_destructor || info.name == "operator=";
	
	return info;
}
void fix_recursively_emitted_structures(
	ast::StructOrUnion& outer_struct, const std::string& name, StabsTypeNumber type_number, SourceFileHandle file_handle)
{
	// This is a rather peculiar case. For some compiler versions, when a struct
	// or a union defined using a typedef is being emitted and it needs to
	// reference itself from a member function parameter, it will emit its
	// entire definition again in the middle of the first definition, although
	// thankfully it won't recurse more than once.
	//
	// The game Sega Soccer Slam is affected by this. See the PeculiarParameter
	// test case in mdebug_importer_tests.cpp for a bare bones example.
	//
	// Every pointer/reference parameter that points at such a duplicated
	// definition is rewritten below to point at a type name node that refers
	// back to the outer struct by name, file handle and STABS type number.
	
	// Since C++ doesn't allow struct definitions in function parameter lists
	// normally, and most of the time the member function parameters aren't
	// even filled in by GCC, this is a really rare case, so we only bother to
	// compare the member counts to decide whether a struct found in a
	// parameter list is similar to the outer struct.
	const auto resembles_outer_struct = [&outer_struct](const ast::StructOrUnion& candidate) {
		return candidate.base_classes.size() == outer_struct.base_classes.size()
			&& candidate.fields.size() == outer_struct.fields.size()
			&& candidate.member_functions.size() == outer_struct.member_functions.size();
	};
	
	for(std::unique_ptr<ast::Node>& member_function : outer_struct.member_functions) {
		if(member_function->descriptor != ast::FUNCTION) {
			continue;
		}
		
		ast::Function& function = member_function->as<ast::Function>();
		if(!function.parameters.has_value()) {
			continue;
		}
		
		for(std::unique_ptr<ast::Node>& parameter : *function.parameters) {
			if(parameter->descriptor != ast::POINTER_OR_REFERENCE) {
				continue;
			}
			
			ast::PointerOrReference& pointer = parameter->as<ast::PointerOrReference>();
			if(pointer.value_type->descriptor == ast::STRUCT_OR_UNION
				&& resembles_outer_struct(pointer.value_type->as<ast::StructOrUnion>())) {
				// Swap the duplicated definition for a reference to the outer type.
				auto reference = std::make_unique<ast::TypeName>();
				reference->source = ast::TypeNameSource::REFERENCE;
				reference->unresolved_stabs = std::make_unique<ast::TypeName::UnresolvedStabs>();
				reference->unresolved_stabs->type_name = name;
				reference->unresolved_stabs->referenced_file_handle = file_handle;
				reference->unresolved_stabs->stabs_type_number = type_number;
				pointer.value_type = std::move(reference);
			}
		}
	}
}
// Map the visibility of a STABS field or member function onto an AST access
// specifier. Everything that isn't explicitly protected or private, including
// values not listed in the switch, is treated as public.
ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsStructOrUnionType::Visibility visibility)
{
	using Visibility = StabsStructOrUnionType::Visibility;
	
	switch(visibility) {
		case Visibility::PROTECTED:
			return ast::AS_PROTECTED;
		case Visibility::PRIVATE:
			return ast::AS_PRIVATE;
		case Visibility::NONE:
		case Visibility::PUBLIC:
		case Visibility::PUBLIC_OPTIMIZED_OUT:
			break;
	}
	
	return ast::AS_PUBLIC;
}
}

29
3rdparty/ccc/src/ccc/stabs_to_ast.h vendored Normal file
View File

@ -0,0 +1,29 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "ast.h"
#include "stabs.h"
namespace ccc {
// State that is passed by const reference through the STABS to AST conversion
// (see stabs_type_to_ast). None of the members have default initialisers, so
// whoever creates one of these has to fill in every field.
struct StabsToAstState {
// Handle of the file being processed.
u32 file_handle;
// Lookup table from STABS type numbers to parsed STABS types. Not owned.
std::map<StabsTypeNumber, const StabsType*>* stabs_types;
// Bit flags, tested against constants such as
// INCLUDE_GENERATED_MEMBER_FUNCTIONS.
u32 importer_flags;
// Callbacks used for demangling names.
DemanglerFunctions demangler;
};
// Convert a parsed STABS type into an AST node, or return an error. When
// converting the member functions of a struct or union this is called with
// depth + 1 and a pointer to a STABS type as enclosing_struct.
// NOTE(review): the exact semantics of substitute_type_name and
// force_substitute aren't visible from this header; see the definition.
Result<std::unique_ptr<ast::Node>> stabs_type_to_ast(
const StabsType& type,
const StabsType* enclosing_struct,
const StabsToAstState& state,
s32 depth,
bool substitute_type_name,
bool force_substitute);
// Replace struct/union definitions that were emitted a second time inside the
// pointer or reference parameters of outer_struct's member functions with type
// name nodes referring back to the type by name, type number and file handle.
void fix_recursively_emitted_structures(
ast::StructOrUnion& outer_struct, const std::string& name, StabsTypeNumber type_number, SourceFileHandle file_handle);
// Map a STABS visibility onto an AST access specifier. Only PROTECTED and
// PRIVATE map to non-public access specifiers.
ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsStructOrUnionType::Visibility visibility);
}

1204
3rdparty/ccc/src/ccc/symbol_database.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

721
3rdparty/ccc/src/ccc/symbol_database.h vendored Normal file
View File

@ -0,0 +1,721 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include <map>
#include <atomic>
#include <variant>
#include "util.h"
namespace ccc {
// An X macro for all the symbol types. Each entry has the form
// CCC_X(SymbolType, symbol_list) where SymbolType is the name of the symbol
// class and symbol_list is the name of the matching SymbolDatabase member.
// Define CCC_X before expanding this macro and #undef it again afterwards.
#define CCC_FOR_EACH_SYMBOL_TYPE_DO_X \
CCC_X(DataType, data_types) \
CCC_X(Function, functions) \
CCC_X(GlobalVariable, global_variables) \
CCC_X(Label, labels) \
CCC_X(LocalVariable, local_variables) \
CCC_X(Module, modules) \
CCC_X(ParameterVariable, parameter_variables) \
CCC_X(Section, sections) \
CCC_X(SourceFile, source_files) \
CCC_X(SymbolSource, symbol_sources)
// An enum for all the symbol types. Each enumerator is a distinct bit, so
// several of them can be ORed together to form a mask (see the descriptors
// parameters of the SymbolDatabase lookup functions).
enum SymbolDescriptor {
DATA_TYPE = 1 << 0,
FUNCTION = 1 << 1,
GLOBAL_VARIABLE = 1 << 2,
LABEL = 1 << 3,
LOCAL_VARIABLE = 1 << 4,
MODULE = 1 << 5,
PARAMETER_VARIABLE = 1 << 6,
SECTION = 1 << 7,
SOURCE_FILE = 1 << 8,
SYMBOL_SOURCE = 1 << 9
};
// A mask with the low 16 bits set, which covers every SymbolDescriptor bit.
// Used as the default for descriptors parameters.
enum {
ALL_SYMBOL_TYPES = 0xffff
};
// Forward declare all the different types of symbol objects, so that the
// handle types and SymbolList can refer to them before they are defined.
#define CCC_X(SymbolType, symbol_list) class SymbolType;
CCC_FOR_EACH_SYMBOL_TYPE_DO_X
#undef CCC_X
// Defined near the end of this file.
class SymbolDatabase;
// Strongly typed handles for all of the symbol objects. These are here to solve
// the problem of dangling references to symbols.
template <typename SymbolType>
struct SymbolHandle {
// (u32) -1 is reserved to mean that the handle hasn't been initialised.
u32 value = (u32) -1;
SymbolHandle() {}
// Wrap a raw handle value. Note that this conversion is implicit.
SymbolHandle(u32 v) : value(v) {}
// Take the handle of an existing symbol, or create an invalid handle if
// the pointer is null.
SymbolHandle(const SymbolType* symbol)
: value(symbol ? symbol->handle().value : (u32) -1) {}
// Check if this symbol handle has been initialised. Note that this doesn't
// determine whether or not the symbol it points to has been deleted!
bool valid() const { return value != (u32) -1; }
// Handles are compared and ordered by their raw value.
friend auto operator<=>(const SymbolHandle& lhs, const SymbolHandle& rhs) = default;
};
// Declare a handle alias for each symbol type, for example DataTypeHandle is
// SymbolHandle<DataType>.
#define CCC_X(SymbolType, symbol_list) using SymbolType##Handle = SymbolHandle<SymbolType>;
CCC_FOR_EACH_SYMBOL_TYPE_DO_X
#undef CCC_X
// Bit flags that are combined to form the FLAGS constant of each symbol class.
enum SymbolFlag {
NO_SYMBOL_FLAGS = 0,
WITH_ADDRESS_MAP = 1 << 0,
WITH_NAME_MAP = 1 << 1,
NAME_NEEDS_DEMANGLING = 1 << 2
};
// A container class for symbols of a given type that maintains maps of their
// names and addresses depending on the value of SymbolType::FLAGS.
template <typename SymbolType>
class SymbolList {
public:
// Lookup symbols from their handles using binary search.
SymbolType* symbol_from_handle(SymbolHandle<SymbolType> handle);
const SymbolType* symbol_from_handle(SymbolHandle<SymbolType> handle) const;
// Lookup multiple symbols from their handles using binary search.
std::vector<SymbolType*> symbols_from_handles(const std::vector<SymbolHandle<SymbolType>>& handles);
std::vector<const SymbolType*> symbols_from_handles(const std::vector<SymbolHandle<SymbolType>>& handles) const;
// Same as above, but the list of handles itself is optional.
std::vector<SymbolType*> optional_symbols_from_handles(const std::optional<std::vector<SymbolHandle<SymbolType>>>& handles);
std::vector<const SymbolType*> optional_symbols_from_handles(const std::optional<std::vector<SymbolHandle<SymbolType>>>& handles) const;
using Iterator = typename std::vector<SymbolType>::iterator;
using ConstIterator = typename std::vector<SymbolType>::const_iterator;
// For iterating over all the symbols.
Iterator begin();
ConstIterator begin() const;
Iterator end();
ConstIterator end() const;
// Multimaps are used since multiple symbols can share an address or a name.
using AddressToHandleMap = std::multimap<u32, SymbolHandle<SymbolType>>;
using NameToHandleMap = std::multimap<std::string, SymbolHandle<SymbolType>>;
// A pair of iterators that can be used directly in a range-based for loop.
template <typename Iterator>
class Iterators {
public:
Iterators(Iterator b, Iterator e)
: m_begin(b), m_end(e) {}
Iterator begin() const { return m_begin; }
Iterator end() const { return m_end; }
protected:
Iterator m_begin;
Iterator m_end;
};
using AddressToHandleMapIterators = Iterators<typename AddressToHandleMap::const_iterator>;
using NameToHandleMapIterators = Iterators<typename NameToHandleMap::const_iterator>;
// Lookup symbols by their address.
AddressToHandleMapIterators handles_from_starting_address(Address address) const;
AddressToHandleMapIterators handles_from_address_range(AddressRange range) const;
SymbolHandle<SymbolType> first_handle_from_starting_address(Address address) const;
SymbolHandle<SymbolType> first_handle_after_address(Address address) const;
// Lookup symbols by their name.
NameToHandleMapIterators handles_from_name(const std::string& name) const;
SymbolHandle<SymbolType> first_handle_from_name(const std::string& name) const;
// Find a symbol with an address range that contains the provided address.
// For example, to find which function an instruction belongs to.
SymbolType* symbol_overlapping_address(Address address);
const SymbolType* symbol_overlapping_address(Address address) const;
// Convert handles to underlying array indices.
s32 index_from_handle(SymbolHandle<SymbolType> handle) const;
// Index into the underlying array.
SymbolType& symbol_from_index(s32 index);
const SymbolType& symbol_from_index(s32 index) const;
// Determine if any symbols are being stored.
bool empty() const;
// Retrieve the number of symbols stored.
s32 size() const;
// Create a new symbol. If it's a SymbolSource symbol, source can be left
// empty, otherwise it has to be valid.
Result<SymbolType*> create_symbol(
std::string name, Address address, SymbolSourceHandle source, const Module* module_symbol = nullptr);
// Create a new symbol. Similar to above, but for symbols without addresses.
Result<SymbolType*> create_symbol(
std::string name, SymbolSourceHandle source, const Module* module_symbol = nullptr);
// Create a new symbol. Similar to above, but unless DONT_DEMANGLE_NAMES is
// set, the name of the symbol will be demangled.
Result<SymbolType*> create_symbol(
std::string name,
SymbolSourceHandle source,
const Module* module_symbol,
Address address,
u32 importer_flags,
DemanglerFunctions demangler);
// Update the address of a symbol without changing its handle.
bool move_symbol(SymbolHandle<SymbolType> handle, Address new_address);
// Update the name of a symbol without changing its handle.
bool rename_symbol(SymbolHandle<SymbolType> handle, std::string new_name);
// Move all the symbols from the passed list into this list.
void merge_from(SymbolList<SymbolType>& list);
// Mark a symbol for destruction. If the correct symbol database pointer is
// passed, all descendants will also be marked. For example, marking a
// function will also mark its parameters and local variables.
bool mark_symbol_for_destruction(SymbolHandle<SymbolType> handle, SymbolDatabase* database);
// Mark all the symbols from a given symbol source for destruction. For
// example you can use this to free a symbol table without destroying
// user-defined symbols. The behaviour for marking descendants is the same
// as mark_symbol_for_destruction.
void mark_symbols_from_source_for_destruction(SymbolSourceHandle source, SymbolDatabase* database);
// Mark all the symbols from a given module for destruction. The behaviour
// for marking descendants is the same as mark_symbol_for_destruction.
void mark_symbols_from_module_for_destruction(ModuleHandle module_handle, SymbolDatabase* database);
// Destroy all symbols that have previously been marked for destruction.
// This invalidates all pointers to symbols in this list.
void destroy_marked_symbols();
// Destroy all symbols, but don't reset m_next_handle so we don't have to
// worry about dangling handles.
void clear();
protected:
// Do a binary search for a handle, and return either its index, or the
// index where it could be inserted.
size_t binary_search(SymbolHandle<SymbolType> handle) const;
// Keep the address map in sync with the symbol list.
void link_address_map(SymbolType& symbol);
void unlink_address_map(SymbolType& symbol);
// Keep the name map in sync with the symbol list.
void link_name_map(SymbolType& symbol);
void unlink_name_map(SymbolType& symbol);
// The symbols themselves. The handle lookups above use binary search, so
// this is presumably kept sorted by handle.
std::vector<SymbolType> m_symbols;
AddressToHandleMap m_address_to_handle;
NameToHandleMap m_name_to_handle;
// We share this between symbol lists of the same type so that we can merge
// them without having to rewrite all the handles.
static std::atomic<u32> m_next_handle;
};
// Base class for all the symbols. This stores the state that is common to
// every symbol type: the handle, name, address, size, type, symbol source and
// module. SymbolList is a friend so that it can manage the protected members.
class Symbol {
	template <typename SymbolType>
	friend class SymbolList;
public:
	const std::string& name() const { return m_name; }
	// The handle as a plain integer. The derived classes each provide a
	// strongly typed handle() function.
	u32 raw_handle() const { return m_handle; }
	SymbolSourceHandle source() const { return m_source; }
	ModuleHandle module_handle() const { return m_module; }
	
	Address address() const { return m_address; }
	u32 size() const { return m_size; }
	void set_size(u32 size) { m_size = size; }
	// The range from the address of the symbol to its address plus its size.
	// If the address is invalid, zero is used to calculate the end address.
	AddressRange address_range() const { return AddressRange(m_address, m_address.get_or_zero() + m_size); }
	
	// The AST node owned by this symbol, which may be null.
	ast::Node* type() { return m_type.get(); }
	const ast::Node* type() const { return m_type.get(); }
	void set_type(std::unique_ptr<ast::Node> type);
	
	u32 generation() const { return m_generation; }
	
	// This MUST be called after any AST nodes have been created/deleted/moved.
	// For the set_type function this is done for you.
	void invalidate_node_handles() { m_generation++; }
	
	// Mark a single symbol for destruction, not including its descendants.
	void mark_for_destruction() { m_marked_for_destruction = true; }
	// This is const since it only reads a flag, which lets it be called on
	// symbols that are accessed through const references or pointers.
	bool is_marked_for_destruction() const { return m_marked_for_destruction; }
	
protected:
	// Hooks that derived classes can redeclare. They are not virtual.
	void on_create() {}
	void on_destroy(SymbolDatabase* database) {}
	
	u32 m_handle = (u32) -1;
	SymbolSourceHandle m_source;
	Address m_address;
	u32 m_size = 0;
	std::string m_name;
	std::unique_ptr<ast::Node> m_type;
	// The generation counter and the destruction flag share a single u32.
	u32 m_generation : 31 = 0;
	u32 m_marked_for_destruction : 1 = false;
	ModuleHandle m_module;
};
// Variable storage types. This is different to whether the variable is a
// global, local or parameter. For example local variables can have global
// storage (static locals).
//
// NIL is the value used by a default-constructed GlobalStorage.
enum GlobalStorageLocation {
NIL,
DATA,
BSS,
ABS,
SDATA,
SBSS,
RDATA,
COMMON,
SCOMMON,
SUNDEFINED
};
// Get a string for a global storage location. The definition isn't in this
// header, so see there for the exact strings that are returned.
const char* global_storage_location_to_string(GlobalStorageLocation location);
// Storage information for a variable with global storage. Defaults to NIL.
struct GlobalStorage {
GlobalStorageLocation location = GlobalStorageLocation::NIL;
GlobalStorage() {}
friend auto operator<=>(const GlobalStorage& lhs, const GlobalStorage& rhs) = default;
};
// Storage information for a variable that is stored in a register.
struct RegisterStorage {
	// -1 means that no register number has been assigned.
	s32 dbx_register_number = -1;
	// This needs a default value since the user-provided default constructor
	// below wouldn't otherwise initialise it, and the defaulted comparison
	// operator reads every member.
	bool is_by_reference = false;
	RegisterStorage() {}
	friend auto operator<=>(const RegisterStorage& lhs, const RegisterStorage& rhs) = default;
};
// Storage information for a variable that is stored on the stack. The offset
// defaults to -1.
struct StackStorage {
s32 stack_pointer_offset = -1;
StackStorage() {}
friend auto operator<=>(const StackStorage& lhs, const StackStorage& rhs) = default;
};
// The hashing algorithm for functions. If you change this algorithm make sure
// to bump the version number for the JSON format so we can know if a hash was
// generated using the new algorithm or not.
class FunctionHash {
public:
	// Feed the next instruction of the function into the hash.
	void update(u32 instruction)
	{
		// Only the opcode (everything above the low 26 bits) is hashed, so
		// that the result is the same regardless of whether relocations
		// have been applied or not.
		m_hash = (m_hash * 31) + (instruction >> 26);
	}
	
	// Retrieve the hash of all the instructions passed to update so far.
	u32 get() const { return m_hash; }
	
protected:
	u32 m_hash = 0;
};
// All the different types of symbol objects.
// A C/C++ data type. These are looked up by name only (WITH_NAME_MAP).
class DataType : public Symbol {
friend SourceFile;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = DATA_TYPE;
static constexpr const char* NAME = "Data Type";
static constexpr const u32 FLAGS = WITH_NAME_MAP;
// The strongly typed version of raw_handle().
DataTypeHandle handle() const { return m_handle; }
std::vector<SourceFileHandle> files; // List of files for which a given top-level type is present.
// Null by default. NOTE(review): presumably set when a comparison
// against another type fails; confirm against the code that writes it.
const char* compare_fail_reason = nullptr;
bool not_defined_in_any_translation_unit : 1 = false;
bool only_defined_in_single_translation_unit : 1 = false;
};
// A function. The type stored is the return type. Functions can be looked up
// by address and by name, and their names are flagged as needing demangling.
class Function : public Symbol {
friend SourceFile;
friend SymbolList<Function>;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = FUNCTION;
static constexpr const char* NAME = "Function";
static constexpr const u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP | NAME_NEEDS_DEMANGLING;
// The strongly typed version of raw_handle().
FunctionHandle handle() const { return m_handle; }
SourceFileHandle source_file() const { return m_source_file; }
// The parameter variables are optional, so an absent list can be told apart
// from an empty one.
const std::optional<std::vector<ParameterVariableHandle>>& parameter_variables() const;
void set_parameter_variables(std::optional<std::vector<ParameterVariableHandle>> parameter_variables, SymbolDatabase& database);
// The local variables are optional in the same way.
const std::optional<std::vector<LocalVariableHandle>>& local_variables() const;
void set_local_variables(std::optional<std::vector<LocalVariableHandle>> local_variables, SymbolDatabase& database);
const std::string& mangled_name() const;
void set_mangled_name(std::string mangled);
// A hash of all the opcodes in the function, read from file.
u32 original_hash() const;
void set_original_hash(u32 hash);
// A hash of all the opcodes in the function, read from memory.
u32 current_hash() const;
void set_current_hash(FunctionHash hash);
// Associates an address with a line number.
struct LineNumberPair {
Address address;
s32 line_number;
};
// Associates an address with the relative path of a source file.
struct SubSourceFile {
Address address;
std::string relative_path;
};
std::string relative_path;
// Note that this has no default initialiser.
StorageClass storage_class;
s32 stack_frame_size = -1;
std::vector<LineNumberPair> line_numbers;
std::vector<SubSourceFile> sub_source_files;
bool is_member_function_ish = false; // Filled in by fill_in_pointers_to_member_function_definitions.
bool is_no_return = false;
protected:
// Redeclares (hides) the empty hook in Symbol.
void on_destroy(SymbolDatabase* database);
SourceFileHandle m_source_file;
std::optional<std::vector<ParameterVariableHandle>> m_parameter_variables;
std::optional<std::vector<LocalVariableHandle>> m_local_variables;
std::string m_mangled_name;
u32 m_original_hash = 0;
u32 m_current_hash = 0;
};
// A global variable. Global variables can be looked up by address and by name,
// and their names are flagged as needing demangling.
class GlobalVariable : public Symbol {
friend SourceFile;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = GLOBAL_VARIABLE;
static constexpr const char* NAME = "Global Variable";
static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP | NAME_NEEDS_DEMANGLING;
// The strongly typed version of raw_handle().
GlobalVariableHandle handle() const { return m_handle; }
SourceFileHandle source_file() const { return m_source_file; };
const std::string& mangled_name() const;
void set_mangled_name(std::string mangled);
GlobalStorage storage;
// Note that this has no default initialiser.
StorageClass storage_class;
protected:
SourceFileHandle m_source_file;
std::string m_mangled_name;
};
// A label. This could be a label defined in assembly, C/C++, or just a symbol
// that we can't automatically determine the type of (e.g. SNDLL symbols).
// Labels can only be looked up by address (WITH_ADDRESS_MAP).
class Label : public Symbol {
public:
static constexpr const SymbolDescriptor DESCRIPTOR = LABEL;
static constexpr const char* NAME = "Label";
static constexpr u32 FLAGS = WITH_ADDRESS_MAP;
// The strongly typed version of raw_handle().
LabelHandle handle() const { return m_handle; }
// Indicates that this label should not be used as a function name.
bool is_junk = false;
};
// A local variable. This includes static local variables which have global
// storage.
class LocalVariable : public Symbol {
friend Function;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = LOCAL_VARIABLE;
static constexpr const char* NAME = "Local Variable";
static constexpr u32 FLAGS = WITH_ADDRESS_MAP;
// The strongly typed version of raw_handle().
LocalVariableHandle handle() const { return m_handle; }
// The function that this variable belongs to.
FunctionHandle function() const { return m_function; };
// Where the variable is stored. A default-constructed variant holds a
// GlobalStorage.
std::variant<GlobalStorage, RegisterStorage, StackStorage> storage;
AddressRange live_range;
protected:
FunctionHandle m_function;
};
// A program module e.g. an ELF file or an SNDLL file. Every symbol has a module
// field indicating what module the symbol belongs to. This can be used to
// delete all the symbols associated with a given module. Additionally, when a
// valid module pointer is passed to SymbolList<>::create_symbol, the address of
// the symbol will be added to the address of the new symbol.
// NOTE(review): the last sentence above presumably means that the address of
// the module is added to the address of the new symbol; confirm against the
// implementation of create_symbol.
class Module : public Symbol {
friend SymbolList<Module>;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = MODULE;
static constexpr const char* NAME = "Module";
static constexpr u32 FLAGS = WITH_NAME_MAP;
// The strongly typed version of raw_handle().
ModuleHandle handle() const { return m_handle; }
// These are used for IRX modules.
bool is_irx = false;
s32 version_major = -1;
s32 version_minor = -1;
protected:
// Redeclares (hides) the empty hook in Symbol.
void on_create();
};
// A parameter variable. No address or name maps are maintained for these
// (NO_SYMBOL_FLAGS).
class ParameterVariable : public Symbol {
friend Function;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = PARAMETER_VARIABLE;
static constexpr const char* NAME = "Parameter Variable";
static constexpr u32 FLAGS = NO_SYMBOL_FLAGS;
// The strongly typed version of raw_handle().
ParameterVariableHandle handle() const { return m_handle; }
// The function that this parameter belongs to.
FunctionHandle function() const { return m_function; };
// Where the parameter is stored. A default-constructed variant holds a
// RegisterStorage.
std::variant<RegisterStorage, StackStorage> storage;
protected:
FunctionHandle m_function;
};
// An ELF section. These are created from the ELF section headers. Sections can
// be looked up by address and by name.
class Section : public Symbol {
public:
static constexpr const SymbolDescriptor DESCRIPTOR = SECTION;
static constexpr const char* NAME = "Section";
static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP;
// The strongly typed version of raw_handle().
SectionHandle handle() const { return m_handle; }
// Check if the section name is ".text".
bool contains_code() const;
// Check for known data section names.
bool contains_data() const;
};
// A source file (.c or .cpp file). One of these will be created for every
// translation unit in the program (but only if debugging symbols are present).
class SourceFile : public Symbol {
friend SymbolList<SourceFile>;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = SOURCE_FILE;
static constexpr const char* NAME = "Source File";
static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP;
// The strongly typed version of raw_handle().
SourceFileHandle handle() const { return m_handle; }
// The name of a source file symbol is its full path.
const std::string& full_path() const { return name(); }
const std::vector<FunctionHandle>& functions() const;
void set_functions(std::vector<FunctionHandle> functions, SymbolDatabase& database);
const std::vector<GlobalVariableHandle>& global_variables() const;
void set_global_variables(std::vector<GlobalVariableHandle> global_variables, SymbolDatabase& database);
// Check whether at least half of the functions associated with the source
// file match their original hash (meaning they haven't been overwritten).
bool functions_match() const;
void check_functions_match(const SymbolDatabase& database);
std::string working_dir;
std::string command_line_path;
// Maps the STABS type numbers used in this file to data type symbols.
std::map<StabsTypeNumber, DataTypeHandle> stabs_type_number_to_handle;
std::set<std::string> toolchain_version_info;
protected:
// Redeclares (hides) the empty hook in Symbol.
void on_destroy(SymbolDatabase* database);
std::vector<FunctionHandle> m_functions;
std::vector<GlobalVariableHandle> m_global_variables;
bool m_functions_match = true;
};
// A symbol source. Every symbol has a symbol source field indicating how the
// symbol was created. For example, the symbol table importers will each create
// one of these (if it doesn't already exist). Symbol sources are looked up by
// name (WITH_NAME_MAP).
class SymbolSource : public Symbol {
friend SymbolList<SymbolSource>;
public:
static constexpr const SymbolDescriptor DESCRIPTOR = SYMBOL_SOURCE;
static constexpr const char* NAME = "Symbol Source";
static constexpr u32 FLAGS = WITH_NAME_MAP;
// The strongly typed version of raw_handle().
SymbolSourceHandle handle() const { return m_handle; }
protected:
// Redeclares (hides) the empty hook in Symbol.
void on_create();
};
// Bundles together all the information needed to identify if a symbol came from
// a specific symbol table import operation. For example, this is used to make
// sure that we don't reference symbols from another symbol table during the
// import process.
struct SymbolGroup {
SymbolSourceHandle source;
// Not owned. Null by default.
Module* module_symbol = nullptr;
// Test whether the passed symbol belongs to this group.
bool is_in_group(const Symbol& symbol) const;
};
// The symbol database itself. This owns all the symbols. There is one public
// symbol list for each entry in the CCC_FOR_EACH_SYMBOL_TYPE_DO_X macro.
class SymbolDatabase {
public:
SymbolList<DataType> data_types;
SymbolList<Function> functions;
SymbolList<GlobalVariable> global_variables;
SymbolList<Label> labels;
SymbolList<LocalVariable> local_variables;
SymbolList<Module> modules;
SymbolList<ParameterVariable> parameter_variables;
SymbolList<Section> sections;
SymbolList<SourceFile> source_files;
SymbolList<SymbolSource> symbol_sources;
// Sum up the symbol counts for each symbol list.
s32 symbol_count() const;
// Find a symbol of any of the specified types given an address. Symbols of
// the types specified higher up in the CCC_FOR_EACH_SYMBOL_TYPE_DO_X macro
// are checked for first. The descriptors parameter is a mask of
// SymbolDescriptor values.
const Symbol* symbol_starting_at_address(
Address address, u32 descriptors = ALL_SYMBOL_TYPES, SymbolDescriptor* descriptor_out = nullptr) const;
const Symbol* symbol_after_address(
Address address, u32 descriptors = ALL_SYMBOL_TYPES, SymbolDescriptor* descriptor_out = nullptr) const;
const Symbol* symbol_overlapping_address(
Address address, u32 descriptors = ALL_SYMBOL_TYPES, SymbolDescriptor* descriptor_out = nullptr) const;
// Find a symbol of any of the specified types given its name. Symbols of
// the types specified higher up in the CCC_FOR_EACH_SYMBOL_TYPE_DO_X macro
// are checked for first.
const Symbol* symbol_with_name(
const std::string& name, u32 descriptors = ALL_SYMBOL_TYPES, SymbolDescriptor* descriptor_out = nullptr) const;
// Finds a symbol source object with the given name or creates one if it
// doesn't already exist.
Result<SymbolSourceHandle> get_symbol_source(const std::string& name);
// Deduplicate matching data types with the same name. May replace the
// existing data type with the new one if the new one is better.
Result<DataType*> create_data_type_if_unique(
std::unique_ptr<ast::Node> node,
StabsTypeNumber number,
const char* name,
SourceFile& source_file,
const SymbolGroup& group);
// Move all the symbols in the passed database into this database.
void merge_from(SymbolDatabase& database);
// Destroy all the symbols from a given symbol source. For example you can
// use this to free a symbol table without destroying user-defined symbols.
void destroy_symbols_from_source(SymbolSourceHandle source, bool destroy_descendants);
// Destroy all the symbols from a given module.
void destroy_symbols_from_module(ModuleHandle module_handle, bool destroy_descendants);
// Destroy all the symbols that have previously been marked for destruction.
// This invalidates all pointers to symbols in this database.
void destroy_marked_symbols();
// Destroy all the symbols in the symbol database.
void clear();
// Call the callback once for every symbol in every list, in the order the
// lists appear in the CCC_FOR_EACH_SYMBOL_TYPE_DO_X macro. The argument is
// a reference to the concrete symbol type, so the callback has to accept
// all of the symbol types (e.g. by taking a Symbol&).
template <typename Callback>
void for_each_symbol(Callback callback) {
// Use indices here to avoid iterator invalidation.
#define CCC_X(SymbolType, symbol_list) \
for(s32 i = 0; i < symbol_list.size(); i++) { \
callback(symbol_list.symbol_from_index(i)); \
}
CCC_FOR_EACH_SYMBOL_TYPE_DO_X
#undef CCC_X
}
};
// A handle to a symbol of any type. This pairs a raw handle with a symbol
// descriptor that identifies the type of symbol being referred to.
class MultiSymbolHandle {
public:
// Create an empty multi symbol handle.
MultiSymbolHandle();
// Create a multi symbol handle of the specified type.
template <typename SymbolType>
MultiSymbolHandle(const SymbolType& symbol);
MultiSymbolHandle(SymbolDescriptor descriptor, u32 handle);
bool valid() const;
SymbolDescriptor descriptor() const;
u32 handle() const;
// Retrieve the symbol from the passed database.
Symbol* lookup_symbol(SymbolDatabase& database);
const Symbol* lookup_symbol(const SymbolDatabase& database) const;
bool is_flag_set(SymbolFlag flag) const;
// These operate on the symbol being referred to rather than on the handle
// itself, hence they are const.
bool move_symbol(Address new_address, SymbolDatabase& database) const;
bool rename_symbol(std::string new_name, SymbolDatabase& database) const;
bool destroy_symbol(SymbolDatabase& database, bool destroy_descendants) const;
friend auto operator<=>(const MultiSymbolHandle& lhs, const MultiSymbolHandle& rhs) = default;
protected:
// The default values are DATA_TYPE and an invalid handle.
SymbolDescriptor m_descriptor = DATA_TYPE;
u32 m_handle = (u32) -1;
};
// A handle to an AST node. This stores the node pointer along with a handle to
// a symbol and a generation number (see Symbol::generation).
class NodeHandle {
friend SymbolDatabase;
public:
// Create an empty node handle.
NodeHandle();
// Create a node handle that will always allow accesses to its node. You
// should only use this if you know the lifetime of the handle is a subset
// of the lifetime of the node.
NodeHandle(const ast::Node* node);
// Create a node handle pointing to an AST node from a given symbol that
// will prevent accesses to the node if the symbol is deleted.
template <typename SymbolType>
NodeHandle(const SymbolType& symbol, const ast::Node* node);
NodeHandle(SymbolDescriptor descriptor, const Symbol& symbol, const ast::Node* node);
bool valid() const;
const MultiSymbolHandle& symbol() const;
// Retrieve the node, using the passed database for validation.
// NOTE(review): presumably returns null if the symbol has been deleted or
// its generation has changed; confirm against the definition.
const ast::Node* lookup_node(const SymbolDatabase& database) const;
// Create a handle for another node.
// NOTE(review): presumably for a child node belonging to the same symbol.
NodeHandle handle_for_child(const ast::Node* child_node) const;
friend auto operator<=>(const NodeHandle& lhs, const NodeHandle& rhs) = default;
protected:
MultiSymbolHandle m_symbol;
const ast::Node* m_node = nullptr;
u32 m_generation = 0;
};
}

114
3rdparty/ccc/src/ccc/symbol_file.cpp vendored Normal file
View File

@ -0,0 +1,114 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "symbol_file.h"
namespace ccc {
// Determine the type of the input file from the first four bytes of the image
// and parse it. ELF files and SNDLL files ("SNR1" and "SNR2") are supported,
// anything else is rejected with an error.
Result<std::unique_ptr<SymbolFile>> parse_symbol_file(std::vector<u8> image, std::string file_name)
{
	const u32* magic = get_packed<u32>(image, 0);
	CCC_CHECK(magic, "File too small.");
	
	// Take a copy of the identifier since the image may be moved from below.
	const u32 fourcc = *magic;
	
	std::unique_ptr<SymbolFile> symbol_file;
	if(fourcc == CCC_FOURCC("\x7f""ELF")) {
		Result<ElfFile> elf = ElfFile::parse(std::move(image));
		CCC_RETURN_IF_ERROR(elf);
		
		symbol_file = std::make_unique<ElfSymbolFile>(std::move(*elf), std::move(file_name));
	} else if(fourcc == CCC_FOURCC("SNR1") || fourcc == CCC_FOURCC("SNR2")) {
		Result<SNDLLFile> sndll = parse_sndll_file(image, Address(), SNDLLType::DYNAMIC_LIBRARY);
		CCC_RETURN_IF_ERROR(sndll);
		
		symbol_file = std::make_unique<SNDLLSymbolFile>(std::make_shared<SNDLLFile>(std::move(*sndll)));
	} else {
		return CCC_FAILURE("Unknown file type.");
	}
	
	return symbol_file;
}
// Take ownership of a parsed ELF file and the name to report for it.
ElfSymbolFile::ElfSymbolFile(ElfFile elf, std::string elf_name)
: m_elf(std::move(elf)), m_name(std::move(elf_name)) {}
// Retrieve a copy of the name that was passed to the constructor.
std::string ElfSymbolFile::name() const
{
return m_name;
}
// Create a symbol table object for the section headers, followed by one for
// each of the sections listed in SYMBOL_TABLE_FORMATS that is present in the
// ELF file and for which a symbol table object could be created.
Result<std::vector<std::unique_ptr<SymbolTable>>> ElfSymbolFile::get_all_symbol_tables() const
{
	std::vector<std::unique_ptr<SymbolTable>> tables;
	
	// The section headers symbol table is always included, and goes first.
	tables.emplace_back(std::make_unique<ElfSectionHeadersSymbolTable>(m_elf));
	
	for(const SymbolTableFormatInfo& format_info : SYMBOL_TABLE_FORMATS) {
		const ElfSection* section = m_elf.lookup_section(format_info.section_name);
		if(!section) {
			continue;
		}
		
		Result<std::unique_ptr<SymbolTable>> table = create_elf_symbol_table(*section, m_elf, format_info.format);
		CCC_RETURN_IF_ERROR(table);
		
		// A null pointer is a successful result too, it's just skipped.
		if(*table) {
			tables.emplace_back(std::move(*table));
		}
	}
	
	return tables;
}
// Create symbol table objects for an explicit list of sections, in the order
// given. Unlike get_all_symbol_tables, it's an error for one of the requested
// sections to be missing.
Result<std::vector<std::unique_ptr<SymbolTable>>> ElfSymbolFile::get_symbol_tables_from_sections(
	const std::vector<SymbolTableLocation>& sections) const
{
	std::vector<std::unique_ptr<SymbolTable>> tables;
	
	for(const SymbolTableLocation& requested : sections) {
		const char* section_name = requested.section_name.c_str();
		
		const ElfSection* section = m_elf.lookup_section(section_name);
		CCC_CHECK(section, "No '%s' section.", section_name);
		
		Result<std::unique_ptr<SymbolTable>> table = create_elf_symbol_table(*section, m_elf, requested.format);
		CCC_RETURN_IF_ERROR(table);
		
		// A null pointer is a successful result too, it's just skipped.
		if(*table) {
			tables.emplace_back(std::move(*table));
		}
	}
	
	return tables;
}
// Access the parsed ELF file. The reference is to a member of this object.
const ElfFile& ElfSymbolFile::elf() const
{
return m_elf;
}
// Store a shared pointer to a parsed SNDLL file. No null check is performed
// here, and name() dereferences the pointer unconditionally.
SNDLLSymbolFile::SNDLLSymbolFile(std::shared_ptr<SNDLLFile> sndll)
: m_sndll(std::move(sndll)) {}
// The name reported for an SNDLL file is the elf_path field of the parsed
// file, rather than a name provided by the caller.
std::string SNDLLSymbolFile::name() const
{
return m_sndll->elf_path;
}
// An SNDLL file only ever provides a single symbol table, which shares
// ownership of the parsed file with this object.
Result<std::vector<std::unique_ptr<SymbolTable>>> SNDLLSymbolFile::get_all_symbol_tables() const
{
	std::vector<std::unique_ptr<SymbolTable>> tables;
	tables.push_back(std::make_unique<SNDLLSymbolTable>(m_sndll));
	return tables;
}
// This always fails, regardless of the sections that are passed in, since
// section lookups don't apply to SNDLL files.
Result<std::vector<std::unique_ptr<SymbolTable>>> SNDLLSymbolFile::get_symbol_tables_from_sections(
const std::vector<SymbolTableLocation>& sections) const
{
return CCC_FAILURE("An SNDLL file is not composed of sections.");
}
}

62
3rdparty/ccc/src/ccc/symbol_file.h vendored Normal file
View File

@ -0,0 +1,62 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include "elf.h"
#include "sndll.h"
#include "symbol_table.h"
namespace ccc {
// Pairs the name of a section with the symbol table format that the section
// should be parsed as.
struct SymbolTableLocation {
std::string section_name;
SymbolTableFormat format;
};
// Abstract interface for a parsed input file that symbol tables can be
// obtained from.
class SymbolFile {
public:
virtual ~SymbolFile() {}
// A name identifying this file.
virtual std::string name() const = 0;
// Create symbol table objects for all the symbol tables that the
// implementation is able to find in the file.
virtual Result<std::vector<std::unique_ptr<SymbolTable>>> get_all_symbol_tables() const = 0;
// Create symbol table objects only for the listed sections, each parsed
// using the format given alongside it.
virtual Result<std::vector<std::unique_ptr<SymbolTable>>> get_symbol_tables_from_sections(
const std::vector<SymbolTableLocation>& sections) const = 0;
};
// Determine the type of the input file and parse it.
Result<std::unique_ptr<SymbolFile>> parse_symbol_file(std::vector<u8> image, std::string file_name);
// A symbol file backed by an ELF file. The ElfFile and the name are stored by
// value.
class ElfSymbolFile : public SymbolFile {
public:
ElfSymbolFile(ElfFile elf, std::string elf_name);
std::string name() const override;
Result<std::vector<std::unique_ptr<SymbolTable>>> get_all_symbol_tables() const override;
Result<std::vector<std::unique_ptr<SymbolTable>>> get_symbol_tables_from_sections(
const std::vector<SymbolTableLocation>& sections) const override;
// Access the underlying ELF file.
const ElfFile& elf() const;
protected:
ElfFile m_elf;
std::string m_name;
};
// A symbol file backed by an SNDLL file, which is held through a shared
// pointer.
class SNDLLSymbolFile : public SymbolFile {
public:
SNDLLSymbolFile(std::shared_ptr<SNDLLFile> sndll);
std::string name() const override;
Result<std::vector<std::unique_ptr<SymbolTable>>> get_all_symbol_tables() const override;
Result<std::vector<std::unique_ptr<SymbolTable>>> get_symbol_tables_from_sections(
const std::vector<SymbolTableLocation>& sections) const override;
protected:
std::shared_ptr<SNDLLFile> m_sndll;
};
}

283
3rdparty/ccc/src/ccc/symbol_table.cpp vendored Normal file
View File

@ -0,0 +1,283 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "symbol_table.h"
#include "elf.h"
#include "elf_symtab.h"
#include "mdebug_importer.h"
#include "mdebug_section.h"
#include "sndll.h"
namespace ccc {
// Table of supported symbol table formats. Each entry is
// {format, format name, section name}. The symbol_table_format_from_*
// functions return pointers to elements of this vector.
const std::vector<SymbolTableFormatInfo> SYMBOL_TABLE_FORMATS = {
{MDEBUG, "mdebug", ".mdebug"}, // The infamous Third Eye symbol table.
{SYMTAB, "symtab", ".symtab"}, // The standard ELF symbol table.
{SNDLL, "sndll", ".sndata"} // The SNDLL symbol table.
};
// Finds the first SYMBOL_TABLE_FORMATS entry with the given format value.
// Returns a pointer to that entry, or nullptr if there is none.
const SymbolTableFormatInfo* symbol_table_format_from_enum(SymbolTableFormat format)
{
	for(const SymbolTableFormatInfo& candidate : SYMBOL_TABLE_FORMATS) {
		if(candidate.format == format) {
			return &candidate;
		}
	}
	return nullptr;
}
// Finds the first SYMBOL_TABLE_FORMATS entry whose format name is exactly
// equal to format_name (compared with strcmp, so format_name must not be
// null). Returns a pointer to that entry, or nullptr if there is none.
const SymbolTableFormatInfo* symbol_table_format_from_name(const char* format_name)
{
	for(const SymbolTableFormatInfo& candidate : SYMBOL_TABLE_FORMATS) {
		const bool names_match = strcmp(candidate.format_name, format_name) == 0;
		if(names_match) {
			return &candidate;
		}
	}
	return nullptr;
}
// Finds the first SYMBOL_TABLE_FORMATS entry whose section name is exactly
// equal to section_name (compared with strcmp, so section_name must not be
// null). Returns a pointer to that entry, or nullptr if there is none.
const SymbolTableFormatInfo* symbol_table_format_from_section(const char* section_name)
{
	for(const SymbolTableFormatInfo& candidate : SYMBOL_TABLE_FORMATS) {
		const bool names_match = strcmp(candidate.section_name, section_name) == 0;
		if(names_match) {
			return &candidate;
		}
	}
	return nullptr;
}
// *****************************************************************************
// Creates the symbol table object matching 'format' for one section of an ELF
// file.
//
// Even on success the returned pointer may be null: an SNDLL section that is
// shorter than 4 bytes or that starts with a null byte only produces a
// warning. An error is returned if a SYMTAB or SNDLL section (or the string
// table section linked from a SYMTAB section) does not lie entirely within
// elf.image, if a SYMTAB section has a zero or out of range link field, or if
// parsing the SNDLL data fails.
//
// The MDEBUG and SYMTAB tables are constructed from spans that view elf.image
// rather than from copies of the data.
Result<std::unique_ptr<SymbolTable>> create_elf_symbol_table(
	const ElfSection& section, const ElfFile& elf, SymbolTableFormat format)
{
	// Range check written with a subtraction instead of offset + size, so that
	// it cannot be defeated by the sum wrapping around when the header fields
	// are narrower than size_t.
	const auto lies_within_image = [&elf](const ElfSection& checked) {
		const size_t image_size = elf.image.size();
		return checked.header.size <= image_size
			&& checked.header.offset <= image_size - checked.header.size;
	};
	
	std::unique_ptr<SymbolTable> symbol_table;
	switch(format) {
		case MDEBUG: {
			// The .mdebug reader is given the whole image plus the offset of
			// the section, not a span of just the section.
			symbol_table = std::make_unique<MdebugSymbolTable>(elf.image, (s32) section.header.offset);
			break;
		}
		case SYMTAB: {
			CCC_CHECK(lies_within_image(section),
				"Section '%s' out of range.", section.name.c_str());
			std::span<const u8> data = std::span(elf.image).subspan(section.header.offset, section.header.size);
			
			// The link field is used as the index of the string table section.
			CCC_CHECK(section.header.link != 0, "Section '%s' has no linked string table.", section.name.c_str());
			CCC_CHECK(section.header.link < elf.sections.size(),
				"Section '%s' has out of range link field.", section.name.c_str());
			const ElfSection& linked_section = elf.sections[section.header.link];
			
			CCC_CHECK(lies_within_image(linked_section),
				"Linked section '%s' out of range.", linked_section.name.c_str());
			std::span<const u8> linked_data = std::span(elf.image).subspan(
				linked_section.header.offset, linked_section.header.size);
			
			symbol_table = std::make_unique<SymtabSymbolTable>(data, linked_data);
			break;
		}
		case SNDLL: {
			CCC_CHECK(lies_within_image(section),
				"Section '%s' out of range.", section.name.c_str());
			std::span<const u8> data = std::span(elf.image).subspan(section.header.offset, section.header.size);
			
			if(data.size() >= 4 && data[0] != '\0') {
				Result<SNDLLFile> file = parse_sndll_file(data, Address::non_zero(section.header.addr), SNDLLType::SNDATA_SECTION);
				CCC_RETURN_IF_ERROR(file);
				symbol_table = std::make_unique<SNDLLSymbolTable>(std::make_shared<SNDLLFile>(std::move(*file)));
			} else {
				// Not treated as an error: a null symbol table is returned.
				CCC_WARN("Invalid SNDLL section.");
			}
			break;
		}
	}
	
	return symbol_table;
}
// Creates a module symbol named module_name and imports every passed symbol
// table into the database as part of that module.
//
// The module symbol is created under the "Symbol Table Importer" symbol
// source, while each table is imported under a symbol source named after the
// table (symbol_table->name()). If obtaining a symbol source or importing a
// table fails, destroy_symbols_from_module is called for the new module
// before the error is returned. Every element of symbol_tables is
// dereferenced, so none of them may be null.
//
// Returns the handle of the new module on success.
Result<ModuleHandle> import_symbol_tables(
SymbolDatabase& database,
std::string module_name,
const std::vector<std::unique_ptr<SymbolTable>>& symbol_tables,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt)
{
Result<SymbolSourceHandle> module_source = database.get_symbol_source("Symbol Table Importer");
CCC_RETURN_IF_ERROR(module_source);
Result<Module*> module_symbol = database.modules.create_symbol(std::move(module_name), *module_source, nullptr);
CCC_RETURN_IF_ERROR(module_symbol);
// Only the handle is kept. The module pointer is looked up again from the
// handle on every iteration of the loop below.
ModuleHandle module_handle = (*module_symbol)->handle();
for(const std::unique_ptr<SymbolTable>& symbol_table : symbol_tables) {
// Find a symbol source object with the right name, or create one if one
// doesn't already exist.
Result<SymbolSourceHandle> source = database.get_symbol_source(symbol_table->name());
if(!source.success()) {
// Roll back before propagating the error.
database.destroy_symbols_from_module(module_handle, false);
return source;
}
// Import the symbol table.
SymbolGroup group;
group.source = *source;
group.module_symbol = database.modules.symbol_from_handle(module_handle);
Result<void> result = symbol_table->import(
database, group, importer_flags, demangler, interrupt);
if(!result.success()) {
// Roll back before propagating the error.
database.destroy_symbols_from_module(module_handle, false);
return result;
}
}
return module_handle;
}
// *****************************************************************************
// Stores a span viewing the image and the offset of the section. The span
// does not own the data it refers to.
MdebugSymbolTable::MdebugSymbolTable(std::span<const u8> image, s32 section_offset)
: m_image(image), m_section_offset(section_offset) {}
// Returns a fixed string literal naming this kind of symbol table.
const char* MdebugSymbolTable::name() const
{
return "MIPS Debug Symbol Table";
}
// Forwards all arguments, together with the stored image and section offset,
// to mdebug::import_symbol_table and returns its result.
Result<void> MdebugSymbolTable::import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const
{
return mdebug::import_symbol_table(
database, m_image, m_section_offset, group, importer_flags, demangler, interrupt);
}
// Initialises a reader on the stored image and prints the header to out.
// Fails if the reader cannot be initialised.
Result<void> MdebugSymbolTable::print_headers(FILE* out) const
{
mdebug::SymbolTableReader reader;
Result<void> reader_result = reader.init(m_image, m_section_offset);
CCC_RETURN_IF_ERROR(reader_result);
reader.print_header(out);
return Result<void>();
}
// Initialises a reader on the stored image and prints the symbols to out.
// The PRINT_LOCALS, PRINT_PROCEDURE_DESCRIPTORS and PRINT_EXTERNALS bits of
// flags are masked out individually and passed to the reader in that order.
// Fails if the reader cannot be initialised or if printing fails.
Result<void> MdebugSymbolTable::print_symbols(FILE* out, u32 flags) const
{
mdebug::SymbolTableReader reader;
Result<void> reader_result = reader.init(m_image, m_section_offset);
CCC_RETURN_IF_ERROR(reader_result);
Result<void> print_result = reader.print_symbols(
out, flags & PRINT_LOCALS, flags & PRINT_PROCEDURE_DESCRIPTORS, flags & PRINT_EXTERNALS);
CCC_RETURN_IF_ERROR(print_result);
return Result<void>();
}
// *****************************************************************************
// Stores spans viewing the symbol table data and the string table data. The
// spans do not own the data they refer to.
SymtabSymbolTable::SymtabSymbolTable(std::span<const u8> symtab, std::span<const u8> strtab)
: m_symtab(symtab), m_strtab(strtab) {}
// Returns a fixed string literal naming this kind of symbol table.
const char* SymtabSymbolTable::name() const
{
return "ELF Symbol Table";
}
// Forwards to elf::import_symbols with the stored symbol and string table
// spans. The interrupt argument is not used by this implementation.
Result<void> SymtabSymbolTable::import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const
{
return elf::import_symbols(database, group, m_symtab, m_strtab, importer_flags, demangler);
}
// Prints nothing and always succeeds.
Result<void> SymtabSymbolTable::print_headers(FILE* out) const
{
return Result<void>();
}
// Prints the symbol table to out using elf::print_symbol_table and passes on
// any error it reports. The flags argument is not used.
Result<void> SymtabSymbolTable::print_symbols(FILE* out, u32 flags) const
{
Result<void> symbtab_result = elf::print_symbol_table(out, m_symtab, m_strtab);
CCC_RETURN_IF_ERROR(symbtab_result);
return Result<void>();
}
// *****************************************************************************
// Stores the passed shared pointer to an SNDLL file.
SNDLLSymbolTable::SNDLLSymbolTable(std::shared_ptr<SNDLLFile> sndll)
: m_sndll(std::move(sndll)) {}
// Returns a fixed string literal naming this kind of symbol table.
const char* SNDLLSymbolTable::name() const
{
return "SNDLL Symbol Table";
}
// Forwards to import_sndll_symbols with the stored SNDLL file. The interrupt
// argument is not used by this implementation, and m_sndll is dereferenced
// without a null check.
Result<void> SNDLLSymbolTable::import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const
{
return import_sndll_symbols(database, *m_sndll, group, importer_flags, demangler);
}
// Prints nothing and always succeeds.
Result<void> SNDLLSymbolTable::print_headers(FILE* out) const
{
return Result<void>();
}
// Prints the symbols of the stored SNDLL file to out using
// print_sndll_symbols and always reports success. The flags argument is not
// used.
Result<void> SNDLLSymbolTable::print_symbols(FILE* out, u32 flags) const
{
print_sndll_symbols(out, *m_sndll);
return Result<void>();
}
// *****************************************************************************
// Binds the m_elf reference member to the passed ElfFile. No copy is made, so
// the ElfFile has to outlive this object.
ElfSectionHeadersSymbolTable::ElfSectionHeadersSymbolTable(const ElfFile& elf)
: m_elf(elf) {}
// Returns a fixed string literal naming this kind of symbol table.
const char* ElfSectionHeadersSymbolTable::name() const
{
return "ELF Section Headers";
}
// Forwards to ElfFile::create_section_symbols. The importer_flags, demangler
// and interrupt arguments are not used by this implementation.
Result<void> ElfSectionHeadersSymbolTable::import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const
{
return m_elf.create_section_symbols(database, group);
}
// Prints nothing and always succeeds.
Result<void> ElfSectionHeadersSymbolTable::print_headers(FILE* out) const
{
return Result<void>();
}
// Prints nothing and always succeeds.
Result<void> ElfSectionHeadersSymbolTable::print_symbols(FILE* out, u32 flags) const
{
return Result<void>();
}
}

163
3rdparty/ccc/src/ccc/symbol_table.h vendored Normal file
View File

@ -0,0 +1,163 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include <atomic>
#include "symbol_database.h"
namespace ccc {
// Identifies one of the supported symbol table formats.
enum SymbolTableFormat {
MDEBUG = 0, // The infamous Third Eye symbol table
SYMTAB = 1, // Standard ELF symbol table
SNDLL = 2 // SNDLL section
};
// Describes a symbol table format: its enum value, a short name for the
// format, and the name of the section that holds it.
struct SymbolTableFormatInfo {
SymbolTableFormat format;
const char* format_name;
const char* section_name;
};
// All the supported symbol table formats, sorted from best to worst.
extern const std::vector<SymbolTableFormatInfo> SYMBOL_TABLE_FORMATS;
const SymbolTableFormatInfo* symbol_table_format_from_enum(SymbolTableFormat format);
const SymbolTableFormatInfo* symbol_table_format_from_name(const char* format_name);
const SymbolTableFormatInfo* symbol_table_format_from_section(const char* section_name);
// Bit flags that can be combined and passed as the flags argument of
// SymbolTable::print_symbols.
enum SymbolPrintFlags {
PRINT_LOCALS = 1 << 0,
PRINT_PROCEDURE_DESCRIPTORS = 1 << 1,
PRINT_EXTERNALS = 1 << 2
};
// Abstract interface for a symbol table that can be imported into a symbol
// database or printed out.
class SymbolTable {
public:
virtual ~SymbolTable() {}
// A name for this kind of symbol table. import_symbol_tables uses it as
// the name of the symbol source the table is imported under.
virtual const char* name() const = 0;
// Imports this symbol table into the passed database.
virtual Result<void> import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const = 0;
// Print out all the fields in the header structure if one exists.
virtual Result<void> print_headers(FILE* out) const = 0;
// Print out all the symbols in the symbol table. For .mdebug symbol tables
// the symbols are split between those that are local to a specific
// translation unit and those that are external, which is what the
// PRINT_LOCALS and PRINT_EXTERNALS bits of the flags parameter control
// (see SymbolPrintFlags).
virtual Result<void> print_symbols(FILE* out, u32 flags) const = 0;
};
struct ElfSection;
struct ElfFile;
// Create a symbol table from an ELF section. The return value may be null.
Result<std::unique_ptr<SymbolTable>> create_elf_symbol_table(
const ElfSection& section, const ElfFile& elf, SymbolTableFormat format);
// Utility function to call import_symbol_table on all the passed symbol tables
// and to generate a module handle.
Result<ModuleHandle> import_symbol_tables(
SymbolDatabase& database,
std::string module_name,
const std::vector<std::unique_ptr<SymbolTable>>& symbol_tables,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt);
// Symbol table implementation for the MDEBUG format. It holds a non-owning
// span over an image and the offset of the section, so the image data must
// remain valid while this object is in use.
class MdebugSymbolTable : public SymbolTable {
public:
MdebugSymbolTable(std::span<const u8> image, s32 section_offset);
const char* name() const override;
Result<void> import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const override;
Result<void> print_headers(FILE* out) const override;
Result<void> print_symbols(FILE* out, u32 flags) const override;
protected:
std::span<const u8> m_image;
s32 m_section_offset;
};
// Symbol table implementation for the SYMTAB format. It holds non-owning
// spans over the symbol table data and its string table, so that data must
// remain valid while this object is in use.
class SymtabSymbolTable : public SymbolTable {
public:
SymtabSymbolTable(std::span<const u8> symtab, std::span<const u8> strtab);
const char* name() const override;
Result<void> import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const override;
Result<void> print_headers(FILE* out) const override;
Result<void> print_symbols(FILE* out, u32 flags) const override;
protected:
std::span<const u8> m_symtab;
std::span<const u8> m_strtab;
};
struct SNDLLFile;
// Symbol table implementation for the SNDLL format. The parsed SNDLL file is
// held through a shared pointer.
class SNDLLSymbolTable : public SymbolTable {
public:
SNDLLSymbolTable(std::shared_ptr<SNDLLFile> sndll);
const char* name() const override;
Result<void> import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const override;
Result<void> print_headers(FILE* out) const override;
Result<void> print_symbols(FILE* out, u32 flags) const override;
protected:
std::shared_ptr<SNDLLFile> m_sndll;
};
// Symbol table implementation that produces symbols from the section headers
// of an ELF file. Only a reference to the ElfFile is stored, so the ElfFile
// has to outlive this object.
class ElfSectionHeadersSymbolTable : public SymbolTable {
public:
ElfSectionHeadersSymbolTable(const ElfFile& elf);
const char* name() const override;
Result<void> import(
SymbolDatabase& database,
const SymbolGroup& group,
u32 importer_flags,
DemanglerFunctions demangler,
const std::atomic_bool* interrupt) const override;
Result<void> print_headers(FILE* out) const override;
Result<void> print_symbols(FILE* out, u32 flags) const override;
protected:
const ElfFile& m_elf;
};
}

173
3rdparty/ccc/src/ccc/util.cpp vendored Normal file
View File

@ -0,0 +1,173 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#include "util.h"
namespace ccc {
static CustomErrorCallback custom_error_callback = nullptr;
// Builds an Error from a printf-style format string and its arguments.
//
// The message is formatted into a fixed 4096 byte buffer. If vsnprintf reports
// a failure, a fixed fallback message is used instead. source_file is stored
// as a pointer, not copied.
Error format_error(const char* source_file, int source_line, const char* format, ...)
{
	char buffer[4096];
	
	va_list arguments;
	va_start(arguments, format);
	const int written = vsnprintf(buffer, sizeof(buffer), format, arguments);
	va_end(arguments);
	
	if(written < 0) {
		strncpy(buffer, "Failed to generate error message.", sizeof(buffer));
	}
	
	Error result;
	result.message = buffer;
	result.source_file = source_file;
	result.source_line = source_line;
	return result;
}
// Reports an error. If a custom error callback has been installed it receives
// the error with ERROR_LEVEL_ERROR, otherwise the error is printed to stderr.
void report_error(const Error& error)
{
	if(custom_error_callback != nullptr) {
		custom_error_callback(error, ERROR_LEVEL_ERROR);
		return;
	}
	
	fprintf(stderr, "[%s:%d] " CCC_ANSI_COLOUR_RED "error:" CCC_ANSI_COLOUR_OFF " %s\n",
		error.source_file, error.source_line, error.message.c_str());
}
// Reports a warning. If a custom error callback has been installed it
// receives the warning with ERROR_LEVEL_WARNING, otherwise the warning is
// printed to stderr.
void report_warning(const Error& warning)
{
	if(custom_error_callback != nullptr) {
		custom_error_callback(warning, ERROR_LEVEL_WARNING);
		return;
	}
	
	fprintf(stderr, "[%s:%d] " CCC_ANSI_COLOUR_MAGENTA "warning:" CCC_ANSI_COLOUR_OFF " %s\n",
		warning.source_file, warning.source_line, warning.message.c_str());
}
// Installs the callback that report_error and report_warning will call
// instead of printing to stderr. Passing nullptr restores the default
// behaviour. The callback is stored in a plain static variable with no
// synchronisation.
void set_custom_error_callback(CustomErrorCallback callback)
{
custom_error_callback = callback;
}
// Returns a pointer to the string starting at 'offset' within 'bytes', or
// nullptr if the offset is outside the buffer or no null terminator is found
// between the offset and the end of the buffer.
//
// The returned pointer refers to the data viewed by 'bytes'; nothing is
// copied.
const char* get_string(std::span<const u8> bytes, u64 offset)
{
	// Reject out of range offsets up front so that no pointer beyond the end
	// of the buffer is ever formed.
	if(offset >= bytes.size()) {
		return nullptr;
	}
	
	const u8* start = bytes.data() + offset;
	if(memchr(start, '\0', bytes.size() - offset) == nullptr) {
		// The string would run off the end of the buffer.
		return nullptr;
	}
	
	return (const char*) start;
}
std::string merge_paths(const std::string& base, const std::string& path)
{
// Try to figure out if we're dealing with a Windows path of a UNIX path.
bool is_windows_path = false;
if(base.empty()) {
is_windows_path = guess_is_windows_path(path.c_str());
} else {
is_windows_path = guess_is_windows_path(base.c_str());
}
// Actually merge the paths. If path is the entire path, we don't need to
// append base onto the front, so check for that now.
bool is_absolute_unix = (path.size() >= 1) && (path[0] == '/' || path[0] == '\\');
bool is_absolute_windows = (path.size() >= 3) && path[1] == ':' && (path[2] == '/' || path[2] == '\\');
if(base.empty() || is_absolute_unix || is_absolute_windows) {
return normalise_path(path.c_str(), is_windows_path);
}
return normalise_path((base + "/" + path).c_str(), is_windows_path);
}
// Converts a path from a symbol table into a normal form.
//
// - Forward slashes and backslashes are both treated as separators, and runs
//   of separators are collapsed.
// - A leading separator, or a letter followed by a colon and a separator,
//   marks the path as absolute. A drive letter is converted to upper case.
// - "." parts are removed, and a ".." part is removed together with the part
//   before it unless that part is also "..".
// - The output uses a backslash as the separator if
//   use_backslashes_as_path_separators is true and a forward slash otherwise.
//
// 'input' must point to a null-terminated string.
std::string normalise_path(const char* input, bool use_backslashes_as_path_separators)
{
	const auto is_separator = [](char c) { return c == '/' || c == '\\'; };
	
	bool is_absolute = false;
	std::optional<char> drive_letter;
	std::vector<std::string> parts;
	
	// Parse the beginning of the path. The character classification functions
	// are only defined for values representable as unsigned char (and EOF),
	// so the character is converted before being passed to them.
	if(is_separator(*input)) { // UNIX path, drive relative Windows path or UNC Windows path.
		is_absolute = true;
	} else if(isalpha((unsigned char) *input) && input[1] == ':' && is_separator(input[2])) { // Absolute Windows path.
		is_absolute = true;
		drive_letter = (char) toupper((unsigned char) *input);
		input += 2;
	} else {
		parts.emplace_back();
	}
	
	// Parse the rest of the path. For absolute paths the first character seen
	// here is always a separator, so parts is never empty when back() is used.
	while(*input != 0) {
		if(is_separator(*input)) {
			while(is_separator(*input)) input++;
			parts.emplace_back();
		} else {
			parts.back() += *(input++);
		}
	}
	
	// Remove "." and ".." parts.
	size_t index = 0;
	while(index < parts.size()) {
		if(parts[index] == ".") {
			parts.erase(parts.begin() + index);
		} else if(parts[index] == ".." && index > 0 && parts[index - 1] != "..") {
			// Drop the ".." and the part it cancels out, then carry on with
			// whatever followed them.
			parts.erase(parts.begin() + (index - 1), parts.begin() + (index + 1));
			index--;
		} else {
			index++;
		}
	}
	
	// Output the path in a normal form.
	const char separator = use_backslashes_as_path_separators ? '\\' : '/';
	std::string output;
	if(is_absolute) {
		if(drive_letter.has_value()) {
			output += *drive_letter;
			output += ":";
		}
		output += separator;
	}
	for(size_t i = 0; i < parts.size(); i++) {
		output += parts[i];
		if(i != parts.size() - 1) {
			output += separator;
		}
	}
	
	return output;
}
// Guesses the style of a path from the first separator that appears in it:
// returns true if that separator is a backslash, and false if it is a forward
// slash or if the path contains no separator at all.
bool guess_is_windows_path(const char* path)
{
	const char* cursor = path;
	while(*cursor != '\0' && *cursor != '/' && *cursor != '\\') {
		cursor++;
	}
	return *cursor == '\\';
}
// Returns the part of the path after the last forward slash or backslash.
// The path is returned unchanged if it contains no separator or if the last
// separator is its final character.
std::string extract_file_name(const std::string& path)
{
	const std::string::size_type last_separator = path.find_last_of("/\\");
	if(last_separator == std::string::npos) {
		return path;
	}
	if(last_separator + 1 == path.size()) {
		return path;
	}
	return path.substr(last_separator + 1);
}
}

312
3rdparty/ccc/src/ccc/util.h vendored Normal file
View File

@ -0,0 +1,312 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT
#pragma once
#include <set>
#include <span>
#include <cstdio>
#include <vector>
#include <memory>
#include <string>
#include <cstdint>
#include <cstdarg>
#include <cstdlib>
#include <cstring>
#include <optional>
namespace ccc {
using u8 = unsigned char;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using s8 = signed char;
using s16 = int16_t;
using s32 = int32_t;
using s64 = int64_t;
#ifdef _WIN32
#define CCC_ANSI_COLOUR_OFF ""
#define CCC_ANSI_COLOUR_RED ""
#define CCC_ANSI_COLOUR_MAGENTA ""
#define CCC_ANSI_COLOUR_GRAY ""
#else
#define CCC_ANSI_COLOUR_OFF "\033[0m"
#define CCC_ANSI_COLOUR_RED "\033[31m"
#define CCC_ANSI_COLOUR_MAGENTA "\033[35m"
#define CCC_ANSI_COLOUR_GRAY "\033[90m"
#endif
// An error or warning message together with the source file and line it was
// generated at. source_file is a plain pointer that is not owned by the
// struct.
struct Error {
std::string message;
const char* source_file;
s32 source_line;
};
// Severity passed to the custom error callback: report_error passes
// ERROR_LEVEL_ERROR and report_warning passes ERROR_LEVEL_WARNING.
enum ErrorLevel {
ERROR_LEVEL_ERROR,
ERROR_LEVEL_WARNING
};
typedef void (*CustomErrorCallback)(const Error& error, ErrorLevel level);
Error format_error(const char* source_file, int source_line, const char* format, ...);
void report_error(const Error& error);
void report_warning(const Error& warning);
void set_custom_error_callback(CustomErrorCallback callback);
// Formats a printf-style error message, reports it and terminates the process
// with exit(1).
#define CCC_FATAL(...) \
{ \
ccc::Error error = ccc::format_error(__FILE__, __LINE__, __VA_ARGS__); \
ccc::report_error(error); \
exit(1); \
}
// Same as CCC_FATAL, but only if the condition is false. This expands to a
// bare if statement, so take care when using it directly before an else.
#define CCC_CHECK_FATAL(condition, ...) \
if(!(condition)) { \
ccc::Error error = ccc::format_error(__FILE__, __LINE__, __VA_ARGS__); \
ccc::report_error(error); \
exit(1); \
}
// Terminates the process if the condition is false. The text of the condition
// is passed to CCC_CHECK_FATAL as the format string.
#define CCC_ASSERT(condition) \
CCC_CHECK_FATAL(condition, #condition)
// The main error handling construct in CCC. This class is used to bundle
// together a return value and a pointer to error information, so that errors
// can be propagated up the stack.
//
// A result is successful if and only if m_error is null. Accessing the value
// of a failed result, or the error of a successful result, trips a CCC_ASSERT
// which terminates the process.
template <typename Value>
class [[nodiscard]] Result {
// Lets results of different value types move each other's error pointers.
template <typename OtherValue>
friend class Result;
protected:
Value m_value;
std::unique_ptr<Error> m_error;
// Only used internally to build failed results. m_value is left
// default-initialised.
Result() {}
public:
// Constructs a successful result holding the given value.
Result(Value value) : m_value(std::move(value)), m_error(nullptr) {}
// Used to propagate errors up the call stack. The source result must be a
// failure. Its error is moved into the new result and m_value is left
// default-initialised.
template <typename OtherValue>
Result(Result<OtherValue>&& rhs)
{
CCC_ASSERT(rhs.m_error != nullptr);
m_error = std::move(rhs.m_error);
}
// Constructs a failed result that owns a heap allocated copy of the error.
static Result<Value> failure(Error error)
{
Result<Value> result;
result.m_error = std::make_unique<Error>(std::move(error));
return result;
}
// True if this result holds a value rather than an error.
bool success() const
{
return m_error == nullptr;
}
// Access the error. Must only be called on a failed result.
const Error& error() const
{
CCC_ASSERT(m_error != nullptr);
return *m_error;
}
// Access the value. These must only be called on a successful result.
Value& operator*()
{
CCC_ASSERT(m_error == nullptr);
return m_value;
}
const Value& operator*() const
{
CCC_ASSERT(m_error == nullptr);
return m_value;
}
Value* operator->()
{
CCC_ASSERT(m_error == nullptr);
return &m_value;
}
const Value* operator->() const
{
CCC_ASSERT(m_error == nullptr);
return &m_value;
}
};
// Specialisation for functions that have no value to return. It is
// implemented on top of Result<int>, with a dummy value of zero stored on
// success.
template <>
class [[nodiscard]] Result<void> : public Result<int> {
public:
// Constructs a successful result.
Result() : Result<int>(0) {}
// Used to propagate errors up the call stack. The source result must be a
// failure.
template <typename OtherValue>
Result(Result<OtherValue>&& rhs)
{
CCC_ASSERT(rhs.m_error != nullptr);
m_error = std::move(rhs.m_error);
}
};
// Creates a failed Result<int> from a printf-style message. This converts to
// a failed result of any other value type through the converting constructor
// of Result.
#define CCC_FAILURE(...) ccc::Result<int>::failure(ccc::format_error(__FILE__, __LINE__, __VA_ARGS__))
// Returns a failure from the enclosing function if the condition is false.
// This expands to a bare if statement, so take care when using it directly
// before an else.
#define CCC_CHECK(condition, ...) \
if(!(condition)) { \
return CCC_FAILURE(__VA_ARGS__); \
}
// Checks that the character pointed to by input is c, and returns a failure
// from the enclosing function if it isn't. input is advanced by one character
// whether or not the check passes.
#define CCC_EXPECT_CHAR(input, c, context) \
CCC_CHECK(*(input++) == c, \
"Expected '%c' in %s, got '%c' (%02hhx)", \
c, context, *(input - 1), *(input - 1))
// Returns the passed result from the enclosing function if it is a failure.
// The argument appears more than once in the expansion.
#define CCC_RETURN_IF_ERROR(result) \
if(!(result).success()) { \
return (result); \
}
// Reports the error and terminates the process with exit(1) if the passed
// result is a failure.
#define CCC_EXIT_IF_ERROR(result) \
if(!(result).success()) { \
ccc::report_error((result).error()); \
exit(1); \
}
// Streams the error message into FAIL() if the passed result is a failure.
// FAIL is not defined in this header and must be provided by the including
// code.
#define CCC_GTEST_FAIL_IF_ERROR(result) \
if(!(result).success()) { \
FAIL() << (result).error().message; \
}
// Formats a printf-style warning message and passes it to report_warning.
// The arguments are forwarded by value to the variadic format_error function.
template <typename... Args>
void warn_impl(const char* source_file, int source_line, const char* format, Args... args)
{
Error warning = format_error(source_file, source_line, format, args...);
report_warning(warning);
}
// Reports a warning tagged with the current source file and line.
#define CCC_WARN(...) \
ccc::warn_impl(__FILE__, __LINE__, __VA_ARGS__)
#ifdef _MSC_VER
#define CCC_PACKED_STRUCT(name, ...) \
__pragma(pack(push, 1)) struct name { __VA_ARGS__ } __pragma(pack(pop));
#else
#define CCC_PACKED_STRUCT(name, ...) \
struct __attribute__((__packed__)) name { __VA_ARGS__ };
#endif
// Returns a pointer to a T located 'offset' bytes into 'bytes', or nullptr if
// a T would not fit entirely inside the buffer at that offset.
//
// The returned pointer refers to the data viewed by 'bytes'; nothing is
// copied and no alignment adjustment is made.
template <typename T>
const T* get_packed(std::span<const u8> bytes, u64 offset)
{
	// Written with a subtraction rather than offset + sizeof(T), so that a
	// very large offset cannot wrap around and slip past the check.
	if(offset > bytes.size() || sizeof(T) > bytes.size() - offset) {
		return nullptr;
	}
	
	return reinterpret_cast<const T*>(bytes.data() + offset);
}
const char* get_string(std::span<const u8> bytes, u64 offset);
#define CCC_BEGIN_END(x) (x).begin(), (x).end()
#define CCC_ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define CCC_FOURCC(string) ((string)[0] | (string)[1] << 8 | (string)[2] << 16 | (string)[3] << 24)
struct Address {
u32 value = (u32) -1;
Address() {}
Address(u32 v) : value(v) {}
bool valid() const
{
return value != (u32) -1;
}
u32 get_or_zero() const
{
if(valid()) {
return value;
} else {
return 0;
}
}
Address add_base_address(Address base_address) const
{
if(valid()) {
return base_address.get_or_zero() + value;
} else {
return Address();
}
}
static Address non_zero(u32 address)
{
Address result;
if(address != 0) {
result = address;
}
return result;
}
friend auto operator<=>(const Address& lhs, const Address& rhs) = default;
};
// A pair of addresses, low and high. A default constructed range holds two
// invalid addresses, and a range constructed from a single address uses it
// for both ends.
struct AddressRange {
Address low;
Address high;
AddressRange() {}
AddressRange(Address address) : low(address), high(address) {}
AddressRange(Address l, Address h) : low(l), high(h) {}
friend auto operator<=>(const AddressRange& lhs, const AddressRange& rhs) = default;
};
// These functions are to be used only for source file paths present in the
// symbol table, since we want them to be handled consistently across different
// platforms, which with std::filesystem::path doesn't seem to be possible.
std::string merge_paths(const std::string& base, const std::string& path);
std::string normalise_path(const char* input, bool use_backslashes_as_path_separators);
bool guess_is_windows_path(const char* path);
std::string extract_file_name(const std::string& path);
namespace ast { struct Node; }
// These are used to reference STABS types from other types within a single
// translation unit. For most games these will just be a single number, the type
// number. In some cases, for example with the homebrew SDK, type numbers are a
// pair of two numbers surrounded by round brackets e.g. (1,23) where the first
// number is the index of the include file to use (includes are listed for each
// translation unit separately), and the second number is the type number.
//
// Both fields default to -1. Only the type field is considered when checking
// for validity.
struct StabsTypeNumber {
s32 file = -1;
s32 type = -1;
friend auto operator<=>(const StabsTypeNumber& lhs, const StabsTypeNumber& rhs) = default;
bool valid() const { return type > -1; }
};
// Storage class specifiers, with STORAGE_CLASS_NONE as the zero value.
enum StorageClass {
STORAGE_CLASS_NONE = 0,
STORAGE_CLASS_TYPEDEF = 1,
STORAGE_CLASS_EXTERN = 2,
STORAGE_CLASS_STATIC = 3,
STORAGE_CLASS_AUTO = 4,
STORAGE_CLASS_REGISTER = 5
};
// Function pointers for the GNU demangler functions, so we can build CCC as a
// library without linking against the demangler. Both pointers default to
// null.
struct DemanglerFunctions {
char* (*cplus_demangle)(const char *mangled, int options) = nullptr;
char* (*cplus_demangle_opname)(const char *opname, int options) = nullptr;
};
}

View File

@ -51,6 +51,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "freesurround", "3rdparty\fr
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vixl", "3rdparty\vixl\vixl.vcxproj", "{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ccc", "3rdparty\ccc\ccc.vcxproj", "{2589F8CE-EA77-4B73-911E-64074569795B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug AVX2|ARM64 = Debug AVX2|ARM64
@ -946,6 +948,45 @@ Global
{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}.Release Clang|x64.ActiveCfg = Release Clang|x64
{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}.Release|ARM64.ActiveCfg = Release Clang|ARM64
{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}.Release|x64.ActiveCfg = Release|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug AVX2|ARM64.ActiveCfg = Debug Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug AVX2|x64.ActiveCfg = Debug AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug AVX2|x64.Build.0 = Debug AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug Clang AVX2|ARM64.ActiveCfg = Debug Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug Clang AVX2|x64.ActiveCfg = Debug Clang AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug Clang AVX2|x64.Build.0 = Debug Clang AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug Clang|ARM64.ActiveCfg = Debug Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug Clang|ARM64.Build.0 = Debug Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug Clang|x64.ActiveCfg = Debug Clang|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug Clang|x64.Build.0 = Debug Clang|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug|ARM64.ActiveCfg = Debug Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug|x64.ActiveCfg = Debug|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Debug|x64.Build.0 = Debug|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel AVX2|ARM64.ActiveCfg = Devel Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel AVX2|x64.ActiveCfg = Devel AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel AVX2|x64.Build.0 = Devel AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel Clang AVX2|ARM64.ActiveCfg = Devel Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel Clang AVX2|x64.ActiveCfg = Devel Clang AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel Clang AVX2|x64.Build.0 = Devel Clang AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel Clang|ARM64.ActiveCfg = Devel Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel Clang|ARM64.Build.0 = Devel Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel Clang|x64.ActiveCfg = Devel Clang|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel Clang|x64.Build.0 = Devel Clang|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel|ARM64.ActiveCfg = Devel Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel|x64.ActiveCfg = Devel|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Devel|x64.Build.0 = Devel|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release AVX2|ARM64.ActiveCfg = Release Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release AVX2|x64.ActiveCfg = Release AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release AVX2|x64.Build.0 = Release AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release Clang AVX2|ARM64.ActiveCfg = Release Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release Clang AVX2|x64.ActiveCfg = Release Clang AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release Clang AVX2|x64.Build.0 = Release Clang AVX2|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release Clang|ARM64.ActiveCfg = Release Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release Clang|ARM64.Build.0 = Release Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release Clang|x64.ActiveCfg = Release Clang|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release Clang|x64.Build.0 = Release Clang|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release|ARM64.ActiveCfg = Release Clang|ARM64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release|x64.ActiveCfg = Release|x64
{2589F8CE-EA77-4B73-911E-64074569795B}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -969,6 +1010,7 @@ Global
{67D0160C-0FE4-44B9-AC2E-82BBCF4104DF} = {78EBE642-7A4D-4EA7-86BE-5639C6646C38}
{1DD0B31F-37F0-4A36-A521-74133ACA4737} = {78EBE642-7A4D-4EA7-86BE-5639C6646C38}
{8906836E-F06E-46E8-B11A-74E5E8C7B8FB} = {78EBE642-7A4D-4EA7-86BE-5639C6646C38}
{2589F8CE-EA77-4B73-911E-64074569795B} = {78EBE642-7A4D-4EA7-86BE-5639C6646C38}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {0BC474EA-3628-45D3-9DBC-E22D0B7E0F77}

View File

@ -112,6 +112,9 @@ if(WIN32)
add_subdirectory(3rdparty/rainterface EXCLUDE_FROM_ALL)
endif()
# Symbol table parser.
add_subdirectory(3rdparty/ccc EXCLUDE_FROM_ALL)
# Architecture-specific.
if(_M_X86)
add_subdirectory(3rdparty/zydis EXCLUDE_FROM_ALL)

View File

@ -44,6 +44,7 @@
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\imgui\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\cpuinfo\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\fast_float\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\ccc\src</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)pcsx2</AdditionalIncludeDirectories>
<!-- Needed for moc pch -->
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(ProjectDir)\Settings;$(ProjectDir)\GameList;$(ProjectDir)\Tools\InputRecording;$(ProjectDir)\Debugger;$(ProjectDir)\Debugger\Models</AdditionalIncludeDirectories>

View File

@ -1147,6 +1147,7 @@ target_link_libraries(PCSX2_FLAGS INTERFACE
PNG::PNG
LZMA::LZMA
Zstd::Zstd
ccc
${LIBC_LIBRARIES}
)

View File

@ -56,6 +56,7 @@
<AdditionalIncludeDirectories Condition="'$(Platform)'=='x64'">%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\xbyak</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Platform)'=='x64'">%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\zydis\include;$(SolutionDir)3rdparty\zydis\dependencies\zycore\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Platform)'=='ARM64'">%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\vixl\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)3rdparty\ccc\src</AdditionalIncludeDirectories>
<PrecompiledHeader>Use</PrecompiledHeader>
<PrecompiledHeaderFile>PrecompiledHeader.h</PrecompiledHeaderFile>
<ForcedIncludeFiles>PrecompiledHeader.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
@ -1004,6 +1005,9 @@
<ProjectReference Include="..\3rdparty\vixl\vixl.vcxproj" Condition="'$(Platform)'=='ARM64'">
<Project>{8906836e-f06e-46e8-b11a-74e5e8c7b8fb}</Project>
</ProjectReference>
<ProjectReference Include="..\3rdparty\ccc\ccc.vcxproj">
<Project>{2589f8ce-ea77-4b73-911e-64074569795b}</Project>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Natvis Include="GS\GS.natvis" />