From 87b03fdc284c344d87e4959357835eb3938c5f3f Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Mon, 26 Aug 2024 18:08:33 +0100 Subject: [PATCH] 3rdparty: Add CCC v2.1 This is the symbol table parser that I'm replacing the existing ELF symbol table parser with. It supports STABS symbols in .mdebug sections as well as ELF symbols and SNDLL symbols. It includes its own symbol database, and an AST which facilitates debugging tools that let the user inspect complex data structures with full type information. More information is provided in the included readme. --- 3rdparty/ccc/CMakeLists.txt | 41 + 3rdparty/ccc/README.md | 37 + 3rdparty/ccc/ccc.vcxproj | 75 ++ 3rdparty/ccc/ccc.vcxproj.filters | 111 ++ 3rdparty/ccc/src/ccc/ast.cpp | 562 ++++++++++ 3rdparty/ccc/src/ccc/ast.h | 377 +++++++ 3rdparty/ccc/src/ccc/elf.cpp | 125 +++ 3rdparty/ccc/src/ccc/elf.h | 156 +++ 3rdparty/ccc/src/ccc/elf_symtab.cpp | 213 ++++ 3rdparty/ccc/src/ccc/elf_symtab.h | 20 + 3rdparty/ccc/src/ccc/importer_flags.cpp | 95 ++ 3rdparty/ccc/src/ccc/importer_flags.h | 39 + 3rdparty/ccc/src/ccc/mdebug_analysis.cpp | 349 +++++++ 3rdparty/ccc/src/ccc/mdebug_analysis.h | 99 ++ 3rdparty/ccc/src/ccc/mdebug_importer.cpp | 668 ++++++++++++ 3rdparty/ccc/src/ccc/mdebug_importer.h | 31 + 3rdparty/ccc/src/ccc/mdebug_section.cpp | 474 +++++++++ 3rdparty/ccc/src/ccc/mdebug_section.h | 176 ++++ 3rdparty/ccc/src/ccc/mdebug_symbols.cpp | 220 ++++ 3rdparty/ccc/src/ccc/mdebug_symbols.h | 32 + 3rdparty/ccc/src/ccc/sndll.cpp | 191 ++++ 3rdparty/ccc/src/ccc/sndll.h | 55 + 3rdparty/ccc/src/ccc/stabs.cpp | 835 +++++++++++++++ 3rdparty/ccc/src/ccc/stabs.h | 379 +++++++ 3rdparty/ccc/src/ccc/stabs_to_ast.cpp | 834 +++++++++++++++ 3rdparty/ccc/src/ccc/stabs_to_ast.h | 29 + 3rdparty/ccc/src/ccc/symbol_database.cpp | 1204 ++++++++++++++++++++++ 3rdparty/ccc/src/ccc/symbol_database.h | 721 +++++++++++++ 3rdparty/ccc/src/ccc/symbol_file.cpp | 114 ++ 3rdparty/ccc/src/ccc/symbol_file.h | 62 ++ 
3rdparty/ccc/src/ccc/symbol_table.cpp | 283 +++++ 3rdparty/ccc/src/ccc/symbol_table.h | 163 +++ 3rdparty/ccc/src/ccc/util.cpp | 173 ++++ 3rdparty/ccc/src/ccc/util.h | 312 ++++++ PCSX2_qt.sln | 42 + cmake/SearchForStuff.cmake | 3 + pcsx2-qt/pcsx2-qt.vcxproj | 1 + pcsx2/CMakeLists.txt | 1 + pcsx2/pcsx2.vcxproj | 4 + 39 files changed, 9306 insertions(+) create mode 100644 3rdparty/ccc/CMakeLists.txt create mode 100644 3rdparty/ccc/README.md create mode 100644 3rdparty/ccc/ccc.vcxproj create mode 100644 3rdparty/ccc/ccc.vcxproj.filters create mode 100644 3rdparty/ccc/src/ccc/ast.cpp create mode 100644 3rdparty/ccc/src/ccc/ast.h create mode 100644 3rdparty/ccc/src/ccc/elf.cpp create mode 100644 3rdparty/ccc/src/ccc/elf.h create mode 100644 3rdparty/ccc/src/ccc/elf_symtab.cpp create mode 100644 3rdparty/ccc/src/ccc/elf_symtab.h create mode 100644 3rdparty/ccc/src/ccc/importer_flags.cpp create mode 100644 3rdparty/ccc/src/ccc/importer_flags.h create mode 100644 3rdparty/ccc/src/ccc/mdebug_analysis.cpp create mode 100644 3rdparty/ccc/src/ccc/mdebug_analysis.h create mode 100644 3rdparty/ccc/src/ccc/mdebug_importer.cpp create mode 100644 3rdparty/ccc/src/ccc/mdebug_importer.h create mode 100644 3rdparty/ccc/src/ccc/mdebug_section.cpp create mode 100644 3rdparty/ccc/src/ccc/mdebug_section.h create mode 100644 3rdparty/ccc/src/ccc/mdebug_symbols.cpp create mode 100644 3rdparty/ccc/src/ccc/mdebug_symbols.h create mode 100644 3rdparty/ccc/src/ccc/sndll.cpp create mode 100644 3rdparty/ccc/src/ccc/sndll.h create mode 100644 3rdparty/ccc/src/ccc/stabs.cpp create mode 100644 3rdparty/ccc/src/ccc/stabs.h create mode 100644 3rdparty/ccc/src/ccc/stabs_to_ast.cpp create mode 100644 3rdparty/ccc/src/ccc/stabs_to_ast.h create mode 100644 3rdparty/ccc/src/ccc/symbol_database.cpp create mode 100644 3rdparty/ccc/src/ccc/symbol_database.h create mode 100644 3rdparty/ccc/src/ccc/symbol_file.cpp create mode 100644 3rdparty/ccc/src/ccc/symbol_file.h create mode 100644 
3rdparty/ccc/src/ccc/symbol_table.cpp create mode 100644 3rdparty/ccc/src/ccc/symbol_table.h create mode 100644 3rdparty/ccc/src/ccc/util.cpp create mode 100644 3rdparty/ccc/src/ccc/util.h diff --git a/3rdparty/ccc/CMakeLists.txt b/3rdparty/ccc/CMakeLists.txt new file mode 100644 index 0000000000..7ecffbdca6 --- /dev/null +++ b/3rdparty/ccc/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 3.14) +project(ccc) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +add_library(ccc STATIC + src/ccc/ast.cpp + src/ccc/ast.h + src/ccc/elf.cpp + src/ccc/elf.h + src/ccc/elf_symtab.cpp + src/ccc/elf_symtab.h + src/ccc/importer_flags.cpp + src/ccc/importer_flags.h + src/ccc/mdebug_analysis.cpp + src/ccc/mdebug_analysis.h + src/ccc/mdebug_importer.cpp + src/ccc/mdebug_importer.h + src/ccc/mdebug_section.cpp + src/ccc/mdebug_section.h + src/ccc/mdebug_symbols.cpp + src/ccc/mdebug_symbols.h + src/ccc/sndll.cpp + src/ccc/sndll.h + src/ccc/stabs.cpp + src/ccc/stabs.h + src/ccc/stabs_to_ast.cpp + src/ccc/stabs_to_ast.h + src/ccc/symbol_database.cpp + src/ccc/symbol_database.h + src/ccc/symbol_file.cpp + src/ccc/symbol_file.h + src/ccc/symbol_table.cpp + src/ccc/symbol_table.h + src/ccc/util.cpp + src/ccc/util.h +) + +target_include_directories(ccc PUBLIC src) diff --git a/3rdparty/ccc/README.md b/3rdparty/ccc/README.md new file mode 100644 index 0000000000..9c8f1efecf --- /dev/null +++ b/3rdparty/ccc/README.md @@ -0,0 +1,37 @@ +# Chaos Compiler Collection + +This code was originally developed in the following repository and was copied +into PCSX2 by the author: + +- [https://github.com/chaoticgd/ccc](https://github.com/chaoticgd/ccc) + +That repository includes additional resources that are not present in the PCSX2 repository.
+ +## Documentation + +### DWARF (.debug) Section + +- [DWARF Debugging Information Format](https://dwarfstd.org/doc/dwarf_1_1_0.pdf) + +### MIPS Debug (.mdebug) Section + +- [Third Eye Software and the MIPS symbol table (Peter Rowell)](http://datahedron.com/mips.html) +- [MIPS Mdebug Debugging Information (David Anderson, 1996)](https://www.prevanders.net/Mdebug.ps) +- MIPS Assembly Language Programmer's Guide, Symbol Table Chapter (Silicon Graphics, 1992) +- Tru64 UNIX Object File and Symbol Table Format Specification, Symbol Table Chapter +- `mdebugread.c` from gdb (reading) +- `ecoff.c` from gas (writing) +- `include/coff/sym.h` from binutils (headers) + +### MIPS EABI + +- [MIPS EABI](https://sourceware.org/legacy-ml/binutils/2003-06/msg00436.html) + +### STABS + +- [The "stabs" representation of debugging information (Julia Menapace, Jim Kingdon, and David MacKenzie, 1992-???)](https://sourceware.org/gdb/onlinedocs/stabs.html) +- `stabs.c` from binutils (reading) +- `stabsread.c` from gdb (reading) +- `dbxread.c` from gdb (reading) +- `dbxout.c` from gcc (writing) +- `stab.def` from gcc (symbol codes) diff --git a/3rdparty/ccc/ccc.vcxproj b/3rdparty/ccc/ccc.vcxproj new file mode 100644 index 0000000000..16ab210948 --- /dev/null +++ b/3rdparty/ccc/ccc.vcxproj @@ -0,0 +1,75 @@ + + + + + + {2589F8CE-EA77-4B73-911E-64074569795B} + + + + StaticLibrary + $(DefaultPlatformToolset) + ClangCL + MultiByte + true + true + false + + + + + + + + + + + + + + AllRules.ruleset + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TurnOffAllWarnings + $(ProjectDir)src;%(AdditionalIncludeDirectories) + stdcpp20 + + + + + diff --git a/3rdparty/ccc/ccc.vcxproj.filters b/3rdparty/ccc/ccc.vcxproj.filters new file mode 100644 index 0000000000..9f49bde786 --- /dev/null +++ b/3rdparty/ccc/ccc.vcxproj.filters @@ -0,0 +1,111 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + 
{93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + diff --git a/3rdparty/ccc/src/ccc/ast.cpp b/3rdparty/ccc/src/ccc/ast.cpp new file mode 100644 index 0000000000..fb5709d366 --- /dev/null +++ b/3rdparty/ccc/src/ccc/ast.cpp @@ -0,0 +1,562 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#include "ast.h" + +#include "importer_flags.h" +#include "symbol_database.h" + +namespace ccc::ast { + +static bool compare_nodes_and_merge( + CompareResult& dest, const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database); +static bool try_to_match_wobbly_typedefs( + const Node& node_lhs, const Node& node_rhs, const SymbolDatabase& database); + +void Node::set_access_specifier(AccessSpecifier specifier, u32 importer_flags) +{ + if((importer_flags & NO_ACCESS_SPECIFIERS) == 0) { + access_specifier = specifier; + } +} + +std::pair Node::physical_type(SymbolDatabase& database, s32 max_depth) +{ + Node* type = this; + DataType* symbol = nullptr; + for(s32 i = 0; i < max_depth && type->descriptor == TYPE_NAME; i++) { + DataType* data_type = database.data_types.symbol_from_handle(type->as().data_type_handle); + if (!data_type || !data_type->type()) { + break; + } + + type = data_type->type(); + symbol = data_type; + } + + return std::pair(type, symbol); +} + +std::pair Node::physical_type(const SymbolDatabase& database, s32 max_depth) const +{ + return const_cast(this)->physical_type(const_cast(database), max_depth); +} + +const char* member_function_modifier_to_string(MemberFunctionModifier modifier) +{ + switch(modifier) { + case MemberFunctionModifier::NONE: return "none"; + case MemberFunctionModifier::STATIC: return "static"; + case MemberFunctionModifier::VIRTUAL: return "virtual"; + } + return ""; +} + +bool StructOrUnion::flatten_fields( + std::vector& output, + const DataType* symbol, + const SymbolDatabase& database, + bool skip_statics, + s32 base_offset, + s32 max_fields, + s32 max_depth) const +{ + if(max_depth == 0) { + return false; + } + + for(const std::unique_ptr& type_name : base_classes) { + if(type_name->descriptor != TYPE_NAME) { + continue; + } + + s32 new_base_offset = base_offset + type_name->offset_bytes; + + DataTypeHandle handle = type_name->as().data_type_handle; + const DataType* 
base_class_symbol = database.data_types.symbol_from_handle(handle); + if(!base_class_symbol || !base_class_symbol->type() || base_class_symbol->type()->descriptor != STRUCT_OR_UNION) { + continue; + } + + const StructOrUnion& base_class = base_class_symbol->type()->as(); + if(!base_class.flatten_fields(output, base_class_symbol, database, skip_statics, new_base_offset, max_fields, max_depth - 1)) { + return false; + } + } + + for(const std::unique_ptr& field : fields) { + if(skip_statics && field->storage_class == STORAGE_CLASS_STATIC) { + continue; + } + + if((s32) output.size() >= max_fields) { + return false; + } + + FlatField& flat = output.emplace_back(); + flat.node = field.get(); + flat.symbol = symbol; + flat.base_offset = base_offset; + } + + return true; +} + +const char* type_name_source_to_string(TypeNameSource source) +{ + switch(source) { + case TypeNameSource::REFERENCE: return "reference"; + case TypeNameSource::CROSS_REFERENCE: return "cross_reference"; + case TypeNameSource::UNNAMED_THIS: return "this"; + } + return ""; +} + +const char* forward_declared_type_to_string(ForwardDeclaredType type) +{ + switch(type) { + case ForwardDeclaredType::STRUCT: return "struct"; + case ForwardDeclaredType::UNION: return "union"; + case ForwardDeclaredType::ENUM: return "enum"; + } + return ""; +} + +DataTypeHandle TypeName::data_type_handle_unless_forward_declared() const +{ + if(!is_forward_declared) { + return data_type_handle; + } else { + return DataTypeHandle(); + } +} + +CompareResult compare_nodes( + const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database, bool check_intrusive_fields) +{ + CompareResult result = CompareResultType::MATCHES_NO_SWAP; + + if(node_lhs.descriptor != node_rhs.descriptor) { + return CompareFailReason::DESCRIPTOR; + } + + if(check_intrusive_fields) { + if(node_lhs.storage_class != node_rhs.storage_class) { + // In some cases we can determine that a type was typedef'd for C + // translation units, but not for 
C++ translation units, so we need + // to add a special case for that here. + if(node_lhs.storage_class == STORAGE_CLASS_TYPEDEF && node_rhs.storage_class == STORAGE_CLASS_NONE) { + result = CompareResultType::MATCHES_FAVOUR_LHS; + } else if(node_lhs.storage_class == STORAGE_CLASS_NONE && node_rhs.storage_class == STORAGE_CLASS_TYPEDEF) { + result = CompareResultType::MATCHES_FAVOUR_RHS; + } else { + return CompareFailReason::STORAGE_CLASS; + } + } + + // Vtable pointers and constructors can sometimes contain type numbers + // that are different between translation units, so we don't want to + // compare them. + bool is_vtable_pointer = node_lhs.is_vtable_pointer && node_rhs.is_vtable_pointer; + bool is_numbered_constructor = node_lhs.name.starts_with("$_") && node_rhs.name.starts_with("$_"); + if(node_lhs.name != node_rhs.name && !is_vtable_pointer && !is_numbered_constructor) { + return CompareFailReason::NAME; + } + + if(node_lhs.offset_bytes != node_rhs.offset_bytes) { + return CompareFailReason::RELATIVE_OFFSET_BYTES; + } + + if(node_lhs.size_bits != node_rhs.size_bits) { + return CompareFailReason::SIZE_BITS; + } + + if(node_lhs.is_const != node_rhs.is_const) { + return CompareFailReason::CONSTNESS; + } + } + + switch(node_lhs.descriptor) { + case ARRAY: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(compare_nodes_and_merge(result, *lhs.element_type.get(), *rhs.element_type.get(), database)) { + return result; + } + + if(lhs.element_count != rhs.element_count) { + return CompareFailReason::ARRAY_ELEMENT_COUNT; + } + + break; + } + case BITFIELD: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.bitfield_offset_bits != rhs.bitfield_offset_bits) { + return CompareFailReason::BITFIELD_OFFSET_BITS; + } + + if(compare_nodes_and_merge(result, *lhs.underlying_type.get(), *rhs.underlying_type.get(), database)) { + return result; + } + + break; + } + case BUILTIN: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + 
if(lhs.bclass != rhs.bclass) { + return CompareFailReason::BUILTIN_CLASS; + } + + break; + } + case ENUM: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.constants != rhs.constants) { + return CompareFailReason::ENUM_CONSTANTS; + } + + break; + } + case ERROR_NODE: { + break; + } + case FUNCTION: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.return_type.has_value() != rhs.return_type.has_value()) { + return CompareFailReason::FUNCTION_RETURN_TYPE_HAS_VALUE; + } + + if(lhs.return_type.has_value()) { + if(compare_nodes_and_merge(result, *lhs.return_type->get(), *rhs.return_type->get(), database)) { + return result; + } + } + + if(lhs.parameters.has_value() && rhs.parameters.has_value()) { + if(lhs.parameters->size() != rhs.parameters->size()) { + return CompareFailReason::FUNCTION_PARAMAETER_COUNT; + } + for(size_t i = 0; i < lhs.parameters->size(); i++) { + if(compare_nodes_and_merge(result, *(*lhs.parameters)[i].get(), *(*rhs.parameters)[i].get(), database)) { + return result; + } + } + } else if(lhs.parameters.has_value() != rhs.parameters.has_value()) { + return CompareFailReason::FUNCTION_PARAMETERS_HAS_VALUE; + } + + if(lhs.modifier != rhs.modifier) { + return CompareFailReason::FUNCTION_MODIFIER; + } + + break; + } + case POINTER_OR_REFERENCE: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.is_pointer != rhs.is_pointer) { + return CompareFailReason::DESCRIPTOR; + } + + if(compare_nodes_and_merge(result, *lhs.value_type.get(), *rhs.value_type.get(), database)) { + return result; + } + + break; + } + case POINTER_TO_DATA_MEMBER: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(compare_nodes_and_merge(result, *lhs.class_type.get(), *rhs.class_type.get(), database)) { + return result; + } + + if(compare_nodes_and_merge(result, *lhs.member_type.get(), *rhs.member_type.get(), database)) { + return result; + } + + break; + } + case STRUCT_OR_UNION: { + const auto [lhs, rhs] = 
Node::as(node_lhs, node_rhs); + + if(lhs.is_struct != rhs.is_struct) { + return CompareFailReason::DESCRIPTOR; + } + + if(lhs.base_classes.size() != rhs.base_classes.size()) { + return CompareFailReason::BASE_CLASS_COUNT; + } + + for(size_t i = 0; i < lhs.base_classes.size(); i++) { + if(compare_nodes_and_merge(result, *lhs.base_classes[i].get(), *rhs.base_classes[i].get(), database)) { + return result; + } + } + + if(lhs.fields.size() != rhs.fields.size()) { + return CompareFailReason::FIELDS_SIZE; + } + + for(size_t i = 0; i < lhs.fields.size(); i++) { + if(compare_nodes_and_merge(result, *lhs.fields[i].get(), *rhs.fields[i].get(), database)) { + return result; + } + } + + if(lhs.member_functions.size() != rhs.member_functions.size()) { + return CompareFailReason::MEMBER_FUNCTION_COUNT; + } + + for(size_t i = 0; i < lhs.member_functions.size(); i++) { + if(compare_nodes_and_merge(result, *lhs.member_functions[i].get(), *rhs.member_functions[i].get(), database)) { + return result; + } + } + + break; + } + case TYPE_NAME: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + // Don't check the source so that REFERENCE and CROSS_REFERENCE are + // treated as the same. 
+ if(lhs.data_type_handle != rhs.data_type_handle) { + return CompareFailReason::TYPE_NAME; + } + + const TypeName::UnresolvedStabs* lhs_unresolved_stabs = lhs.unresolved_stabs.get(); + const TypeName::UnresolvedStabs* rhs_unresolved_stabs = rhs.unresolved_stabs.get(); + if(lhs_unresolved_stabs && rhs_unresolved_stabs) { + if(lhs_unresolved_stabs->type_name != rhs_unresolved_stabs->type_name) { + return CompareFailReason::TYPE_NAME; + } + } else if(lhs_unresolved_stabs || rhs_unresolved_stabs) { + return CompareFailReason::TYPE_NAME; + } + + break; + } + } + return result; +} + +static bool compare_nodes_and_merge( + CompareResult& dest, const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database) +{ + CompareResult result = compare_nodes(node_lhs, node_rhs, database, true); + if(database) { + if(result.type == CompareResultType::DIFFERS && try_to_match_wobbly_typedefs(node_lhs, node_rhs, *database)) { + result.type = CompareResultType::MATCHES_FAVOUR_LHS; + } else if(result.type == CompareResultType::DIFFERS && try_to_match_wobbly_typedefs(node_rhs, node_lhs, *database)) { + result.type = CompareResultType::MATCHES_FAVOUR_RHS; + } + } + + if(dest.type != result.type) { + if(dest.type == CompareResultType::DIFFERS || result.type == CompareResultType::DIFFERS) { + // If any of the inner types differ, the outer type does too. + dest.type = CompareResultType::DIFFERS; + } else if(dest.type == CompareResultType::MATCHES_CONFUSED || result.type == CompareResultType::MATCHES_CONFUSED) { + // Propagate confusion. + dest.type = CompareResultType::MATCHES_CONFUSED; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_LHS && result.type == CompareResultType::MATCHES_FAVOUR_RHS) { + // One of the results favours the LHS node and the other favours the + // RHS node so we are confused. 
+ dest.type = CompareResultType::MATCHES_CONFUSED; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_RHS && result.type == CompareResultType::MATCHES_FAVOUR_LHS) { + // One of the results favours the LHS node and the other favours the + // RHS node so we are confused. + dest.type = CompareResultType::MATCHES_CONFUSED; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_LHS || result.type == CompareResultType::MATCHES_FAVOUR_LHS) { + // One of the results favours the LHS node and the other is neutral + // so go with the LHS node. + dest.type = CompareResultType::MATCHES_FAVOUR_LHS; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_RHS || result.type == CompareResultType::MATCHES_FAVOUR_RHS) { + // One of the results favours the RHS node and the other is neutral + // so go with the RHS node. + dest.type = CompareResultType::MATCHES_FAVOUR_RHS; + } + } + + if(dest.fail_reason == CompareFailReason::NONE) { + dest.fail_reason = result.fail_reason; + } + + return dest.type == CompareResultType::DIFFERS; +} + +static bool try_to_match_wobbly_typedefs( + const Node& type_name_node, const Node& raw_node, const SymbolDatabase& database) +{ + // Detect if one side has a typedef when the other just has the plain type. + // This was previously a common reason why type deduplication would fail. 
+ if(type_name_node.descriptor != TYPE_NAME) { + return false; + } + + const TypeName& type_name = type_name_node.as(); + if(const TypeName::UnresolvedStabs* unresolved_stabs = type_name.unresolved_stabs.get()) { + if(unresolved_stabs->referenced_file_handle == (u32) -1 || !unresolved_stabs->stabs_type_number.valid()) { + return false; + } + + const SourceFile* source_file = + database.source_files.symbol_from_handle(unresolved_stabs->referenced_file_handle); + CCC_ASSERT(source_file); + + auto handle = source_file->stabs_type_number_to_handle.find(unresolved_stabs->stabs_type_number); + if(handle != source_file->stabs_type_number_to_handle.end()) { + const DataType* referenced_type = database.data_types.symbol_from_handle(handle->second); + CCC_ASSERT(referenced_type && referenced_type->type()); + // Don't compare 'intrusive' fields e.g. the offset. + CompareResult new_result = compare_nodes(*referenced_type->type(), raw_node, &database, false); + if(new_result.type != CompareResultType::DIFFERS) { + return true; + } + } + } + + return false; +} + +const char* compare_fail_reason_to_string(CompareFailReason reason) +{ + switch(reason) { + case CompareFailReason::NONE: return "error"; + case CompareFailReason::DESCRIPTOR: return "descriptor"; + case CompareFailReason::STORAGE_CLASS: return "storage class"; + case CompareFailReason::NAME: return "name"; + case CompareFailReason::RELATIVE_OFFSET_BYTES: return "relative offset"; + case CompareFailReason::ABSOLUTE_OFFSET_BYTES: return "absolute offset"; + case CompareFailReason::BITFIELD_OFFSET_BITS: return "bitfield offset"; + case CompareFailReason::SIZE_BITS: return "size"; + case CompareFailReason::CONSTNESS: return "constness"; + case CompareFailReason::ARRAY_ELEMENT_COUNT: return "array element count"; + case CompareFailReason::BUILTIN_CLASS: return "builtin class"; + case CompareFailReason::FUNCTION_RETURN_TYPE_HAS_VALUE: return "function return type has value"; + case 
CompareFailReason::FUNCTION_PARAMAETER_COUNT: return "function paramaeter count"; + case CompareFailReason::FUNCTION_PARAMETERS_HAS_VALUE: return "function parameter"; + case CompareFailReason::FUNCTION_MODIFIER: return "function modifier"; + case CompareFailReason::ENUM_CONSTANTS: return "enum constant"; + case CompareFailReason::BASE_CLASS_COUNT: return "base class count"; + case CompareFailReason::FIELDS_SIZE: return "fields size"; + case CompareFailReason::MEMBER_FUNCTION_COUNT: return "member function count"; + case CompareFailReason::VTABLE_GLOBAL: return "vtable global"; + case CompareFailReason::TYPE_NAME: return "type name"; + case CompareFailReason::VARIABLE_CLASS: return "variable class"; + case CompareFailReason::VARIABLE_TYPE: return "variable type"; + case CompareFailReason::VARIABLE_STORAGE: return "variable storage"; + case CompareFailReason::VARIABLE_BLOCK: return "variable block"; + } + return ""; +} + +const char* node_type_to_string(const Node& node) +{ + switch(node.descriptor) { + case ARRAY: return "array"; + case BITFIELD: return "bitfield"; + case BUILTIN: return "builtin"; + case ENUM: return "enum"; + case ERROR_NODE: return "error"; + case FUNCTION: return "function"; + case POINTER_OR_REFERENCE: { + const PointerOrReference& pointer_or_reference = node.as(); + if(pointer_or_reference.is_pointer) { + return "pointer"; + } else { + return "reference"; + } + } + case POINTER_TO_DATA_MEMBER: return "pointer_to_data_member"; + case STRUCT_OR_UNION: { + const StructOrUnion& struct_or_union = node.as(); + if(struct_or_union.is_struct) { + return "struct"; + } else { + return "union"; + } + } + case TYPE_NAME: return "type_name"; + } + return ""; +} + +const char* storage_class_to_string(StorageClass storage_class) +{ + switch(storage_class) { + case STORAGE_CLASS_NONE: return "none"; + case STORAGE_CLASS_TYPEDEF: return "typedef"; + case STORAGE_CLASS_EXTERN: return "extern"; + case STORAGE_CLASS_STATIC: return "static"; + case 
STORAGE_CLASS_AUTO: return "auto"; + case STORAGE_CLASS_REGISTER: return "register"; + } + return ""; +} + +const char* access_specifier_to_string(AccessSpecifier specifier) +{ + switch(specifier) { + case AS_PUBLIC: return "public"; + case AS_PROTECTED: return "protected"; + case AS_PRIVATE: return "private"; + } + return ""; +} + +const char* builtin_class_to_string(BuiltInClass bclass) +{ + switch(bclass) { + case BuiltInClass::VOID_TYPE: return "void"; + case BuiltInClass::UNSIGNED_8: return "8-bit unsigned integer"; + case BuiltInClass::SIGNED_8: return "8-bit signed integer"; + case BuiltInClass::UNQUALIFIED_8: return "8-bit integer"; + case BuiltInClass::BOOL_8: return "8-bit boolean"; + case BuiltInClass::UNSIGNED_16: return "16-bit unsigned integer"; + case BuiltInClass::SIGNED_16: return "16-bit signed integer"; + case BuiltInClass::UNSIGNED_32: return "32-bit unsigned integer"; + case BuiltInClass::SIGNED_32: return "32-bit signed integer"; + case BuiltInClass::FLOAT_32: return "32-bit floating point"; + case BuiltInClass::UNSIGNED_64: return "64-bit unsigned integer"; + case BuiltInClass::SIGNED_64: return "64-bit signed integer"; + case BuiltInClass::FLOAT_64: return "64-bit floating point"; + case BuiltInClass::UNSIGNED_128: return "128-bit unsigned integer"; + case BuiltInClass::SIGNED_128: return "128-bit signed integer"; + case BuiltInClass::UNQUALIFIED_128: return "128-bit integer"; + case BuiltInClass::FLOAT_128: return "128-bit floating point"; + } + return ""; +} + +s32 builtin_class_size(BuiltInClass bclass) +{ + switch(bclass) { + case BuiltInClass::VOID_TYPE: return 0; + case BuiltInClass::UNSIGNED_8: return 1; + case BuiltInClass::SIGNED_8: return 1; + case BuiltInClass::UNQUALIFIED_8: return 1; + case BuiltInClass::BOOL_8: return 1; + case BuiltInClass::UNSIGNED_16: return 2; + case BuiltInClass::SIGNED_16: return 2; + case BuiltInClass::UNSIGNED_32: return 4; + case BuiltInClass::SIGNED_32: return 4; + case BuiltInClass::FLOAT_32: return 
4; + case BuiltInClass::UNSIGNED_64: return 8; + case BuiltInClass::SIGNED_64: return 8; + case BuiltInClass::FLOAT_64: return 8; + case BuiltInClass::UNSIGNED_128: return 16; + case BuiltInClass::SIGNED_128: return 16; + case BuiltInClass::UNQUALIFIED_128: return 16; + case BuiltInClass::FLOAT_128: return 16; + } + return 0; +} + +} diff --git a/3rdparty/ccc/src/ccc/ast.h b/3rdparty/ccc/src/ccc/ast.h new file mode 100644 index 0000000000..a34fa2e446 --- /dev/null +++ b/3rdparty/ccc/src/ccc/ast.h @@ -0,0 +1,377 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc::ast { + +enum NodeDescriptor : u8 { + ARRAY, + BITFIELD, + BUILTIN, + ENUM, + ERROR_NODE, + FUNCTION, + POINTER_OR_REFERENCE, + POINTER_TO_DATA_MEMBER, + STRUCT_OR_UNION, + TYPE_NAME +}; + +enum AccessSpecifier { + AS_PUBLIC = 0, + AS_PROTECTED = 1, + AS_PRIVATE = 2 +}; + +// To add a new type of node: +// 1. Add it to the NodeDescriptor enum. +// 2. Create a struct for it. +// 3. Add support for it in for_each_node. +// 4. Add support for it in compute_size_bytes_recursive. +// 5. Add support for it in compare_nodes. +// 6. Add support for it in node_type_to_string. +// 7. Add support for it in CppPrinter::ast_node. +// 8. Add support for it in write_json. +// 9. Add support for it in refine_node. +struct Node { + const NodeDescriptor descriptor; + u8 is_const : 1 = false; + u8 is_volatile : 1 = false; + u8 is_virtual_base_class : 1 = false; + u8 is_vtable_pointer : 1 = false; + u8 is_constructor_or_destructor : 1 = false; + u8 is_special_member_function : 1 = false; + u8 is_operator_member_function : 1 = false; + u8 cannot_compute_size : 1 = false; + u8 storage_class : 4 = STORAGE_CLASS_NONE; + u8 access_specifier : 2 = AS_PUBLIC; + + s32 size_bytes = -1; + + // If the name isn't populated for a given node, the name from the last + // ancestor to have one should be used i.e. 
when processing the tree you + // should pass the name down. + std::string name; + + s32 offset_bytes = -1; // Offset relative to start of last inline struct/union. + s32 size_bits = -1; // Size stored in the .mdebug symbol table, may not be set. + + Node(NodeDescriptor d) : descriptor(d) {} + Node(const Node& rhs) = default; + virtual ~Node() {} + + template + SubType& as() { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + template + const SubType& as() const { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + template + static std::pair as(const Node& lhs, const Node& rhs) { + CCC_ASSERT(lhs.descriptor == SubType::DESCRIPTOR && rhs.descriptor == SubType::DESCRIPTOR); + return std::pair(static_cast(lhs), static_cast(rhs)); + } + + void set_access_specifier(AccessSpecifier specifier, u32 importer_flags); + + // If this node is a type name, repeatedly resolve it to the type it's + // referencing, otherwise return (this, nullptr). + std::pair physical_type(SymbolDatabase& database, s32 max_depth = 100); + std::pair physical_type(const SymbolDatabase& database, s32 max_depth = 100) const; +}; + +struct Array : Node { + std::unique_ptr element_type; + s32 element_count = -1; + + Array() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = ARRAY; +}; + +struct BitField : Node { + s32 bitfield_offset_bits = -1; // Offset relative to the last byte (not the position of the underlying type!). 
+ std::unique_ptr underlying_type; + + BitField() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = BITFIELD; +}; + +enum class BuiltInClass { + VOID_TYPE, + UNSIGNED_8, SIGNED_8, UNQUALIFIED_8, BOOL_8, + UNSIGNED_16, SIGNED_16, + UNSIGNED_32, SIGNED_32, FLOAT_32, + UNSIGNED_64, SIGNED_64, FLOAT_64, + UNSIGNED_128, SIGNED_128, UNQUALIFIED_128, FLOAT_128 +}; + +struct BuiltIn : Node { + BuiltInClass bclass = BuiltInClass::VOID_TYPE; + + BuiltIn() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = BUILTIN; +}; + +struct Enum : Node { + std::vector> constants; + + Enum() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = ENUM; +}; + +struct Error : Node { + std::string message; + + Error() : Node(ERROR_NODE) {} + static const constexpr NodeDescriptor DESCRIPTOR = ERROR_NODE; +}; + +enum class MemberFunctionModifier { + NONE, + STATIC, + VIRTUAL +}; + +const char* member_function_modifier_to_string(MemberFunctionModifier modifier); + +struct Function : Node { + std::optional> return_type; + std::optional>> parameters; + MemberFunctionModifier modifier = MemberFunctionModifier::NONE; + s32 vtable_index = -1; + FunctionHandle definition_handle; // Filled in by fill_in_pointers_to_member_function_definitions. 
+ + Function() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = FUNCTION; +}; + +struct PointerOrReference : Node { + bool is_pointer = true; + std::unique_ptr value_type; + + PointerOrReference() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = POINTER_OR_REFERENCE; +}; + +struct PointerToDataMember : Node { + std::unique_ptr class_type; + std::unique_ptr member_type; + + PointerToDataMember() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = POINTER_TO_DATA_MEMBER; +}; + +struct StructOrUnion : Node { + bool is_struct = true; + std::vector> base_classes; + std::vector> fields; + std::vector> member_functions; + + StructOrUnion() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = STRUCT_OR_UNION; + + struct FlatField { + // The field itself. + const Node* node; + // The symbol that owns the node. + const DataType* symbol; + // Offset of the innermost enclosing base class in the object. + s32 base_offset = 0; + }; + + // Generate a flat list of all the fields in this class as well as all the + // base classes recursively, but only until the max_fields or max_depth + // limits are reached. Return true if all the fields were enumerated. + bool flatten_fields( + std::vector& output, + const DataType* symbol, + const SymbolDatabase& database, + bool skip_statics, + s32 base_offset = 0, + s32 max_fields = 100000, + s32 max_depth = 100) const; +}; + +enum class TypeNameSource : u8 { + REFERENCE, // A STABS type reference. + CROSS_REFERENCE, // A STABS cross reference. + UNNAMED_THIS // A this parameter (or return type) referencing an unnamed type. +}; + +const char* type_name_source_to_string(TypeNameSource source); + +enum class ForwardDeclaredType { + STRUCT, + UNION, + ENUM // Should be illegal but STABS supports cross references to enums so it's here. 
+}; + +const char* forward_declared_type_to_string(ForwardDeclaredType type); + +struct TypeName : Node { + DataTypeHandle data_type_handle; + TypeNameSource source = TypeNameSource::REFERENCE; + bool is_forward_declared = false; + + DataTypeHandle data_type_handle_unless_forward_declared() const; + + struct UnresolvedStabs { + std::string type_name; + SourceFileHandle referenced_file_handle; + StabsTypeNumber stabs_type_number; + std::optional type; + }; + + std::unique_ptr unresolved_stabs; + + TypeName() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = TYPE_NAME; +}; + +enum class CompareResultType { + MATCHES_NO_SWAP, // Both lhs and rhs are identical. + MATCHES_CONFUSED, // Both lhs and rhs are almost identical, and we don't know which is better. + MATCHES_FAVOUR_LHS, // Both lhs and rhs are almost identical, but lhs is better. + MATCHES_FAVOUR_RHS, // Both lhs and rhs are almost identical, but rhs is better. + DIFFERS, // The two nodes differ substantially. +}; + +enum class CompareFailReason { + NONE, + DESCRIPTOR, + STORAGE_CLASS, + NAME, + RELATIVE_OFFSET_BYTES, + ABSOLUTE_OFFSET_BYTES, + BITFIELD_OFFSET_BITS, + SIZE_BITS, + CONSTNESS, + ARRAY_ELEMENT_COUNT, + BUILTIN_CLASS, + FUNCTION_RETURN_TYPE_HAS_VALUE, + FUNCTION_PARAMAETER_COUNT, + FUNCTION_PARAMETERS_HAS_VALUE, + FUNCTION_MODIFIER, + ENUM_CONSTANTS, + BASE_CLASS_COUNT, + FIELDS_SIZE, + MEMBER_FUNCTION_COUNT, + VTABLE_GLOBAL, + TYPE_NAME, + VARIABLE_CLASS, + VARIABLE_TYPE, + VARIABLE_STORAGE, + VARIABLE_BLOCK +}; + +struct CompareResult { + CompareResult(CompareResultType type) : type(type), fail_reason(CompareFailReason::NONE) {} + CompareResult(CompareFailReason reason) : type(CompareResultType::DIFFERS), fail_reason(reason) {} + CompareResultType type; + CompareFailReason fail_reason; +}; + +// Compare two AST nodes and their children recursively. This will only check +// fields that will be equal for two versions of the same type from different +// translation units. 
+CompareResult compare_nodes(const Node& lhs, const Node& rhs, const SymbolDatabase* database, bool check_intrusive_fields); + +const char* compare_fail_reason_to_string(CompareFailReason reason); +const char* node_type_to_string(const Node& node); +const char* storage_class_to_string(StorageClass storage_class); +const char* access_specifier_to_string(AccessSpecifier specifier); +const char* builtin_class_to_string(BuiltInClass bclass); + +s32 builtin_class_size(BuiltInClass bclass); + +enum TraversalOrder { + PREORDER_TRAVERSAL, + POSTORDER_TRAVERSAL +}; + +enum ExplorationMode { + EXPLORE_CHILDREN, + DONT_EXPLORE_CHILDREN +}; + +template +void for_each_node(ThisNode& node, TraversalOrder order, Callback callback) +{ + if(order == PREORDER_TRAVERSAL && callback(node) == DONT_EXPLORE_CHILDREN) { + return; + } + switch(node.descriptor) { + case ARRAY: { + auto& array = node.template as(); + for_each_node(*array.element_type.get(), order, callback); + break; + } + case BITFIELD: { + auto& bitfield = node.template as(); + for_each_node(*bitfield.underlying_type.get(), order, callback); + break; + } + case BUILTIN: { + break; + } + case ENUM: { + break; + } + case ERROR_NODE: { + break; + } + case FUNCTION: { + auto& func = node.template as(); + if(func.return_type.has_value()) { + for_each_node(*func.return_type->get(), order, callback); + } + if(func.parameters.has_value()) { + for(auto& child : *func.parameters) { + for_each_node(*child.get(), order, callback); + } + } + break; + } + case POINTER_OR_REFERENCE: { + auto& pointer_or_reference = node.template as(); + for_each_node(*pointer_or_reference.value_type.get(), order, callback); + break; + } + case POINTER_TO_DATA_MEMBER: { + auto& pointer = node.template as(); + for_each_node(*pointer.class_type.get(), order, callback); + for_each_node(*pointer.member_type.get(), order, callback); + break; + } + case STRUCT_OR_UNION: { + auto& struct_or_union = node.template as(); + for(auto& child : 
struct_or_union.base_classes) { + for_each_node(*child.get(), order, callback); + } + for(auto& child : struct_or_union.fields) { + for_each_node(*child.get(), order, callback); + } + for(auto& child : struct_or_union.member_functions) { + for_each_node(*child.get(), order, callback); + } + break; + } + case TYPE_NAME: { + break; + } + } + if(order == POSTORDER_TRAVERSAL) { + callback(node); + } +} + +} diff --git a/3rdparty/ccc/src/ccc/elf.cpp b/3rdparty/ccc/src/ccc/elf.cpp new file mode 100644 index 0000000000..8c3ed99e57 --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf.cpp @@ -0,0 +1,125 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "elf.h" + +namespace ccc { + +Result ElfFile::parse(std::vector image) +{ + ElfFile elf; + elf.image = std::move(image); + + const ElfIdentHeader* ident = get_packed(elf.image, 0); + CCC_CHECK(ident, "ELF ident header out of range."); + CCC_CHECK(ident->magic == CCC_FOURCC("\x7f\x45\x4c\x46"), "Not an ELF file."); + CCC_CHECK(ident->e_class == ElfIdentClass::B32, "Wrong ELF class (not 32 bit)."); + + const ElfFileHeader* header = get_packed(elf.image, sizeof(ElfIdentHeader)); + CCC_CHECK(header, "ELF file header out of range."); + elf.file_header = *header; + + const ElfSectionHeader* shstr_section_header = get_packed(elf.image, header->shoff + header->shstrndx * sizeof(ElfSectionHeader)); + CCC_CHECK(shstr_section_header, "ELF section name header out of range."); + + for(u32 i = 0; i < header->shnum; i++) { + u64 header_offset = header->shoff + i * sizeof(ElfSectionHeader); + const ElfSectionHeader* section_header = get_packed(elf.image, header_offset); + CCC_CHECK(section_header, "ELF section header out of range."); + + const char* name = get_string(elf.image, shstr_section_header->offset + section_header->name); + CCC_CHECK(name, "ELF section name out of range."); // Validate the string lookup itself; section_header was already checked above. + + ElfSection& section = elf.sections.emplace_back(); + section.name = name; + section.header = 
*section_header; + } + + for(u32 i = 0; i < header->phnum; i++) { + u64 header_offset = header->phoff + i * sizeof(ElfProgramHeader); + const ElfProgramHeader* program_header = get_packed(elf.image, header_offset); + CCC_CHECK(program_header, "ELF program header out of range."); + + elf.segments.emplace_back(*program_header); + } + + return elf; +} + +Result ElfFile::create_section_symbols( + SymbolDatabase& database, const SymbolGroup& group) const +{ + for(const ElfSection& section : sections) { + Address address = Address::non_zero(section.header.addr); + + Result symbol = database.sections.create_symbol( + section.name, address, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(symbol); + + (*symbol)->set_size(section.header.size); + } + + return Result(); +} + +const ElfSection* ElfFile::lookup_section(const char* name) const +{ + for(const ElfSection& section : sections) { + if(section.name == name) { + return &section; + } + } + return nullptr; +} + +std::optional ElfFile::file_offset_to_virtual_address(u32 file_offset) const +{ + for(const ElfProgramHeader& segment : segments) { + if(file_offset >= segment.offset && file_offset < segment.offset + segment.filesz) { + return segment.vaddr + file_offset - segment.offset; + } + } + return std::nullopt; +} + +const ElfProgramHeader* ElfFile::entry_point_segment() const +{ + const ccc::ElfProgramHeader* entry_segment = nullptr; + for(const ccc::ElfProgramHeader& segment : segments) { + if(file_header.entry >= segment.vaddr && file_header.entry < segment.vaddr + segment.filesz) { + entry_segment = &segment; + } + } + return entry_segment; +} + +Result> ElfFile::get_virtual(u32 address, u32 size) const +{ + u32 end_address = address + size; + + if(end_address >= address) { + for(const ElfProgramHeader& segment : segments) { + if(address >= segment.vaddr && end_address <= segment.vaddr + segment.filesz) { + size_t begin_offset = segment.offset + (address - segment.vaddr); + size_t end_offset = begin_offset + 
size; + if(begin_offset <= image.size() && end_offset <= image.size()) { + return std::span(image.data() + begin_offset, image.data() + end_offset); + } + } + } + } + + return CCC_FAILURE("No ELF segment for address range 0x%x to 0x%x.", address, end_address); +} + +Result ElfFile::copy_virtual(u8* dest, u32 address, u32 size) const +{ + Result> block = get_virtual(address, size); + CCC_RETURN_IF_ERROR(block); + + memcpy(dest, block->data(), size); + + return Result(); +} + +} diff --git a/3rdparty/ccc/src/ccc/elf.h b/3rdparty/ccc/src/ccc/elf.h new file mode 100644 index 0000000000..33d89e759c --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf.h @@ -0,0 +1,156 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc { + +enum class ElfIdentClass : u8 { + B32 = 0x1, + B64 = 0x2 +}; + +CCC_PACKED_STRUCT(ElfIdentHeader, + /* 0x0 */ u32 magic; // 7f 45 4c 46 + /* 0x4 */ ElfIdentClass e_class; + /* 0x5 */ u8 endianess; + /* 0x6 */ u8 version; + /* 0x7 */ u8 os_abi; + /* 0x8 */ u8 abi_version; + /* 0x9 */ u8 pad[7]; +) + +enum class ElfFileType : u16 { + NONE = 0x00, + REL = 0x01, + EXEC = 0x02, + DYN = 0x03, + CORE = 0x04, + LOOS = 0xfe00, + HIOS = 0xfeff, + LOPROC = 0xff00, + HIPROC = 0xffff +}; + +enum class ElfMachine : u16 { + MIPS = 0x08 +}; + +CCC_PACKED_STRUCT(ElfFileHeader, + /* 0x10 */ ElfFileType type; + /* 0x12 */ ElfMachine machine; + /* 0x14 */ u32 version; + /* 0x18 */ u32 entry; + /* 0x1c */ u32 phoff; + /* 0x20 */ u32 shoff; + /* 0x24 */ u32 flags; + /* 0x28 */ u16 ehsize; + /* 0x2a */ u16 phentsize; + /* 0x2c */ u16 phnum; + /* 0x2e */ u16 shentsize; + /* 0x30 */ u16 shnum; + /* 0x32 */ u16 shstrndx; +) + +enum class ElfSectionType : u32 { + NULL_SECTION = 0x0, + PROGBITS = 0x1, + SYMTAB = 0x2, + STRTAB = 0x3, + RELA = 0x4, + HASH = 0x5, + DYNAMIC = 0x6, + NOTE = 0x7, + NOBITS = 0x8, + REL = 0x9, + SHLIB = 0xa, + DYNSYM = 0xb, + INIT_ARRAY = 0xe, + FINI_ARRAY = 
0xf, + PREINIT_ARRAY = 0x10, + GROUP = 0x11, + SYMTAB_SHNDX = 0x12, + NUM = 0x13, + LOOS = 0x60000000, + MIPS_DEBUG = 0x70000005 +}; + +CCC_PACKED_STRUCT(ElfSectionHeader, + /* 0x00 */ u32 name; + /* 0x04 */ ElfSectionType type; + /* 0x08 */ u32 flags; + /* 0x0c */ u32 addr; + /* 0x10 */ u32 offset; + /* 0x14 */ u32 size; + /* 0x18 */ u32 link; + /* 0x1c */ u32 info; + /* 0x20 */ u32 addralign; + /* 0x24 */ u32 entsize; +) + +struct ElfSection { + std::string name; + ElfSectionHeader header; +}; + +CCC_PACKED_STRUCT(ElfProgramHeader, + /* 0x00 */ u32 type; + /* 0x04 */ u32 offset; + /* 0x08 */ u32 vaddr; + /* 0x0c */ u32 paddr; + /* 0x10 */ u32 filesz; + /* 0x14 */ u32 memsz; + /* 0x18 */ u32 flags; + /* 0x1c */ u32 align; +) + +struct ElfFile { + ElfFileHeader file_header; + std::vector image; + std::vector sections; + std::vector segments; + + // Parse the ELF file header, section headers and program headers. + static Result parse(std::vector image); + + // Create a section object for each section header in the ELF file. + Result create_section_symbols(SymbolDatabase& database, const SymbolGroup& group) const; + + const ElfSection* lookup_section(const char* name) const; + std::optional file_offset_to_virtual_address(u32 file_offset) const; + + // Find the program header for the segment that contains the entry point. + const ElfProgramHeader* entry_point_segment() const; + + // Retrieve a block of data in an ELF file given its address and size. + Result> get_virtual(u32 address, u32 size) const; + + // Copy a block of data in an ELF file to the destination buffer given its + // address and size. + Result copy_virtual(u8* dest, u32 address, u32 size) const; + + // Retrieve an object of type T from an ELF file given its address. 
+ template + Result get_object_virtual(u32 address) const + { + Result> result = get_virtual(address, sizeof(T)); + CCC_RETURN_IF_ERROR(result); + + return *(T*) result->data(); + } + + // Retrieve an array of objects of type T from an ELF file given its + // address and element count. + template + Result> get_array_virtual(u32 address, u32 element_count) const + { + Result> result = get_virtual(address, element_count * sizeof(T)); + CCC_RETURN_IF_ERROR(result); + + return std::span((T*) result->data(), (T*) (result->data() + result->size())); + } +}; + +} diff --git a/3rdparty/ccc/src/ccc/elf_symtab.cpp b/3rdparty/ccc/src/ccc/elf_symtab.cpp new file mode 100644 index 0000000000..892ea5ec2d --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf_symtab.cpp @@ -0,0 +1,213 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "elf_symtab.h" + +#include "importer_flags.h" + +namespace ccc::elf { + +enum class SymbolBind : u8 { + LOCAL = 0, + GLOBAL = 1, + WEAK = 2, + NUM = 3, + GNU_UNIQUE = 10 +}; + +enum class SymbolType : u8 { + NOTYPE = 0, + OBJECT = 1, + FUNC = 2, + SECTION = 3, + FILE = 4, + COMMON = 5, + TLS = 6, + NUM = 7, + GNU_IFUNC = 10 +}; + +enum class SymbolVisibility { + DEFAULT = 0, + INTERNAL = 1, + HIDDEN = 2, + PROTECTED = 3 +}; + +CCC_PACKED_STRUCT(Symbol, + /* 0x0 */ u32 name; + /* 0x4 */ u32 value; + /* 0x8 */ u32 size; + /* 0xc */ u8 info; + /* 0xd */ u8 other; + /* 0xe */ u16 shndx; + + SymbolType type() const { return (SymbolType) (info & 0xf); } + SymbolBind bind() const { return (SymbolBind) (info >> 4); } + SymbolVisibility visibility() const { return (SymbolVisibility) (other & 0x3); } +) + +static const char* symbol_bind_to_string(SymbolBind bind); +static const char* symbol_type_to_string(SymbolType type); +static const char* symbol_visibility_to_string(SymbolVisibility visibility); + +Result import_symbols( + SymbolDatabase& database, + const SymbolGroup& group, + std::span symtab, + std::span 
strtab, + u32 importer_flags, + DemanglerFunctions demangler) +{ + for(u32 i = 0; i < symtab.size() / sizeof(Symbol); i++) { + const Symbol* symbol = get_packed(symtab, i * sizeof(Symbol)); + CCC_ASSERT(symbol); + + Address address; + if(symbol->value != 0) { + address = symbol->value; + } + + if(!address.valid() || symbol->visibility() != SymbolVisibility::DEFAULT) { + continue; + } + + if(!(importer_flags & DONT_DEDUPLICATE_SYMBOLS)) { + if(database.functions.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.global_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.local_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + } + + const char* string = get_string(strtab, symbol->name); + CCC_CHECK(string, "Symbol string out of range."); + + switch(symbol->type()) { + case SymbolType::NOTYPE: { + Result label = database.labels.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(label); + + // These symbols get emitted at the same addresses as functions + // and aren't extremely useful, so we want to mark them to + // prevent them from possibly being used as function names. 
+ (*label)->is_junk = + (*label)->name() == "__gnu_compiled_c" || + (*label)->name() == "__gnu_compiled_cplusplus" || + (*label)->name() == "gcc2_compiled."; + + break; + } + case SymbolType::OBJECT: { + if(symbol->size != 0) { + Result global_variable = database.global_variables.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(global_variable); + + if(*global_variable) { + (*global_variable)->set_size(symbol->size); + } + } else { + Result label = database.labels.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(label); + } + + break; + } + case SymbolType::FUNC: { + Result function = database.functions.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(function); + + if(*function) { + (*function)->set_size(symbol->size); + } + + break; + } + case SymbolType::FILE: { + Result source_file = database.source_files.create_symbol( + string, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(source_file); + + break; + } + default: {} + } + } + + return Result(); +} + +Result print_symbol_table(FILE* out, std::span symtab, std::span strtab) +{ + fprintf(out, "ELF SYMBOLS:\n"); + fprintf(out, " Num: Value Size Type Bind Vis Ndx Name\n"); + + for(u32 i = 0; i < symtab.size() / sizeof(Symbol); i++) { + const Symbol* symbol = get_packed(symtab, i * sizeof(Symbol)); + CCC_ASSERT(symbol); + + const char* type = symbol_type_to_string(symbol->type()); + const char* bind = symbol_bind_to_string(symbol->bind()); + const char* visibility = symbol_visibility_to_string(symbol->visibility()); + + const char* string = get_string(strtab, symbol->name); + CCC_CHECK(string, "Symbol string out of range."); + + fprintf(out, "%6u: %08x %5u %-7s %-7s %-7s %3u %s\n", + i, symbol->value, symbol->size, type, bind, visibility, symbol->shndx, string); + + } + + return Result(); +} + 
+static const char* symbol_bind_to_string(SymbolBind bind) +{ + switch(bind) { + case SymbolBind::LOCAL: return "LOCAL"; + case SymbolBind::GLOBAL: return "GLOBAL"; + case SymbolBind::WEAK: return "WEAK"; + case SymbolBind::NUM: return "NUM"; + case SymbolBind::GNU_UNIQUE: return "GNU_UNIQUE"; + } + return "ERROR"; +} + +static const char* symbol_type_to_string(SymbolType type) +{ + switch(type) { + case SymbolType::NOTYPE: return "NOTYPE"; + case SymbolType::OBJECT: return "OBJECT"; + case SymbolType::FUNC: return "FUNC"; + case SymbolType::SECTION: return "SECTION"; + case SymbolType::FILE: return "FILE"; + case SymbolType::COMMON: return "COMMON"; + case SymbolType::TLS: return "TLS"; + case SymbolType::NUM: return "NUM"; + case SymbolType::GNU_IFUNC: return "GNU_IFUNC"; + } + return "ERROR"; +} + +static const char* symbol_visibility_to_string(SymbolVisibility visibility) +{ + switch(visibility) { + case SymbolVisibility::DEFAULT: return "DEFAULT"; + case SymbolVisibility::INTERNAL: return "INTERNAL"; + case SymbolVisibility::HIDDEN: return "HIDDEN"; + case SymbolVisibility::PROTECTED: return "PROTECTED"; + } + return "ERROR"; +} + +} diff --git a/3rdparty/ccc/src/ccc/elf_symtab.h b/3rdparty/ccc/src/ccc/elf_symtab.h new file mode 100644 index 0000000000..3880de969f --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf_symtab.h @@ -0,0 +1,20 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc::elf { + +Result import_symbols( + SymbolDatabase& database, + const SymbolGroup& group, + std::span symtab, + std::span strtab, + u32 importer_flags, + DemanglerFunctions demangler); + +Result print_symbol_table(FILE* out, std::span symtab, std::span strtab); + +} diff --git a/3rdparty/ccc/src/ccc/importer_flags.cpp b/3rdparty/ccc/src/ccc/importer_flags.cpp new file mode 100644 index 0000000000..a05d6714ac --- /dev/null +++ b/3rdparty/ccc/src/ccc/importer_flags.cpp @@ -0,0 +1,95 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "importer_flags.h" + +namespace ccc { + +const std::vector IMPORTER_FLAGS = { + {DEMANGLE_PARAMETERS, "--demangle-parameters", { + "Include parameters in demangled function names." + }}, + {DEMANGLE_RETURN_TYPE, "--demangle-return-type", { + "Include return types at the end of demangled", + "function names if they're available." + }}, + {DONT_DEDUPLICATE_SYMBOLS, "--dont-deduplicate-symbols", { + "Do not deduplicate matching symbols from", + "different symbol tables. This options has no", + "effect on data types." + }}, + {DONT_DEDUPLICATE_TYPES, "--dont-deduplicate-types", { + "Do not deduplicate data types from different", + "translation units." + }}, + {DONT_DEMANGLE_NAMES, "--dont-demangle-names", { + "Do not demangle function names, global variable", + "names, or overloaded operator names." + }}, + {INCLUDE_GENERATED_MEMBER_FUNCTIONS, "--include-generated-functions", { + "Output member functions that were likely", + "automatically generated by the compiler." + }}, + {NO_ACCESS_SPECIFIERS, "--no-access-specifiers", { + "Do not print access specifiers." + }}, + {NO_MEMBER_FUNCTIONS, "--no-member-functions", { + "Do not print member functions." + }}, + {NO_OPTIMIZED_OUT_FUNCTIONS, "--no-optimized-out-functions", { + "Discard functions that were optimized out." 
+ }}, + {STRICT_PARSING, "--strict", { + "Make more types of errors fatal." + }}, + {TYPEDEF_ALL_ENUMS, "--typedef-all-enums", { + "Force all emitted C++ enums to be defined using", + "a typedef. With STABS, it is not always possible", + "to determine if an enum was like this in the", + "original source code, so this option should be", + "useful for reverse engineering C projects." + }}, + {TYPEDEF_ALL_STRUCTS, "--typedef-all-structs", { + "Force all emitted C++ structure types to be", + "defined using a typedef." + }}, + {TYPEDEF_ALL_UNIONS, "--typedef-all-unions", { + "Force all emitted C++ union types to be defined", + "using a typedef." + }}, + {UNIQUE_FUNCTIONS, "--unique-functions", { + " If multiple identical .mdebug function symbols", + "are present, find the one that seems to have", + "actually been included in the linked binary, and", + "remove the addresses from all the rest. Using", + "this importer flag in combination with", + "--no-optimized-out-functions will remove these", + "duplicate function symbols entirely." + }} +}; + +u32 parse_importer_flag(const char* argument) +{ + for(const ImporterFlagInfo& flag : IMPORTER_FLAGS) { + if(strcmp(flag.argument, argument) == 0) { + return flag.flag; + } + } + return NO_IMPORTER_FLAGS; +} + +void print_importer_flags_help(FILE* out) +{ + for(const ImporterFlagInfo& flag : IMPORTER_FLAGS) { + fprintf(out, "\n"); + fprintf(out, " %-29s ", flag.argument); + for(size_t i = 0; i < flag.help_text.size(); i++) { + if(i > 0) { + fprintf(out, " "); + } + fprintf(out, "%s\n", flag.help_text[i]); + } + } +} + +} diff --git a/3rdparty/ccc/src/ccc/importer_flags.h b/3rdparty/ccc/src/ccc/importer_flags.h new file mode 100644 index 0000000000..12ab79538a --- /dev/null +++ b/3rdparty/ccc/src/ccc/importer_flags.h @@ -0,0 +1,39 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "util.h" + +namespace ccc { + +enum ImporterFlags { + NO_IMPORTER_FLAGS = 0, + DEMANGLE_PARAMETERS = (1 << 0), + DEMANGLE_RETURN_TYPE = (1 << 1), + DONT_DEDUPLICATE_SYMBOLS = (1 << 2), + DONT_DEDUPLICATE_TYPES = (1 << 3), + DONT_DEMANGLE_NAMES = (1 << 4), + INCLUDE_GENERATED_MEMBER_FUNCTIONS = (1 << 5), + NO_ACCESS_SPECIFIERS = (1 << 6), + NO_MEMBER_FUNCTIONS = (1 << 7), + NO_OPTIMIZED_OUT_FUNCTIONS = (1 << 8), + STRICT_PARSING = (1 << 9), + TYPEDEF_ALL_ENUMS = (1 << 10), + TYPEDEF_ALL_STRUCTS = (1 << 11), + TYPEDEF_ALL_UNIONS = (1 << 12), + UNIQUE_FUNCTIONS = (1 << 13) +}; + +struct ImporterFlagInfo { + ImporterFlags flag; + const char* argument; + std::vector help_text; +}; + +extern const std::vector IMPORTER_FLAGS; + +u32 parse_importer_flag(const char* argument); +void print_importer_flags_help(FILE* out); + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_analysis.cpp b/3rdparty/ccc/src/ccc/mdebug_analysis.cpp new file mode 100644 index 0000000000..0309c858d4 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_analysis.cpp @@ -0,0 +1,349 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#include "mdebug_analysis.h" + +#include "stabs_to_ast.h" + +namespace ccc::mdebug { + +Result LocalSymbolTableAnalyser::stab_magic(const char* magic) +{ + return Result(); +} + +Result LocalSymbolTableAnalyser::source_file(const char* path, Address text_address) +{ + if(m_next_relative_path.empty()) { + m_next_relative_path = m_source_file.command_line_path; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::data_type(const ParsedSymbol& symbol) +{ + Result> node = stabs_type_to_ast( + *symbol.name_colon_type.type.get(), nullptr, m_stabs_to_ast_state, 0, false, false); + CCC_RETURN_IF_ERROR(node); + + if(symbol.is_typedef && (*node)->descriptor == ast::STRUCT_OR_UNION) { + ast::StructOrUnion& struct_or_union = (*node)->as(); + const std::string& name = symbol.name_colon_type.name; + StabsTypeNumber type_number = symbol.name_colon_type.type->type_number; + fix_recursively_emitted_structures(struct_or_union, name, type_number, m_stabs_to_ast_state.file_handle); + } + + bool is_struct = (*node)->descriptor == ast::STRUCT_OR_UNION && (*node)->as().is_struct; + bool force_typedef = + ((m_context.importer_flags & TYPEDEF_ALL_ENUMS) && (*node)->descriptor == ast::ENUM) || + ((m_context.importer_flags & TYPEDEF_ALL_STRUCTS) && (*node)->descriptor == ast::STRUCT_OR_UNION && is_struct) || + ((m_context.importer_flags & TYPEDEF_ALL_UNIONS) && (*node)->descriptor == ast::STRUCT_OR_UNION && !is_struct); + + (*node)->name = (symbol.name_colon_type.name == " ") ? 
"" : symbol.name_colon_type.name; + if(symbol.is_typedef || force_typedef) { + (*node)->storage_class = STORAGE_CLASS_TYPEDEF; + } + + const char* name = (*node)->name.c_str(); + StabsTypeNumber number = symbol.name_colon_type.type->type_number; + + if(m_context.importer_flags & DONT_DEDUPLICATE_TYPES) { + Result data_type = m_database.data_types.create_symbol( + name, m_context.group.source, m_context.group.module_symbol); + CCC_RETURN_IF_ERROR(data_type); + + m_source_file.stabs_type_number_to_handle[number] = (*data_type)->handle(); + (*data_type)->set_type(std::move(*node)); + + (*data_type)->files = {m_source_file.handle()}; + } else { + Result type = m_database.create_data_type_if_unique( + std::move(*node), number, name, m_source_file, m_context.group); + CCC_RETURN_IF_ERROR(type); + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::global_variable( + const char* mangled_name, Address address, const StabsType& type, bool is_static, GlobalStorageLocation location) +{ + Result global = m_database.global_variables.create_symbol( + mangled_name, m_context.group.source, m_context.group.module_symbol, address, m_context.importer_flags, m_context.demangler); + CCC_RETURN_IF_ERROR(global); + CCC_ASSERT(*global); + + m_global_variables.emplace_back((*global)->handle()); + + Result> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, false); + CCC_RETURN_IF_ERROR(node); + + if(is_static) { + (*global)->storage_class = STORAGE_CLASS_STATIC; + } + (*global)->set_type(std::move(*node)); + + (*global)->storage.location = location; + + return Result(); +} + +Result LocalSymbolTableAnalyser::sub_source_file(const char* path, Address text_address) +{ + if(m_current_function && m_state == IN_FUNCTION_BEGINNING) { + Function::SubSourceFile& sub = m_current_function->sub_source_files.emplace_back(); + sub.address = text_address; + sub.relative_path = path; + } else { + m_next_relative_path = path; + } + + return Result(); +} + +Result 
LocalSymbolTableAnalyser::procedure( + const char* mangled_name, Address address, const ProcedureDescriptor* procedure_descriptor, bool is_static) +{ + if(!m_current_function || strcmp(mangled_name, m_current_function->mangled_name().c_str()) != 0) { + Result result = create_function(mangled_name, address); + CCC_RETURN_IF_ERROR(result); + } + + if(is_static) { + m_current_function->storage_class = STORAGE_CLASS_STATIC; + } + + if(procedure_descriptor) { + m_current_function->stack_frame_size = procedure_descriptor->frame_size; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::label(const char* label, Address address, s32 line_number) +{ + if(address.valid() && m_current_function && label[0] == '$') { + Function::LineNumberPair& pair = m_current_function->line_numbers.emplace_back(); + pair.address = address; + pair.line_number = line_number; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::text_end(const char* name, s32 function_size) +{ + if(m_state == IN_FUNCTION_BEGINNING) { + CCC_CHECK(m_current_function, "END TEXT symbol outside of function."); + m_current_function->set_size(function_size); + m_state = IN_FUNCTION_END; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::function(const char* mangled_name, const StabsType& return_type, Address address) +{ + if(!m_current_function || strcmp(mangled_name, m_current_function->mangled_name().c_str()) != 0) { + Result result = create_function(mangled_name, address); + CCC_RETURN_IF_ERROR(result); + } else { + // For MTV Music Maker 2, the addresses for static functions stored in + // the PROC symbols are relative to the translation unit, while the + // addresses stored in the FUN symbol are absolute. This is the only + // game I've found that seems to have this problem, but since in all + // other cases it seems all these addresses are all absolute, I may as + // well add in a hack here to deal with it. 
+ bool no_module_base_address = m_context.group.module_symbol && m_context.group.module_symbol->address().get_or_zero() == 0; + bool new_address_greater = address.valid() && address > m_current_function->address(); + if(no_module_base_address && new_address_greater) { + m_database.functions.move_symbol(m_current_function->handle(), address); + } + } + + Result> node = stabs_type_to_ast(return_type, nullptr, m_stabs_to_ast_state, 0, true, true); + CCC_RETURN_IF_ERROR(node); + m_current_function->set_type(std::move(*node)); + + return Result(); +} + +Result LocalSymbolTableAnalyser::function_end() +{ + if(m_current_function) { + m_current_function->set_parameter_variables(std::move(m_current_parameter_variables), m_database); + m_current_function->set_local_variables(std::move(m_current_local_variables), m_database); + } + + m_current_function = nullptr; + m_current_parameter_variables = std::vector(); + m_current_local_variables = std::vector(); + + m_blocks.clear(); + m_pending_local_variables.clear(); + + m_state = NOT_IN_FUNCTION; + + return Result(); +} + +Result LocalSymbolTableAnalyser::parameter( + const char* name, const StabsType& type, bool is_stack, s32 value, bool is_by_reference) +{ + CCC_CHECK(m_current_function, "Parameter symbol before first func/proc symbol."); + + Result parameter_variable = m_database.parameter_variables.create_symbol( + name, m_context.group.source, m_context.group.module_symbol); + CCC_RETURN_IF_ERROR(parameter_variable); + + m_current_parameter_variables.emplace_back((*parameter_variable)->handle()); + + Result> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, true); + CCC_RETURN_IF_ERROR(node); + (*parameter_variable)->set_type(std::move(*node)); + + if(is_stack) { + StackStorage& stack_storage = (*parameter_variable)->storage.emplace(); + stack_storage.stack_pointer_offset = value; + } else { + RegisterStorage& register_storage = (*parameter_variable)->storage.emplace(); + 
register_storage.dbx_register_number = value; + register_storage.is_by_reference = is_by_reference; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::local_variable( + const char* name, const StabsType& type, u32 value, StabsSymbolDescriptor desc, SymbolClass sclass) +{ + if(!m_current_function) { + return Result(); + } + + Address address = (desc == StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE) ? value : Address(); + Result local_variable = m_database.local_variables.create_symbol( + name, address, m_context.group.source, m_context.group.module_symbol); + CCC_RETURN_IF_ERROR(local_variable); + + m_current_local_variables.emplace_back((*local_variable)->handle()); + m_pending_local_variables.emplace_back((*local_variable)->handle()); + + Result> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, false); + CCC_RETURN_IF_ERROR(node); + + if(desc == StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE) { + GlobalStorage& global_storage = (*local_variable)->storage.emplace(); + std::optional location_opt = + symbol_class_to_global_variable_location(sclass); + CCC_CHECK(location_opt.has_value(), + "Invalid static local variable location %s.", + symbol_class(sclass)); + global_storage.location = *location_opt; + (*node)->storage_class = STORAGE_CLASS_STATIC; + } else if(desc == StabsSymbolDescriptor::REGISTER_VARIABLE) { + RegisterStorage& register_storage = (*local_variable)->storage.emplace(); + register_storage.dbx_register_number = (s32) value; + } else if(desc == StabsSymbolDescriptor::LOCAL_VARIABLE) { + StackStorage& stack_storage = (*local_variable)->storage.emplace(); + stack_storage.stack_pointer_offset = (s32) value; + } else { + return CCC_FAILURE("LocalSymbolTableAnalyser::local_variable() called with bad symbol descriptor."); + } + + (*local_variable)->set_type(std::move(*node)); + + return Result(); +} + +Result LocalSymbolTableAnalyser::lbrac(s32 begin_offset) +{ + for(LocalVariableHandle local_variable_handle : 
m_pending_local_variables) { + if(LocalVariable* local_variable = m_database.local_variables.symbol_from_handle(local_variable_handle)) { + local_variable->live_range.low = m_source_file.address().value + begin_offset; + } + } + + m_blocks.emplace_back(std::move(m_pending_local_variables)); + m_pending_local_variables = {}; + + return Result(); +} +/* Closes the innermost open block: sets live_range.high of its variables to the source file address plus end_offset and pops it from m_blocks. Fails if no block is open. */ +Result LocalSymbolTableAnalyser::rbrac(s32 end_offset) +{ + CCC_CHECK(!m_blocks.empty(), "RBRAC symbol without a matching LBRAC symbol."); + + std::vector& variables = m_blocks.back(); + for(LocalVariableHandle local_variable_handle : variables) { + if(LocalVariable* local_variable = m_database.local_variables.symbol_from_handle(local_variable_handle)) { + local_variable->live_range.high = m_source_file.address().value + end_offset; + } + } + + m_blocks.pop_back(); + + return Result(); +} +/* Called after the last symbol of a file. Fails if the table ended while still in the IN_FUNCTION_BEGINNING state; otherwise ends any function that is still open and hands the collected function and global variable handles to the source file. */ +Result LocalSymbolTableAnalyser::finish() +{ + CCC_CHECK(m_state != IN_FUNCTION_BEGINNING, + "Unexpected end of symbol table for '%s'.", m_source_file.name().c_str()); + + if(m_current_function) { + Result result = function_end(); + CCC_RETURN_IF_ERROR(result); + } + + m_source_file.set_functions(std::move(m_functions), m_database); + m_source_file.set_global_variables(std::move(m_global_variables), m_database); + + return Result(); +} +/* Ends any function that is still open, creates a new function symbol, makes it the current function, appends its handle to m_functions and enters the IN_FUNCTION_BEGINNING state. */ +Result LocalSymbolTableAnalyser::create_function(const char* mangled_name, Address address) +{ + if(m_current_function) { + Result result = function_end(); + CCC_RETURN_IF_ERROR(result); + } + + Result function = m_database.functions.create_symbol( + mangled_name, m_context.group.source, m_context.group.module_symbol, address, m_context.importer_flags, m_context.demangler); + CCC_RETURN_IF_ERROR(function); + CCC_ASSERT(*function); + m_current_function = *function; + + m_functions.emplace_back(m_current_function->handle()); + + m_state = IN_FUNCTION_BEGINNING; + + if(!m_next_relative_path.empty() && m_current_function->relative_path != m_source_file.command_line_path) { + 
m_current_function->relative_path = m_next_relative_path; + } + + return Result(); +} +/* Maps an mdebug symbol class onto the GlobalStorageLocation of the same name. Returns an empty optional for every symbol class not listed in the switch. */ +std::optional symbol_class_to_global_variable_location(SymbolClass symbol_class) +{ + std::optional location; + switch(symbol_class) { + case SymbolClass::NIL: location = GlobalStorageLocation::NIL; break; + case SymbolClass::DATA: location = GlobalStorageLocation::DATA; break; + case SymbolClass::BSS: location = GlobalStorageLocation::BSS; break; + case SymbolClass::ABS: location = GlobalStorageLocation::ABS; break; + case SymbolClass::SDATA: location = GlobalStorageLocation::SDATA; break; + case SymbolClass::SBSS: location = GlobalStorageLocation::SBSS; break; + case SymbolClass::RDATA: location = GlobalStorageLocation::RDATA; break; + case SymbolClass::COMMON: location = GlobalStorageLocation::COMMON; break; + case SymbolClass::SCOMMON: location = GlobalStorageLocation::SCOMMON; break; + case SymbolClass::SUNDEFINED: location = GlobalStorageLocation::SUNDEFINED; break; + default: {} + } + return location; +} + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_analysis.h b/3rdparty/ccc/src/ccc/mdebug_analysis.h new file mode 100644 index 0000000000..61cc9b0184 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_analysis.h @@ -0,0 +1,99 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "importer_flags.h" +#include "mdebug_section.h" +#include "mdebug_symbols.h" +#include "stabs.h" +#include "stabs_to_ast.h" +#include "symbol_database.h" + +namespace ccc::mdebug { + +struct AnalysisContext { + const mdebug::SymbolTableReader* reader = nullptr; + const std::map* external_functions = nullptr; + const std::map* external_globals = nullptr; + SymbolGroup group; + u32 importer_flags = NO_IMPORTER_FLAGS; + DemanglerFunctions demangler; +}; + +class LocalSymbolTableAnalyser { +public: + LocalSymbolTableAnalyser(SymbolDatabase& database, const StabsToAstState& stabs_to_ast_state, const AnalysisContext& context, SourceFile& source_file) + : m_database(database) + , m_context(context) + , m_stabs_to_ast_state(stabs_to_ast_state) + , m_source_file(source_file) {} + + // Functions for processing individual symbols. + // + // In most cases these symbols will appear in the following order: + // PROC TEXT + // ... line numbers ... ($LM) + // END TEXT + // LABEL TEXT FUN + // ... parameters ... + // ... blocks ... (... local variables ... LBRAC ... subblocks ... RBRAC) + // NIL NIL FUN + // + // For some compiler versions the symbols can appear in this order: + // LABEL TEXT FUN + // ... parameters ... + // first line number ($LM1) + // PROC TEXT + // ... line numbers ... ($LM) + // END TEXT + // ... blocks ... (... local variables ... LBRAC ... subblocks ... 
RBRAC) + Result stab_magic(const char* magic); + Result source_file(const char* path, Address text_address); + Result data_type(const ParsedSymbol& symbol); + Result global_variable( + const char* mangled_name, Address address, const StabsType& type, bool is_static, GlobalStorageLocation location); + Result sub_source_file(const char* name, Address text_address); + Result procedure( + const char* mangled_name, Address address, const ProcedureDescriptor* procedure_descriptor, bool is_static); + Result label(const char* label, Address address, s32 line_number); + Result text_end(const char* name, s32 function_size); + Result function(const char* mangled_name, const StabsType& return_type, Address address); + Result function_end(); + Result parameter( + const char* name, const StabsType& type, bool is_stack, s32 value, bool is_by_reference); + Result local_variable( + const char* name, const StabsType& type, u32 value, StabsSymbolDescriptor desc, SymbolClass sclass); + Result lbrac(s32 begin_offset); + Result rbrac(s32 end_offset); + + Result finish(); + + Result create_function(const char* mangled_name, Address address); + +protected: + enum AnalysisState { + NOT_IN_FUNCTION, + IN_FUNCTION_BEGINNING, + IN_FUNCTION_END + }; + + SymbolDatabase& m_database; + const AnalysisContext& m_context; + const StabsToAstState& m_stabs_to_ast_state; + + AnalysisState m_state = NOT_IN_FUNCTION; + SourceFile& m_source_file; + std::vector m_functions; + std::vector m_global_variables; + Function* m_current_function = nullptr; + std::vector m_current_parameter_variables; + std::vector m_current_local_variables; + std::vector> m_blocks; + std::vector m_pending_local_variables; + std::string m_next_relative_path; +}; + +std::optional symbol_class_to_global_variable_location(SymbolClass symbol_class); + +}; diff --git a/3rdparty/ccc/src/ccc/mdebug_importer.cpp b/3rdparty/ccc/src/ccc/mdebug_importer.cpp new file mode 100644 index 0000000000..3038aede89 --- /dev/null +++ 
b/3rdparty/ccc/src/ccc/mdebug_importer.cpp @@ -0,0 +1,668 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "mdebug_importer.h" + +namespace ccc::mdebug { + +static Result resolve_type_names( + SymbolDatabase& database, const SymbolGroup& group, u32 importer_flags); +static Result resolve_type_name( + ast::TypeName& type_name, + SymbolDatabase& database, + const SymbolGroup& group, + u32 importer_flags); +static void compute_size_bytes(ast::Node& node, SymbolDatabase& database); +static void detect_duplicate_functions(SymbolDatabase& database, const SymbolGroup& group); +static void detect_fake_functions(SymbolDatabase& database, const std::map& external_functions, const SymbolGroup& group); +static void destroy_optimized_out_functions( + SymbolDatabase& database, const SymbolGroup& group); + +Result import_symbol_table( + SymbolDatabase& database, + std::span elf, + s32 section_offset, + const SymbolGroup& group, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt) +{ + SymbolTableReader reader; + + Result reader_result = reader.init(elf, section_offset); + CCC_RETURN_IF_ERROR(reader_result); + + Result> external_symbols = reader.parse_external_symbols(); + CCC_RETURN_IF_ERROR(external_symbols); + + // The addresses of the global variables aren't present in the local symbol + // table, so here we extract them from the external table. In addition, for + // some games we need to cross reference the function symbols in the local + // symbol table with the entries in the external symbol table. 
+ std::map external_functions; + std::map external_globals; + for(const mdebug::Symbol& external : *external_symbols) { + if(external.symbol_type == mdebug::SymbolType::PROC) { + external_functions[external.value] = &external; + } + + if(external.symbol_type == mdebug::SymbolType::GLOBAL + && (external.symbol_class != mdebug::SymbolClass::UNDEFINED)) { + external_globals[external.string] = &external; + } + } + + // Bundle together some unchanging state to pass to import_files. + AnalysisContext context; + context.reader = &reader; + context.external_functions = &external_functions; + context.external_globals = &external_globals; + context.group = group; + context.importer_flags = importer_flags; + context.demangler = demangler; + + Result result = import_files(database, context, interrupt); + CCC_RETURN_IF_ERROR(result); + + return Result(); +} +/* Imports every file provided by context.reader into the database, checking the optional interrupt flag before each file, and then runs the post-processing passes over the imported symbols. Returns at the first error. */ +Result import_files(SymbolDatabase& database, const AnalysisContext& context, const std::atomic_bool* interrupt) +{ + Result file_count = context.reader->file_count(); + CCC_RETURN_IF_ERROR(file_count); + + for(s32 i = 0; i < *file_count; i++) { + if(interrupt && *interrupt) { + return CCC_FAILURE("Operation interrupted by user."); + } + + Result file = context.reader->parse_file(i); + CCC_RETURN_IF_ERROR(file); + + Result result = import_file(database, *file, context); + CCC_RETURN_IF_ERROR(result); + } + + // The files field may be modified by further analysis passes, so we + // need to save this information here. + for(DataType& data_type : database.data_types) { + if(context.group.is_in_group(data_type) && data_type.files.size() == 1) { + data_type.only_defined_in_single_translation_unit = true; + } + } + + // Lookup data types and store data type handles in type names. + Result type_name_result = resolve_type_names(database, context.group, context.importer_flags); + CCC_RETURN_IF_ERROR(type_name_result); + + // Compute the size in bytes of all the AST nodes. 
+ database.for_each_symbol([&](ccc::Symbol& symbol) { + if(context.group.is_in_group(symbol) && symbol.type()) { + compute_size_bytes(*symbol.type(), database); + } + }); + + // Propagate the size information to the global variable symbols. + for(GlobalVariable& global_variable : database.global_variables) { + if(global_variable.type() && global_variable.type()->size_bytes > -1) { + global_variable.set_size((u32) global_variable.type()->size_bytes); + } + } + + // Propagate the size information to the static local variable symbols. + for(LocalVariable& local_variable : database.local_variables) { + bool is_static_local = std::holds_alternative(local_variable.storage); + if(is_static_local && local_variable.type() && local_variable.type()->size_bytes > -1) { + local_variable.set_size((u32) local_variable.type()->size_bytes); + } + } + + // Some games (e.g. Jet X2O) have multiple function symbols across different + // translation units with the same name and address. + if(context.importer_flags & UNIQUE_FUNCTIONS) { + detect_duplicate_functions(database, context.group); + } + + // If multiple functions appear at the same address, discard the addresses + // of all of them except the real one. + if(context.external_functions) { + detect_fake_functions(database, *context.external_functions, context.group); + } + + // Remove functions with no address. If there are any such functions, this + // will invalidate all pointers to symbols. + if(context.importer_flags & NO_OPTIMIZED_OUT_FUNCTIONS) { + destroy_optimized_out_functions(database, context.group); + } + + return Result(); +} +/* Imports the symbols of a single file: parses its STABS symbols, creates the SourceFile symbol for it, and feeds each parsed symbol to a LocalSymbolTableAnalyser. Returns at the first error. */ +Result import_file(SymbolDatabase& database, const mdebug::File& input, const AnalysisContext& context) +{ + // Parse the stab strings into a data structure that's vaguely + // one-to-one with the text-based representation. 
+ u32 importer_flags_for_this_file = context.importer_flags; + Result> symbols = parse_symbols(input.symbols, importer_flags_for_this_file); + CCC_RETURN_IF_ERROR(symbols); + + // In stabs, types can be referenced by their number from other stabs, + // so here we build a map of type numbers to the parsed types. + std::map stabs_types; + for(const ParsedSymbol& symbol : *symbols) { + if(symbol.type == ParsedSymbolType::NAME_COLON_TYPE) { + symbol.name_colon_type.type->enumerate_numbered_types(stabs_types); + } + } + + Result source_file = database.source_files.create_symbol( + input.full_path, input.address, context.group.source, context.group.module_symbol); + CCC_RETURN_IF_ERROR(source_file); + + (*source_file)->working_dir = input.working_dir; + (*source_file)->command_line_path = input.command_line_path; + + // Sometimes the INFO symbols contain information about what toolchain + // version was used for building the executable. + for(const mdebug::Symbol& symbol : input.symbols) { + if(symbol.symbol_class == mdebug::SymbolClass::INFO && strcmp(symbol.string, "@stabs") != 0) { + (*source_file)->toolchain_version_info.emplace(symbol.string); + } + } + + StabsToAstState stabs_to_ast_state; + stabs_to_ast_state.file_handle = (*source_file)->handle().value; + stabs_to_ast_state.stabs_types = &stabs_types; + stabs_to_ast_state.importer_flags = importer_flags_for_this_file; + stabs_to_ast_state.demangler = context.demangler; + + // Convert the parsed stabs symbols to a more standard C AST. 
+ LocalSymbolTableAnalyser analyser(database, stabs_to_ast_state, context, **source_file); + for(const ParsedSymbol& symbol : *symbols) { + if(symbol.duplicate) { + continue; + } + + switch(symbol.type) { + case ParsedSymbolType::NAME_COLON_TYPE: { + switch(symbol.name_colon_type.descriptor) { + case StabsSymbolDescriptor::LOCAL_FUNCTION: + case StabsSymbolDescriptor::GLOBAL_FUNCTION: { + const char* name = symbol.name_colon_type.name.c_str(); + const StabsType& type = *symbol.name_colon_type.type.get(); + Result result = analyser.function(name, type, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::REFERENCE_PARAMETER_A: + case StabsSymbolDescriptor::REGISTER_PARAMETER: + case StabsSymbolDescriptor::VALUE_PARAMETER: + case StabsSymbolDescriptor::REFERENCE_PARAMETER_V: { + const char* name = symbol.name_colon_type.name.c_str(); + const StabsType& type = *symbol.name_colon_type.type.get(); + bool is_stack_variable = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::VALUE_PARAMETER; + bool is_by_reference = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_A + || symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_V; + + Result result = analyser.parameter(name, type, is_stack_variable, symbol.raw->value, is_by_reference); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::REGISTER_VARIABLE: + case StabsSymbolDescriptor::LOCAL_VARIABLE: + case StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE: { + const char* name = symbol.name_colon_type.name.c_str(); + const StabsType& type = *symbol.name_colon_type.type.get(); + Result result = analyser.local_variable( + name, type, symbol.raw->value, symbol.name_colon_type.descriptor, symbol.raw->symbol_class); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::GLOBAL_VARIABLE: + case StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE: { + const char* name = 
symbol.name_colon_type.name.c_str(); + u32 address = -1; + std::optional location = + symbol_class_to_global_variable_location(symbol.raw->symbol_class); + if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::GLOBAL_VARIABLE) { + // The address for non-static global variables is + // only stored in the external symbol table (and + // the ELF symbol table), so we pull that + // information in here. + if(context.external_globals) { + auto global_symbol = context.external_globals->find(symbol.name_colon_type.name); + if(global_symbol != context.external_globals->end()) { + address = (u32) global_symbol->second->value; + location = symbol_class_to_global_variable_location(global_symbol->second->symbol_class); + } + } + } else { + // And for static global variables it's just stored + // in the local symbol table. + address = (u32) symbol.raw->value; + } + CCC_CHECK(location.has_value(), "Invalid global variable location.") + const StabsType& type = *symbol.name_colon_type.type.get(); + bool is_static = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE; + Result result = analyser.global_variable(name, address, type, is_static, *location); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::TYPE_NAME: + case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG: { + Result result = analyser.data_type(symbol); + CCC_RETURN_IF_ERROR(result); + break; + } + } + break; + } + case ParsedSymbolType::SOURCE_FILE: { + Result result = analyser.source_file(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::SUB_SOURCE_FILE: { + Result result = analyser.sub_source_file(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::LBRAC: { + Result result = analyser.lbrac(symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::RBRAC: { + Result result = analyser.rbrac(symbol.raw->value); + 
CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::FUNCTION_END: { + Result result = analyser.function_end(); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::NON_STABS: { + if(symbol.raw->symbol_class == mdebug::SymbolClass::TEXT) { + if(symbol.raw->symbol_type == mdebug::SymbolType::PROC) { + Result result = analyser.procedure(symbol.raw->string, symbol.raw->value, symbol.raw->procedure_descriptor, false); + CCC_RETURN_IF_ERROR(result); + } else if(symbol.raw->symbol_type == mdebug::SymbolType::STATICPROC) { + Result result = analyser.procedure(symbol.raw->string, symbol.raw->value, symbol.raw->procedure_descriptor, true); + CCC_RETURN_IF_ERROR(result); + } else if(symbol.raw->symbol_type == mdebug::SymbolType::LABEL) { + Result result = analyser.label(symbol.raw->string, symbol.raw->value, symbol.raw->index); + CCC_RETURN_IF_ERROR(result); + } else if(symbol.raw->symbol_type == mdebug::SymbolType::END) { + Result result = analyser.text_end(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + } + } + break; + } + } + } + + Result result = analyser.finish(); + CCC_RETURN_IF_ERROR(result); + + return Result(); +} +/* Runs resolve_type_name() on every TYPE_NAME node reachable from the types of the symbols in `group`. The traversal continues after a failure; the error returned is that of the last node that failed. */ +static Result resolve_type_names( + SymbolDatabase& database, const SymbolGroup& group, u32 importer_flags) +{ + Result result; + database.for_each_symbol([&](ccc::Symbol& symbol) { + if(group.is_in_group(symbol) && symbol.type()) { + ast::for_each_node(*symbol.type(), ast::PREORDER_TRAVERSAL, [&](ast::Node& node) { + if(node.descriptor == ast::TYPE_NAME) { + Result type_name_result = resolve_type_name(node.as(), database, group, importer_flags); + if(!type_name_result.success()) { + result = std::move(type_name_result); + } + } + return ast::EXPLORE_CHILDREN; + }); + } + }); + return result; +} +/* Replaces the unresolved STABS reference held by type_name (if any) with a data type handle: first by STABS type number, then by name, and finally by creating a forward declared type. Returns success immediately when there is nothing to resolve. */ +static Result resolve_type_name( + ast::TypeName& type_name, + SymbolDatabase& database, + const SymbolGroup& group, + u32 importer_flags) +{ + ast::TypeName::UnresolvedStabs* unresolved_stabs = 
type_name.unresolved_stabs.get(); + if(!unresolved_stabs) { + return Result(); + } + + // Lookup the type by its STABS type number. This path ensures that the + // correct type is found even if multiple types have the same name. + if(unresolved_stabs->referenced_file_handle != (u32) -1 && unresolved_stabs->stabs_type_number.valid()) { + const SourceFile* source_file = database.source_files.symbol_from_handle(unresolved_stabs->referenced_file_handle); + CCC_ASSERT(source_file); + auto handle = source_file->stabs_type_number_to_handle.find(unresolved_stabs->stabs_type_number); + if(handle != source_file->stabs_type_number_to_handle.end()) { + type_name.data_type_handle = handle->second.value; + type_name.is_forward_declared = false; + type_name.unresolved_stabs.reset(); + return Result(); + } + } + + // Looking up the type by its STABS type number failed, so look for it by + // its name instead. This happens when a type is forward declared but not + // defined in a given translation unit. + if(!unresolved_stabs->type_name.empty()) { + for(auto& name_handle : database.data_types.handles_from_name(unresolved_stabs->type_name)) { + DataType* data_type = database.data_types.symbol_from_handle(name_handle.second); + if(data_type && group.is_in_group(*data_type)) { + type_name.data_type_handle = name_handle.second.value; + type_name.is_forward_declared = true; + type_name.unresolved_stabs.reset(); + return Result(); + } + } + } + + // If this branch is taken it means the type name was probably from an + // automatically generated member function of a nested struct trying to + // reference the struct (for the this parameter). We shouldn't create a + // forward declared type in this case. + if(type_name.source == ast::TypeNameSource::UNNAMED_THIS) { + return Result(); + } + + // Type lookup failed. This happens when a type is forward declared in a + // translation unit with symbols but is not defined in one. 
We haven't + // already created a forward declared type, so we create one now. + std::unique_ptr forward_declared_node; + if(unresolved_stabs->type.has_value()) { + switch(*unresolved_stabs->type) { + case ast::ForwardDeclaredType::STRUCT: { + std::unique_ptr node = std::make_unique(); + node->is_struct = true; + forward_declared_node = std::move(node); + break; + } + case ast::ForwardDeclaredType::UNION: { + std::unique_ptr node = std::make_unique(); + node->is_struct = false; + forward_declared_node = std::move(node); + break; + } + case ast::ForwardDeclaredType::ENUM: { + std::unique_ptr node = std::make_unique(); + forward_declared_node = std::move(node); + break; + } + } + } + + if(forward_declared_node) { + Result forward_declared_type = database.data_types.create_symbol( + unresolved_stabs->type_name, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(forward_declared_type); + + (*forward_declared_type)->set_type(std::move(forward_declared_node)); + (*forward_declared_type)->not_defined_in_any_translation_unit = true; + + type_name.data_type_handle = (*forward_declared_type)->handle().value; + type_name.is_forward_declared = true; + type_name.unresolved_stabs.reset(); + + return Result(); + } + + const char* error_message = "Unresolved %s type name '%s' with STABS type number (%d,%d)."; + if(importer_flags & STRICT_PARSING) { + return CCC_FAILURE(error_message, + ast::type_name_source_to_string(type_name.source), + type_name.unresolved_stabs->type_name.c_str(), + type_name.unresolved_stabs->stabs_type_number.file, + type_name.unresolved_stabs->stabs_type_number.type); + } else { + CCC_WARN(error_message, + ast::type_name_source_to_string(type_name.source), + type_name.unresolved_stabs->type_name.c_str(), + type_name.unresolved_stabs->stabs_type_number.file, + type_name.unresolved_stabs->stabs_type_number.type); + } + + return Result(); +} + +static void compute_size_bytes(ast::Node& node, SymbolDatabase& database) +{ + for_each_node(node, 
ast::POSTORDER_TRAVERSAL, [&](ast::Node& node) { + // Skip nodes that have already been processed. + if(node.size_bytes > -1 || node.cannot_compute_size) { + return ast::EXPLORE_CHILDREN; + } + + // Can't compute size recursively. + node.cannot_compute_size = true; + + switch(node.descriptor) { + case ast::ARRAY: { + ast::Array& array = node.as(); + if(array.element_type->size_bytes > -1) { + array.size_bytes = array.element_type->size_bytes * array.element_count; + } + break; + } + case ast::BITFIELD: { + break; + } + case ast::BUILTIN: { + ast::BuiltIn& built_in = node.as(); + built_in.size_bytes = builtin_class_size(built_in.bclass); + break; + } + case ast::FUNCTION: { + break; + } + case ast::ENUM: { + node.size_bytes = 4; + break; + } + case ast::ERROR_NODE: { + break; + } + case ast::STRUCT_OR_UNION: { + node.size_bytes = node.size_bits / 8; + break; + } + case ast::POINTER_OR_REFERENCE: { + node.size_bytes = 4; + break; + } + case ast::POINTER_TO_DATA_MEMBER: { + break; + } + case ast::TYPE_NAME: { + ast::TypeName& type_name = node.as(); + DataType* resolved_type = database.data_types.symbol_from_handle(type_name.data_type_handle_unless_forward_declared()); + if(resolved_type) { + ast::Node* resolved_node = resolved_type->type(); + CCC_ASSERT(resolved_node); + if(resolved_node->size_bytes < 0 && !resolved_node->cannot_compute_size) { + compute_size_bytes(*resolved_node, database); + } + type_name.size_bytes = resolved_node->size_bytes; + } + break; + } + } + + if(node.size_bytes > -1) { + node.cannot_compute_size = false; + } + + return ast::EXPLORE_CHILDREN; + }); +} +/* For functions in `group` that share a starting address and a mangled name, keeps the address of one of them and discards the addresses of the others. Functions that have no valid address or that are outside the group are skipped. */ +static void detect_duplicate_functions(SymbolDatabase& database, const SymbolGroup& group) +{ + std::vector duplicate_functions; + + for(Function& test_function : database.functions) { + if(!test_function.address().valid() || !group.is_in_group(test_function)) { + continue; + } + + // Find cases where there are two or more functions at the same address. 
+ auto functions_with_same_address = database.functions.handles_from_starting_address(test_function.address()); + if(functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + if(++functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + + // Try to figure out the address of the translation unit which the + // version of the function that actually ended up in the linked binary + // comes from. We can't just check which source file the symbol comes + // from because it may be present in multiple. + u32 source_file_address = UINT32_MAX; + for(SourceFile& source_file : database.source_files) { + if(source_file.address() < test_function.address()) { + source_file_address = std::min(source_file.address().value, source_file_address); + } + } + + if(source_file_address == UINT32_MAX) { + continue; + } + /* NOTE(review): std::min over every source file below the function yields the lowest such address rather than the nearest one, and `address` is presumably the same for every entry of functions_with_same_address, so the first matching function is the one that keeps its address -- confirm this is intended. */ + // Remove the addresses from all the matching symbols from other + // translation units. + FunctionHandle best_handle; + u32 best_offset = UINT32_MAX; + for(const auto& [address, handle] : functions_with_same_address) { + ccc::Function* function = database.functions.symbol_from_handle(handle); + if(!function || !group.is_in_group(*function) || function->mangled_name() != test_function.mangled_name()) { + continue; + } + + if(address - source_file_address < best_offset) { + if(best_handle.valid()) { + duplicate_functions.emplace_back(best_handle); + } + best_handle = function->handle(); + best_offset = address - source_file_address; + } else { + duplicate_functions.emplace_back(function->handle()); + } + } + + for(FunctionHandle duplicate_function : duplicate_functions) { + database.functions.move_symbol(duplicate_function, Address()); + } + duplicate_functions.clear(); + } +} +/* For each function in `group` that shares its starting address with at least one other function, discards its address unless external_functions has an entry for that address with the same mangled name. Only the first ten discards are logged individually. */ +static void detect_fake_functions(SymbolDatabase& database, const std::map& external_functions, const SymbolGroup& group) +{ + // Find cases where multiple fake function symbols were emitted for a given + // address and 
cross-reference with the external symbol table to try and + // find which one is the real one. + s32 fake_function_count = 0; + for(Function& function : database.functions) { + if(!function.address().valid() || !group.is_in_group(function)) { + continue; + } + + // Find cases where there are two or more functions at the same address. + auto functions_with_same_address = database.functions.handles_from_starting_address(function.address()); + if(functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + if(++functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + + auto external_function = external_functions.find(function.address().value); + if(external_function == external_functions.end() || strcmp(function.mangled_name().c_str(), external_function->second->string) != 0) { + database.functions.move_symbol(function.handle(), Address()); + + if(fake_function_count < 10) { + CCC_WARN("Discarding address of function symbol '%s' as it is probably incorrect.", function.mangled_name().c_str()); + } else if(fake_function_count == 10) { + CCC_WARN("Discarding more addresses of function symbols."); + } + + fake_function_count++; + } + } +} +/* Marks every function in `group` that has no valid address for destruction and, if at least one was marked, destroys the marked symbols. */ +static void destroy_optimized_out_functions( + SymbolDatabase& database, const SymbolGroup& group) +{ + bool marked = false; + + for(Function& function : database.functions) { + if(group.is_in_group(function) && !function.address().valid()) { + function.mark_for_destruction(); + marked = true; + } + } + + if(marked) { + // This will invalidate all pointers to symbols in the database. + database.destroy_marked_symbols(); + } +} + +void fill_in_pointers_to_member_function_definitions(SymbolDatabase& database) +{ + // Fill in pointers from member function declaration to corresponding definitions. 
+ for(Function& function : database.functions) { + const std::string& qualified_name = function.name(); + std::string::size_type name_separator_pos = qualified_name.find_last_of("::"); + if(name_separator_pos == std::string::npos || name_separator_pos < 2) { + continue; + } + /* find_last_of() treats "::" as a set of characters, so name_separator_pos is the index of the last ':' in the name (the second colon of the final "::"); the +1, -1 and -2 offsets below rely on that. */ + std::string function_name = qualified_name.substr(name_separator_pos + 1); + + // This won't work for some template types. + std::string::size_type type_separator_pos = qualified_name.find_last_of("::", name_separator_pos - 2); + std::string type_name; + if(type_separator_pos != std::string::npos) { + type_name = qualified_name.substr(type_separator_pos + 1, name_separator_pos - type_separator_pos - 2); + } else { + type_name = qualified_name.substr(0, name_separator_pos - 1); + } + + for(const auto& name_handle : database.data_types.handles_from_name(type_name)) { + DataType* data_type = database.data_types.symbol_from_handle(name_handle.second); + if(!data_type || !data_type->type() || data_type->type()->descriptor != ast::STRUCT_OR_UNION) { + continue; + } + + ast::StructOrUnion& struct_or_union = data_type->type()->as(); + for(std::unique_ptr& declaration : struct_or_union.member_functions) { + if(declaration->name == function_name) { + declaration->as().definition_handle = function.handle().value; + function.is_member_function_ish = true; + break; + } + } + + if(function.is_member_function_ish) { + break; + } + } + } +} + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_importer.h b/3rdparty/ccc/src/ccc/mdebug_importer.h new file mode 100644 index 0000000000..cec65497c9 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_importer.h @@ -0,0 +1,31 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include "mdebug_analysis.h" +#include "mdebug_section.h" +#include "symbol_database.h" + +namespace ccc::mdebug { + +// Perform all the main analysis passes on the mdebug symbol table and convert +// it to a set of C++ ASTs. 
+Result import_symbol_table( + SymbolDatabase& database, + std::span elf, + s32 section_offset, + const SymbolGroup& group, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt); +Result import_files(SymbolDatabase& database, const AnalysisContext& context, const std::atomic_bool* interrupt); +Result import_file(SymbolDatabase& database, const mdebug::File& input, const AnalysisContext& context); + +// Try to add pointers from member function declarations to their definitions +// using a heuristic. +void fill_in_pointers_to_member_function_definitions(SymbolDatabase& database); + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_section.cpp b/3rdparty/ccc/src/ccc/mdebug_section.cpp new file mode 100644 index 0000000000..676303aad2 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_section.cpp @@ -0,0 +1,474 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "mdebug_section.h" + +namespace ccc::mdebug { + +// MIPS debug symbol table headers. +// See include/coff/sym.h from GNU binutils for more information. 
+ +CCC_PACKED_STRUCT(SymbolicHeader, + /* 0x00 */ s16 magic; + /* 0x02 */ s16 version_stamp; + /* 0x04 */ s32 line_number_count; + /* 0x08 */ s32 line_numbers_size_bytes; + /* 0x0c */ s32 line_numbers_offset; + /* 0x10 */ s32 dense_numbers_count; + /* 0x14 */ s32 dense_numbers_offset; + /* 0x18 */ s32 procedure_descriptor_count; + /* 0x1c */ s32 procedure_descriptors_offset; + /* 0x20 */ s32 local_symbol_count; + /* 0x24 */ s32 local_symbols_offset; + /* 0x28 */ s32 optimization_symbols_count; + /* 0x2c */ s32 optimization_symbols_offset; + /* 0x30 */ s32 auxiliary_symbol_count; + /* 0x34 */ s32 auxiliary_symbols_offset; + /* 0x38 */ s32 local_strings_size_bytes; + /* 0x3c */ s32 local_strings_offset; + /* 0x40 */ s32 external_strings_size_bytes; + /* 0x44 */ s32 external_strings_offset; + /* 0x48 */ s32 file_descriptor_count; + /* 0x4c */ s32 file_descriptors_offset; + /* 0x50 */ s32 relative_file_descriptor_count; + /* 0x54 */ s32 relative_file_descriptors_offset; + /* 0x58 */ s32 external_symbols_count; + /* 0x5c */ s32 external_symbols_offset; +) + +CCC_PACKED_STRUCT(FileDescriptor, + /* 0x00 */ u32 address; + /* 0x04 */ s32 file_path_string_offset; + /* 0x08 */ s32 strings_offset; + /* 0x0c */ s32 cb_ss; + /* 0x10 */ s32 isym_base; + /* 0x14 */ s32 symbol_count; + /* 0x18 */ s32 line_number_entry_index_base; + /* 0x1c */ s32 cline; + /* 0x20 */ s32 optimization_entry_index_base; + /* 0x24 */ s32 copt; + /* 0x28 */ u16 ipd_first; + /* 0x2a */ u16 procedure_descriptor_count; + /* 0x2c */ s32 iaux_base; + /* 0x30 */ s32 caux; + /* 0x34 */ s32 rfd_base; + /* 0x38 */ s32 crfd; + /* 0x3c */ u32 lang : 5; + /* 0x3c */ u32 f_merge : 1; + /* 0x3c */ u32 f_readin : 1; + /* 0x3c */ u32 f_big_endian : 1; + /* 0x3c */ u32 reserved_1 : 22; + /* 0x40 */ s32 line_number_offset; + /* 0x44 */ s32 cb_line; +) +static_assert(sizeof(FileDescriptor) == 0x48); + +CCC_PACKED_STRUCT(SymbolHeader, + /* 0x0 */ u32 iss; + /* 0x4 */ u32 value; + /* 0x8 */ u32 st : 6; + /* 0x8 */ u32 sc : 
5; + /* 0x8 */ u32 reserved : 1; + /* 0x8 */ u32 index : 20; +) +static_assert(sizeof(SymbolHeader) == 0xc); + +CCC_PACKED_STRUCT(ExternalSymbolHeader, + /* 0x0 */ u16 flags; + /* 0x2 */ s16 ifd; + /* 0x4 */ SymbolHeader symbol; +) +static_assert(sizeof(ExternalSymbolHeader) == 0x10); + +static void print_symbol(FILE* out, const Symbol& symbol); +static void print_procedure_descriptor(FILE* out, const ProcedureDescriptor& procedure_descriptor); +static Result get_corruption_fixing_fudge_offset(s32 section_offset, const SymbolicHeader& hdrr); +static Result get_symbol(const SymbolHeader& header, std::span elf, s32 strings_offset); + +Result SymbolTableReader::init(std::span elf, s32 section_offset) +{ + m_elf = elf; + m_section_offset = section_offset; + + m_hdrr = get_packed(m_elf, m_section_offset); + CCC_CHECK(m_hdrr != nullptr, "MIPS debug section header out of bounds."); + CCC_CHECK(m_hdrr->magic == 0x7009, "Invalid symbolic header."); + + Result fudge_offset = get_corruption_fixing_fudge_offset(m_section_offset, *m_hdrr); + CCC_RETURN_IF_ERROR(fudge_offset); + m_fudge_offset = *fudge_offset; + + m_ready = true; + + return Result(); +} + +s32 SymbolTableReader::file_count() const +{ + CCC_ASSERT(m_ready); + return m_hdrr->file_descriptor_count; +} + +Result SymbolTableReader::parse_file(s32 index) const +{ + CCC_ASSERT(m_ready); + + File file; + + u64 fd_offset = m_hdrr->file_descriptors_offset + index * sizeof(FileDescriptor); + const FileDescriptor* fd_header = get_packed(m_elf, fd_offset + m_fudge_offset); + CCC_CHECK(fd_header != nullptr, "MIPS debug file descriptor out of bounds."); + CCC_CHECK(fd_header->f_big_endian == 0, "Not little endian or bad file descriptor table."); + + file.address = fd_header->address; + + s32 rel_raw_path_offset = fd_header->strings_offset + fd_header->file_path_string_offset; + s32 raw_path_offset = m_hdrr->local_strings_offset + rel_raw_path_offset + m_fudge_offset; + const char* command_line_path = get_string(m_elf, 
raw_path_offset); + if(command_line_path) { + file.command_line_path = command_line_path; + } + + // Parse local symbols. + for(s64 j = 0; j < fd_header->symbol_count; j++) { + u64 rel_symbol_offset = (fd_header->isym_base + j) * sizeof(SymbolHeader); + u64 symbol_offset = m_hdrr->local_symbols_offset + rel_symbol_offset + m_fudge_offset; + const SymbolHeader* symbol_header = get_packed(m_elf, symbol_offset); + CCC_CHECK(symbol_header != nullptr, "Symbol header out of bounds."); + + s32 strings_offset = m_hdrr->local_strings_offset + fd_header->strings_offset + m_fudge_offset; + Result sym = get_symbol(*symbol_header, m_elf, strings_offset); + CCC_RETURN_IF_ERROR(sym); + + bool string_offset_equal = (s32) symbol_header->iss == fd_header->file_path_string_offset; + if(file.working_dir.empty() && string_offset_equal && sym->is_stabs() && sym->code() == N_SO && file.symbols.size() > 2) { + const Symbol& working_dir = file.symbols.back(); + if(working_dir.is_stabs() && working_dir.code() == N_SO) { + file.working_dir = working_dir.string; + } + } + + file.symbols.emplace_back(std::move(*sym)); + } + + // Parse procedure descriptors. 
+ for(s64 i = 0; i < fd_header->procedure_descriptor_count; i++) { + u64 rel_procedure_offset = (fd_header->ipd_first + i) * sizeof(ProcedureDescriptor); + u64 procedure_offset = m_hdrr->procedure_descriptors_offset + rel_procedure_offset + m_fudge_offset; + const ProcedureDescriptor* procedure_descriptor = get_packed(m_elf, procedure_offset); + CCC_CHECK(procedure_descriptor != nullptr, "Procedure descriptor out of bounds."); + + CCC_CHECK(procedure_descriptor->symbol_index < file.symbols.size(), "Symbol index out of bounds."); + file.symbols[procedure_descriptor->symbol_index].procedure_descriptor = procedure_descriptor; + } + + + file.full_path = merge_paths(file.working_dir, file.command_line_path); + + return file; +} + +Result> SymbolTableReader::parse_external_symbols() const +{ + CCC_ASSERT(m_ready); + + std::vector external_symbols; + for(s64 i = 0; i < m_hdrr->external_symbols_count; i++) { + u64 sym_offset = m_hdrr->external_symbols_offset + i * sizeof(ExternalSymbolHeader); + const ExternalSymbolHeader* external_header = get_packed(m_elf, sym_offset + m_fudge_offset); + CCC_CHECK(external_header != nullptr, "External header out of bounds."); + + Result sym = get_symbol(external_header->symbol, m_elf, m_hdrr->external_strings_offset + m_fudge_offset); + CCC_RETURN_IF_ERROR(sym); + external_symbols.emplace_back(std::move(*sym)); + } + + return external_symbols; +} + +void SymbolTableReader::print_header(FILE* dest) const +{ + CCC_ASSERT(m_ready); + + fprintf(dest, "Symbolic Header, magic = %hx, vstamp = %hx:\n", + (u16) m_hdrr->magic, + (u16) m_hdrr->version_stamp); + fprintf(dest, "\n"); + fprintf(dest, " Offset Size (Bytes) Count\n"); + fprintf(dest, " ------ ------------ -----\n"); + fprintf(dest, " Line Numbers 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->line_numbers_offset, + (u32) m_hdrr->line_numbers_size_bytes, + m_hdrr->line_number_count); + fprintf(dest, " Dense Numbers 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->dense_numbers_offset, + (u32) 
m_hdrr->dense_numbers_count * 8, + m_hdrr->dense_numbers_count); + fprintf(dest, " Procedure Descriptors 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->procedure_descriptors_offset, + (u32) m_hdrr->procedure_descriptor_count * (u32) sizeof(ProcedureDescriptor), + m_hdrr->procedure_descriptor_count); + fprintf(dest, " Local Symbols 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->local_symbols_offset, + (u32) m_hdrr->local_symbol_count * (u32) sizeof(SymbolHeader), + m_hdrr->local_symbol_count); + fprintf(dest, " Optimization Symbols 0x%-8x " "- " "%-8d\n", + (u32) m_hdrr->optimization_symbols_offset, + m_hdrr->optimization_symbols_count); + fprintf(dest, " Auxiliary Symbols 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->auxiliary_symbols_offset, + (u32) m_hdrr->auxiliary_symbol_count * 4, + m_hdrr->auxiliary_symbol_count); + fprintf(dest, " Local Strings 0x%-8x " "0x%-8x " "-\n", + (u32) m_hdrr->local_strings_offset, + (u32) m_hdrr->local_strings_size_bytes); + fprintf(dest, " External Strings 0x%-8x " "0x%-8x " "-\n", + (u32) m_hdrr->external_strings_offset, + (u32) m_hdrr->external_strings_size_bytes); + fprintf(dest, " File Descriptors 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->file_descriptors_offset, + (u32) m_hdrr->file_descriptor_count * (u32) sizeof(FileDescriptor), + m_hdrr->file_descriptor_count); + fprintf(dest, " Relative File Descriptors 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->relative_file_descriptors_offset, + (u32) m_hdrr->relative_file_descriptor_count * 4, + m_hdrr->relative_file_descriptor_count); + fprintf(dest, " External Symbols 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->external_symbols_offset, + (u32) m_hdrr->external_symbols_count * 16, + m_hdrr->external_symbols_count); +} + +Result SymbolTableReader::print_symbols(FILE* out, bool print_locals, bool print_procedure_descriptors, bool print_externals) const +{ + if(print_locals || print_procedure_descriptors) { + s32 count = file_count(); + for(s32 i = 0; i < count; i++) { + Result file = 
parse_file(i); + CCC_RETURN_IF_ERROR(file); + + fprintf(out, "FILE %s:\n", file->command_line_path.c_str()); + for(const Symbol& symbol : file->symbols) { + if(print_locals || symbol.procedure_descriptor) { + print_symbol(out, symbol); + } + if(print_procedure_descriptors && symbol.procedure_descriptor) { + print_procedure_descriptor(out, *symbol.procedure_descriptor); + } + } + } + } + + if(print_externals) { + fprintf(out, "EXTERNAL SYMBOLS:\n"); + Result> external_symbols = parse_external_symbols(); + CCC_RETURN_IF_ERROR(external_symbols); + for(const Symbol& symbol : *external_symbols) { + print_symbol(out, symbol); + } + } + + return Result(); +} + +static void print_symbol(FILE* out, const Symbol& symbol) +{ + fprintf(out, " %8x ", symbol.value); + + const char* symbol_type_str = symbol_type(symbol.symbol_type); + if(symbol_type_str) { + fprintf(out, "%-11s ", symbol_type_str); + } else { + fprintf(out, "ST(%7u) ", (u32) symbol.symbol_type); + } + + const char* symbol_class_str = symbol_class(symbol.symbol_class); + if(symbol_class_str) { + fprintf(out, "%-4s ", symbol_class_str); + } else if ((u32) symbol.symbol_class == 0) { + fprintf(out, " "); + } else { + fprintf(out, "SC(%4u) ", (u32) symbol.symbol_class); + } + + if(symbol.is_stabs()) { + fprintf(out, "%-8s ", stabs_code_to_string(symbol.code())); + } else { + fprintf(out, "SI(%4u) ", symbol.index); + } + + fprintf(out, "%s\n", symbol.string); +} + +static void print_procedure_descriptor(FILE* out, const ProcedureDescriptor& procedure_descriptor) +{ + fprintf(out, " Address 0x%08x\n", procedure_descriptor.address); + fprintf(out, " Symbol Index %d\n", procedure_descriptor.symbol_index); + fprintf(out, " Line Number Entry Index %d\n", procedure_descriptor.line_number_entry_index); + fprintf(out, " Saved Register Mask 0x%08x\n", procedure_descriptor.saved_register_mask); + fprintf(out, " Saved Register Offset %d\n", procedure_descriptor.saved_register_offset); + fprintf(out, " Optimization Entry Index 
%d\n", procedure_descriptor.optimization_entry_index); + fprintf(out, " Saved Float Register Mask 0x%08x\n", procedure_descriptor.saved_float_register_mask); + fprintf(out, " Saved Float Register Offset %d\n", procedure_descriptor.saved_float_register_offset); + fprintf(out, " Frame Size %d\n", procedure_descriptor.frame_size); + fprintf(out, " Frame Pointer Register %hd\n", procedure_descriptor.frame_pointer_register); + fprintf(out, " Return PC Register %hd\n", procedure_descriptor.return_pc_register); + fprintf(out, " Line Number Low %d\n", procedure_descriptor.line_number_low); + fprintf(out, " Line Number High %d\n", procedure_descriptor.line_number_high); + fprintf(out, " Line Number Offset %d\n", procedure_descriptor.line_number_offset); +} + +static Result get_corruption_fixing_fudge_offset(s32 section_offset, const SymbolicHeader& hdrr) +{ + // GCC will always put the first part of the symbol table right after the + // header, so if the header says it's somewhere else we know the section has + // probably been moved without updating its contents. 
+ s32 right_after_header = INT32_MAX; + if(hdrr.line_numbers_offset > 0) right_after_header = std::min(hdrr.line_numbers_offset, right_after_header); + if(hdrr.dense_numbers_offset > 0) right_after_header = std::min(hdrr.dense_numbers_offset, right_after_header); + if(hdrr.procedure_descriptors_offset > 0) right_after_header = std::min(hdrr.procedure_descriptors_offset, right_after_header); + if(hdrr.local_symbols_offset > 0) right_after_header = std::min(hdrr.local_symbols_offset, right_after_header); + if(hdrr.optimization_symbols_offset > 0) right_after_header = std::min(hdrr.optimization_symbols_offset, right_after_header); + if(hdrr.auxiliary_symbols_offset > 0) right_after_header = std::min(hdrr.auxiliary_symbols_offset, right_after_header); + if(hdrr.local_strings_offset > 0) right_after_header = std::min(hdrr.local_strings_offset, right_after_header); + if(hdrr.external_strings_offset > 0) right_after_header = std::min(hdrr.external_strings_offset, right_after_header); + if(hdrr.file_descriptors_offset > 0) right_after_header = std::min(hdrr.file_descriptors_offset, right_after_header); + if(hdrr.relative_file_descriptors_offset > 0) right_after_header = std::min(hdrr.relative_file_descriptors_offset, right_after_header); + if(hdrr.external_symbols_offset > 0) right_after_header = std::min(hdrr.external_symbols_offset, right_after_header); + + CCC_CHECK(right_after_header >= 0 && right_after_header < INT32_MAX, "Invalid symbolic header."); + + // Figure out how much we need to adjust all the file offsets by. + s32 fudge_offset = section_offset - (right_after_header - sizeof(SymbolicHeader)); + if(fudge_offset != 0) { + CCC_WARN("The .mdebug section was moved without updating its contents. 
Adjusting file offsets by %d bytes.", fudge_offset); + } + + return fudge_offset; +} + +static Result get_symbol(const SymbolHeader& header, std::span elf, s32 strings_offset) +{ + Symbol symbol; + + const char* string = get_string(elf, strings_offset + header.iss); + CCC_CHECK(string, "Symbol has invalid string."); + symbol.string = string; + + symbol.value = header.value; + symbol.symbol_type = (SymbolType) header.st; + symbol.symbol_class = (SymbolClass) header.sc; + symbol.index = header.index; + + if(symbol.is_stabs()) { + CCC_CHECK(stabs_code_to_string(symbol.code()) != nullptr, "Bad stabs symbol code '%x'.", symbol.code()); + } + + return symbol; +} + +const char* symbol_type(SymbolType type) +{ + switch(type) { + case SymbolType::NIL: return "NIL"; + case SymbolType::GLOBAL: return "GLOBAL"; + case SymbolType::STATIC: return "STATIC"; + case SymbolType::PARAM: return "PARAM"; + case SymbolType::LOCAL: return "LOCAL"; + case SymbolType::LABEL: return "LABEL"; + case SymbolType::PROC: return "PROC"; + case SymbolType::BLOCK: return "BLOCK"; + case SymbolType::END: return "END"; + case SymbolType::MEMBER: return "MEMBER"; + case SymbolType::TYPEDEF: return "TYPEDEF"; + case SymbolType::FILE_SYMBOL: return "FILE"; + case SymbolType::STATICPROC: return "STATICPROC"; + case SymbolType::CONSTANT: return "CONSTANT"; + } + return nullptr; +} + +const char* symbol_class(SymbolClass symbol_class) +{ + switch(symbol_class) { + case SymbolClass::NIL: return "NIL"; + case SymbolClass::TEXT: return "TEXT"; + case SymbolClass::DATA: return "DATA"; + case SymbolClass::BSS: return "BSS"; + case SymbolClass::REGISTER: return "REGISTER"; + case SymbolClass::ABS: return "ABS"; + case SymbolClass::UNDEFINED: return "UNDEFINED"; + case SymbolClass::LOCAL: return "LOCAL"; + case SymbolClass::BITS: return "BITS"; + case SymbolClass::DBX: return "DBX"; + case SymbolClass::REG_IMAGE: return "REG_IMAGE"; + case SymbolClass::INFO: return "INFO"; + case SymbolClass::USER_STRUCT: return 
"USER_STRUCT"; + case SymbolClass::SDATA: return "SDATA"; + case SymbolClass::SBSS: return "SBSS"; + case SymbolClass::RDATA: return "RDATA"; + case SymbolClass::VAR: return "VAR"; + case SymbolClass::COMMON: return "COMMON"; + case SymbolClass::SCOMMON: return "SCOMMON"; + case SymbolClass::VAR_REGISTER: return "VAR_REGISTER"; + case SymbolClass::VARIANT: return "VARIANT"; + case SymbolClass::SUNDEFINED: return "SUNDEFINED"; + case SymbolClass::INIT: return "INIT"; + case SymbolClass::BASED_VAR: return "BASED_VAR"; + case SymbolClass::XDATA: return "XDATA"; + case SymbolClass::PDATA: return "PDATA"; + case SymbolClass::FINI: return "FINI"; + case SymbolClass::NONGP: return "NONGP"; + } + return nullptr; +} + +const char* stabs_code_to_string(StabsCode code) +{ + switch(code) { + case STAB: return "STAB"; + case N_GSYM: return "GSYM"; + case N_FNAME: return "FNAME"; + case N_FUN: return "FUN"; + case N_STSYM: return "STSYM"; + case N_LCSYM: return "LCSYM"; + case N_MAIN: return "MAIN"; + case N_PC: return "PC"; + case N_NSYMS: return "NSYMS"; + case N_NOMAP: return "NOMAP"; + case N_OBJ: return "OBJ"; + case N_OPT: return "OPT"; + case N_RSYM: return "RSYM"; + case N_M2C: return "M2C"; + case N_SLINE: return "SLINE"; + case N_DSLINE: return "DSLINE"; + case N_BSLINE: return "BSLINE"; + case N_EFD: return "EFD"; + case N_EHDECL: return "EHDECL"; + case N_CATCH: return "CATCH"; + case N_SSYM: return "SSYM"; + case N_SO: return "SO"; + case N_LSYM: return "LSYM"; + case N_BINCL: return "BINCL"; + case N_SOL: return "SOL"; + case N_PSYM: return "PSYM"; + case N_EINCL: return "EINCL"; + case N_ENTRY: return "ENTRY"; + case N_LBRAC: return "LBRAC"; + case N_EXCL: return "EXCL"; + case N_SCOPE: return "SCOPE"; + case N_RBRAC: return "RBRAC"; + case N_BCOMM: return "BCOMM"; + case N_ECOMM: return "ECOMM"; + case N_ECOML: return "ECOML"; + case N_NBTEXT: return "NBTEXT"; + case N_NBDATA: return "NBDATA"; + case N_NBBSS: return "NBBSS"; + case N_NBSTS: return "NBSTS"; + case 
N_NBLCS: return "NBLCS"; + case N_LENG: return "LENG"; + } + return nullptr; +} + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_section.h b/3rdparty/ccc/src/ccc/mdebug_section.h new file mode 100644 index 0000000000..df15d3e5d0 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_section.h @@ -0,0 +1,176 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "util.h" + +namespace ccc::mdebug { + +struct SymbolicHeader; + +enum class SymbolType : u32 { + NIL = 0, + GLOBAL = 1, + STATIC = 2, + PARAM = 3, + LOCAL = 4, + LABEL = 5, + PROC = 6, + BLOCK = 7, + END = 8, + MEMBER = 9, + TYPEDEF = 10, + FILE_SYMBOL = 11, + STATICPROC = 14, + CONSTANT = 15 +}; + +enum class SymbolClass : u32 { + NIL = 0, + TEXT = 1, + DATA = 2, + BSS = 3, + REGISTER = 4, + ABS = 5, + UNDEFINED = 6, + LOCAL = 7, + BITS = 8, + DBX = 9, + REG_IMAGE = 10, + INFO = 11, + USER_STRUCT = 12, + SDATA = 13, + SBSS = 14, + RDATA = 15, + VAR = 16, + COMMON = 17, + SCOMMON = 18, + VAR_REGISTER = 19, + VARIANT = 20, + SUNDEFINED = 21, + INIT = 22, + BASED_VAR = 23, + XDATA = 24, + PDATA = 25, + FINI = 26, + NONGP = 27 +}; + +// See stab.def from gcc for documentation on what all these are. 
+enum StabsCode { + STAB = 0x00, + N_GSYM = 0x20, + N_FNAME = 0x22, + N_FUN = 0x24, + N_STSYM = 0x26, + N_LCSYM = 0x28, + N_MAIN = 0x2a, + N_PC = 0x30, + N_NSYMS = 0x32, + N_NOMAP = 0x34, + N_OBJ = 0x38, + N_OPT = 0x3c, + N_RSYM = 0x40, + N_M2C = 0x42, + N_SLINE = 0x44, + N_DSLINE = 0x46, + N_BSLINE = 0x48, + N_EFD = 0x4a, + N_EHDECL = 0x50, + N_CATCH = 0x54, + N_SSYM = 0x60, + N_SO = 0x64, + N_LSYM = 0x80, + N_BINCL = 0x82, + N_SOL = 0x84, + N_PSYM = 0xa0, + N_EINCL = 0xa2, + N_ENTRY = 0xa4, + N_LBRAC = 0xc0, + N_EXCL = 0xc2, + N_SCOPE = 0xc4, + N_RBRAC = 0xe0, + N_BCOMM = 0xe2, + N_ECOMM = 0xe4, + N_ECOML = 0xe8, + N_NBTEXT = 0xf0, + N_NBDATA = 0xf2, + N_NBBSS = 0xf4, + N_NBSTS = 0xf6, + N_NBLCS = 0xf8, + N_LENG = 0xfe +}; + +CCC_PACKED_STRUCT(ProcedureDescriptor, + /* 0x00 */ u32 address; + /* 0x04 */ u32 symbol_index; + /* 0x08 */ s32 line_number_entry_index; + /* 0x0c */ s32 saved_register_mask; + /* 0x10 */ s32 saved_register_offset; + /* 0x14 */ s32 optimization_entry_index; + /* 0x18 */ s32 saved_float_register_mask; + /* 0x1c */ s32 saved_float_register_offset; + /* 0x20 */ s32 frame_size; + /* 0x24 */ s16 frame_pointer_register; + /* 0x26 */ s16 return_pc_register; + /* 0x28 */ s32 line_number_low; + /* 0x2c */ s32 line_number_high; + /* 0x30 */ u32 line_number_offset; +) +static_assert(sizeof(ProcedureDescriptor) == 0x34); + +struct Symbol { + u32 value; + SymbolType symbol_type; + SymbolClass symbol_class; + u32 index; + const char* string; + const ProcedureDescriptor* procedure_descriptor = nullptr; + + bool is_stabs() const { + return (index & 0xfff00) == 0x8f300; + } + + StabsCode code() const { + return (StabsCode) (index - 0x8f300); + } +}; + +struct File { + std::vector symbols; + u32 address = 0; + std::string working_dir; // The working directory of gcc. + std::string command_line_path; // The source file path passed on the command line to gcc. + std::string full_path; // The full combined path. 
+}; + +class SymbolTableReader { +public: + Result init(std::span elf, s32 section_offset); + + s32 file_count() const; + Result parse_file(s32 index) const; + Result> parse_external_symbols() const; + + void print_header(FILE* out) const; + Result print_symbols(FILE* out, bool print_locals, bool print_procedure_descriptors, bool print_externals) const; + +protected: + bool m_ready = false; + + std::span m_elf; + s32 m_section_offset; + + // If the .mdebug section was moved without updating its contents all the + // absolute file offsets stored within will be incorrect by a fixed amount. + s32 m_fudge_offset; + + const SymbolicHeader* m_hdrr; +}; + +const char* symbol_type(SymbolType type); +const char* symbol_class(SymbolClass symbol_class); +const char* stabs_code_to_string(StabsCode code); + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_symbols.cpp b/3rdparty/ccc/src/ccc/mdebug_symbols.cpp new file mode 100644 index 0000000000..eacd9bf806 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_symbols.cpp @@ -0,0 +1,220 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "mdebug_symbols.h" + +#include "importer_flags.h" + +namespace ccc::mdebug { + +static void mark_duplicate_symbols(std::vector& symbols); + +Result> parse_symbols(const std::vector& input, u32& importer_flags) +{ + std::vector output; + std::string prefix; + for(const mdebug::Symbol& symbol : input) { + if(symbol.is_stabs()) { + switch(symbol.code()) { + case mdebug::N_GSYM: // Global variable + case mdebug::N_FUN: // Function + case mdebug::N_STSYM: // Data section static global variable + case mdebug::N_LCSYM: // BSS section static global variable + case mdebug::N_RSYM: // Register variable + case mdebug::N_LSYM: // Automatic variable or type definition + case mdebug::N_PSYM: { // Parameter variable + // Some STABS symbols are split between multiple strings. 
+ if(symbol.string[0] != '\0') { + if(symbol.string[strlen(symbol.string) - 1] == '\\') { + prefix += std::string(symbol.string, symbol.string + strlen(symbol.string) - 1); + } else { + std::string merged_string; + const char* string; + if(!prefix.empty()) { + merged_string = prefix + symbol.string; + string = merged_string.c_str(); + prefix.clear(); + } else { + string = symbol.string; + } + + const char* input = string; + Result parse_result = parse_stabs_symbol(input); + if(parse_result.success()) { + if(*input != '\0') { + if(importer_flags & STRICT_PARSING) { + return CCC_FAILURE("Unknown data '%s' at the end of the '%s' stab.", input, parse_result->name.c_str()); + } else { + CCC_WARN("Unknown data '%s' at the end of the '%s' stab.", input, parse_result->name.c_str()); + } + } + + ParsedSymbol& parsed = output.emplace_back(); + parsed.type = ParsedSymbolType::NAME_COLON_TYPE; + parsed.raw = &symbol; + parsed.name_colon_type = std::move(*parse_result); + } else if(parse_result.error().message == STAB_TRUNCATED_ERROR_MESSAGE) { + // Symbol truncated due to a GCC bug. Report a + // warning and try to tolerate further faults + // caused as a result of this. 
+ CCC_WARN("%s Symbol string: %s", STAB_TRUNCATED_ERROR_MESSAGE, string); + importer_flags &= ~STRICT_PARSING; + } else { + return CCC_FAILURE("%s Symbol string: %s", + parse_result.error().message.c_str(), string); + } + } + } else { + CCC_CHECK(prefix.empty(), "Invalid STABS continuation."); + if(symbol.code() == mdebug::N_FUN) { + ParsedSymbol& func_end = output.emplace_back(); + func_end.type = ParsedSymbolType::FUNCTION_END; + func_end.raw = &symbol; + } + } + break; + } + case mdebug::N_SOL: { // Sub-source file + ParsedSymbol& sub = output.emplace_back(); + sub.type = ParsedSymbolType::SUB_SOURCE_FILE; + sub.raw = &symbol; + break; + } + case mdebug::N_LBRAC: { // Begin block + ParsedSymbol& begin_block = output.emplace_back(); + begin_block.type = ParsedSymbolType::LBRAC; + begin_block.raw = &symbol; + break; + } + case mdebug::N_RBRAC: { // End block + ParsedSymbol& end_block = output.emplace_back(); + end_block.type = ParsedSymbolType::RBRAC; + end_block.raw = &symbol; + break; + } + case mdebug::N_SO: { // Source filename + ParsedSymbol& so_symbol = output.emplace_back(); + so_symbol.type = ParsedSymbolType::SOURCE_FILE; + so_symbol.raw = &symbol; + break; + } + case mdebug::STAB: + case mdebug::N_OPT: + case mdebug::N_BINCL: + case mdebug::N_EINCL: { + break; + } + case mdebug::N_FNAME: + case mdebug::N_MAIN: + case mdebug::N_PC: + case mdebug::N_NSYMS: + case mdebug::N_NOMAP: + case mdebug::N_OBJ: + case mdebug::N_M2C: + case mdebug::N_SLINE: + case mdebug::N_DSLINE: + case mdebug::N_BSLINE: + case mdebug::N_EFD: + case mdebug::N_EHDECL: + case mdebug::N_CATCH: + case mdebug::N_SSYM: + case mdebug::N_ENTRY: + case mdebug::N_EXCL: + case mdebug::N_SCOPE: + case mdebug::N_BCOMM: + case mdebug::N_ECOMM: + case mdebug::N_ECOML: + case mdebug::N_NBTEXT: + case mdebug::N_NBDATA: + case mdebug::N_NBBSS: + case mdebug::N_NBSTS: + case mdebug::N_NBLCS: + case mdebug::N_LENG: { + CCC_WARN("Unhandled N_%s symbol: %s", mdebug::stabs_code_to_string(symbol.code()), 
symbol.string); + break; + } + } + } else { + ParsedSymbol& non_stabs_symbol = output.emplace_back(); + non_stabs_symbol.type = ParsedSymbolType::NON_STABS; + non_stabs_symbol.raw = &symbol; + } + } + + mark_duplicate_symbols(output); + + return output; +} + +static void mark_duplicate_symbols(std::vector& symbols) +{ + std::map stabs_type_number_to_symbol; + for(size_t i = 0; i < symbols.size(); i++) { + ParsedSymbol& symbol = symbols[i]; + if(symbol.type == ParsedSymbolType::NAME_COLON_TYPE) { + StabsType& type = *symbol.name_colon_type.type; + if(type.type_number.valid() && type.descriptor.has_value()) { + stabs_type_number_to_symbol.emplace(type.type_number, i); + } + } + } + + for(ParsedSymbol& symbol : symbols) { + symbol.is_typedef = + symbol.type == ParsedSymbolType::NAME_COLON_TYPE && + symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME && + symbol.name_colon_type.type->descriptor != StabsTypeDescriptor::ENUM; + } + + for(size_t i = 0; i < symbols.size(); i++) { + ParsedSymbol& symbol = symbols[i]; + if(symbol.type != ParsedSymbolType::NAME_COLON_TYPE) { + continue; + } + + bool is_type = + symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME || + symbol.name_colon_type.descriptor == StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG; + if(!is_type) { + continue; + } + + StabsType& type = *symbol.name_colon_type.type; + + if(!type.descriptor.has_value()) { + auto referenced_index = stabs_type_number_to_symbol.find(type.type_number); + if(referenced_index != stabs_type_number_to_symbol.end()) { + ParsedSymbol& referenced = symbols[referenced_index->second]; + if(referenced.name_colon_type.name == symbol.name_colon_type.name) { + // symbol: "Struct:T(1,1)=s1;" + // referenced: "Struct:t(1,1)" + symbol.duplicate = true; + } + } + } + + if(type.descriptor.has_value() && type.descriptor == StabsTypeDescriptor::TYPE_REFERENCE) { + auto referenced_index = stabs_type_number_to_symbol.find(type.as().type->type_number); + 
if(referenced_index != stabs_type_number_to_symbol.end() && referenced_index->second != i) { + ParsedSymbol& referenced = symbols[referenced_index->second]; + + if(referenced.name_colon_type.name == " ") { + // referenced: " :T(1,1)=e;" + // symbol: "ErraticEnum:t(1,2)=(1,1)" + referenced.name_colon_type.name = symbol.name_colon_type.name; + referenced.is_typedef = true; + symbol.duplicate = true; + } + + if(referenced.name_colon_type.name == symbol.name_colon_type.name) { + // referenced: "NamedTypedefedStruct:T(1,1)=s1;" + // symbol: "NamedTypedefedStruct:t(1,2)=(1,1)" + referenced.is_typedef = true; + symbol.duplicate = true; + } + } + } + } +} + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_symbols.h b/3rdparty/ccc/src/ccc/mdebug_symbols.h new file mode 100644 index 0000000000..1341dcbb20 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_symbols.h @@ -0,0 +1,32 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "util.h" +#include "stabs.h" +#include "mdebug_section.h" + +namespace ccc::mdebug { + +enum class ParsedSymbolType { + NAME_COLON_TYPE, + SOURCE_FILE, + SUB_SOURCE_FILE, + LBRAC, + RBRAC, + FUNCTION_END, + NON_STABS +}; + +struct ParsedSymbol { + ParsedSymbolType type; + const mdebug::Symbol* raw; + StabsSymbol name_colon_type; + bool duplicate = false; + bool is_typedef = false; +}; + +Result> parse_symbols(const std::vector& input, u32& importer_flags); + +} diff --git a/3rdparty/ccc/src/ccc/sndll.cpp b/3rdparty/ccc/src/ccc/sndll.cpp new file mode 100644 index 0000000000..894deba69e --- /dev/null +++ b/3rdparty/ccc/src/ccc/sndll.cpp @@ -0,0 +1,191 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#include "sndll.h" + +#include "importer_flags.h" + +namespace ccc { + +CCC_PACKED_STRUCT(SNDLLHeaderCommon, + /* 0x00 */ u32 magic; + /* 0x04 */ u32 relocations; + /* 0x08 */ u32 relocation_count; + /* 0x0c */ u32 symbols; + /* 0x10 */ u32 symbol_count; + /* 0x14 */ u32 elf_path; + /* 0x18 */ u32 load_func; + /* 0x1c */ u32 unload_func; + /* 0x20 */ u32 unknown_20; + /* 0x24 */ u32 unknown_24; + /* 0x28 */ u32 unknown_28; + /* 0x2c */ u32 file_size; + /* 0x30 */ u32 unknown_30; +) + +CCC_PACKED_STRUCT(SNDLLHeaderV1, + /* 0x00 */ SNDLLHeaderCommon common; +) + +CCC_PACKED_STRUCT(SNDLLHeaderV2, + /* 0x00 */ SNDLLHeaderCommon common; + /* 0x34 */ u32 unknown_34; + /* 0x38 */ u32 unknown_38; +) + +CCC_PACKED_STRUCT(SNDLLRelocation, + /* 0x0 */ u32 unknown_0; + /* 0x4 */ u32 unknown_4; + /* 0x8 */ u32 unknown_8; +) + +CCC_PACKED_STRUCT(SNDLLSymbolHeader, + /* 0x0 */ u32 string; + /* 0x4 */ u32 value; + /* 0x8 */ u8 unknown_8; + /* 0x9 */ u8 unknown_9; + /* 0xa */ SNDLLSymbolType type; + /* 0xb */ u8 processed; +) + +static Result parse_sndll_common( + std::span image, Address address, SNDLLType type, const SNDLLHeaderCommon& common, SNDLLVersion version); +static const char* sndll_symbol_type_to_string(SNDLLSymbolType type); + +Result parse_sndll_file(std::span image, Address address, SNDLLType type) +{ + const u32* magic = get_packed(image, 0); + CCC_CHECK((*magic & 0xffffff) == CCC_FOURCC("SNR\00"), "Not a SNDLL %s.", address.valid() ? 
"section" : "file"); + + char version = *magic >> 24; + switch(version) { + case '1': { + const SNDLLHeaderV1* header = get_packed(image, 0); + CCC_CHECK(header, "File too small to contain SNDLL V1 header."); + return parse_sndll_common(image, address, type, header->common, SNDLL_V1); + } + case '2': { + const SNDLLHeaderV2* header = get_packed(image, 0); + CCC_CHECK(header, "File too small to contain SNDLL V2 header."); + return parse_sndll_common(image, address, type, header->common, SNDLL_V2); + } + } + + return CCC_FAILURE("Unknown SNDLL version '%c'.", version); +} + +static Result parse_sndll_common( + std::span image, Address address, SNDLLType type, const SNDLLHeaderCommon& common, SNDLLVersion version) +{ + SNDLLFile sndll; + + sndll.address = address; + sndll.type = type; + sndll.version = version; + + if(common.elf_path) { + const char* elf_path = get_string(image, common.elf_path); + if(elf_path) { + sndll.elf_path = elf_path; + } + } + + CCC_CHECK(common.symbol_count < (32 * 1024 * 1024) / sizeof(SNDLLSymbol), "SNDLL symbol count is too high."); + sndll.symbols.reserve(common.symbol_count); + + for(u32 i = 0; i < common.symbol_count; i++) { + u32 symbol_offset = common.symbols - address.get_or_zero() + i * sizeof(SNDLLSymbolHeader); + const SNDLLSymbolHeader* symbol_header = get_packed(image, symbol_offset); + CCC_CHECK(symbol_header, "SNDLL symbol out of range."); + + const char* string = nullptr; + if(symbol_header->string) { + string = get_string(image, symbol_header->string - address.get_or_zero()); + } + + SNDLLSymbol& symbol = sndll.symbols.emplace_back(); + symbol.type = symbol_header->type; + symbol.value = symbol_header->value; + symbol.string = string; + } + + return sndll; +} + +Result import_sndll_symbols( + SymbolDatabase& database, + const SNDLLFile& sndll, + const SymbolGroup& group, + u32 importer_flags, + DemanglerFunctions demangler) +{ + for(const SNDLLSymbol& symbol : sndll.symbols) { + if(symbol.value == 0 || symbol.string.empty()) 
{ + continue; + } + + u32 address = symbol.value; + if(symbol.type != SNDLL_ABSOLUTE && sndll.type == SNDLLType::DYNAMIC_LIBRARY) { + address += sndll.address.get_or_zero(); + } + + if(!(importer_flags & DONT_DEDUPLICATE_SYMBOLS)) { + if(database.functions.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.global_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.local_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + } + + const Section* section = database.sections.symbol_overlapping_address(address); + if(section) { + if(section->contains_code()) { + Result function = database.functions.create_symbol( + symbol.string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(function); + continue; + } else if(section->contains_data()) { + Result global_variable = database.global_variables.create_symbol( + symbol.string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(global_variable); + continue; + } + } + + Result label = database.labels.create_symbol( + symbol.string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(label); + } + + return Result(); +} + +void print_sndll_symbols(FILE* out, const SNDLLFile& sndll) +{ + fprintf(out, "SNDLL SYMBOLS:\n"); + for(const SNDLLSymbol& symbol : sndll.symbols) { + const char* type = sndll_symbol_type_to_string(symbol.type); + const char* string = !symbol.string.empty() ? 
symbol.string.c_str() : "(no string)"; + fprintf(out, "%8s %08x %s\n", type, symbol.value, string); + } +} + +static const char* sndll_symbol_type_to_string(SNDLLSymbolType type) +{ + switch(type) { + case SNDLL_NIL: return "NIL"; + case SNDLL_EXTERNAL: return "EXTERNAL"; + case SNDLL_RELATIVE: return "RELATIVE"; + case SNDLL_WEAK: return "WEAK"; + case SNDLL_ABSOLUTE: return "ABSOLUTE"; + } + return "invalid"; +} + +} diff --git a/3rdparty/ccc/src/ccc/sndll.h b/3rdparty/ccc/src/ccc/sndll.h new file mode 100644 index 0000000000..50e284bd61 --- /dev/null +++ b/3rdparty/ccc/src/ccc/sndll.h @@ -0,0 +1,55 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc { + +enum class SNDLLType { + SNDATA_SECTION, + DYNAMIC_LIBRARY +}; + +enum SNDLLVersion { + SNDLL_V1, + SNDLL_V2 +}; + +enum SNDLLSymbolType : u8 { + SNDLL_NIL = 0, // I think this is just so that the first real symbol has an index of 1. + SNDLL_EXTERNAL = 1, // Symbol with an empty value, to be filled in from another module. + SNDLL_RELATIVE = 2, // Global symbol, value is relative to the start of the SNDLL file. + SNDLL_WEAK = 3, // Weak symbol, value is relative to the start of the SNDLL file. + SNDLL_ABSOLUTE = 4 // Global symbol, value is an absolute address. +}; + +struct SNDLLSymbol { + SNDLLSymbolType type = SNDLL_NIL; + u32 value = 0; + std::string string; +}; + +struct SNDLLFile { + Address address; + SNDLLType type; + SNDLLVersion version; + std::string elf_path; + std::vector symbols; +}; + +// If a valid address is passed, the pointers in the header will be treated as +// addresses, otherwise they will be treated as file offsets. 
+Result parse_sndll_file(std::span image, Address address, SNDLLType type); + +Result import_sndll_symbols( + SymbolDatabase& database, + const SNDLLFile& sndll, + const SymbolGroup& group, + u32 importer_flags, + DemanglerFunctions demangler); + +void print_sndll_symbols(FILE* out, const SNDLLFile& sndll); + +} diff --git a/3rdparty/ccc/src/ccc/stabs.cpp b/3rdparty/ccc/src/ccc/stabs.cpp new file mode 100644 index 0000000000..0fe10dec7f --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs.cpp @@ -0,0 +1,835 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "stabs.h" + +namespace ccc { + +#define STABS_DEBUG(...) //__VA_ARGS__ +#define STABS_DEBUG_PRINTF(...) STABS_DEBUG(printf(__VA_ARGS__);) + +static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor); +static Result> parse_stabs_type(const char*& input); +static Result> parse_field_list(const char*& input); +static Result> parse_member_functions(const char*& input); +static Result parse_visibility_character(const char*& input); +STABS_DEBUG(static void print_field(const StabsStructOrUnionType::Field& field);) + +const char* STAB_TRUNCATED_ERROR_MESSAGE = + "STABS symbol truncated. This was probably caused by a GCC bug. 
" + "Other symbols from the same translation unit may also be invalid."; + +Result parse_stabs_symbol(const char*& input) +{ + STABS_DEBUG_PRINTF("PARSING %s\n", input); + + StabsSymbol symbol; + + Result name = parse_dodgy_stabs_identifier(input, ':'); + CCC_RETURN_IF_ERROR(name); + + symbol.name = *name; + + CCC_EXPECT_CHAR(input, ':', "identifier"); + CCC_CHECK(*input != '\0', "Unexpected end of input."); + if((*input >= '0' && *input <= '9') || *input == '(') { + symbol.descriptor = StabsSymbolDescriptor::LOCAL_VARIABLE; + } else { + char symbol_descriptor = *(input++); + CCC_CHECK(symbol_descriptor != '\0', "Failed to parse symbol descriptor."); + symbol.descriptor = (StabsSymbolDescriptor) symbol_descriptor; + } + CCC_CHECK(validate_symbol_descriptor(symbol.descriptor), + "Invalid symbol descriptor '%c'.", + (char) symbol.descriptor); + CCC_CHECK(*input != '\0', "Unexpected end of input."); + if(*input == 't') { + input++; + } + + auto type = parse_top_level_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + + // Handle nested functions. + bool is_function = + symbol.descriptor == StabsSymbolDescriptor::LOCAL_FUNCTION || + symbol.descriptor == StabsSymbolDescriptor::GLOBAL_FUNCTION; + if(is_function && input[0] == ',') { + input++; + while(*input != ',' && *input != '\0') input++; // enclosing function + CCC_EXPECT_CHAR(input, ',', "nested function suffix"); + while(*input != ',' && *input != '\0') input++; // function + } + + symbol.type = std::move(*type); + + // Make sure that variable names aren't used as type names e.g. the STABS + // symbol "somevar:P123=*456" may be referenced by the type number 123, but + // the type name is not "somevar". 
+ bool is_type = symbol.descriptor == StabsSymbolDescriptor::TYPE_NAME + || symbol.descriptor == StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG; + if(is_type) { + symbol.type->name = symbol.name; + } + + symbol.type->is_typedef = symbol.descriptor == StabsSymbolDescriptor::TYPE_NAME; + symbol.type->is_root = true; + + return symbol; +} + +static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor) +{ + bool valid; + switch(descriptor) { + case StabsSymbolDescriptor::LOCAL_VARIABLE: + case StabsSymbolDescriptor::REFERENCE_PARAMETER_A: + case StabsSymbolDescriptor::LOCAL_FUNCTION: + case StabsSymbolDescriptor::GLOBAL_FUNCTION: + case StabsSymbolDescriptor::GLOBAL_VARIABLE: + case StabsSymbolDescriptor::REGISTER_PARAMETER: + case StabsSymbolDescriptor::VALUE_PARAMETER: + case StabsSymbolDescriptor::REGISTER_VARIABLE: + case StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE: + case StabsSymbolDescriptor::TYPE_NAME: + case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG: + case StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE: + case StabsSymbolDescriptor::REFERENCE_PARAMETER_V: + valid = true; + break; + default: + valid = false; + break; + } + return valid; +} + +Result> parse_top_level_stabs_type(const char*& input) +{ + Result> type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + + // Handle first base class suffixes. + if((*type)->descriptor == StabsTypeDescriptor::STRUCT && input[0] == '~' && input[1] == '%') { + input += 2; + + Result> first_base_class = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(first_base_class); + (*type)->as().first_base_class = std::move(*first_base_class); + + CCC_EXPECT_CHAR(input, ';', "first base class suffix"); + } + + // Handle extra live range information. 
+ if(input[0] == ';' && input[1] == 'l') { + input += 2; + CCC_EXPECT_CHAR(input, '(', "live range suffix"); + CCC_EXPECT_CHAR(input, '#', "live range suffix"); + std::optional start = parse_number_s32(input); + CCC_CHECK(start.has_value(), "Failed to parse live range suffix."); + CCC_EXPECT_CHAR(input, ',', "live range suffix"); + CCC_EXPECT_CHAR(input, '#', "live range suffix"); + std::optional end = parse_number_s32(input); + CCC_CHECK(end.has_value(), "Failed to parse live range suffix."); + CCC_EXPECT_CHAR(input, ')', "live range suffix"); + } + + return type; +} + +static Result> parse_stabs_type(const char*& input) +{ + StabsTypeNumber type_number; + + CCC_CHECK(*input != '\0', "Unexpected end of input."); + + if(*input == '(') { + // This file has type numbers made up of two pieces: an include file + // index and a type number. + + input++; + + std::optional file_index = parse_number_s32(input); + CCC_CHECK(file_index.has_value(), "Failed to parse type number (file index)."); + + CCC_EXPECT_CHAR(input, ',', "type number"); + + std::optional type_index = parse_number_s32(input); + CCC_CHECK(type_index.has_value(), "Failed to parse type number (type index)."); + + CCC_EXPECT_CHAR(input, ')', "type number"); + + type_number.file = *file_index; + type_number.type = *type_index; + + if(*input != '=') { + return std::make_unique(type_number); + } + input++; + } else if(*input >= '0' && *input <= '9') { + // This file has type numbers which are just a single number. This is + // the more common case for games. 
+ + std::optional type_index = parse_number_s32(input); + CCC_CHECK(type_index.has_value(), "Failed to parse type number."); + type_number.type = *type_index; + + if(*input != '=') { + return std::make_unique(type_number); + } + input++; + } + + CCC_CHECK(*input != '\0', "Unexpected end of input."); + + StabsTypeDescriptor descriptor; + if((*input >= '0' && *input <= '9') || *input == '(') { + descriptor = StabsTypeDescriptor::TYPE_REFERENCE; + } else { + char descriptor_char = *(input++); + CCC_CHECK(descriptor_char != '\0', "Failed to parse type descriptor."); + descriptor = (StabsTypeDescriptor) descriptor_char; + } + + std::unique_ptr out_type; + + switch(descriptor) { + case StabsTypeDescriptor::TYPE_REFERENCE: { // 0..9 + auto type_reference = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + type_reference->type = std::move(*type); + + out_type = std::move(type_reference); + break; + } + case StabsTypeDescriptor::ARRAY: { // a + auto array = std::make_unique(type_number); + + auto index_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(index_type); + array->index_type = std::move(*index_type); + + auto element_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(element_type); + array->element_type = std::move(*element_type); + + out_type = std::move(array); + break; + } + case StabsTypeDescriptor::ENUM: { // e + auto enum_type = std::make_unique(type_number); + STABS_DEBUG_PRINTF("enum {\n"); + while(*input != ';') { + std::optional name = parse_stabs_identifier(input, ':'); + CCC_CHECK(name.has_value(), "Failed to parse enum field name."); + + CCC_EXPECT_CHAR(input, ':', "enum"); + + std::optional value = parse_number_s32(input); + CCC_CHECK(value.has_value(), "Failed to parse enum value."); + + enum_type->fields.emplace_back(*value, std::move(*name)); + + CCC_EXPECT_CHAR(input, ',', "enum"); + } + input++; + STABS_DEBUG_PRINTF("}\n"); + + out_type = std::move(enum_type); + break; + } + case 
StabsTypeDescriptor::FUNCTION: { // f + auto function = std::make_unique(type_number); + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + function->return_type = std::move(*return_type); + + out_type = std::move(function); + break; + } + case StabsTypeDescriptor::VOLATILE_QUALIFIER: { // B + auto volatile_qualifier = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + volatile_qualifier->type = std::move(*type); + + out_type = std::move(volatile_qualifier); + break; + } + case StabsTypeDescriptor::CONST_QUALIFIER: { // k + auto const_qualifier = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + const_qualifier->type = std::move(*type); + + out_type = std::move(const_qualifier); + break; + } + case StabsTypeDescriptor::RANGE: { // r + auto range = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + range->type = std::move(*type); + + CCC_EXPECT_CHAR(input, ';', "range type descriptor"); + + std::optional low = parse_stabs_identifier(input, ';'); + CCC_CHECK(low.has_value(), "Failed to parse low part of range."); + CCC_EXPECT_CHAR(input, ';', "low range value"); + + std::optional high = parse_stabs_identifier(input, ';'); + CCC_CHECK(high.has_value(), "Failed to parse high part of range."); + CCC_EXPECT_CHAR(input, ';', "high range value"); + + range->low = std::move(*low); + range->high = std::move(*high); + + out_type = std::move(range); + break; + } + case StabsTypeDescriptor::STRUCT: { // s + auto struct_type = std::make_unique(type_number); + STABS_DEBUG_PRINTF("struct {\n"); + + std::optional struct_size = parse_number_s64(input); + CCC_CHECK(struct_size.has_value(), "Failed to parse struct size."); + struct_type->size = *struct_size; + + if(*input == '!') { + input++; + std::optional base_class_count = parse_number_s32(input); + CCC_CHECK(base_class_count.has_value(), 
"Failed to parse base class count."); + + CCC_EXPECT_CHAR(input, ',', "base class section"); + + for(s64 i = 0; i < *base_class_count; i++) { + StabsStructOrUnionType::BaseClass base_class; + + char is_virtual = *(input++); + switch(is_virtual) { + case '0': base_class.is_virtual = false; break; + case '1': base_class.is_virtual = true; break; + default: return CCC_FAILURE("Failed to parse base class (virtual character)."); + } + + Result visibility = parse_visibility_character(input); + CCC_RETURN_IF_ERROR(visibility); + base_class.visibility = *visibility; + + std::optional offset = parse_number_s32(input); + CCC_CHECK(offset.has_value(), "Failed to parse base class offset."); + base_class.offset = (s32) *offset; + + CCC_EXPECT_CHAR(input, ',', "base class section"); + + auto base_class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(base_class_type); + base_class.type = std::move(*base_class_type); + + CCC_EXPECT_CHAR(input, ';', "base class section"); + struct_type->base_classes.emplace_back(std::move(base_class)); + } + } + + auto fields = parse_field_list(input); + CCC_RETURN_IF_ERROR(fields); + struct_type->fields = std::move(*fields); + + auto member_functions = parse_member_functions(input); + CCC_RETURN_IF_ERROR(member_functions); + struct_type->member_functions = std::move(*member_functions); + + STABS_DEBUG_PRINTF("}\n"); + + out_type = std::move(struct_type); + break; + } + case StabsTypeDescriptor::UNION: { // u + auto union_type = std::make_unique(type_number); + STABS_DEBUG_PRINTF("union {\n"); + + std::optional union_size = parse_number_s64(input); + CCC_CHECK(union_size.has_value(), "Failed to parse struct size."); + union_type->size = *union_size; + + auto fields = parse_field_list(input); + CCC_RETURN_IF_ERROR(fields); + union_type->fields = std::move(*fields); + + auto member_functions = parse_member_functions(input); + CCC_RETURN_IF_ERROR(member_functions); + union_type->member_functions = std::move(*member_functions); + + 
STABS_DEBUG_PRINTF("}\n"); + + out_type = std::move(union_type); + break; + } + case StabsTypeDescriptor::CROSS_REFERENCE: { // x + auto cross_reference = std::make_unique(type_number); + + char cross_reference_type = *(input++); + CCC_CHECK(cross_reference_type != '\0', "Failed to parse cross reference type."); + + switch(cross_reference_type) { + case 'e': cross_reference->type = ast::ForwardDeclaredType::ENUM; break; + case 's': cross_reference->type = ast::ForwardDeclaredType::STRUCT; break; + case 'u': cross_reference->type = ast::ForwardDeclaredType::UNION; break; + default: + return CCC_FAILURE("Invalid cross reference type '%c'.", cross_reference->type); + } + + Result identifier = parse_dodgy_stabs_identifier(input, ':'); + CCC_RETURN_IF_ERROR(identifier); + cross_reference->identifier = std::move(*identifier); + + cross_reference->name = cross_reference->identifier; + CCC_EXPECT_CHAR(input, ':', "cross reference"); + + out_type = std::move(cross_reference); + break; + } + case StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { // R + auto fp_builtin = std::make_unique(type_number); + + std::optional fpclass = parse_number_s32(input); + CCC_CHECK(fpclass.has_value(), "Failed to parse floating point built-in class."); + fp_builtin->fpclass = *fpclass; + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + + std::optional bytes = parse_number_s32(input); + CCC_CHECK(bytes.has_value(), "Failed to parse floating point built-in."); + fp_builtin->bytes = *bytes; + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + + std::optional value = parse_number_s32(input); + CCC_CHECK(value.has_value(), "Failed to parse floating point built-in."); + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + + out_type = std::move(fp_builtin); + break; + } + case StabsTypeDescriptor::METHOD: { // # + auto method = std::make_unique(type_number); + + if(*input == '#') { + input++; + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + 
method->return_type = std::move(*return_type); + + if(*input == ';') { + input++; + } + } else { + auto class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(class_type); + method->class_type = std::move(*class_type); + + CCC_EXPECT_CHAR(input, ',', "method"); + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + method->return_type = std::move(*return_type); + + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + + CCC_EXPECT_CHAR(input, ',', "method"); + + auto parameter_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(parameter_type); + method->parameter_types.emplace_back(std::move(*parameter_type)); + } + } + + out_type = std::move(method); + break; + } + case StabsTypeDescriptor::REFERENCE: { // & + auto reference = std::make_unique(type_number); + + auto value_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(value_type); + reference->value_type = std::move(*value_type); + + out_type = std::move(reference); + break; + } + case StabsTypeDescriptor::POINTER: { // * + auto pointer = std::make_unique(type_number); + + auto value_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(value_type); + pointer->value_type = std::move(*value_type); + + out_type = std::move(pointer); + break; + } + case StabsTypeDescriptor::TYPE_ATTRIBUTE: { // @ + if((*input >= '0' && *input <= '9') || *input == '(') { + auto member_pointer = std::make_unique(type_number); + + auto class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(class_type); + member_pointer->class_type = std::move(*class_type); + + CCC_EXPECT_CHAR(input, ',', "pointer to non-static data member"); + + auto member_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(member_type); + member_pointer->member_type = std::move(*member_type); + + out_type = std::move(member_pointer); + } else { + auto type_attribute = std::make_unique(type_number); + CCC_CHECK(*input == 's', "Weird value following '@' type descriptor."); + input++; + + 
std::optional size_bits = parse_number_s64(input); + CCC_CHECK(size_bits.has_value(), "Failed to parse type attribute.") + type_attribute->size_bits = *size_bits; + CCC_EXPECT_CHAR(input, ';', "type attribute"); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + type_attribute->type = std::move(*type); + + out_type = std::move(type_attribute); + } + break; + } + case StabsTypeDescriptor::BUILTIN: { // - + auto built_in = std::make_unique(type_number); + + std::optional type_id = parse_number_s64(input); + CCC_CHECK(type_id.has_value(), "Failed to parse built-in."); + built_in->type_id = *type_id; + + CCC_EXPECT_CHAR(input, ';', "builtin"); + + out_type = std::move(built_in); + break; + } + default: { + return CCC_FAILURE( + "Invalid type descriptor '%c' (%02x).", + (u32) descriptor, (u32) descriptor); + } + } + + return out_type; +} + +static Result> parse_field_list(const char*& input) +{ + std::vector fields; + + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + + const char* before_field = input; + StabsStructOrUnionType::Field field; + + Result name = parse_dodgy_stabs_identifier(input, ':'); + CCC_RETURN_IF_ERROR(name); + field.name = std::move(*name); + + CCC_EXPECT_CHAR(input, ':', "identifier"); + if(*input == '/') { + input++; + + Result visibility = parse_visibility_character(input); + CCC_RETURN_IF_ERROR(visibility); + field.visibility = *visibility; + } + if(*input == ':') { + input = before_field; + break; + } + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + field.type = std::move(*type); + + if(field.name.size() >= 1 && field.name[0] == '$') { + // Virtual function table pointers and virtual base class pointers. 
+ CCC_EXPECT_CHAR(input, ',', "field type"); + + std::optional offset_bits = parse_number_s32(input); + CCC_CHECK(offset_bits.has_value(), "Failed to parse field offset."); + field.offset_bits = *offset_bits; + + CCC_EXPECT_CHAR(input, ';', "field offset"); + } else if(*input == ':') { + // Static fields. + input++; + field.is_static = true; + + std::optional type_name = parse_stabs_identifier(input, ';'); + CCC_CHECK(type_name.has_value(), "Failed to parse static field type name."); + + field.type_name = std::move(*type_name); + + CCC_EXPECT_CHAR(input, ';', "identifier"); + } else if(*input == ',') { + // Normal fields. + input++; + + std::optional offset_bits = parse_number_s32(input); + CCC_CHECK(offset_bits.has_value(), "Failed to parse field offset."); + field.offset_bits = *offset_bits; + + CCC_EXPECT_CHAR(input, ',', "field offset"); + + std::optional size_bits = parse_number_s32(input); + CCC_CHECK(size_bits.has_value(), "Failed to parse field size."); + field.size_bits = *size_bits; + + CCC_EXPECT_CHAR(input, ';', "field size"); + } else { + return CCC_FAILURE("Expected ':' or ',', got '%c' (%hhx).", *input, *input); + } + + STABS_DEBUG(print_field(field);) + + fields.emplace_back(std::move(field)); + } + + return fields; +} + +static Result> parse_member_functions(const char*& input) +{ + // Check for if the next character is from an enclosing field list. If this + // is the case, the next character will be ',' for normal fields and ':' for + // static fields (see above). 
+ if(*input == ',' || *input == ':') { + return std::vector(); + } + + std::vector member_functions; + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + StabsStructOrUnionType::MemberFunctionSet member_function_set; + + std::optional name = parse_stabs_identifier(input, ':'); + CCC_CHECK(name.has_value(), "Failed to parse member function name."); + member_function_set.name = std::move(*name); + + CCC_EXPECT_CHAR(input, ':', "member function"); + CCC_EXPECT_CHAR(input, ':', "member function"); + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + + StabsStructOrUnionType::MemberFunction function; + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + function.type = std::move(*type); + + CCC_EXPECT_CHAR(input, ':', "member function"); + std::optional identifier = parse_stabs_identifier(input, ';'); + CCC_CHECK(identifier.has_value(), "Invalid member function identifier."); + + CCC_EXPECT_CHAR(input, ';', "member function"); + + Result visibility = parse_visibility_character(input); + CCC_RETURN_IF_ERROR(visibility); + function.visibility = *visibility; + + char modifiers = *(input++); + CCC_CHECK(modifiers != '\0', "Failed to parse member function modifiers."); + switch(modifiers) { + case 'A': + function.is_const = false; + function.is_volatile = false; + break; + case 'B': + function.is_const = true; + function.is_volatile = false; + break; + case 'C': + function.is_const = false; + function.is_volatile = true; + break; + case 'D': + function.is_const = true; + function.is_volatile = true; + break; + case '?': + case '.': + break; + default: + return CCC_FAILURE("Invalid member function modifiers."); + } + + char flag = *(input++); + CCC_CHECK(flag != '\0', "Failed to parse member function type."); + switch(flag) { + case '.': { // normal member function + function.modifier = ast::MemberFunctionModifier::NONE; + break; + } + case '?': { // static member function + function.modifier = 
ast::MemberFunctionModifier::STATIC; + break; + } + case '*': { // virtual member function + std::optional vtable_index = parse_number_s32(input); + CCC_CHECK(vtable_index.has_value(), "Failed to parse vtable index."); + function.vtable_index = *vtable_index; + + CCC_EXPECT_CHAR(input, ';', "virtual member function"); + + auto virtual_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(virtual_type); + function.virtual_type = std::move(*virtual_type); + + CCC_EXPECT_CHAR(input, ';', "virtual member function"); + function.modifier = ast::MemberFunctionModifier::VIRTUAL; + break; + } + default: + return CCC_FAILURE("Invalid member function type."); + } + member_function_set.overloads.emplace_back(std::move(function)); + } + STABS_DEBUG_PRINTF("member func: %s\n", member_function_set.name.c_str()); + member_functions.emplace_back(std::move(member_function_set)); + } + return member_functions; +} + +static Result parse_visibility_character(const char*& input) +{ + char visibility = *(input++); + switch(visibility) { + case '0': return StabsStructOrUnionType::Visibility::PRIVATE; + case '1': return StabsStructOrUnionType::Visibility::PROTECTED; + case '2': return StabsStructOrUnionType::Visibility::PUBLIC; + case '9': return StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT; + default: break; + } + + return CCC_FAILURE("Failed to parse visibility character."); +} + +std::optional parse_number_s32(const char*& input) +{ + char* end; + s64 value = strtoll(input, &end, 10); + if(end == input) { + return std::nullopt; + } + input = end; + return (s32) value; +} + +std::optional parse_number_s64(const char*& input) +{ + char* end; + s64 value = strtoll(input, &end, 10); + if(end == input) { + return std::nullopt; + } + input = end; + return value; +} + +std::optional parse_stabs_identifier(const char*& input, char terminator) +{ + const char* begin = input; + for(; *input != '\0'; input++) { + if(*input == terminator) { + return std::string(begin, input); + } + } + 
return std::nullopt; +} + +// The complexity here is because the input may contain an unescaped namespace +// separator '::' even if the field terminator is supposed to be a colon, as +// well as the raw contents of character literals. See test/ccc/stabs_tests.cpp +// for some examples. +Result parse_dodgy_stabs_identifier(const char*& input, char terminator) +{ + const char* begin = input; + s32 template_depth = 0; + + for(; *input != '\0'; input++) { + // Skip past character literals. + if(*input == '\'') { + input++; + if(*input == '\'') { + input++; // Handle character literals containing a single quote. + } + while(*input != '\'' && *input != '\0') { + input++; + } + if(*input == '\0') { + break; + } + input++; + } + + // Keep track of the template depth so we know when to expect the + // terminator character. + if(*input == '<') { + template_depth++; + } + if(*input == '>') { + template_depth--; + } + + if(*input == terminator && template_depth == 0) { + return std::string(begin, input); + } + } + + return CCC_FAILURE(STAB_TRUNCATED_ERROR_MESSAGE); +} + +STABS_DEBUG( + +static void print_field(const StabsStructOrUnionType::Field& field) +{ + printf("\t%04x %04x %04x %04x %s\n", field.offset_bits / 8, field.size_bits / 8, field.offset_bits, field.size_bits, field.name.c_str()); +} + +) + +const char* stabs_field_visibility_to_string(StabsStructOrUnionType::Visibility visibility) +{ + switch(visibility) { + case StabsStructOrUnionType::Visibility::PRIVATE: return "private"; + case StabsStructOrUnionType::Visibility::PROTECTED: return "protected"; + case StabsStructOrUnionType::Visibility::PUBLIC: return "public"; + case StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT: return "public_optimizedout"; + default: return "none"; + } + return ""; +} + +} diff --git a/3rdparty/ccc/src/ccc/stabs.h b/3rdparty/ccc/src/ccc/stabs.h new file mode 100644 index 0000000000..fef413ba06 --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs.h @@ -0,0 +1,379 @@ +// This file is 
part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "ast.h" +#include "util.h" + +namespace ccc { + +enum class StabsSymbolDescriptor : u8 { + LOCAL_VARIABLE = '_', + REFERENCE_PARAMETER_A = 'a', + LOCAL_FUNCTION = 'f', + GLOBAL_FUNCTION = 'F', + GLOBAL_VARIABLE = 'G', + REGISTER_PARAMETER = 'P', + VALUE_PARAMETER = 'p', + REGISTER_VARIABLE = 'r', + STATIC_GLOBAL_VARIABLE = 'S', + TYPE_NAME = 't', + ENUM_STRUCT_OR_TYPE_TAG = 'T', + STATIC_LOCAL_VARIABLE = 'V', + REFERENCE_PARAMETER_V = 'v' +}; + +struct StabsType; + +struct StabsSymbol { + StabsSymbolDescriptor descriptor; + std::string name; + std::unique_ptr type; +}; + +Result parse_stabs_symbol(const char*& input); + +enum class StabsTypeDescriptor : u8 { + TYPE_REFERENCE = 0xef, // '0'..'9','(' + ARRAY = 'a', + ENUM = 'e', + FUNCTION = 'f', + CONST_QUALIFIER = 'k', + RANGE = 'r', + STRUCT = 's', + UNION = 'u', + CROSS_REFERENCE = 'x', + VOLATILE_QUALIFIER = 'B', + FLOATING_POINT_BUILTIN = 'R', + METHOD = '#', + REFERENCE = '&', + POINTER = '*', + TYPE_ATTRIBUTE = '@', + POINTER_TO_DATA_MEMBER = 0xee, // also '@' + BUILTIN = '-' +}; + +struct StabsBaseClass; +struct StabsField; +struct StabsMemberFunctionSet; + +// e.g. for "123=*456" 123 would be the type_number, the type descriptor would +// be of type POINTER and StabsPointerType::value_type would point to a type +// with type_number = 456. +struct StabsType { + StabsTypeNumber type_number; + // The name field is only populated for root types and cross references. 
+ std::optional name; + bool is_typedef = false; + bool is_root = false; + std::optional descriptor; + + StabsType(StabsTypeNumber n) : type_number(n) {} + StabsType(StabsTypeDescriptor d) : descriptor(d) {} + StabsType(StabsTypeNumber n, StabsTypeDescriptor d) : type_number(n), descriptor(d) {} + virtual ~StabsType() {} + + template + SubType& as() + { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + template + const SubType& as() const + { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + virtual void enumerate_numbered_types(std::map& output) const + { + if(type_number.valid() && descriptor.has_value()) { + output.emplace(type_number, this); + } + } +}; + +struct StabsTypeReferenceType : StabsType { + std::unique_ptr type; + + StabsTypeReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_REFERENCE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsArrayType : StabsType { + std::unique_ptr index_type; + std::unique_ptr element_type; + + StabsArrayType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::ARRAY; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + index_type->enumerate_numbered_types(output); + element_type->enumerate_numbered_types(output); + } +}; + +struct StabsEnumType : StabsType { + std::vector> fields; + + StabsEnumType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::ENUM; +}; + +struct StabsFunctionType : StabsType { + std::unique_ptr return_type; + + StabsFunctionType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static 
const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::FUNCTION; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + return_type->enumerate_numbered_types(output); + } +}; + +struct StabsVolatileQualifierType : StabsType { + std::unique_ptr type; + + StabsVolatileQualifierType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::VOLATILE_QUALIFIER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsConstQualifierType : StabsType { + std::unique_ptr type; + + StabsConstQualifierType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::CONST_QUALIFIER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsRangeType : StabsType { + std::unique_ptr type; + std::string low; + std::string high; // Some compilers wrote out a wrapped around value here for zero (or variable?) length arrays. 
+ + StabsRangeType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::RANGE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsStructOrUnionType : StabsType { + enum class Visibility : u8 { + NONE, + PRIVATE, + PROTECTED, + PUBLIC, + PUBLIC_OPTIMIZED_OUT + }; + + struct BaseClass { + bool is_virtual; + Visibility visibility; + s32 offset = -1; + std::unique_ptr type; + }; + + struct Field { + std::string name; + Visibility visibility = Visibility::NONE; + std::unique_ptr type; + bool is_static = false; + s32 offset_bits = 0; + s32 size_bits = 0; + std::string type_name; + }; + + struct MemberFunction { + std::unique_ptr type; + std::unique_ptr virtual_type; + Visibility visibility; + bool is_const = false; + bool is_volatile = false; + ast::MemberFunctionModifier modifier = ast::MemberFunctionModifier::NONE; + s32 vtable_index = -1; + }; + + struct MemberFunctionSet { + std::string name; + std::vector overloads; + }; + + s64 size = -1; + std::vector base_classes; + std::vector fields; + std::vector member_functions; + std::unique_ptr first_base_class; + + StabsStructOrUnionType(StabsTypeNumber n, StabsTypeDescriptor d) : StabsType(n, d) {} + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + for(const BaseClass& base_class : base_classes) { + base_class.type->enumerate_numbered_types(output); + } + for(const Field& field : fields) { + field.type->enumerate_numbered_types(output); + } + for(const MemberFunctionSet& member_function_set : member_functions) { + for(const MemberFunction& member_function : member_function_set.overloads) { + member_function.type->enumerate_numbered_types(output); + if(member_function.virtual_type.get()) { + 
member_function.virtual_type->enumerate_numbered_types(output); + } + } + } + if(first_base_class.get()) { + first_base_class->enumerate_numbered_types(output); + } + } +}; + +struct StabsStructType : StabsStructOrUnionType { + StabsStructType(StabsTypeNumber n) : StabsStructOrUnionType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::STRUCT; +}; + +struct StabsUnionType : StabsStructOrUnionType { + StabsUnionType(StabsTypeNumber n) : StabsStructOrUnionType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::UNION; +}; + + +struct StabsCrossReferenceType : StabsType { + ast::ForwardDeclaredType type; + std::string identifier; + + StabsCrossReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::CROSS_REFERENCE; +}; + +struct StabsFloatingPointBuiltInType : StabsType { + s32 fpclass = -1; + s32 bytes = -1; + + StabsFloatingPointBuiltInType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::FLOATING_POINT_BUILTIN; +}; + +struct StabsMethodType : StabsType { + std::unique_ptr return_type; + std::optional> class_type; + std::vector> parameter_types; + + StabsMethodType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::METHOD; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + return_type->enumerate_numbered_types(output); + if(class_type.has_value()) { + (*class_type)->enumerate_numbered_types(output); + } + for(const std::unique_ptr& parameter_type : parameter_types) { + parameter_type->enumerate_numbered_types(output); + } + } +}; + +struct StabsReferenceType : StabsType { + std::unique_ptr value_type; + + StabsReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) 
{} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::REFERENCE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + value_type->enumerate_numbered_types(output); + } +}; + +struct StabsPointerType : StabsType { + std::unique_ptr value_type; + + StabsPointerType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + value_type->enumerate_numbered_types(output); + } +}; + +struct StabsSizeTypeAttributeType : StabsType { + s64 size_bits = -1; + std::unique_ptr type; + + StabsSizeTypeAttributeType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_ATTRIBUTE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsPointerToDataMemberType : StabsType { + std::unique_ptr class_type; + std::unique_ptr member_type; + + StabsPointerToDataMemberType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER_TO_DATA_MEMBER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + class_type->enumerate_numbered_types(output); + member_type->enumerate_numbered_types(output); + } +}; + +struct StabsBuiltInType : StabsType { + s64 type_id = -1; + + StabsBuiltInType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::BUILTIN; +}; + +extern const char* STAB_TRUNCATED_ERROR_MESSAGE; + +Result> parse_top_level_stabs_type(const char*& input); +std::optional 
parse_number_s32(const char*& input); +std::optional parse_number_s64(const char*& input); +std::optional parse_stabs_identifier(const char*& input, char terminator); +Result parse_dodgy_stabs_identifier(const char*& input, char terminator); +const char* stabs_field_visibility_to_string(StabsStructOrUnionType::Visibility visibility); + +} diff --git a/3rdparty/ccc/src/ccc/stabs_to_ast.cpp b/3rdparty/ccc/src/ccc/stabs_to_ast.cpp new file mode 100644 index 0000000000..42b4784d28 --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs_to_ast.cpp @@ -0,0 +1,834 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "stabs_to_ast.h" + +#include "importer_flags.h" + +#define AST_DEBUG(...) //__VA_ARGS__ +#define AST_DEBUG_PRINTF(...) AST_DEBUG(printf(__VA_ARGS__);) + +namespace ccc { + +struct MemberFunctionInfo { + std::string name; + bool is_constructor_or_destructor = false; + bool is_special_member_function = false; + bool is_operator_member_function = false; +}; + +static bool is_void_like(const StabsType& type); +static Result classify_range(const StabsRangeType& type); +static Result> field_to_ast( + const StabsStructOrUnionType::Field& field, + const StabsType& enclosing_struct, + const StabsToAstState& state, + s32 depth); +static Result detect_bitfield(const StabsStructOrUnionType::Field& field, const StabsToAstState& state); +static Result>> member_functions_to_ast( + const StabsStructOrUnionType& type, const StabsToAstState& state, s32 depth); +static MemberFunctionInfo check_member_function( + const std::string& mangled_name, + std::string_view type_name_no_template_args, + const DemanglerFunctions& demangler, + u32 importer_flags); + +Result> stabs_type_to_ast( + const StabsType& type, + const StabsType* enclosing_struct, + const StabsToAstState& state, + s32 depth, + bool substitute_type_name, + bool force_substitute) +{ + AST_DEBUG_PRINTF("%-*stype desc=%hhx '%c' num=(%d,%d) name=%s\n", + depth * 4, "", + 
type.descriptor.has_value() ? (u8) *type.descriptor : 'X', + (type.descriptor.has_value() && isprint((u8) *type.descriptor)) ? (u8) *type.descriptor : '!', + type.type_number.file, type.type_number.type, + type.name.has_value() ? type.name->c_str() : ""); + + if(depth > 200) { + const char* error_message = "Call depth greater than 200 in stabs_type_to_ast, probably infinite recursion."; + if(state.importer_flags & STRICT_PARSING) { + return CCC_FAILURE(error_message); + } else { + CCC_WARN(error_message); + + auto error = std::make_unique(); + error->message = error_message; + return std::unique_ptr(std::move(error)); + } + } + + // This makes sure that types are replaced with their type name in cases + // where that would be more appropriate. + if(type.name.has_value()) { + bool try_substitute = depth > 0 && (type.is_root + || type.descriptor == StabsTypeDescriptor::RANGE + || type.descriptor == StabsTypeDescriptor::BUILTIN); + // GCC emits anonymous enums with a name of " " since apparently some + // debuggers can't handle zero-length names. + bool is_name_empty = type.name == "" || type.name == " "; + // Cross references will be handled below. + bool is_cross_reference = type.descriptor == StabsTypeDescriptor::CROSS_REFERENCE; + bool is_void = is_void_like(type); + if((substitute_type_name || try_substitute) && !is_name_empty && !is_cross_reference && !is_void) { + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::REFERENCE; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = *type.name; + type_name->unresolved_stabs->referenced_file_handle = state.file_handle; + type_name->unresolved_stabs->stabs_type_number = type.type_number; + return std::unique_ptr(std::move(type_name)); + } + } + + // This prevents infinite recursion when an automatically generated member + // function references an unnamed type. 
+ bool can_compare_type_numbers = type.type_number.valid() && enclosing_struct && enclosing_struct->type_number.valid(); + if(force_substitute && can_compare_type_numbers && type.type_number == enclosing_struct->type_number) { + // It's probably a this parameter (or return type) for an unnamed type. + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::UNNAMED_THIS; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = enclosing_struct->name.has_value() ? *enclosing_struct->name : ""; + type_name->unresolved_stabs->referenced_file_handle = state.file_handle; + type_name->unresolved_stabs->stabs_type_number = type.type_number; + return std::unique_ptr(std::move(type_name)); + } + + if(!type.descriptor.has_value()) { + // The definition of the type has been defined previously, so we have to + // look it up by its type number. + CCC_CHECK(type.type_number.valid(), "Cannot lookup type (type is anonymous)."); + auto stabs_type = state.stabs_types->find(type.type_number); + if(stabs_type == state.stabs_types->end()) { + std::string error_message = "Failed to lookup STABS type by its type number (" + + std::to_string(type.type_number.file) + "," + std::to_string(type.type_number.type) + ")."; + if(state.importer_flags & STRICT_PARSING) { + return CCC_FAILURE("%s", error_message.c_str()); + } else { + CCC_WARN("%s", error_message.c_str()); + std::unique_ptr error = std::make_unique(); + error->message = std::move(error_message); + return std::unique_ptr(std::move(error)); + } + } + return stabs_type_to_ast( + *stabs_type->second, + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + } + + std::unique_ptr result; + + switch(*type.descriptor) { + case StabsTypeDescriptor::TYPE_REFERENCE: { + const auto& stabs_type_ref = type.as(); + if(!type.type_number.valid() || !stabs_type_ref.type->type_number.valid() || stabs_type_ref.type->type_number != type.type_number) { + auto 
node = stabs_type_to_ast( + *stabs_type_ref.type, + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + } else { + // I still don't know why in STABS void is a reference to + // itself, maybe because I'm not a philosopher. + auto builtin = std::make_unique(); + builtin->bclass = ast::BuiltInClass::VOID_TYPE; + result = std::move(builtin); + } + break; + } + case StabsTypeDescriptor::ARRAY: { + auto array = std::make_unique(); + const auto& stabs_array = type.as(); + + auto element_node = stabs_type_to_ast( + *stabs_array.element_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(element_node); + array->element_type = std::move(*element_node); + + const StabsRangeType& index = stabs_array.index_type->as(); + + char* end = nullptr; + + const char* low = index.low.c_str(); + s64 low_value = strtoll(low, &end, 10); + CCC_CHECK(end != low, "Failed to parse low part of range as integer."); + CCC_CHECK(low_value == 0, "Invalid index type for array."); + + const char* high = index.high.c_str(); + s64 high_value = strtoll(high, &end, 10); + CCC_CHECK(end != high, "Failed to parse low part of range as integer."); + + if(high_value == 4294967295) { + // Some compilers wrote out a wrapped around value here. 
+ array->element_count = 0; + } else { + array->element_count = (s32) high_value + 1; + } + + result = std::move(array); + break; + } + case StabsTypeDescriptor::ENUM: { + auto inline_enum = std::make_unique(); + const auto& stabs_enum = type.as(); + inline_enum->constants = stabs_enum.fields; + result = std::move(inline_enum); + break; + } + case StabsTypeDescriptor::FUNCTION: { + auto function = std::make_unique(); + + auto node = stabs_type_to_ast( + *type.as().return_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(node); + function->return_type = std::move(*node); + + result = std::move(function); + break; + } + case StabsTypeDescriptor::VOLATILE_QUALIFIER: { + const auto& volatile_qualifier = type.as(); + + auto node = stabs_type_to_ast( + *volatile_qualifier.type.get(), + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + + result->is_volatile = true; + break; + } + case StabsTypeDescriptor::CONST_QUALIFIER: { + const auto& const_qualifier = type.as(); + + auto node = stabs_type_to_ast( + *const_qualifier.type.get(), + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + result = std::move(*node); + + result->is_const = true; + break; + } + case StabsTypeDescriptor::RANGE: { + auto builtin = std::make_unique(); + Result bclass = classify_range(type.as()); + CCC_RETURN_IF_ERROR(bclass); + builtin->bclass = *bclass; + result = std::move(builtin); + break; + } + case StabsTypeDescriptor::STRUCT: + case StabsTypeDescriptor::UNION: { + const StabsStructOrUnionType* stabs_struct_or_union; + if(type.descriptor == StabsTypeDescriptor::STRUCT) { + stabs_struct_or_union = &type.as(); + } else { + stabs_struct_or_union = &type.as(); + } + + auto struct_or_union = std::make_unique(); + struct_or_union->is_struct = type.descriptor == StabsTypeDescriptor::STRUCT; + struct_or_union->size_bits = 
(s32) stabs_struct_or_union->size * 8; + + for(const StabsStructOrUnionType::BaseClass& stabs_base_class : stabs_struct_or_union->base_classes) { + auto base_class = stabs_type_to_ast( + *stabs_base_class.type, + &type, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(base_class); + + (*base_class)->offset_bytes = stabs_base_class.offset; + (*base_class)->set_access_specifier(stabs_field_visibility_to_access_specifier(stabs_base_class.visibility), state.importer_flags); + + if(stabs_base_class.is_virtual) { + (*base_class)->is_virtual_base_class = true; + } + + struct_or_union->base_classes.emplace_back(std::move(*base_class)); + } + + AST_DEBUG_PRINTF("%-*s beginfields\n", depth * 4, ""); + for(const StabsStructOrUnionType::Field& field : stabs_struct_or_union->fields) { + auto node = field_to_ast(field, type, state, depth); + CCC_RETURN_IF_ERROR(node); + struct_or_union->fields.emplace_back(std::move(*node)); + } + AST_DEBUG_PRINTF("%-*s endfields\n", depth * 4, ""); + + AST_DEBUG_PRINTF("%-*s beginmemberfuncs\n", depth * 4, ""); + Result>> member_functions = + member_functions_to_ast(*stabs_struct_or_union, state, depth); + CCC_RETURN_IF_ERROR(member_functions); + struct_or_union->member_functions = std::move(*member_functions); + AST_DEBUG_PRINTF("%-*s endmemberfuncs\n", depth * 4, ""); + + result = std::move(struct_or_union); + break; + } + case StabsTypeDescriptor::CROSS_REFERENCE: { + const auto& cross_reference = type.as(); + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::CROSS_REFERENCE; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = cross_reference.identifier; + type_name->unresolved_stabs->type = cross_reference.type; + result = std::move(type_name); + break; + } + case ccc::StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { + const auto& fp_builtin = type.as(); + auto builtin = std::make_unique(); + switch(fp_builtin.bytes) { + case 1: builtin->bclass = 
ast::BuiltInClass::UNSIGNED_8; break; + case 2: builtin->bclass = ast::BuiltInClass::UNSIGNED_16; break; + case 4: builtin->bclass = ast::BuiltInClass::UNSIGNED_32; break; + case 8: builtin->bclass = ast::BuiltInClass::UNSIGNED_64; break; + case 16: builtin->bclass = ast::BuiltInClass::UNSIGNED_128; break; + default: builtin->bclass = ast::BuiltInClass::UNSIGNED_8; break; + } + result = std::move(builtin); + break; + } + case StabsTypeDescriptor::METHOD: { + const auto& stabs_method = type.as(); + auto function = std::make_unique(); + + auto return_node = stabs_type_to_ast( + *stabs_method.return_type.get(), + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(return_node); + function->return_type = std::move(*return_node); + + function->parameters.emplace(); + for(const std::unique_ptr& parameter_type : stabs_method.parameter_types) { + auto parameter_node = stabs_type_to_ast( + *parameter_type, + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(parameter_node); + function->parameters->emplace_back(std::move(*parameter_node)); + } + result = std::move(function); + break; + } + case StabsTypeDescriptor::POINTER: { + auto pointer = std::make_unique(); + pointer->is_pointer = true; + + auto value_node = stabs_type_to_ast( + *type.as().value_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(value_node); + pointer->value_type = std::move(*value_node); + + result = std::move(pointer); + break; + } + case StabsTypeDescriptor::REFERENCE: { + auto reference = std::make_unique(); + reference->is_pointer = false; + + auto value_node = stabs_type_to_ast( + *type.as().value_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(value_node); + reference->value_type = std::move(*value_node); + + result = std::move(reference); + break; + } + case StabsTypeDescriptor::TYPE_ATTRIBUTE: { + const auto& stabs_type_attribute = type.as(); 
+ + auto node = stabs_type_to_ast( + *stabs_type_attribute.type, + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + + result->size_bits = (s32) stabs_type_attribute.size_bits; + break; + } + case StabsTypeDescriptor::POINTER_TO_DATA_MEMBER: { + const auto& stabs_member_pointer = type.as(); + auto member_pointer = std::make_unique(); + + auto class_node = stabs_type_to_ast( + *stabs_member_pointer.class_type.get(), + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(class_node); + member_pointer->class_type = std::move(*class_node); + + auto member_node = stabs_type_to_ast( + *stabs_member_pointer.member_type.get(), + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(member_node); + member_pointer->member_type = std::move(*member_node); + + result = std::move(member_pointer); + break; + } + case StabsTypeDescriptor::BUILTIN: { + CCC_CHECK(type.as().type_id == 16, + "Unknown built-in type!"); + auto builtin = std::make_unique(); + builtin->bclass = ast::BuiltInClass::BOOL_8; + result = std::move(builtin); + break; + } + } + + CCC_CHECK(result, "Result of stabs_type_to_ast call is nullptr."); + return result; +} + +static bool is_void_like(const StabsType& type) +{ + // Unfortunately, a common case seems to be that various types (most + // commonly __builtin_va_list) are indistinguishable from void or void*, so + // we have to output them as a void built-in. 
+ if(type.descriptor.has_value()) { + switch(*type.descriptor) { + case StabsTypeDescriptor::POINTER: { + return is_void_like(*type.as().value_type.get()); + } + case StabsTypeDescriptor::TYPE_REFERENCE: { + return type.as().type->type_number == type.type_number; + } + default: { + break; + } + } + } + + return false; +} + +static Result classify_range(const StabsRangeType& type) +{ + const char* low = type.low.c_str(); + const char* high = type.high.c_str(); + + // Handle some special cases and values that are too large to easily store + // in a 64-bit integer. + static const struct { const char* low; const char* high; ast::BuiltInClass classification; } strings[] = { + {"4", "0", ast::BuiltInClass::FLOAT_32}, + {"000000000000000000000000", "001777777777777777777777", ast::BuiltInClass::UNSIGNED_64}, + {"00000000000000000000000000000000000000000000", "00000000000000000000001777777777777777777777", ast::BuiltInClass::UNSIGNED_64}, + {"0000000000000", "01777777777777777777777", ast::BuiltInClass::UNSIGNED_64}, // IOP + {"0", "18446744073709551615", ast::BuiltInClass::UNSIGNED_64}, + {"001000000000000000000000", "000777777777777777777777", ast::BuiltInClass::SIGNED_64}, + {"00000000000000000000001000000000000000000000", "00000000000000000000000777777777777777777777", ast::BuiltInClass::SIGNED_64}, + {"01000000000000000000000", "0777777777777777777777", ast::BuiltInClass::SIGNED_64}, // IOP + {"-9223372036854775808", "9223372036854775807", ast::BuiltInClass::SIGNED_64}, + {"8", "0", ast::BuiltInClass::FLOAT_64}, + {"00000000000000000000000000000000000000000000", "03777777777777777777777777777777777777777777", ast::BuiltInClass::UNSIGNED_128}, + {"02000000000000000000000000000000000000000000", "01777777777777777777777777777777777777777777", ast::BuiltInClass::SIGNED_128}, + {"000000000000000000000000", "0377777777777777777777777777777777", ast::BuiltInClass::UNQUALIFIED_128}, + {"16", "0", ast::BuiltInClass::FLOAT_128}, + {"0", "-1", ast::BuiltInClass::UNQUALIFIED_128} 
// Old homebrew toolchain + }; + + for(const auto& range : strings) { + if(strcmp(range.low, low) == 0 && strcmp(range.high, high) == 0) { + return range.classification; + } + } + + // For smaller values we actually parse the bounds as integers. + char* end = nullptr; + s64 low_value = strtoll(type.low.c_str(), &end, low[0] == '0' ? 8 : 10); + CCC_CHECK(end != low, "Failed to parse low part of range as integer."); + s64 high_value = strtoll(type.high.c_str(), &end, high[0] == '0' ? 8 : 10); + CCC_CHECK(end != high, "Failed to parse high part of range as integer."); + + static const struct { s64 low; s64 high; ast::BuiltInClass classification; } integers[] = { + {0, 255, ast::BuiltInClass::UNSIGNED_8}, + {-128, 127, ast::BuiltInClass::SIGNED_8}, + {0, 127, ast::BuiltInClass::UNQUALIFIED_8}, + {0, 65535, ast::BuiltInClass::UNSIGNED_16}, + {-32768, 32767, ast::BuiltInClass::SIGNED_16}, + {0, 4294967295, ast::BuiltInClass::UNSIGNED_32}, + {-2147483648, 2147483647, ast::BuiltInClass::SIGNED_32}, + }; + + for(const auto& range : integers) { + if((range.low == low_value || range.low == -low_value) && range.high == high_value) { + return range.classification; + } + } + + return CCC_FAILURE("Failed to classify range."); +} + +static Result> field_to_ast( + const StabsStructOrUnionType::Field& field, + const StabsType& enclosing_struct, + const StabsToAstState& state, + s32 depth) +{ + AST_DEBUG_PRINTF("%-*s field %s\n", depth * 4, "", field.name.c_str()); + + Result is_bitfield = detect_bitfield(field, state); + CCC_RETURN_IF_ERROR(is_bitfield); + + if(*is_bitfield) { + // Process bitfields. + auto bitfield_node = stabs_type_to_ast( + *field.type, + &enclosing_struct, + state, + depth + 1, + true, + false); + CCC_RETURN_IF_ERROR(bitfield_node); + + std::unique_ptr bitfield = std::make_unique(); + bitfield->name = (field.name == " ") ? 
"" : field.name; + bitfield->offset_bytes = field.offset_bits / 8; + bitfield->size_bits = field.size_bits; + bitfield->underlying_type = std::move(*bitfield_node); + bitfield->bitfield_offset_bits = field.offset_bits % 8; + bitfield->set_access_specifier(stabs_field_visibility_to_access_specifier(field.visibility), state.importer_flags); + + return std::unique_ptr(std::move(bitfield)); + } else { + // Process a normal field. + Result> node = stabs_type_to_ast( + *field.type, + &enclosing_struct, + state, + depth + 1, + true, + false); + CCC_RETURN_IF_ERROR(node); + + (*node)->name = field.name; + (*node)->offset_bytes = field.offset_bits / 8; + (*node)->size_bits = field.size_bits; + (*node)->set_access_specifier(stabs_field_visibility_to_access_specifier(field.visibility), state.importer_flags); + + if(field.name.starts_with("$vf") || field.name.starts_with("_vptr$") || field.name.starts_with("_vptr.")) { + (*node)->is_vtable_pointer = true; + } + + if(field.is_static) { + (*node)->storage_class = STORAGE_CLASS_STATIC; + } + + return node; + } +} + +static Result detect_bitfield(const StabsStructOrUnionType::Field& field, const StabsToAstState& state) +{ + // Static fields can't be bitfields. + if(field.is_static) { + return false; + } + + // Resolve type references. 
+ const StabsType* type = field.type.get(); + for(s32 i = 0; i < 50; i++) { + if(!type->descriptor.has_value()) { + if(!type->type_number.valid()) { + return false; + } + auto next_type = state.stabs_types->find(type->type_number); + if(next_type == state.stabs_types->end() || next_type->second == type) { + return false; + } + type = next_type->second; + } else if(type->descriptor == StabsTypeDescriptor::TYPE_REFERENCE) { + type = type->as().type.get(); + } else if(type->descriptor == StabsTypeDescriptor::CONST_QUALIFIER) { + type = type->as().type.get(); + } else if(type->descriptor == StabsTypeDescriptor::VOLATILE_QUALIFIER) { + type = type->as().type.get(); + } else { + break; + } + + // Prevent an infinite loop if there's a cycle (fatal frame). + if(i == 49) { + return false; + } + } + + // Determine the size of the underlying type. + s32 underlying_type_size_bits = 0; + switch(*type->descriptor) { + case ccc::StabsTypeDescriptor::RANGE: { + Result bclass = classify_range(type->as()); + CCC_RETURN_IF_ERROR(bclass); + underlying_type_size_bits = builtin_class_size(*bclass) * 8; + break; + } + case ccc::StabsTypeDescriptor::CROSS_REFERENCE: { + if(type->as().type == ast::ForwardDeclaredType::ENUM) { + underlying_type_size_bits = 32; + } else { + return false; + } + break; + } + case ccc::StabsTypeDescriptor::TYPE_ATTRIBUTE: { + underlying_type_size_bits = (s32) type->as().size_bits; + break; + } + case ccc::StabsTypeDescriptor::BUILTIN: { + underlying_type_size_bits = 8; // bool + break; + } + default: { + return false; + } + } + + if(underlying_type_size_bits == 0) { + return false; + } + + return field.size_bits != underlying_type_size_bits; +} + +static Result>> member_functions_to_ast( + const StabsStructOrUnionType& type, const StabsToAstState& state, s32 depth) +{ + if(state.importer_flags & NO_MEMBER_FUNCTIONS) { + return std::vector>(); + } + + std::string_view type_name_no_template_args; + if(type.name.has_value()) { + type_name_no_template_args = + 
std::string_view(*type.name).substr(0, type.name->find("<")); + } + + std::vector> member_functions; + bool only_special_functions = true; + + for(const StabsStructOrUnionType::MemberFunctionSet& function_set : type.member_functions) { + MemberFunctionInfo info = check_member_function( + function_set.name, type_name_no_template_args, state.demangler, state.importer_flags); + + if(!info.is_special_member_function) { + only_special_functions = false; + } + + for(const StabsStructOrUnionType::MemberFunction& stabs_func : function_set.overloads) { + auto node = stabs_type_to_ast( + *stabs_func.type, + &type, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(node); + + (*node)->is_constructor_or_destructor = info.is_constructor_or_destructor; + (*node)->is_special_member_function = info.is_special_member_function; + (*node)->is_operator_member_function = info.is_operator_member_function; + + (*node)->name = info.name; + (*node)->set_access_specifier(stabs_field_visibility_to_access_specifier(stabs_func.visibility), state.importer_flags); + + if((*node)->descriptor == ast::FUNCTION) { + ast::Function& function = (*node)->as(); + function.modifier = stabs_func.modifier; + function.vtable_index = stabs_func.vtable_index; + } + + member_functions.emplace_back(std::move(*node)); + } + } + + if(only_special_functions && (state.importer_flags & INCLUDE_GENERATED_MEMBER_FUNCTIONS) == 0) { + return std::vector>(); + } + + return member_functions; +} + +static MemberFunctionInfo check_member_function( + const std::string& mangled_name, + std::string_view type_name_no_template_args, + const DemanglerFunctions& demangler, + u32 importer_flags) +{ + MemberFunctionInfo info; + + // Some compiler versions output gcc opnames for overloaded operators + // instead of their proper names. 
+ if((importer_flags & DONT_DEMANGLE_NAMES) == 0 && demangler.cplus_demangle_opname) { + char* demangled_name = demangler.cplus_demangle_opname(mangled_name.c_str(), 0); + if(demangled_name) { + info.name = demangled_name; + free(reinterpret_cast(demangled_name)); + } + } + if(info.name.empty()) { + info.name = mangled_name; + } + + bool is_constructor = + info.name == "__ct" || // Takes a parameter to decide whether or not to construct virtual base classes. + info.name == "__comp_ctor" || // Constructs virtual base classes. + info.name == "__base_ctor"; // Does not construct virtual base classes. + + if(!is_constructor && !type_name_no_template_args.empty()) { + is_constructor |= info.name == type_name_no_template_args; // Named constructor. + } + + bool is_destructor = + info.name == "__dt" || // Takes parameters to decide whether or not to construct virtual base classes and/or delete the object. + info.name == "__comp_dtor" || // Destructs virtual base classes. + info.name == "__base_dtor" || // Does not construct virtual base classes. + info.name == "__deleting_dtor"; // Destructs virtual base clases then deletes the entire object. + + if(!is_destructor && !info.name.empty()) { + is_destructor |= info.name[0] == '~' && std::string_view(info.name).substr(1) == type_name_no_template_args; // Named destructor. + } + + info.is_constructor_or_destructor = is_constructor || is_destructor || info.name.starts_with("$_"); + info.is_special_member_function = info.is_constructor_or_destructor || info.name == "operator="; + + return info; +} + +void fix_recursively_emitted_structures( + ast::StructOrUnion& outer_struct, const std::string& name, StabsTypeNumber type_number, SourceFileHandle file_handle) +{ + // This is a rather peculiar case. 
For some compiler versions, when a struct + // or a union defined using a typedef is being emitted and it needs to + // reference itself from a member function parameter, it will emit its + // entire definition again in the middle of the first definition, although + // thankfully it won't recurse more than once. + // + // The game Sega Soccer Slam is affected by this. See the PeculiarParameter + // test case in mdebug_importer_tests.cpp for a bare bones example. + + for(std::unique_ptr& node : outer_struct.member_functions) { + if(node->descriptor != ast::FUNCTION) { + continue; + } + + ast::Function& function = node->as(); + if(!function.parameters.has_value()) { + continue; + } + + for(std::unique_ptr& parameter : *function.parameters) { + if(parameter->descriptor != ast::POINTER_OR_REFERENCE) { + continue; + } + + ast::PointerOrReference& pointer_or_reference = parameter->as(); + if(pointer_or_reference.value_type->descriptor != ast::STRUCT_OR_UNION) { + continue; + } + + ast::StructOrUnion& inner_struct = pointer_or_reference.value_type->as(); + + // Since C++ doesn't allow struct definitions in function parameter + // lists normally, and most of the time the member function + // parameters aren't even filled in by GCC, this is a really rare + // case, so here we only bother to do some very basic checks to + // verify that the inner struct is similar to the outer struct. 
+ if(inner_struct.base_classes.size() != outer_struct.base_classes.size()) { + continue; + } + + if(inner_struct.fields.size() != outer_struct.fields.size()) { + continue; + } + + if(inner_struct.member_functions.size() != outer_struct.member_functions.size()) { + continue; + } + + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::REFERENCE; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = name; + type_name->unresolved_stabs->referenced_file_handle = file_handle; + type_name->unresolved_stabs->stabs_type_number = type_number; + pointer_or_reference.value_type = std::move(type_name); + } + } +} + +ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsStructOrUnionType::Visibility visibility) +{ + ast::AccessSpecifier access_specifier = ast::AS_PUBLIC; + switch(visibility) { + case StabsStructOrUnionType::Visibility::NONE: access_specifier = ast::AS_PUBLIC; break; + case StabsStructOrUnionType::Visibility::PUBLIC: access_specifier = ast::AS_PUBLIC; break; + case StabsStructOrUnionType::Visibility::PROTECTED: access_specifier = ast::AS_PROTECTED; break; + case StabsStructOrUnionType::Visibility::PRIVATE: access_specifier = ast::AS_PRIVATE; break; + case StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT: access_specifier = ast::AS_PUBLIC; break; + } + return access_specifier; +} + +} diff --git a/3rdparty/ccc/src/ccc/stabs_to_ast.h b/3rdparty/ccc/src/ccc/stabs_to_ast.h new file mode 100644 index 0000000000..03d265e436 --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs_to_ast.h @@ -0,0 +1,29 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "ast.h" +#include "stabs.h" + +namespace ccc { + +struct StabsToAstState { + u32 file_handle; + std::map* stabs_types; + u32 importer_flags; + DemanglerFunctions demangler; +}; + +Result> stabs_type_to_ast( + const StabsType& type, + const StabsType* enclosing_struct, + const StabsToAstState& state, + s32 depth, + bool substitute_type_name, + bool force_substitute); +void fix_recursively_emitted_structures( + ast::StructOrUnion& outer_struct, const std::string& name, StabsTypeNumber type_number, SourceFileHandle file_handle); +ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsStructOrUnionType::Visibility visibility); + +} diff --git a/3rdparty/ccc/src/ccc/symbol_database.cpp b/3rdparty/ccc/src/ccc/symbol_database.cpp new file mode 100644 index 0000000000..82451bbb77 --- /dev/null +++ b/3rdparty/ccc/src/ccc/symbol_database.cpp @@ -0,0 +1,1204 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "symbol_database.h" + +#include "ast.h" +#include "importer_flags.h" + +namespace ccc { + +template +SymbolType* SymbolList::symbol_from_handle(SymbolHandle handle) +{ + if(!handle.valid()) { + return nullptr; + } + + size_t index = binary_search(handle); + if(index >= m_symbols.size() || m_symbols[index].m_handle != handle) { + return nullptr; + } + + return &m_symbols[index]; +} + +template +const SymbolType* SymbolList::symbol_from_handle(SymbolHandle handle) const +{ + return const_cast*>(this)->symbol_from_handle(handle); +} + +template +std::vector SymbolList::symbols_from_handles( + const std::vector>& handles) +{ + std::vector result; + for(SymbolHandle handle : handles) { + SymbolType* symbol = symbol_from_handle(handle); + if(symbol) { + result.emplace_back(symbol); + } + } + return result; +} + +template +std::vector SymbolList::symbols_from_handles( + const std::vector>& handles) const +{ + std::vector result; + for(SymbolHandle 
handle : handles) { + const SymbolType* symbol = symbol_from_handle(handle); + if(symbol) { + result.emplace_back(symbol); + } + } + return result; +} + +template +std::vector SymbolList::optional_symbols_from_handles( + const std::optional>>& handles) +{ + if(handles.has_value()) { + return symbols_from_handles(*handles); + } else { + return std::vector(); + } +} + +template +std::vector SymbolList::optional_symbols_from_handles( + const std::optional>>& handles) const +{ + if(handles.has_value()) { + return symbols_from_handles(*handles); + } else { + return std::vector(); + } +} + +template +typename SymbolList::Iterator SymbolList::begin() +{ + return m_symbols.begin(); +} + +template +typename SymbolList::ConstIterator SymbolList::begin() const +{ + return m_symbols.begin(); +} + +template +typename SymbolList::Iterator SymbolList::end() +{ + return m_symbols.end(); +} + +template +typename SymbolList::ConstIterator SymbolList::end() const +{ + return m_symbols.end(); +} + +template +typename SymbolList::AddressToHandleMapIterators SymbolList::handles_from_starting_address(Address address) const +{ + auto iterators = m_address_to_handle.equal_range(address.value); + return {iterators.first, iterators.second}; +} + +template +typename SymbolList::AddressToHandleMapIterators SymbolList::handles_from_address_range(AddressRange range) const +{ + if(range.low.valid()) { + return {m_address_to_handle.lower_bound(range.low.value), m_address_to_handle.lower_bound(range.high.value)}; + } else if(range.high.valid()) { + return {m_address_to_handle.begin(), m_address_to_handle.lower_bound(range.high.value)}; + } else { + return {m_address_to_handle.end(), m_address_to_handle.end()}; + } +} + +template +SymbolHandle SymbolList::first_handle_from_starting_address(Address address) const +{ + auto iterator = m_address_to_handle.find(address.value); + if(iterator != m_address_to_handle.end()) { + return iterator->second; + } else { + return SymbolHandle(); + } +} + +template 
+typename SymbolList::NameToHandleMapIterators SymbolList::handles_from_name(const std::string& name) const +{ + auto iterators = m_name_to_handle.equal_range(name); + return {iterators.first, iterators.second}; +} + +template +SymbolHandle SymbolList::first_handle_after_address(Address address) const +{ + auto iterator = m_address_to_handle.upper_bound(address.value); + if(iterator != m_address_to_handle.end()) { + return iterator->second; + } else { + return SymbolHandle(); + } +} + +template +SymbolHandle SymbolList::first_handle_from_name(const std::string& name) const +{ + auto iterator = m_name_to_handle.find(name); + if(iterator != m_name_to_handle.end()) { + return iterator->second; + } else { + return SymbolHandle(); + } +} + +template +SymbolType* SymbolList::symbol_overlapping_address(Address address) +{ + auto iterator = m_address_to_handle.upper_bound(address.value); + if(iterator != m_address_to_handle.begin()) { + iterator--; // Find the greatest element that is less than or equal to the address. 
+ SymbolType* symbol = symbol_from_handle(iterator->second); + if(symbol && address.value < symbol->address().value + symbol->size()) { + return symbol; + } + } + return nullptr; +} + +template +const SymbolType* SymbolList::symbol_overlapping_address(Address address) const +{ + return const_cast*>(this)->symbol_overlapping_address(address); +} + +template +s32 SymbolList::index_from_handle(SymbolHandle handle) const +{ + if(!handle.valid()) { + return -1; + } + + size_t index = binary_search(handle); + if(index >= m_symbols.size() || m_symbols[index].handle() != handle) { + return -1; + } + + return (s32) index; +} + +template +SymbolType& SymbolList::symbol_from_index(s32 index) +{ + return m_symbols.at(index); +} + +template +const SymbolType& SymbolList::symbol_from_index(s32 index) const +{ + return m_symbols.at(index); +} + +template +bool SymbolList::empty() const +{ + return m_symbols.size() == 0; +} + + +template +s32 SymbolList::size() const +{ + return (s32) m_symbols.size(); +} + +template +Result SymbolList::create_symbol( + std::string name, Address address, SymbolSourceHandle source, const Module* module_symbol) +{ + u32 handle; + do { + handle = m_next_handle; + CCC_CHECK(handle != UINT32_MAX, "Ran out of handles to use for %s symbols.", SymbolType::NAME); + } while(!m_next_handle.compare_exchange_weak(handle, handle + 1)); + + SymbolType& symbol = m_symbols.emplace_back(); + + symbol.m_handle = handle; + symbol.m_name = std::move(name); + symbol.m_source = source; + + if(module_symbol) { + symbol.m_address = address.add_base_address(module_symbol->address()); + symbol.m_module = module_symbol->handle(); + } else { + symbol.m_address = address; + } + + symbol.on_create(); + + CCC_ASSERT(symbol.source().valid()); + + link_address_map(symbol); + link_name_map(symbol); + + return &symbol; +} + +template +Result SymbolList::create_symbol( + std::string name, SymbolSourceHandle source, const Module* module_symbol) +{ + return 
create_symbol(std::move(name), Address(), source, module_symbol); +} + +template +Result SymbolList::create_symbol( + std::string name, SymbolSourceHandle source, const Module* module_symbol, Address address, u32 importer_flags, DemanglerFunctions demangler) +{ + static const int DMGL_PARAMS = 1 << 0; + static const int DMGL_RET_POSTFIX = 1 << 5; + + std::string demangled_name; + if constexpr(SymbolType::FLAGS & NAME_NEEDS_DEMANGLING) { + if((importer_flags & DONT_DEMANGLE_NAMES) == 0 && demangler.cplus_demangle) { + int demangler_flags = 0; + if(importer_flags & DEMANGLE_PARAMETERS) demangler_flags |= DMGL_PARAMS; + if(importer_flags & DEMANGLE_RETURN_TYPE) demangler_flags |= DMGL_RET_POSTFIX; + char* demangled_name_ptr = demangler.cplus_demangle(name.c_str(), demangler_flags); + if(demangled_name_ptr) { + demangled_name = demangled_name_ptr; + free(reinterpret_cast(demangled_name_ptr)); + } + } + } + + std::string& non_mangled_name = demangled_name.empty() ? name : demangled_name; + + Result symbol = create_symbol(non_mangled_name, address, source, module_symbol); + CCC_RETURN_IF_ERROR(symbol); + + if constexpr(SymbolType::FLAGS & NAME_NEEDS_DEMANGLING) { + if(!demangled_name.empty()) { + (*symbol)->set_mangled_name(name); + } + } + + return symbol; +} + +template +bool SymbolList::move_symbol(SymbolHandle handle, Address new_address) +{ + SymbolType* symbol = symbol_from_handle(handle); + if(!symbol) { + return false; + } + + if(symbol->address() != new_address) { + unlink_address_map(*symbol); + symbol->m_address = new_address; + link_address_map(*symbol); + } + + return true; +} + +template +bool SymbolList::rename_symbol(SymbolHandle handle, std::string new_name) +{ + SymbolType* symbol = symbol_from_handle(handle); + if(!symbol) { + return false; + } + + if(symbol->name() != new_name) { + unlink_name_map(*symbol); + symbol->m_name = std::move(new_name); + link_name_map(*symbol); + } + + return true; +} + +template +void SymbolList::merge_from(SymbolList& 
list) +{ + m_address_to_handle.clear(); + m_name_to_handle.clear(); + + std::vector lhs = std::move(m_symbols); + std::vector rhs = std::move(list.m_symbols); + + m_symbols = std::vector(); + m_symbols.reserve(lhs.size() + rhs.size()); + + size_t lhs_pos = 0; + size_t rhs_pos = 0; + for(;;) { + SymbolType* symbol; + if(lhs_pos < lhs.size() && (rhs_pos >= rhs.size() || lhs[lhs_pos].handle() < rhs[rhs_pos].handle())) { + symbol = &m_symbols.emplace_back(std::move(lhs[lhs_pos++])); + } else if(rhs_pos < rhs.size()) { + symbol = &m_symbols.emplace_back(std::move(rhs[rhs_pos++])); + } else { + break; + } + + link_address_map(*symbol); + link_name_map(*symbol); + } + + CCC_ASSERT(m_symbols.size() == lhs.size() + rhs.size()); + + list.m_symbols.clear(); + list.m_address_to_handle.clear(); + list.m_name_to_handle.clear(); +} + +template +bool SymbolList::mark_symbol_for_destruction(SymbolHandle handle, SymbolDatabase* database) +{ + SymbolType* symbol = symbol_from_handle(handle); + if(!symbol) { + return false; + } + + symbol->mark_for_destruction(); + + symbol->on_destroy(database); + + return true; +} + +template +void SymbolList::mark_symbols_from_source_for_destruction(SymbolSourceHandle source, SymbolDatabase* database) +{ + for(SymbolType& symbol : m_symbols) { + if(symbol.source() != source) { + continue; + } + + symbol.mark_for_destruction(); + + symbol.on_destroy(database); + } +} + +template +void SymbolList::mark_symbols_from_module_for_destruction(ModuleHandle module_handle, SymbolDatabase* database) +{ + for(SymbolType& symbol : m_symbols) { + if(symbol.module_handle() != module_handle) { + continue; + } + + symbol.mark_for_destruction(); + + symbol.on_destroy(database); + } +} + +template +void SymbolList::destroy_marked_symbols() +{ + std::vector remaining_symbols; + for(SymbolType& symbol : m_symbols) { + if(symbol.m_marked_for_destruction) { + unlink_address_map(symbol); + unlink_name_map(symbol); + } else { + 
remaining_symbols.emplace_back(std::move(symbol)); + } + } + + m_symbols = std::move(remaining_symbols); +} + +template +void SymbolList::clear() +{ + m_symbols.clear(); + m_address_to_handle.clear(); + m_name_to_handle.clear(); +} + +template +size_t SymbolList::binary_search(SymbolHandle handle) const +{ + size_t begin = 0; + size_t end = m_symbols.size(); + + while(begin < end) { + size_t mid = (begin + end) / 2; + if(m_symbols[mid].handle() < handle) { + begin = mid + 1; + } else if(m_symbols[mid].handle() > handle) { + end = mid; + } else { + return mid; + } + } + + return end; +} + +template +void SymbolList::link_address_map(SymbolType& symbol) +{ + if constexpr((SymbolType::FLAGS & WITH_ADDRESS_MAP)) { + if(symbol.address().valid()) { + m_address_to_handle.emplace(symbol.address().value, symbol.handle()); + } + } +} + +template +void SymbolList::unlink_address_map(SymbolType& symbol) +{ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { + if(symbol.address().valid()) { + auto iterators = m_address_to_handle.equal_range(symbol.address().value); + for(auto iterator = iterators.first; iterator != iterators.second; iterator++) { + if(iterator->second == symbol.handle()) { + m_address_to_handle.erase(iterator); + break; + } + } + } + } +} + +template +void SymbolList::link_name_map(SymbolType& symbol) +{ + if constexpr(SymbolType::FLAGS & WITH_NAME_MAP) { + m_name_to_handle.emplace(symbol.name(), symbol.handle()); + } +} + +template +void SymbolList::unlink_name_map(SymbolType& symbol) +{ + if constexpr(SymbolType::FLAGS & WITH_NAME_MAP) { + auto iterators = m_name_to_handle.equal_range(symbol.name()); + for(auto iterator = iterators.first; iterator != iterators.second; iterator++) { + if(iterator->second == symbol.handle()) { + m_name_to_handle.erase(iterator); + break; + } + } + } +} + +template +std::atomic SymbolList::m_next_handle = 0; + +#define CCC_X(SymbolType, symbol_list) template class SymbolList; +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +// 
***************************************************************************** + +void Symbol::set_type(std::unique_ptr type) +{ + m_type = std::move(type); + invalidate_node_handles(); +} + +// ***************************************************************************** + +const char* global_storage_location_to_string(GlobalStorageLocation location) +{ + switch(location) { + case NIL: return "nil"; + case DATA: return "data"; + case BSS: return "bss"; + case ABS: return "abs"; + case SDATA: return "sdata"; + case SBSS: return "sbss"; + case RDATA: return "rdata"; + case COMMON: return "common"; + case SCOMMON: return "scommon"; + case SUNDEFINED: return "sundefined"; + } + return ""; +} + +// ***************************************************************************** + +const std::optional>& Function::parameter_variables() const +{ + return m_parameter_variables; +} + +void Function::set_parameter_variables( + std::optional> parameter_variables, SymbolDatabase& database) +{ + if(m_parameter_variables.has_value()) { + for(ParameterVariableHandle parameter_variable_handle : *m_parameter_variables) { + ParameterVariable* parameter_variable = database.parameter_variables.symbol_from_handle(parameter_variable_handle); + if(parameter_variable && parameter_variable->m_function == handle()) { + parameter_variable->m_function = FunctionHandle(); + } + } + } + + m_parameter_variables = std::move(parameter_variables); + + if(m_parameter_variables.has_value()) { + for(ParameterVariableHandle parameter_variable_handle : *m_parameter_variables) { + ParameterVariable* parameter_variable = database.parameter_variables.symbol_from_handle(parameter_variable_handle); + if(parameter_variable) { + parameter_variable->m_function = handle(); + } + } + } +} + +const std::optional>& Function::local_variables() const +{ + return m_local_variables; +} + +void Function::set_local_variables( + std::optional> local_variables, SymbolDatabase& database) +{ + if(m_local_variables.has_value()) { 
+ for(LocalVariableHandle local_variable_handle : *m_local_variables) { + LocalVariable* local_variable = database.local_variables.symbol_from_handle(local_variable_handle); + if(local_variable && local_variable->m_function == handle()) { + local_variable->m_function = FunctionHandle(); + } + } + } + + m_local_variables = std::move(local_variables); + + if(m_local_variables.has_value()) { + for(LocalVariableHandle local_variable_handle : *m_local_variables) { + LocalVariable* local_variable = database.local_variables.symbol_from_handle(local_variable_handle); + if(local_variable) { + local_variable->m_function = handle(); + } + } + } +} + +const std::string& Function::mangled_name() const +{ + if(!m_mangled_name.empty()) { + return m_mangled_name; + } else { + return name(); + } +} + +void Function::set_mangled_name(std::string mangled) +{ + m_mangled_name = std::move(mangled); +} + +u32 Function::original_hash() const +{ + return m_original_hash; +} + +void Function::set_original_hash(u32 hash) +{ + m_original_hash = hash; +} + +u32 Function::current_hash() const +{ + return m_current_hash; +} + +void Function::set_current_hash(FunctionHash hash) +{ + m_current_hash = hash.get(); +} + +void Function::on_destroy(SymbolDatabase* database) +{ + if(!database) { + return; + } + + if(m_parameter_variables.has_value()) { + for(ParameterVariableHandle parameter_variable : *m_parameter_variables) { + database->parameter_variables.mark_symbol_for_destruction(parameter_variable, database); + } + } + + if(m_local_variables.has_value()) { + for(LocalVariableHandle local_variable : *m_local_variables) { + database->local_variables.mark_symbol_for_destruction(local_variable, database); + } + } +} + +// ***************************************************************************** + +const std::string& GlobalVariable::mangled_name() const +{ + if(!m_mangled_name.empty()) { + return m_mangled_name; + } else { + return name(); + } +} + +void 
GlobalVariable::set_mangled_name(std::string mangled) +{ + m_mangled_name = std::move(mangled); +} + +// ***************************************************************************** + +void Module::on_create() +{ + m_module = m_handle; +} + +// ***************************************************************************** + +bool Section::contains_code() const +{ + return name() == ".text"; +} + +bool Section::contains_data() const +{ + return name() == ".bss" + || name() == ".data" + || name() == ".lit" + || name() == ".lita" + || name() == ".lit4" + || name() == ".lit8" + || name() == ".rdata" + || name() == ".rodata" + || name() == ".sbss" + || name() == ".sdata"; +} + +// ***************************************************************************** + +const std::vector& SourceFile::functions() const +{ + return m_functions; +} + +void SourceFile::set_functions(std::vector functions, SymbolDatabase& database) +{ + for(FunctionHandle function_handle : m_functions) { + Function* function = database.functions.symbol_from_handle(function_handle); + if(function && function->m_source_file == handle()) { + function->m_source_file = SourceFileHandle(); + } + } + + m_functions = std::move(functions); + + for(FunctionHandle function_handle : m_functions) { + Function* function = database.functions.symbol_from_handle(function_handle); + if(function) { + function->m_source_file = handle(); + } + } +} + +const std::vector& SourceFile::global_variables() const +{ + return m_global_variables; +} + +void SourceFile::set_global_variables(std::vector global_variables, SymbolDatabase& database) +{ + for(GlobalVariableHandle global_variable_handle : m_global_variables) { + GlobalVariable* global_variable = database.global_variables.symbol_from_handle(global_variable_handle); + if(global_variable && global_variable->m_source_file == handle()) { + global_variable->m_source_file = SourceFileHandle(); + } + } + + m_global_variables = std::move(global_variables); + + 
for(GlobalVariableHandle global_variable_handle : m_global_variables) { + GlobalVariable* global_variable = database.global_variables.symbol_from_handle(global_variable_handle); + if(global_variable) { + global_variable->m_source_file = handle(); + } + } +} + +bool SourceFile::functions_match() const +{ + return m_functions_match; +} + +void SourceFile::check_functions_match(const SymbolDatabase& database) +{ + u32 matching = 0; + u32 modified = 0; + for(FunctionHandle function_handle : functions()) { + const ccc::Function* function = database.functions.symbol_from_handle(function_handle); + if(!function || function->original_hash() == 0) { + continue; + } + + if(function->current_hash() == function->original_hash()) { + matching++; + } else { + modified++; + } + } + + m_functions_match = matching >= modified; +} + +void SourceFile::on_destroy(SymbolDatabase* database) +{ + if(!database) { + return; + } + + for(FunctionHandle function : m_functions) { + database->functions.mark_symbol_for_destruction(function, database); + } + + for(GlobalVariableHandle global_variable : m_global_variables) { + database->global_variables.mark_symbol_for_destruction(global_variable, database); + } +} + +// ***************************************************************************** + +void SymbolSource::on_create() +{ + m_source = m_handle; +} + +// ***************************************************************************** + +bool SymbolGroup::is_in_group(const Symbol& symbol) const +{ + return symbol.source() == source && symbol.module_handle() == ModuleHandle(module_symbol); +} + +// ***************************************************************************** + +s32 SymbolDatabase::symbol_count() const +{ + s32 sum = 0; + #define CCC_X(SymbolType, symbol_list) sum += symbol_list.size(); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return sum; +} + +const Symbol* SymbolDatabase::symbol_starting_at_address( + Address address, u32 descriptors, SymbolDescriptor* 
descriptor_out) const +{ + #define CCC_X(SymbolType, symbol_list) \ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolHandle handle = symbol_list.first_handle_from_starting_address(address); \ + const SymbolType* symbol = symbol_list.symbol_from_handle(handle); \ + if(symbol) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + return symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return nullptr; +} + +const Symbol* SymbolDatabase::symbol_after_address( + Address address, u32 descriptors, SymbolDescriptor* descriptor_out) const +{ + const Symbol* result = nullptr; + #define CCC_X(SymbolType, symbol_list) \ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolHandle handle = symbol_list.first_handle_after_address(address); \ + const SymbolType* symbol = symbol_list.symbol_from_handle(handle); \ + if(symbol && (!result || symbol->address() < result->address())) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + result = symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return result; +} + +const Symbol* SymbolDatabase::symbol_overlapping_address( + Address address, u32 descriptors, SymbolDescriptor* descriptor_out) const +{ + #define CCC_X(SymbolType, symbol_list) \ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolType* symbol = symbol_list.symbol_overlapping_address(address); \ + if(symbol) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + return symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return nullptr; +} + +// Returns the first symbol called name among the symbol types selected by the +// descriptors bit mask, or nullptr if there is none. If descriptor_out is not +// null it receives the descriptor of the symbol type that matched. Only symbol +// types that maintain a name map (WITH_NAME_MAP) are searched, because +// first_handle_from_name() reads the map that link_name_map() populates. +const Symbol* SymbolDatabase::symbol_with_name( + const std::string& name, u32 descriptors, SymbolDescriptor* descriptor_out) const +{ + #define CCC_X(SymbolType, symbol_list) \ + if 
constexpr(SymbolType::FLAGS & WITH_NAME_MAP) { \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolHandle handle = symbol_list.first_handle_from_name(name); \ + const SymbolType* symbol = symbol_list.symbol_from_handle(handle); \ + if(symbol) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + return symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return nullptr; +} + +Result SymbolDatabase::get_symbol_source(const std::string& name) +{ + SymbolSourceHandle handle = symbol_sources.first_handle_from_name(name); + if(!handle.valid()) { + Result source = symbol_sources.create_symbol(name, SymbolSourceHandle(), nullptr); + CCC_RETURN_IF_ERROR(source); + handle = (*source)->handle(); + } + return handle; +} + +Result SymbolDatabase::create_data_type_if_unique( + std::unique_ptr node, + StabsTypeNumber number, + const char* name, + SourceFile& source_file, + const SymbolGroup& group) +{ + auto types_with_same_name = data_types.handles_from_name(name); + const char* compare_fail_reason = nullptr; + if(types_with_same_name.begin() == types_with_same_name.end()) { + // No types with this name have previously been processed. + Result data_type = data_types.create_symbol(name, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(data_type); + + (*data_type)->files = {source_file.handle()}; + if(number.type > -1) { + source_file.stabs_type_number_to_handle[number] = (*data_type)->handle(); + } + + (*data_type)->set_type(std::move(node)); + + return *data_type; + } else { + // Types with this name have previously been processed, we need to + // figure out if this one matches any of the previous ones.
+ bool match = false; + for(auto [key, existing_type_handle] : types_with_same_name) { + DataType* existing_type = data_types.symbol_from_handle(existing_type_handle); + CCC_ASSERT(existing_type); + + // We don't want to merge together types from different sources or + // modules so that we can destroy all the types from one source + // without breaking anything else. + if(!group.is_in_group(*existing_type)) { + continue; + } + + CCC_ASSERT(existing_type->type()); + ast::CompareResult compare_result = compare_nodes(*existing_type->type(), *node.get(), this, true); + if(compare_result.type == ast::CompareResultType::DIFFERS) { + // The new node doesn't match this existing node. + bool is_anonymous_enum = existing_type->type()->descriptor == ast::ENUM + && existing_type->name().empty(); + if(!is_anonymous_enum) { + existing_type->compare_fail_reason = compare_fail_reason_to_string(compare_result.fail_reason); + compare_fail_reason = compare_fail_reason_to_string(compare_result.fail_reason); + } + } else { + // The new node matches this existing node. + existing_type->files.emplace_back(source_file.handle()); + if(number.type > -1) { + source_file.stabs_type_number_to_handle[number] = existing_type->handle(); + } + if(compare_result.type == ast::CompareResultType::MATCHES_FAVOUR_RHS) { + // The new node almost matches the old one, but the new one + // is slightly better, so we replace the old type. + existing_type->set_type(std::move(node)); + } + match = true; + break; + } + } + + if(!match) { + // This type doesn't match any of the others with the same name + // that have already been processed. 
+ Result data_type = data_types.create_symbol(name, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(data_type); + + (*data_type)->files = {source_file.handle()}; + if(number.type > -1) { + source_file.stabs_type_number_to_handle[number] = (*data_type)->handle(); + } + (*data_type)->compare_fail_reason = compare_fail_reason; + + (*data_type)->set_type(std::move(node)); + + return *data_type; + } + } + + return nullptr; +} + +void SymbolDatabase::merge_from(SymbolDatabase& database) +{ + #define CCC_X(SymbolType, symbol_list) symbol_list.merge_from(database.symbol_list); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X +} + +void SymbolDatabase::destroy_symbols_from_source(SymbolSourceHandle source, bool destroy_descendants) +{ + SymbolDatabase* database = destroy_descendants ? this : nullptr; + + #define CCC_X(SymbolType, symbol_list) symbol_list.mark_symbols_from_source_for_destruction(source, database); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + + destroy_marked_symbols(); +} + +void SymbolDatabase::destroy_symbols_from_module(ModuleHandle module_handle, bool destroy_descendants) +{ + SymbolDatabase* database = destroy_descendants ? 
this : nullptr; + + #define CCC_X(SymbolType, symbol_list) symbol_list.mark_symbols_from_module_for_destruction(module_handle, database); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + + destroy_marked_symbols(); +} + +void SymbolDatabase::destroy_marked_symbols() +{ + #define CCC_X(SymbolType, symbol_list) symbol_list.destroy_marked_symbols(); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X +} + +void SymbolDatabase::clear() +{ + #define CCC_X(SymbolType, symbol_list) symbol_list.clear(); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X +} + +// ***************************************************************************** + +MultiSymbolHandle::MultiSymbolHandle() {} + +template +MultiSymbolHandle::MultiSymbolHandle(const SymbolType& symbol) + : MultiSymbolHandle(SymbolType::DESCRIPTOR, symbol.raw_handle()) {} + +MultiSymbolHandle::MultiSymbolHandle(SymbolDescriptor descriptor, u32 handle) + : m_descriptor(descriptor) + , m_handle(handle) {} + +bool MultiSymbolHandle::valid() const +{ + return m_handle != (u32) -1; +} + +SymbolDescriptor MultiSymbolHandle::descriptor() const +{ + return m_descriptor; +} + +u32 MultiSymbolHandle::handle() const +{ + return m_handle; +} + +Symbol* MultiSymbolHandle::lookup_symbol(SymbolDatabase& database) +{ + if(m_handle == (u32) -1) { + return nullptr; + } + + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return database.symbol_list.symbol_from_handle(m_handle); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + + return nullptr; +} + +const Symbol* MultiSymbolHandle::lookup_symbol(const SymbolDatabase& database) const +{ + return const_cast(this)->lookup_symbol(const_cast(database)); +} + +bool MultiSymbolHandle::is_flag_set(SymbolFlag flag) const +{ + if(m_handle != (u32) -1) { + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return SymbolType::FLAGS & flag; + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + 
return false; +} + +bool MultiSymbolHandle::move_symbol(Address new_address, SymbolDatabase& database) const +{ + if(m_handle != (u32) -1) { + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return database.symbol_list.move_symbol(m_handle, new_address); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + return false; +} + +bool MultiSymbolHandle::rename_symbol(std::string new_name, SymbolDatabase& database) const +{ + if(m_handle != (u32) -1) { + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return database.symbol_list.rename_symbol(m_handle, std::move(new_name)); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + return false; +} + +bool MultiSymbolHandle::destroy_symbol(SymbolDatabase& database, bool destroy_descendants) const +{ + bool success = false; + + if(m_handle != (u32) -1) { + SymbolDatabase* database_ptr = destroy_descendants ? &database : nullptr; + + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + success = database.symbol_list.mark_symbol_for_destruction(m_handle, database_ptr); \ + break; + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + if(success) { + database.destroy_marked_symbols(); + } + + return success; +} + +#define CCC_X(SymbolType, symbol_list) template MultiSymbolHandle::MultiSymbolHandle(const SymbolType& symbol); +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +// ***************************************************************************** + +NodeHandle::NodeHandle() {} + +NodeHandle::NodeHandle(const ast::Node* node) + : m_node(node) {} + +template +NodeHandle::NodeHandle(const SymbolType& symbol, const ast::Node* node) + : NodeHandle(SymbolType::DESCRIPTOR, symbol, node) {} + +NodeHandle::NodeHandle(SymbolDescriptor descriptor, const Symbol& symbol, const ast::Node* node) + : m_symbol(descriptor, symbol.raw_handle()) + , m_node(node) + , 
m_generation(symbol.generation()) {} + +bool NodeHandle::valid() const +{ + return m_node != nullptr; +} + +const MultiSymbolHandle& NodeHandle::symbol() const +{ + return m_symbol; +} + +const ast::Node* NodeHandle::lookup_node(const SymbolDatabase& database) const +{ + if(m_symbol.valid()) { + const Symbol* symbol = m_symbol.lookup_symbol(database); + if(!symbol || symbol->generation() != m_generation) { + return nullptr; + } + } + return m_node; +} + +NodeHandle NodeHandle::handle_for_child(const ast::Node* child_node) const +{ + NodeHandle child_handle; + child_handle.m_symbol = m_symbol; + child_handle.m_node = child_node; + child_handle.m_generation = m_generation; + return child_handle; +} + +#define CCC_X(SymbolType, symbol_list) template NodeHandle::NodeHandle(const SymbolType& symbol, const ast::Node* node); +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +} diff --git a/3rdparty/ccc/src/ccc/symbol_database.h b/3rdparty/ccc/src/ccc/symbol_database.h new file mode 100644 index 0000000000..52c6f1ece4 --- /dev/null +++ b/3rdparty/ccc/src/ccc/symbol_database.h @@ -0,0 +1,721 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#include "util.h" + +namespace ccc { + +// An X macro for all the symbol types. +#define CCC_FOR_EACH_SYMBOL_TYPE_DO_X \ + CCC_X(DataType, data_types) \ + CCC_X(Function, functions) \ + CCC_X(GlobalVariable, global_variables) \ + CCC_X(Label, labels) \ + CCC_X(LocalVariable, local_variables) \ + CCC_X(Module, modules) \ + CCC_X(ParameterVariable, parameter_variables) \ + CCC_X(Section, sections) \ + CCC_X(SourceFile, source_files) \ + CCC_X(SymbolSource, symbol_sources) + +// An enum for all the symbol types. 
+enum SymbolDescriptor { + DATA_TYPE = 1 << 0, + FUNCTION = 1 << 1, + GLOBAL_VARIABLE = 1 << 2, + LABEL = 1 << 3, + LOCAL_VARIABLE = 1 << 4, + MODULE = 1 << 5, + PARAMETER_VARIABLE = 1 << 6, + SECTION = 1 << 7, + SOURCE_FILE = 1 << 8, + SYMBOL_SOURCE = 1 << 9 +}; + +enum { + ALL_SYMBOL_TYPES = 0xffff +}; + +// Forward declare all the different types of symbol objects. +#define CCC_X(SymbolType, symbol_list) class SymbolType; +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +class SymbolDatabase; + +// Strongly typed handles for all of the symbol objects. These are here to solve +// the problem of dangling references to symbols. +template +struct SymbolHandle { + u32 value = (u32) -1; + + SymbolHandle() {} + SymbolHandle(u32 v) : value(v) {} + SymbolHandle(const SymbolType* symbol) + : value(symbol ? symbol->handle().value : (u32) -1) {} + + // Check if this symbol handle has been initialised. Note that this doesn't + // determine whether or not the symbol it points to has been deleted! + bool valid() const { return value != (u32) -1; } + + friend auto operator<=>(const SymbolHandle& lhs, const SymbolHandle& rhs) = default; +}; + +#define CCC_X(SymbolType, symbol_list) using SymbolType##Handle = SymbolHandle; +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +enum SymbolFlag { + NO_SYMBOL_FLAGS = 0, + WITH_ADDRESS_MAP = 1 << 0, + WITH_NAME_MAP = 1 << 1, + NAME_NEEDS_DEMANGLING = 1 << 2 +}; + +// A container class for symbols of a given type that maintains maps of their +// names and addresses depending on the value of SymbolType::FLAGS. +template +class SymbolList { +public: + // Lookup symbols from their handles using binary search. + SymbolType* symbol_from_handle(SymbolHandle handle); + const SymbolType* symbol_from_handle(SymbolHandle handle) const; + + // Lookup multiple symbols from their handles using binary search. 
+ std::vector symbols_from_handles(const std::vector>& handles); + std::vector symbols_from_handles(const std::vector>& handles) const; + std::vector optional_symbols_from_handles(const std::optional>>& handles); + std::vector optional_symbols_from_handles(const std::optional>>& handles) const; + + using Iterator = typename std::vector::iterator; + using ConstIterator = typename std::vector::const_iterator; + + // For iterating over all the symbols. + Iterator begin(); + ConstIterator begin() const; + Iterator end(); + ConstIterator end() const; + + using AddressToHandleMap = std::multimap>; + using NameToHandleMap = std::multimap>; + + template + class Iterators { + public: + Iterators(Iterator b, Iterator e) + : m_begin(b), m_end(e) {} + Iterator begin() const { return m_begin; } + Iterator end() const { return m_end; } + protected: + Iterator m_begin; + Iterator m_end; + }; + + using AddressToHandleMapIterators = Iterators; + using NameToHandleMapIterators = Iterators; + + // Lookup symbols by their address. + AddressToHandleMapIterators handles_from_starting_address(Address address) const; + AddressToHandleMapIterators handles_from_address_range(AddressRange range) const; + SymbolHandle first_handle_from_starting_address(Address address) const; + SymbolHandle first_handle_after_address(Address address) const; + + // Lookup symbols by their name. + NameToHandleMapIterators handles_from_name(const std::string& name) const; + SymbolHandle first_handle_from_name(const std::string& name) const; + + // Find a symbol with an address range that contains the provided address. + // For example, to find which function an instruction belongs to. + SymbolType* symbol_overlapping_address(Address address); + const SymbolType* symbol_overlapping_address(Address address) const; + + // Convert handles to underlying array indices. + s32 index_from_handle(SymbolHandle handle) const; + + // Index into the underlying array. 
+ SymbolType& symbol_from_index(s32 index); + const SymbolType& symbol_from_index(s32 index) const; + + // Determine if any symbols are being stored. + bool empty() const; + + // Retrieve the number of symbols stored. + s32 size() const; + + // Create a new symbol. If it's a SymbolSource symbol, source can be left + // empty, otherwise it has to be valid. + Result create_symbol( + std::string name, Address address, SymbolSourceHandle source, const Module* module_symbol = nullptr); + + // Create a new symbol. Similar to above, but for symbols without addresses. + Result create_symbol( + std::string name, SymbolSourceHandle source, const Module* module_symbol = nullptr); + + // Create a new symbol. Similar to above, but unless DONT_DEMANGLE_NAMES is + // set, the name of the symbol will be demangled. + Result create_symbol( + std::string name, + SymbolSourceHandle source, + const Module* module_symbol, + Address address, + u32 importer_flags, + DemanglerFunctions demangler); + + // Update the address of a symbol without changing its handle. + bool move_symbol(SymbolHandle handle, Address new_address); + + // Update the name of a symbol without changing its handle. + bool rename_symbol(SymbolHandle handle, std::string new_name); + + // Move all the symbols from the passed list into this list. + void merge_from(SymbolList& list); + + // Mark a symbol for destruction. If the correct symbol database pointer is + // passed, all descendants will also be marked. For example, marking a + // function will also mark its parameters and local variables. + bool mark_symbol_for_destruction(SymbolHandle handle, SymbolDatabase* database); + + // Mark all the symbols from a given symbol source for destruction. For + // example you can use this to free a symbol table without destroying + // user-defined symbols. The behaviour for marking descendants is the same + // as destroy_symbol. 
+ void mark_symbols_from_source_for_destruction(SymbolSourceHandle source, SymbolDatabase* database); + + // Mark all the symbols from a given module for destruction. The behaviour + // for marking descendants is the same as destroy_symbol. + void mark_symbols_from_module_for_destruction(ModuleHandle module_handle, SymbolDatabase* database); + + // Destroy all symbols that have previously been marked for destruction. + // This invalidates all pointers to symbols in this list. + void destroy_marked_symbols(); + + // Destroy all symbols, but don't reset m_next_handle so we don't have to + // worry about dangling handles. + void clear(); + +protected: + // Do a binary search for a handle, and return either its index, or the + // index where it could be inserted. + size_t binary_search(SymbolHandle handle) const; + + // Keep the address map in sync with the symbol list. + void link_address_map(SymbolType& symbol); + void unlink_address_map(SymbolType& symbol); + + // Keep the name map in sync with the symbol list. + void link_name_map(SymbolType& symbol); + void unlink_name_map(SymbolType& symbol); + + std::vector m_symbols; + AddressToHandleMap m_address_to_handle; + NameToHandleMap m_name_to_handle; + + // We share this between symbol lists of the same type so that we can merge + // them without having to rewrite all the handles. + static std::atomic m_next_handle; +}; + +// Base class for all the symbols. 
+class Symbol { + template + friend class SymbolList; +public: + const std::string& name() const { return m_name; } + u32 raw_handle() const { return m_handle; } + SymbolSourceHandle source() const { return m_source; } + ModuleHandle module_handle() const { return m_module; } + + Address address() const { return m_address; } + u32 size() const { return m_size; } + void set_size(u32 size) { m_size = size; } + AddressRange address_range() const { return AddressRange(m_address, m_address.get_or_zero() + m_size); } + + ast::Node* type() { return m_type.get(); } + const ast::Node* type() const { return m_type.get(); } + void set_type(std::unique_ptr type); + + u32 generation() const { return m_generation; } + + // This MUST be called after any AST nodes have been created/deleted/moved. + // For the set_type function this is done for you. + void invalidate_node_handles() { m_generation++; } + + // Mark a single symbol for destruction, not including its descendants. + void mark_for_destruction() { m_marked_for_destruction = true; } + bool is_marked_for_destruction() { return m_marked_for_destruction; } + +protected: + void on_create() {} + void on_destroy(SymbolDatabase* database) {} + + u32 m_handle = (u32) -1; + SymbolSourceHandle m_source; + Address m_address; + u32 m_size = 0; + std::string m_name; + std::unique_ptr m_type; + u32 m_generation : 31 = 0; + u32 m_marked_for_destruction : 1 = false; + ModuleHandle m_module; +}; + +// Variable storage types. This is different to whether the variable is a +// global, local or parameter. For example local variables can have global +// storage (static locals). 
+ +enum GlobalStorageLocation { + NIL, + DATA, + BSS, + ABS, + SDATA, + SBSS, + RDATA, + COMMON, + SCOMMON, + SUNDEFINED +}; + +const char* global_storage_location_to_string(GlobalStorageLocation location); + +struct GlobalStorage { + GlobalStorageLocation location = GlobalStorageLocation::NIL; + + GlobalStorage() {} + friend auto operator<=>(const GlobalStorage& lhs, const GlobalStorage& rhs) = default; +}; + +struct RegisterStorage { + s32 dbx_register_number = -1; + bool is_by_reference; + + RegisterStorage() {} + friend auto operator<=>(const RegisterStorage& lhs, const RegisterStorage& rhs) = default; +}; + +struct StackStorage { + s32 stack_pointer_offset = -1; + + StackStorage() {} + friend auto operator<=>(const StackStorage& lhs, const StackStorage& rhs) = default; +}; + +// The hashing algorithm for functions. If you change this algorithm make sure +// to bump the version number for the JSON format so we can know if a hash was +// generated using the new algorithm or not. +class FunctionHash { +public: + void update(u32 instruction) + { + // Separate out the opcode so that the hash remains the same regardless + // of if relocations are applied or not. + u32 opcode = instruction >> 26; + m_hash = m_hash * 31 + opcode; + } + + u32 get() const + { + return m_hash; + } + +protected: + u32 m_hash = 0; +}; + +// All the different types of symbol objects. + +// A C/C++ data type. +class DataType : public Symbol { + friend SourceFile; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = DATA_TYPE; + static constexpr const char* NAME = "Data Type"; + static constexpr const u32 FLAGS = WITH_NAME_MAP; + + DataTypeHandle handle() const { return m_handle; } + + std::vector files; // List of files for which a given top-level type is present. + const char* compare_fail_reason = nullptr; + + bool not_defined_in_any_translation_unit : 1 = false; + bool only_defined_in_single_translation_unit : 1 = false; +}; + +// A function. The type stored is the return type. 
+class Function : public Symbol { + friend SourceFile; + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = FUNCTION; + static constexpr const char* NAME = "Function"; + static constexpr const u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP | NAME_NEEDS_DEMANGLING; + + FunctionHandle handle() const { return m_handle; } + SourceFileHandle source_file() const { return m_source_file; } + + const std::optional>& parameter_variables() const; + void set_parameter_variables(std::optional> parameter_variables, SymbolDatabase& database); + + const std::optional>& local_variables() const; + void set_local_variables(std::optional> local_variables, SymbolDatabase& database); + + const std::string& mangled_name() const; + void set_mangled_name(std::string mangled); + + // A hash of all the opcodes in the function, read from file. + u32 original_hash() const; + void set_original_hash(u32 hash); + + // A hash of all the opcodes in the function, read from memory. + u32 current_hash() const; + void set_current_hash(FunctionHash hash); + + struct LineNumberPair { + Address address; + s32 line_number; + }; + + struct SubSourceFile { + Address address; + std::string relative_path; + }; + + std::string relative_path; + StorageClass storage_class; + s32 stack_frame_size = -1; + std::vector line_numbers; + std::vector sub_source_files; + bool is_member_function_ish = false; // Filled in by fill_in_pointers_to_member_function_definitions. + bool is_no_return = false; + +protected: + void on_destroy(SymbolDatabase* database); + + SourceFileHandle m_source_file; + std::optional> m_parameter_variables; + std::optional> m_local_variables; + + std::string m_mangled_name; + + u32 m_original_hash = 0; + u32 m_current_hash = 0; +}; + +// A global variable. 
+class GlobalVariable : public Symbol { + friend SourceFile; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = GLOBAL_VARIABLE; + static constexpr const char* NAME = "Global Variable"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP | NAME_NEEDS_DEMANGLING; + + GlobalVariableHandle handle() const { return m_handle; } + SourceFileHandle source_file() const { return m_source_file; }; + + const std::string& mangled_name() const; + void set_mangled_name(std::string mangled); + + GlobalStorage storage; + StorageClass storage_class; + +protected: + SourceFileHandle m_source_file; + std::string m_mangled_name; +}; + +// A label. This could be a label defined in assembly, C/C++, or just a symbol +// that we can't automatically determine the type of (e.g. SNDLL symbols). +class Label : public Symbol { +public: + static constexpr const SymbolDescriptor DESCRIPTOR = LABEL; + static constexpr const char* NAME = "Label"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP; + + LabelHandle handle() const { return m_handle; } + + // Indicates that this label should not be used as a function name. + bool is_junk = false; +}; + +// A local variable. This includes static local variables which have global +// storage. +class LocalVariable : public Symbol { + friend Function; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = LOCAL_VARIABLE; + static constexpr const char* NAME = "Local Variable"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP; + + LocalVariableHandle handle() const { return m_handle; } + FunctionHandle function() const { return m_function; }; + + std::variant storage; + AddressRange live_range; + +protected: + FunctionHandle m_function; +}; + +// A program module e.g. an ELF file or an SNDLL file. Every symbol has a module +// field indicating what module the symbol belongs to. This can be used to +// delete all the symbols associated with a given module. 
Additionally, when a +// valid module pointer is passed to SymbolList<>::create_symbol, the address of +// the symbol will be added to the address of the new symbol. +class Module : public Symbol { + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = MODULE; + static constexpr const char* NAME = "Module"; + static constexpr u32 FLAGS = WITH_NAME_MAP; + + ModuleHandle handle() const { return m_handle; } + + // These are used for IRX modules. + bool is_irx = false; + s32 version_major = -1; + s32 version_minor = -1; + +protected: + void on_create(); +}; + +// A parameter variable. +class ParameterVariable : public Symbol { + friend Function; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = PARAMETER_VARIABLE; + static constexpr const char* NAME = "Parameter Variable"; + static constexpr u32 FLAGS = NO_SYMBOL_FLAGS; + + ParameterVariableHandle handle() const { return m_handle; } + FunctionHandle function() const { return m_function; }; + + std::variant storage; + +protected: + FunctionHandle m_function; +}; + +// An ELF section. These are created from the ELF section headers. +class Section : public Symbol { +public: + static constexpr const SymbolDescriptor DESCRIPTOR = SECTION; + static constexpr const char* NAME = "Section"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP; + + SectionHandle handle() const { return m_handle; } + + // Check if the section name is ".text". + bool contains_code() const; + + // Check for known data section names. + bool contains_data() const; +}; + +// A source file (.c or .cpp file). One of these will be created for every +// translation unit in the program (but only if debugging symbols are present). 
+class SourceFile : public Symbol { + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = SOURCE_FILE; + static constexpr const char* NAME = "Source File"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP; + + SourceFileHandle handle() const { return m_handle; } + const std::string& full_path() const { return name(); } + + const std::vector& functions() const; + void set_functions(std::vector functions, SymbolDatabase& database); + + const std::vector& global_variables() const; + void set_global_variables(std::vector global_variables, SymbolDatabase& database); + + // Check whether at least half of the functions associated with the source + // file match their original hash (meaning they haven't been overwritten). + bool functions_match() const; + void check_functions_match(const SymbolDatabase& database); + + std::string working_dir; + std::string command_line_path; + std::map stabs_type_number_to_handle; + std::set toolchain_version_info; + +protected: + void on_destroy(SymbolDatabase* database); + + std::vector m_functions; + std::vector m_global_variables; + bool m_functions_match = true; +}; + +// A symbol source. Every symbol has a symbol source field indicating how the +// symbol was created. For example, the symbol table importers will each create +// one of these (if it doesn't already exist). +class SymbolSource : public Symbol { + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = SYMBOL_SOURCE; + static constexpr const char* NAME = "Symbol Source"; + static constexpr u32 FLAGS = WITH_NAME_MAP; + + SymbolSourceHandle handle() const { return m_handle; } + +protected: + void on_create(); +}; + +// Bundles together all the information needed to identify if a symbol came from +// a specific symbol table import operation. For example, this is used to make +// sure that we don't reference symbols from another symbol table during the +// import process. 
+struct SymbolGroup { + SymbolSourceHandle source; + Module* module_symbol = nullptr; + + bool is_in_group(const Symbol& symbol) const; +}; + +// The symbol database itself. This owns all the symbols. +class SymbolDatabase { +public: + SymbolList data_types; + SymbolList functions; + SymbolList global_variables; + SymbolList