diff --git a/3rdparty/ccc/CMakeLists.txt b/3rdparty/ccc/CMakeLists.txt new file mode 100644 index 0000000000..7ecffbdca6 --- /dev/null +++ b/3rdparty/ccc/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 3.14) +project(ccc) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +add_library(ccc STATIC + src/ccc/ast.cpp + src/ccc/ast.h + src/ccc/elf.cpp + src/ccc/elf.h + src/ccc/elf_symtab.cpp + src/ccc/elf_symtab.h + src/ccc/importer_flags.cpp + src/ccc/importer_flags.h + src/ccc/mdebug_analysis.cpp + src/ccc/mdebug_analysis.h + src/ccc/mdebug_importer.cpp + src/ccc/mdebug_importer.h + src/ccc/mdebug_section.cpp + src/ccc/mdebug_section.h + src/ccc/mdebug_symbols.cpp + src/ccc/mdebug_symbols.h + src/ccc/sndll.cpp + src/ccc/sndll.h + src/ccc/stabs.cpp + src/ccc/stabs.h + src/ccc/stabs_to_ast.cpp + src/ccc/stabs_to_ast.h + src/ccc/symbol_database.cpp + src/ccc/symbol_database.h + src/ccc/symbol_file.cpp + src/ccc/symbol_file.h + src/ccc/symbol_table.cpp + src/ccc/symbol_table.h + src/ccc/util.cpp + src/ccc/util.h +) + +target_include_directories(ccc PUBLIC src) diff --git a/3rdparty/ccc/README.md b/3rdparty/ccc/README.md new file mode 100644 index 0000000000..9c8f1efecf --- /dev/null +++ b/3rdparty/ccc/README.md @@ -0,0 +1,37 @@ +# Chaos Compiler Collection + +This code was originally developed in the following repository and was copied +into PCSX2 by the author: + +- [https://github.com/chaoticgd/ccc](https://github.com/chaoticgd/ccc) + +The original repository includes additional resources that are not present in the PCSX2 repository. 
+ +## Documentation + +### DWARF (.debug) Section + +- [DWARF Debugging Information Format](https://dwarfstd.org/doc/dwarf_1_1_0.pdf) + +### MIPS Debug (.mdebug) Section + +- [Third Eye Software and the MIPS symbol table (Peter Rowell)](http://datahedron.com/mips.html) +- [MIPS Mdebug Debugging Information (David Anderson, 1996)](https://www.prevanders.net/Mdebug.ps) +- MIPS Assembly Language Programmer's Guide, Symbol Table Chapter (Silicon Graphics, 1992) +- Tru64 UNIX Object File and Symbol Table Format Specification, Symbol Table Chapter +- `mdebugread.c` from gdb (reading) +- `ecoff.c` from gas (writing) +- `include/coff/sym.h` from binutils (headers) + +### MIPS EABI + +- [MIPS EABI](https://sourceware.org/legacy-ml/binutils/2003-06/msg00436.html) + +### STABS + +- [The "stabs" representation of debugging information (Julia Menapace, Jim Kingdon, and David MacKenzie, 1992-???)](https://sourceware.org/gdb/onlinedocs/stabs.html) +- `stabs.c` from binutils (reading) +- `stabsread.c` from gdb (reading) +- `dbxread.c` from gdb (reading) +- `dbxout.c` from gcc (writing) +- `stab.def` from gcc (symbol codes) diff --git a/3rdparty/ccc/ccc.vcxproj b/3rdparty/ccc/ccc.vcxproj new file mode 100644 index 0000000000..16ab210948 --- /dev/null +++ b/3rdparty/ccc/ccc.vcxproj @@ -0,0 +1,75 @@ + + + + + + {2589F8CE-EA77-4B73-911E-64074569795B} + + + + StaticLibrary + $(DefaultPlatformToolset) + ClangCL + MultiByte + true + true + false + + + + + + + + + + + + + + AllRules.ruleset + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TurnOffAllWarnings + $(ProjectDir)src;%(AdditionalIncludeDirectories) + stdcpp20 + + + + + diff --git a/3rdparty/ccc/ccc.vcxproj.filters b/3rdparty/ccc/ccc.vcxproj.filters new file mode 100644 index 0000000000..9f49bde786 --- /dev/null +++ b/3rdparty/ccc/ccc.vcxproj.filters @@ -0,0 +1,111 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + 
{93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + diff --git a/3rdparty/ccc/src/ccc/ast.cpp b/3rdparty/ccc/src/ccc/ast.cpp new file mode 100644 index 0000000000..fb5709d366 --- /dev/null +++ b/3rdparty/ccc/src/ccc/ast.cpp @@ -0,0 +1,562 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#include "ast.h" + +#include "importer_flags.h" +#include "symbol_database.h" + +namespace ccc::ast { + +static bool compare_nodes_and_merge( + CompareResult& dest, const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database); +static bool try_to_match_wobbly_typedefs( + const Node& node_lhs, const Node& node_rhs, const SymbolDatabase& database); + +void Node::set_access_specifier(AccessSpecifier specifier, u32 importer_flags) +{ + if((importer_flags & NO_ACCESS_SPECIFIERS) == 0) { + access_specifier = specifier; + } +} + +std::pair Node::physical_type(SymbolDatabase& database, s32 max_depth) +{ + Node* type = this; + DataType* symbol = nullptr; + for(s32 i = 0; i < max_depth && type->descriptor == TYPE_NAME; i++) { + DataType* data_type = database.data_types.symbol_from_handle(type->as().data_type_handle); + if (!data_type || !data_type->type()) { + break; + } + + type = data_type->type(); + symbol = data_type; + } + + return std::pair(type, symbol); +} + +std::pair Node::physical_type(const SymbolDatabase& database, s32 max_depth) const +{ + return const_cast(this)->physical_type(const_cast(database), max_depth); +} + +const char* member_function_modifier_to_string(MemberFunctionModifier modifier) +{ + switch(modifier) { + case MemberFunctionModifier::NONE: return "none"; + case MemberFunctionModifier::STATIC: return "static"; + case MemberFunctionModifier::VIRTUAL: return "virtual"; + } + return ""; +} + +bool StructOrUnion::flatten_fields( + std::vector& output, + const DataType* symbol, + const SymbolDatabase& database, + bool skip_statics, + s32 base_offset, + s32 max_fields, + s32 max_depth) const +{ + if(max_depth == 0) { + return false; + } + + for(const std::unique_ptr& type_name : base_classes) { + if(type_name->descriptor != TYPE_NAME) { + continue; + } + + s32 new_base_offset = base_offset + type_name->offset_bytes; + + DataTypeHandle handle = type_name->as().data_type_handle; + const DataType* 
base_class_symbol = database.data_types.symbol_from_handle(handle); + if(!base_class_symbol || !base_class_symbol->type() || base_class_symbol->type()->descriptor != STRUCT_OR_UNION) { + continue; + } + + const StructOrUnion& base_class = base_class_symbol->type()->as(); + if(!base_class.flatten_fields(output, base_class_symbol, database, skip_statics, new_base_offset, max_fields, max_depth - 1)) { + return false; + } + } + + for(const std::unique_ptr& field : fields) { + if(skip_statics && field->storage_class == STORAGE_CLASS_STATIC) { + continue; + } + + if((s32) output.size() >= max_fields) { + return false; + } + + FlatField& flat = output.emplace_back(); + flat.node = field.get(); + flat.symbol = symbol; + flat.base_offset = base_offset; + } + + return true; +} + +const char* type_name_source_to_string(TypeNameSource source) +{ + switch(source) { + case TypeNameSource::REFERENCE: return "reference"; + case TypeNameSource::CROSS_REFERENCE: return "cross_reference"; + case TypeNameSource::UNNAMED_THIS: return "this"; + } + return ""; +} + +const char* forward_declared_type_to_string(ForwardDeclaredType type) +{ + switch(type) { + case ForwardDeclaredType::STRUCT: return "struct"; + case ForwardDeclaredType::UNION: return "union"; + case ForwardDeclaredType::ENUM: return "enum"; + } + return ""; +} + +DataTypeHandle TypeName::data_type_handle_unless_forward_declared() const +{ + if(!is_forward_declared) { + return data_type_handle; + } else { + return DataTypeHandle(); + } +} + +CompareResult compare_nodes( + const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database, bool check_intrusive_fields) +{ + CompareResult result = CompareResultType::MATCHES_NO_SWAP; + + if(node_lhs.descriptor != node_rhs.descriptor) { + return CompareFailReason::DESCRIPTOR; + } + + if(check_intrusive_fields) { + if(node_lhs.storage_class != node_rhs.storage_class) { + // In some cases we can determine that a type was typedef'd for C + // translation units, but not for 
C++ translation units, so we need + // to add a special case for that here. + if(node_lhs.storage_class == STORAGE_CLASS_TYPEDEF && node_rhs.storage_class == STORAGE_CLASS_NONE) { + result = CompareResultType::MATCHES_FAVOUR_LHS; + } else if(node_lhs.storage_class == STORAGE_CLASS_NONE && node_rhs.storage_class == STORAGE_CLASS_TYPEDEF) { + result = CompareResultType::MATCHES_FAVOUR_RHS; + } else { + return CompareFailReason::STORAGE_CLASS; + } + } + + // Vtable pointers and constructors can sometimes contain type numbers + // that are different between translation units, so we don't want to + // compare them. + bool is_vtable_pointer = node_lhs.is_vtable_pointer && node_rhs.is_vtable_pointer; + bool is_numbered_constructor = node_lhs.name.starts_with("$_") && node_rhs.name.starts_with("$_"); + if(node_lhs.name != node_rhs.name && !is_vtable_pointer && !is_numbered_constructor) { + return CompareFailReason::NAME; + } + + if(node_lhs.offset_bytes != node_rhs.offset_bytes) { + return CompareFailReason::RELATIVE_OFFSET_BYTES; + } + + if(node_lhs.size_bits != node_rhs.size_bits) { + return CompareFailReason::SIZE_BITS; + } + + if(node_lhs.is_const != node_rhs.is_const) { + return CompareFailReason::CONSTNESS; + } + } + + switch(node_lhs.descriptor) { + case ARRAY: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(compare_nodes_and_merge(result, *lhs.element_type.get(), *rhs.element_type.get(), database)) { + return result; + } + + if(lhs.element_count != rhs.element_count) { + return CompareFailReason::ARRAY_ELEMENT_COUNT; + } + + break; + } + case BITFIELD: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.bitfield_offset_bits != rhs.bitfield_offset_bits) { + return CompareFailReason::BITFIELD_OFFSET_BITS; + } + + if(compare_nodes_and_merge(result, *lhs.underlying_type.get(), *rhs.underlying_type.get(), database)) { + return result; + } + + break; + } + case BUILTIN: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + 
if(lhs.bclass != rhs.bclass) { + return CompareFailReason::BUILTIN_CLASS; + } + + break; + } + case ENUM: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.constants != rhs.constants) { + return CompareFailReason::ENUM_CONSTANTS; + } + + break; + } + case ERROR_NODE: { + break; + } + case FUNCTION: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.return_type.has_value() != rhs.return_type.has_value()) { + return CompareFailReason::FUNCTION_RETURN_TYPE_HAS_VALUE; + } + + if(lhs.return_type.has_value()) { + if(compare_nodes_and_merge(result, *lhs.return_type->get(), *rhs.return_type->get(), database)) { + return result; + } + } + + if(lhs.parameters.has_value() && rhs.parameters.has_value()) { + if(lhs.parameters->size() != rhs.parameters->size()) { + return CompareFailReason::FUNCTION_PARAMAETER_COUNT; + } + for(size_t i = 0; i < lhs.parameters->size(); i++) { + if(compare_nodes_and_merge(result, *(*lhs.parameters)[i].get(), *(*rhs.parameters)[i].get(), database)) { + return result; + } + } + } else if(lhs.parameters.has_value() != rhs.parameters.has_value()) { + return CompareFailReason::FUNCTION_PARAMETERS_HAS_VALUE; + } + + if(lhs.modifier != rhs.modifier) { + return CompareFailReason::FUNCTION_MODIFIER; + } + + break; + } + case POINTER_OR_REFERENCE: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(lhs.is_pointer != rhs.is_pointer) { + return CompareFailReason::DESCRIPTOR; + } + + if(compare_nodes_and_merge(result, *lhs.value_type.get(), *rhs.value_type.get(), database)) { + return result; + } + + break; + } + case POINTER_TO_DATA_MEMBER: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + if(compare_nodes_and_merge(result, *lhs.class_type.get(), *rhs.class_type.get(), database)) { + return result; + } + + if(compare_nodes_and_merge(result, *lhs.member_type.get(), *rhs.member_type.get(), database)) { + return result; + } + + break; + } + case STRUCT_OR_UNION: { + const auto [lhs, rhs] = 
Node::as(node_lhs, node_rhs); + + if(lhs.is_struct != rhs.is_struct) { + return CompareFailReason::DESCRIPTOR; + } + + if(lhs.base_classes.size() != rhs.base_classes.size()) { + return CompareFailReason::BASE_CLASS_COUNT; + } + + for(size_t i = 0; i < lhs.base_classes.size(); i++) { + if(compare_nodes_and_merge(result, *lhs.base_classes[i].get(), *rhs.base_classes[i].get(), database)) { + return result; + } + } + + if(lhs.fields.size() != rhs.fields.size()) { + return CompareFailReason::FIELDS_SIZE; + } + + for(size_t i = 0; i < lhs.fields.size(); i++) { + if(compare_nodes_and_merge(result, *lhs.fields[i].get(), *rhs.fields[i].get(), database)) { + return result; + } + } + + if(lhs.member_functions.size() != rhs.member_functions.size()) { + return CompareFailReason::MEMBER_FUNCTION_COUNT; + } + + for(size_t i = 0; i < lhs.member_functions.size(); i++) { + if(compare_nodes_and_merge(result, *lhs.member_functions[i].get(), *rhs.member_functions[i].get(), database)) { + return result; + } + } + + break; + } + case TYPE_NAME: { + const auto [lhs, rhs] = Node::as(node_lhs, node_rhs); + + // Don't check the source so that REFERENCE and CROSS_REFERENCE are + // treated as the same. 
+ if(lhs.data_type_handle != rhs.data_type_handle) { + return CompareFailReason::TYPE_NAME; + } + + const TypeName::UnresolvedStabs* lhs_unresolved_stabs = lhs.unresolved_stabs.get(); + const TypeName::UnresolvedStabs* rhs_unresolved_stabs = rhs.unresolved_stabs.get(); + if(lhs_unresolved_stabs && rhs_unresolved_stabs) { + if(lhs_unresolved_stabs->type_name != rhs_unresolved_stabs->type_name) { + return CompareFailReason::TYPE_NAME; + } + } else if(lhs_unresolved_stabs || rhs_unresolved_stabs) { + return CompareFailReason::TYPE_NAME; + } + + break; + } + } + return result; +} + +static bool compare_nodes_and_merge( + CompareResult& dest, const Node& node_lhs, const Node& node_rhs, const SymbolDatabase* database) +{ + CompareResult result = compare_nodes(node_lhs, node_rhs, database, true); + if(database) { + if(result.type == CompareResultType::DIFFERS && try_to_match_wobbly_typedefs(node_lhs, node_rhs, *database)) { + result.type = CompareResultType::MATCHES_FAVOUR_LHS; + } else if(result.type == CompareResultType::DIFFERS && try_to_match_wobbly_typedefs(node_rhs, node_lhs, *database)) { + result.type = CompareResultType::MATCHES_FAVOUR_RHS; + } + } + + if(dest.type != result.type) { + if(dest.type == CompareResultType::DIFFERS || result.type == CompareResultType::DIFFERS) { + // If any of the inner types differ, the outer type does too. + dest.type = CompareResultType::DIFFERS; + } else if(dest.type == CompareResultType::MATCHES_CONFUSED || result.type == CompareResultType::MATCHES_CONFUSED) { + // Propagate confusion. + dest.type = CompareResultType::MATCHES_CONFUSED; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_LHS && result.type == CompareResultType::MATCHES_FAVOUR_RHS) { + // One of the results favours the LHS node and the other favours the + // RHS node so we are confused. 
+ dest.type = CompareResultType::MATCHES_CONFUSED; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_RHS && result.type == CompareResultType::MATCHES_FAVOUR_LHS) { + // One of the results favours the LHS node and the other favours the + // RHS node so we are confused. + dest.type = CompareResultType::MATCHES_CONFUSED; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_LHS || result.type == CompareResultType::MATCHES_FAVOUR_LHS) { + // One of the results favours the LHS node and the other is neutral + // so go with the LHS node. + dest.type = CompareResultType::MATCHES_FAVOUR_LHS; + } else if(dest.type == CompareResultType::MATCHES_FAVOUR_RHS || result.type == CompareResultType::MATCHES_FAVOUR_RHS) { + // One of the results favours the RHS node and the other is neutral + // so go with the RHS node. + dest.type = CompareResultType::MATCHES_FAVOUR_RHS; + } + } + + if(dest.fail_reason == CompareFailReason::NONE) { + dest.fail_reason = result.fail_reason; + } + + return dest.type == CompareResultType::DIFFERS; +} + +static bool try_to_match_wobbly_typedefs( + const Node& type_name_node, const Node& raw_node, const SymbolDatabase& database) +{ + // Detect if one side has a typedef when the other just has the plain type. + // This was previously a common reason why type deduplication would fail. 
+ if(type_name_node.descriptor != TYPE_NAME) { + return false; + } + + const TypeName& type_name = type_name_node.as(); + if(const TypeName::UnresolvedStabs* unresolved_stabs = type_name.unresolved_stabs.get()) { + if(unresolved_stabs->referenced_file_handle == (u32) -1 || !unresolved_stabs->stabs_type_number.valid()) { + return false; + } + + const SourceFile* source_file = + database.source_files.symbol_from_handle(unresolved_stabs->referenced_file_handle); + CCC_ASSERT(source_file); + + auto handle = source_file->stabs_type_number_to_handle.find(unresolved_stabs->stabs_type_number); + if(handle != source_file->stabs_type_number_to_handle.end()) { + const DataType* referenced_type = database.data_types.symbol_from_handle(handle->second); + CCC_ASSERT(referenced_type && referenced_type->type()); + // Don't compare 'intrusive' fields e.g. the offset. + CompareResult new_result = compare_nodes(*referenced_type->type(), raw_node, &database, false); + if(new_result.type != CompareResultType::DIFFERS) { + return true; + } + } + } + + return false; +} + +const char* compare_fail_reason_to_string(CompareFailReason reason) +{ + switch(reason) { + case CompareFailReason::NONE: return "error"; + case CompareFailReason::DESCRIPTOR: return "descriptor"; + case CompareFailReason::STORAGE_CLASS: return "storage class"; + case CompareFailReason::NAME: return "name"; + case CompareFailReason::RELATIVE_OFFSET_BYTES: return "relative offset"; + case CompareFailReason::ABSOLUTE_OFFSET_BYTES: return "absolute offset"; + case CompareFailReason::BITFIELD_OFFSET_BITS: return "bitfield offset"; + case CompareFailReason::SIZE_BITS: return "size"; + case CompareFailReason::CONSTNESS: return "constness"; + case CompareFailReason::ARRAY_ELEMENT_COUNT: return "array element count"; + case CompareFailReason::BUILTIN_CLASS: return "builtin class"; + case CompareFailReason::FUNCTION_RETURN_TYPE_HAS_VALUE: return "function return type has value"; + case 
CompareFailReason::FUNCTION_PARAMAETER_COUNT: return "function paramaeter count"; + case CompareFailReason::FUNCTION_PARAMETERS_HAS_VALUE: return "function parameter"; + case CompareFailReason::FUNCTION_MODIFIER: return "function modifier"; + case CompareFailReason::ENUM_CONSTANTS: return "enum constant"; + case CompareFailReason::BASE_CLASS_COUNT: return "base class count"; + case CompareFailReason::FIELDS_SIZE: return "fields size"; + case CompareFailReason::MEMBER_FUNCTION_COUNT: return "member function count"; + case CompareFailReason::VTABLE_GLOBAL: return "vtable global"; + case CompareFailReason::TYPE_NAME: return "type name"; + case CompareFailReason::VARIABLE_CLASS: return "variable class"; + case CompareFailReason::VARIABLE_TYPE: return "variable type"; + case CompareFailReason::VARIABLE_STORAGE: return "variable storage"; + case CompareFailReason::VARIABLE_BLOCK: return "variable block"; + } + return ""; +} + +const char* node_type_to_string(const Node& node) +{ + switch(node.descriptor) { + case ARRAY: return "array"; + case BITFIELD: return "bitfield"; + case BUILTIN: return "builtin"; + case ENUM: return "enum"; + case ERROR_NODE: return "error"; + case FUNCTION: return "function"; + case POINTER_OR_REFERENCE: { + const PointerOrReference& pointer_or_reference = node.as(); + if(pointer_or_reference.is_pointer) { + return "pointer"; + } else { + return "reference"; + } + } + case POINTER_TO_DATA_MEMBER: return "pointer_to_data_member"; + case STRUCT_OR_UNION: { + const StructOrUnion& struct_or_union = node.as(); + if(struct_or_union.is_struct) { + return "struct"; + } else { + return "union"; + } + } + case TYPE_NAME: return "type_name"; + } + return ""; +} + +const char* storage_class_to_string(StorageClass storage_class) +{ + switch(storage_class) { + case STORAGE_CLASS_NONE: return "none"; + case STORAGE_CLASS_TYPEDEF: return "typedef"; + case STORAGE_CLASS_EXTERN: return "extern"; + case STORAGE_CLASS_STATIC: return "static"; + case 
STORAGE_CLASS_AUTO: return "auto"; + case STORAGE_CLASS_REGISTER: return "register"; + } + return ""; +} + +const char* access_specifier_to_string(AccessSpecifier specifier) +{ + switch(specifier) { + case AS_PUBLIC: return "public"; + case AS_PROTECTED: return "protected"; + case AS_PRIVATE: return "private"; + } + return ""; +} + +const char* builtin_class_to_string(BuiltInClass bclass) +{ + switch(bclass) { + case BuiltInClass::VOID_TYPE: return "void"; + case BuiltInClass::UNSIGNED_8: return "8-bit unsigned integer"; + case BuiltInClass::SIGNED_8: return "8-bit signed integer"; + case BuiltInClass::UNQUALIFIED_8: return "8-bit integer"; + case BuiltInClass::BOOL_8: return "8-bit boolean"; + case BuiltInClass::UNSIGNED_16: return "16-bit unsigned integer"; + case BuiltInClass::SIGNED_16: return "16-bit signed integer"; + case BuiltInClass::UNSIGNED_32: return "32-bit unsigned integer"; + case BuiltInClass::SIGNED_32: return "32-bit signed integer"; + case BuiltInClass::FLOAT_32: return "32-bit floating point"; + case BuiltInClass::UNSIGNED_64: return "64-bit unsigned integer"; + case BuiltInClass::SIGNED_64: return "64-bit signed integer"; + case BuiltInClass::FLOAT_64: return "64-bit floating point"; + case BuiltInClass::UNSIGNED_128: return "128-bit unsigned integer"; + case BuiltInClass::SIGNED_128: return "128-bit signed integer"; + case BuiltInClass::UNQUALIFIED_128: return "128-bit integer"; + case BuiltInClass::FLOAT_128: return "128-bit floating point"; + } + return ""; +} + +s32 builtin_class_size(BuiltInClass bclass) +{ + switch(bclass) { + case BuiltInClass::VOID_TYPE: return 0; + case BuiltInClass::UNSIGNED_8: return 1; + case BuiltInClass::SIGNED_8: return 1; + case BuiltInClass::UNQUALIFIED_8: return 1; + case BuiltInClass::BOOL_8: return 1; + case BuiltInClass::UNSIGNED_16: return 2; + case BuiltInClass::SIGNED_16: return 2; + case BuiltInClass::UNSIGNED_32: return 4; + case BuiltInClass::SIGNED_32: return 4; + case BuiltInClass::FLOAT_32: return 
4; + case BuiltInClass::UNSIGNED_64: return 8; + case BuiltInClass::SIGNED_64: return 8; + case BuiltInClass::FLOAT_64: return 8; + case BuiltInClass::UNSIGNED_128: return 16; + case BuiltInClass::SIGNED_128: return 16; + case BuiltInClass::UNQUALIFIED_128: return 16; + case BuiltInClass::FLOAT_128: return 16; + } + return 0; +} + +} diff --git a/3rdparty/ccc/src/ccc/ast.h b/3rdparty/ccc/src/ccc/ast.h new file mode 100644 index 0000000000..a34fa2e446 --- /dev/null +++ b/3rdparty/ccc/src/ccc/ast.h @@ -0,0 +1,377 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc::ast { + +enum NodeDescriptor : u8 { + ARRAY, + BITFIELD, + BUILTIN, + ENUM, + ERROR_NODE, + FUNCTION, + POINTER_OR_REFERENCE, + POINTER_TO_DATA_MEMBER, + STRUCT_OR_UNION, + TYPE_NAME +}; + +enum AccessSpecifier { + AS_PUBLIC = 0, + AS_PROTECTED = 1, + AS_PRIVATE = 2 +}; + +// To add a new type of node: +// 1. Add it to the NodeDescriptor enum. +// 2. Create a struct for it. +// 3. Add support for it in for_each_node. +// 4. Add support for it in compute_size_bytes_recursive. +// 5. Add support for it in compare_nodes. +// 6. Add support for it in node_type_to_string. +// 7. Add support for it in CppPrinter::ast_node. +// 8. Add support for it in write_json. +// 9. Add support for it in refine_node. +struct Node { + const NodeDescriptor descriptor; + u8 is_const : 1 = false; + u8 is_volatile : 1 = false; + u8 is_virtual_base_class : 1 = false; + u8 is_vtable_pointer : 1 = false; + u8 is_constructor_or_destructor : 1 = false; + u8 is_special_member_function : 1 = false; + u8 is_operator_member_function : 1 = false; + u8 cannot_compute_size : 1 = false; + u8 storage_class : 4 = STORAGE_CLASS_NONE; + u8 access_specifier : 2 = AS_PUBLIC; + + s32 size_bytes = -1; + + // If the name isn't populated for a given node, the name from the last + // ancestor to have one should be used i.e. 
when processing the tree you + // should pass the name down. + std::string name; + + s32 offset_bytes = -1; // Offset relative to start of last inline struct/union. + s32 size_bits = -1; // Size stored in the .mdebug symbol table, may not be set. + + Node(NodeDescriptor d) : descriptor(d) {} + Node(const Node& rhs) = default; + virtual ~Node() {} + + template + SubType& as() { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + template + const SubType& as() const { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + template + static std::pair as(const Node& lhs, const Node& rhs) { + CCC_ASSERT(lhs.descriptor == SubType::DESCRIPTOR && rhs.descriptor == SubType::DESCRIPTOR); + return std::pair(static_cast(lhs), static_cast(rhs)); + } + + void set_access_specifier(AccessSpecifier specifier, u32 importer_flags); + + // If this node is a type name, repeatedly resolve it to the type it's + // referencing, otherwise return (this, nullptr). + std::pair physical_type(SymbolDatabase& database, s32 max_depth = 100); + std::pair physical_type(const SymbolDatabase& database, s32 max_depth = 100) const; +}; + +struct Array : Node { + std::unique_ptr element_type; + s32 element_count = -1; + + Array() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = ARRAY; +}; + +struct BitField : Node { + s32 bitfield_offset_bits = -1; // Offset relative to the last byte (not the position of the underlying type!). 
+ std::unique_ptr underlying_type; + + BitField() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = BITFIELD; +}; + +enum class BuiltInClass { + VOID_TYPE, + UNSIGNED_8, SIGNED_8, UNQUALIFIED_8, BOOL_8, + UNSIGNED_16, SIGNED_16, + UNSIGNED_32, SIGNED_32, FLOAT_32, + UNSIGNED_64, SIGNED_64, FLOAT_64, + UNSIGNED_128, SIGNED_128, UNQUALIFIED_128, FLOAT_128 +}; + +struct BuiltIn : Node { + BuiltInClass bclass = BuiltInClass::VOID_TYPE; + + BuiltIn() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = BUILTIN; +}; + +struct Enum : Node { + std::vector> constants; + + Enum() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = ENUM; +}; + +struct Error : Node { + std::string message; + + Error() : Node(ERROR_NODE) {} + static const constexpr NodeDescriptor DESCRIPTOR = ERROR_NODE; +}; + +enum class MemberFunctionModifier { + NONE, + STATIC, + VIRTUAL +}; + +const char* member_function_modifier_to_string(MemberFunctionModifier modifier); + +struct Function : Node { + std::optional> return_type; + std::optional>> parameters; + MemberFunctionModifier modifier = MemberFunctionModifier::NONE; + s32 vtable_index = -1; + FunctionHandle definition_handle; // Filled in by fill_in_pointers_to_member_function_definitions. 
+ + Function() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = FUNCTION; +}; + +struct PointerOrReference : Node { + bool is_pointer = true; + std::unique_ptr value_type; + + PointerOrReference() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = POINTER_OR_REFERENCE; +}; + +struct PointerToDataMember : Node { + std::unique_ptr class_type; + std::unique_ptr member_type; + + PointerToDataMember() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = POINTER_TO_DATA_MEMBER; +}; + +struct StructOrUnion : Node { + bool is_struct = true; + std::vector> base_classes; + std::vector> fields; + std::vector> member_functions; + + StructOrUnion() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = STRUCT_OR_UNION; + + struct FlatField { + // The field itself. + const Node* node; + // The symbol that owns the node. + const DataType* symbol; + // Offset of the innermost enclosing base class in the object. + s32 base_offset = 0; + }; + + // Generate a flat list of all the fields in this class as well as all the + // base classes recursively, but only until the max_fields or max_depth + // limits are reached. Return true if all the fields were enumerated. + bool flatten_fields( + std::vector& output, + const DataType* symbol, + const SymbolDatabase& database, + bool skip_statics, + s32 base_offset = 0, + s32 max_fields = 100000, + s32 max_depth = 100) const; +}; + +enum class TypeNameSource : u8 { + REFERENCE, // A STABS type reference. + CROSS_REFERENCE, // A STABS cross reference. + UNNAMED_THIS // A this parameter (or return type) referencing an unnamed type. +}; + +const char* type_name_source_to_string(TypeNameSource source); + +enum class ForwardDeclaredType { + STRUCT, + UNION, + ENUM // Should be illegal but STABS supports cross references to enums so it's here. 
+}; + +const char* forward_declared_type_to_string(ForwardDeclaredType type); + +struct TypeName : Node { + DataTypeHandle data_type_handle; + TypeNameSource source = TypeNameSource::REFERENCE; + bool is_forward_declared = false; + + DataTypeHandle data_type_handle_unless_forward_declared() const; + + struct UnresolvedStabs { + std::string type_name; + SourceFileHandle referenced_file_handle; + StabsTypeNumber stabs_type_number; + std::optional type; + }; + + std::unique_ptr unresolved_stabs; + + TypeName() : Node(DESCRIPTOR) {} + static const constexpr NodeDescriptor DESCRIPTOR = TYPE_NAME; +}; + +enum class CompareResultType { + MATCHES_NO_SWAP, // Both lhs and rhs are identical. + MATCHES_CONFUSED, // Both lhs and rhs are almost identical, and we don't know which is better. + MATCHES_FAVOUR_LHS, // Both lhs and rhs are almost identical, but lhs is better. + MATCHES_FAVOUR_RHS, // Both lhs and rhs are almost identical, but rhs is better. + DIFFERS, // The two nodes differ substantially. +}; + +enum class CompareFailReason { + NONE, + DESCRIPTOR, + STORAGE_CLASS, + NAME, + RELATIVE_OFFSET_BYTES, + ABSOLUTE_OFFSET_BYTES, + BITFIELD_OFFSET_BITS, + SIZE_BITS, + CONSTNESS, + ARRAY_ELEMENT_COUNT, + BUILTIN_CLASS, + FUNCTION_RETURN_TYPE_HAS_VALUE, + FUNCTION_PARAMAETER_COUNT, + FUNCTION_PARAMETERS_HAS_VALUE, + FUNCTION_MODIFIER, + ENUM_CONSTANTS, + BASE_CLASS_COUNT, + FIELDS_SIZE, + MEMBER_FUNCTION_COUNT, + VTABLE_GLOBAL, + TYPE_NAME, + VARIABLE_CLASS, + VARIABLE_TYPE, + VARIABLE_STORAGE, + VARIABLE_BLOCK +}; + +struct CompareResult { + CompareResult(CompareResultType type) : type(type), fail_reason(CompareFailReason::NONE) {} + CompareResult(CompareFailReason reason) : type(CompareResultType::DIFFERS), fail_reason(reason) {} + CompareResultType type; + CompareFailReason fail_reason; +}; + +// Compare two AST nodes and their children recursively. This will only check +// fields that will be equal for two versions of the same type from different +// translation units. 
+CompareResult compare_nodes(const Node& lhs, const Node& rhs, const SymbolDatabase* database, bool check_intrusive_fields); + +const char* compare_fail_reason_to_string(CompareFailReason reason); +const char* node_type_to_string(const Node& node); +const char* storage_class_to_string(StorageClass storage_class); +const char* access_specifier_to_string(AccessSpecifier specifier); +const char* builtin_class_to_string(BuiltInClass bclass); + +s32 builtin_class_size(BuiltInClass bclass); + +enum TraversalOrder { + PREORDER_TRAVERSAL, + POSTORDER_TRAVERSAL +}; + +enum ExplorationMode { + EXPLORE_CHILDREN, + DONT_EXPLORE_CHILDREN +}; + +template +void for_each_node(ThisNode& node, TraversalOrder order, Callback callback) +{ + if(order == PREORDER_TRAVERSAL && callback(node) == DONT_EXPLORE_CHILDREN) { + return; + } + switch(node.descriptor) { + case ARRAY: { + auto& array = node.template as(); + for_each_node(*array.element_type.get(), order, callback); + break; + } + case BITFIELD: { + auto& bitfield = node.template as(); + for_each_node(*bitfield.underlying_type.get(), order, callback); + break; + } + case BUILTIN: { + break; + } + case ENUM: { + break; + } + case ERROR_NODE: { + break; + } + case FUNCTION: { + auto& func = node.template as(); + if(func.return_type.has_value()) { + for_each_node(*func.return_type->get(), order, callback); + } + if(func.parameters.has_value()) { + for(auto& child : *func.parameters) { + for_each_node(*child.get(), order, callback); + } + } + break; + } + case POINTER_OR_REFERENCE: { + auto& pointer_or_reference = node.template as(); + for_each_node(*pointer_or_reference.value_type.get(), order, callback); + break; + } + case POINTER_TO_DATA_MEMBER: { + auto& pointer = node.template as(); + for_each_node(*pointer.class_type.get(), order, callback); + for_each_node(*pointer.member_type.get(), order, callback); + break; + } + case STRUCT_OR_UNION: { + auto& struct_or_union = node.template as(); + for(auto& child : 
struct_or_union.base_classes) { + for_each_node(*child.get(), order, callback); + } + for(auto& child : struct_or_union.fields) { + for_each_node(*child.get(), order, callback); + } + for(auto& child : struct_or_union.member_functions) { + for_each_node(*child.get(), order, callback); + } + break; + } + case TYPE_NAME: { + break; + } + } + if(order == POSTORDER_TRAVERSAL) { + callback(node); + } +} + +} diff --git a/3rdparty/ccc/src/ccc/elf.cpp b/3rdparty/ccc/src/ccc/elf.cpp new file mode 100644 index 0000000000..8c3ed99e57 --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf.cpp @@ -0,0 +1,125 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "elf.h" + +namespace ccc { + +Result ElfFile::parse(std::vector image) +{ + ElfFile elf; + elf.image = std::move(image); + + const ElfIdentHeader* ident = get_packed(elf.image, 0); + CCC_CHECK(ident, "ELF ident header out of range."); + CCC_CHECK(ident->magic == CCC_FOURCC("\x7f\x45\x4c\x46"), "Not an ELF file."); + CCC_CHECK(ident->e_class == ElfIdentClass::B32, "Wrong ELF class (not 32 bit)."); + + const ElfFileHeader* header = get_packed(elf.image, sizeof(ElfIdentHeader)); + CCC_CHECK(header, "ELF file header out of range."); + elf.file_header = *header; + + const ElfSectionHeader* shstr_section_header = get_packed(elf.image, header->shoff + header->shstrndx * sizeof(ElfSectionHeader)); + CCC_CHECK(shstr_section_header, "ELF section name header out of range."); + + for(u32 i = 0; i < header->shnum; i++) { + u64 header_offset = header->shoff + i * sizeof(ElfSectionHeader); + const ElfSectionHeader* section_header = get_packed(elf.image, header_offset); + CCC_CHECK(section_header, "ELF section header out of range."); + + const char* name = get_string(elf.image, shstr_section_header->offset + section_header->name); + CCC_CHECK(name, "ELF section name out of range."); // Validate the string lookup itself; section_header was already checked above. + + ElfSection& section = elf.sections.emplace_back(); + section.name = name; + section.header = 
*section_header; + } + + for(u32 i = 0; i < header->phnum; i++) { + u64 header_offset = header->phoff + i * sizeof(ElfProgramHeader); + const ElfProgramHeader* program_header = get_packed(elf.image, header_offset); + CCC_CHECK(program_header, "ELF program header out of range."); + + elf.segments.emplace_back(*program_header); + } + + return elf; +} + +Result ElfFile::create_section_symbols( + SymbolDatabase& database, const SymbolGroup& group) const +{ + for(const ElfSection& section : sections) { + Address address = Address::non_zero(section.header.addr); + + Result symbol = database.sections.create_symbol( + section.name, address, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(symbol); + + (*symbol)->set_size(section.header.size); + } + + return Result(); +} + +const ElfSection* ElfFile::lookup_section(const char* name) const // Returns the first section whose name equals `name`, or nullptr if none matches. +{ + for(const ElfSection& section : sections) { + if(section.name == name) { + return &section; + } + } + return nullptr; +} + +std::optional ElfFile::file_offset_to_virtual_address(u32 file_offset) const +{ + for(const ElfProgramHeader& segment : segments) { + if(file_offset >= segment.offset && file_offset < segment.offset + segment.filesz) { + return segment.vaddr + file_offset - segment.offset; + } + } + return std::nullopt; +} + +const ElfProgramHeader* ElfFile::entry_point_segment() const +{ + const ccc::ElfProgramHeader* entry_segment = nullptr; + for(const ccc::ElfProgramHeader& segment : segments) { + if(file_header.entry >= segment.vaddr && file_header.entry < segment.vaddr + segment.filesz) { + entry_segment = &segment; + } + } + return entry_segment; +} + +Result> ElfFile::get_virtual(u32 address, u32 size) const +{ + u32 end_address = address + size; + + if(end_address >= address) { + for(const ElfProgramHeader& segment : segments) { + if(address >= segment.vaddr && end_address <= segment.vaddr + segment.filesz) { + size_t begin_offset = segment.offset + (address - segment.vaddr); + size_t end_offset = begin_offset + 
size; + if(begin_offset <= image.size() && end_offset <= image.size()) { + return std::span(image.data() + begin_offset, image.data() + end_offset); + } + } + } + } + + return CCC_FAILURE("No ELF segment for address range 0x%x to 0x%x.", address, end_address); +} + +Result ElfFile::copy_virtual(u8* dest, u32 address, u32 size) const +{ + Result> block = get_virtual(address, size); + CCC_RETURN_IF_ERROR(block); + + memcpy(dest, block->data(), size); + + return Result(); +} + +} diff --git a/3rdparty/ccc/src/ccc/elf.h b/3rdparty/ccc/src/ccc/elf.h new file mode 100644 index 0000000000..33d89e759c --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf.h @@ -0,0 +1,156 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc { + +enum class ElfIdentClass : u8 { + B32 = 0x1, + B64 = 0x2 +}; + +CCC_PACKED_STRUCT(ElfIdentHeader, + /* 0x0 */ u32 magic; // 7f 45 4c 46 + /* 0x4 */ ElfIdentClass e_class; + /* 0x5 */ u8 endianess; + /* 0x6 */ u8 version; + /* 0x7 */ u8 os_abi; + /* 0x8 */ u8 abi_version; + /* 0x9 */ u8 pad[7]; +) + +enum class ElfFileType : u16 { + NONE = 0x00, + REL = 0x01, + EXEC = 0x02, + DYN = 0x03, + CORE = 0x04, + LOOS = 0xfe00, + HIOS = 0xfeff, + LOPROC = 0xff00, + HIPROC = 0xffff +}; + +enum class ElfMachine : u16 { + MIPS = 0x08 +}; + +CCC_PACKED_STRUCT(ElfFileHeader, + /* 0x10 */ ElfFileType type; + /* 0x12 */ ElfMachine machine; + /* 0x14 */ u32 version; + /* 0x18 */ u32 entry; + /* 0x1c */ u32 phoff; + /* 0x20 */ u32 shoff; + /* 0x24 */ u32 flags; + /* 0x28 */ u16 ehsize; + /* 0x2a */ u16 phentsize; + /* 0x2c */ u16 phnum; + /* 0x2e */ u16 shentsize; + /* 0x30 */ u16 shnum; + /* 0x32 */ u16 shstrndx; +) + +enum class ElfSectionType : u32 { + NULL_SECTION = 0x0, + PROGBITS = 0x1, + SYMTAB = 0x2, + STRTAB = 0x3, + RELA = 0x4, + HASH = 0x5, + DYNAMIC = 0x6, + NOTE = 0x7, + NOBITS = 0x8, + REL = 0x9, + SHLIB = 0xa, + DYNSYM = 0xb, + INIT_ARRAY = 0xe, + FINI_ARRAY = 
0xf, + PREINIT_ARRAY = 0x10, + GROUP = 0x11, + SYMTAB_SHNDX = 0x12, + NUM = 0x13, + LOOS = 0x60000000, + MIPS_DEBUG = 0x70000005 +}; + +CCC_PACKED_STRUCT(ElfSectionHeader, + /* 0x00 */ u32 name; + /* 0x04 */ ElfSectionType type; + /* 0x08 */ u32 flags; + /* 0x0c */ u32 addr; + /* 0x10 */ u32 offset; + /* 0x14 */ u32 size; + /* 0x18 */ u32 link; + /* 0x1c */ u32 info; + /* 0x20 */ u32 addralign; + /* 0x24 */ u32 entsize; +) + +struct ElfSection { + std::string name; + ElfSectionHeader header; +}; + +CCC_PACKED_STRUCT(ElfProgramHeader, + /* 0x00 */ u32 type; + /* 0x04 */ u32 offset; + /* 0x08 */ u32 vaddr; + /* 0x0c */ u32 paddr; + /* 0x10 */ u32 filesz; + /* 0x14 */ u32 memsz; + /* 0x18 */ u32 flags; + /* 0x1c */ u32 align; +) + +struct ElfFile { + ElfFileHeader file_header; + std::vector image; + std::vector sections; + std::vector segments; + + // Parse the ELF file header, section headers and program headers. + static Result parse(std::vector image); + + // Create a section object for each section header in the ELF file. + Result create_section_symbols(SymbolDatabase& database, const SymbolGroup& group) const; + + const ElfSection* lookup_section(const char* name) const; + std::optional file_offset_to_virtual_address(u32 file_offset) const; + + // Find the program header for the segment that contains the entry point. + const ElfProgramHeader* entry_point_segment() const; + + // Retrieve a block of data in an ELF file given its address and size. + Result> get_virtual(u32 address, u32 size) const; + + // Copy a block of data in an ELF file to the destination buffer given its + // address and size. + Result copy_virtual(u8* dest, u32 address, u32 size) const; + + // Retrieve an object of type T from an ELF file given its address. 
+ template + Result get_object_virtual(u32 address) const + { + Result> result = get_virtual(address, sizeof(T)); + CCC_RETURN_IF_ERROR(result); + + return *(T*) result->data(); + } + + // Retrieve an array of objects of type T from an ELF file given its + // address and element count. + template + Result> get_array_virtual(u32 address, u32 element_count) const + { + Result> result = get_virtual(address, element_count * sizeof(T)); + CCC_RETURN_IF_ERROR(result); + + return std::span((T*) result->data(), (T*) (result->data() + result->size())); + } +}; + +} diff --git a/3rdparty/ccc/src/ccc/elf_symtab.cpp b/3rdparty/ccc/src/ccc/elf_symtab.cpp new file mode 100644 index 0000000000..892ea5ec2d --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf_symtab.cpp @@ -0,0 +1,213 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "elf_symtab.h" + +#include "importer_flags.h" + +namespace ccc::elf { + +enum class SymbolBind : u8 { + LOCAL = 0, + GLOBAL = 1, + WEAK = 2, + NUM = 3, + GNU_UNIQUE = 10 +}; + +enum class SymbolType : u8 { + NOTYPE = 0, + OBJECT = 1, + FUNC = 2, + SECTION = 3, + FILE = 4, + COMMON = 5, + TLS = 6, + NUM = 7, + GNU_IFUNC = 10 +}; + +enum class SymbolVisibility { + DEFAULT = 0, + INTERNAL = 1, + HIDDEN = 2, + PROTECTED = 3 +}; + +CCC_PACKED_STRUCT(Symbol, + /* 0x0 */ u32 name; + /* 0x4 */ u32 value; + /* 0x8 */ u32 size; + /* 0xc */ u8 info; + /* 0xd */ u8 other; + /* 0xe */ u16 shndx; + + SymbolType type() const { return (SymbolType) (info & 0xf); } + SymbolBind bind() const { return (SymbolBind) (info >> 4); } + SymbolVisibility visibility() const { return (SymbolVisibility) (other & 0x3); } +) + +static const char* symbol_bind_to_string(SymbolBind bind); +static const char* symbol_type_to_string(SymbolType type); +static const char* symbol_visibility_to_string(SymbolVisibility visibility); + +Result import_symbols( + SymbolDatabase& database, + const SymbolGroup& group, + std::span symtab, + std::span 
strtab, + u32 importer_flags, + DemanglerFunctions demangler) +{ + for(u32 i = 0; i < symtab.size() / sizeof(Symbol); i++) { + const Symbol* symbol = get_packed(symtab, i * sizeof(Symbol)); + CCC_ASSERT(symbol); + + Address address; + if(symbol->value != 0) { + address = symbol->value; + } + + if(!address.valid() || symbol->visibility() != SymbolVisibility::DEFAULT) { + continue; + } + + if(!(importer_flags & DONT_DEDUPLICATE_SYMBOLS)) { + if(database.functions.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.global_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.local_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + } + + const char* string = get_string(strtab, symbol->name); + CCC_CHECK(string, "Symbol string out of range."); + + switch(symbol->type()) { + case SymbolType::NOTYPE: { + Result label = database.labels.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(label); + + // These symbols get emitted at the same addresses as functions + // and aren't extremely useful, so we want to mark them to + // prevent them from possibly being used as function names. 
+ (*label)->is_junk = + (*label)->name() == "__gnu_compiled_c" || + (*label)->name() == "__gnu_compiled_cplusplus" || + (*label)->name() == "gcc2_compiled."; + + break; + } + case SymbolType::OBJECT: { + if(symbol->size != 0) { + Result global_variable = database.global_variables.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(global_variable); + + if(*global_variable) { + (*global_variable)->set_size(symbol->size); + } + } else { + Result label = database.labels.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(label); + } + + break; + } + case SymbolType::FUNC: { + Result function = database.functions.create_symbol( + string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(function); + + if(*function) { + (*function)->set_size(symbol->size); + } + + break; + } + case SymbolType::FILE: { + Result source_file = database.source_files.create_symbol( + string, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(source_file); + + break; + } + default: {} + } + } + + return Result(); +} + +Result print_symbol_table(FILE* out, std::span symtab, std::span strtab) +{ + fprintf(out, "ELF SYMBOLS:\n"); + fprintf(out, " Num: Value Size Type Bind Vis Ndx Name\n"); + + for(u32 i = 0; i < symtab.size() / sizeof(Symbol); i++) { + const Symbol* symbol = get_packed(symtab, i * sizeof(Symbol)); + CCC_ASSERT(symbol); + + const char* type = symbol_type_to_string(symbol->type()); + const char* bind = symbol_bind_to_string(symbol->bind()); + const char* visibility = symbol_visibility_to_string(symbol->visibility()); + + const char* string = get_string(strtab, symbol->name); + CCC_CHECK(string, "Symbol string out of range."); + + fprintf(out, "%6u: %08x %5u %-7s %-7s %-7s %3u %s\n", + i, symbol->value, symbol->size, type, bind, visibility, symbol->shndx, string); + + } + + return Result(); +} + 
+static const char* symbol_bind_to_string(SymbolBind bind) +{ + switch(bind) { + case SymbolBind::LOCAL: return "LOCAL"; + case SymbolBind::GLOBAL: return "GLOBAL"; + case SymbolBind::WEAK: return "WEAK"; + case SymbolBind::NUM: return "NUM"; + case SymbolBind::GNU_UNIQUE: return "GNU_UNIQUE"; + } + return "ERROR"; +} + +static const char* symbol_type_to_string(SymbolType type) +{ + switch(type) { + case SymbolType::NOTYPE: return "NOTYPE"; + case SymbolType::OBJECT: return "OBJECT"; + case SymbolType::FUNC: return "FUNC"; + case SymbolType::SECTION: return "SECTION"; + case SymbolType::FILE: return "FILE"; + case SymbolType::COMMON: return "COMMON"; + case SymbolType::TLS: return "TLS"; + case SymbolType::NUM: return "NUM"; + case SymbolType::GNU_IFUNC: return "GNU_IFUNC"; + } + return "ERROR"; +} + +static const char* symbol_visibility_to_string(SymbolVisibility visibility) +{ + switch(visibility) { + case SymbolVisibility::DEFAULT: return "DEFAULT"; + case SymbolVisibility::INTERNAL: return "INTERNAL"; + case SymbolVisibility::HIDDEN: return "HIDDEN"; + case SymbolVisibility::PROTECTED: return "PROTECTED"; + } + return "ERROR"; +} + +} diff --git a/3rdparty/ccc/src/ccc/elf_symtab.h b/3rdparty/ccc/src/ccc/elf_symtab.h new file mode 100644 index 0000000000..3880de969f --- /dev/null +++ b/3rdparty/ccc/src/ccc/elf_symtab.h @@ -0,0 +1,20 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc::elf { + +Result import_symbols( + SymbolDatabase& database, + const SymbolGroup& group, + std::span symtab, + std::span strtab, + u32 importer_flags, + DemanglerFunctions demangler); + +Result print_symbol_table(FILE* out, std::span symtab, std::span strtab); + +} diff --git a/3rdparty/ccc/src/ccc/importer_flags.cpp b/3rdparty/ccc/src/ccc/importer_flags.cpp new file mode 100644 index 0000000000..a05d6714ac --- /dev/null +++ b/3rdparty/ccc/src/ccc/importer_flags.cpp @@ -0,0 +1,95 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "importer_flags.h" + +namespace ccc { + +const std::vector IMPORTER_FLAGS = { + {DEMANGLE_PARAMETERS, "--demangle-parameters", { + "Include parameters in demangled function names." + }}, + {DEMANGLE_RETURN_TYPE, "--demangle-return-type", { + "Include return types at the end of demangled", + "function names if they're available." + }}, + {DONT_DEDUPLICATE_SYMBOLS, "--dont-deduplicate-symbols", { + "Do not deduplicate matching symbols from", + "different symbol tables. This options has no", + "effect on data types." + }}, + {DONT_DEDUPLICATE_TYPES, "--dont-deduplicate-types", { + "Do not deduplicate data types from different", + "translation units." + }}, + {DONT_DEMANGLE_NAMES, "--dont-demangle-names", { + "Do not demangle function names, global variable", + "names, or overloaded operator names." + }}, + {INCLUDE_GENERATED_MEMBER_FUNCTIONS, "--include-generated-functions", { + "Output member functions that were likely", + "automatically generated by the compiler." + }}, + {NO_ACCESS_SPECIFIERS, "--no-access-specifiers", { + "Do not print access specifiers." + }}, + {NO_MEMBER_FUNCTIONS, "--no-member-functions", { + "Do not print member functions." + }}, + {NO_OPTIMIZED_OUT_FUNCTIONS, "--no-optimized-out-functions", { + "Discard functions that were optimized out." 
+ }}, + {STRICT_PARSING, "--strict", { + "Make more types of errors fatal." + }}, + {TYPEDEF_ALL_ENUMS, "--typedef-all-enums", { + "Force all emitted C++ enums to be defined using", + "a typedef. With STABS, it is not always possible", + "to determine if an enum was like this in the", + "original source code, so this option should be", + "useful for reverse engineering C projects." + }}, + {TYPEDEF_ALL_STRUCTS, "--typedef-all-structs", { + "Force all emitted C++ structure types to be", + "defined using a typedef." + }}, + {TYPEDEF_ALL_UNIONS, "--typedef-all-unions", { + "Force all emitted C++ union types to be defined", + "using a typedef." + }}, + {UNIQUE_FUNCTIONS, "--unique-functions", { + " If multiple identical .mdebug function symbols", + "are present, find the one that seems to have", + "actually been included in the linked binary, and", + "remove the addresses from all the rest. Using", + "this importer flag in combination with", + "--no-optimized-out-functions will remove these", + "duplicate function symbols entirely." + }} +}; + +u32 parse_importer_flag(const char* argument) +{ + for(const ImporterFlagInfo& flag : IMPORTER_FLAGS) { + if(strcmp(flag.argument, argument) == 0) { + return flag.flag; + } + } + return NO_IMPORTER_FLAGS; +} + +void print_importer_flags_help(FILE* out) +{ + for(const ImporterFlagInfo& flag : IMPORTER_FLAGS) { + fprintf(out, "\n"); + fprintf(out, " %-29s ", flag.argument); + for(size_t i = 0; i < flag.help_text.size(); i++) { + if(i > 0) { + fprintf(out, " "); + } + fprintf(out, "%s\n", flag.help_text[i]); + } + } +} + +} diff --git a/3rdparty/ccc/src/ccc/importer_flags.h b/3rdparty/ccc/src/ccc/importer_flags.h new file mode 100644 index 0000000000..12ab79538a --- /dev/null +++ b/3rdparty/ccc/src/ccc/importer_flags.h @@ -0,0 +1,39 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "util.h" + +namespace ccc { + +enum ImporterFlags { + NO_IMPORTER_FLAGS = 0, + DEMANGLE_PARAMETERS = (1 << 0), + DEMANGLE_RETURN_TYPE = (1 << 1), + DONT_DEDUPLICATE_SYMBOLS = (1 << 2), + DONT_DEDUPLICATE_TYPES = (1 << 3), + DONT_DEMANGLE_NAMES = (1 << 4), + INCLUDE_GENERATED_MEMBER_FUNCTIONS = (1 << 5), + NO_ACCESS_SPECIFIERS = (1 << 6), + NO_MEMBER_FUNCTIONS = (1 << 7), + NO_OPTIMIZED_OUT_FUNCTIONS = (1 << 8), + STRICT_PARSING = (1 << 9), + TYPEDEF_ALL_ENUMS = (1 << 10), + TYPEDEF_ALL_STRUCTS = (1 << 11), + TYPEDEF_ALL_UNIONS = (1 << 12), + UNIQUE_FUNCTIONS = (1 << 13) +}; + +struct ImporterFlagInfo { + ImporterFlags flag; + const char* argument; + std::vector help_text; +}; + +extern const std::vector IMPORTER_FLAGS; + +u32 parse_importer_flag(const char* argument); +void print_importer_flags_help(FILE* out); + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_analysis.cpp b/3rdparty/ccc/src/ccc/mdebug_analysis.cpp new file mode 100644 index 0000000000..0309c858d4 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_analysis.cpp @@ -0,0 +1,349 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#include "mdebug_analysis.h" + +#include "stabs_to_ast.h" + +namespace ccc::mdebug { + +Result LocalSymbolTableAnalyser::stab_magic(const char* magic) +{ + return Result(); +} + +Result LocalSymbolTableAnalyser::source_file(const char* path, Address text_address) +{ + if(m_next_relative_path.empty()) { + m_next_relative_path = m_source_file.command_line_path; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::data_type(const ParsedSymbol& symbol) +{ + Result> node = stabs_type_to_ast( + *symbol.name_colon_type.type.get(), nullptr, m_stabs_to_ast_state, 0, false, false); + CCC_RETURN_IF_ERROR(node); + + if(symbol.is_typedef && (*node)->descriptor == ast::STRUCT_OR_UNION) { + ast::StructOrUnion& struct_or_union = (*node)->as(); + const std::string& name = symbol.name_colon_type.name; + StabsTypeNumber type_number = symbol.name_colon_type.type->type_number; + fix_recursively_emitted_structures(struct_or_union, name, type_number, m_stabs_to_ast_state.file_handle); + } + + bool is_struct = (*node)->descriptor == ast::STRUCT_OR_UNION && (*node)->as().is_struct; + bool force_typedef = + ((m_context.importer_flags & TYPEDEF_ALL_ENUMS) && (*node)->descriptor == ast::ENUM) || + ((m_context.importer_flags & TYPEDEF_ALL_STRUCTS) && (*node)->descriptor == ast::STRUCT_OR_UNION && is_struct) || + ((m_context.importer_flags & TYPEDEF_ALL_UNIONS) && (*node)->descriptor == ast::STRUCT_OR_UNION && !is_struct); + + (*node)->name = (symbol.name_colon_type.name == " ") ? 
"" : symbol.name_colon_type.name; + if(symbol.is_typedef || force_typedef) { + (*node)->storage_class = STORAGE_CLASS_TYPEDEF; + } + + const char* name = (*node)->name.c_str(); + StabsTypeNumber number = symbol.name_colon_type.type->type_number; + + if(m_context.importer_flags & DONT_DEDUPLICATE_TYPES) { + Result data_type = m_database.data_types.create_symbol( + name, m_context.group.source, m_context.group.module_symbol); + CCC_RETURN_IF_ERROR(data_type); + + m_source_file.stabs_type_number_to_handle[number] = (*data_type)->handle(); + (*data_type)->set_type(std::move(*node)); + + (*data_type)->files = {m_source_file.handle()}; + } else { + Result type = m_database.create_data_type_if_unique( + std::move(*node), number, name, m_source_file, m_context.group); + CCC_RETURN_IF_ERROR(type); + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::global_variable( + const char* mangled_name, Address address, const StabsType& type, bool is_static, GlobalStorageLocation location) +{ + Result global = m_database.global_variables.create_symbol( + mangled_name, m_context.group.source, m_context.group.module_symbol, address, m_context.importer_flags, m_context.demangler); + CCC_RETURN_IF_ERROR(global); + CCC_ASSERT(*global); + + m_global_variables.emplace_back((*global)->handle()); + + Result> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, false); + CCC_RETURN_IF_ERROR(node); + + if(is_static) { + (*global)->storage_class = STORAGE_CLASS_STATIC; + } + (*global)->set_type(std::move(*node)); + + (*global)->storage.location = location; + + return Result(); +} + +Result LocalSymbolTableAnalyser::sub_source_file(const char* path, Address text_address) +{ + if(m_current_function && m_state == IN_FUNCTION_BEGINNING) { + Function::SubSourceFile& sub = m_current_function->sub_source_files.emplace_back(); + sub.address = text_address; + sub.relative_path = path; + } else { + m_next_relative_path = path; + } + + return Result(); +} + +Result 
LocalSymbolTableAnalyser::procedure( + const char* mangled_name, Address address, const ProcedureDescriptor* procedure_descriptor, bool is_static) +{ + if(!m_current_function || strcmp(mangled_name, m_current_function->mangled_name().c_str()) != 0) { + Result result = create_function(mangled_name, address); + CCC_RETURN_IF_ERROR(result); + } + + if(is_static) { + m_current_function->storage_class = STORAGE_CLASS_STATIC; + } + + if(procedure_descriptor) { + m_current_function->stack_frame_size = procedure_descriptor->frame_size; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::label(const char* label, Address address, s32 line_number) +{ + if(address.valid() && m_current_function && label[0] == '$') { + Function::LineNumberPair& pair = m_current_function->line_numbers.emplace_back(); + pair.address = address; + pair.line_number = line_number; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::text_end(const char* name, s32 function_size) +{ + if(m_state == IN_FUNCTION_BEGINNING) { + CCC_CHECK(m_current_function, "END TEXT symbol outside of function."); + m_current_function->set_size(function_size); + m_state = IN_FUNCTION_END; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::function(const char* mangled_name, const StabsType& return_type, Address address) +{ + if(!m_current_function || strcmp(mangled_name, m_current_function->mangled_name().c_str()) != 0) { + Result result = create_function(mangled_name, address); + CCC_RETURN_IF_ERROR(result); + } else { + // For MTV Music Maker 2, the addresses for static functions stored in + // the PROC symbols are relative to the translation unit, while the + // addresses stored in the FUN symbol are absolute. This is the only + // game I've found that seems to have this problem, but since in all + // other cases it seems all these addresses are all absolute, I may as + // well add in a hack here to deal with it. 
+ bool no_module_base_address = m_context.group.module_symbol && m_context.group.module_symbol->address().get_or_zero() == 0; + bool new_address_greater = address.valid() && address > m_current_function->address(); + if(no_module_base_address && new_address_greater) { + m_database.functions.move_symbol(m_current_function->handle(), address); + } + } + + Result> node = stabs_type_to_ast(return_type, nullptr, m_stabs_to_ast_state, 0, true, true); + CCC_RETURN_IF_ERROR(node); + m_current_function->set_type(std::move(*node)); + + return Result(); +} + +Result LocalSymbolTableAnalyser::function_end() +{ + if(m_current_function) { + m_current_function->set_parameter_variables(std::move(m_current_parameter_variables), m_database); + m_current_function->set_local_variables(std::move(m_current_local_variables), m_database); + } + + m_current_function = nullptr; + m_current_parameter_variables = std::vector(); + m_current_local_variables = std::vector(); + + m_blocks.clear(); + m_pending_local_variables.clear(); + + m_state = NOT_IN_FUNCTION; + + return Result(); +} + +Result LocalSymbolTableAnalyser::parameter( + const char* name, const StabsType& type, bool is_stack, s32 value, bool is_by_reference) +{ + CCC_CHECK(m_current_function, "Parameter symbol before first func/proc symbol."); + + Result parameter_variable = m_database.parameter_variables.create_symbol( + name, m_context.group.source, m_context.group.module_symbol); + CCC_RETURN_IF_ERROR(parameter_variable); + + m_current_parameter_variables.emplace_back((*parameter_variable)->handle()); + + Result> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, true); + CCC_RETURN_IF_ERROR(node); + (*parameter_variable)->set_type(std::move(*node)); + + if(is_stack) { + StackStorage& stack_storage = (*parameter_variable)->storage.emplace(); + stack_storage.stack_pointer_offset = value; + } else { + RegisterStorage& register_storage = (*parameter_variable)->storage.emplace(); + 
register_storage.dbx_register_number = value; + register_storage.is_by_reference = is_by_reference; + } + + return Result(); +} + +Result LocalSymbolTableAnalyser::local_variable( + const char* name, const StabsType& type, u32 value, StabsSymbolDescriptor desc, SymbolClass sclass) +{ + if(!m_current_function) { + return Result(); + } + + Address address = (desc == StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE) ? value : Address(); + Result local_variable = m_database.local_variables.create_symbol( + name, address, m_context.group.source, m_context.group.module_symbol); + CCC_RETURN_IF_ERROR(local_variable); + + m_current_local_variables.emplace_back((*local_variable)->handle()); + m_pending_local_variables.emplace_back((*local_variable)->handle()); + + Result> node = stabs_type_to_ast(type, nullptr, m_stabs_to_ast_state, 0, true, false); + CCC_RETURN_IF_ERROR(node); + + if(desc == StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE) { + GlobalStorage& global_storage = (*local_variable)->storage.emplace(); + std::optional location_opt = + symbol_class_to_global_variable_location(sclass); + CCC_CHECK(location_opt.has_value(), + "Invalid static local variable location %s.", + symbol_class(sclass)); + global_storage.location = *location_opt; + (*node)->storage_class = STORAGE_CLASS_STATIC; + } else if(desc == StabsSymbolDescriptor::REGISTER_VARIABLE) { + RegisterStorage& register_storage = (*local_variable)->storage.emplace(); + register_storage.dbx_register_number = (s32) value; + } else if(desc == StabsSymbolDescriptor::LOCAL_VARIABLE) { + StackStorage& stack_storage = (*local_variable)->storage.emplace(); + stack_storage.stack_pointer_offset = (s32) value; + } else { + return CCC_FAILURE("LocalSymbolTableAnalyser::local_variable() called with bad symbol descriptor."); + } + + (*local_variable)->set_type(std::move(*node)); + + return Result(); +} + +Result LocalSymbolTableAnalyser::lbrac(s32 begin_offset) +{ + for(LocalVariableHandle local_variable_handle : 
m_pending_local_variables) { + if(LocalVariable* local_variable = m_database.local_variables.symbol_from_handle(local_variable_handle)) { + local_variable->live_range.low = m_source_file.address().value + begin_offset; + } + } + + m_blocks.emplace_back(std::move(m_pending_local_variables)); + m_pending_local_variables = {}; + + return Result(); +} + +Result LocalSymbolTableAnalyser::rbrac(s32 end_offset) +{ + CCC_CHECK(!m_blocks.empty(), "RBRAC symbol without a matching LBRAC symbol."); + + std::vector& variables = m_blocks.back(); + for(LocalVariableHandle local_variable_handle : variables) { + if(LocalVariable* local_variable = m_database.local_variables.symbol_from_handle(local_variable_handle)) { + local_variable->live_range.high = m_source_file.address().value + end_offset; + } + } + + m_blocks.pop_back(); + + return Result(); +} + +Result LocalSymbolTableAnalyser::finish() +{ + CCC_CHECK(m_state != IN_FUNCTION_BEGINNING, + "Unexpected end of symbol table for '%s'.", m_source_file.name().c_str()); + + if(m_current_function) { + Result result = function_end(); + CCC_RETURN_IF_ERROR(result); + } + + m_source_file.set_functions(std::move(m_functions), m_database); + m_source_file.set_global_variables(std::move(m_global_variables), m_database); + + return Result(); +} + +Result LocalSymbolTableAnalyser::create_function(const char* mangled_name, Address address) +{ + if(m_current_function) { + Result result = function_end(); + CCC_RETURN_IF_ERROR(result); + } + + Result function = m_database.functions.create_symbol( + mangled_name, m_context.group.source, m_context.group.module_symbol, address, m_context.importer_flags, m_context.demangler); + CCC_RETURN_IF_ERROR(function); + CCC_ASSERT(*function); + m_current_function = *function; + + m_functions.emplace_back(m_current_function->handle()); + + m_state = IN_FUNCTION_BEGINNING; + + if(!m_next_relative_path.empty() && m_current_function->relative_path != m_source_file.command_line_path) { + 
m_current_function->relative_path = m_next_relative_path; + } + + return Result(); +} + +std::optional symbol_class_to_global_variable_location(SymbolClass symbol_class) +{ + std::optional location; + switch(symbol_class) { + case SymbolClass::NIL: location = GlobalStorageLocation::NIL; break; + case SymbolClass::DATA: location = GlobalStorageLocation::DATA; break; + case SymbolClass::BSS: location = GlobalStorageLocation::BSS; break; + case SymbolClass::ABS: location = GlobalStorageLocation::ABS; break; + case SymbolClass::SDATA: location = GlobalStorageLocation::SDATA; break; + case SymbolClass::SBSS: location = GlobalStorageLocation::SBSS; break; + case SymbolClass::RDATA: location = GlobalStorageLocation::RDATA; break; + case SymbolClass::COMMON: location = GlobalStorageLocation::COMMON; break; + case SymbolClass::SCOMMON: location = GlobalStorageLocation::SCOMMON; break; + case SymbolClass::SUNDEFINED: location = GlobalStorageLocation::SUNDEFINED; break; + default: {} + } + return location; +} + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_analysis.h b/3rdparty/ccc/src/ccc/mdebug_analysis.h new file mode 100644 index 0000000000..61cc9b0184 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_analysis.h @@ -0,0 +1,99 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "importer_flags.h" +#include "mdebug_section.h" +#include "mdebug_symbols.h" +#include "stabs.h" +#include "stabs_to_ast.h" +#include "symbol_database.h" + +namespace ccc::mdebug { + +struct AnalysisContext { + const mdebug::SymbolTableReader* reader = nullptr; + const std::map* external_functions = nullptr; + const std::map* external_globals = nullptr; + SymbolGroup group; + u32 importer_flags = NO_IMPORTER_FLAGS; + DemanglerFunctions demangler; +}; + +class LocalSymbolTableAnalyser { +public: + LocalSymbolTableAnalyser(SymbolDatabase& database, const StabsToAstState& stabs_to_ast_state, const AnalysisContext& context, SourceFile& source_file) + : m_database(database) + , m_context(context) + , m_stabs_to_ast_state(stabs_to_ast_state) + , m_source_file(source_file) {} + + // Functions for processing individual symbols. + // + // In most cases these symbols will appear in the following order: + // PROC TEXT + // ... line numbers ... ($LM) + // END TEXT + // LABEL TEXT FUN + // ... parameters ... + // ... blocks ... (... local variables ... LBRAC ... subblocks ... RBRAC) + // NIL NIL FUN + // + // For some compiler versions the symbols can appear in this order: + // LABEL TEXT FUN + // ... parameters ... + // first line number ($LM1) + // PROC TEXT + // ... line numbers ... ($LM) + // END TEXT + // ... blocks ... (... local variables ... LBRAC ... subblocks ... 
RBRAC) + Result stab_magic(const char* magic); + Result source_file(const char* path, Address text_address); + Result data_type(const ParsedSymbol& symbol); + Result global_variable( + const char* mangled_name, Address address, const StabsType& type, bool is_static, GlobalStorageLocation location); + Result sub_source_file(const char* name, Address text_address); + Result procedure( + const char* mangled_name, Address address, const ProcedureDescriptor* procedure_descriptor, bool is_static); + Result label(const char* label, Address address, s32 line_number); + Result text_end(const char* name, s32 function_size); + Result function(const char* mangled_name, const StabsType& return_type, Address address); + Result function_end(); + Result parameter( + const char* name, const StabsType& type, bool is_stack, s32 value, bool is_by_reference); + Result local_variable( + const char* name, const StabsType& type, u32 value, StabsSymbolDescriptor desc, SymbolClass sclass); + Result lbrac(s32 begin_offset); + Result rbrac(s32 end_offset); + + Result finish(); + + Result create_function(const char* mangled_name, Address address); + +protected: + enum AnalysisState { + NOT_IN_FUNCTION, + IN_FUNCTION_BEGINNING, + IN_FUNCTION_END + }; + + SymbolDatabase& m_database; + const AnalysisContext& m_context; + const StabsToAstState& m_stabs_to_ast_state; + + AnalysisState m_state = NOT_IN_FUNCTION; + SourceFile& m_source_file; + std::vector m_functions; + std::vector m_global_variables; + Function* m_current_function = nullptr; + std::vector m_current_parameter_variables; + std::vector m_current_local_variables; + std::vector> m_blocks; + std::vector m_pending_local_variables; + std::string m_next_relative_path; +}; + +std::optional symbol_class_to_global_variable_location(SymbolClass symbol_class); + +}; diff --git a/3rdparty/ccc/src/ccc/mdebug_importer.cpp b/3rdparty/ccc/src/ccc/mdebug_importer.cpp new file mode 100644 index 0000000000..3038aede89 --- /dev/null +++ 
b/3rdparty/ccc/src/ccc/mdebug_importer.cpp @@ -0,0 +1,668 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "mdebug_importer.h" + +namespace ccc::mdebug { + +static Result resolve_type_names( + SymbolDatabase& database, const SymbolGroup& group, u32 importer_flags); +static Result resolve_type_name( + ast::TypeName& type_name, + SymbolDatabase& database, + const SymbolGroup& group, + u32 importer_flags); +static void compute_size_bytes(ast::Node& node, SymbolDatabase& database); +static void detect_duplicate_functions(SymbolDatabase& database, const SymbolGroup& group); +static void detect_fake_functions(SymbolDatabase& database, const std::map& external_functions, const SymbolGroup& group); +static void destroy_optimized_out_functions( + SymbolDatabase& database, const SymbolGroup& group); + +Result import_symbol_table( + SymbolDatabase& database, + std::span elf, + s32 section_offset, + const SymbolGroup& group, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt) +{ + SymbolTableReader reader; + + Result reader_result = reader.init(elf, section_offset); + CCC_RETURN_IF_ERROR(reader_result); + + Result> external_symbols = reader.parse_external_symbols(); + CCC_RETURN_IF_ERROR(external_symbols); + + // The addresses of the global variables aren't present in the local symbol + // table, so here we extract them from the external table. In addition, for + // some games we need to cross reference the function symbols in the local + // symbol table with the entries in the external symbol table. 
+ std::map external_functions; + std::map external_globals; + for(const mdebug::Symbol& external : *external_symbols) { + if(external.symbol_type == mdebug::SymbolType::PROC) { + external_functions[external.value] = &external; + } + + if(external.symbol_type == mdebug::SymbolType::GLOBAL + && (external.symbol_class != mdebug::SymbolClass::UNDEFINED)) { + external_globals[external.string] = &external; + } + } + + // Bundle together some unchanging state to pass to import_files. + AnalysisContext context; + context.reader = &reader; + context.external_functions = &external_functions; + context.external_globals = &external_globals; + context.group = group; + context.importer_flags = importer_flags; + context.demangler = demangler; + + Result result = import_files(database, context, interrupt); + CCC_RETURN_IF_ERROR(result); + + return Result(); +} + +Result import_files(SymbolDatabase& database, const AnalysisContext& context, const std::atomic_bool* interrupt) +{ + Result file_count = context.reader->file_count(); + CCC_RETURN_IF_ERROR(file_count); + + for(s32 i = 0; i < *file_count; i++) { + if(interrupt && *interrupt) { + return CCC_FAILURE("Operation interrupted by user."); + } + + Result file = context.reader->parse_file(i); + CCC_RETURN_IF_ERROR(file); + + Result result = import_file(database, *file, context); + CCC_RETURN_IF_ERROR(result); + } + + // The files field may be modified by further analysis passes, so we + // need to save this information here. + for(DataType& data_type : database.data_types) { + if(context.group.is_in_group(data_type) && data_type.files.size() == 1) { + data_type.only_defined_in_single_translation_unit = true; + } + } + + // Lookup data types and store data type handles in type names. + Result type_name_result = resolve_type_names(database, context.group, context.importer_flags); + CCC_RETURN_IF_ERROR(type_name_result); + + // Compute the size in bytes of all the AST nodes. 
+ database.for_each_symbol([&](ccc::Symbol& symbol) { + if(context.group.is_in_group(symbol) && symbol.type()) { + compute_size_bytes(*symbol.type(), database); + } + }); + + // Propagate the size information to the global variable symbols. + for(GlobalVariable& global_variable : database.global_variables) { + if(global_variable.type() && global_variable.type()->size_bytes > -1) { + global_variable.set_size((u32) global_variable.type()->size_bytes); + } + } + + // Propagate the size information to the static local variable symbols. + for(LocalVariable& local_variable : database.local_variables) { + bool is_static_local = std::holds_alternative(local_variable.storage); + if(is_static_local && local_variable.type() && local_variable.type()->size_bytes > -1) { + local_variable.set_size((u32) local_variable.type()->size_bytes); + } + } + + // Some games (e.g. Jet X2O) have multiple function symbols across different + // translation units with the same name and address. + if(context.importer_flags & UNIQUE_FUNCTIONS) { + detect_duplicate_functions(database, context.group); + } + + // If multiple functions appear at the same address, discard the addresses + // of all of them except the real one. + if(context.external_functions) { + detect_fake_functions(database, *context.external_functions, context.group); + } + + // Remove functions with no address. If there are any such functions, this + // will invalidate all pointers to symbols. + if(context.importer_flags & NO_OPTIMIZED_OUT_FUNCTIONS) { + destroy_optimized_out_functions(database, context.group); + } + + return Result(); +} + +Result import_file(SymbolDatabase& database, const mdebug::File& input, const AnalysisContext& context) +{ + // Parse the stab strings into a data structure that's vaguely + // one-to-one with the text-based representation. 
+ u32 importer_flags_for_this_file = context.importer_flags; + Result> symbols = parse_symbols(input.symbols, importer_flags_for_this_file); + CCC_RETURN_IF_ERROR(symbols); + + // In stabs, types can be referenced by their number from other stabs, + // so here we build a map of type numbers to the parsed types. + std::map stabs_types; + for(const ParsedSymbol& symbol : *symbols) { + if(symbol.type == ParsedSymbolType::NAME_COLON_TYPE) { + symbol.name_colon_type.type->enumerate_numbered_types(stabs_types); + } + } + + Result source_file = database.source_files.create_symbol( + input.full_path, input.address, context.group.source, context.group.module_symbol); + CCC_RETURN_IF_ERROR(source_file); + + (*source_file)->working_dir = input.working_dir; + (*source_file)->command_line_path = input.command_line_path; + + // Sometimes the INFO symbols contain information about what toolchain + // version was used for building the executable. + for(const mdebug::Symbol& symbol : input.symbols) { + if(symbol.symbol_class == mdebug::SymbolClass::INFO && strcmp(symbol.string, "@stabs") != 0) { + (*source_file)->toolchain_version_info.emplace(symbol.string); + } + } + + StabsToAstState stabs_to_ast_state; + stabs_to_ast_state.file_handle = (*source_file)->handle().value; + stabs_to_ast_state.stabs_types = &stabs_types; + stabs_to_ast_state.importer_flags = importer_flags_for_this_file; + stabs_to_ast_state.demangler = context.demangler; + + // Convert the parsed stabs symbols to a more standard C AST. 
+ LocalSymbolTableAnalyser analyser(database, stabs_to_ast_state, context, **source_file); + for(const ParsedSymbol& symbol : *symbols) { + if(symbol.duplicate) { + continue; + } + + switch(symbol.type) { + case ParsedSymbolType::NAME_COLON_TYPE: { + switch(symbol.name_colon_type.descriptor) { + case StabsSymbolDescriptor::LOCAL_FUNCTION: + case StabsSymbolDescriptor::GLOBAL_FUNCTION: { + const char* name = symbol.name_colon_type.name.c_str(); + const StabsType& type = *symbol.name_colon_type.type.get(); + Result result = analyser.function(name, type, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::REFERENCE_PARAMETER_A: + case StabsSymbolDescriptor::REGISTER_PARAMETER: + case StabsSymbolDescriptor::VALUE_PARAMETER: + case StabsSymbolDescriptor::REFERENCE_PARAMETER_V: { + const char* name = symbol.name_colon_type.name.c_str(); + const StabsType& type = *symbol.name_colon_type.type.get(); + bool is_stack_variable = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::VALUE_PARAMETER; + bool is_by_reference = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_A + || symbol.name_colon_type.descriptor == StabsSymbolDescriptor::REFERENCE_PARAMETER_V; + + Result result = analyser.parameter(name, type, is_stack_variable, symbol.raw->value, is_by_reference); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::REGISTER_VARIABLE: + case StabsSymbolDescriptor::LOCAL_VARIABLE: + case StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE: { + const char* name = symbol.name_colon_type.name.c_str(); + const StabsType& type = *symbol.name_colon_type.type.get(); + Result result = analyser.local_variable( + name, type, symbol.raw->value, symbol.name_colon_type.descriptor, symbol.raw->symbol_class); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::GLOBAL_VARIABLE: + case StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE: { + const char* name = 
symbol.name_colon_type.name.c_str(); + u32 address = -1; + std::optional location = + symbol_class_to_global_variable_location(symbol.raw->symbol_class); + if(symbol.name_colon_type.descriptor == StabsSymbolDescriptor::GLOBAL_VARIABLE) { + // The address for non-static global variables is + // only stored in the external symbol table (and + // the ELF symbol table), so we pull that + // information in here. + if(context.external_globals) { + auto global_symbol = context.external_globals->find(symbol.name_colon_type.name); + if(global_symbol != context.external_globals->end()) { + address = (u32) global_symbol->second->value; + location = symbol_class_to_global_variable_location(global_symbol->second->symbol_class); + } + } + } else { + // And for static global variables it's just stored + // in the local symbol table. + address = (u32) symbol.raw->value; + } + CCC_CHECK(location.has_value(), "Invalid global variable location.") + const StabsType& type = *symbol.name_colon_type.type.get(); + bool is_static = symbol.name_colon_type.descriptor == StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE; + Result result = analyser.global_variable(name, address, type, is_static, *location); + CCC_RETURN_IF_ERROR(result); + break; + } + case StabsSymbolDescriptor::TYPE_NAME: + case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG: { + Result result = analyser.data_type(symbol); + CCC_RETURN_IF_ERROR(result); + break; + } + } + break; + } + case ParsedSymbolType::SOURCE_FILE: { + Result result = analyser.source_file(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::SUB_SOURCE_FILE: { + Result result = analyser.sub_source_file(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::LBRAC: { + Result result = analyser.lbrac(symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::RBRAC: { + Result result = analyser.rbrac(symbol.raw->value); + 
CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::FUNCTION_END: { + Result result = analyser.function_end(); + CCC_RETURN_IF_ERROR(result); + break; + } + case ParsedSymbolType::NON_STABS: { + if(symbol.raw->symbol_class == mdebug::SymbolClass::TEXT) { + if(symbol.raw->symbol_type == mdebug::SymbolType::PROC) { + Result result = analyser.procedure(symbol.raw->string, symbol.raw->value, symbol.raw->procedure_descriptor, false); + CCC_RETURN_IF_ERROR(result); + } else if(symbol.raw->symbol_type == mdebug::SymbolType::STATICPROC) { + Result result = analyser.procedure(symbol.raw->string, symbol.raw->value, symbol.raw->procedure_descriptor, true); + CCC_RETURN_IF_ERROR(result); + } else if(symbol.raw->symbol_type == mdebug::SymbolType::LABEL) { + Result result = analyser.label(symbol.raw->string, symbol.raw->value, symbol.raw->index); + CCC_RETURN_IF_ERROR(result); + } else if(symbol.raw->symbol_type == mdebug::SymbolType::END) { + Result result = analyser.text_end(symbol.raw->string, symbol.raw->value); + CCC_RETURN_IF_ERROR(result); + } + } + break; + } + } + } + + Result result = analyser.finish(); + CCC_RETURN_IF_ERROR(result); + + return Result(); +} + +static Result resolve_type_names( + SymbolDatabase& database, const SymbolGroup& group, u32 importer_flags) +{ + Result result; + database.for_each_symbol([&](ccc::Symbol& symbol) { + if(group.is_in_group(symbol) && symbol.type()) { + ast::for_each_node(*symbol.type(), ast::PREORDER_TRAVERSAL, [&](ast::Node& node) { + if(node.descriptor == ast::TYPE_NAME) { + Result type_name_result = resolve_type_name(node.as(), database, group, importer_flags); + if(!type_name_result.success()) { + result = std::move(type_name_result); + } + } + return ast::EXPLORE_CHILDREN; + }); + } + }); + return result; +} + +static Result resolve_type_name( + ast::TypeName& type_name, + SymbolDatabase& database, + const SymbolGroup& group, + u32 importer_flags) +{ + ast::TypeName::UnresolvedStabs* unresolved_stabs = 
type_name.unresolved_stabs.get(); + if(!unresolved_stabs) { + return Result(); + } + + // Lookup the type by its STABS type number. This path ensures that the + // correct type is found even if multiple types have the same name. + if(unresolved_stabs->referenced_file_handle != (u32) -1 && unresolved_stabs->stabs_type_number.valid()) { + const SourceFile* source_file = database.source_files.symbol_from_handle(unresolved_stabs->referenced_file_handle); + CCC_ASSERT(source_file); + auto handle = source_file->stabs_type_number_to_handle.find(unresolved_stabs->stabs_type_number); + if(handle != source_file->stabs_type_number_to_handle.end()) { + type_name.data_type_handle = handle->second.value; + type_name.is_forward_declared = false; + type_name.unresolved_stabs.reset(); + return Result(); + } + } + + // Looking up the type by its STABS type number failed, so look for it by + // its name instead. This happens when a type is forward declared but not + // defined in a given translation unit. + if(!unresolved_stabs->type_name.empty()) { + for(auto& name_handle : database.data_types.handles_from_name(unresolved_stabs->type_name)) { + DataType* data_type = database.data_types.symbol_from_handle(name_handle.second); + if(data_type && group.is_in_group(*data_type)) { + type_name.data_type_handle = name_handle.second.value; + type_name.is_forward_declared = true; + type_name.unresolved_stabs.reset(); + return Result(); + } + } + } + + // If this branch is taken it means the type name was probably from an + // automatically generated member function of a nested struct trying to + // reference the struct (for the this parameter). We shouldn't create a + // forward declared type in this case. + if(type_name.source == ast::TypeNameSource::UNNAMED_THIS) { + return Result(); + } + + // Type lookup failed. This happens when a type is forward declared in a + // translation unit with symbols but is not defined in one. 
We haven't + // already created a forward declared type, so we create one now. + std::unique_ptr forward_declared_node; + if(unresolved_stabs->type.has_value()) { + switch(*unresolved_stabs->type) { + case ast::ForwardDeclaredType::STRUCT: { + std::unique_ptr node = std::make_unique(); + node->is_struct = true; + forward_declared_node = std::move(node); + break; + } + case ast::ForwardDeclaredType::UNION: { + std::unique_ptr node = std::make_unique(); + node->is_struct = false; + forward_declared_node = std::move(node); + break; + } + case ast::ForwardDeclaredType::ENUM: { + std::unique_ptr node = std::make_unique(); + forward_declared_node = std::move(node); + break; + } + } + } + + if(forward_declared_node) { + Result forward_declared_type = database.data_types.create_symbol( + unresolved_stabs->type_name, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(forward_declared_type); + + (*forward_declared_type)->set_type(std::move(forward_declared_node)); + (*forward_declared_type)->not_defined_in_any_translation_unit = true; + + type_name.data_type_handle = (*forward_declared_type)->handle().value; + type_name.is_forward_declared = true; + type_name.unresolved_stabs.reset(); + + return Result(); + } + + const char* error_message = "Unresolved %s type name '%s' with STABS type number (%d,%d)."; + if(importer_flags & STRICT_PARSING) { + return CCC_FAILURE(error_message, + ast::type_name_source_to_string(type_name.source), + type_name.unresolved_stabs->type_name.c_str(), + type_name.unresolved_stabs->stabs_type_number.file, + type_name.unresolved_stabs->stabs_type_number.type); + } else { + CCC_WARN(error_message, + ast::type_name_source_to_string(type_name.source), + type_name.unresolved_stabs->type_name.c_str(), + type_name.unresolved_stabs->stabs_type_number.file, + type_name.unresolved_stabs->stabs_type_number.type); + } + + return Result(); +} + +static void compute_size_bytes(ast::Node& node, SymbolDatabase& database) +{ + for_each_node(node, 
ast::POSTORDER_TRAVERSAL, [&](ast::Node& node) { + // Skip nodes that have already been processed. + if(node.size_bytes > -1 || node.cannot_compute_size) { + return ast::EXPLORE_CHILDREN; + } + + // Can't compute size recursively. + node.cannot_compute_size = true; + + switch(node.descriptor) { + case ast::ARRAY: { + ast::Array& array = node.as(); + if(array.element_type->size_bytes > -1) { + array.size_bytes = array.element_type->size_bytes * array.element_count; + } + break; + } + case ast::BITFIELD: { + break; + } + case ast::BUILTIN: { + ast::BuiltIn& built_in = node.as(); + built_in.size_bytes = builtin_class_size(built_in.bclass); + break; + } + case ast::FUNCTION: { + break; + } + case ast::ENUM: { + node.size_bytes = 4; + break; + } + case ast::ERROR_NODE: { + break; + } + case ast::STRUCT_OR_UNION: { + node.size_bytes = node.size_bits / 8; + break; + } + case ast::POINTER_OR_REFERENCE: { + node.size_bytes = 4; + break; + } + case ast::POINTER_TO_DATA_MEMBER: { + break; + } + case ast::TYPE_NAME: { + ast::TypeName& type_name = node.as(); + DataType* resolved_type = database.data_types.symbol_from_handle(type_name.data_type_handle_unless_forward_declared()); + if(resolved_type) { + ast::Node* resolved_node = resolved_type->type(); + CCC_ASSERT(resolved_node); + if(resolved_node->size_bytes < 0 && !resolved_node->cannot_compute_size) { + compute_size_bytes(*resolved_node, database); + } + type_name.size_bytes = resolved_node->size_bytes; + } + break; + } + } + + if(node.size_bytes > -1) { + node.cannot_compute_size = false; + } + + return ast::EXPLORE_CHILDREN; + }); +} + +static void detect_duplicate_functions(SymbolDatabase& database, const SymbolGroup& group) +{ + std::vector duplicate_functions; + + for(Function& test_function : database.functions) { + if(!test_function.address().valid() && !group.is_in_group(test_function)) { + continue; + } + + // Find cases where there are two or more functions at the same address. 
+ auto functions_with_same_address = database.functions.handles_from_starting_address(test_function.address()); + if(functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + if(++functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + + // Try to figure out the address of the translation unit which the + // version of the function that actually ended up in the linked binary + // comes from. We can't just check which source file the symbol comes + // from because it may be present in multiple. + u32 source_file_address = UINT32_MAX; + for(SourceFile& source_file : database.source_files) { + if(source_file.address() < test_function.address()) { + source_file_address = std::min(source_file.address().value, source_file_address); + } + } + + if(source_file_address == UINT32_MAX) { + continue; + } + + // Remove the addresses from all the matching symbols from other + // translation units. + FunctionHandle best_handle; + u32 best_offset = UINT32_MAX; + for(const auto& [address, handle] : functions_with_same_address) { + ccc::Function* function = database.functions.symbol_from_handle(handle); + if(!function || !group.is_in_group(*function) || function->mangled_name() != test_function.mangled_name()) { + continue; + } + + if(address - source_file_address < best_offset) { + if(best_handle.valid()) { + duplicate_functions.emplace_back(best_handle); + } + best_handle = function->handle(); + best_offset = address - source_file_address; + } else { + duplicate_functions.emplace_back(function->handle()); + } + } + + for(FunctionHandle duplicate_function : duplicate_functions) { + database.functions.move_symbol(duplicate_function, Address()); + } + duplicate_functions.clear(); + } +} + +static void detect_fake_functions(SymbolDatabase& database, const std::map& external_functions, const SymbolGroup& group) +{ + // Find cases where multiple fake function symbols were emitted for a given + // address and 
cross-reference with the external symbol table to try and + // find which one is the real one. + s32 fake_function_count = 0; + for(Function& function : database.functions) { + if(!function.address().valid() || !group.is_in_group(function)) { + continue; + } + + // Find cases where there are two or more functions at the same address. + auto functions_with_same_address = database.functions.handles_from_starting_address(function.address()); + if(functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + if(++functions_with_same_address.begin() == functions_with_same_address.end()) { + continue; + } + + auto external_function = external_functions.find(function.address().value); + if(external_function == external_functions.end() || strcmp(function.mangled_name().c_str(), external_function->second->string) != 0) { + database.functions.move_symbol(function.handle(), Address()); + + if(fake_function_count < 10) { + CCC_WARN("Discarding address of function symbol '%s' as it is probably incorrect.", function.mangled_name().c_str()); + } else if(fake_function_count == 10) { + CCC_WARN("Discarding more addresses of function symbols."); + } + + fake_function_count++; + } + } +} + +static void destroy_optimized_out_functions( + SymbolDatabase& database, const SymbolGroup& group) +{ + bool marked = false; + + for(Function& function : database.functions) { + if(group.is_in_group(function) && !function.address().valid()) { + function.mark_for_destruction(); + marked = true; + } + } + + if(marked) { + // This will invalidate all pointers to symbols in the database. + database.destroy_marked_symbols(); + } +} + +void fill_in_pointers_to_member_function_definitions(SymbolDatabase& database) +{ + // Fill in pointers from member function declaration to corresponding definitions. 
+ for(Function& function : database.functions) { + const std::string& qualified_name = function.name(); + std::string::size_type name_separator_pos = qualified_name.find_last_of("::"); + if(name_separator_pos == std::string::npos || name_separator_pos < 2) { + continue; + } + + std::string function_name = qualified_name.substr(name_separator_pos + 1); + + // This won't work for some template types. + std::string::size_type type_separator_pos = qualified_name.find_last_of("::", name_separator_pos - 2); + std::string type_name; + if(type_separator_pos != std::string::npos) { + type_name = qualified_name.substr(type_separator_pos + 1, name_separator_pos - type_separator_pos - 2); + } else { + type_name = qualified_name.substr(0, name_separator_pos - 1); + } + + for(const auto& name_handle : database.data_types.handles_from_name(type_name)) { + DataType* data_type = database.data_types.symbol_from_handle(name_handle.second); + if(!data_type || !data_type->type() || data_type->type()->descriptor != ast::STRUCT_OR_UNION) { + continue; + } + + ast::StructOrUnion& struct_or_union = data_type->type()->as(); + for(std::unique_ptr& declaration : struct_or_union.member_functions) { + if(declaration->name == function_name) { + declaration->as().definition_handle = function.handle().value; + function.is_member_function_ish = true; + break; + } + } + + if(function.is_member_function_ish) { + break; + } + } + } +} + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_importer.h b/3rdparty/ccc/src/ccc/mdebug_importer.h new file mode 100644 index 0000000000..cec65497c9 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_importer.h @@ -0,0 +1,31 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include "mdebug_analysis.h" +#include "mdebug_section.h" +#include "symbol_database.h" + +namespace ccc::mdebug { + +// Perform all the main analysis passes on the mdebug symbol table and convert +// it to a set of C++ ASTs. 
+Result import_symbol_table( + SymbolDatabase& database, + std::span elf, + s32 section_offset, + const SymbolGroup& group, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt); +Result import_files(SymbolDatabase& database, const AnalysisContext& context, const std::atomic_bool* interrupt); +Result import_file(SymbolDatabase& database, const mdebug::File& input, const AnalysisContext& context); + +// Try to add pointers from member function declarations to their definitions +// using a heuristic. +void fill_in_pointers_to_member_function_definitions(SymbolDatabase& database); + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_section.cpp b/3rdparty/ccc/src/ccc/mdebug_section.cpp new file mode 100644 index 0000000000..676303aad2 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_section.cpp @@ -0,0 +1,474 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "mdebug_section.h" + +namespace ccc::mdebug { + +// MIPS debug symbol table headers. +// See include/coff/sym.h from GNU binutils for more information. 
+ +CCC_PACKED_STRUCT(SymbolicHeader, + /* 0x00 */ s16 magic; + /* 0x02 */ s16 version_stamp; + /* 0x04 */ s32 line_number_count; + /* 0x08 */ s32 line_numbers_size_bytes; + /* 0x0c */ s32 line_numbers_offset; + /* 0x10 */ s32 dense_numbers_count; + /* 0x14 */ s32 dense_numbers_offset; + /* 0x18 */ s32 procedure_descriptor_count; + /* 0x1c */ s32 procedure_descriptors_offset; + /* 0x20 */ s32 local_symbol_count; + /* 0x24 */ s32 local_symbols_offset; + /* 0x28 */ s32 optimization_symbols_count; + /* 0x2c */ s32 optimization_symbols_offset; + /* 0x30 */ s32 auxiliary_symbol_count; + /* 0x34 */ s32 auxiliary_symbols_offset; + /* 0x38 */ s32 local_strings_size_bytes; + /* 0x3c */ s32 local_strings_offset; + /* 0x40 */ s32 external_strings_size_bytes; + /* 0x44 */ s32 external_strings_offset; + /* 0x48 */ s32 file_descriptor_count; + /* 0x4c */ s32 file_descriptors_offset; + /* 0x50 */ s32 relative_file_descriptor_count; + /* 0x54 */ s32 relative_file_descriptors_offset; + /* 0x58 */ s32 external_symbols_count; + /* 0x5c */ s32 external_symbols_offset; +) + +CCC_PACKED_STRUCT(FileDescriptor, + /* 0x00 */ u32 address; + /* 0x04 */ s32 file_path_string_offset; + /* 0x08 */ s32 strings_offset; + /* 0x0c */ s32 cb_ss; + /* 0x10 */ s32 isym_base; + /* 0x14 */ s32 symbol_count; + /* 0x18 */ s32 line_number_entry_index_base; + /* 0x1c */ s32 cline; + /* 0x20 */ s32 optimization_entry_index_base; + /* 0x24 */ s32 copt; + /* 0x28 */ u16 ipd_first; + /* 0x2a */ u16 procedure_descriptor_count; + /* 0x2c */ s32 iaux_base; + /* 0x30 */ s32 caux; + /* 0x34 */ s32 rfd_base; + /* 0x38 */ s32 crfd; + /* 0x3c */ u32 lang : 5; + /* 0x3c */ u32 f_merge : 1; + /* 0x3c */ u32 f_readin : 1; + /* 0x3c */ u32 f_big_endian : 1; + /* 0x3c */ u32 reserved_1 : 22; + /* 0x40 */ s32 line_number_offset; + /* 0x44 */ s32 cb_line; +) +static_assert(sizeof(FileDescriptor) == 0x48); + +CCC_PACKED_STRUCT(SymbolHeader, + /* 0x0 */ u32 iss; + /* 0x4 */ u32 value; + /* 0x8 */ u32 st : 6; + /* 0x8 */ u32 sc : 
5; + /* 0x8 */ u32 reserved : 1; + /* 0x8 */ u32 index : 20; +) +static_assert(sizeof(SymbolHeader) == 0xc); + +CCC_PACKED_STRUCT(ExternalSymbolHeader, + /* 0x0 */ u16 flags; + /* 0x2 */ s16 ifd; + /* 0x4 */ SymbolHeader symbol; +) +static_assert(sizeof(ExternalSymbolHeader) == 0x10); + +static void print_symbol(FILE* out, const Symbol& symbol); +static void print_procedure_descriptor(FILE* out, const ProcedureDescriptor& procedure_descriptor); +static Result get_corruption_fixing_fudge_offset(s32 section_offset, const SymbolicHeader& hdrr); +static Result get_symbol(const SymbolHeader& header, std::span elf, s32 strings_offset); +

// NOTE(review): `Result` and `std::span` appear without template arguments in
// this listing. Where the surrounding code doesn't determine them, they are
// kept exactly as found.

// Points the reader at the MIPS debug section that starts section_offset bytes
// into elf. Checks that the symbolic header is in bounds and carries the
// expected magic number, then determines the fudge offset which is added to
// the file offsets read by the other member functions. The reader is marked as
// ready on success.
Result SymbolTableReader::init(std::span elf, s32 section_offset)
{
	m_elf = elf;
	m_section_offset = section_offset;
	
	m_hdrr = get_packed<SymbolicHeader>(m_elf, m_section_offset);
	CCC_CHECK(m_hdrr != nullptr, "MIPS debug section header out of bounds.");
	CCC_CHECK(m_hdrr->magic == 0x7009, "Invalid symbolic header.");
	
	auto fudge = get_corruption_fixing_fudge_offset(m_section_offset, *m_hdrr);
	CCC_RETURN_IF_ERROR(fudge);
	m_fudge_offset = *fudge;
	
	m_ready = true;
	
	return Result();
}

// Returns the number of file descriptors recorded in the symbolic header. The
// reader must have been initialised.
s32 SymbolTableReader::file_count() const
{
	CCC_ASSERT(m_ready);
	
	return m_hdrr->file_descriptor_count;
}

// Parses the file descriptor with the given index along with its local
// symbols and procedure descriptors. The returned file holds the command line
// path, the working directory (if one could be found) and the two merged into
// a full path.
Result<File> SymbolTableReader::parse_file(s32 index) const
{
	CCC_ASSERT(m_ready);
	
	File file;
	
	u64 descriptor_offset = m_hdrr->file_descriptors_offset + index * sizeof(FileDescriptor);
	const FileDescriptor* descriptor = get_packed<FileDescriptor>(m_elf, descriptor_offset + m_fudge_offset);
	CCC_CHECK(descriptor != nullptr, "MIPS debug file descriptor out of bounds.");
	CCC_CHECK(descriptor->f_big_endian == 0, "Not little endian or bad file descriptor table.");
	
	file.address = descriptor->address;
	
	// The path is left empty if the string can't be read.
	s32 relative_path_offset = descriptor->strings_offset + descriptor->file_path_string_offset;
	s32 path_offset = m_hdrr->local_strings_offset + relative_path_offset + m_fudge_offset;
	if(const char* path = get_string(m_elf, path_offset)) {
		file.command_line_path = path;
	}
	
	// Parse local symbols.
	for(s64 j = 0; j < descriptor->symbol_count; j++) {
		u64 relative_offset = (descriptor->isym_base + j) * sizeof(SymbolHeader);
		u64 absolute_offset = m_hdrr->local_symbols_offset + relative_offset + m_fudge_offset;
		const SymbolHeader* header = get_packed<SymbolHeader>(m_elf, absolute_offset);
		CCC_CHECK(header != nullptr, "Symbol header out of bounds.");
		
		s32 strings_offset = m_hdrr->local_strings_offset + descriptor->strings_offset + m_fudge_offset;
		auto symbol = get_symbol(*header, m_elf, strings_offset);
		CCC_RETURN_IF_ERROR(symbol);
		
		// When the N_SO symbol that names this file directly follows another
		// N_SO symbol, the string of that previous symbol is taken as the
		// working directory.
		bool names_this_file = (s32) header->iss == descriptor->file_path_string_offset;
		bool check_previous_symbol = file.working_dir.empty()
			&& names_this_file
			&& symbol->is_stabs()
			&& symbol->code() == N_SO
			&& file.symbols.size() > 2;
		if(check_previous_symbol) {
			const Symbol& previous = file.symbols.back();
			if(previous.is_stabs() && previous.code() == N_SO) {
				file.working_dir = previous.string;
			}
		}
		
		file.symbols.emplace_back(std::move(*symbol));
	}
	
	// Parse procedure descriptors.
	for(s64 i = 0; i < descriptor->procedure_descriptor_count; i++) {
		u64 relative_offset = (descriptor->ipd_first + i) * sizeof(ProcedureDescriptor);
		u64 absolute_offset = m_hdrr->procedure_descriptors_offset + relative_offset + m_fudge_offset;
		const ProcedureDescriptor* procedure = get_packed<ProcedureDescriptor>(m_elf, absolute_offset);
		CCC_CHECK(procedure != nullptr, "Procedure descriptor out of bounds.");
		
		// Attach the descriptor to the symbol it refers to.
		CCC_CHECK(procedure->symbol_index < file.symbols.size(), "Symbol index out of bounds.");
		file.symbols[procedure->symbol_index].procedure_descriptor = procedure;
	}
	
	file.full_path = merge_paths(file.working_dir, file.command_line_path);
	
	return file;
}

// Parses all the entries of the external symbol table, in the order in which
// they are stored.
Result<std::vector<Symbol>> SymbolTableReader::parse_external_symbols() const
{
	CCC_ASSERT(m_ready);
	
	std::vector<Symbol> symbols;
	for(s64 i = 0; i < m_hdrr->external_symbols_count; i++) {
		u64 header_offset = m_hdrr->external_symbols_offset + i * sizeof(ExternalSymbolHeader);
		const ExternalSymbolHeader* header = get_packed<ExternalSymbolHeader>(m_elf, header_offset + m_fudge_offset);
		CCC_CHECK(header != nullptr, "External header out of bounds.");
		
		auto symbol = get_symbol(header->symbol, m_elf, m_hdrr->external_strings_offset + m_fudge_offset);
		CCC_RETURN_IF_ERROR(symbol);
		symbols.emplace_back(std::move(*symbol));
	}
	
	return symbols;
}

+void SymbolTableReader::print_header(FILE* dest) const +{ + CCC_ASSERT(m_ready); + + fprintf(dest, "Symbolic Header, magic = %hx, vstamp = %hx:\n", + (u16) m_hdrr->magic, + (u16) m_hdrr->version_stamp); + fprintf(dest, "\n"); + fprintf(dest, " Offset Size (Bytes) Count\n"); + fprintf(dest, " ------ ------------ -----\n"); + fprintf(dest, " Line Numbers 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->line_numbers_offset, + (u32) m_hdrr->line_numbers_size_bytes, + m_hdrr->line_number_count); + fprintf(dest, " Dense Numbers 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->dense_numbers_offset, + (u32) 
m_hdrr->dense_numbers_count * 8, + m_hdrr->dense_numbers_count); + fprintf(dest, " Procedure Descriptors 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->procedure_descriptors_offset, + (u32) m_hdrr->procedure_descriptor_count * (u32) sizeof(ProcedureDescriptor), + m_hdrr->procedure_descriptor_count); + fprintf(dest, " Local Symbols 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->local_symbols_offset, + (u32) m_hdrr->local_symbol_count * (u32) sizeof(SymbolHeader), + m_hdrr->local_symbol_count); + fprintf(dest, " Optimization Symbols 0x%-8x " "- " "%-8d\n", + (u32) m_hdrr->optimization_symbols_offset, + m_hdrr->optimization_symbols_count); + fprintf(dest, " Auxiliary Symbols 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->auxiliary_symbols_offset, + (u32) m_hdrr->auxiliary_symbol_count * 4, + m_hdrr->auxiliary_symbol_count); + fprintf(dest, " Local Strings 0x%-8x " "0x%-8x " "-\n", + (u32) m_hdrr->local_strings_offset, + (u32) m_hdrr->local_strings_size_bytes); + fprintf(dest, " External Strings 0x%-8x " "0x%-8x " "-\n", + (u32) m_hdrr->external_strings_offset, + (u32) m_hdrr->external_strings_size_bytes); + fprintf(dest, " File Descriptors 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->file_descriptors_offset, + (u32) m_hdrr->file_descriptor_count * (u32) sizeof(FileDescriptor), + m_hdrr->file_descriptor_count); + fprintf(dest, " Relative File Descriptors 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->relative_file_descriptors_offset, + (u32) m_hdrr->relative_file_descriptor_count * 4, + m_hdrr->relative_file_descriptor_count); + fprintf(dest, " External Symbols 0x%-8x " "0x%-8x " "%-8d\n", + (u32) m_hdrr->external_symbols_offset, + (u32) m_hdrr->external_symbols_count * 16, + m_hdrr->external_symbols_count); +} + +Result SymbolTableReader::print_symbols(FILE* out, bool print_locals, bool print_procedure_descriptors, bool print_externals) const +{ + if(print_locals || print_procedure_descriptors) { + s32 count = file_count(); + for(s32 i = 0; i < count; i++) { + Result file = 
parse_file(i); + CCC_RETURN_IF_ERROR(file); + + fprintf(out, "FILE %s:\n", file->command_line_path.c_str()); + for(const Symbol& symbol : file->symbols) { + if(print_locals || symbol.procedure_descriptor) { + print_symbol(out, symbol); + } + if(print_procedure_descriptors && symbol.procedure_descriptor) { + print_procedure_descriptor(out, *symbol.procedure_descriptor); + } + } + } + } + + if(print_externals) { + fprintf(out, "EXTERNAL SYMBOLS:\n"); + Result> external_symbols = parse_external_symbols(); + CCC_RETURN_IF_ERROR(external_symbols); + for(const Symbol& symbol : *external_symbols) { + print_symbol(out, symbol); + } + } + + return Result(); +} + +static void print_symbol(FILE* out, const Symbol& symbol) +{ + fprintf(out, " %8x ", symbol.value); + + const char* symbol_type_str = symbol_type(symbol.symbol_type); + if(symbol_type_str) { + fprintf(out, "%-11s ", symbol_type_str); + } else { + fprintf(out, "ST(%7u) ", (u32) symbol.symbol_type); + } + + const char* symbol_class_str = symbol_class(symbol.symbol_class); + if(symbol_class_str) { + fprintf(out, "%-4s ", symbol_class_str); + } else if ((u32) symbol.symbol_class == 0) { + fprintf(out, " "); + } else { + fprintf(out, "SC(%4u) ", (u32) symbol.symbol_class); + } + + if(symbol.is_stabs()) { + fprintf(out, "%-8s ", stabs_code_to_string(symbol.code())); + } else { + fprintf(out, "SI(%4u) ", symbol.index); + } + + fprintf(out, "%s\n", symbol.string); +} + +static void print_procedure_descriptor(FILE* out, const ProcedureDescriptor& procedure_descriptor) +{ + fprintf(out, " Address 0x%08x\n", procedure_descriptor.address); + fprintf(out, " Symbol Index %d\n", procedure_descriptor.symbol_index); + fprintf(out, " Line Number Entry Index %d\n", procedure_descriptor.line_number_entry_index); + fprintf(out, " Saved Register Mask 0x%08x\n", procedure_descriptor.saved_register_mask); + fprintf(out, " Saved Register Offset %d\n", procedure_descriptor.saved_register_offset); + fprintf(out, " Optimization Entry Index 
%d\n", procedure_descriptor.optimization_entry_index); + fprintf(out, " Saved Float Register Mask 0x%08x\n", procedure_descriptor.saved_float_register_mask); + fprintf(out, " Saved Float Register Offset %d\n", procedure_descriptor.saved_float_register_offset); + fprintf(out, " Frame Size %d\n", procedure_descriptor.frame_size); + fprintf(out, " Frame Pointer Register %hd\n", procedure_descriptor.frame_pointer_register); + fprintf(out, " Return PC Register %hd\n", procedure_descriptor.return_pc_register); + fprintf(out, " Line Number Low %d\n", procedure_descriptor.line_number_low); + fprintf(out, " Line Number High %d\n", procedure_descriptor.line_number_high); + fprintf(out, " Line Number Offset %d\n", procedure_descriptor.line_number_offset); +} + +static Result get_corruption_fixing_fudge_offset(s32 section_offset, const SymbolicHeader& hdrr) +{ + // GCC will always put the first part of the symbol table right after the + // header, so if the header says it's somewhere else we know the section has + // probably been moved without updating its contents. 
+ s32 right_after_header = INT32_MAX; + if(hdrr.line_numbers_offset > 0) right_after_header = std::min(hdrr.line_numbers_offset, right_after_header); + if(hdrr.dense_numbers_offset > 0) right_after_header = std::min(hdrr.dense_numbers_offset, right_after_header); + if(hdrr.procedure_descriptors_offset > 0) right_after_header = std::min(hdrr.procedure_descriptors_offset, right_after_header); + if(hdrr.local_symbols_offset > 0) right_after_header = std::min(hdrr.local_symbols_offset, right_after_header); + if(hdrr.optimization_symbols_offset > 0) right_after_header = std::min(hdrr.optimization_symbols_offset, right_after_header); + if(hdrr.auxiliary_symbols_offset > 0) right_after_header = std::min(hdrr.auxiliary_symbols_offset, right_after_header); + if(hdrr.local_strings_offset > 0) right_after_header = std::min(hdrr.local_strings_offset, right_after_header); + if(hdrr.external_strings_offset > 0) right_after_header = std::min(hdrr.external_strings_offset, right_after_header); + if(hdrr.file_descriptors_offset > 0) right_after_header = std::min(hdrr.file_descriptors_offset, right_after_header); + if(hdrr.relative_file_descriptors_offset > 0) right_after_header = std::min(hdrr.relative_file_descriptors_offset, right_after_header); + if(hdrr.external_symbols_offset > 0) right_after_header = std::min(hdrr.external_symbols_offset, right_after_header); + + CCC_CHECK(right_after_header >= 0 && right_after_header < INT32_MAX, "Invalid symbolic header."); + + // Figure out how much we need to adjust all the file offsets by. + s32 fudge_offset = section_offset - (right_after_header - sizeof(SymbolicHeader)); + if(fudge_offset != 0) { + CCC_WARN("The .mdebug section was moved without updating its contents. 
Adjusting file offsets by %d bytes.", fudge_offset); + } + + return fudge_offset; +} + +static Result get_symbol(const SymbolHeader& header, std::span elf, s32 strings_offset) +{ + Symbol symbol; + + const char* string = get_string(elf, strings_offset + header.iss); + CCC_CHECK(string, "Symbol has invalid string."); + symbol.string = string; + + symbol.value = header.value; + symbol.symbol_type = (SymbolType) header.st; + symbol.symbol_class = (SymbolClass) header.sc; + symbol.index = header.index; + + if(symbol.is_stabs()) { + CCC_CHECK(stabs_code_to_string(symbol.code()) != nullptr, "Bad stabs symbol code '%x'.", symbol.code()); + } + + return symbol; +} + +const char* symbol_type(SymbolType type) +{ + switch(type) { + case SymbolType::NIL: return "NIL"; + case SymbolType::GLOBAL: return "GLOBAL"; + case SymbolType::STATIC: return "STATIC"; + case SymbolType::PARAM: return "PARAM"; + case SymbolType::LOCAL: return "LOCAL"; + case SymbolType::LABEL: return "LABEL"; + case SymbolType::PROC: return "PROC"; + case SymbolType::BLOCK: return "BLOCK"; + case SymbolType::END: return "END"; + case SymbolType::MEMBER: return "MEMBER"; + case SymbolType::TYPEDEF: return "TYPEDEF"; + case SymbolType::FILE_SYMBOL: return "FILE"; + case SymbolType::STATICPROC: return "STATICPROC"; + case SymbolType::CONSTANT: return "CONSTANT"; + } + return nullptr; +} + +const char* symbol_class(SymbolClass symbol_class) +{ + switch(symbol_class) { + case SymbolClass::NIL: return "NIL"; + case SymbolClass::TEXT: return "TEXT"; + case SymbolClass::DATA: return "DATA"; + case SymbolClass::BSS: return "BSS"; + case SymbolClass::REGISTER: return "REGISTER"; + case SymbolClass::ABS: return "ABS"; + case SymbolClass::UNDEFINED: return "UNDEFINED"; + case SymbolClass::LOCAL: return "LOCAL"; + case SymbolClass::BITS: return "BITS"; + case SymbolClass::DBX: return "DBX"; + case SymbolClass::REG_IMAGE: return "REG_IMAGE"; + case SymbolClass::INFO: return "INFO"; + case SymbolClass::USER_STRUCT: return 
"USER_STRUCT"; + case SymbolClass::SDATA: return "SDATA"; + case SymbolClass::SBSS: return "SBSS"; + case SymbolClass::RDATA: return "RDATA"; + case SymbolClass::VAR: return "VAR"; + case SymbolClass::COMMON: return "COMMON"; + case SymbolClass::SCOMMON: return "SCOMMON"; + case SymbolClass::VAR_REGISTER: return "VAR_REGISTER"; + case SymbolClass::VARIANT: return "VARIANT"; + case SymbolClass::SUNDEFINED: return "SUNDEFINED"; + case SymbolClass::INIT: return "INIT"; + case SymbolClass::BASED_VAR: return "BASED_VAR"; + case SymbolClass::XDATA: return "XDATA"; + case SymbolClass::PDATA: return "PDATA"; + case SymbolClass::FINI: return "FINI"; + case SymbolClass::NONGP: return "NONGP"; + } + return nullptr; +} + +const char* stabs_code_to_string(StabsCode code) +{ + switch(code) { + case STAB: return "STAB"; + case N_GSYM: return "GSYM"; + case N_FNAME: return "FNAME"; + case N_FUN: return "FUN"; + case N_STSYM: return "STSYM"; + case N_LCSYM: return "LCSYM"; + case N_MAIN: return "MAIN"; + case N_PC: return "PC"; + case N_NSYMS: return "NSYMS"; + case N_NOMAP: return "NOMAP"; + case N_OBJ: return "OBJ"; + case N_OPT: return "OPT"; + case N_RSYM: return "RSYM"; + case N_M2C: return "M2C"; + case N_SLINE: return "SLINE"; + case N_DSLINE: return "DSLINE"; + case N_BSLINE: return "BSLINE"; + case N_EFD: return "EFD"; + case N_EHDECL: return "EHDECL"; + case N_CATCH: return "CATCH"; + case N_SSYM: return "SSYM"; + case N_SO: return "SO"; + case N_LSYM: return "LSYM"; + case N_BINCL: return "BINCL"; + case N_SOL: return "SOL"; + case N_PSYM: return "PSYM"; + case N_EINCL: return "EINCL"; + case N_ENTRY: return "ENTRY"; + case N_LBRAC: return "LBRAC"; + case N_EXCL: return "EXCL"; + case N_SCOPE: return "SCOPE"; + case N_RBRAC: return "RBRAC"; + case N_BCOMM: return "BCOMM"; + case N_ECOMM: return "ECOMM"; + case N_ECOML: return "ECOML"; + case N_NBTEXT: return "NBTEXT"; + case N_NBDATA: return "NBDATA"; + case N_NBBSS: return "NBBSS"; + case N_NBSTS: return "NBSTS"; + case 
N_NBLCS: return "NBLCS"; + case N_LENG: return "LENG"; + } + return nullptr; +} + +} diff --git a/3rdparty/ccc/src/ccc/mdebug_section.h b/3rdparty/ccc/src/ccc/mdebug_section.h new file mode 100644 index 0000000000..df15d3e5d0 --- /dev/null +++ b/3rdparty/ccc/src/ccc/mdebug_section.h @@ -0,0 +1,176 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "util.h" + +namespace ccc::mdebug { + +struct SymbolicHeader; + +enum class SymbolType : u32 { + NIL = 0, + GLOBAL = 1, + STATIC = 2, + PARAM = 3, + LOCAL = 4, + LABEL = 5, + PROC = 6, + BLOCK = 7, + END = 8, + MEMBER = 9, + TYPEDEF = 10, + FILE_SYMBOL = 11, + STATICPROC = 14, + CONSTANT = 15 +}; + +enum class SymbolClass : u32 { + NIL = 0, + TEXT = 1, + DATA = 2, + BSS = 3, + REGISTER = 4, + ABS = 5, + UNDEFINED = 6, + LOCAL = 7, + BITS = 8, + DBX = 9, + REG_IMAGE = 10, + INFO = 11, + USER_STRUCT = 12, + SDATA = 13, + SBSS = 14, + RDATA = 15, + VAR = 16, + COMMON = 17, + SCOMMON = 18, + VAR_REGISTER = 19, + VARIANT = 20, + SUNDEFINED = 21, + INIT = 22, + BASED_VAR = 23, + XDATA = 24, + PDATA = 25, + FINI = 26, + NONGP = 27 +}; + +// See stab.def from gcc for documentation on what all these are. 
// NOTE(review): template argument lists (e.g. on Result, std::span,
// std::vector, std::map and as()) appear to be missing from this text.
// Confirm them against the upstream ccc sources before compiling.

// STABS symbol codes. For a STABS symbol the code is recovered from the
// symbol's index field (see Symbol::code below).
enum StabsCode {
	STAB = 0x00,
	N_GSYM = 0x20,
	N_FNAME = 0x22,
	N_FUN = 0x24,
	N_STSYM = 0x26,
	N_LCSYM = 0x28,
	N_MAIN = 0x2a,
	N_PC = 0x30,
	N_NSYMS = 0x32,
	N_NOMAP = 0x34,
	N_OBJ = 0x38,
	N_OPT = 0x3c,
	N_RSYM = 0x40,
	N_M2C = 0x42,
	N_SLINE = 0x44,
	N_DSLINE = 0x46,
	N_BSLINE = 0x48,
	N_EFD = 0x4a,
	N_EHDECL = 0x50,
	N_CATCH = 0x54,
	N_SSYM = 0x60,
	N_SO = 0x64,
	N_LSYM = 0x80,
	N_BINCL = 0x82,
	N_SOL = 0x84,
	N_PSYM = 0xa0,
	N_EINCL = 0xa2,
	N_ENTRY = 0xa4,
	N_LBRAC = 0xc0,
	N_EXCL = 0xc2,
	N_SCOPE = 0xc4,
	N_RBRAC = 0xe0,
	N_BCOMM = 0xe2,
	N_ECOMM = 0xe4,
	N_ECOML = 0xe8,
	N_NBTEXT = 0xf0,
	N_NBDATA = 0xf2,
	N_NBBSS = 0xf4,
	N_NBSTS = 0xf6,
	N_NBLCS = 0xf8,
	N_LENG = 0xfe
};

// On-disk procedure descriptor record (0x34 bytes). symbol_index identifies
// the symbol the descriptor belongs to.
CCC_PACKED_STRUCT(ProcedureDescriptor,
	/* 0x00 */ u32 address;
	/* 0x04 */ u32 symbol_index;
	/* 0x08 */ s32 line_number_entry_index;
	/* 0x0c */ s32 saved_register_mask;
	/* 0x10 */ s32 saved_register_offset;
	/* 0x14 */ s32 optimization_entry_index;
	/* 0x18 */ s32 saved_float_register_mask;
	/* 0x1c */ s32 saved_float_register_offset;
	/* 0x20 */ s32 frame_size;
	/* 0x24 */ s16 frame_pointer_register;
	/* 0x26 */ s16 return_pc_register;
	/* 0x28 */ s32 line_number_low;
	/* 0x2c */ s32 line_number_high;
	/* 0x30 */ u32 line_number_offset;
)
static_assert(sizeof(ProcedureDescriptor) == 0x34);

// A symbol read from the symbol table. `string` and `procedure_descriptor`
// are non-owning pointers.
// NOTE(review): they presumably point into the ELF image handed to
// SymbolTableReader::init, so that buffer must outlive the symbol — confirm.
struct Symbol {
	u32 value;
	SymbolType symbol_type;
	SymbolClass symbol_class;
	u32 index;
	const char* string;
	const ProcedureDescriptor* procedure_descriptor = nullptr;
	
	// A symbol is a STABS symbol when bits 8-19 of its index equal 0x8f3.
	bool is_stabs() const {
		return (index & 0xfff00) == 0x8f300;
	}
	
	// The STABS code is the index with the 0x8f300 marker subtracted. Only
	// meaningful when is_stabs() is true.
	StabsCode code() const {
		return (StabsCode) (index - 0x8f300);
	}
};

// The parsed contents of a single file descriptor.
struct File {
	std::vector symbols;
	u32 address = 0;
	std::string working_dir; // The working directory of gcc.
	std::string command_line_path; // The source file path passed on the command line to gcc.
	std::string full_path; // The full combined path.
};

// Reads the symbol table stored in an ELF file's .mdebug section. init() must
// succeed before any other member function is called.
class SymbolTableReader {
public:
	Result init(std::span elf, s32 section_offset);
	
	s32 file_count() const;
	Result parse_file(s32 index) const;
	Result> parse_external_symbols() const;
	
	void print_header(FILE* out) const;
	Result print_symbols(FILE* out, bool print_locals, bool print_procedure_descriptors, bool print_externals) const;

protected:
	bool m_ready = false;
	
	std::span m_elf;
	s32 m_section_offset;
	
	// If the .mdebug section was moved without updating its contents all the
	// absolute file offsets stored within will be incorrect by a fixed amount.
	s32 m_fudge_offset;
	
	const SymbolicHeader* m_hdrr;
};

// Each of these returns a display name, or nullptr for an unknown value.
const char* symbol_type(SymbolType type);
const char* symbol_class(SymbolClass symbol_class);
const char* stabs_code_to_string(StabsCode code);

}
diff --git a/3rdparty/ccc/src/ccc/mdebug_symbols.cpp b/3rdparty/ccc/src/ccc/mdebug_symbols.cpp
new file mode 100644
index 0000000000..eacd9bf806
--- /dev/null
+++ b/3rdparty/ccc/src/ccc/mdebug_symbols.cpp
@@ -0,0 +1,220 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT

#include "mdebug_symbols.h"

#include "importer_flags.h"

namespace ccc::mdebug {

static void mark_duplicate_symbols(std::vector& symbols);

// Converts the raw symbols of one file into ParsedSymbols.
//
// STABS symbols that carry a "name:type" string are parsed with
// parse_stabs_symbol, a few structural codes (N_SO, N_SOL, N_LBRAC, N_RBRAC,
// and N_FUN with an empty string) become marker entries, and non-STABS
// symbols are passed through as NON_STABS. Every output entry stores a
// pointer to its input symbol, so `input` must outlive the result.
//
// importer_flags is in/out: STRICT_PARSING is cleared when a truncated stab
// is encountered, so later trailing-data problems only produce warnings.
//
// Returns an error on a malformed continuation, on a stab that fails to parse
// for any reason other than truncation, or (in strict mode) on trailing data
// after a stab.
Result> parse_symbols(const std::vector& input, u32& importer_flags)
{
	std::vector output;
	// Accumulates the pieces of a stab that is split across several symbols.
	std::string prefix;
	for(const mdebug::Symbol& symbol : input) {
		if(symbol.is_stabs()) {
			switch(symbol.code()) {
				case mdebug::N_GSYM: // Global variable
				case mdebug::N_FUN: // Function
				case mdebug::N_STSYM: // Data section static global variable
				case mdebug::N_LCSYM: // BSS section static global variable
				case mdebug::N_RSYM: // Register variable
				case mdebug::N_LSYM: // Automatic variable or type definition
				case mdebug::N_PSYM: { // Parameter variable
					// Some STABS symbols are split between multiple strings.
					if(symbol.string[0] != '\0') {
						if(symbol.string[strlen(symbol.string) - 1] == '\\') {
							// A trailing backslash marks a continuation:
							// keep everything before it and wait for the
							// next symbol.
							prefix += std::string(symbol.string, symbol.string + strlen(symbol.string) - 1);
						} else {
							std::string merged_string;
							const char* string;
							if(!prefix.empty()) {
								merged_string = prefix + symbol.string;
								string = merged_string.c_str();
								prefix.clear();
							} else {
								string = symbol.string;
							}
							
							// This local deliberately or accidentally shadows
							// the `input` parameter. parse_stabs_symbol
							// advances it past what it consumed.
							const char* input = string;
							Result parse_result = parse_stabs_symbol(input);
							if(parse_result.success()) {
								if(*input != '\0') {
									if(importer_flags & STRICT_PARSING) {
										return CCC_FAILURE("Unknown data '%s' at the end of the '%s' stab.", input, parse_result->name.c_str());
									} else {
										CCC_WARN("Unknown data '%s' at the end of the '%s' stab.", input, parse_result->name.c_str());
									}
								}
								
								ParsedSymbol& parsed = output.emplace_back();
								parsed.type = ParsedSymbolType::NAME_COLON_TYPE;
								parsed.raw = &symbol;
								parsed.name_colon_type = std::move(*parse_result);
							} else if(parse_result.error().message == STAB_TRUNCATED_ERROR_MESSAGE) {
								// Symbol truncated due to a GCC bug. Report a
								// warning and try to tolerate further faults
								// caused as a result of this.
								CCC_WARN("%s Symbol string: %s", STAB_TRUNCATED_ERROR_MESSAGE, string);
								importer_flags &= ~STRICT_PARSING;
							} else {
								return CCC_FAILURE("%s Symbol string: %s",
									parse_result.error().message.c_str(), string);
							}
						}
					} else {
						// An empty string must not interrupt a continuation.
						CCC_CHECK(prefix.empty(), "Invalid STABS continuation.");
						// An N_FUN with an empty string is recorded as the end
						// of a function.
						if(symbol.code() == mdebug::N_FUN) {
							ParsedSymbol& func_end = output.emplace_back();
							func_end.type = ParsedSymbolType::FUNCTION_END;
							func_end.raw = &symbol;
						}
					}
					break;
				}
				case mdebug::N_SOL: { // Sub-source file
					ParsedSymbol& sub = output.emplace_back();
					sub.type = ParsedSymbolType::SUB_SOURCE_FILE;
					sub.raw = &symbol;
					break;
				}
				case mdebug::N_LBRAC: { // Begin block
					ParsedSymbol& begin_block = output.emplace_back();
					begin_block.type = ParsedSymbolType::LBRAC;
					begin_block.raw = &symbol;
					break;
				}
				case mdebug::N_RBRAC: { // End block
					ParsedSymbol& end_block = output.emplace_back();
					end_block.type = ParsedSymbolType::RBRAC;
					end_block.raw = &symbol;
					break;
				}
				case mdebug::N_SO: { // Source filename
					ParsedSymbol& so_symbol = output.emplace_back();
					so_symbol.type = ParsedSymbolType::SOURCE_FILE;
					so_symbol.raw = &symbol;
					break;
				}
				// These codes are silently ignored.
				case mdebug::STAB:
				case mdebug::N_OPT:
				case mdebug::N_BINCL:
				case mdebug::N_EINCL: {
					break;
				}
				// These codes are not handled; a warning is logged and the
				// symbol is dropped.
				case mdebug::N_FNAME:
				case mdebug::N_MAIN:
				case mdebug::N_PC:
				case mdebug::N_NSYMS:
				case mdebug::N_NOMAP:
				case mdebug::N_OBJ:
				case mdebug::N_M2C:
				case mdebug::N_SLINE:
				case mdebug::N_DSLINE:
				case mdebug::N_BSLINE:
				case mdebug::N_EFD:
				case mdebug::N_EHDECL:
				case mdebug::N_CATCH:
				case mdebug::N_SSYM:
				case mdebug::N_ENTRY:
				case mdebug::N_EXCL:
				case mdebug::N_SCOPE:
				case mdebug::N_BCOMM:
				case mdebug::N_ECOMM:
				case mdebug::N_ECOML:
				case mdebug::N_NBTEXT:
				case mdebug::N_NBDATA:
				case mdebug::N_NBBSS:
				case mdebug::N_NBSTS:
				case mdebug::N_NBLCS:
				case mdebug::N_LENG: {
					CCC_WARN("Unhandled N_%s symbol: %s", mdebug::stabs_code_to_string(symbol.code()), symbol.string);
					break;
				}
			}
		} else {
			ParsedSymbol& non_stabs_symbol = output.emplace_back();
			non_stabs_symbol.type = ParsedSymbolType::NON_STABS;
			non_stabs_symbol.raw = &symbol;
		}
	}
	
	mark_duplicate_symbols(output);
	
	return output;
}

// Sets the is_typedef and duplicate flags on the parsed symbols. A type
// symbol is marked duplicate when it only refers to a type that another
// symbol with the same name (or a blank name) already defines. In the blank
// name case the defining symbol also takes over the referring symbol's name.
static void mark_duplicate_symbols(std::vector& symbols)
{
	// Map from type number to the index of a symbol whose type carries a
	// descriptor for that number. emplace keeps the first such symbol.
	std::map stabs_type_number_to_symbol;
	for(size_t i = 0; i < symbols.size(); i++) {
		ParsedSymbol& symbol = symbols[i];
		if(symbol.type == ParsedSymbolType::NAME_COLON_TYPE) {
			StabsType& type = *symbol.name_colon_type.type;
			if(type.type_number.valid() && type.descriptor.has_value()) {
				stabs_type_number_to_symbol.emplace(type.type_number, i);
			}
		}
	}
	
	// Initial typedef classification: TYPE_NAME symbols, except enums.
	for(ParsedSymbol& symbol : symbols) {
		symbol.is_typedef =
			symbol.type == ParsedSymbolType::NAME_COLON_TYPE &&
			symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME &&
			symbol.name_colon_type.type->descriptor != StabsTypeDescriptor::ENUM;
	}
	
	for(size_t i = 0; i < symbols.size(); i++) {
		ParsedSymbol& symbol = symbols[i];
		if(symbol.type != ParsedSymbolType::NAME_COLON_TYPE) {
			continue;
		}
		
		bool is_type =
			symbol.name_colon_type.descriptor == StabsSymbolDescriptor::TYPE_NAME ||
			symbol.name_colon_type.descriptor == StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG;
		if(!is_type) {
			continue;
		}
		
		StabsType& type = *symbol.name_colon_type.type;
		
		// Case 1: this symbol's type has no descriptor, only a type number.
		if(!type.descriptor.has_value()) {
			auto referenced_index = stabs_type_number_to_symbol.find(type.type_number);
			if(referenced_index != stabs_type_number_to_symbol.end()) {
				ParsedSymbol& referenced = symbols[referenced_index->second];
				if(referenced.name_colon_type.name == symbol.name_colon_type.name) {
					// symbol: "Struct:T(1,1)=s1;"
					// referenced: "Struct:t(1,1)"
					symbol.duplicate = true;
				}
			}
		}
		
		// Case 2: this symbol's type is a reference to another type.
		if(type.descriptor.has_value() && type.descriptor == StabsTypeDescriptor::TYPE_REFERENCE) {
			auto referenced_index = stabs_type_number_to_symbol.find(type.as().type->type_number);
			if(referenced_index != stabs_type_number_to_symbol.end() && referenced_index->second != i) {
				ParsedSymbol& referenced = symbols[referenced_index->second];
				
				if(referenced.name_colon_type.name == " ") {
					// referenced: " :T(1,1)=e;"
					// symbol: "ErraticEnum:t(1,2)=(1,1)"
					referenced.name_colon_type.name = symbol.name_colon_type.name;
					referenced.is_typedef = true;
					symbol.duplicate = true;
				}
				
				if(referenced.name_colon_type.name == symbol.name_colon_type.name) {
					// referenced: "NamedTypedefedStruct:T(1,1)=s1;"
					// symbol: "NamedTypedefedStruct:t(1,2)=(1,1)"
					referenced.is_typedef = true;
					symbol.duplicate = true;
				}
			}
		}
	}
}

}
diff --git a/3rdparty/ccc/src/ccc/mdebug_symbols.h b/3rdparty/ccc/src/ccc/mdebug_symbols.h
new file mode 100644
index 0000000000..1341dcbb20
--- /dev/null
+++ b/3rdparty/ccc/src/ccc/mdebug_symbols.h
@@ -0,0 +1,32 @@
// This file is part of the Chaos Compiler Collection.
// SPDX-License-Identifier: MIT

#pragma once

#include "util.h"
#include "stabs.h"
#include "mdebug_section.h"

namespace ccc::mdebug {

// The kinds of entry that parse_symbols produces.
enum class ParsedSymbolType {
	NAME_COLON_TYPE,
	SOURCE_FILE,
	SUB_SOURCE_FILE,
	LBRAC,
	RBRAC,
	FUNCTION_END,
	NON_STABS
};

// One entry of parse_symbols' output. `raw` points at the input symbol the
// entry came from (non-owning). name_colon_type is only filled in for
// NAME_COLON_TYPE entries.
struct ParsedSymbol {
	ParsedSymbolType type;
	const mdebug::Symbol* raw;
	StabsSymbol name_colon_type;
	bool duplicate = false;
	bool is_typedef = false;
};

Result> parse_symbols(const std::vector& input, u32& importer_flags);

}
diff --git a/3rdparty/ccc/src/ccc/sndll.cpp b/3rdparty/ccc/src/ccc/sndll.cpp
new file mode 100644
index 0000000000..894deba69e
--- /dev/null
+++ b/3rdparty/ccc/src/ccc/sndll.cpp
@@ -0,0 +1,191 @@
// This file is part of the Chaos Compiler Collection.
+// SPDX-License-Identifier: MIT + +#include "sndll.h" + +#include "importer_flags.h" + +namespace ccc { + +CCC_PACKED_STRUCT(SNDLLHeaderCommon, + /* 0x00 */ u32 magic; + /* 0x04 */ u32 relocations; + /* 0x08 */ u32 relocation_count; + /* 0x0c */ u32 symbols; + /* 0x10 */ u32 symbol_count; + /* 0x14 */ u32 elf_path; + /* 0x18 */ u32 load_func; + /* 0x1c */ u32 unload_func; + /* 0x20 */ u32 unknown_20; + /* 0x24 */ u32 unknown_24; + /* 0x28 */ u32 unknown_28; + /* 0x2c */ u32 file_size; + /* 0x30 */ u32 unknown_30; +) + +CCC_PACKED_STRUCT(SNDLLHeaderV1, + /* 0x00 */ SNDLLHeaderCommon common; +) + +CCC_PACKED_STRUCT(SNDLLHeaderV2, + /* 0x00 */ SNDLLHeaderCommon common; + /* 0x34 */ u32 unknown_34; + /* 0x38 */ u32 unknown_38; +) + +CCC_PACKED_STRUCT(SNDLLRelocation, + /* 0x0 */ u32 unknown_0; + /* 0x4 */ u32 unknown_4; + /* 0x8 */ u32 unknown_8; +) + +CCC_PACKED_STRUCT(SNDLLSymbolHeader, + /* 0x0 */ u32 string; + /* 0x4 */ u32 value; + /* 0x8 */ u8 unknown_8; + /* 0x9 */ u8 unknown_9; + /* 0xa */ SNDLLSymbolType type; + /* 0xb */ u8 processed; +) + +static Result parse_sndll_common( + std::span image, Address address, SNDLLType type, const SNDLLHeaderCommon& common, SNDLLVersion version); +static const char* sndll_symbol_type_to_string(SNDLLSymbolType type); + +Result parse_sndll_file(std::span image, Address address, SNDLLType type) +{ + const u32* magic = get_packed(image, 0); + CCC_CHECK((*magic & 0xffffff) == CCC_FOURCC("SNR\00"), "Not a SNDLL %s.", address.valid() ? 
"section" : "file"); + + char version = *magic >> 24; + switch(version) { + case '1': { + const SNDLLHeaderV1* header = get_packed(image, 0); + CCC_CHECK(header, "File too small to contain SNDLL V1 header."); + return parse_sndll_common(image, address, type, header->common, SNDLL_V1); + } + case '2': { + const SNDLLHeaderV2* header = get_packed(image, 0); + CCC_CHECK(header, "File too small to contain SNDLL V2 header."); + return parse_sndll_common(image, address, type, header->common, SNDLL_V2); + } + } + + return CCC_FAILURE("Unknown SNDLL version '%c'.", version); +} + +static Result parse_sndll_common( + std::span image, Address address, SNDLLType type, const SNDLLHeaderCommon& common, SNDLLVersion version) +{ + SNDLLFile sndll; + + sndll.address = address; + sndll.type = type; + sndll.version = version; + + if(common.elf_path) { + const char* elf_path = get_string(image, common.elf_path); + if(elf_path) { + sndll.elf_path = elf_path; + } + } + + CCC_CHECK(common.symbol_count < (32 * 1024 * 1024) / sizeof(SNDLLSymbol), "SNDLL symbol count is too high."); + sndll.symbols.reserve(common.symbol_count); + + for(u32 i = 0; i < common.symbol_count; i++) { + u32 symbol_offset = common.symbols - address.get_or_zero() + i * sizeof(SNDLLSymbolHeader); + const SNDLLSymbolHeader* symbol_header = get_packed(image, symbol_offset); + CCC_CHECK(symbol_header, "SNDLL symbol out of range."); + + const char* string = nullptr; + if(symbol_header->string) { + string = get_string(image, symbol_header->string - address.get_or_zero()); + } + + SNDLLSymbol& symbol = sndll.symbols.emplace_back(); + symbol.type = symbol_header->type; + symbol.value = symbol_header->value; + symbol.string = string; + } + + return sndll; +} + +Result import_sndll_symbols( + SymbolDatabase& database, + const SNDLLFile& sndll, + const SymbolGroup& group, + u32 importer_flags, + DemanglerFunctions demangler) +{ + for(const SNDLLSymbol& symbol : sndll.symbols) { + if(symbol.value == 0 || symbol.string.empty()) 
{ + continue; + } + + u32 address = symbol.value; + if(symbol.type != SNDLL_ABSOLUTE && sndll.type == SNDLLType::DYNAMIC_LIBRARY) { + address += sndll.address.get_or_zero(); + } + + if(!(importer_flags & DONT_DEDUPLICATE_SYMBOLS)) { + if(database.functions.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.global_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + + if(database.local_variables.first_handle_from_starting_address(address).valid()) { + continue; + } + } + + const Section* section = database.sections.symbol_overlapping_address(address); + if(section) { + if(section->contains_code()) { + Result function = database.functions.create_symbol( + symbol.string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(function); + continue; + } else if(section->contains_data()) { + Result global_variable = database.global_variables.create_symbol( + symbol.string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(global_variable); + continue; + } + } + + Result label = database.labels.create_symbol( + symbol.string, group.source, group.module_symbol, address, importer_flags, demangler); + CCC_RETURN_IF_ERROR(label); + } + + return Result(); +} + +void print_sndll_symbols(FILE* out, const SNDLLFile& sndll) +{ + fprintf(out, "SNDLL SYMBOLS:\n"); + for(const SNDLLSymbol& symbol : sndll.symbols) { + const char* type = sndll_symbol_type_to_string(symbol.type); + const char* string = !symbol.string.empty() ? 
symbol.string.c_str() : "(no string)"; + fprintf(out, "%8s %08x %s\n", type, symbol.value, string); + } +} + +static const char* sndll_symbol_type_to_string(SNDLLSymbolType type) +{ + switch(type) { + case SNDLL_NIL: return "NIL"; + case SNDLL_EXTERNAL: return "EXTERNAL"; + case SNDLL_RELATIVE: return "RELATIVE"; + case SNDLL_WEAK: return "WEAK"; + case SNDLL_ABSOLUTE: return "ABSOLUTE"; + } + return "invalid"; +} + +} diff --git a/3rdparty/ccc/src/ccc/sndll.h b/3rdparty/ccc/src/ccc/sndll.h new file mode 100644 index 0000000000..50e284bd61 --- /dev/null +++ b/3rdparty/ccc/src/ccc/sndll.h @@ -0,0 +1,55 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "symbol_database.h" + +namespace ccc { + +enum class SNDLLType { + SNDATA_SECTION, + DYNAMIC_LIBRARY +}; + +enum SNDLLVersion { + SNDLL_V1, + SNDLL_V2 +}; + +enum SNDLLSymbolType : u8 { + SNDLL_NIL = 0, // I think this is just so that the first real symbol has an index of 1. + SNDLL_EXTERNAL = 1, // Symbol with an empty value, to be filled in from another module. + SNDLL_RELATIVE = 2, // Global symbol, value is relative to the start of the SNDLL file. + SNDLL_WEAK = 3, // Weak symbol, value is relative to the start of the SNDLL file. + SNDLL_ABSOLUTE = 4 // Global symbol, value is an absolute address. +}; + +struct SNDLLSymbol { + SNDLLSymbolType type = SNDLL_NIL; + u32 value = 0; + std::string string; +}; + +struct SNDLLFile { + Address address; + SNDLLType type; + SNDLLVersion version; + std::string elf_path; + std::vector symbols; +}; + +// If a valid address is passed, the pointers in the header will be treated as +// addresses, otherwise they will be treated as file offsets. 
+Result parse_sndll_file(std::span image, Address address, SNDLLType type); + +Result import_sndll_symbols( + SymbolDatabase& database, + const SNDLLFile& sndll, + const SymbolGroup& group, + u32 importer_flags, + DemanglerFunctions demangler); + +void print_sndll_symbols(FILE* out, const SNDLLFile& sndll); + +} diff --git a/3rdparty/ccc/src/ccc/stabs.cpp b/3rdparty/ccc/src/ccc/stabs.cpp new file mode 100644 index 0000000000..0fe10dec7f --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs.cpp @@ -0,0 +1,835 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "stabs.h" + +namespace ccc { + +#define STABS_DEBUG(...) //__VA_ARGS__ +#define STABS_DEBUG_PRINTF(...) STABS_DEBUG(printf(__VA_ARGS__);) + +static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor); +static Result> parse_stabs_type(const char*& input); +static Result> parse_field_list(const char*& input); +static Result> parse_member_functions(const char*& input); +static Result parse_visibility_character(const char*& input); +STABS_DEBUG(static void print_field(const StabsStructOrUnionType::Field& field);) + +const char* STAB_TRUNCATED_ERROR_MESSAGE = + "STABS symbol truncated. This was probably caused by a GCC bug. 
" + "Other symbols from the same translation unit may also be invalid."; + +Result parse_stabs_symbol(const char*& input) +{ + STABS_DEBUG_PRINTF("PARSING %s\n", input); + + StabsSymbol symbol; + + Result name = parse_dodgy_stabs_identifier(input, ':'); + CCC_RETURN_IF_ERROR(name); + + symbol.name = *name; + + CCC_EXPECT_CHAR(input, ':', "identifier"); + CCC_CHECK(*input != '\0', "Unexpected end of input."); + if((*input >= '0' && *input <= '9') || *input == '(') { + symbol.descriptor = StabsSymbolDescriptor::LOCAL_VARIABLE; + } else { + char symbol_descriptor = *(input++); + CCC_CHECK(symbol_descriptor != '\0', "Failed to parse symbol descriptor."); + symbol.descriptor = (StabsSymbolDescriptor) symbol_descriptor; + } + CCC_CHECK(validate_symbol_descriptor(symbol.descriptor), + "Invalid symbol descriptor '%c'.", + (char) symbol.descriptor); + CCC_CHECK(*input != '\0', "Unexpected end of input."); + if(*input == 't') { + input++; + } + + auto type = parse_top_level_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + + // Handle nested functions. + bool is_function = + symbol.descriptor == StabsSymbolDescriptor::LOCAL_FUNCTION || + symbol.descriptor == StabsSymbolDescriptor::GLOBAL_FUNCTION; + if(is_function && input[0] == ',') { + input++; + while(*input != ',' && *input != '\0') input++; // enclosing function + CCC_EXPECT_CHAR(input, ',', "nested function suffix"); + while(*input != ',' && *input != '\0') input++; // function + } + + symbol.type = std::move(*type); + + // Make sure that variable names aren't used as type names e.g. the STABS + // symbol "somevar:P123=*456" may be referenced by the type number 123, but + // the type name is not "somevar". 
+ bool is_type = symbol.descriptor == StabsSymbolDescriptor::TYPE_NAME + || symbol.descriptor == StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG; + if(is_type) { + symbol.type->name = symbol.name; + } + + symbol.type->is_typedef = symbol.descriptor == StabsSymbolDescriptor::TYPE_NAME; + symbol.type->is_root = true; + + return symbol; +} + +static bool validate_symbol_descriptor(StabsSymbolDescriptor descriptor) +{ + bool valid; + switch(descriptor) { + case StabsSymbolDescriptor::LOCAL_VARIABLE: + case StabsSymbolDescriptor::REFERENCE_PARAMETER_A: + case StabsSymbolDescriptor::LOCAL_FUNCTION: + case StabsSymbolDescriptor::GLOBAL_FUNCTION: + case StabsSymbolDescriptor::GLOBAL_VARIABLE: + case StabsSymbolDescriptor::REGISTER_PARAMETER: + case StabsSymbolDescriptor::VALUE_PARAMETER: + case StabsSymbolDescriptor::REGISTER_VARIABLE: + case StabsSymbolDescriptor::STATIC_GLOBAL_VARIABLE: + case StabsSymbolDescriptor::TYPE_NAME: + case StabsSymbolDescriptor::ENUM_STRUCT_OR_TYPE_TAG: + case StabsSymbolDescriptor::STATIC_LOCAL_VARIABLE: + case StabsSymbolDescriptor::REFERENCE_PARAMETER_V: + valid = true; + break; + default: + valid = false; + break; + } + return valid; +} + +Result> parse_top_level_stabs_type(const char*& input) +{ + Result> type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + + // Handle first base class suffixes. + if((*type)->descriptor == StabsTypeDescriptor::STRUCT && input[0] == '~' && input[1] == '%') { + input += 2; + + Result> first_base_class = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(first_base_class); + (*type)->as().first_base_class = std::move(*first_base_class); + + CCC_EXPECT_CHAR(input, ';', "first base class suffix"); + } + + // Handle extra live range information. 
+ if(input[0] == ';' && input[1] == 'l') { + input += 2; + CCC_EXPECT_CHAR(input, '(', "live range suffix"); + CCC_EXPECT_CHAR(input, '#', "live range suffix"); + std::optional start = parse_number_s32(input); + CCC_CHECK(start.has_value(), "Failed to parse live range suffix."); + CCC_EXPECT_CHAR(input, ',', "live range suffix"); + CCC_EXPECT_CHAR(input, '#', "live range suffix"); + std::optional end = parse_number_s32(input); + CCC_CHECK(end.has_value(), "Failed to parse live range suffix."); + CCC_EXPECT_CHAR(input, ')', "live range suffix"); + } + + return type; +} + +static Result> parse_stabs_type(const char*& input) +{ + StabsTypeNumber type_number; + + CCC_CHECK(*input != '\0', "Unexpected end of input."); + + if(*input == '(') { + // This file has type numbers made up of two pieces: an include file + // index and a type number. + + input++; + + std::optional file_index = parse_number_s32(input); + CCC_CHECK(file_index.has_value(), "Failed to parse type number (file index)."); + + CCC_EXPECT_CHAR(input, ',', "type number"); + + std::optional type_index = parse_number_s32(input); + CCC_CHECK(type_index.has_value(), "Failed to parse type number (type index)."); + + CCC_EXPECT_CHAR(input, ')', "type number"); + + type_number.file = *file_index; + type_number.type = *type_index; + + if(*input != '=') { + return std::make_unique(type_number); + } + input++; + } else if(*input >= '0' && *input <= '9') { + // This file has type numbers which are just a single number. This is + // the more common case for games. 
+ + std::optional type_index = parse_number_s32(input); + CCC_CHECK(type_index.has_value(), "Failed to parse type number."); + type_number.type = *type_index; + + if(*input != '=') { + return std::make_unique(type_number); + } + input++; + } + + CCC_CHECK(*input != '\0', "Unexpected end of input."); + + StabsTypeDescriptor descriptor; + if((*input >= '0' && *input <= '9') || *input == '(') { + descriptor = StabsTypeDescriptor::TYPE_REFERENCE; + } else { + char descriptor_char = *(input++); + CCC_CHECK(descriptor_char != '\0', "Failed to parse type descriptor."); + descriptor = (StabsTypeDescriptor) descriptor_char; + } + + std::unique_ptr out_type; + + switch(descriptor) { + case StabsTypeDescriptor::TYPE_REFERENCE: { // 0..9 + auto type_reference = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + type_reference->type = std::move(*type); + + out_type = std::move(type_reference); + break; + } + case StabsTypeDescriptor::ARRAY: { // a + auto array = std::make_unique(type_number); + + auto index_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(index_type); + array->index_type = std::move(*index_type); + + auto element_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(element_type); + array->element_type = std::move(*element_type); + + out_type = std::move(array); + break; + } + case StabsTypeDescriptor::ENUM: { // e + auto enum_type = std::make_unique(type_number); + STABS_DEBUG_PRINTF("enum {\n"); + while(*input != ';') { + std::optional name = parse_stabs_identifier(input, ':'); + CCC_CHECK(name.has_value(), "Failed to parse enum field name."); + + CCC_EXPECT_CHAR(input, ':', "enum"); + + std::optional value = parse_number_s32(input); + CCC_CHECK(value.has_value(), "Failed to parse enum value."); + + enum_type->fields.emplace_back(*value, std::move(*name)); + + CCC_EXPECT_CHAR(input, ',', "enum"); + } + input++; + STABS_DEBUG_PRINTF("}\n"); + + out_type = std::move(enum_type); + break; + } + case 
StabsTypeDescriptor::FUNCTION: { // f + auto function = std::make_unique(type_number); + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + function->return_type = std::move(*return_type); + + out_type = std::move(function); + break; + } + case StabsTypeDescriptor::VOLATILE_QUALIFIER: { // B + auto volatile_qualifier = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + volatile_qualifier->type = std::move(*type); + + out_type = std::move(volatile_qualifier); + break; + } + case StabsTypeDescriptor::CONST_QUALIFIER: { // k + auto const_qualifier = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + const_qualifier->type = std::move(*type); + + out_type = std::move(const_qualifier); + break; + } + case StabsTypeDescriptor::RANGE: { // r + auto range = std::make_unique(type_number); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + range->type = std::move(*type); + + CCC_EXPECT_CHAR(input, ';', "range type descriptor"); + + std::optional low = parse_stabs_identifier(input, ';'); + CCC_CHECK(low.has_value(), "Failed to parse low part of range."); + CCC_EXPECT_CHAR(input, ';', "low range value"); + + std::optional high = parse_stabs_identifier(input, ';'); + CCC_CHECK(high.has_value(), "Failed to parse high part of range."); + CCC_EXPECT_CHAR(input, ';', "high range value"); + + range->low = std::move(*low); + range->high = std::move(*high); + + out_type = std::move(range); + break; + } + case StabsTypeDescriptor::STRUCT: { // s + auto struct_type = std::make_unique(type_number); + STABS_DEBUG_PRINTF("struct {\n"); + + std::optional struct_size = parse_number_s64(input); + CCC_CHECK(struct_size.has_value(), "Failed to parse struct size."); + struct_type->size = *struct_size; + + if(*input == '!') { + input++; + std::optional base_class_count = parse_number_s32(input); + CCC_CHECK(base_class_count.has_value(), 
"Failed to parse base class count."); + + CCC_EXPECT_CHAR(input, ',', "base class section"); + + for(s64 i = 0; i < *base_class_count; i++) { + StabsStructOrUnionType::BaseClass base_class; + + char is_virtual = *(input++); + switch(is_virtual) { + case '0': base_class.is_virtual = false; break; + case '1': base_class.is_virtual = true; break; + default: return CCC_FAILURE("Failed to parse base class (virtual character)."); + } + + Result visibility = parse_visibility_character(input); + CCC_RETURN_IF_ERROR(visibility); + base_class.visibility = *visibility; + + std::optional offset = parse_number_s32(input); + CCC_CHECK(offset.has_value(), "Failed to parse base class offset."); + base_class.offset = (s32) *offset; + + CCC_EXPECT_CHAR(input, ',', "base class section"); + + auto base_class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(base_class_type); + base_class.type = std::move(*base_class_type); + + CCC_EXPECT_CHAR(input, ';', "base class section"); + struct_type->base_classes.emplace_back(std::move(base_class)); + } + } + + auto fields = parse_field_list(input); + CCC_RETURN_IF_ERROR(fields); + struct_type->fields = std::move(*fields); + + auto member_functions = parse_member_functions(input); + CCC_RETURN_IF_ERROR(member_functions); + struct_type->member_functions = std::move(*member_functions); + + STABS_DEBUG_PRINTF("}\n"); + + out_type = std::move(struct_type); + break; + } + case StabsTypeDescriptor::UNION: { // u + auto union_type = std::make_unique(type_number); + STABS_DEBUG_PRINTF("union {\n"); + + std::optional union_size = parse_number_s64(input); + CCC_CHECK(union_size.has_value(), "Failed to parse struct size."); + union_type->size = *union_size; + + auto fields = parse_field_list(input); + CCC_RETURN_IF_ERROR(fields); + union_type->fields = std::move(*fields); + + auto member_functions = parse_member_functions(input); + CCC_RETURN_IF_ERROR(member_functions); + union_type->member_functions = std::move(*member_functions); + + 
STABS_DEBUG_PRINTF("}\n"); + + out_type = std::move(union_type); + break; + } + case StabsTypeDescriptor::CROSS_REFERENCE: { // x + auto cross_reference = std::make_unique(type_number); + + char cross_reference_type = *(input++); + CCC_CHECK(cross_reference_type != '\0', "Failed to parse cross reference type."); + + switch(cross_reference_type) { + case 'e': cross_reference->type = ast::ForwardDeclaredType::ENUM; break; + case 's': cross_reference->type = ast::ForwardDeclaredType::STRUCT; break; + case 'u': cross_reference->type = ast::ForwardDeclaredType::UNION; break; + default: + return CCC_FAILURE("Invalid cross reference type '%c'.", cross_reference->type); + } + + Result identifier = parse_dodgy_stabs_identifier(input, ':'); + CCC_RETURN_IF_ERROR(identifier); + cross_reference->identifier = std::move(*identifier); + + cross_reference->name = cross_reference->identifier; + CCC_EXPECT_CHAR(input, ':', "cross reference"); + + out_type = std::move(cross_reference); + break; + } + case StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { // R + auto fp_builtin = std::make_unique(type_number); + + std::optional fpclass = parse_number_s32(input); + CCC_CHECK(fpclass.has_value(), "Failed to parse floating point built-in class."); + fp_builtin->fpclass = *fpclass; + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + + std::optional bytes = parse_number_s32(input); + CCC_CHECK(bytes.has_value(), "Failed to parse floating point built-in."); + fp_builtin->bytes = *bytes; + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + + std::optional value = parse_number_s32(input); + CCC_CHECK(value.has_value(), "Failed to parse floating point built-in."); + + CCC_EXPECT_CHAR(input, ';', "floating point builtin"); + + out_type = std::move(fp_builtin); + break; + } + case StabsTypeDescriptor::METHOD: { // # + auto method = std::make_unique(type_number); + + if(*input == '#') { + input++; + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + 
method->return_type = std::move(*return_type); + + if(*input == ';') { + input++; + } + } else { + auto class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(class_type); + method->class_type = std::move(*class_type); + + CCC_EXPECT_CHAR(input, ',', "method"); + + auto return_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(return_type); + method->return_type = std::move(*return_type); + + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + + CCC_EXPECT_CHAR(input, ',', "method"); + + auto parameter_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(parameter_type); + method->parameter_types.emplace_back(std::move(*parameter_type)); + } + } + + out_type = std::move(method); + break; + } + case StabsTypeDescriptor::REFERENCE: { // & + auto reference = std::make_unique(type_number); + + auto value_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(value_type); + reference->value_type = std::move(*value_type); + + out_type = std::move(reference); + break; + } + case StabsTypeDescriptor::POINTER: { // * + auto pointer = std::make_unique(type_number); + + auto value_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(value_type); + pointer->value_type = std::move(*value_type); + + out_type = std::move(pointer); + break; + } + case StabsTypeDescriptor::TYPE_ATTRIBUTE: { // @ + if((*input >= '0' && *input <= '9') || *input == '(') { + auto member_pointer = std::make_unique(type_number); + + auto class_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(class_type); + member_pointer->class_type = std::move(*class_type); + + CCC_EXPECT_CHAR(input, ',', "pointer to non-static data member"); + + auto member_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(member_type); + member_pointer->member_type = std::move(*member_type); + + out_type = std::move(member_pointer); + } else { + auto type_attribute = std::make_unique(type_number); + CCC_CHECK(*input == 's', "Weird value following '@' type descriptor."); + input++; + + 
std::optional size_bits = parse_number_s64(input); + CCC_CHECK(size_bits.has_value(), "Failed to parse type attribute.") + type_attribute->size_bits = *size_bits; + CCC_EXPECT_CHAR(input, ';', "type attribute"); + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + type_attribute->type = std::move(*type); + + out_type = std::move(type_attribute); + } + break; + } + case StabsTypeDescriptor::BUILTIN: { // - + auto built_in = std::make_unique(type_number); + + std::optional type_id = parse_number_s64(input); + CCC_CHECK(type_id.has_value(), "Failed to parse built-in."); + built_in->type_id = *type_id; + + CCC_EXPECT_CHAR(input, ';', "builtin"); + + out_type = std::move(built_in); + break; + } + default: { + return CCC_FAILURE( + "Invalid type descriptor '%c' (%02x).", + (u32) descriptor, (u32) descriptor); + } + } + + return out_type; +} + +static Result> parse_field_list(const char*& input) +{ + std::vector fields; + + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + + const char* before_field = input; + StabsStructOrUnionType::Field field; + + Result name = parse_dodgy_stabs_identifier(input, ':'); + CCC_RETURN_IF_ERROR(name); + field.name = std::move(*name); + + CCC_EXPECT_CHAR(input, ':', "identifier"); + if(*input == '/') { + input++; + + Result visibility = parse_visibility_character(input); + CCC_RETURN_IF_ERROR(visibility); + field.visibility = *visibility; + } + if(*input == ':') { + input = before_field; + break; + } + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + field.type = std::move(*type); + + if(field.name.size() >= 1 && field.name[0] == '$') { + // Virtual function table pointers and virtual base class pointers. 
+ CCC_EXPECT_CHAR(input, ',', "field type"); + + std::optional offset_bits = parse_number_s32(input); + CCC_CHECK(offset_bits.has_value(), "Failed to parse field offset."); + field.offset_bits = *offset_bits; + + CCC_EXPECT_CHAR(input, ';', "field offset"); + } else if(*input == ':') { + // Static fields. + input++; + field.is_static = true; + + std::optional type_name = parse_stabs_identifier(input, ';'); + CCC_CHECK(type_name.has_value(), "Failed to parse static field type name."); + + field.type_name = std::move(*type_name); + + CCC_EXPECT_CHAR(input, ';', "identifier"); + } else if(*input == ',') { + // Normal fields. + input++; + + std::optional offset_bits = parse_number_s32(input); + CCC_CHECK(offset_bits.has_value(), "Failed to parse field offset."); + field.offset_bits = *offset_bits; + + CCC_EXPECT_CHAR(input, ',', "field offset"); + + std::optional size_bits = parse_number_s32(input); + CCC_CHECK(size_bits.has_value(), "Failed to parse field size."); + field.size_bits = *size_bits; + + CCC_EXPECT_CHAR(input, ';', "field size"); + } else { + return CCC_FAILURE("Expected ':' or ',', got '%c' (%hhx).", *input, *input); + } + + STABS_DEBUG(print_field(field);) + + fields.emplace_back(std::move(field)); + } + + return fields; +} + +static Result> parse_member_functions(const char*& input) +{ + // Check for if the next character is from an enclosing field list. If this + // is the case, the next character will be ',' for normal fields and ':' for + // static fields (see above). 
+ if(*input == ',' || *input == ':') { + return std::vector(); + } + + std::vector member_functions; + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + StabsStructOrUnionType::MemberFunctionSet member_function_set; + + std::optional name = parse_stabs_identifier(input, ':'); + CCC_CHECK(name.has_value(), "Failed to parse member function name."); + member_function_set.name = std::move(*name); + + CCC_EXPECT_CHAR(input, ':', "member function"); + CCC_EXPECT_CHAR(input, ':', "member function"); + while(*input != '\0') { + if(*input == ';') { + input++; + break; + } + + StabsStructOrUnionType::MemberFunction function; + + auto type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(type); + function.type = std::move(*type); + + CCC_EXPECT_CHAR(input, ':', "member function"); + std::optional identifier = parse_stabs_identifier(input, ';'); + CCC_CHECK(identifier.has_value(), "Invalid member function identifier."); + + CCC_EXPECT_CHAR(input, ';', "member function"); + + Result visibility = parse_visibility_character(input); + CCC_RETURN_IF_ERROR(visibility); + function.visibility = *visibility; + + char modifiers = *(input++); + CCC_CHECK(modifiers != '\0', "Failed to parse member function modifiers."); + switch(modifiers) { + case 'A': + function.is_const = false; + function.is_volatile = false; + break; + case 'B': + function.is_const = true; + function.is_volatile = false; + break; + case 'C': + function.is_const = false; + function.is_volatile = true; + break; + case 'D': + function.is_const = true; + function.is_volatile = true; + break; + case '?': + case '.': + break; + default: + return CCC_FAILURE("Invalid member function modifiers."); + } + + char flag = *(input++); + CCC_CHECK(flag != '\0', "Failed to parse member function type."); + switch(flag) { + case '.': { // normal member function + function.modifier = ast::MemberFunctionModifier::NONE; + break; + } + case '?': { // static member function + function.modifier = 
ast::MemberFunctionModifier::STATIC; + break; + } + case '*': { // virtual member function + std::optional vtable_index = parse_number_s32(input); + CCC_CHECK(vtable_index.has_value(), "Failed to parse vtable index."); + function.vtable_index = *vtable_index; + + CCC_EXPECT_CHAR(input, ';', "virtual member function"); + + auto virtual_type = parse_stabs_type(input); + CCC_RETURN_IF_ERROR(virtual_type); + function.virtual_type = std::move(*virtual_type); + + CCC_EXPECT_CHAR(input, ';', "virtual member function"); + function.modifier = ast::MemberFunctionModifier::VIRTUAL; + break; + } + default: + return CCC_FAILURE("Invalid member function type."); + } + member_function_set.overloads.emplace_back(std::move(function)); + } + STABS_DEBUG_PRINTF("member func: %s\n", member_function_set.name.c_str()); + member_functions.emplace_back(std::move(member_function_set)); + } + return member_functions; +} + +static Result parse_visibility_character(const char*& input) +{ + char visibility = *(input++); + switch(visibility) { + case '0': return StabsStructOrUnionType::Visibility::PRIVATE; + case '1': return StabsStructOrUnionType::Visibility::PROTECTED; + case '2': return StabsStructOrUnionType::Visibility::PUBLIC; + case '9': return StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT; + default: break; + } + + return CCC_FAILURE("Failed to parse visibility character."); +} + +std::optional parse_number_s32(const char*& input) +{ + char* end; + s64 value = strtoll(input, &end, 10); + if(end == input) { + return std::nullopt; + } + input = end; + return (s32) value; +} + +std::optional parse_number_s64(const char*& input) +{ + char* end; + s64 value = strtoll(input, &end, 10); + if(end == input) { + return std::nullopt; + } + input = end; + return value; +} + +std::optional parse_stabs_identifier(const char*& input, char terminator) +{ + const char* begin = input; + for(; *input != '\0'; input++) { + if(*input == terminator) { + return std::string(begin, input); + } + } + 
return std::nullopt; +} + +// The complexity here is because the input may contain an unescaped namespace +// separator '::' even if the field terminator is supposed to be a colon, as +// well as the raw contents of character literals. See test/ccc/stabs_tests.cpp +// for some examples. +Result parse_dodgy_stabs_identifier(const char*& input, char terminator) +{ + const char* begin = input; + s32 template_depth = 0; + + for(; *input != '\0'; input++) { + // Skip past character literals. + if(*input == '\'') { + input++; + if(*input == '\'') { + input++; // Handle character literals containing a single quote. + } + while(*input != '\'' && *input != '\0') { + input++; + } + if(*input == '\0') { + break; + } + input++; + } + + // Keep track of the template depth so we know when to expect the + // terminator character. + if(*input == '<') { + template_depth++; + } + if(*input == '>') { + template_depth--; + } + + if(*input == terminator && template_depth == 0) { + return std::string(begin, input); + } + } + + return CCC_FAILURE(STAB_TRUNCATED_ERROR_MESSAGE); +} + +STABS_DEBUG( + +static void print_field(const StabsStructOrUnionType::Field& field) +{ + printf("\t%04x %04x %04x %04x %s\n", field.offset_bits / 8, field.size_bits / 8, field.offset_bits, field.size_bits, field.name.c_str()); +} + +) + +const char* stabs_field_visibility_to_string(StabsStructOrUnionType::Visibility visibility) +{ + switch(visibility) { + case StabsStructOrUnionType::Visibility::PRIVATE: return "private"; + case StabsStructOrUnionType::Visibility::PROTECTED: return "protected"; + case StabsStructOrUnionType::Visibility::PUBLIC: return "public"; + case StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT: return "public_optimizedout"; + default: return "none"; + } + return ""; +} + +} diff --git a/3rdparty/ccc/src/ccc/stabs.h b/3rdparty/ccc/src/ccc/stabs.h new file mode 100644 index 0000000000..fef413ba06 --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs.h @@ -0,0 +1,379 @@ +// This file is 
// The symbol descriptor of a STABS symbol. parse_stabs_symbol reads it from
// the character that follows the ':' after the symbol name and casts that
// character directly to this enum, so the enumerator values are the
// characters themselves.
enum class StabsSymbolDescriptor : u8 {
	// Also assigned by parse_stabs_symbol, without consuming a character, when
	// the ':' is followed directly by a digit or '('.
	LOCAL_VARIABLE = '_',
	REFERENCE_PARAMETER_A = 'a',
	// For the two function descriptors, parse_stabs_symbol additionally skips
	// over a nested function suffix if one follows the type.
	LOCAL_FUNCTION = 'f',
	GLOBAL_FUNCTION = 'F',
	GLOBAL_VARIABLE = 'G',
	REGISTER_PARAMETER = 'P',
	VALUE_PARAMETER = 'p',
	REGISTER_VARIABLE = 'r',
	STATIC_GLOBAL_VARIABLE = 'S',
	// The parsed type is named after the symbol and flagged as a typedef.
	TYPE_NAME = 't',
	// The parsed type is named after the symbol, but not flagged as a typedef.
	ENUM_STRUCT_OR_TYPE_TAG = 'T',
	STATIC_LOCAL_VARIABLE = 'V',
	REFERENCE_PARAMETER_V = 'v'
};
+ std::optional name; + bool is_typedef = false; + bool is_root = false; + std::optional descriptor; + + StabsType(StabsTypeNumber n) : type_number(n) {} + StabsType(StabsTypeDescriptor d) : descriptor(d) {} + StabsType(StabsTypeNumber n, StabsTypeDescriptor d) : type_number(n), descriptor(d) {} + virtual ~StabsType() {} + + template + SubType& as() + { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + template + const SubType& as() const + { + CCC_ASSERT(descriptor == SubType::DESCRIPTOR); + return *static_cast(this); + } + + virtual void enumerate_numbered_types(std::map& output) const + { + if(type_number.valid() && descriptor.has_value()) { + output.emplace(type_number, this); + } + } +}; + +struct StabsTypeReferenceType : StabsType { + std::unique_ptr type; + + StabsTypeReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_REFERENCE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsArrayType : StabsType { + std::unique_ptr index_type; + std::unique_ptr element_type; + + StabsArrayType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::ARRAY; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + index_type->enumerate_numbered_types(output); + element_type->enumerate_numbered_types(output); + } +}; + +struct StabsEnumType : StabsType { + std::vector> fields; + + StabsEnumType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::ENUM; +}; + +struct StabsFunctionType : StabsType { + std::unique_ptr return_type; + + StabsFunctionType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static 
const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::FUNCTION; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + return_type->enumerate_numbered_types(output); + } +}; + +struct StabsVolatileQualifierType : StabsType { + std::unique_ptr type; + + StabsVolatileQualifierType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::VOLATILE_QUALIFIER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsConstQualifierType : StabsType { + std::unique_ptr type; + + StabsConstQualifierType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::CONST_QUALIFIER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsRangeType : StabsType { + std::unique_ptr type; + std::string low; + std::string high; // Some compilers wrote out a wrapped around value here for zero (or variable?) length arrays. 
+ + StabsRangeType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::RANGE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsStructOrUnionType : StabsType { + enum class Visibility : u8 { + NONE, + PRIVATE, + PROTECTED, + PUBLIC, + PUBLIC_OPTIMIZED_OUT + }; + + struct BaseClass { + bool is_virtual; + Visibility visibility; + s32 offset = -1; + std::unique_ptr type; + }; + + struct Field { + std::string name; + Visibility visibility = Visibility::NONE; + std::unique_ptr type; + bool is_static = false; + s32 offset_bits = 0; + s32 size_bits = 0; + std::string type_name; + }; + + struct MemberFunction { + std::unique_ptr type; + std::unique_ptr virtual_type; + Visibility visibility; + bool is_const = false; + bool is_volatile = false; + ast::MemberFunctionModifier modifier = ast::MemberFunctionModifier::NONE; + s32 vtable_index = -1; + }; + + struct MemberFunctionSet { + std::string name; + std::vector overloads; + }; + + s64 size = -1; + std::vector base_classes; + std::vector fields; + std::vector member_functions; + std::unique_ptr first_base_class; + + StabsStructOrUnionType(StabsTypeNumber n, StabsTypeDescriptor d) : StabsType(n, d) {} + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + for(const BaseClass& base_class : base_classes) { + base_class.type->enumerate_numbered_types(output); + } + for(const Field& field : fields) { + field.type->enumerate_numbered_types(output); + } + for(const MemberFunctionSet& member_function_set : member_functions) { + for(const MemberFunction& member_function : member_function_set.overloads) { + member_function.type->enumerate_numbered_types(output); + if(member_function.virtual_type.get()) { + 
member_function.virtual_type->enumerate_numbered_types(output); + } + } + } + if(first_base_class.get()) { + first_base_class->enumerate_numbered_types(output); + } + } +}; + +struct StabsStructType : StabsStructOrUnionType { + StabsStructType(StabsTypeNumber n) : StabsStructOrUnionType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::STRUCT; +}; + +struct StabsUnionType : StabsStructOrUnionType { + StabsUnionType(StabsTypeNumber n) : StabsStructOrUnionType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::UNION; +}; + + +struct StabsCrossReferenceType : StabsType { + ast::ForwardDeclaredType type; + std::string identifier; + + StabsCrossReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::CROSS_REFERENCE; +}; + +struct StabsFloatingPointBuiltInType : StabsType { + s32 fpclass = -1; + s32 bytes = -1; + + StabsFloatingPointBuiltInType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::FLOATING_POINT_BUILTIN; +}; + +struct StabsMethodType : StabsType { + std::unique_ptr return_type; + std::optional> class_type; + std::vector> parameter_types; + + StabsMethodType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::METHOD; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + return_type->enumerate_numbered_types(output); + if(class_type.has_value()) { + (*class_type)->enumerate_numbered_types(output); + } + for(const std::unique_ptr& parameter_type : parameter_types) { + parameter_type->enumerate_numbered_types(output); + } + } +}; + +struct StabsReferenceType : StabsType { + std::unique_ptr value_type; + + StabsReferenceType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) 
{} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::REFERENCE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + value_type->enumerate_numbered_types(output); + } +}; + +struct StabsPointerType : StabsType { + std::unique_ptr value_type; + + StabsPointerType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + value_type->enumerate_numbered_types(output); + } +}; + +struct StabsSizeTypeAttributeType : StabsType { + s64 size_bits = -1; + std::unique_ptr type; + + StabsSizeTypeAttributeType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::TYPE_ATTRIBUTE; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + type->enumerate_numbered_types(output); + } +}; + +struct StabsPointerToDataMemberType : StabsType { + std::unique_ptr class_type; + std::unique_ptr member_type; + + StabsPointerToDataMemberType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::POINTER_TO_DATA_MEMBER; + + void enumerate_numbered_types(std::map& output) const override + { + StabsType::enumerate_numbered_types(output); + class_type->enumerate_numbered_types(output); + member_type->enumerate_numbered_types(output); + } +}; + +struct StabsBuiltInType : StabsType { + s64 type_id = -1; + + StabsBuiltInType(StabsTypeNumber n) : StabsType(n, DESCRIPTOR) {} + static const constexpr StabsTypeDescriptor DESCRIPTOR = StabsTypeDescriptor::BUILTIN; +}; + +extern const char* STAB_TRUNCATED_ERROR_MESSAGE; + +Result> parse_top_level_stabs_type(const char*& input); +std::optional 
parse_number_s32(const char*& input); +std::optional parse_number_s64(const char*& input); +std::optional parse_stabs_identifier(const char*& input, char terminator); +Result parse_dodgy_stabs_identifier(const char*& input, char terminator); +const char* stabs_field_visibility_to_string(StabsStructOrUnionType::Visibility visibility); + +} diff --git a/3rdparty/ccc/src/ccc/stabs_to_ast.cpp b/3rdparty/ccc/src/ccc/stabs_to_ast.cpp new file mode 100644 index 0000000000..42b4784d28 --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs_to_ast.cpp @@ -0,0 +1,834 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "stabs_to_ast.h" + +#include "importer_flags.h" + +#define AST_DEBUG(...) //__VA_ARGS__ +#define AST_DEBUG_PRINTF(...) AST_DEBUG(printf(__VA_ARGS__);) + +namespace ccc { + +struct MemberFunctionInfo { + std::string name; + bool is_constructor_or_destructor = false; + bool is_special_member_function = false; + bool is_operator_member_function = false; +}; + +static bool is_void_like(const StabsType& type); +static Result classify_range(const StabsRangeType& type); +static Result> field_to_ast( + const StabsStructOrUnionType::Field& field, + const StabsType& enclosing_struct, + const StabsToAstState& state, + s32 depth); +static Result detect_bitfield(const StabsStructOrUnionType::Field& field, const StabsToAstState& state); +static Result>> member_functions_to_ast( + const StabsStructOrUnionType& type, const StabsToAstState& state, s32 depth); +static MemberFunctionInfo check_member_function( + const std::string& mangled_name, + std::string_view type_name_no_template_args, + const DemanglerFunctions& demangler, + u32 importer_flags); + +Result> stabs_type_to_ast( + const StabsType& type, + const StabsType* enclosing_struct, + const StabsToAstState& state, + s32 depth, + bool substitute_type_name, + bool force_substitute) +{ + AST_DEBUG_PRINTF("%-*stype desc=%hhx '%c' num=(%d,%d) name=%s\n", + depth * 4, "", + 
type.descriptor.has_value() ? (u8) *type.descriptor : 'X', + (type.descriptor.has_value() && isprint((u8) *type.descriptor)) ? (u8) *type.descriptor : '!', + type.type_number.file, type.type_number.type, + type.name.has_value() ? type.name->c_str() : ""); + + if(depth > 200) { + const char* error_message = "Call depth greater than 200 in stabs_type_to_ast, probably infinite recursion."; + if(state.importer_flags & STRICT_PARSING) { + return CCC_FAILURE(error_message); + } else { + CCC_WARN(error_message); + + auto error = std::make_unique(); + error->message = error_message; + return std::unique_ptr(std::move(error)); + } + } + + // This makes sure that types are replaced with their type name in cases + // where that would be more appropriate. + if(type.name.has_value()) { + bool try_substitute = depth > 0 && (type.is_root + || type.descriptor == StabsTypeDescriptor::RANGE + || type.descriptor == StabsTypeDescriptor::BUILTIN); + // GCC emits anonymous enums with a name of " " since apparently some + // debuggers can't handle zero-length names. + bool is_name_empty = type.name == "" || type.name == " "; + // Cross references will be handled below. + bool is_cross_reference = type.descriptor == StabsTypeDescriptor::CROSS_REFERENCE; + bool is_void = is_void_like(type); + if((substitute_type_name || try_substitute) && !is_name_empty && !is_cross_reference && !is_void) { + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::REFERENCE; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = *type.name; + type_name->unresolved_stabs->referenced_file_handle = state.file_handle; + type_name->unresolved_stabs->stabs_type_number = type.type_number; + return std::unique_ptr(std::move(type_name)); + } + } + + // This prevents infinite recursion when an automatically generated member + // function references an unnamed type. 
+ bool can_compare_type_numbers = type.type_number.valid() && enclosing_struct && enclosing_struct->type_number.valid(); + if(force_substitute && can_compare_type_numbers && type.type_number == enclosing_struct->type_number) { + // It's probably a this parameter (or return type) for an unnamed type. + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::UNNAMED_THIS; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = enclosing_struct->name.has_value() ? *enclosing_struct->name : ""; + type_name->unresolved_stabs->referenced_file_handle = state.file_handle; + type_name->unresolved_stabs->stabs_type_number = type.type_number; + return std::unique_ptr(std::move(type_name)); + } + + if(!type.descriptor.has_value()) { + // The definition of the type has been defined previously, so we have to + // look it up by its type number. + CCC_CHECK(type.type_number.valid(), "Cannot lookup type (type is anonymous)."); + auto stabs_type = state.stabs_types->find(type.type_number); + if(stabs_type == state.stabs_types->end()) { + std::string error_message = "Failed to lookup STABS type by its type number (" + + std::to_string(type.type_number.file) + "," + std::to_string(type.type_number.type) + ")."; + if(state.importer_flags & STRICT_PARSING) { + return CCC_FAILURE("%s", error_message.c_str()); + } else { + CCC_WARN("%s", error_message.c_str()); + std::unique_ptr error = std::make_unique(); + error->message = std::move(error_message); + return std::unique_ptr(std::move(error)); + } + } + return stabs_type_to_ast( + *stabs_type->second, + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + } + + std::unique_ptr result; + + switch(*type.descriptor) { + case StabsTypeDescriptor::TYPE_REFERENCE: { + const auto& stabs_type_ref = type.as(); + if(!type.type_number.valid() || !stabs_type_ref.type->type_number.valid() || stabs_type_ref.type->type_number != type.type_number) { + auto 
node = stabs_type_to_ast( + *stabs_type_ref.type, + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + } else { + // I still don't know why in STABS void is a reference to + // itself, maybe because I'm not a philosopher. + auto builtin = std::make_unique(); + builtin->bclass = ast::BuiltInClass::VOID_TYPE; + result = std::move(builtin); + } + break; + } + case StabsTypeDescriptor::ARRAY: { + auto array = std::make_unique(); + const auto& stabs_array = type.as(); + + auto element_node = stabs_type_to_ast( + *stabs_array.element_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(element_node); + array->element_type = std::move(*element_node); + + const StabsRangeType& index = stabs_array.index_type->as(); + + // The bounds of the index range are stored as strings and are parsed + // here as base 10 integers. Only a lower bound of zero is accepted. + char* end = nullptr; + + const char* low = index.low.c_str(); + s64 low_value = strtoll(low, &end, 10); + CCC_CHECK(end != low, "Failed to parse low part of range as integer."); + CCC_CHECK(low_value == 0, "Invalid index type for array."); + + const char* high = index.high.c_str(); + s64 high_value = strtoll(high, &end, 10); + CCC_CHECK(end != high, "Failed to parse high part of range as integer."); + + if(high_value == 4294967295) { + // Some compilers wrote out a wrapped around value here. 
+ array->element_count = 0; + } else { + array->element_count = (s32) high_value + 1; + } + + result = std::move(array); + break; + } + case StabsTypeDescriptor::ENUM: { + auto inline_enum = std::make_unique(); + const auto& stabs_enum = type.as(); + inline_enum->constants = stabs_enum.fields; + result = std::move(inline_enum); + break; + } + case StabsTypeDescriptor::FUNCTION: { + auto function = std::make_unique(); + + auto node = stabs_type_to_ast( + *type.as().return_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(node); + function->return_type = std::move(*node); + + result = std::move(function); + break; + } + case StabsTypeDescriptor::VOLATILE_QUALIFIER: { + const auto& volatile_qualifier = type.as(); + + auto node = stabs_type_to_ast( + *volatile_qualifier.type.get(), + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + + result->is_volatile = true; + break; + } + case StabsTypeDescriptor::CONST_QUALIFIER: { + const auto& const_qualifier = type.as(); + + auto node = stabs_type_to_ast( + *const_qualifier.type.get(), + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + // Propagate a failed conversion before dereferencing the result, as + // the volatile qualifier case above does. + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + + result->is_const = true; + break; + } + case StabsTypeDescriptor::RANGE: { + auto builtin = std::make_unique(); + Result bclass = classify_range(type.as()); + CCC_RETURN_IF_ERROR(bclass); + builtin->bclass = *bclass; + result = std::move(builtin); + break; + } + case StabsTypeDescriptor::STRUCT: + case StabsTypeDescriptor::UNION: { + const StabsStructOrUnionType* stabs_struct_or_union; + if(type.descriptor == StabsTypeDescriptor::STRUCT) { + stabs_struct_or_union = &type.as(); + } else { + stabs_struct_or_union = &type.as(); + } + + auto struct_or_union = std::make_unique(); + struct_or_union->is_struct = type.descriptor == StabsTypeDescriptor::STRUCT; + struct_or_union->size_bits = 
(s32) stabs_struct_or_union->size * 8; + + for(const StabsStructOrUnionType::BaseClass& stabs_base_class : stabs_struct_or_union->base_classes) { + auto base_class = stabs_type_to_ast( + *stabs_base_class.type, + &type, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(base_class); + + (*base_class)->offset_bytes = stabs_base_class.offset; + (*base_class)->set_access_specifier(stabs_field_visibility_to_access_specifier(stabs_base_class.visibility), state.importer_flags); + + if(stabs_base_class.is_virtual) { + (*base_class)->is_virtual_base_class = true; + } + + struct_or_union->base_classes.emplace_back(std::move(*base_class)); + } + + AST_DEBUG_PRINTF("%-*s beginfields\n", depth * 4, ""); + for(const StabsStructOrUnionType::Field& field : stabs_struct_or_union->fields) { + auto node = field_to_ast(field, type, state, depth); + CCC_RETURN_IF_ERROR(node); + struct_or_union->fields.emplace_back(std::move(*node)); + } + AST_DEBUG_PRINTF("%-*s endfields\n", depth * 4, ""); + + AST_DEBUG_PRINTF("%-*s beginmemberfuncs\n", depth * 4, ""); + Result>> member_functions = + member_functions_to_ast(*stabs_struct_or_union, state, depth); + CCC_RETURN_IF_ERROR(member_functions); + struct_or_union->member_functions = std::move(*member_functions); + AST_DEBUG_PRINTF("%-*s endmemberfuncs\n", depth * 4, ""); + + result = std::move(struct_or_union); + break; + } + case StabsTypeDescriptor::CROSS_REFERENCE: { + const auto& cross_reference = type.as(); + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::CROSS_REFERENCE; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = cross_reference.identifier; + type_name->unresolved_stabs->type = cross_reference.type; + result = std::move(type_name); + break; + } + case ccc::StabsTypeDescriptor::FLOATING_POINT_BUILTIN: { + const auto& fp_builtin = type.as(); + auto builtin = std::make_unique(); + switch(fp_builtin.bytes) { + case 1: builtin->bclass = 
ast::BuiltInClass::UNSIGNED_8; break; + case 2: builtin->bclass = ast::BuiltInClass::UNSIGNED_16; break; + case 4: builtin->bclass = ast::BuiltInClass::UNSIGNED_32; break; + case 8: builtin->bclass = ast::BuiltInClass::UNSIGNED_64; break; + case 16: builtin->bclass = ast::BuiltInClass::UNSIGNED_128; break; + default: builtin->bclass = ast::BuiltInClass::UNSIGNED_8; break; + } + result = std::move(builtin); + break; + } + case StabsTypeDescriptor::METHOD: { + const auto& stabs_method = type.as(); + auto function = std::make_unique(); + + auto return_node = stabs_type_to_ast( + *stabs_method.return_type.get(), + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(return_node); + function->return_type = std::move(*return_node); + + function->parameters.emplace(); + for(const std::unique_ptr& parameter_type : stabs_method.parameter_types) { + auto parameter_node = stabs_type_to_ast( + *parameter_type, + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(parameter_node); + function->parameters->emplace_back(std::move(*parameter_node)); + } + result = std::move(function); + break; + } + case StabsTypeDescriptor::POINTER: { + auto pointer = std::make_unique(); + pointer->is_pointer = true; + + auto value_node = stabs_type_to_ast( + *type.as().value_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(value_node); + pointer->value_type = std::move(*value_node); + + result = std::move(pointer); + break; + } + case StabsTypeDescriptor::REFERENCE: { + auto reference = std::make_unique(); + reference->is_pointer = false; + + auto value_node = stabs_type_to_ast( + *type.as().value_type, + enclosing_struct, + state, + depth + 1, + true, + force_substitute); + CCC_RETURN_IF_ERROR(value_node); + reference->value_type = std::move(*value_node); + + result = std::move(reference); + break; + } + case StabsTypeDescriptor::TYPE_ATTRIBUTE: { + const auto& stabs_type_attribute = type.as(); 
+ + auto node = stabs_type_to_ast( + *stabs_type_attribute.type, + enclosing_struct, + state, + depth + 1, + substitute_type_name, + force_substitute); + CCC_RETURN_IF_ERROR(node); + result = std::move(*node); + + result->size_bits = (s32) stabs_type_attribute.size_bits; + break; + } + case StabsTypeDescriptor::POINTER_TO_DATA_MEMBER: { + const auto& stabs_member_pointer = type.as(); + auto member_pointer = std::make_unique(); + + auto class_node = stabs_type_to_ast( + *stabs_member_pointer.class_type.get(), + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(class_node); + member_pointer->class_type = std::move(*class_node); + + auto member_node = stabs_type_to_ast( + *stabs_member_pointer.member_type.get(), + enclosing_struct, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(member_node); + member_pointer->member_type = std::move(*member_node); + + result = std::move(member_pointer); + break; + } + case StabsTypeDescriptor::BUILTIN: { + CCC_CHECK(type.as().type_id == 16, + "Unknown built-in type!"); + auto builtin = std::make_unique(); + builtin->bclass = ast::BuiltInClass::BOOL_8; + result = std::move(builtin); + break; + } + } + + CCC_CHECK(result, "Result of stabs_type_to_ast call is nullptr."); + return result; +} + +static bool is_void_like(const StabsType& type) +{ + // Unfortunately, a common case seems to be that various types (most + // commonly __builtin_va_list) are indistinguishable from void or void*, so + // we have to output them as a void built-in. 
+ if(type.descriptor.has_value()) { + switch(*type.descriptor) { + case StabsTypeDescriptor::POINTER: { + return is_void_like(*type.as().value_type.get()); + } + case StabsTypeDescriptor::TYPE_REFERENCE: { + return type.as().type->type_number == type.type_number; + } + default: { + break; + } + } + } + + return false; +} + +static Result classify_range(const StabsRangeType& type) +{ + const char* low = type.low.c_str(); + const char* high = type.high.c_str(); + + // Handle some special cases and values that are too large to easily store + // in a 64-bit integer. + static const struct { const char* low; const char* high; ast::BuiltInClass classification; } strings[] = { + {"4", "0", ast::BuiltInClass::FLOAT_32}, + {"000000000000000000000000", "001777777777777777777777", ast::BuiltInClass::UNSIGNED_64}, + {"00000000000000000000000000000000000000000000", "00000000000000000000001777777777777777777777", ast::BuiltInClass::UNSIGNED_64}, + {"0000000000000", "01777777777777777777777", ast::BuiltInClass::UNSIGNED_64}, // IOP + {"0", "18446744073709551615", ast::BuiltInClass::UNSIGNED_64}, + {"001000000000000000000000", "000777777777777777777777", ast::BuiltInClass::SIGNED_64}, + {"00000000000000000000001000000000000000000000", "00000000000000000000000777777777777777777777", ast::BuiltInClass::SIGNED_64}, + {"01000000000000000000000", "0777777777777777777777", ast::BuiltInClass::SIGNED_64}, // IOP + {"-9223372036854775808", "9223372036854775807", ast::BuiltInClass::SIGNED_64}, + {"8", "0", ast::BuiltInClass::FLOAT_64}, + {"00000000000000000000000000000000000000000000", "03777777777777777777777777777777777777777777", ast::BuiltInClass::UNSIGNED_128}, + {"02000000000000000000000000000000000000000000", "01777777777777777777777777777777777777777777", ast::BuiltInClass::SIGNED_128}, + {"000000000000000000000000", "0377777777777777777777777777777777", ast::BuiltInClass::UNQUALIFIED_128}, + {"16", "0", ast::BuiltInClass::FLOAT_128}, + {"0", "-1", ast::BuiltInClass::UNQUALIFIED_128} 
// Old homebrew toolchain + }; + + for(const auto& range : strings) { + if(strcmp(range.low, low) == 0 && strcmp(range.high, high) == 0) { + return range.classification; + } + } + + // For smaller values we actually parse the bounds as integers. + char* end = nullptr; + s64 low_value = strtoll(type.low.c_str(), &end, low[0] == '0' ? 8 : 10); + CCC_CHECK(end != low, "Failed to parse low part of range as integer."); + s64 high_value = strtoll(type.high.c_str(), &end, high[0] == '0' ? 8 : 10); + CCC_CHECK(end != high, "Failed to parse high part of range as integer."); + + static const struct { s64 low; s64 high; ast::BuiltInClass classification; } integers[] = { + {0, 255, ast::BuiltInClass::UNSIGNED_8}, + {-128, 127, ast::BuiltInClass::SIGNED_8}, + {0, 127, ast::BuiltInClass::UNQUALIFIED_8}, + {0, 65535, ast::BuiltInClass::UNSIGNED_16}, + {-32768, 32767, ast::BuiltInClass::SIGNED_16}, + {0, 4294967295, ast::BuiltInClass::UNSIGNED_32}, + {-2147483648, 2147483647, ast::BuiltInClass::SIGNED_32}, + }; + + for(const auto& range : integers) { + if((range.low == low_value || range.low == -low_value) && range.high == high_value) { + return range.classification; + } + } + + return CCC_FAILURE("Failed to classify range."); +} + +static Result> field_to_ast( + const StabsStructOrUnionType::Field& field, + const StabsType& enclosing_struct, + const StabsToAstState& state, + s32 depth) +{ + AST_DEBUG_PRINTF("%-*s field %s\n", depth * 4, "", field.name.c_str()); + + Result is_bitfield = detect_bitfield(field, state); + CCC_RETURN_IF_ERROR(is_bitfield); + + if(*is_bitfield) { + // Process bitfields. + auto bitfield_node = stabs_type_to_ast( + *field.type, + &enclosing_struct, + state, + depth + 1, + true, + false); + CCC_RETURN_IF_ERROR(bitfield_node); + + std::unique_ptr bitfield = std::make_unique(); + bitfield->name = (field.name == " ") ? 
"" : field.name; + bitfield->offset_bytes = field.offset_bits / 8; + bitfield->size_bits = field.size_bits; + bitfield->underlying_type = std::move(*bitfield_node); + bitfield->bitfield_offset_bits = field.offset_bits % 8; + bitfield->set_access_specifier(stabs_field_visibility_to_access_specifier(field.visibility), state.importer_flags); + + return std::unique_ptr(std::move(bitfield)); + } else { + // Process a normal field. + Result> node = stabs_type_to_ast( + *field.type, + &enclosing_struct, + state, + depth + 1, + true, + false); + CCC_RETURN_IF_ERROR(node); + + (*node)->name = field.name; + (*node)->offset_bytes = field.offset_bits / 8; + (*node)->size_bits = field.size_bits; + (*node)->set_access_specifier(stabs_field_visibility_to_access_specifier(field.visibility), state.importer_flags); + + if(field.name.starts_with("$vf") || field.name.starts_with("_vptr$") || field.name.starts_with("_vptr.")) { + (*node)->is_vtable_pointer = true; + } + + if(field.is_static) { + (*node)->storage_class = STORAGE_CLASS_STATIC; + } + + return node; + } +} + +static Result detect_bitfield(const StabsStructOrUnionType::Field& field, const StabsToAstState& state) +{ + // Static fields can't be bitfields. + if(field.is_static) { + return false; + } + + // Resolve type references. 
+ const StabsType* type = field.type.get(); + for(s32 i = 0; i < 50; i++) { + if(!type->descriptor.has_value()) { + if(!type->type_number.valid()) { + return false; + } + auto next_type = state.stabs_types->find(type->type_number); + if(next_type == state.stabs_types->end() || next_type->second == type) { + return false; + } + type = next_type->second; + } else if(type->descriptor == StabsTypeDescriptor::TYPE_REFERENCE) { + type = type->as().type.get(); + } else if(type->descriptor == StabsTypeDescriptor::CONST_QUALIFIER) { + type = type->as().type.get(); + } else if(type->descriptor == StabsTypeDescriptor::VOLATILE_QUALIFIER) { + type = type->as().type.get(); + } else { + break; + } + + // Prevent an infinite loop if there's a cycle (fatal frame). + if(i == 49) { + return false; + } + } + + // Determine the size of the underlying type. + s32 underlying_type_size_bits = 0; + switch(*type->descriptor) { + case ccc::StabsTypeDescriptor::RANGE: { + Result bclass = classify_range(type->as()); + CCC_RETURN_IF_ERROR(bclass); + underlying_type_size_bits = builtin_class_size(*bclass) * 8; + break; + } + case ccc::StabsTypeDescriptor::CROSS_REFERENCE: { + if(type->as().type == ast::ForwardDeclaredType::ENUM) { + underlying_type_size_bits = 32; + } else { + return false; + } + break; + } + case ccc::StabsTypeDescriptor::TYPE_ATTRIBUTE: { + underlying_type_size_bits = (s32) type->as().size_bits; + break; + } + case ccc::StabsTypeDescriptor::BUILTIN: { + underlying_type_size_bits = 8; // bool + break; + } + default: { + return false; + } + } + + if(underlying_type_size_bits == 0) { + return false; + } + + return field.size_bits != underlying_type_size_bits; +} + +static Result>> member_functions_to_ast( + const StabsStructOrUnionType& type, const StabsToAstState& state, s32 depth) +{ + if(state.importer_flags & NO_MEMBER_FUNCTIONS) { + return std::vector>(); + } + + std::string_view type_name_no_template_args; + if(type.name.has_value()) { + type_name_no_template_args = + 
std::string_view(*type.name).substr(0, type.name->find("<")); + } + + std::vector> member_functions; + bool only_special_functions = true; + + for(const StabsStructOrUnionType::MemberFunctionSet& function_set : type.member_functions) { + MemberFunctionInfo info = check_member_function( + function_set.name, type_name_no_template_args, state.demangler, state.importer_flags); + + if(!info.is_special_member_function) { + only_special_functions = false; + } + + for(const StabsStructOrUnionType::MemberFunction& stabs_func : function_set.overloads) { + auto node = stabs_type_to_ast( + *stabs_func.type, + &type, + state, + depth + 1, + true, + true); + CCC_RETURN_IF_ERROR(node); + + (*node)->is_constructor_or_destructor = info.is_constructor_or_destructor; + (*node)->is_special_member_function = info.is_special_member_function; + (*node)->is_operator_member_function = info.is_operator_member_function; + + (*node)->name = info.name; + (*node)->set_access_specifier(stabs_field_visibility_to_access_specifier(stabs_func.visibility), state.importer_flags); + + if((*node)->descriptor == ast::FUNCTION) { + ast::Function& function = (*node)->as(); + function.modifier = stabs_func.modifier; + function.vtable_index = stabs_func.vtable_index; + } + + member_functions.emplace_back(std::move(*node)); + } + } + + if(only_special_functions && (state.importer_flags & INCLUDE_GENERATED_MEMBER_FUNCTIONS) == 0) { + return std::vector>(); + } + + return member_functions; +} + +static MemberFunctionInfo check_member_function( + const std::string& mangled_name, + std::string_view type_name_no_template_args, + const DemanglerFunctions& demangler, + u32 importer_flags) +{ + MemberFunctionInfo info; + + // Some compiler versions output gcc opnames for overloaded operators + // instead of their proper names. 
+ if((importer_flags & DONT_DEMANGLE_NAMES) == 0 && demangler.cplus_demangle_opname) { + char* demangled_name = demangler.cplus_demangle_opname(mangled_name.c_str(), 0); + if(demangled_name) { + info.name = demangled_name; + free(reinterpret_cast(demangled_name)); + } + } + if(info.name.empty()) { + info.name = mangled_name; + } + + bool is_constructor = + info.name == "__ct" || // Takes a parameter to decide whether or not to construct virtual base classes. + info.name == "__comp_ctor" || // Constructs virtual base classes. + info.name == "__base_ctor"; // Does not construct virtual base classes. + + if(!is_constructor && !type_name_no_template_args.empty()) { + is_constructor |= info.name == type_name_no_template_args; // Named constructor. + } + + bool is_destructor = + info.name == "__dt" || // Takes parameters to decide whether or not to destruct virtual base classes and/or delete the object. + info.name == "__comp_dtor" || // Destructs virtual base classes. + info.name == "__base_dtor" || // Does not destruct virtual base classes. + info.name == "__deleting_dtor"; // Destructs virtual base classes then deletes the entire object. + + if(!is_destructor && !info.name.empty()) { + is_destructor |= info.name[0] == '~' && std::string_view(info.name).substr(1) == type_name_no_template_args; // Named destructor. + } + + info.is_constructor_or_destructor = is_constructor || is_destructor || info.name.starts_with("$_"); + info.is_special_member_function = info.is_constructor_or_destructor || info.name == "operator="; + + return info; +} + +void fix_recursively_emitted_structures( + ast::StructOrUnion& outer_struct, const std::string& name, StabsTypeNumber type_number, SourceFileHandle file_handle) +{ + // This is a rather peculiar case. 
For some compiler versions, when a struct + // or a union defined using a typedef is being emitted and it needs to + // reference itself from a member function parameter, it will emit its + // entire definition again in the middle of the first definition, although + // thankfully it won't recurse more than once. + // + // The game Sega Soccer Slam is affected by this. See the PeculiarParameter + // test case in mdebug_importer_tests.cpp for a bare bones example. + + for(std::unique_ptr& node : outer_struct.member_functions) { + if(node->descriptor != ast::FUNCTION) { + continue; + } + + ast::Function& function = node->as(); + if(!function.parameters.has_value()) { + continue; + } + + for(std::unique_ptr& parameter : *function.parameters) { + if(parameter->descriptor != ast::POINTER_OR_REFERENCE) { + continue; + } + + ast::PointerOrReference& pointer_or_reference = parameter->as(); + if(pointer_or_reference.value_type->descriptor != ast::STRUCT_OR_UNION) { + continue; + } + + ast::StructOrUnion& inner_struct = pointer_or_reference.value_type->as(); + + // Since C++ doesn't allow struct definitions in function parameter + // lists normally, and most of the time the member function + // parameters aren't even filled in by GCC, this is a really rare + // case, so here we only bother to do some very basic checks to + // verify that the inner struct is similar to the outer struct. 
+ if(inner_struct.base_classes.size() != outer_struct.base_classes.size()) { + continue; + } + + if(inner_struct.fields.size() != outer_struct.fields.size()) { + continue; + } + + if(inner_struct.member_functions.size() != outer_struct.member_functions.size()) { + continue; + } + + auto type_name = std::make_unique(); + type_name->source = ast::TypeNameSource::REFERENCE; + type_name->unresolved_stabs = std::make_unique(); + type_name->unresolved_stabs->type_name = name; + type_name->unresolved_stabs->referenced_file_handle = file_handle; + type_name->unresolved_stabs->stabs_type_number = type_number; + pointer_or_reference.value_type = std::move(type_name); + } + } +} + +ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsStructOrUnionType::Visibility visibility) +{ + ast::AccessSpecifier access_specifier = ast::AS_PUBLIC; + switch(visibility) { + case StabsStructOrUnionType::Visibility::NONE: access_specifier = ast::AS_PUBLIC; break; + case StabsStructOrUnionType::Visibility::PUBLIC: access_specifier = ast::AS_PUBLIC; break; + case StabsStructOrUnionType::Visibility::PROTECTED: access_specifier = ast::AS_PROTECTED; break; + case StabsStructOrUnionType::Visibility::PRIVATE: access_specifier = ast::AS_PRIVATE; break; + case StabsStructOrUnionType::Visibility::PUBLIC_OPTIMIZED_OUT: access_specifier = ast::AS_PUBLIC; break; + } + return access_specifier; +} + +} diff --git a/3rdparty/ccc/src/ccc/stabs_to_ast.h b/3rdparty/ccc/src/ccc/stabs_to_ast.h new file mode 100644 index 0000000000..03d265e436 --- /dev/null +++ b/3rdparty/ccc/src/ccc/stabs_to_ast.h @@ -0,0 +1,29 @@ +// This file is part of the Chaos Compiler Collection. 
+// SPDX-License-Identifier: MIT + +#pragma once + +#include "ast.h" +#include "stabs.h" + +namespace ccc { + +struct StabsToAstState { + u32 file_handle; + std::map* stabs_types; + u32 importer_flags; + DemanglerFunctions demangler; +}; + +Result> stabs_type_to_ast( + const StabsType& type, + const StabsType* enclosing_struct, + const StabsToAstState& state, + s32 depth, + bool substitute_type_name, + bool force_substitute); +void fix_recursively_emitted_structures( + ast::StructOrUnion& outer_struct, const std::string& name, StabsTypeNumber type_number, SourceFileHandle file_handle); +ast::AccessSpecifier stabs_field_visibility_to_access_specifier(StabsStructOrUnionType::Visibility visibility); + +} diff --git a/3rdparty/ccc/src/ccc/symbol_database.cpp b/3rdparty/ccc/src/ccc/symbol_database.cpp new file mode 100644 index 0000000000..82451bbb77 --- /dev/null +++ b/3rdparty/ccc/src/ccc/symbol_database.cpp @@ -0,0 +1,1204 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "symbol_database.h" + +#include "ast.h" +#include "importer_flags.h" + +namespace ccc { + +template +SymbolType* SymbolList::symbol_from_handle(SymbolHandle handle) +{ + if(!handle.valid()) { + return nullptr; + } + + size_t index = binary_search(handle); + if(index >= m_symbols.size() || m_symbols[index].m_handle != handle) { + return nullptr; + } + + return &m_symbols[index]; +} + +template +const SymbolType* SymbolList::symbol_from_handle(SymbolHandle handle) const +{ + return const_cast*>(this)->symbol_from_handle(handle); +} + +template +std::vector SymbolList::symbols_from_handles( + const std::vector>& handles) +{ + std::vector result; + for(SymbolHandle handle : handles) { + SymbolType* symbol = symbol_from_handle(handle); + if(symbol) { + result.emplace_back(symbol); + } + } + return result; +} + +template +std::vector SymbolList::symbols_from_handles( + const std::vector>& handles) const +{ + std::vector result; + for(SymbolHandle 
handle : handles) { + const SymbolType* symbol = symbol_from_handle(handle); + if(symbol) { + result.emplace_back(symbol); + } + } + return result; +} + +template +std::vector SymbolList::optional_symbols_from_handles( + const std::optional>>& handles) +{ + if(handles.has_value()) { + return symbols_from_handles(*handles); + } else { + return std::vector(); + } +} + +template +std::vector SymbolList::optional_symbols_from_handles( + const std::optional>>& handles) const +{ + if(handles.has_value()) { + return symbols_from_handles(*handles); + } else { + return std::vector(); + } +} + +template +typename SymbolList::Iterator SymbolList::begin() +{ + return m_symbols.begin(); +} + +template +typename SymbolList::ConstIterator SymbolList::begin() const +{ + return m_symbols.begin(); +} + +template +typename SymbolList::Iterator SymbolList::end() +{ + return m_symbols.end(); +} + +template +typename SymbolList::ConstIterator SymbolList::end() const +{ + return m_symbols.end(); +} + +template +typename SymbolList::AddressToHandleMapIterators SymbolList::handles_from_starting_address(Address address) const +{ + auto iterators = m_address_to_handle.equal_range(address.value); + return {iterators.first, iterators.second}; +} + +template +typename SymbolList::AddressToHandleMapIterators SymbolList::handles_from_address_range(AddressRange range) const +{ + if(range.low.valid()) { + return {m_address_to_handle.lower_bound(range.low.value), m_address_to_handle.lower_bound(range.high.value)}; + } else if(range.high.valid()) { + return {m_address_to_handle.begin(), m_address_to_handle.lower_bound(range.high.value)}; + } else { + return {m_address_to_handle.end(), m_address_to_handle.end()}; + } +} + +template +SymbolHandle SymbolList::first_handle_from_starting_address(Address address) const +{ + auto iterator = m_address_to_handle.find(address.value); + if(iterator != m_address_to_handle.end()) { + return iterator->second; + } else { + return SymbolHandle(); + } +} + +template 
+typename SymbolList::NameToHandleMapIterators SymbolList::handles_from_name(const std::string& name) const +{ + auto iterators = m_name_to_handle.equal_range(name); + return {iterators.first, iterators.second}; +} + +template +SymbolHandle SymbolList::first_handle_after_address(Address address) const +{ + auto iterator = m_address_to_handle.upper_bound(address.value); + if(iterator != m_address_to_handle.end()) { + return iterator->second; + } else { + return SymbolHandle(); + } +} + +template +SymbolHandle SymbolList::first_handle_from_name(const std::string& name) const +{ + auto iterator = m_name_to_handle.find(name); + if(iterator != m_name_to_handle.end()) { + return iterator->second; + } else { + return SymbolHandle(); + } +} + +template +SymbolType* SymbolList::symbol_overlapping_address(Address address) +{ + auto iterator = m_address_to_handle.upper_bound(address.value); + if(iterator != m_address_to_handle.begin()) { + iterator--; // Find the greatest element that is less than or equal to the address. 
+ SymbolType* symbol = symbol_from_handle(iterator->second); + if(symbol && address.value < symbol->address().value + symbol->size()) { + return symbol; + } + } + return nullptr; +} + +template +const SymbolType* SymbolList::symbol_overlapping_address(Address address) const +{ + return const_cast*>(this)->symbol_overlapping_address(address); +} + +template +s32 SymbolList::index_from_handle(SymbolHandle handle) const +{ + if(!handle.valid()) { + return -1; + } + + size_t index = binary_search(handle); + if(index >= m_symbols.size() || m_symbols[index].handle() != handle) { + return -1; + } + + return (s32) index; +} + +template +SymbolType& SymbolList::symbol_from_index(s32 index) +{ + return m_symbols.at(index); +} + +template +const SymbolType& SymbolList::symbol_from_index(s32 index) const +{ + return m_symbols.at(index); +} + +template +bool SymbolList::empty() const +{ + return m_symbols.size() == 0; +} + + +template +s32 SymbolList::size() const +{ + return (s32) m_symbols.size(); +} + +template +Result SymbolList::create_symbol( + std::string name, Address address, SymbolSourceHandle source, const Module* module_symbol) +{ + u32 handle; + do { + handle = m_next_handle; + CCC_CHECK(handle != UINT32_MAX, "Ran out of handles to use for %s symbols.", SymbolType::NAME); + } while(!m_next_handle.compare_exchange_weak(handle, handle + 1)); + + SymbolType& symbol = m_symbols.emplace_back(); + + symbol.m_handle = handle; + symbol.m_name = std::move(name); + symbol.m_source = source; + + if(module_symbol) { + symbol.m_address = address.add_base_address(module_symbol->address()); + symbol.m_module = module_symbol->handle(); + } else { + symbol.m_address = address; + } + + symbol.on_create(); + + CCC_ASSERT(symbol.source().valid()); + + link_address_map(symbol); + link_name_map(symbol); + + return &symbol; +} + +template +Result SymbolList::create_symbol( + std::string name, SymbolSourceHandle source, const Module* module_symbol) +{ + return 
create_symbol(std::move(name), Address(), source, module_symbol); +} + +template +Result SymbolList::create_symbol( + std::string name, SymbolSourceHandle source, const Module* module_symbol, Address address, u32 importer_flags, DemanglerFunctions demangler) +{ + static const int DMGL_PARAMS = 1 << 0; + static const int DMGL_RET_POSTFIX = 1 << 5; + + std::string demangled_name; + if constexpr(SymbolType::FLAGS & NAME_NEEDS_DEMANGLING) { + if((importer_flags & DONT_DEMANGLE_NAMES) == 0 && demangler.cplus_demangle) { + int demangler_flags = 0; + if(importer_flags & DEMANGLE_PARAMETERS) demangler_flags |= DMGL_PARAMS; + if(importer_flags & DEMANGLE_RETURN_TYPE) demangler_flags |= DMGL_RET_POSTFIX; + char* demangled_name_ptr = demangler.cplus_demangle(name.c_str(), demangler_flags); + if(demangled_name_ptr) { + demangled_name = demangled_name_ptr; + free(reinterpret_cast(demangled_name_ptr)); + } + } + } + + std::string& non_mangled_name = demangled_name.empty() ? name : demangled_name; + + Result symbol = create_symbol(non_mangled_name, address, source, module_symbol); + CCC_RETURN_IF_ERROR(symbol); + + if constexpr(SymbolType::FLAGS & NAME_NEEDS_DEMANGLING) { + if(!demangled_name.empty()) { + (*symbol)->set_mangled_name(name); + } + } + + return symbol; +} + +template +bool SymbolList::move_symbol(SymbolHandle handle, Address new_address) +{ + SymbolType* symbol = symbol_from_handle(handle); + if(!symbol) { + return false; + } + + if(symbol->address() != new_address) { + unlink_address_map(*symbol); + symbol->m_address = new_address; + link_address_map(*symbol); + } + + return true; +} + +template +bool SymbolList::rename_symbol(SymbolHandle handle, std::string new_name) +{ + SymbolType* symbol = symbol_from_handle(handle); + if(!symbol) { + return false; + } + + if(symbol->name() != new_name) { + unlink_name_map(*symbol); + symbol->m_name = std::move(new_name); + link_name_map(*symbol); + } + + return true; +} + +template +void SymbolList::merge_from(SymbolList& 
list) +{ + m_address_to_handle.clear(); + m_name_to_handle.clear(); + + std::vector lhs = std::move(m_symbols); + std::vector rhs = std::move(list.m_symbols); + + m_symbols = std::vector(); + m_symbols.reserve(lhs.size() + rhs.size()); + + size_t lhs_pos = 0; + size_t rhs_pos = 0; + for(;;) { + SymbolType* symbol; + if(lhs_pos < lhs.size() && (rhs_pos >= rhs.size() || lhs[lhs_pos].handle() < rhs[rhs_pos].handle())) { + symbol = &m_symbols.emplace_back(std::move(lhs[lhs_pos++])); + } else if(rhs_pos < rhs.size()) { + symbol = &m_symbols.emplace_back(std::move(rhs[rhs_pos++])); + } else { + break; + } + + link_address_map(*symbol); + link_name_map(*symbol); + } + + CCC_ASSERT(m_symbols.size() == lhs.size() + rhs.size()); + + list.m_symbols.clear(); + list.m_address_to_handle.clear(); + list.m_name_to_handle.clear(); +} + +template +bool SymbolList::mark_symbol_for_destruction(SymbolHandle handle, SymbolDatabase* database) +{ + SymbolType* symbol = symbol_from_handle(handle); + if(!symbol) { + return false; + } + + symbol->mark_for_destruction(); + + symbol->on_destroy(database); + + return true; +} + +template +void SymbolList::mark_symbols_from_source_for_destruction(SymbolSourceHandle source, SymbolDatabase* database) +{ + for(SymbolType& symbol : m_symbols) { + if(symbol.source() != source) { + continue; + } + + symbol.mark_for_destruction(); + + symbol.on_destroy(database); + } +} + +template +void SymbolList::mark_symbols_from_module_for_destruction(ModuleHandle module_handle, SymbolDatabase* database) +{ + for(SymbolType& symbol : m_symbols) { + if(symbol.module_handle() != module_handle) { + continue; + } + + symbol.mark_for_destruction(); + + symbol.on_destroy(database); + } +} + +template +void SymbolList::destroy_marked_symbols() +{ + std::vector remaining_symbols; + for(SymbolType& symbol : m_symbols) { + if(symbol.m_marked_for_destruction) { + unlink_address_map(symbol); + unlink_name_map(symbol); + } else { + 
remaining_symbols.emplace_back(std::move(symbol)); + } + } + + m_symbols = std::move(remaining_symbols); +} + +template +void SymbolList::clear() +{ + m_symbols.clear(); + m_address_to_handle.clear(); + m_name_to_handle.clear(); +} + +template +size_t SymbolList::binary_search(SymbolHandle handle) const +{ + size_t begin = 0; + size_t end = m_symbols.size(); + + while(begin < end) { + size_t mid = (begin + end) / 2; + if(m_symbols[mid].handle() < handle) { + begin = mid + 1; + } else if(m_symbols[mid].handle() > handle) { + end = mid; + } else { + return mid; + } + } + + return end; +} + +template +void SymbolList::link_address_map(SymbolType& symbol) +{ + if constexpr((SymbolType::FLAGS & WITH_ADDRESS_MAP)) { + if(symbol.address().valid()) { + m_address_to_handle.emplace(symbol.address().value, symbol.handle()); + } + } +} + +template +void SymbolList::unlink_address_map(SymbolType& symbol) +{ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { + if(symbol.address().valid()) { + auto iterators = m_address_to_handle.equal_range(symbol.address().value); + for(auto iterator = iterators.first; iterator != iterators.second; iterator++) { + if(iterator->second == symbol.handle()) { + m_address_to_handle.erase(iterator); + break; + } + } + } + } +} + +template +void SymbolList::link_name_map(SymbolType& symbol) +{ + if constexpr(SymbolType::FLAGS & WITH_NAME_MAP) { + m_name_to_handle.emplace(symbol.name(), symbol.handle()); + } +} + +template +void SymbolList::unlink_name_map(SymbolType& symbol) +{ + if constexpr(SymbolType::FLAGS & WITH_NAME_MAP) { + auto iterators = m_name_to_handle.equal_range(symbol.name()); + for(auto iterator = iterators.first; iterator != iterators.second; iterator++) { + if(iterator->second == symbol.handle()) { + m_name_to_handle.erase(iterator); + break; + } + } + } +} + +template +std::atomic SymbolList::m_next_handle = 0; + +#define CCC_X(SymbolType, symbol_list) template class SymbolList; +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +// 
***************************************************************************** + +void Symbol::set_type(std::unique_ptr type) +{ + m_type = std::move(type); + invalidate_node_handles(); +} + +// ***************************************************************************** + +const char* global_storage_location_to_string(GlobalStorageLocation location) +{ + switch(location) { + case NIL: return "nil"; + case DATA: return "data"; + case BSS: return "bss"; + case ABS: return "abs"; + case SDATA: return "sdata"; + case SBSS: return "sbss"; + case RDATA: return "rdata"; + case COMMON: return "common"; + case SCOMMON: return "scommon"; + case SUNDEFINED: return "sundefined"; + } + return ""; +} + +// ***************************************************************************** + +const std::optional>& Function::parameter_variables() const +{ + return m_parameter_variables; +} + +void Function::set_parameter_variables( + std::optional> parameter_variables, SymbolDatabase& database) +{ + if(m_parameter_variables.has_value()) { + for(ParameterVariableHandle parameter_variable_handle : *m_parameter_variables) { + ParameterVariable* parameter_variable = database.parameter_variables.symbol_from_handle(parameter_variable_handle); + if(parameter_variable && parameter_variable->m_function == handle()) { + parameter_variable->m_function = FunctionHandle(); + } + } + } + + m_parameter_variables = std::move(parameter_variables); + + if(m_parameter_variables.has_value()) { + for(ParameterVariableHandle parameter_variable_handle : *m_parameter_variables) { + ParameterVariable* parameter_variable = database.parameter_variables.symbol_from_handle(parameter_variable_handle); + if(parameter_variable) { + parameter_variable->m_function = handle(); + } + } + } +} + +const std::optional>& Function::local_variables() const +{ + return m_local_variables; +} + +void Function::set_local_variables( + std::optional> local_variables, SymbolDatabase& database) +{ + if(m_local_variables.has_value()) { 
+ for(LocalVariableHandle local_variable_handle : *m_local_variables) { + LocalVariable* local_variable = database.local_variables.symbol_from_handle(local_variable_handle); + if(local_variable && local_variable->m_function == handle()) { + local_variable->m_function = FunctionHandle(); + } + } + } + + m_local_variables = std::move(local_variables); + + if(m_local_variables.has_value()) { + for(LocalVariableHandle local_variable_handle : *m_local_variables) { + LocalVariable* local_variable = database.local_variables.symbol_from_handle(local_variable_handle); + if(local_variable) { + local_variable->m_function = handle(); + } + } + } +} + +const std::string& Function::mangled_name() const +{ + if(!m_mangled_name.empty()) { + return m_mangled_name; + } else { + return name(); + } +} + +void Function::set_mangled_name(std::string mangled) +{ + m_mangled_name = std::move(mangled); +} + +u32 Function::original_hash() const +{ + return m_original_hash; +} + +void Function::set_original_hash(u32 hash) +{ + m_original_hash = hash; +} + +u32 Function::current_hash() const +{ + return m_current_hash; +} + +void Function::set_current_hash(FunctionHash hash) +{ + m_current_hash = hash.get(); +} + +void Function::on_destroy(SymbolDatabase* database) +{ + if(!database) { + return; + } + + if(m_parameter_variables.has_value()) { + for(ParameterVariableHandle parameter_variable : *m_parameter_variables) { + database->parameter_variables.mark_symbol_for_destruction(parameter_variable, database); + } + } + + if(m_local_variables.has_value()) { + for(LocalVariableHandle local_variable : *m_local_variables) { + database->local_variables.mark_symbol_for_destruction(local_variable, database); + } + } +} + +// ***************************************************************************** + +const std::string& GlobalVariable::mangled_name() const +{ + if(!m_mangled_name.empty()) { + return m_mangled_name; + } else { + return name(); + } +} + +void 
GlobalVariable::set_mangled_name(std::string mangled) +{ + m_mangled_name = std::move(mangled); +} + +// ***************************************************************************** + +void Module::on_create() +{ + m_module = m_handle; +} + +// ***************************************************************************** + +bool Section::contains_code() const +{ + return name() == ".text"; +} + +bool Section::contains_data() const +{ + return name() == ".bss" + || name() == ".data" + || name() == ".lit" + || name() == ".lita" + || name() == ".lit4" + || name() == ".lit8" + || name() == ".rdata" + || name() == ".rodata" + || name() == ".sbss" + || name() == ".sdata"; +} + +// ***************************************************************************** + +const std::vector& SourceFile::functions() const +{ + return m_functions; +} + +void SourceFile::set_functions(std::vector functions, SymbolDatabase& database) +{ + for(FunctionHandle function_handle : m_functions) { + Function* function = database.functions.symbol_from_handle(function_handle); + if(function && function->m_source_file == handle()) { + function->m_source_file = SourceFileHandle(); + } + } + + m_functions = std::move(functions); + + for(FunctionHandle function_handle : m_functions) { + Function* function = database.functions.symbol_from_handle(function_handle); + if(function) { + function->m_source_file = handle(); + } + } +} + +const std::vector& SourceFile::global_variables() const +{ + return m_global_variables; +} + +void SourceFile::set_global_variables(std::vector global_variables, SymbolDatabase& database) +{ + for(GlobalVariableHandle global_variable_handle : m_global_variables) { + GlobalVariable* global_variable = database.global_variables.symbol_from_handle(global_variable_handle); + if(global_variable && global_variable->m_source_file == handle()) { + global_variable->m_source_file = SourceFileHandle(); + } + } + + m_global_variables = std::move(global_variables); + + 
for(GlobalVariableHandle global_variable_handle : m_global_variables) { + GlobalVariable* global_variable = database.global_variables.symbol_from_handle(global_variable_handle); + if(global_variable) { + global_variable->m_source_file = handle(); + } + } +} + +bool SourceFile::functions_match() const +{ + return m_functions_match; +} + +void SourceFile::check_functions_match(const SymbolDatabase& database) +{ + u32 matching = 0; + u32 modified = 0; + for(FunctionHandle function_handle : functions()) { + const ccc::Function* function = database.functions.symbol_from_handle(function_handle); + if(!function || function->original_hash() == 0) { + continue; + } + + if(function->current_hash() == function->original_hash()) { + matching++; + } else { + modified++; + } + } + + m_functions_match = matching >= modified; +} + +void SourceFile::on_destroy(SymbolDatabase* database) +{ + if(!database) { + return; + } + + for(FunctionHandle function : m_functions) { + database->functions.mark_symbol_for_destruction(function, database); + } + + for(GlobalVariableHandle global_variable : m_global_variables) { + database->global_variables.mark_symbol_for_destruction(global_variable, database); + } +} + +// ***************************************************************************** + +void SymbolSource::on_create() +{ + m_source = m_handle; +} + +// ***************************************************************************** + +bool SymbolGroup::is_in_group(const Symbol& symbol) const +{ + return symbol.source() == source && symbol.module_handle() == ModuleHandle(module_symbol); +} + +// ***************************************************************************** + +s32 SymbolDatabase::symbol_count() const +{ + s32 sum = 0; + #define CCC_X(SymbolType, symbol_list) sum += symbol_list.size(); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return sum; +} + +const Symbol* SymbolDatabase::symbol_starting_at_address( + Address address, u32 descriptors, SymbolDescriptor* 
descriptor_out) const +{ + #define CCC_X(SymbolType, symbol_list) \ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolHandle handle = symbol_list.first_handle_from_starting_address(address); \ + const SymbolType* symbol = symbol_list.symbol_from_handle(handle); \ + if(symbol) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + return symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return nullptr; +} + +const Symbol* SymbolDatabase::symbol_after_address( + Address address, u32 descriptors, SymbolDescriptor* descriptor_out) const +{ + const Symbol* result = nullptr; + #define CCC_X(SymbolType, symbol_list) \ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolHandle handle = symbol_list.first_handle_after_address(address); \ + const SymbolType* symbol = symbol_list.symbol_from_handle(handle); \ + if(symbol && (!result || symbol->address() < result->address())) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + result = symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return result; +} + +const Symbol* SymbolDatabase::symbol_overlapping_address( + Address address, u32 descriptors, SymbolDescriptor* descriptor_out) const +{ + #define CCC_X(SymbolType, symbol_list) \ + if constexpr(SymbolType::FLAGS & WITH_ADDRESS_MAP) { \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolType* symbol = symbol_list.symbol_overlapping_address(address); \ + if(symbol) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + return symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return nullptr; +} + +const Symbol* SymbolDatabase::symbol_with_name( + const std::string& name, u32 descriptors, SymbolDescriptor* descriptor_out) const +{ + #define CCC_X(SymbolType, symbol_list) \ + if 
constexpr(SymbolType::FLAGS & WITH_NAME_MAP) { /* Name lookups read the name map, which is only populated for WITH_NAME_MAP symbol types. */ \ + if(descriptors & SymbolType::DESCRIPTOR) { \ + const SymbolHandle handle = symbol_list.first_handle_from_name(name); \ + const SymbolType* symbol = symbol_list.symbol_from_handle(handle); \ + if(symbol) { \ + if(descriptor_out) { \ + *descriptor_out = SymbolType::DESCRIPTOR; \ + } \ + return symbol; \ + } \ + } \ + } + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + return nullptr; +} + +Result SymbolDatabase::get_symbol_source(const std::string& name) +{ + SymbolSourceHandle handle = symbol_sources.first_handle_from_name(name); + if(!handle.valid()) { + Result source = symbol_sources.create_symbol(name, SymbolSourceHandle(), nullptr); + CCC_RETURN_IF_ERROR(source); + handle = (*source)->handle(); + } + return handle; +} + +Result SymbolDatabase::create_data_type_if_unique( + std::unique_ptr node, + StabsTypeNumber number, + const char* name, + SourceFile& source_file, + const SymbolGroup& group) +{ + auto types_with_same_name = data_types.handles_from_name(name); + const char* compare_fail_reason = nullptr; + if(types_with_same_name.begin() == types_with_same_name.end()) { + // No types with this name have previously been processed. + Result data_type = data_types.create_symbol(name, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(data_type); + + (*data_type)->files = {source_file.handle()}; + if(number.type > -1) { + source_file.stabs_type_number_to_handle[number] = (*data_type)->handle(); + } + + (*data_type)->set_type(std::move(node)); + + return *data_type; + } else { + // Types with this name have previously been processed, we need to + // figure out if this one matches any of the previous ones.
+ bool match = false; + for(auto [key, existing_type_handle] : types_with_same_name) { + DataType* existing_type = data_types.symbol_from_handle(existing_type_handle); + CCC_ASSERT(existing_type); + + // We don't want to merge together types from different sources or + // modules so that we can destroy all the types from one source + // without breaking anything else. + if(!group.is_in_group(*existing_type)) { + continue; + } + + CCC_ASSERT(existing_type->type()); + ast::CompareResult compare_result = compare_nodes(*existing_type->type(), *node.get(), this, true); + if(compare_result.type == ast::CompareResultType::DIFFERS) { + // The new node doesn't match this existing node. + bool is_anonymous_enum = existing_type->type()->descriptor == ast::ENUM + && existing_type->name().empty(); + if(!is_anonymous_enum) { + existing_type->compare_fail_reason = compare_fail_reason_to_string(compare_result.fail_reason); + compare_fail_reason = compare_fail_reason_to_string(compare_result.fail_reason); + } + } else { + // The new node matches this existing node. + existing_type->files.emplace_back(source_file.handle()); + if(number.type > -1) { + source_file.stabs_type_number_to_handle[number] = existing_type->handle(); + } + if(compare_result.type == ast::CompareResultType::MATCHES_FAVOUR_RHS) { + // The new node almost matches the old one, but the new one + // is slightly better, so we replace the old type. + existing_type->set_type(std::move(node)); + } + match = true; + break; + } + } + + if(!match) { + // This type doesn't match any of the others with the same name + // that have already been processed. 
+ Result data_type = data_types.create_symbol(name, group.source, group.module_symbol); + CCC_RETURN_IF_ERROR(data_type); + + (*data_type)->files = {source_file.handle()}; + if(number.type > -1) { + source_file.stabs_type_number_to_handle[number] = (*data_type)->handle(); + } + (*data_type)->compare_fail_reason = compare_fail_reason; + + (*data_type)->set_type(std::move(node)); + + return *data_type; + } + } + + return nullptr; +} + +void SymbolDatabase::merge_from(SymbolDatabase& database) +{ + #define CCC_X(SymbolType, symbol_list) symbol_list.merge_from(database.symbol_list); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X +} + +void SymbolDatabase::destroy_symbols_from_source(SymbolSourceHandle source, bool destroy_descendants) +{ + SymbolDatabase* database = destroy_descendants ? this : nullptr; + + #define CCC_X(SymbolType, symbol_list) symbol_list.mark_symbols_from_source_for_destruction(source, database); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + + destroy_marked_symbols(); +} + +void SymbolDatabase::destroy_symbols_from_module(ModuleHandle module_handle, bool destroy_descendants) +{ + SymbolDatabase* database = destroy_descendants ? 
this : nullptr; + + #define CCC_X(SymbolType, symbol_list) symbol_list.mark_symbols_from_module_for_destruction(module_handle, database); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + + destroy_marked_symbols(); +} + +void SymbolDatabase::destroy_marked_symbols() +{ + #define CCC_X(SymbolType, symbol_list) symbol_list.destroy_marked_symbols(); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X +} + +void SymbolDatabase::clear() +{ + #define CCC_X(SymbolType, symbol_list) symbol_list.clear(); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X +} + +// ***************************************************************************** + +MultiSymbolHandle::MultiSymbolHandle() {} + +template +MultiSymbolHandle::MultiSymbolHandle(const SymbolType& symbol) + : MultiSymbolHandle(SymbolType::DESCRIPTOR, symbol.raw_handle()) {} + +MultiSymbolHandle::MultiSymbolHandle(SymbolDescriptor descriptor, u32 handle) + : m_descriptor(descriptor) + , m_handle(handle) {} + +bool MultiSymbolHandle::valid() const +{ + return m_handle != (u32) -1; +} + +SymbolDescriptor MultiSymbolHandle::descriptor() const +{ + return m_descriptor; +} + +u32 MultiSymbolHandle::handle() const +{ + return m_handle; +} + +Symbol* MultiSymbolHandle::lookup_symbol(SymbolDatabase& database) +{ + if(m_handle == (u32) -1) { + return nullptr; + } + + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return database.symbol_list.symbol_from_handle(m_handle); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + + return nullptr; +} + +const Symbol* MultiSymbolHandle::lookup_symbol(const SymbolDatabase& database) const +{ + return const_cast(this)->lookup_symbol(const_cast(database)); +} + +bool MultiSymbolHandle::is_flag_set(SymbolFlag flag) const +{ + if(m_handle != (u32) -1) { + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return SymbolType::FLAGS & flag; + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + 
return false; +} + +bool MultiSymbolHandle::move_symbol(Address new_address, SymbolDatabase& database) const +{ + if(m_handle != (u32) -1) { + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return database.symbol_list.move_symbol(m_handle, new_address); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + return false; +} + +bool MultiSymbolHandle::rename_symbol(std::string new_name, SymbolDatabase& database) const +{ + if(m_handle != (u32) -1) { + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + return database.symbol_list.rename_symbol(m_handle, std::move(new_name)); + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + return false; +} + +bool MultiSymbolHandle::destroy_symbol(SymbolDatabase& database, bool destroy_descendants) const +{ + bool success = false; + + if(m_handle != (u32) -1) { + SymbolDatabase* database_ptr = destroy_descendants ? &database : nullptr; + + switch(m_descriptor) { + #define CCC_X(SymbolType, symbol_list) \ + case SymbolType::DESCRIPTOR: \ + success = database.symbol_list.mark_symbol_for_destruction(m_handle, database_ptr); \ + break; + CCC_FOR_EACH_SYMBOL_TYPE_DO_X + #undef CCC_X + } + } + + if(success) { + database.destroy_marked_symbols(); + } + + return success; +} + +#define CCC_X(SymbolType, symbol_list) template MultiSymbolHandle::MultiSymbolHandle(const SymbolType& symbol); +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +// ***************************************************************************** + +NodeHandle::NodeHandle() {} + +NodeHandle::NodeHandle(const ast::Node* node) + : m_node(node) {} + +template +NodeHandle::NodeHandle(const SymbolType& symbol, const ast::Node* node) + : NodeHandle(SymbolType::DESCRIPTOR, symbol, node) {} + +NodeHandle::NodeHandle(SymbolDescriptor descriptor, const Symbol& symbol, const ast::Node* node) + : m_symbol(descriptor, symbol.raw_handle()) + , m_node(node) + , 
m_generation(symbol.generation()) {} + +bool NodeHandle::valid() const +{ + return m_node != nullptr; +} + +const MultiSymbolHandle& NodeHandle::symbol() const +{ + return m_symbol; +} + +const ast::Node* NodeHandle::lookup_node(const SymbolDatabase& database) const +{ + if(m_symbol.valid()) { + const Symbol* symbol = m_symbol.lookup_symbol(database); + if(!symbol || symbol->generation() != m_generation) { + return nullptr; + } + } + return m_node; +} + +NodeHandle NodeHandle::handle_for_child(const ast::Node* child_node) const +{ + NodeHandle child_handle; + child_handle.m_symbol = m_symbol; + child_handle.m_node = child_node; + child_handle.m_generation = m_generation; + return child_handle; +} + +#define CCC_X(SymbolType, symbol_list) template NodeHandle::NodeHandle(const SymbolType& symbol, const ast::Node* node); +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +} diff --git a/3rdparty/ccc/src/ccc/symbol_database.h b/3rdparty/ccc/src/ccc/symbol_database.h new file mode 100644 index 0000000000..52c6f1ece4 --- /dev/null +++ b/3rdparty/ccc/src/ccc/symbol_database.h @@ -0,0 +1,721 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#include "util.h" + +namespace ccc { + +// An X macro for all the symbol types. +#define CCC_FOR_EACH_SYMBOL_TYPE_DO_X \ + CCC_X(DataType, data_types) \ + CCC_X(Function, functions) \ + CCC_X(GlobalVariable, global_variables) \ + CCC_X(Label, labels) \ + CCC_X(LocalVariable, local_variables) \ + CCC_X(Module, modules) \ + CCC_X(ParameterVariable, parameter_variables) \ + CCC_X(Section, sections) \ + CCC_X(SourceFile, source_files) \ + CCC_X(SymbolSource, symbol_sources) + +// An enum for all the symbol types. 
+enum SymbolDescriptor { + DATA_TYPE = 1 << 0, + FUNCTION = 1 << 1, + GLOBAL_VARIABLE = 1 << 2, + LABEL = 1 << 3, + LOCAL_VARIABLE = 1 << 4, + MODULE = 1 << 5, + PARAMETER_VARIABLE = 1 << 6, + SECTION = 1 << 7, + SOURCE_FILE = 1 << 8, + SYMBOL_SOURCE = 1 << 9 +}; + +enum { + ALL_SYMBOL_TYPES = 0xffff +}; + +// Forward declare all the different types of symbol objects. +#define CCC_X(SymbolType, symbol_list) class SymbolType; +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +class SymbolDatabase; + +// Strongly typed handles for all of the symbol objects. These are here to solve +// the problem of dangling references to symbols. +template +struct SymbolHandle { + u32 value = (u32) -1; + + SymbolHandle() {} + SymbolHandle(u32 v) : value(v) {} + SymbolHandle(const SymbolType* symbol) + : value(symbol ? symbol->handle().value : (u32) -1) {} + + // Check if this symbol handle has been initialised. Note that this doesn't + // determine whether or not the symbol it points to has been deleted! + bool valid() const { return value != (u32) -1; } + + friend auto operator<=>(const SymbolHandle& lhs, const SymbolHandle& rhs) = default; +}; + +#define CCC_X(SymbolType, symbol_list) using SymbolType##Handle = SymbolHandle; +CCC_FOR_EACH_SYMBOL_TYPE_DO_X +#undef CCC_X + +enum SymbolFlag { + NO_SYMBOL_FLAGS = 0, + WITH_ADDRESS_MAP = 1 << 0, + WITH_NAME_MAP = 1 << 1, + NAME_NEEDS_DEMANGLING = 1 << 2 +}; + +// A container class for symbols of a given type that maintains maps of their +// names and addresses depending on the value of SymbolType::FLAGS. +template +class SymbolList { +public: + // Lookup symbols from their handles using binary search. + SymbolType* symbol_from_handle(SymbolHandle handle); + const SymbolType* symbol_from_handle(SymbolHandle handle) const; + + // Lookup multiple symbols from their handles using binary search. 
+ std::vector symbols_from_handles(const std::vector>& handles); + std::vector symbols_from_handles(const std::vector>& handles) const; + std::vector optional_symbols_from_handles(const std::optional>>& handles); + std::vector optional_symbols_from_handles(const std::optional>>& handles) const; + + using Iterator = typename std::vector::iterator; + using ConstIterator = typename std::vector::const_iterator; + + // For iterating over all the symbols. + Iterator begin(); + ConstIterator begin() const; + Iterator end(); + ConstIterator end() const; + + using AddressToHandleMap = std::multimap>; + using NameToHandleMap = std::multimap>; + + template + class Iterators { + public: + Iterators(Iterator b, Iterator e) + : m_begin(b), m_end(e) {} + Iterator begin() const { return m_begin; } + Iterator end() const { return m_end; } + protected: + Iterator m_begin; + Iterator m_end; + }; + + using AddressToHandleMapIterators = Iterators; + using NameToHandleMapIterators = Iterators; + + // Lookup symbols by their address. + AddressToHandleMapIterators handles_from_starting_address(Address address) const; + AddressToHandleMapIterators handles_from_address_range(AddressRange range) const; + SymbolHandle first_handle_from_starting_address(Address address) const; + SymbolHandle first_handle_after_address(Address address) const; + + // Lookup symbols by their name. + NameToHandleMapIterators handles_from_name(const std::string& name) const; + SymbolHandle first_handle_from_name(const std::string& name) const; + + // Find a symbol with an address range that contains the provided address. + // For example, to find which function an instruction belongs to. + SymbolType* symbol_overlapping_address(Address address); + const SymbolType* symbol_overlapping_address(Address address) const; + + // Convert handles to underlying array indices. + s32 index_from_handle(SymbolHandle handle) const; + + // Index into the underlying array. 
+ SymbolType& symbol_from_index(s32 index); + const SymbolType& symbol_from_index(s32 index) const; + + // Determine if any symbols are being stored. + bool empty() const; + + // Retrieve the number of symbols stored. + s32 size() const; + + // Create a new symbol. If it's a SymbolSource symbol, source can be left + // empty, otherwise it has to be valid. + Result create_symbol( + std::string name, Address address, SymbolSourceHandle source, const Module* module_symbol = nullptr); + + // Create a new symbol. Similar to above, but for symbols without addresses. + Result create_symbol( + std::string name, SymbolSourceHandle source, const Module* module_symbol = nullptr); + + // Create a new symbol. Similar to above, but unless DONT_DEMANGLE_NAMES is + // set, the name of the symbol will be demangled. + Result create_symbol( + std::string name, + SymbolSourceHandle source, + const Module* module_symbol, + Address address, + u32 importer_flags, + DemanglerFunctions demangler); + + // Update the address of a symbol without changing its handle. + bool move_symbol(SymbolHandle handle, Address new_address); + + // Update the name of a symbol without changing its handle. + bool rename_symbol(SymbolHandle handle, std::string new_name); + + // Move all the symbols from the passed list into this list. + void merge_from(SymbolList& list); + + // Mark a symbol for destruction. If the correct symbol database pointer is + // passed, all descendants will also be marked. For example, marking a + // function will also mark its parameters and local variables. + bool mark_symbol_for_destruction(SymbolHandle handle, SymbolDatabase* database); + + // Mark all the symbols from a given symbol source for destruction. For + // example you can use this to free a symbol table without destroying + // user-defined symbols. The behaviour for marking descendants is the same + // as destroy_symbol. 
+ void mark_symbols_from_source_for_destruction(SymbolSourceHandle source, SymbolDatabase* database); + + // Mark all the symbols from a given module for destruction. The behaviour + // for marking descendants is the same as destroy_symbol. + void mark_symbols_from_module_for_destruction(ModuleHandle module_handle, SymbolDatabase* database); + + // Destroy all symbols that have previously been marked for destruction. + // This invalidates all pointers to symbols in this list. + void destroy_marked_symbols(); + + // Destroy all symbols, but don't reset m_next_handle so we don't have to + // worry about dangling handles. + void clear(); + +protected: + // Do a binary search for a handle, and return either its index, or the + // index where it could be inserted. + size_t binary_search(SymbolHandle handle) const; + + // Keep the address map in sync with the symbol list. + void link_address_map(SymbolType& symbol); + void unlink_address_map(SymbolType& symbol); + + // Keep the name map in sync with the symbol list. + void link_name_map(SymbolType& symbol); + void unlink_name_map(SymbolType& symbol); + + std::vector m_symbols; + AddressToHandleMap m_address_to_handle; + NameToHandleMap m_name_to_handle; + + // We share this between symbol lists of the same type so that we can merge + // them without having to rewrite all the handles. + static std::atomic m_next_handle; +}; + +// Base class for all the symbols. 
+class Symbol { + template + friend class SymbolList; +public: + const std::string& name() const { return m_name; } + u32 raw_handle() const { return m_handle; } + SymbolSourceHandle source() const { return m_source; } + ModuleHandle module_handle() const { return m_module; } + + Address address() const { return m_address; } + u32 size() const { return m_size; } + void set_size(u32 size) { m_size = size; } + AddressRange address_range() const { return AddressRange(m_address, m_address.get_or_zero() + m_size); } + + ast::Node* type() { return m_type.get(); } + const ast::Node* type() const { return m_type.get(); } + void set_type(std::unique_ptr type); + + u32 generation() const { return m_generation; } + + // This MUST be called after any AST nodes have been created/deleted/moved. + // For the set_type function this is done for you. + void invalidate_node_handles() { m_generation++; } + + // Mark a single symbol for destruction, not including its descendants. + void mark_for_destruction() { m_marked_for_destruction = true; } + bool is_marked_for_destruction() { return m_marked_for_destruction; } + +protected: + void on_create() {} + void on_destroy(SymbolDatabase* database) {} + + u32 m_handle = (u32) -1; + SymbolSourceHandle m_source; + Address m_address; + u32 m_size = 0; + std::string m_name; + std::unique_ptr m_type; + u32 m_generation : 31 = 0; + u32 m_marked_for_destruction : 1 = false; + ModuleHandle m_module; +}; + +// Variable storage types. This is different to whether the variable is a +// global, local or parameter. For example local variables can have global +// storage (static locals). 
+ +enum GlobalStorageLocation { + NIL, + DATA, + BSS, + ABS, + SDATA, + SBSS, + RDATA, + COMMON, + SCOMMON, + SUNDEFINED +}; + +const char* global_storage_location_to_string(GlobalStorageLocation location); + +struct GlobalStorage { + GlobalStorageLocation location = GlobalStorageLocation::NIL; + + GlobalStorage() {} + friend auto operator<=>(const GlobalStorage& lhs, const GlobalStorage& rhs) = default; +}; + +struct RegisterStorage { + s32 dbx_register_number = -1; + bool is_by_reference; + + RegisterStorage() {} + friend auto operator<=>(const RegisterStorage& lhs, const RegisterStorage& rhs) = default; +}; + +struct StackStorage { + s32 stack_pointer_offset = -1; + + StackStorage() {} + friend auto operator<=>(const StackStorage& lhs, const StackStorage& rhs) = default; +}; + +// The hashing algorithm for functions. If you change this algorithm make sure +// to bump the version number for the JSON format so we can know if a hash was +// generated using the new algorithm or not. +class FunctionHash { +public: + void update(u32 instruction) + { + // Separate out the opcode so that the hash remains the same regardless + // of if relocations are applied or not. + u32 opcode = instruction >> 26; + m_hash = m_hash * 31 + opcode; + } + + u32 get() const + { + return m_hash; + } + +protected: + u32 m_hash = 0; +}; + +// All the different types of symbol objects. + +// A C/C++ data type. +class DataType : public Symbol { + friend SourceFile; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = DATA_TYPE; + static constexpr const char* NAME = "Data Type"; + static constexpr const u32 FLAGS = WITH_NAME_MAP; + + DataTypeHandle handle() const { return m_handle; } + + std::vector files; // List of files for which a given top-level type is present. + const char* compare_fail_reason = nullptr; + + bool not_defined_in_any_translation_unit : 1 = false; + bool only_defined_in_single_translation_unit : 1 = false; +}; + +// A function. The type stored is the return type. 
+class Function : public Symbol { + friend SourceFile; + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = FUNCTION; + static constexpr const char* NAME = "Function"; + static constexpr const u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP | NAME_NEEDS_DEMANGLING; + + FunctionHandle handle() const { return m_handle; } + SourceFileHandle source_file() const { return m_source_file; } + + const std::optional>& parameter_variables() const; + void set_parameter_variables(std::optional> parameter_variables, SymbolDatabase& database); + + const std::optional>& local_variables() const; + void set_local_variables(std::optional> local_variables, SymbolDatabase& database); + + const std::string& mangled_name() const; + void set_mangled_name(std::string mangled); + + // A hash of all the opcodes in the function, read from file. + u32 original_hash() const; + void set_original_hash(u32 hash); + + // A hash of all the opcodes in the function, read from memory. + u32 current_hash() const; + void set_current_hash(FunctionHash hash); + + struct LineNumberPair { + Address address; + s32 line_number; + }; + + struct SubSourceFile { + Address address; + std::string relative_path; + }; + + std::string relative_path; + StorageClass storage_class; + s32 stack_frame_size = -1; + std::vector line_numbers; + std::vector sub_source_files; + bool is_member_function_ish = false; // Filled in by fill_in_pointers_to_member_function_definitions. + bool is_no_return = false; + +protected: + void on_destroy(SymbolDatabase* database); + + SourceFileHandle m_source_file; + std::optional> m_parameter_variables; + std::optional> m_local_variables; + + std::string m_mangled_name; + + u32 m_original_hash = 0; + u32 m_current_hash = 0; +}; + +// A global variable. 
+class GlobalVariable : public Symbol { + friend SourceFile; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = GLOBAL_VARIABLE; + static constexpr const char* NAME = "Global Variable"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP | NAME_NEEDS_DEMANGLING; + + GlobalVariableHandle handle() const { return m_handle; } + SourceFileHandle source_file() const { return m_source_file; }; + + const std::string& mangled_name() const; + void set_mangled_name(std::string mangled); + + GlobalStorage storage; + StorageClass storage_class; + +protected: + SourceFileHandle m_source_file; + std::string m_mangled_name; +}; + +// A label. This could be a label defined in assembly, C/C++, or just a symbol +// that we can't automatically determine the type of (e.g. SNDLL symbols). +class Label : public Symbol { +public: + static constexpr const SymbolDescriptor DESCRIPTOR = LABEL; + static constexpr const char* NAME = "Label"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP; + + LabelHandle handle() const { return m_handle; } + + // Indicates that this label should not be used as a function name. + bool is_junk = false; +}; + +// A local variable. This includes static local variables which have global +// storage. +class LocalVariable : public Symbol { + friend Function; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = LOCAL_VARIABLE; + static constexpr const char* NAME = "Local Variable"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP; + + LocalVariableHandle handle() const { return m_handle; } + FunctionHandle function() const { return m_function; }; + + std::variant storage; + AddressRange live_range; + +protected: + FunctionHandle m_function; +}; + +// A program module e.g. an ELF file or an SNDLL file. Every symbol has a module +// field indicating what module the symbol belongs to. This can be used to +// delete all the symbols associated with a given module. 
Additionally, when a +// valid module pointer is passed to SymbolList<>::create_symbol, the address of +// the symbol will be added to the address of the new symbol. +class Module : public Symbol { + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = MODULE; + static constexpr const char* NAME = "Module"; + static constexpr u32 FLAGS = WITH_NAME_MAP; + + ModuleHandle handle() const { return m_handle; } + + // These are used for IRX modules. + bool is_irx = false; + s32 version_major = -1; + s32 version_minor = -1; + +protected: + void on_create(); +}; + +// A parameter variable. +class ParameterVariable : public Symbol { + friend Function; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = PARAMETER_VARIABLE; + static constexpr const char* NAME = "Parameter Variable"; + static constexpr u32 FLAGS = NO_SYMBOL_FLAGS; + + ParameterVariableHandle handle() const { return m_handle; } + FunctionHandle function() const { return m_function; }; + + std::variant storage; + +protected: + FunctionHandle m_function; +}; + +// An ELF section. These are created from the ELF section headers. +class Section : public Symbol { +public: + static constexpr const SymbolDescriptor DESCRIPTOR = SECTION; + static constexpr const char* NAME = "Section"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP; + + SectionHandle handle() const { return m_handle; } + + // Check if the section name is ".text". + bool contains_code() const; + + // Check for known data section names. + bool contains_data() const; +}; + +// A source file (.c or .cpp file). One of these will be created for every +// translation unit in the program (but only if debugging symbols are present). 
+class SourceFile : public Symbol { + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = SOURCE_FILE; + static constexpr const char* NAME = "Source File"; + static constexpr u32 FLAGS = WITH_ADDRESS_MAP | WITH_NAME_MAP; + + SourceFileHandle handle() const { return m_handle; } + const std::string& full_path() const { return name(); } + + const std::vector& functions() const; + void set_functions(std::vector functions, SymbolDatabase& database); + + const std::vector& global_variables() const; + void set_global_variables(std::vector global_variables, SymbolDatabase& database); + + // Check whether at least half of the functions associated with the source + // file match their original hash (meaning they haven't been overwritten). + bool functions_match() const; + void check_functions_match(const SymbolDatabase& database); + + std::string working_dir; + std::string command_line_path; + std::map stabs_type_number_to_handle; + std::set toolchain_version_info; + +protected: + void on_destroy(SymbolDatabase* database); + + std::vector m_functions; + std::vector m_global_variables; + bool m_functions_match = true; +}; + +// A symbol source. Every symbol has a symbol source field indicating how the +// symbol was created. For example, the symbol table importers will each create +// one of these (if it doesn't already exist). +class SymbolSource : public Symbol { + friend SymbolList; +public: + static constexpr const SymbolDescriptor DESCRIPTOR = SYMBOL_SOURCE; + static constexpr const char* NAME = "Symbol Source"; + static constexpr u32 FLAGS = WITH_NAME_MAP; + + SymbolSourceHandle handle() const { return m_handle; } + +protected: + void on_create(); +}; + +// Bundles together all the information needed to identify if a symbol came from +// a specific symbol table import operation. For example, this is used to make +// sure that we don't reference symbols from another symbol table during the +// import process. 
+struct SymbolGroup { + SymbolSourceHandle source; + Module* module_symbol = nullptr; + + bool is_in_group(const Symbol& symbol) const; +}; + +// The symbol database itself. This owns all the symbols. +class SymbolDatabase { +public: + SymbolList data_types; + SymbolList functions; + SymbolList global_variables; + SymbolList