From dbff32fdaa0205f3cbc6bc33d62ca19712d9fcf8 Mon Sep 17 00:00:00 2001 From: zeromus Date: Thu, 7 Jan 2021 16:53:59 -0500 Subject: [PATCH] dive into the N-layer muck of zip file opening and try to run amok on filenames with weird characters in them (should fix #412) --- desmume/src/emufile.cpp | 3 +- .../src/frontend/windows/FEX_Interface.cpp | 6 ++-- .../File_Extractor/fex/Zip_Extractor.cpp | 33 ++++++++++++++++++- .../File_Extractor/fex/Zip_Extractor.h | 3 ++ desmume/src/frontend/windows/OpenArchive.cpp | 7 ++-- 5 files changed, 43 insertions(+), 9 deletions(-) diff --git a/desmume/src/emufile.cpp b/desmume/src/emufile.cpp index 0448fa365..9aae7099f 100644 --- a/desmume/src/emufile.cpp +++ b/desmume/src/emufile.cpp @@ -517,7 +517,8 @@ void EMUFILE_FILE::open(const char* fname, const char* mode) mCondition = eCondition_Clean; mFilePosition = 0; #ifdef HOST_WINDOWS - fp = _wfopen(mbstowcs((std::string)fname).c_str(),mbstowcs(mode).c_str()); + auto tmp = mbstowcs((std::string)fname); + fp = _wfopen(tmp.c_str(),mbstowcs(mode).c_str()); #else fp = fopen(fname,mode); #endif diff --git a/desmume/src/frontend/windows/FEX_Interface.cpp b/desmume/src/frontend/windows/FEX_Interface.cpp index 75693409a..e763d1d6a 100644 --- a/desmume/src/frontend/windows/FEX_Interface.cpp +++ b/desmume/src/frontend/windows/FEX_Interface.cpp @@ -193,10 +193,8 @@ ArchiveFile::ArchiveFile(const char* filename) else { const char* name = fex_name(object); - wchar_t temp_wchar[MAX_PATH]; - //what code page to use??? who knows. 
- MultiByteToWideChar(CP_ACP,0,name,-1,temp_wchar,ARRAY_SIZE(temp_wchar)); - item.wname = _wcsdup(temp_wchar); + item.wname = _wcsdup(mbstowcs(name).c_str()); + item.name = strdup(item.name); } diff --git a/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.cpp b/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.cpp index 2293d3fc6..081f15b1e 100644 --- a/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.cpp +++ b/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.cpp @@ -22,6 +22,10 @@ of hacky, but I'd rather not have to allocate memory for a copy of it. */ #include "blargg_source.h" +#ifdef _MSC_VER +#include <windows.h> +#endif + /* Reads this much from end of file when first opening. Only this much is searched for the end catalog entry. If whole catalog is within this data, nothing more needs to be read on open. */ @@ -120,6 +124,8 @@ Zip_Extractor::Zip_Extractor() : Zip_Extractor::~Zip_Extractor() { + for(auto tp : tmppaths) + free(tp); close(); } @@ -251,7 +257,32 @@ blargg_err_t Zip_Extractor::update_info( bool advance_first ) if ( is_normal_file( e, len ) ) { - set_name( e.filename ); + if(e.flags[1] & 0x08) + { + //known to be UTF8 + set_name( e.filename ); + } + else + { + //known not to be UTF8. + //we COULD use CP_OEMCP and it would probably be a correct guess + //but using CP_UTF8 in windows will give us unknown characters for anything >= 0x80 + //these results won't be ideal, but they will be less likely to be a malfunctioned mess. + //then again, CP_OEMCP will simply give us weird latin characters (or otherwise single, valid characters) for unknown stuff. 
+ //That's not so bad either + //If there's ever any proof that invalid characters (for pathnames) are produced, then we will change it to CP_UTF8 + #ifdef _MSC_VER + wchar_t *temp_wchar = (wchar_t*)malloc(sizeof(wchar_t)*MAX_PATH); + tmppaths.push_back(temp_wchar); + MultiByteToWideChar(CP_OEMCP,0,e.filename,-1,temp_wchar,MAX_PATH); + char *temp_char = (char*)malloc(MAX_PATH*4+4); + tmppaths.push_back(temp_char); + WideCharToMultiByte(CP_UTF8,0,temp_wchar,-1,temp_char,MAX_PATH*4,nullptr,nullptr); + set_name(temp_char); + #else + set_name( e.filename ); + #endif + } set_info( get_le32( e.size ), get_le32( e.date ), get_le32( e.crc ) ); break; } diff --git a/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.h b/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.h index 9f0d8adcb..f7666579d 100644 --- a/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.h +++ b/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.h @@ -7,6 +7,8 @@ #include "File_Extractor.h" #include "Zlib_Inflater.h" +#include <vector> + class Zip_Extractor : public File_Extractor { public: Zip_Extractor(); @@ -34,6 +36,7 @@ private: unsigned long correct_crc; bool file_deflated; Zlib_Inflater buf; + std::vector<void*> tmppaths; blargg_err_t fill_buf( long offset, long buf_size, long initial_read ); blargg_err_t update_info( bool advance_first ); diff --git a/desmume/src/frontend/windows/OpenArchive.cpp b/desmume/src/frontend/windows/OpenArchive.cpp index 2ee483356..ed7086ec3 100644 --- a/desmume/src/frontend/windows/OpenArchive.cpp +++ b/desmume/src/frontend/windows/OpenArchive.cpp @@ -464,7 +464,8 @@ bool ObtainFile(const char* Name, char *const & LogicalName, char *const & Physi if(item < 0) item = ChooseItemFromArchive(archive, !forceManual, ignoreExtensions, numIgnoreExtensions); - const char* TempFileName = s_tempFiles.GetFile(category, strrchr(archive.GetItemName(item), '.')); + const char* itemName = archive.GetItemName(item); + const char* TempFileName = 
s_tempFiles.GetFile(category, strrchr(itemName, '.')); if(!archive.ExtractItem(item, TempFileName)) s_tempFiles.ReleaseFile(TempFileName); s_tempFiles.ReleaseFile(PhysicalName); @@ -472,9 +473,9 @@ bool ObtainFile(const char* Name, char *const & LogicalName, char *const & Physi const wchar_t* itemNameW = archive.GetItemNameW(item); - //convert the itemname to local encoding + //convert the itemname to utf8 char itemname_utf8[MAX_PATH*4]; - WideCharToMultiByte(CP_THREAD_ACP,0,itemNameW,-1,itemname_utf8,ARRAY_SIZE(itemname_utf8),NULL,NULL); + WideCharToMultiByte(CP_UTF8,0,itemNameW,-1,itemname_utf8,ARRAY_SIZE(itemname_utf8),NULL,NULL); //strcat(LogicalName,itemname_utf8); _snprintf(LogicalName + strlen(LogicalName), 1024 - (strlen(LogicalName)+1), "|%s", itemname_utf8);