dive into the N-layer muck of zip file opening and try to run amok on filenames with weird characters in them (should fix #412)

2021-01-07 16:53:59 -05:00 · 2021-01-07 16:53:59 -05:00 · dbff32fdaa
parent aeaf404177
commit dbff32fdaa
5 changed files with 43 additions and 9 deletions
--- a/desmume/src/emufile.cpp
+++ b/desmume/src/emufile.cpp
@ -517,7 +517,8 @@ void EMUFILE_FILE::open(const char* fname, const char* mode)
 	mCondition = eCondition_Clean;
 	mFilePosition = 0;
 	#ifdef HOST_WINDOWS
-	fp = _wfopen(mbstowcs((std::string)fname).c_str(),mbstowcs(mode).c_str());
+	auto tmp = mbstowcs((std::string)fname);
+	fp = _wfopen(tmp.c_str(),mbstowcs(mode).c_str());
 	#else
 	fp = fopen(fname,mode);
 	#endif
--- a/desmume/src/frontend/windows/FEX_Interface.cpp
+++ b/desmume/src/frontend/windows/FEX_Interface.cpp
@ -193,10 +193,8 @@ ArchiveFile::ArchiveFile(const char* filename)
 				else
 				{
 					const char* name = fex_name(object);
-					wchar_t temp_wchar[MAX_PATH];
-					//what code page to use??? who knows. 
-					MultiByteToWideChar(CP_ACP,0,name,-1,temp_wchar,ARRAY_SIZE(temp_wchar));
-					item.wname = _wcsdup(temp_wchar);
+					item.wname = _wcsdup(mbstowcs(name).c_str());
+					item.name = strdup(item.name);
 				}


--- a/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.cpp
+++ b/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.cpp
@ -22,6 +22,10 @@ of hacky, but I'd rather not have to allocate memory for a copy of it. */

 #include "blargg_source.h"

+#ifdef _MSC_VER
+#include <Windows.h>
+#endif
+
 /* Reads this much from end of file when first opening. Only this much is
 searched for the end catalog entry. If whole catalog is within this data,
 nothing more needs to be read on open. */
@ -120,6 +124,8 @@ Zip_Extractor::Zip_Extractor() :

 Zip_Extractor::~Zip_Extractor()
 {
+	for(auto tp : tmppaths) //release the temporary name buffers that update_info() malloc'd and stored in tmppaths
+		free(tp);
 	close();
 }

@ -251,7 +257,32 @@ blargg_err_t Zip_Extractor::update_info( bool advance_first )
 			
 			if ( is_normal_file( e, len ) )
 			{
-				set_name( e.filename );
+				if(e.flags[1] & 0x08)
+				{
+					//known to be UTF8
+					set_name( e.filename );
+				}
+				else
+				{
+					//known not to be UTF8.
+					//we use CP_OEMCP here, which is probably a correct guess.
+					//the alternative, CP_UTF8, would give us unknown characters on windows for anything >= 0x80
+					//these results won't be ideal, but they will be less likely to be a malfunctioned mess.
+					//then again, CP_OEMCP will simply give us weird latin characters (or otherwise single, valid characters) for unknown stuff. 
+					//That's not so bad either
+					//If there's ever any proof that invalid characters (for pathnames) are produced, then we will change it to CP_UTF8
+					#ifdef _MSC_VER
+					wchar_t *temp_wchar = (wchar_t*)malloc(sizeof(wchar_t)*MAX_PATH);
+					tmppaths.push_back(temp_wchar);
+					MultiByteToWideChar(CP_OEMCP,0,e.filename,-1,temp_wchar,MAX_PATH);
+					char *temp_char = (char*)malloc(MAX_PATH*4+4);
+					tmppaths.push_back(temp_char);
+					WideCharToMultiByte(CP_UTF8,0,temp_wchar,-1,temp_char,MAX_PATH*4,nullptr,nullptr);
+					set_name(temp_char);
+					#else
+					set_name( e.filename );
+					#endif
+				}
 				set_info( get_le32( e.size ), get_le32( e.date ), get_le32( e.crc ) );
 				break;
 			}
--- a/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.h
+++ b/desmume/src/frontend/windows/File_Extractor/fex/Zip_Extractor.h
@ -7,6 +7,8 @@
 #include "File_Extractor.h"
 #include "Zlib_Inflater.h"

+#include <vector>
+
 class Zip_Extractor : public File_Extractor {
 public:
 	Zip_Extractor();
@ -34,6 +36,7 @@ private:
 	unsigned long correct_crc;
 	bool file_deflated;
 	Zlib_Inflater buf;
+	std::vector<void*> tmppaths;

 	blargg_err_t fill_buf( long offset, long buf_size, long initial_read );
 	blargg_err_t update_info( bool advance_first );
--- a/desmume/src/frontend/windows/OpenArchive.cpp
+++ b/desmume/src/frontend/windows/OpenArchive.cpp
@ -464,7 +464,8 @@ bool ObtainFile(const char* Name, char *const & LogicalName, char *const & Physi
 			if(item < 0)
 				item = ChooseItemFromArchive(archive, !forceManual, ignoreExtensions, numIgnoreExtensions);

-			const char* TempFileName = s_tempFiles.GetFile(category, strrchr(archive.GetItemName(item), '.'));
+			const char* itemName = archive.GetItemName(item);
+			const char* TempFileName = s_tempFiles.GetFile(category, strrchr(itemName, '.'));
 			if(!archive.ExtractItem(item, TempFileName))
 				s_tempFiles.ReleaseFile(TempFileName);
 			s_tempFiles.ReleaseFile(PhysicalName);
@ -472,9 +473,9 @@ bool ObtainFile(const char* Name, char *const & LogicalName, char *const & Physi

 			const wchar_t* itemNameW = archive.GetItemNameW(item);

-			//convert the itemname to local encoding
+			//convert the itemname to utf8
 			char itemname_utf8[MAX_PATH*4];
-			WideCharToMultiByte(CP_THREAD_ACP,0,itemNameW,-1,itemname_utf8,ARRAY_SIZE(itemname_utf8),NULL,NULL);
+			WideCharToMultiByte(CP_UTF8,0,itemNameW,-1,itemname_utf8,ARRAY_SIZE(itemname_utf8),NULL,NULL);

 			//strcat(LogicalName,itemname_utf8);
 			_snprintf(LogicalName + strlen(LogicalName), 1024 - (strlen(LogicalName)+1), "|%s", itemname_utf8);