dive into the N-layer muck of zip file opening and try to run amok on filenames with weird characters in them (should fix #412)

This commit is contained in:
zeromus 2021-01-07 16:53:59 -05:00
parent aeaf404177
commit dbff32fdaa
5 changed files with 43 additions and 9 deletions

View File

@ -517,7 +517,8 @@ void EMUFILE_FILE::open(const char* fname, const char* mode)
mCondition = eCondition_Clean;
mFilePosition = 0;
#ifdef HOST_WINDOWS
fp = _wfopen(mbstowcs((std::string)fname).c_str(),mbstowcs(mode).c_str());
auto tmp = mbstowcs((std::string)fname);
fp = _wfopen(tmp.c_str(),mbstowcs(mode).c_str());
#else
fp = fopen(fname,mode);
#endif

View File

@ -193,10 +193,8 @@ ArchiveFile::ArchiveFile(const char* filename)
else
{
const char* name = fex_name(object);
wchar_t temp_wchar[MAX_PATH];
//what code page to use??? who knows.
MultiByteToWideChar(CP_ACP,0,name,-1,temp_wchar,ARRAY_SIZE(temp_wchar));
item.wname = _wcsdup(temp_wchar);
item.wname = _wcsdup(mbstowcs(name).c_str());
item.name = strdup(item.name);
}

View File

@ -22,6 +22,10 @@ of hacky, but I'd rather not have to allocate memory for a copy of it. */
#include "blargg_source.h"
#ifdef _MSC_VER
#include <Windows.h>
#endif
/* Reads this much from end of file when first opening. Only this much is
searched for the end catalog entry. If whole catalog is within this data,
nothing more needs to be read on open. */
@ -120,6 +124,8 @@ Zip_Extractor::Zip_Extractor() :
// Destructor: releases every temporary filename buffer recorded in tmppaths,
// then closes the extractor.
Zip_Extractor::~Zip_Extractor()
{
	// The buffers stored in tmppaths were obtained with malloc, so each one is
	// handed back with free().
	for ( auto it = tmppaths.begin(); it != tmppaths.end(); ++it )
	{
		free( *it );
	}

	close();
}
@ -251,7 +257,32 @@ blargg_err_t Zip_Extractor::update_info( bool advance_first )
if ( is_normal_file( e, len ) )
{
if(e.flags[1] & 0x08)
{
//known to be UTF8
set_name( e.filename );
}
else
{
//known not to be UTF8.
//we COULD use CP_OEMCP and it would probably be a correct guess
//but using CP_UTF8 in windows will give us unknown characters for anything >= 0x80
//these results won't be ideal, but they will be less likely to be a malfunctioned mess.
//then again, CP_OEMCP will simply give us weird latin characters (or otherwise single, valid characters) for unknown stuff.
//That's not so bad either
//If there's ever any proof that invalid characters (for pathnames) are produced, then we will change it to CP_UTF8
#ifdef _MSC_VER
wchar_t *temp_wchar = (wchar_t*)malloc(sizeof(wchar_t)*MAX_PATH);
tmppaths.push_back(temp_wchar);
MultiByteToWideChar(CP_OEMCP,0,e.filename,-1,temp_wchar,MAX_PATH);
char *temp_char = (char*)malloc(MAX_PATH*4+4);
tmppaths.push_back(temp_char);
WideCharToMultiByte(CP_UTF8,0,temp_wchar,-1,temp_char,MAX_PATH*4,nullptr,nullptr);
set_name(temp_char);
#else
set_name( e.filename );
#endif
}
set_info( get_le32( e.size ), get_le32( e.date ), get_le32( e.crc ) );
break;
}

View File

@ -7,6 +7,8 @@
#include "File_Extractor.h"
#include "Zlib_Inflater.h"
#include <vector>
class Zip_Extractor : public File_Extractor {
public:
Zip_Extractor();
@ -34,6 +36,7 @@ private:
unsigned long correct_crc;
bool file_deflated;
Zlib_Inflater buf;
std::vector<void*> tmppaths;
blargg_err_t fill_buf( long offset, long buf_size, long initial_read );
blargg_err_t update_info( bool advance_first );

View File

@ -464,7 +464,8 @@ bool ObtainFile(const char* Name, char *const & LogicalName, char *const & Physi
if(item < 0)
item = ChooseItemFromArchive(archive, !forceManual, ignoreExtensions, numIgnoreExtensions);
const char* TempFileName = s_tempFiles.GetFile(category, strrchr(archive.GetItemName(item), '.'));
const char* itemName = archive.GetItemName(item);
const char* TempFileName = s_tempFiles.GetFile(category, strrchr(itemName, '.'));
if(!archive.ExtractItem(item, TempFileName))
s_tempFiles.ReleaseFile(TempFileName);
s_tempFiles.ReleaseFile(PhysicalName);
@ -472,9 +473,9 @@ bool ObtainFile(const char* Name, char *const & LogicalName, char *const & Physi
const wchar_t* itemNameW = archive.GetItemNameW(item);
//convert the itemname to local encoding
//convert the itemname to utf8
char itemname_utf8[MAX_PATH*4];
WideCharToMultiByte(CP_THREAD_ACP,0,itemNameW,-1,itemname_utf8,ARRAY_SIZE(itemname_utf8),NULL,NULL);
WideCharToMultiByte(CP_UTF8,0,itemNameW,-1,itemname_utf8,ARRAY_SIZE(itemname_utf8),NULL,NULL);
//strcat(LogicalName,itemname_utf8);
_snprintf(LogicalName + strlen(LogicalName), 1024 - (strlen(LogicalName)+1), "|%s", itemname_utf8);