gsdx-ogl_wnd: Merging r5633 through r5661

git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl-wnd@5662 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2013-06-15 09:05:02 +00:00
commit f8242bd458
39 changed files with 4781 additions and 1606 deletions

View File

@ -100,10 +100,18 @@ struct Gif_Tag {
void analyzeTag() {
hasAD = false;
u32 t = tag.REGS[0];
for(u32 i = 0; i < nRegs; i++) {
if (i == 8) t = tag.REGS[1];
u32 i = 0;
u32 j = std::min<u32>(nRegs, 8);
for(; i < j; i++) {
regs[i] = t & 0xf;
hasAD |= (regs[i] == GIF_REG_A_D);
hasAD |= (regs[i] == GIF_REG_A_D);
t >>= 4;
}
t = tag.REGS[1];
j = nRegs;
for(; i < j; i++) {
regs[i] = t & 0xf;
hasAD |= (regs[i] == GIF_REG_A_D);
t >>= 4;
}
}

View File

@ -171,55 +171,51 @@ void ResetCheatsCount()
cheatnumber = 0;
}
static int LoadCheatsFiles(const wxString& folderName, wxString& fileSpec, const wxString& friendlyName)
static int LoadCheatsFiles(const wxDirName& folderName, wxString& fileSpec, const wxString& friendlyName)
{
if (!wxDir::Exists(folderName)) {
Console.WriteLn(Color_Red, L"The %s folder ('%s') is inaccessible. Skipping...", friendlyName.c_str(), folderName.c_str());
return 0;
}
wxDir dir(folderName);
if (!folderName.Exists()) {
Console.WriteLn(Color_Red, L"The %s folder ('%s') is inaccessible. Skipping...", friendlyName.c_str(), folderName.ToString().c_str());
return 0;
}
wxDir dir(folderName.ToString());
int before = cheatnumber;
wxString buffer;
wxTextFile f;
bool found = dir.GetFirst(&buffer, fileSpec, wxDIR_FILES);
while (found) {
Console.WriteLn(Color_Gray, L"Found %s file: '%s'", friendlyName.c_str(), buffer.c_str());
int before = cheatnumber;
f.Open(Path::Combine(dir.GetName(), buffer));
inifile_process(f);
f.Close();
int loaded = cheatnumber - before;
Console.WriteLn((loaded ? Color_Green : Color_Gray), L"Loaded %d %s from '%s'", loaded, friendlyName.c_str(), buffer.c_str());
found = dir.GetNext(&buffer);
}
int before = cheatnumber;
wxString buffer;
wxTextFile f;
bool found = dir.GetFirst(&buffer, L"*", wxDIR_FILES);
while (found) {
if (buffer.Upper().Matches(fileSpec.Upper())) {
Console.WriteLn(Color_Gray, L"Found %s file: '%s'", friendlyName.c_str(), buffer.c_str());
int before = cheatnumber;
f.Open(Path::Combine(dir.GetName(), buffer));
inifile_process(f);
f.Close();
int loaded = cheatnumber - before;
Console.WriteLn((loaded ? Color_Green : Color_Gray), L"Loaded %d %s from '%s'", loaded, friendlyName.c_str(), buffer.c_str());
}
found = dir.GetNext(&buffer);
}
return cheatnumber - before;
return cheatnumber - before;
}
// This routine loads cheats from *.pnach files
// Returns number of cheats loaded
// Note: Should be called after InitPatches()
int LoadCheats(wxString name, const wxString& folderName, const wxString& friendlyName)
int LoadCheats(wxString name, const wxDirName& folderName, const wxString& friendlyName)
{
if (!name.Length()) {
Console.WriteLn(Color_Gray, "Cheats: No CRC, using 00000000 instead.");
name = L"00000000";
}
if (!name.Length()) {
Console.WriteLn(Color_Gray, "Cheats: No CRC, using 00000000 instead.");
name = L"00000000";
}
int loaded = 0;
int loaded = 0;
wxString filespec = name + L"*.pnach";
loaded += LoadCheatsFiles(folderName, filespec, friendlyName);
wxString filespec = name + L"*.pnach";
loaded += LoadCheatsFiles(folderName, filespec, friendlyName);
wxString nameUpper = name; nameUpper.Upper();
if (wxFileName::IsCaseSensitive() && name != nameUpper) {
filespec = nameUpper + L"*.pnach";
loaded += LoadCheatsFiles(folderName, filespec, friendlyName);
}
Console.WriteLn((loaded ? Color_Green : Color_Gray), L"Overall %d %s loaded", loaded, friendlyName.c_str());
return loaded;
Console.WriteLn((loaded ? Color_Green : Color_Gray), L"Overall %d %s loaded", loaded, friendlyName.c_str());
return loaded;
}
static u32 StrToU32(const wxString& str, int base = 10)

View File

@ -58,7 +58,7 @@ namespace PatchFunc
}
extern void ResetCheatsCount();
extern int LoadCheats(wxString name, const wxString& folderName, const wxString& friendlyName);
extern int LoadCheats(wxString name, const wxDirName& folderName, const wxString& friendlyName);
extern void inifile_command(bool isCheat, const wxString& cmd);
extern void inifile_trim(wxString& buffer);

View File

@ -33,6 +33,9 @@ enum FoldersEnum_t
FolderId_Documents,
FolderId_Cheats,
FolderId_CheatsWS,
FolderId_COUNT
};
@ -58,6 +61,8 @@ namespace PathDefs
extern wxDirName GetSettings();
extern wxDirName GetLogs();
extern wxDirName GetLangs();
extern wxDirName GetCheats();
extern wxDirName GetCheatsWS();
extern wxDirName Get( FoldersEnum_t folderidx );
@ -74,6 +79,8 @@ namespace PathDefs
extern const wxDirName& Logs();
extern const wxDirName& Dumps();
extern const wxDirName& Langs();
extern const wxDirName& Cheats();
extern const wxDirName& CheatsWS();
}
}

View File

@ -162,6 +162,16 @@ namespace PathDefs
return GetDocuments() + wxDirName( L"bios" );
}
wxDirName GetCheats()
{
return GetDocuments() + wxDirName( L"cheats" );
}
wxDirName GetCheatsWS()
{
return GetDocuments() + wxDirName( L"cheats_ws" );
}
wxDirName GetSavestates()
{
return GetDocuments() + Base::Savestates();
@ -218,6 +228,8 @@ namespace PathDefs
case FolderId_MemoryCards: return GetMemoryCards();
case FolderId_Logs: return GetLogs();
case FolderId_Langs: return GetLangs();
case FolderId_Cheats: return GetCheats();
case FolderId_CheatsWS: return GetCheatsWS();
case FolderId_Documents: return CustomDocumentsFolder;
@ -240,6 +252,8 @@ wxDirName& AppConfig::FolderOptions::operator[]( FoldersEnum_t folderidx )
case FolderId_MemoryCards: return MemoryCards;
case FolderId_Logs: return Logs;
case FolderId_Langs: return Langs;
case FolderId_Cheats: return Cheats;
case FolderId_CheatsWS: return CheatsWS;
case FolderId_Documents: return CustomDocumentsFolder;
@ -266,6 +280,8 @@ bool AppConfig::FolderOptions::IsDefault( FoldersEnum_t folderidx ) const
case FolderId_MemoryCards: return UseDefaultMemoryCards;
case FolderId_Logs: return UseDefaultLogs;
case FolderId_Langs: return UseDefaultLangs;
case FolderId_Cheats: return UseDefaultCheats;
case FolderId_CheatsWS: return UseDefaultCheatsWS;
case FolderId_Documents: return false;
@ -327,6 +343,16 @@ void AppConfig::FolderOptions::Set( FoldersEnum_t folderidx, const wxString& src
CustomDocumentsFolder = src;
break;
case FolderId_Cheats:
Cheats = src;
UseDefaultCheats = useDefault;
break;
case FolderId_CheatsWS:
CheatsWS = src;
UseDefaultCheatsWS = useDefault;
break;
jNO_DEFAULT
}
}
@ -634,6 +660,8 @@ void AppConfig::FolderOptions::ApplyDefaults()
if( UseDefaultLogs ) Logs = PathDefs::GetLogs();
if( UseDefaultLangs ) Langs = PathDefs::GetLangs();
if( UseDefaultPluginsFolder)PluginsFolder = PathDefs::GetPlugins();
if( UseDefaultCheats ) Cheats = PathDefs::GetCheats();
if( UseDefaultCheatsWS ) CheatsWS = PathDefs::GetCheatsWS();
}
// ------------------------------------------------------------------------
@ -667,6 +695,8 @@ void AppConfig::FolderOptions::LoadSave( IniInterface& ini )
IniBitBool( UseDefaultLogs );
IniBitBool( UseDefaultLangs );
IniBitBool( UseDefaultPluginsFolder );
IniBitBool( UseDefaultCheats );
IniBitBool( UseDefaultCheatsWS );
//when saving in portable mode, we save relative paths if possible
// --> on load, these relative paths will be expanded relative to the exe folder.
@ -678,6 +708,8 @@ void AppConfig::FolderOptions::LoadSave( IniInterface& ini )
IniEntryDirFile( MemoryCards, rel );
IniEntryDirFile( Logs, rel );
IniEntryDirFile( Langs, rel );
IniEntryDirFile( Cheats, rel );
IniEntryDirFile( CheatsWS, rel );
ini.Entry( L"PluginsFolder", PluginsFolder, InstallFolder + PathDefs::Base::Plugins(), rel );
IniEntryDirFile( RunIso, rel );

View File

@ -132,7 +132,9 @@ public:
UseDefaultSavestates:1,
UseDefaultMemoryCards:1,
UseDefaultLogs:1,
UseDefaultLangs:1;
UseDefaultLangs:1,
UseDefaultCheats:1,
UseDefaultCheatsWS:1;
BITFIELD_END
wxDirName
@ -141,7 +143,9 @@ public:
Savestates,
MemoryCards,
Langs,
Logs;
Logs,
Cheats,
CheatsWS;
wxDirName RunIso; // last used location for Iso loading.
wxDirName RunELF; // last used location for ELF loading.

View File

@ -352,14 +352,14 @@ void AppCoreThread::ApplySettings( const Pcsx2Config& src )
ResetCheatsCount();
if (EmuConfig.EnableCheats) {
if (int cheats = LoadCheats(gameCRC, L"cheats", L"Cheats")) {
if (int cheats = LoadCheats(gameCRC, PathDefs::GetCheats(), L"Cheats")) {
gameCheats.Printf(L" [%d Cheats]", cheats);
}
}
// FIXME: we should have a widescreen hacks config
if (EmuConfig.EnableCheats) {
if (int cheats = LoadCheats(gameCRC, L"cheats_ws", L"Widescreen hacks")) {
if (int cheats = LoadCheats(gameCRC, PathDefs::GetCheatsWS(), L"Widescreen hacks")) {
gameWsHacks.Printf(L" [%d WS hacks]", cheats);
}
}

View File

@ -120,6 +120,8 @@ Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug AVX|Win32 = Debug AVX|Win32
Debug AVX|x64 = Debug AVX|x64
Debug AVX2|Win32 = Debug AVX2|Win32
Debug AVX2|x64 = Debug AVX2|x64
Debug SSE2|Win32 = Debug SSE2|Win32
Debug SSE2|x64 = Debug SSE2|x64
Debug SSE4|Win32 = Debug SSE4|Win32
@ -132,6 +134,8 @@ Global
Devel|x64 = Devel|x64
Release AVX|Win32 = Release AVX|Win32
Release AVX|x64 = Release AVX|x64
Release AVX2|Win32 = Release AVX2|Win32
Release AVX2|x64 = Release AVX2|x64
Release SSE2|Win32 = Release SSE2|Win32
Release SSE2|x64 = Release SSE2|x64
Release SSE4|Win32 = Release SSE4|Win32
@ -145,6 +149,9 @@ Global
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug AVX|Win32.Build.0 = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug AVX|x64.ActiveCfg = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug AVX2|Win32.Build.0 = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug SSE2|Win32.Build.0 = Debug|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -163,6 +170,9 @@ Global
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX|Win32.ActiveCfg = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX|Win32.Build.0 = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX|x64.ActiveCfg = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|Win32.ActiveCfg = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|Win32.Build.0 = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release AVX2|x64.ActiveCfg = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE2|Win32.ActiveCfg = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE2|Win32.Build.0 = Release|Win32
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -177,6 +187,8 @@ Global
{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|x64.ActiveCfg = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug AVX|x64.ActiveCfg = Debug|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -192,6 +204,9 @@ Global
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release AVX|Win32.ActiveCfg = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release AVX|Win32.Build.0 = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release AVX|x64.ActiveCfg = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release AVX2|Win32.ActiveCfg = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release AVX2|Win32.Build.0 = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release AVX2|x64.ActiveCfg = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release SSE2|Win32.ActiveCfg = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release SSE2|Win32.Build.0 = Release|Win32
{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -206,6 +221,9 @@ Global
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug AVX|Win32.Build.0 = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug AVX|x64.ActiveCfg = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug AVX2|Win32.Build.0 = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug SSE2|Win32.Build.0 = Debug|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -224,6 +242,9 @@ Global
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release AVX|Win32.ActiveCfg = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release AVX|Win32.Build.0 = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release AVX|x64.ActiveCfg = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release AVX2|Win32.ActiveCfg = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release AVX2|Win32.Build.0 = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release AVX2|x64.ActiveCfg = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release SSE2|Win32.ActiveCfg = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release SSE2|Win32.Build.0 = Release|Win32
{5F78E90B-BD22-47B1-9CA5-7A80F4DF5EF3}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -239,6 +260,9 @@ Global
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug AVX|Win32.Build.0 = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug AVX|x64.ActiveCfg = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug AVX2|Win32.Build.0 = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug SSE2|Win32.Build.0 = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -257,6 +281,9 @@ Global
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release AVX|Win32.ActiveCfg = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release AVX|Win32.Build.0 = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release AVX|x64.ActiveCfg = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release AVX2|Win32.ActiveCfg = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release AVX2|Win32.Build.0 = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release AVX2|x64.ActiveCfg = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release SSE2|Win32.ActiveCfg = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release SSE2|Win32.Build.0 = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -273,6 +300,10 @@ Global
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|Win32.Build.0 = Debug AVX|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|x64.ActiveCfg = Debug AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX|x64.Build.0 = Debug AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX2|Win32.ActiveCfg = Debug AVX2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX2|Win32.Build.0 = Debug AVX2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX2|x64.ActiveCfg = Debug AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug AVX2|x64.Build.0 = Debug AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
@ -297,6 +328,10 @@ Global
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX|Win32.Build.0 = Release AVX|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX|x64.ActiveCfg = Release AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX|x64.Build.0 = Release AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|Win32.ActiveCfg = Release AVX2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|Win32.Build.0 = Release AVX2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|x64.ActiveCfg = Release AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release AVX2|x64.Build.0 = Release AVX|x64
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64
@ -316,6 +351,9 @@ Global
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug AVX|Win32.Build.0 = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug AVX|x64.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug AVX2|Win32.Build.0 = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug SSE2|Win32.Build.0 = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -334,6 +372,9 @@ Global
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE2|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE2|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -349,6 +390,9 @@ Global
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug AVX|Win32.Build.0 = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug AVX|x64.ActiveCfg = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug AVX2|Win32.Build.0 = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug SSE2|Win32.Build.0 = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -367,6 +411,9 @@ Global
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX|Win32.ActiveCfg = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX|Win32.Build.0 = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX|x64.ActiveCfg = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|Win32.ActiveCfg = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|Win32.Build.0 = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release AVX2|x64.ActiveCfg = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE2|Win32.ActiveCfg = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE2|Win32.Build.0 = Release|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -382,6 +429,9 @@ Global
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug AVX|Win32.Build.0 = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug AVX|x64.ActiveCfg = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug AVX2|Win32.Build.0 = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug SSE2|Win32.Build.0 = Debug|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -400,6 +450,9 @@ Global
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release AVX|Win32.ActiveCfg = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release AVX|Win32.Build.0 = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release AVX|x64.ActiveCfg = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release AVX2|Win32.ActiveCfg = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release AVX2|Win32.Build.0 = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release AVX2|x64.ActiveCfg = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release SSE2|Win32.ActiveCfg = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release SSE2|Win32.Build.0 = Release|Win32
{F4EB4AB2-C595-4B05-8BC0-059024BC796C}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -415,6 +468,9 @@ Global
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug AVX|x64.ActiveCfg = Debug|x64
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug AVX|x64.Build.0 = Debug|x64
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug AVX2|x64.ActiveCfg = Debug|x64
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug AVX2|x64.Build.0 = Debug|x64
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug SSE2|x64.ActiveCfg = Debug|x64
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Debug SSE2|x64.Build.0 = Debug|x64
@ -433,6 +489,9 @@ Global
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release AVX|Win32.ActiveCfg = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release AVX|Win32.Build.0 = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release AVX|x64.ActiveCfg = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release AVX2|Win32.ActiveCfg = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release AVX2|Win32.Build.0 = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release AVX2|x64.ActiveCfg = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release SSE2|Win32.ActiveCfg = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release SSE2|Win32.Build.0 = Release|Win32
{E4081455-398C-4610-A87C-90A8A7D72DC3}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -449,6 +508,9 @@ Global
{26511268-2902-4997-8421-ECD7055F9E28}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug AVX|Win32.Build.0 = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug AVX|x64.ActiveCfg = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug AVX2|Win32.Build.0 = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug SSE2|Win32.Build.0 = Debug|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -467,6 +529,9 @@ Global
{26511268-2902-4997-8421-ECD7055F9E28}.Release AVX|Win32.ActiveCfg = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release AVX|Win32.Build.0 = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release AVX|x64.ActiveCfg = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release AVX2|Win32.ActiveCfg = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release AVX2|Win32.Build.0 = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release AVX2|x64.ActiveCfg = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release SSE2|Win32.ActiveCfg = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release SSE2|Win32.Build.0 = Release|Win32
{26511268-2902-4997-8421-ECD7055F9E28}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -481,6 +546,8 @@ Global
{26511268-2902-4997-8421-ECD7055F9E28}.Release|x64.ActiveCfg = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug AVX|x64.ActiveCfg = Debug|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -496,6 +563,9 @@ Global
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release AVX|Win32.ActiveCfg = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release AVX|Win32.Build.0 = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release AVX|x64.ActiveCfg = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release AVX2|Win32.ActiveCfg = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release AVX2|Win32.Build.0 = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release AVX2|x64.ActiveCfg = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release SSE2|Win32.ActiveCfg = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release SSE2|Win32.Build.0 = Release|Win32
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -509,6 +579,8 @@ Global
{7F059854-568D-4E08-9D00-1E78E203E4DC}.Release|x64.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug AVX|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug AVX2|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|x64.ActiveCfg = Debug|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -524,6 +596,9 @@ Global
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX|x64.ActiveCfg = Release|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX2|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX2|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release AVX2|x64.ActiveCfg = Release|x64
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.ActiveCfg = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.Build.0 = Release|Win32
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|x64.ActiveCfg = Release|x64
@ -537,6 +612,8 @@ Global
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug AVX|x64.ActiveCfg = Debug|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug AVX2|x64.ActiveCfg = Debug|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE2|x64.ActiveCfg = Debug|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -552,6 +629,9 @@ Global
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX2|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX2|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release AVX2|x64.ActiveCfg = Release|x64
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|Win32.ActiveCfg = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|Win32.Build.0 = Release|Win32
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release SSE2|x64.ActiveCfg = Release|x64
@ -565,6 +645,8 @@ Global
{FCDF5AE2-EA47-4CC6-9F20-23A0517FEBCB}.Release|x64.ActiveCfg = Release|x64
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug AVX|Win32.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug AVX|x64.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug AVX2|Win32.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug AVX2|x64.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug SSE2|Win32.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug SSE2|x64.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Debug SSE4|Win32.ActiveCfg = Release|Win32
@ -580,6 +662,9 @@ Global
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release AVX|Win32.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release AVX|Win32.Build.0 = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release AVX|x64.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release AVX2|Win32.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release AVX2|Win32.Build.0 = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release AVX2|x64.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release SSE2|Win32.ActiveCfg = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release SSE2|Win32.Build.0 = Release|Win32
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -593,6 +678,8 @@ Global
{F38D9DF0-F68D-49D9-B3A0-932E74FB74A0}.Release|x64.ActiveCfg = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Debug AVX|x64.ActiveCfg = Debug|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -608,6 +695,9 @@ Global
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release AVX|Win32.ActiveCfg = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release AVX|Win32.Build.0 = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release AVX|x64.ActiveCfg = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release AVX2|Win32.ActiveCfg = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release AVX2|Win32.Build.0 = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release AVX2|x64.ActiveCfg = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release SSE2|Win32.ActiveCfg = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release SSE2|Win32.Build.0 = Release|Win32
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -621,6 +711,8 @@ Global
{BF7B81A5-E348-4F7C-A69F-F74C8EEEAD70}.Release|x64.ActiveCfg = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Debug AVX|x64.ActiveCfg = Debug|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -636,6 +728,9 @@ Global
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release AVX|Win32.ActiveCfg = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release AVX|Win32.Build.0 = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release AVX|x64.ActiveCfg = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release AVX2|Win32.ActiveCfg = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release AVX2|Win32.Build.0 = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release AVX2|x64.ActiveCfg = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release SSE2|Win32.ActiveCfg = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release SSE2|Win32.Build.0 = Release|Win32
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -649,6 +744,8 @@ Global
{3D0EB14D-32F3-4D82-9C6D-B806ADBB859C}.Release|x64.ActiveCfg = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Debug AVX|x64.ActiveCfg = Debug|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -664,6 +761,9 @@ Global
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release AVX|Win32.ActiveCfg = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release AVX|Win32.Build.0 = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release AVX|x64.ActiveCfg = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release AVX2|Win32.ActiveCfg = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release AVX2|Win32.Build.0 = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release AVX2|x64.ActiveCfg = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release SSE2|Win32.ActiveCfg = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release SSE2|Win32.Build.0 = Release|Win32
{04439C5F-05FB-4A9C-AAD1-5388C25377DB}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -678,6 +778,9 @@ Global
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug AVX|Win32.Build.0 = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug AVX|x64.ActiveCfg = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug AVX2|Win32.Build.0 = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug SSE2|Win32.Build.0 = Debug|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -696,6 +799,9 @@ Global
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release AVX|Win32.ActiveCfg = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release AVX|Win32.Build.0 = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release AVX|x64.ActiveCfg = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release AVX2|Win32.ActiveCfg = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release AVX2|Win32.Build.0 = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release AVX2|x64.ActiveCfg = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release SSE2|Win32.ActiveCfg = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release SSE2|Win32.Build.0 = Release|Win32
{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -711,6 +817,9 @@ Global
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug AVX|Win32.Build.0 = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug AVX|x64.ActiveCfg = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug AVX2|Win32.Build.0 = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug SSE2|Win32.Build.0 = Debug|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -729,6 +838,9 @@ Global
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release AVX|Win32.ActiveCfg = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release AVX|Win32.Build.0 = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release AVX|x64.ActiveCfg = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release AVX2|Win32.ActiveCfg = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release AVX2|Win32.Build.0 = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release AVX2|x64.ActiveCfg = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release SSE2|Win32.ActiveCfg = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release SSE2|Win32.Build.0 = Release|Win32
{0318BA30-EF48-441A-9E10-DC85EFAE39F0}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -744,6 +856,9 @@ Global
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug AVX|Win32.Build.0 = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug AVX|x64.ActiveCfg = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug AVX2|Win32.Build.0 = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug SSE2|Win32.Build.0 = Debug|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -762,6 +877,9 @@ Global
{C34487AF-228A-4D11-8E50-27803DF76873}.Release AVX|Win32.ActiveCfg = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release AVX|Win32.Build.0 = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release AVX|x64.ActiveCfg = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release AVX2|Win32.ActiveCfg = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release AVX2|Win32.Build.0 = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release AVX2|x64.ActiveCfg = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release SSE2|Win32.ActiveCfg = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release SSE2|Win32.Build.0 = Release|Win32
{C34487AF-228A-4D11-8E50-27803DF76873}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -777,6 +895,9 @@ Global
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug AVX|Win32.Build.0 = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug AVX|x64.ActiveCfg = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug AVX2|Win32.Build.0 = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug SSE2|Win32.Build.0 = Debug|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -795,6 +916,9 @@ Global
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX|Win32.ActiveCfg = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX|Win32.Build.0 = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX|x64.ActiveCfg = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|Win32.ActiveCfg = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|Win32.Build.0 = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release AVX2|x64.ActiveCfg = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE2|Win32.ActiveCfg = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE2|Win32.Build.0 = Release|Win32
{A51123F5-9505-4EAE-85E7-D320290A272C}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -810,6 +934,9 @@ Global
{4639972E-424E-4E13-8B07-CA403C481346}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug AVX|Win32.Build.0 = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug AVX|x64.ActiveCfg = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug AVX2|Win32.Build.0 = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug SSE2|Win32.Build.0 = Debug|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -828,6 +955,9 @@ Global
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX|Win32.ActiveCfg = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX|Win32.Build.0 = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX|x64.ActiveCfg = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|Win32.ActiveCfg = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|Win32.Build.0 = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release AVX2|x64.ActiveCfg = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release SSE2|Win32.ActiveCfg = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release SSE2|Win32.Build.0 = Release|Win32
{4639972E-424E-4E13-8B07-CA403C481346}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -843,6 +973,9 @@ Global
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug AVX|Win32.Build.0 = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug AVX|x64.ActiveCfg = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug AVX2|Win32.Build.0 = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug SSE2|Win32.Build.0 = Debug|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -861,6 +994,9 @@ Global
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release AVX|Win32.ActiveCfg = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release AVX|Win32.Build.0 = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release AVX|x64.ActiveCfg = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release AVX2|Win32.ActiveCfg = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release AVX2|Win32.Build.0 = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release AVX2|x64.ActiveCfg = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release SSE2|Win32.ActiveCfg = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release SSE2|Win32.Build.0 = Release|Win32
{0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -876,6 +1012,9 @@ Global
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug AVX|Win32.Build.0 = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug AVX|x64.ActiveCfg = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug AVX2|Win32.Build.0 = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug SSE2|Win32.Build.0 = Debug|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -894,6 +1033,9 @@ Global
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX|Win32.ActiveCfg = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX|Win32.Build.0 = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX|x64.ActiveCfg = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|Win32.ActiveCfg = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|Win32.Build.0 = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release AVX2|x64.ActiveCfg = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE2|Win32.ActiveCfg = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE2|Win32.Build.0 = Release|Win32
{677B7D11-D5E1-40B3-88B1-9A4DF83D2213}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -909,6 +1051,9 @@ Global
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug AVX|Win32.Build.0 = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug AVX|x64.ActiveCfg = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug AVX2|Win32.Build.0 = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug SSE2|Win32.Build.0 = Debug|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -927,6 +1072,9 @@ Global
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX|Win32.ActiveCfg = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX|Win32.Build.0 = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX|x64.ActiveCfg = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|Win32.ActiveCfg = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|Win32.Build.0 = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release AVX2|x64.ActiveCfg = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE2|Win32.ActiveCfg = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE2|Win32.Build.0 = Release|Win32
{BC236261-77E8-4567-8D09-45CD02965EB6}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -942,6 +1090,9 @@ Global
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug AVX|Win32.Build.0 = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug AVX|x64.ActiveCfg = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug AVX2|Win32.Build.0 = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug SSE2|Win32.Build.0 = Debug|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -960,6 +1111,9 @@ Global
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release AVX|Win32.ActiveCfg = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release AVX|Win32.Build.0 = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release AVX|x64.ActiveCfg = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release AVX2|Win32.ActiveCfg = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release AVX2|Win32.Build.0 = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release AVX2|x64.ActiveCfg = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release SSE2|Win32.ActiveCfg = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release SSE2|Win32.Build.0 = Release|Win32
{7E9B2BE7-CEC3-4F14-847B-0AB8D562FB86}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -975,6 +1129,9 @@ Global
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug AVX|Win32.Build.0 = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug AVX|x64.ActiveCfg = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug AVX2|Win32.Build.0 = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug SSE2|Win32.Build.0 = Debug|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -993,6 +1150,9 @@ Global
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release AVX|Win32.ActiveCfg = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release AVX|Win32.Build.0 = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release AVX|x64.ActiveCfg = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release AVX2|Win32.ActiveCfg = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release AVX2|Win32.Build.0 = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release AVX2|x64.ActiveCfg = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release SSE2|Win32.ActiveCfg = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release SSE2|Win32.Build.0 = Release|Win32
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -1007,6 +1167,8 @@ Global
{5CF88D5F-64DD-4EDC-9F1A-436BD502940A}.Release|x64.ActiveCfg = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Debug AVX|x64.ActiveCfg = Debug|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -1022,6 +1184,9 @@ Global
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release AVX|Win32.ActiveCfg = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release AVX|Win32.Build.0 = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release AVX|x64.ActiveCfg = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release AVX2|Win32.ActiveCfg = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release AVX2|Win32.Build.0 = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release AVX2|x64.ActiveCfg = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release SSE2|Win32.ActiveCfg = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release SSE2|Win32.Build.0 = Release|Win32
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -1035,6 +1200,8 @@ Global
{5FCBD521-5A0B-4D97-A823-A97E6BAB9101}.Release|x64.ActiveCfg = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Debug AVX|x64.ActiveCfg = Debug|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -1050,6 +1217,9 @@ Global
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release AVX|Win32.ActiveCfg = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release AVX|Win32.Build.0 = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release AVX|x64.ActiveCfg = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release AVX2|Win32.ActiveCfg = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release AVX2|Win32.Build.0 = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release AVX2|x64.ActiveCfg = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release SSE2|Win32.ActiveCfg = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release SSE2|Win32.Build.0 = Release|Win32
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -1063,6 +1233,8 @@ Global
{6C8D28E4-447E-4856-BD9E-6B8F5E7C58C9}.Release|x64.ActiveCfg = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Debug AVX|x64.ActiveCfg = Debug|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -1078,6 +1250,9 @@ Global
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release AVX|Win32.ActiveCfg = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release AVX|Win32.Build.0 = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release AVX|x64.ActiveCfg = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release AVX2|Win32.ActiveCfg = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release AVX2|Win32.Build.0 = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release AVX2|x64.ActiveCfg = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release SSE2|Win32.ActiveCfg = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release SSE2|Win32.Build.0 = Release|Win32
{6BC4D85D-A399-407E-96A9-CD5416A54269}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -1092,6 +1267,9 @@ Global
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug AVX|Win32.ActiveCfg = Debug (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug AVX|x64.ActiveCfg = Debug (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug AVX|x64.Build.0 = Debug (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug AVX2|Win32.ActiveCfg = Debug (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug AVX2|x64.ActiveCfg = Debug (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug AVX2|x64.Build.0 = Debug (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug SSE2|Win32.ActiveCfg = Debug (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug SSE2|x64.ActiveCfg = Debug (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Debug SSE2|x64.Build.0 = Debug (NO ASIO)|x64
@ -1113,6 +1291,10 @@ Global
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX|Win32.Build.0 = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX|x64.ActiveCfg = Release (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX|x64.Build.0 = Release (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|Win32.ActiveCfg = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|Win32.Build.0 = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|x64.ActiveCfg = Release (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release AVX2|x64.Build.0 = Release (NO ASIO)|x64
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE2|Win32.ActiveCfg = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE2|Win32.Build.0 = Release (NO ASIO)|Win32
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release SSE2|x64.ActiveCfg = Release (NO ASIO)|x64
@ -1131,6 +1313,8 @@ Global
{0A18A071-125E-442F-AFF7-A3F68ABECF99}.Release|x64.Build.0 = Release|x64
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Debug AVX|x64.ActiveCfg = Debug|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Debug SSE2|x64.ActiveCfg = Debug|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Debug SSE4|Win32.ActiveCfg = Debug|Win32
@ -1146,6 +1330,9 @@ Global
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release AVX|Win32.ActiveCfg = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release AVX|Win32.Build.0 = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release AVX|x64.ActiveCfg = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release AVX2|Win32.ActiveCfg = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release AVX2|Win32.Build.0 = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release AVX2|x64.ActiveCfg = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release SSE2|Win32.ActiveCfg = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release SSE2|Win32.Build.0 = Release|Win32
{2D4E85B2-F47F-4D65-B091-701E5C031DAC}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -1160,6 +1347,9 @@ Global
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug AVX|Win32.Build.0 = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug AVX|x64.ActiveCfg = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug AVX2|Win32.Build.0 = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug SSE2|Win32.Build.0 = Debug|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -1178,6 +1368,9 @@ Global
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release AVX|Win32.ActiveCfg = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release AVX|Win32.Build.0 = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release AVX|x64.ActiveCfg = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release AVX2|Win32.ActiveCfg = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release AVX2|Win32.Build.0 = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release AVX2|x64.ActiveCfg = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release SSE2|Win32.ActiveCfg = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release SSE2|Win32.Build.0 = Release|Win32
{E613DA9F-41B4-4613-9911-E418EF5533BC}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -1193,6 +1386,9 @@ Global
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug AVX|Win32.Build.0 = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug AVX|x64.ActiveCfg = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug AVX2|Win32.Build.0 = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug SSE2|Win32.Build.0 = Debug|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -1211,6 +1407,9 @@ Global
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release AVX|Win32.ActiveCfg = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release AVX|Win32.Build.0 = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release AVX|x64.ActiveCfg = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release AVX2|Win32.ActiveCfg = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release AVX2|Win32.Build.0 = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release AVX2|x64.ActiveCfg = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release SSE2|Win32.ActiveCfg = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release SSE2|Win32.Build.0 = Release|Win32
{019773FA-2DAA-4C12-9511-BD2D4EB2A718}.Release SSE2|x64.ActiveCfg = Release|Win32
@ -1226,6 +1425,9 @@ Global
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug AVX|Win32.ActiveCfg = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug AVX|Win32.Build.0 = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug AVX|x64.ActiveCfg = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug AVX2|Win32.ActiveCfg = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug AVX2|Win32.Build.0 = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug AVX2|x64.ActiveCfg = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug SSE2|Win32.ActiveCfg = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug SSE2|Win32.Build.0 = Debug|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Debug SSE2|x64.ActiveCfg = Debug|Win32
@ -1244,6 +1446,9 @@ Global
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release AVX|Win32.ActiveCfg = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release AVX|Win32.Build.0 = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release AVX|x64.ActiveCfg = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release AVX2|Win32.ActiveCfg = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release AVX2|Win32.Build.0 = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release AVX2|x64.ActiveCfg = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release SSE2|Win32.ActiveCfg = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release SSE2|Win32.Build.0 = Release|Win32
{BBE4E5FB-530A-4D18-A633-35AF0577B7F3}.Release SSE2|x64.ActiveCfg = Release|Win32

View File

@ -1131,7 +1131,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
{
::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
FILE* file = fopen("c:\\log.txt", "a");
FILE* file = fopen("c:\\temp1\\log.txt", "a");
fprintf(file, "-------------------------\n\n");

View File

@ -22,14 +22,27 @@
#include "stdafx.h"
#include "GSBlock.h"
#if _M_SSE >= 0x501
const GSVector8i GSBlock::m_r16mask(
0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15,
0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
#else
const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
#endif
const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
#if _M_SSE >= 0x501
const GSVector8i GSBlock::m_xxxa(0x00008000);
const GSVector8i GSBlock::m_xxbx(0x00007c00);
const GSVector8i GSBlock::m_xgxx(0x000003e0);
const GSVector8i GSBlock::m_rxxx(0x0000001f);
#else
const GSVector4i GSBlock::m_xxxa(0x00008000);
const GSVector4i GSBlock::m_xxbx(0x00007c00);
const GSVector4i GSBlock::m_xgxx(0x000003e0);
const GSVector4i GSBlock::m_rxxx(0x0000001f);
#endif
const GSVector4i GSBlock::m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
const GSVector4i GSBlock::m_uw8hmask1 = GSVector4i(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);

View File

@ -27,14 +27,25 @@
class GSBlock
{
#if _M_SSE >= 0x501
static const GSVector8i m_r16mask;
#else
static const GSVector4i m_r16mask;
#endif
static const GSVector4i m_r8mask;
static const GSVector4i m_r4mask;
#if _M_SSE >= 0x501
static const GSVector8i m_xxxa;
static const GSVector8i m_xxbx;
static const GSVector8i m_xgxx;
static const GSVector8i m_rxxx;
#else
static const GSVector4i m_xxxa;
static const GSVector4i m_xxbx;
static const GSVector4i m_xgxx;
static const GSVector4i m_rxxx;
#endif
static const GSVector4i m_uw8hmask0;
static const GSVector4i m_uw8hmask1;
@ -277,41 +288,62 @@ public:
WriteColumn4<3, aligned>(dst, src, srcpitch);
}
template<int i, bool aligned> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
template<int i> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
GSVector4i v0, v1, v2, v3;
#if _M_SSE >= 0x501
if(aligned)
{
const GSVector4i* s = (const GSVector4i*)src;
const GSVector8i* s = (const GSVector8i*)src;
v0 = s[i * 4 + 0];
v1 = s[i * 4 + 1];
v2 = s[i * 4 + 2];
v3 = s[i * 4 + 3];
GSVector8i v0 = s[i * 2 + 0];
GSVector8i v1 = s[i * 2 + 1];
GSVector4i::sw64(v0, v1, v2, v3);
}
else
{
v0 = GSVector4i::load(&src[i * 64 + 0], &src[i * 64 + 16]);
v1 = GSVector4i::load(&src[i * 64 + 32], &src[i * 64 + 48]);
v2 = GSVector4i::load(&src[i * 64 + 8], &src[i * 64 + 24]);
v3 = GSVector4i::load(&src[i * 64 + 40], &src[i * 64 + 56]);
}
GSVector8i::sw128(v0, v1);
GSVector8i::sw64(v0, v1);
GSVector8i::store<true>(&dst[dstpitch * 0], v0);
GSVector8i::store<true>(&dst[dstpitch * 1], v1);
#else
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0 = s[i * 4 + 0];
GSVector4i v1 = s[i * 4 + 1];
GSVector4i v2 = s[i * 4 + 2];
GSVector4i v3 = s[i * 4 + 3];
GSVector4i::sw64(v0, v1, v2, v3);
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
GSVector4i::store<aligned>(&d0[0], v0);
GSVector4i::store<aligned>(&d0[1], v1);
GSVector4i::store<aligned>(&d1[0], v2);
GSVector4i::store<aligned>(&d1[1], v3);
GSVector4i::store<true>(&d0[0], v0);
GSVector4i::store<true>(&d0[1], v1);
GSVector4i::store<true>(&d1[0], v2);
GSVector4i::store<true>(&d1[1], v3);
#endif
}
template<int i, bool aligned> __forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
template<int i> __forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
#if _M_SSE >= 0x301
#if _M_SSE >= 0x501
const GSVector8i* s = (const GSVector8i*)src;
GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask);
GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask);
GSVector8i::sw128(v0, v1);
GSVector8i::sw32(v0, v1);
v0 = v0.acbd();
v1 = v1.acbd();
GSVector8i::store<true>(&dst[dstpitch * 0], v0);
GSVector8i::store<true>(&dst[dstpitch * 1], v1);
#elif _M_SSE >= 0x301
const GSVector4i* s = (const GSVector4i*)src;
@ -326,10 +358,10 @@ public:
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
GSVector4i::store<aligned>(&d0[0], v0);
GSVector4i::store<aligned>(&d0[1], v2);
GSVector4i::store<aligned>(&d1[0], v1);
GSVector4i::store<aligned>(&d1[1], v3);
GSVector4i::store<true>(&d0[0], v0);
GSVector4i::store<true>(&d0[1], v2);
GSVector4i::store<true>(&d1[0], v1);
GSVector4i::store<true>(&d1[1], v3);
#else
@ -340,6 +372,8 @@ public:
GSVector4i v2 = s[i * 4 + 2];
GSVector4i v3 = s[i * 4 + 3];
//for(int16 i = 0; i < 8; i++) {v0.i16[i] = i; v1.i16[i] = i + 8; v2.i16[i] = i + 16; v3.i16[i] = i + 24;}
GSVector4i::sw16(v0, v1, v2, v3);
GSVector4i::sw32(v0, v1, v2, v3);
GSVector4i::sw16(v0, v2, v1, v3);
@ -347,15 +381,15 @@ public:
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
GSVector4i::store<aligned>(&d0[0], v0);
GSVector4i::store<aligned>(&d0[1], v1);
GSVector4i::store<aligned>(&d1[0], v2);
GSVector4i::store<aligned>(&d1[1], v3);
GSVector4i::store<true>(&d0[0], v0);
GSVector4i::store<true>(&d0[1], v1);
GSVector4i::store<true>(&d1[0], v2);
GSVector4i::store<true>(&d1[1], v3);
#endif
}
template<int i, bool aligned> __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
template<int i> __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
#if _M_SSE >= 0x301
@ -386,10 +420,10 @@ public:
GSVector4i::sw16(v0, v1, v2, v3);
GSVector4i::sw32(v0, v1, v3, v2);
GSVector4i::store<aligned>(&dst[dstpitch * 0], v0);
GSVector4i::store<aligned>(&dst[dstpitch * 1], v3);
GSVector4i::store<aligned>(&dst[dstpitch * 2], v1);
GSVector4i::store<aligned>(&dst[dstpitch * 3], v2);
GSVector4i::store<true>(&dst[dstpitch * 0], v0);
GSVector4i::store<true>(&dst[dstpitch * 1], v3);
GSVector4i::store<true>(&dst[dstpitch * 2], v1);
GSVector4i::store<true>(&dst[dstpitch * 3], v2);
#else
@ -416,15 +450,15 @@ public:
v1 = v1.yxwz();
}
GSVector4i::store<aligned>(&dst[dstpitch * 0], v0);
GSVector4i::store<aligned>(&dst[dstpitch * 1], v1);
GSVector4i::store<aligned>(&dst[dstpitch * 2], v2);
GSVector4i::store<aligned>(&dst[dstpitch * 3], v3);
GSVector4i::store<true>(&dst[dstpitch * 0], v0);
GSVector4i::store<true>(&dst[dstpitch * 1], v1);
GSVector4i::store<true>(&dst[dstpitch * 2], v2);
GSVector4i::store<true>(&dst[dstpitch * 3], v3);
#endif
}
template<int i, bool aligned> __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
template<int i> __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
#if _M_SSE >= 0x301
@ -453,10 +487,10 @@ public:
GSVector4i::sw16rl(v0, v1, v2, v3);
}
GSVector4i::store<aligned>(&dst[dstpitch * 0], v0);
GSVector4i::store<aligned>(&dst[dstpitch * 1], v1);
GSVector4i::store<aligned>(&dst[dstpitch * 2], v2);
GSVector4i::store<aligned>(&dst[dstpitch * 3], v3);
GSVector4i::store<true>(&dst[dstpitch * 0], v0);
GSVector4i::store<true>(&dst[dstpitch * 1], v1);
GSVector4i::store<true>(&dst[dstpitch * 2], v2);
GSVector4i::store<true>(&dst[dstpitch * 3], v3);
#else
@ -491,104 +525,104 @@ public:
v1 = v1.yxwzlh();
}
GSVector4i::store<aligned>(&dst[dstpitch * 0], v0);
GSVector4i::store<aligned>(&dst[dstpitch * 1], v1);
GSVector4i::store<aligned>(&dst[dstpitch * 2], v2);
GSVector4i::store<aligned>(&dst[dstpitch * 3], v3);
GSVector4i::store<true>(&dst[dstpitch * 0], v0);
GSVector4i::store<true>(&dst[dstpitch * 1], v1);
GSVector4i::store<true>(&dst[dstpitch * 2], v2);
GSVector4i::store<true>(&dst[dstpitch * 3], v3);
#endif
}
template<bool aligned> static void ReadColumn32(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadColumn32(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
switch((y >> 1) & 3)
{
case 0: ReadColumn32<0, aligned>(src, dst, dstpitch); break;
case 1: ReadColumn32<1, aligned>(src, dst, dstpitch); break;
case 2: ReadColumn32<2, aligned>(src, dst, dstpitch); break;
case 3: ReadColumn32<3, aligned>(src, dst, dstpitch); break;
case 0: ReadColumn32<0>(src, dst, dstpitch); break;
case 1: ReadColumn32<1>(src, dst, dstpitch); break;
case 2: ReadColumn32<2>(src, dst, dstpitch); break;
case 3: ReadColumn32<3>(src, dst, dstpitch); break;
default: __assume(0);
}
}
template<bool aligned> static void ReadColumn16(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadColumn16(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
switch((y >> 1) & 3)
{
case 0: ReadColumn16<0, aligned>(src, dst, dstpitch); break;
case 1: ReadColumn16<1, aligned>(src, dst, dstpitch); break;
case 2: ReadColumn16<2, aligned>(src, dst, dstpitch); break;
case 3: ReadColumn16<3, aligned>(src, dst, dstpitch); break;
case 0: ReadColumn16<0>(src, dst, dstpitch); break;
case 1: ReadColumn16<1>(src, dst, dstpitch); break;
case 2: ReadColumn16<2>(src, dst, dstpitch); break;
case 3: ReadColumn16<3>(src, dst, dstpitch); break;
default: __assume(0);
}
}
template<bool aligned> static void ReadColumn8(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadColumn8(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
switch((y >> 2) & 3)
{
case 0: ReadColumn8<0, aligned>(src, dst, dstpitch); break;
case 1: ReadColumn8<1, aligned>(src, dst, dstpitch); break;
case 2: ReadColumn8<2, aligned>(src, dst, dstpitch); break;
case 3: ReadColumn8<3, aligned>(src, dst, dstpitch); break;
case 0: ReadColumn8<0>(src, dst, dstpitch); break;
case 1: ReadColumn8<1>(src, dst, dstpitch); break;
case 2: ReadColumn8<2>(src, dst, dstpitch); break;
case 3: ReadColumn8<3>(src, dst, dstpitch); break;
default: __assume(0);
}
}
template<bool aligned> static void ReadColumn4(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadColumn4(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
switch((y >> 2) & 3)
{
case 0: ReadColumn4<0, aligned>(src, dst, dstpitch); break;
case 1: ReadColumn4<1, aligned>(src, dst, dstpitch); break;
case 2: ReadColumn4<2, aligned>(src, dst, dstpitch); break;
case 3: ReadColumn4<3, aligned>(src, dst, dstpitch); break;
case 0: ReadColumn4<0>(src, dst, dstpitch); break;
case 1: ReadColumn4<1>(src, dst, dstpitch); break;
case 2: ReadColumn4<2>(src, dst, dstpitch); break;
case 3: ReadColumn4<3>(src, dst, dstpitch); break;
default: __assume(0);
}
}
template<bool aligned> static void ReadBlock32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadBlock32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
ReadColumn32<0, aligned>(src, dst, dstpitch);
ReadColumn32<0>(src, dst, dstpitch);
dst += dstpitch * 2;
ReadColumn32<1, aligned>(src, dst, dstpitch);
ReadColumn32<1>(src, dst, dstpitch);
dst += dstpitch * 2;
ReadColumn32<2, aligned>(src, dst, dstpitch);
ReadColumn32<2>(src, dst, dstpitch);
dst += dstpitch * 2;
ReadColumn32<3, aligned>(src, dst, dstpitch);
ReadColumn32<3>(src, dst, dstpitch);
}
template<bool aligned> static void ReadBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
ReadColumn16<0, aligned>(src, dst, dstpitch);
ReadColumn16<0>(src, dst, dstpitch);
dst += dstpitch * 2;
ReadColumn16<1, aligned>(src, dst, dstpitch);
ReadColumn16<1>(src, dst, dstpitch);
dst += dstpitch * 2;
ReadColumn16<2, aligned>(src, dst, dstpitch);
ReadColumn16<2>(src, dst, dstpitch);
dst += dstpitch * 2;
ReadColumn16<3, aligned>(src, dst, dstpitch);
ReadColumn16<3>(src, dst, dstpitch);
}
template<bool aligned> static void ReadBlock8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadBlock8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
ReadColumn8<0, aligned>(src, dst, dstpitch);
ReadColumn8<0>(src, dst, dstpitch);
dst += dstpitch * 4;
ReadColumn8<1, aligned>(src, dst, dstpitch);
ReadColumn8<1>(src, dst, dstpitch);
dst += dstpitch * 4;
ReadColumn8<2, aligned>(src, dst, dstpitch);
ReadColumn8<2>(src, dst, dstpitch);
dst += dstpitch * 4;
ReadColumn8<3, aligned>(src, dst, dstpitch);
ReadColumn8<3>(src, dst, dstpitch);
}
template<bool aligned> static void ReadBlock4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
static void ReadBlock4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
ReadColumn4<0, aligned>(src, dst, dstpitch);
ReadColumn4<0>(src, dst, dstpitch);
dst += dstpitch * 4;
ReadColumn4<1, aligned>(src, dst, dstpitch);
ReadColumn4<1>(src, dst, dstpitch);
dst += dstpitch * 4;
ReadColumn4<2, aligned>(src, dst, dstpitch);
ReadColumn4<2>(src, dst, dstpitch);
dst += dstpitch * 4;
ReadColumn4<3, aligned>(src, dst, dstpitch);
ReadColumn4<3>(src, dst, dstpitch);
}
__forceinline static void ReadBlock4P(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
@ -850,8 +884,39 @@ public:
}
}
template<bool AEM, class V> __forceinline static V Expand24to32(const V& c, const V& TA0)
{
return c | (AEM ? TA0.andnot(c == V::zero()) : TA0); // TA0 & (c != GSVector4i::zero())
}
template<bool AEM, class V> __forceinline static V Expand16to32(const V& c, const V& TA0, const V& TA1)
{
return ((c & m_rxxx) << 3) | ((c & m_xgxx) << 6) | ((c & m_xxbx) << 9) | (AEM ? TA0.blend8(TA1, c.sra16(15)).andnot(c == V::zero()) : TA0.blend(TA1, c.sra16(15)));
}
template<bool AEM> static void ExpandBlock24(const uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
{
#if _M_SSE >= 0x501
const GSVector8i* s = (const GSVector8i*)src;
GSVector8i TA0(TEXA.TA0 << 24);
GSVector8i mask = GSVector8i::x00ffffff();
for(int i = 0; i < 4; i++, dst += dstpitch * 2)
{
GSVector8i v0 = s[i * 2 + 0] & mask;
GSVector8i v1 = s[i * 2 + 1] & mask;
GSVector8i* d0 = (GSVector8i*)&dst[dstpitch * 0];
GSVector8i* d1 = (GSVector8i*)&dst[dstpitch * 1];
d0[0] = Expand24to32<AEM>(v0, TA0);
d1[0] = Expand24to32<AEM>(v1, TA0);
}
#else
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i TA0(TEXA.TA0 << 24);
@ -867,68 +932,53 @@ public:
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
if(AEM)
{
d0[0] = v0 | TA0.andnot(v0 == GSVector4i::zero()); // TA0 & (v0 != GSVector4i::zero())
d0[1] = v1 | TA0.andnot(v1 == GSVector4i::zero()); // TA0 & (v1 != GSVector4i::zero())
d1[0] = v2 | TA0.andnot(v2 == GSVector4i::zero()); // TA0 & (v2 != GSVector4i::zero())
d1[1] = v3 | TA0.andnot(v3 == GSVector4i::zero()); // TA0 & (v3 != GSVector4i::zero())
}
else
{
d0[0] = v0 | TA0;
d0[1] = v1 | TA0;
d1[0] = v2 | TA0;
d1[1] = v3 | TA0;
}
d0[0] = Expand24to32<AEM>(v0, TA0);
d0[1] = Expand24to32<AEM>(v1, TA0);
d1[0] = Expand24to32<AEM>(v2, TA0);
d1[1] = Expand24to32<AEM>(v3, TA0);
}
#endif
}
template<bool AEM> static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
{
#if _M_SSE >= 0x501
const GSVector8i* s = (const GSVector8i*)src;
GSVector8i TA0(TEXA.TA0 << 24);
GSVector8i TA1(TEXA.TA1 << 24);
for(int i = 0; i < 8; i++, dst += dstpitch)
{
GSVector8i v = s[i].acbd();
((GSVector8i*)dst)[0] = Expand16to32<AEM>(v.upl16(v), TA0, TA1);
((GSVector8i*)dst)[1] = Expand16to32<AEM>(v.uph16(v), TA0, TA1);
}
#else
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i TA0(TEXA.TA0 << 24);
GSVector4i TA1(TEXA.TA1 << 24);
GSVector4i rm = m_rxxx;
GSVector4i gm = m_xgxx;
GSVector4i bm = m_xxbx;
GSVector4i l, h;
for(int i = 0; i < 8; i++, dst += dstpitch)
{
GSVector4i v0 = s[i * 2 + 0];
l = v0.upl16(v0);
h = v0.uph16(v0);
if(AEM)
{
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
}
else
{
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
}
((GSVector4i*)dst)[0] = Expand16to32<AEM>(v0.upl16(v0), TA0, TA1);
((GSVector4i*)dst)[1] = Expand16to32<AEM>(v0.uph16(v0), TA0, TA1);
GSVector4i v1 = s[i * 2 + 1];
l = v1.upl16(v1);
h = v1.uph16(v1);
if(AEM)
{
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
}
else
{
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
}
((GSVector4i*)dst)[2] = Expand16to32<AEM>(v1.upl16(v1), TA0, TA1);
((GSVector4i*)dst)[3] = Expand16to32<AEM>(v1.uph16(v1), TA0, TA1);
}
#endif
}
__forceinline static void ExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
@ -1386,6 +1436,33 @@ public:
template<bool AEM> __forceinline static void ReadAndExpandBlock24(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
{
#if _M_SSE >= 0x501
const GSVector8i* s = (const GSVector8i*)src;
GSVector8i TA0(TEXA.TA0 << 24);
GSVector8i mask = GSVector8i::x00ffffff();
for(int i = 0; i < 4; i++, dst += dstpitch * 2)
{
GSVector8i v0 = s[i * 2 + 0];
GSVector8i v1 = s[i * 2 + 1];
GSVector8i::sw128(v0, v1);
GSVector8i::sw64(v0, v1);
v0 &= mask;
v1 &= mask;
GSVector8i* d0 = (GSVector8i*)&dst[dstpitch * 0];
GSVector8i* d1 = (GSVector8i*)&dst[dstpitch * 1];
d0[0] = Expand24to32<AEM>(v0, TA0);
d1[0] = Expand24to32<AEM>(v1, TA0);
}
#else
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i TA0(TEXA.TA0 << 24);
@ -1408,30 +1485,42 @@ public:
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
if(AEM)
{
d0[0] = v0 | TA0.andnot(v0 == GSVector4i::zero()); // TA0 & (v0 != GSVector4i::zero())
d0[1] = v1 | TA0.andnot(v1 == GSVector4i::zero()); // TA0 & (v1 != GSVector4i::zero())
d1[0] = v2 | TA0.andnot(v2 == GSVector4i::zero()); // TA0 & (v2 != GSVector4i::zero())
d1[1] = v3 | TA0.andnot(v3 == GSVector4i::zero()); // TA0 & (v3 != GSVector4i::zero())
}
else
{
d0[0] = v0 | TA0;
d0[1] = v1 | TA0;
d1[0] = v2 | TA0;
d1[1] = v3 | TA0;
}
d0[0] = Expand24to32<AEM>(v0, TA0);
d0[1] = Expand24to32<AEM>(v1, TA0);
d1[0] = Expand24to32<AEM>(v2, TA0);
d1[1] = Expand24to32<AEM>(v3, TA0);
}
}
template<bool AEM> __forceinline static GSVector4i Expand16to32(const GSVector4i& c, const GSVector4i& TA0, const GSVector4i& TA1)
{
return ((c & m_rxxx) << 3) | ((c & m_xgxx) << 6) | ((c & m_xxbx) << 9) | (AEM ? TA0.blend8(TA1, c.sra16(15)).andnot(c == GSVector4i::zero()) : TA0.blend(TA1, c.sra16(15)));
#endif
}
template<bool AEM> __forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
{
#if 0 // not faster
#if _M_SSE >= 0x501
const GSVector8i* s = (const GSVector8i*)src;
GSVector8i TA0(TEXA.TA0 << 24);
GSVector8i TA1(TEXA.TA1 << 24);
for(int i = 0; i < 4; i++, dst += dstpitch * 2)
{
GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask);
GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask);
GSVector8i::sw128(v0, v1);
GSVector8i::sw32(v0, v1);
GSVector8i* d0 = (GSVector8i*)&dst[dstpitch * 0];
GSVector8i* d1 = (GSVector8i*)&dst[dstpitch * 1];
d0[0] = Expand16to32<AEM>(v0.upl16(v0), TA0, TA1);
d0[1] = Expand16to32<AEM>(v0.uph16(v0), TA0, TA1);
d1[0] = Expand16to32<AEM>(v1.upl16(v1), TA0, TA1);
d1[1] = Expand16to32<AEM>(v1.uph16(v1), TA0, TA1);
}
#elif 0 // not faster
const GSVector4i* s = (const GSVector4i*)src;
@ -1468,7 +1557,7 @@ public:
__aligned(uint16, 32) block[16 * 8];
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
ReadBlock16(src, (uint8*)block, sizeof(block) / 8);
ExpandBlock16<AEM>(block, dst, dstpitch, TEXA);
@ -1525,7 +1614,7 @@ public:
__aligned(uint8, 32) block[16 * 16];
ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16);
ReadBlock8(src, (uint8*)block, sizeof(block) / 16);
ExpandBlock8_32(block, dst, dstpitch, pal);
@ -1600,7 +1689,7 @@ public:
__aligned(uint8, 32) block[(32 / 2) * 16];
ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16);
ReadBlock4(src, (uint8*)block, sizeof(block) / 16);
ExpandBlock4_32(block, dst, dstpitch, pal);
@ -1641,7 +1730,7 @@ public:
__aligned(uint32, 32) block[8 * 8];
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
ReadBlock32(src, (uint8*)block, sizeof(block) / 8);
ExpandBlock8H_32(block, dst, dstpitch, pal);
@ -1682,7 +1771,7 @@ public:
__aligned(uint32, 32) block[8 * 8];
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
ReadBlock32(src, (uint8*)block, sizeof(block) / 8);
ExpandBlock4HL_32(block, dst, dstpitch, pal);
@ -1723,7 +1812,7 @@ public:
__aligned(uint32, 32) block[8 * 8];
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
ReadBlock32(src, (uint8*)block, sizeof(block) / 8);
ExpandBlock4HH_32(block, dst, dstpitch, pal);

View File

@ -126,21 +126,26 @@ void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
(this->*m_wc[TEX0.CSM][TEX0.CPSM][TEX0.PSM])(TEX0, TEXCLUT);
// Mirror write to other half of buffer to simulate wrapping memory
int offset = (TEX0.CSA & (TEX0.CPSM < PSM_PSMCT16 ? 15 : 31)) * 16;
if (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)
if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)
{
int size = TEX0.CPSM < PSM_PSMCT16 ? 512 : 256;
memcpy(m_clut + 512 + offset, m_clut + offset, sizeof *m_clut * min(size, 512 - offset));
memcpy(m_clut, m_clut + 512, sizeof *m_clut * max(0, size + offset - 512));
memcpy(m_clut + 512 + offset, m_clut + offset, sizeof(*m_clut) * min(size, 512 - offset));
memcpy(m_clut, m_clut + 512, sizeof(*m_clut) * max(0, size + offset - 512));
}
else
{
int size = 16;
memcpy(m_clut + 512 + offset, m_clut + offset, sizeof *m_clut * size);
if (TEX0.CPSM < PSM_PSMCT16)
memcpy(m_clut + 512 + 256 + offset, m_clut + 256 + offset, sizeof *m_clut * size);
memcpy(m_clut + 512 + offset, m_clut + offset, sizeof(*m_clut) * size);
if(TEX0.CPSM < PSM_PSMCT16)
{
memcpy(m_clut + 512 + 256 + offset, m_clut + 256 + offset, sizeof(*m_clut) * size);
}
}
}
@ -289,7 +294,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
case PSM_PSMT8:
case PSM_PSMT8H:
clut += (TEX0.CSA & 15) << 4;
clut += (TEX0.CSA & 15) << 4; // disney golf title screen
ReadCLUT_T32_I8(clut, m_buff32);
break;
case PSM_PSMT4:

View File

@ -958,6 +958,15 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
rb = m_global.frb.lerp16<0>(rb, fog);
ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga);
/*
fog = fog.srl16(7);
GSVector4i ifog = GSVector4i::x00ff().sub16(fog);
rb = rb.mul16l(fog).add16(m_global.frb.mul16l(ifog)).srl16(8);
ga = ga.mul16l(fog).add16(m_global.fga.mul16l(ifog)).srl16(8).mix16(ga);
*/
}
// ReadFrame
@ -1204,12 +1213,6 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
if(sel.fwrite)
{
if(sel.colclamp == 0)
{
rb &= GSVector4i::x00ff();
ga &= GSVector4i::x00ff();
}
if(sel.fpsm == 2 && sel.dthe)
{
int y = (top & 3) << 1;
@ -1218,6 +1221,12 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
ga = ga.add16(m_global.dimx[1 + y]);
}
if(sel.colclamp == 0)
{
rb &= GSVector4i::x00ff();
ga &= GSVector4i::x00ff();
}
GSVector4i fs = rb.upl16(ga).pu16(rb.uph16(ga));
if(sel.fba && sel.fpsm != 1)
@ -1544,19 +1553,30 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
{
if(m == 0xffffffff) return;
#if _M_SSE >= 0x501
GSVector8i color((int)c);
GSVector8i mask((int)m);
#else
GSVector4i color((int)c);
GSVector4i mask((int)m);
#endif
if(sizeof(T) == sizeof(uint16))
{
color = color.xxzzlh();
mask = mask.xxzzlh();
c = (c & 0xffff) | (c << 16);
m = (m & 0xffff) | (m << 16);
}
if(masked) ASSERT(mask.u32[0] != 0);
color = color.andnot(mask);
c = color.extract32<0>();
c = c & (~m);
if(masked) ASSERT(mask.u32[0] != 0);
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
@ -1597,6 +1617,37 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
}
}
#if _M_SSE >= 0x501
template<class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
{
if(r.x >= r.z) return;
T* vm = (T*)m_global.vm;
for(int y = r.y; y < r.w; y += 8)
{
T* RESTRICT d = &vm[row[y]];
for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector8i* RESTRICT p = (GSVector8i*)&d[col[x]];
p[0] = !masked ? c : (c | (p[0] & m));
p[1] = !masked ? c : (c | (p[1] & m));
p[2] = !masked ? c : (c | (p[2] & m));
p[3] = !masked ? c : (c | (p[3] & m));
p[4] = !masked ? c : (c | (p[4] & m));
p[5] = !masked ? c : (c | (p[5] & m));
p[6] = !masked ? c : (c | (p[6] & m));
p[7] = !masked ? c : (c | (p[7] & m));
}
}
}
#else
template<class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
{
@ -1622,3 +1673,5 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
}
}
}
#endif

View File

@ -49,9 +49,18 @@ protected:
template<class T, bool masked>
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
#if _M_SSE >= 0x501
template<class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
#else
template<class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
#endif
public:
GSDrawScanline();
virtual ~GSDrawScanline();

View File

@ -31,6 +31,7 @@ static const int _v = _args + 8;
void GSDrawScanlineCodeGenerator::Generate()
{
//ret(8);
push(ebx);
push(esi);
push(edi);
@ -1143,13 +1144,6 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
if(!m_sel.lcm)
{
// store u/v
vpunpckldq(xmm0, xmm2, xmm3);
vmovdqa(ptr[&m_local.temp.uv[0]], xmm0);
vpunpckhdq(xmm0, xmm2, xmm3);
vmovdqa(ptr[&m_local.temp.uv[1]], xmm0);
// lod = -log2(Q) * (1 << L) + K
vpcmpeqd(xmm1, xmm1);
@ -1167,18 +1161,37 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// xmm4 = mant(q) | 1.0f
vmulps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
vmulps(xmm5, xmm4);
vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
vmulps(xmm4, xmm5);
vaddps(xmm4, xmm0);
if(m_cpu.has(util::Cpu::tFMA))
{
vmovaps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0
vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * xmm4 + c1
vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * xmm4 + c1) * xmm4 + c2
vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // xmm4 - 1.0f
vfmadd213ps(xmm4, xmm5, xmm0); // ((c0 * xmm4 + c1) * xmm4 + c2) * (xmm4 - 1.0f) + xmm0
}
else
{
vmulps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
vmulps(xmm5, xmm4);
vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
vmulps(xmm4, xmm5);
vaddps(xmm4, xmm0);
}
// xmm4 = log2(Q) = ((((c0 * xmm4) + c1) * xmm4) + c2) * (xmm4 - 1.0f) + xmm0
vmulps(xmm4, ptr[&m_local.gd->l]);
vaddps(xmm4, ptr[&m_local.gd->k]);
if(m_cpu.has(util::Cpu::tFMA))
{
vmovaps(xmm5, ptr[&m_local.gd->l]);
vfmadd213ps(xmm4, xmm5, ptr[&m_local.gd->k]);
}
else
{
vmulps(xmm4, ptr[&m_local.gd->l]);
vaddps(xmm4, ptr[&m_local.gd->k]);
}
// xmm4 = (-log2(Q) * (1 << L) + K) * 0x10000
@ -1196,6 +1209,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
}
vpsrld(xmm0, xmm4, 16);
vmovdqa(ptr[&m_local.temp.lod.i], xmm0);
/*
vpslld(xmm5, xmm0, 6);
@ -1205,58 +1219,93 @@ return;
*/
if(m_sel.mmin == 2) // trilinear mode
{
vpshuflw(xmm0, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vmovdqa(ptr[&m_local.temp.lod.f], xmm0);
vpshuflw(xmm1, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 0, 0));
vmovdqa(ptr[&m_local.temp.lod.f], xmm1);
}
// shift u/v by (int)lod
// shift u/v/minmax by (int)lod
vmovq(xmm4, ptr[&m_local.gd->t.minmax]);
if(m_cpu.has(util::Cpu::tAVX2))
{
vpsravd(xmm2, xmm2, xmm0);
vpsravd(xmm3, xmm3, xmm0);
vmovdqa(xmm2, ptr[&m_local.temp.uv[0]]);
vmovdqa(xmm5, xmm2);
vmovdqa(xmm3, ptr[&m_local.temp.uv[1]]);
vmovdqa(xmm6, xmm3);
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]);
vpsrad(xmm2, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1);
// m_local.gd->t.minmax => m_local.temp.uv_minmax[0/1]
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]);
vpsrad(xmm5, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1);
vmovq(xmm4, ptr[&m_local.gd->t.minmax]); // x x x x maxv maxu minv minu
vpunpcklwd(xmm4, xmm4); // maxv maxv maxu maxu minv minv minu minu
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]);
vpsrad(xmm3, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1);
vpxor(xmm1, xmm1);
vpunpckldq(xmm6, xmm4, xmm4); // minv minv minv minv minu minu minu minu
vpunpcklwd(xmm5, xmm6, xmm1); // 0 minu 0 minu 0 minu 0 minu
vpsrlvd(xmm5, xmm5, xmm0);
vpunpckhwd(xmm6, xmm6, xmm1); // 0 minv 0 minv 0 minv 0 minv
vpsrlvd(xmm6, xmm6, xmm0);
vpackusdw(xmm5, xmm6); // xmm5 = minv minv minv minv minu minu minu minu
vpunpckhdq(xmm4, xmm4); // maxv maxv maxv maxv maxu maxu maxu maxu
vpunpcklwd(xmm6, xmm4, xmm1); // 0 maxu 0 maxu 0 maxu 0 maxu
vpsrlvd(xmm6, xmm6, xmm0);
vpunpckhwd(xmm4, xmm1); // 0 maxv 0 maxv 0 maxv 0 maxv
vpsrlvd(xmm4, xmm4, xmm0);
vpackusdw(xmm6, xmm4); // xmm6 = maxv maxv maxv maxv maxu maxu maxu maxu
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]);
vpsrad(xmm6, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1);
vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5);
vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6);
}
else
{
vmovq(xmm4, ptr[&m_local.gd->t.minmax]);
vpunpckldq(xmm2, xmm3);
vpunpckhdq(xmm5, xmm6);
vpunpckhdq(xmm3, xmm2, xmm5);
vpunpckldq(xmm2, xmm5);
vpunpckldq(xmm5, xmm2, xmm3);
vpunpckhdq(xmm6, xmm2, xmm3);
vmovdqa(xmm2, xmm5);
vmovdqa(xmm3, xmm6);
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]);
vpsrad(xmm2, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1);
vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]);
vpsrad(xmm5, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1);
vpunpcklwd(xmm0, xmm5, xmm6);
vpunpckhwd(xmm1, xmm5, xmm6);
vpunpckldq(xmm5, xmm0, xmm1);
vpunpckhdq(xmm6, xmm0, xmm1);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]);
vpsrad(xmm3, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1);
vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5);
vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]);
vpsrad(xmm6, xmm0);
vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1);
vpunpckldq(xmm2, xmm3);
vpunpckhdq(xmm5, xmm6);
vpunpckhdq(xmm3, xmm2, xmm5);
vpunpckldq(xmm2, xmm5);
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
vpunpcklwd(xmm0, xmm5, xmm6);
vpunpckhwd(xmm1, xmm5, xmm6);
vpunpckldq(xmm5, xmm0, xmm1);
vpunpckhdq(xmm6, xmm0, xmm1);
vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5);
vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6);
}
}
else
{
@ -2610,6 +2659,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
return;
}
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
mov(ebp, ptr[&m_local.gd->dimx]);
vpaddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]);
vpaddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]);
}
if(m_sel.colclamp == 0)
{
// c[0] &= 0x00ff00ff;
@ -2621,16 +2680,6 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
vpand(xmm6, xmm7);
}
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
mov(ebp, ptr[&m_local.gd->dimx]);
vpaddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]);
vpaddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]);
}
// GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1]));
vpunpckhwd(xmm7, xmm5, xmm6);
@ -2842,12 +2891,22 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
}
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
const int t[] = {4, 1, 5, 2};
for(int i = 0; i < pixels; i++)
{
for(int j = 0; j < 4; j++)
if(m_cpu.has(util::Cpu::tAVX2) && !m_sel.tlu) // vpgatherdd seems to be dead slow for byte aligned offsets, not using it for palette lookups
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
Xmm mask = Xmm(t[i]);
vpcmpeqd(mask, mask);
vpgatherdd(Xmm(r[i * 2 + 1]), ptr[ebx + Xmm(r[i * 2 + 0]) * 4], mask);
}
else
{
for(int j = 0; j < 4; j++)
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
}
}
}

View File

@ -2745,6 +2745,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
return;
}
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
mov(ebp, ptr[&m_local.gd->dimx]);
paddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]);
paddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]);
}
if(m_sel.colclamp == 0)
{
// c[0] &= 0x000000ff;
@ -2756,16 +2766,6 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
pand(xmm6, xmm7);
}
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
mov(ebp, ptr[&m_local.gd->dimx]);
paddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]);
paddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]);
}
// GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1]));
movdqa(xmm7, xmm5);

View File

@ -801,7 +801,7 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
{
case PSM_PSMCT32:
case PSM_PSMZ32:
ReadColumn32<true>(y, dst, buff, 32);
ReadColumn32(y, dst, buff, 32);
memcpy(&buff[32], &src[x * 4], 32);
WriteColumn32<true, 0xffffffff>(y, dst, buff, 32);
break;
@ -809,17 +809,17 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
case PSM_PSMCT16S:
case PSM_PSMZ16:
case PSM_PSMZ16S:
ReadColumn16<true>(y, dst, buff, 32);
ReadColumn16(y, dst, buff, 32);
memcpy(&buff[32], &src[x * 2], 32);
WriteColumn16<true>(y, dst, buff, 32);
break;
case PSM_PSMT8:
ReadColumn8<true>(y, dst, buff, 16);
ReadColumn8(y, dst, buff, 16);
for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + x], 16);
WriteColumn8<true>(y, dst, buff, 16);
break;
case PSM_PSMT4:
ReadColumn4<true>(y, dst, buff, 16);
ReadColumn4(y, dst, buff, 16);
for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + (x >> 1)], 16);
WriteColumn4<true>(y, dst, buff, 16);
break;
@ -882,7 +882,7 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
{
case PSM_PSMCT32:
case PSM_PSMZ32:
ReadColumn32<true>(y, dst, buff, 32);
ReadColumn32(y, dst, buff, 32);
memcpy(&buff[0], &src[x * 4], 32);
WriteColumn32<true, 0xffffffff>(y, dst, buff, 32);
break;
@ -890,17 +890,17 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
case PSM_PSMCT16S:
case PSM_PSMZ16:
case PSM_PSMZ16S:
ReadColumn16<true>(y, dst, buff, 32);
ReadColumn16(y, dst, buff, 32);
memcpy(&buff[0], &src[x * 2], 32);
WriteColumn16<true>(y, dst, buff, 32);
break;
case PSM_PSMT8:
ReadColumn8<true>(y, dst, buff, 16);
ReadColumn8(y, dst, buff, 16);
for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + x], 16);
WriteColumn8<true>(y, dst, buff, 16);
break;
case PSM_PSMT4:
ReadColumn4<true>(y, dst, buff, 16);
ReadColumn4(y, dst, buff, 16);
for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + (x >> 1)], 16);
WriteColumn4<true>(y, dst, buff, 16);
break;
@ -1157,6 +1157,7 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, G
ty = th;
}
}
void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{
if(TRXREG.RRW == 0) return;
@ -1190,6 +1191,7 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, G
ty = th;
}
}
void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{
if(len <= 0) return;
@ -1224,7 +1226,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel32(addr + offset[x], *pd);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1244,7 +1246,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel24(addr + offset[x], *(uint32*)pb);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1266,7 +1268,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel16(addr + offset[x], *pw);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1283,7 +1285,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel8(addr + offset[x], *pb);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1301,7 +1303,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel4(addr + offset[x + 1], *pb >> 4);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1318,7 +1320,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel8H(addr + offset[x], *pb);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1336,7 +1338,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel4HL(addr + offset[x + 1], *pb >> 4);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1354,7 +1356,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel4HH(addr + offset[x + 1], *pb >> 4);
}
if(x == ex) {x = sx; y++;}
if(x >= ex) {x = sx; y++;}
}
break;
@ -1587,7 +1589,7 @@ void GSLocalMemory::ReadTexture32(const GSOffset* RESTRICT o, const GSVector4i&
{
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadBlock32<true>(src, dst, dstpitch);
ReadBlock32(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -1693,7 +1695,7 @@ void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, cons
{
ALIGN_STACK(32);
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
ReadBlock32(BlockPtr(bp), dst, dstpitch);
}
void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
@ -1845,7 +1847,7 @@ void GSLocalMemory::ReadTexture8P(const GSOffset* RESTRICT o, const GSVector4i&
{
FOREACH_BLOCK_START(r, 16, 16, 8)
{
ReadBlock8<true>(src, dst, dstpitch);
ReadBlock8(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -1890,7 +1892,7 @@ void GSLocalMemory::ReadTexture4HHP(const GSOffset* RESTRICT o, const GSVector4i
void GSLocalMemory::ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
ReadBlock8<true>(BlockPtr(bp), dst, dstpitch);
ReadBlock8(BlockPtr(bp), dst, dstpitch);
}
void GSLocalMemory::ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const

View File

@ -489,7 +489,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
{
edge = v1;
edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p);
edge.p = v0.p.xxxx().addm(ddx[m2], dv[0].p.yyyy()).xyzw(edge.p);
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
@ -532,7 +532,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
GSVertexSW scan;
scan.p = edge.p + dedge.p * dy;
scan.p = edge.p.addm(dedge.p, dy);
GSVector4 lrf = scan.p.ceil();
GSVector4 l = lrf.max(scissor);
@ -546,14 +546,14 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
if(pixels > 0)
{
scan.t = edge.t + dedge.t * dy;
scan.c = edge.c + dedge.c * dy;
scan.t = edge.t.addm(dedge.t, dy);
scan.c = edge.c.addm(dedge.c, dy);
GSVector4 prestep = (l - p0).xxxx();
scan.p += dscan.p * prestep;
scan.t += dscan.t * prestep;
scan.c += dscan.c * prestep;
scan.p = scan.p.addm(dscan.p, prestep);
scan.t = scan.t.addm(dscan.t, prestep);
scan.c = scan.c.addm(dscan.c, prestep);
AddScanline(e++, pixels, left, top, scan);
}

View File

@ -31,12 +31,11 @@ GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter)
m_fba = !!theApp.GetConfig("fba", 1);
UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0) && !!theApp.GetConfig("UserHacks", 0);
UserHacks_WildHack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_WildHack", 0) : 0;
UserHacks_AlphaStencil = !!theApp.GetConfig("UserHacks_AlphaStencil", 0) && !!theApp.GetConfig("UserHacks", 0);
UserHacks_TCOffset = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_TCOffset", 0) : 0;
UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f;
UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f;
UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f;
UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f;
}
GSRendererDX::~GSRendererDX()

View File

@ -37,7 +37,6 @@ protected:
virtual void SetupIA() = 0;
virtual void UpdateFBA(GSTexture* rt) {}
unsigned int UserHacks_WildHack;
unsigned int UserHacks_TCOffset;
float UserHacks_TCO_x, UserHacks_TCO_y;

View File

@ -51,8 +51,10 @@ void GSRendererDX11::SetupIA()
{
GSVertex* RESTRICT d = (GSVertex*)ptr;
for(unsigned int i = 0; i < m_vertex.next; i++, d++)
if(PRIM->TME && PRIM->FST) d->UV &= 0x3FEF3FEF;
for(unsigned int i = 0; i < m_vertex.next; i++)
{
if(PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF;
}
}
dev->IAUnmapVertexBuffer();

View File

@ -205,7 +205,9 @@ void GSRendererDX9::SetupIA()
//printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : "");
}
else
{
t = GSVector4(GSVector4i::load(s->UV).upl16());
}
}
else
{

View File

@ -202,6 +202,11 @@ void GSRendererHW::Draw()
if(PRIM->TME)
{
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
}
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());

View File

@ -47,6 +47,16 @@ GSState::GSState()
{
m_nativeres = !!theApp.GetConfig("nativeres", 1);
s_n = 0;
s_dump = !!theApp.GetConfig("dump", 0);
s_save = !!theApp.GetConfig("save", 0);
s_savez = !!theApp.GetConfig("savez", 0);
s_saven = theApp.GetConfig("saven", 0);
UserHacks_AggressiveCRC = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_AggressiveCRC", 0) : 0;
UserHacks_DisableCrcHacks = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig( "UserHacks_DisableCrcHacks", 0 ) : 0;
UserHacks_WildHack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_WildHack", 0) : 0;
memset(&m_v, 0, sizeof(m_v));
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
@ -112,15 +122,6 @@ GSState::GSState()
Reset();
ResetHandlers();
s_n = 0;
s_dump = !!theApp.GetConfig("dump", 0);
s_save = !!theApp.GetConfig("save", 0);
s_savez = !!theApp.GetConfig("savez", 0);
s_saven = theApp.GetConfig("saven", 0);
userHacks_AggressiveCRC = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_AggressiveCRC", 0) : 0;
userHacks_DisableCrcHacks = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig( "UserHacks_DisableCrcHacks", 0 ) : 0;
}
GSState::~GSState()
@ -243,7 +244,7 @@ void GSState::ResetHandlers()
m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerPRIM;
m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA;
m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ;
m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV;
m_fpGIFPackedRegHandlers[GIF_REG_UV] = !UserHacks_WildHack ? &GSState::GIFPackedRegHandlerUV : &GSState::GIFPackedRegHandlerUV_Hack;
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<0>;
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<1>;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
@ -281,7 +282,7 @@ void GSState::ResetHandlers()
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = !UserHacks_WildHack ? &GSState::GIFRegHandlerUV : &GSState::GIFRegHandlerUV_Hack;
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>;
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>;
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>;
@ -492,6 +493,13 @@ void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
{
GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
m_v.UV = (uint32)GSVector4i::store(v.ps32(v));
}
void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r)
{
GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
m_v.UV = (uint32)GSVector4i::store(v.ps32(v));
isPackedUV_HackFlag = true;
@ -682,6 +690,11 @@ void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
}
void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
{
m_v.UV = r->UV.u32[0] & 0x3fff3fff;
}
void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r)
{
m_v.UV = r->UV.u32[0] & 0x3fff3fff;
@ -1179,6 +1192,8 @@ template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
{
// during startup all regs are cleared to 0 (by the bios or something), so we mask z until this register becomes valid
// edit: breaks Grandia Xtreme and sounds like a bad idea generally. What was the intend?
// edit2: should be set only before any serious drawing happens, grandia extreme nulls out this register throughout the whole game,
// I already forgot what it fixed, that game never masked the zbuffer, but assumed it was set by default
//ZBUF.ZMSK = 1;
}
@ -2229,7 +2244,7 @@ void GSState::SetGameCRC(uint32 crc, int options)
{
m_crc = crc;
m_options = options;
m_game = CRC::Lookup(userHacks_DisableCrcHacks ? 0 : crc);
m_game = CRC::Lookup(UserHacks_DisableCrcHacks ? 0 : crc);
}
//
@ -2589,7 +2604,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
uv = uv.rintersect(vr - GSVector4i(0,0,1,1));
uv = uv.rintersect(tr);
switch(wms)
{
@ -2604,8 +2619,13 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
if(vr.x > uv.z) vr.z = vr.x + 1;
else if(vr.z < uv.x) vr.x = vr.z - 1;
else
{
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
}
break;
case CLAMP_REGION_REPEAT:
break;
@ -2625,8 +2645,13 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
if(vr.y > uv.w) vr.w = vr.y + 1;
else if(vr.w < uv.y) vr.y = vr.w - 1;
else
{
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
}
break;
case CLAMP_REGION_REPEAT:
break;
@ -2682,7 +2707,6 @@ void GSState::GetAlphaMinMax()
a.w = max(env.TEXA.TA0, env.TEXA.TA1);
break;
case 3:
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
break;
default:
@ -5355,7 +5379,7 @@ bool GSState::IsBadFrame(int& skip, int UserHacks_SkipDraw)
GetSkipCount gsc = map[m_game.title];
g_crc_region = m_game.region;
g_aggressive = userHacks_AggressiveCRC;
g_aggressive = UserHacks_AggressiveCRC;
#ifdef ENABLE_DYNAMIC_CRC_HACK
bool res=false; if(IsInvokedDynamicCrcHack(fi, skip, g_crc_region, res, m_crc)){ if( !res ) return false; } else

View File

@ -48,6 +48,7 @@ class GSState : public GSAlignedClass<32>
void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
@ -77,6 +78,7 @@ class GSState : public GSAlignedClass<32>
void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r);
void GIFRegHandlerST(const GIFReg* RESTRICT r);
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
void GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
@ -138,8 +140,11 @@ class GSState : public GSAlignedClass<32>
protected:
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
int userHacks_AggressiveCRC;
int userHacks_DisableCrcHacks;
int UserHacks_AggressiveCRC;
int UserHacks_DisableCrcHacks;
int UserHacks_WildHack;
bool isPackedUV_HackFlag;
GSVertex m_v;
float m_q;
@ -196,8 +201,6 @@ public:
GSDump m_dump;
bool m_nativeres;
bool isPackedUV_HackFlag;
int s_n;
bool s_dump;
bool s_save;

View File

@ -62,7 +62,9 @@ const char* GSUtil::GetLibName()
sl.push_back(format("GCC %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__));
#endif
#if _M_SSE >= 0x500
#if _M_SSE >= 0x501
sl.push_back("AVX2");
#elif _M_SSE >= 0x500
sl.push_back("AVX");
#elif _M_SSE >= 0x402
sl.push_back("SSE42");

View File

@ -73,6 +73,92 @@ const GSVector4 GSVector4::m_four(4.0f);
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
#if _M_SSE >= 0x500
const GSVector8 GSVector8::m_one(1.0f);
const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
#endif
#if _M_SSE >= 0x501
const GSVector8i GSVector8i::m_xff[33] =
{
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
};
const GSVector8i GSVector8i::m_x0f[33] =
{
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
};
#endif
GSVector4i GSVector4i::fit(int arx, int ary) const
{
GSVector4i r = *this;

File diff suppressed because it is too large Load Diff

View File

@ -20,4 +20,4 @@
*/
#include "stdafx.h"
#include "GSVertexSW.h"
#include "GSVertexSW.h"

View File

@ -160,7 +160,7 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs10.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\release.props" />
<Import Project="vsprops\x86.props" />
@ -176,7 +176,7 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs10.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\debug.props" />
<Import Project="vsprops\x86.props" />
@ -224,7 +224,7 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs10.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\release.props" />
<Import Project="vsprops\x64.props" />
@ -240,7 +240,7 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs10.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\debug.props" />
<Import Project="vsprops\x64.props" />

View File

@ -1,6 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug AVX2|Win32">
<Configuration>Debug AVX2</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug AVX2|x64">
<Configuration>Debug AVX2</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug AVX|Win32">
<Configuration>Debug AVX</Configuration>
<Platform>Win32</Platform>
@ -33,6 +41,14 @@
<Configuration>Debug SSSE3</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release AVX2|Win32">
<Configuration>Release AVX2</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release AVX2|x64">
<Configuration>Release AVX2</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release AVX|Win32">
<Configuration>Release AVX</Configuration>
<Platform>Win32</Platform>
@ -85,6 +101,12 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
@ -95,6 +117,11 @@
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
@ -129,6 +156,12 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
@ -139,6 +172,11 @@
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
@ -176,7 +214,15 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs11.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\release.props" />
<Import Project="vsprops\x86.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx2.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\release.props" />
<Import Project="vsprops\x86.props" />
@ -192,7 +238,15 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs11.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\debug.props" />
<Import Project="vsprops\x86.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx2.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\debug.props" />
<Import Project="vsprops\x86.props" />
@ -240,7 +294,15 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs11.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\release.props" />
<Import Project="vsprops\x64.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx2.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\release.props" />
<Import Project="vsprops\x64.props" />
@ -256,7 +318,15 @@
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx.props" />
<Import Project="vsprops\avx_vs11.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\debug.props" />
<Import Project="vsprops\x64.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="vsprops\ProjectRootDir.props" />
<Import Project="vsprops\avx2.props" />
<Import Project="vsprops\common.props" />
<Import Project="vsprops\debug.props" />
<Import Project="vsprops\x64.props" />
@ -387,6 +457,15 @@
<TargetMachine>MachineX86</TargetMachine>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
</ClCompile>
<Link>
<ModuleDefinitionFile>.\GSdx.def</ModuleDefinitionFile>
<TargetMachine>MachineX86</TargetMachine>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
@ -411,6 +490,18 @@
</DataExecutionPrevention>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
</ClCompile>
<Link>
<DataExecutionPrevention>
</DataExecutionPrevention>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
@ -429,6 +520,15 @@
<TargetMachine>MachineX86</TargetMachine>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
</ClCompile>
<Link>
<ModuleDefinitionFile>.\GSdx.def</ModuleDefinitionFile>
<TargetMachine>MachineX86</TargetMachine>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
@ -453,6 +553,18 @@
</DataExecutionPrevention>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
</ClCompile>
<Link>
<DataExecutionPrevention>
</DataExecutionPrevention>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="GLLoader.cpp" />
<ClCompile Include="GPU.cpp" />
@ -485,10 +597,12 @@
<ClCompile Include="GSDrawScanlineCodeGenerator.cpp" />
<ClCompile Include="GSDrawScanlineCodeGenerator.x64.avx.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
@ -501,22 +615,28 @@
</ClCompile>
<ClCompile Include="GSDrawScanlineCodeGenerator.x64.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSDrawScanlineCodeGenerator.x86.avx.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
@ -529,15 +649,19 @@
</ClCompile>
<ClCompile Include="GSDrawScanlineCodeGenerator.x86.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSDump.cpp" />
<ClCompile Include="GSdx.cpp" />
@ -546,6 +670,8 @@
<ClCompile Include="GSPerfMon.cpp" />
<ClCompile Include="GSRasterizer.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">AssemblyAndSourceCode</AssemblerOutput>
</ClCompile>
<ClCompile Include="GSRenderer.cpp" />
<ClCompile Include="GSRendererCS.cpp" />
@ -573,13 +699,19 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSSetupPrimCodeGenerator.x64.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
@ -601,13 +733,19 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSSetupPrimCodeGenerator.x86.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
@ -635,6 +773,7 @@
<ClCompile Include="GSUtil.cpp" />
<ClCompile Include="GSVector.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">AssemblyAndSourceCode</AssemblerOutput>
</ClCompile>
<ClCompile Include="GSVertexList.cpp" />
<ClCompile Include="GSVertexSW.cpp" />
@ -647,8 +786,10 @@
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
@ -657,8 +798,10 @@
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
@ -672,10 +815,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -692,10 +839,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -712,10 +863,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -732,10 +887,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -752,10 +911,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -772,10 +935,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -792,10 +959,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -812,10 +983,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -832,10 +1007,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -852,10 +1031,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -872,10 +1055,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -892,10 +1079,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -912,10 +1103,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -932,10 +1127,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -952,10 +1151,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -972,10 +1175,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -992,10 +1199,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1012,10 +1223,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1032,10 +1247,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1052,10 +1271,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1072,10 +1295,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1092,10 +1319,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1112,10 +1343,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1132,10 +1367,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1152,10 +1391,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1172,10 +1415,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1192,10 +1439,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1212,10 +1463,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1232,10 +1487,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1252,10 +1511,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1272,10 +1535,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1292,10 +1559,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1312,10 +1583,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1332,10 +1607,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1352,10 +1631,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1372,10 +1655,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1392,10 +1679,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1412,10 +1703,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1432,10 +1727,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1452,10 +1751,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1472,10 +1775,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1492,10 +1799,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1512,10 +1823,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1532,10 +1847,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">
@ -1552,10 +1871,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">
@ -1572,10 +1895,14 @@
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release AVX2|x64'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">
</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">

View File

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup Label="UserMacros">
<SSEtype>AVX2</SSEtype>
</PropertyGroup>
<PropertyGroup>
<_ProjectFileVersion>10.0.30128.1</_ProjectFileVersion>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions>_M_SSE=0x501;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<BuildMacro Include="SSEtype">
<Value>$(SSEtype)</Value>
</BuildMacro>
</ItemGroup>
</Project>

View File

@ -9,7 +9,7 @@
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions>_M_SSE=0x500;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>/arch:AVX %(AdditionalOptions)</AdditionalOptions>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>

View File

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup Label="UserMacros">
<SSEtype>AVX</SSEtype>
</PropertyGroup>
<PropertyGroup>
<_ProjectFileVersion>10.0.30128.1</_ProjectFileVersion>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions>_M_SSE=0x500;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<BuildMacro Include="SSEtype">
<Value>$(SSEtype)</Value>
</BuildMacro>
</ItemGroup>
</Project>

View File

@ -14,7 +14,7 @@
<WarningLevel>Level4</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<DisableSpecificWarnings>4996;4995;4324;4100;4101;4201;4556;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<AdditionalIncludeDirectories>$(DXSDK_DIR)include;$(VTUNE_AMPLIFIER_XE_2011_DIR)include;$(SolutionDir)3rdparty;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(DXSDK_DIR)include;$(VTUNE_AMPLIFIER_XE_2013_DIR)include;$(SolutionDir)3rdparty;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<Link>
@ -23,7 +23,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Windows</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<AdditionalLibraryDirectories>$(VTUNE_AMPLIFIER_XE_2011_DIR)lib32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>$(VTUNE_AMPLIFIER_XE_2013_DIR)lib32;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
<PostBuildEvent>
<Command>.\postBuild.cmd "$(TargetPath)" "$(TargetName)" $(TargetExt) $(PcsxSubsection)</Command>

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
const char *getVersionString() const { return "2.991"; }
const char *getVersionString() const { return "4.00"; }
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
@ -272,8 +272,16 @@ void setnle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); }
void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
void jg(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
void setg(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); }
#ifdef XBYAK32
void jcxz(const char *label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
void jecxz(const char *label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
#else
void jecxz(const char *label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
void jrcxz(const char *label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
#endif
#ifdef XBYAK64
void cdqe() { db(0x48); db(0x98); }
void cqo() { db(0x48); db(0x99); }
#else
void aaa() { db(0x37); }
void aad() { db(0xD5); db(0x0A); }
@ -315,7 +323,9 @@ void rdmsr() { db(0x0F); db(0x32); }
void rdpmc() { db(0x0F); db(0x33); }
void rdtsc() { db(0x0F); db(0x31); }
void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
void ud2() { db(0x0F); db(0x0B); }
void wait() { db(0x9B); }
void fwait() { db(0x9B); }
void wbinvd() { db(0x0F); db(0x09); }
void wrmsr() { db(0x0F); db(0x30); }
void xlatb() { db(0xD7); }
@ -336,6 +346,8 @@ void fdecstp() { db(0xD9); db(0xF6); }
void fdivp() { db(0xDE); db(0xF9); }
void fdivrp() { db(0xDE); db(0xF1); }
void fincstp() { db(0xD9); db(0xF7); }
void finit() { db(0x9B); db(0xDB); db(0xE3); }
void fninit() { db(0xDB); db(0xE3); }
void fld1() { db(0xD9); db(0xE8); }
void fldl2t() { db(0xD9); db(0xE9); }
void fldl2e() { db(0xD9); db(0xEA); }
@ -369,18 +381,30 @@ void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); }
void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); }
void and_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); }
#ifndef XBYAK_NO_OP_NAMES
void and(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); }
void and(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); }
#endif
void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); }
void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); }
void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
#ifndef XBYAK_NO_OP_NAMES
void or(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
void or(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
#endif
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
void xor_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
#ifndef XBYAK_NO_OP_NAMES
void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
#endif
void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
void div(const Operand& op) { opR_ModM(op, 0, 6, 0xF6); }
@ -388,7 +412,10 @@ void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); }
void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
#ifndef XBYAK_NO_OP_NAMES
void not(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
#endif
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); }
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
@ -411,6 +438,9 @@ void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0
void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); }
void bsf(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
void bsr(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
void popcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); }
void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); }
void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); }
@ -478,9 +508,15 @@ void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62
void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void aeskeygenassist(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); }
void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); }
void pclmullqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); }
void pclmulhqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); }
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); }
void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); }
void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); }
void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
@ -509,29 +545,53 @@ void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); }
void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); }
void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); }
void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); }
void fadd(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C0, 0xDCC0); }
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); }
void fcmove(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC8, 0x00C8); }
void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); }
void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); }
void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); }
void fcmovu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD8, 0x00D8); }
void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); }
void fcmovnb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC0, 0x00C0); }
void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); }
void fcmovne(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC8, 0x00C8); }
void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); }
void fcmovnbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD0, 0x00D0); }
void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); }
void fcmovnu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD8, 0x00D8); }
void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); }
void fcomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBF0, 0x00F0); }
void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); }
void fcomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFF0, 0x00F0); }
void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); }
void fucomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBE8, 0x00E8); }
void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); }
void fucomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFE8, 0x00E8); }
void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); }
void fdiv(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F0, 0xDCF8); }
void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); }
void fdivp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF8); }
void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); }
void fdivr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F8, 0xDCF0); }
void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); }
void fdivrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF0); }
void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); }
void fmul(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C8, 0xDCC8); }
void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); }
void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
void fsubp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); }
void fsubp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE8); }
void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); }
void fsubr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E8, 0xDCE0); }
void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); }
void fsubrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE0); }
void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); }
void fcomp(const Fpu& reg) { opFpu(reg, 0xD8, 0xD8); }
void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); }
@ -581,10 +641,12 @@ void vdppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX
void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); }
void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
void vpblendd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); }
void vpblendd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); }
void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); }
@ -593,6 +655,11 @@ void vpclmulqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) {
void vpclmulqdq(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x44, false, 0); db(imm); }
void vpermilps(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0C, true, 0); }
void vpermilpd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0D, true, 0); }
void vpsllvd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x47, true, 0); }
void vpsllvq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x47, true, 1); }
void vpsravd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x46, true, 0); }
void vpsrlvd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x45, true, 0); }
void vpsrlvq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x45, true, 1); }
void vcmppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); }
void vcmppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); }
void vcmpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F, 0xC2, true, -1); db(imm); }
@ -607,121 +674,121 @@ void vcvtss2sd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM
void vcvtss2sd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x5A, false, -1); }
void vinsertps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); }
void vinsertps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); }
void vpacksswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x63, false, -1); }
void vpacksswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x63, false, -1); }
void vpackssdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6B, false, -1); }
void vpackssdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6B, false, -1); }
void vpackuswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x67, false, -1); }
void vpackuswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x67, false, -1); }
void vpackusdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x2B, false, -1); }
void vpackusdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x2B, false, -1); }
void vpaddb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFC, false, -1); }
void vpaddb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFC, false, -1); }
void vpaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFD, false, -1); }
void vpaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFD, false, -1); }
void vpaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFE, false, -1); }
void vpaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFE, false, -1); }
void vpaddq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD4, false, -1); }
void vpaddq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD4, false, -1); }
void vpaddsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEC, false, -1); }
void vpaddsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEC, false, -1); }
void vpaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xED, false, -1); }
void vpaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xED, false, -1); }
void vpaddusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDC, false, -1); }
void vpaddusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDC, false, -1); }
void vpaddusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDD, false, -1); }
void vpaddusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDD, false, -1); }
void vpalignr(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0F, false, -1); db(imm); }
void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0F, false, -1); db(imm); }
void vpand(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDB, false, -1); }
void vpand(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDB, false, -1); }
void vpandn(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDF, false, -1); }
void vpandn(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDF, false, -1); }
void vpavgb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE0, false, -1); }
void vpavgb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE0, false, -1); }
void vpavgw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE3, false, -1); }
void vpavgw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE3, false, -1); }
void vpcmpeqb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x74, false, -1); }
void vpcmpeqb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x74, false, -1); }
void vpcmpeqw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x75, false, -1); }
void vpcmpeqw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x75, false, -1); }
void vpcmpeqd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x76, false, -1); }
void vpcmpeqd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x76, false, -1); }
void vpcmpeqq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x29, false, -1); }
void vpcmpeqq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x29, false, -1); }
void vpcmpgtb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x64, false, -1); }
void vpcmpgtb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x64, false, -1); }
void vpcmpgtw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x65, false, -1); }
void vpcmpgtw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x65, false, -1); }
void vpcmpgtd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x66, false, -1); }
void vpcmpgtd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x66, false, -1); }
void vpcmpgtq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x37, false, -1); }
void vpcmpgtq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x37, false, -1); }
void vphaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x01, false, -1); }
void vphaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x01, false, -1); }
void vphaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x02, false, -1); }
void vphaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x02, false, -1); }
void vphaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x03, false, -1); }
void vphaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x03, false, -1); }
void vphsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x05, false, -1); }
void vphsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x05, false, -1); }
void vphsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x06, false, -1); }
void vphsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x06, false, -1); }
void vphsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x07, false, -1); }
void vphsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x07, false, -1); }
void vpmaddwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF5, false, -1); }
void vpmaddwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF5, false, -1); }
void vpmaddubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x04, false, -1); }
void vpmaddubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x04, false, -1); }
void vpmaxsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3C, false, -1); }
void vpmaxsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3C, false, -1); }
void vpmaxsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEE, false, -1); }
void vpmaxsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEE, false, -1); }
void vpmaxsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3D, false, -1); }
void vpmaxsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3D, false, -1); }
void vpmaxub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDE, false, -1); }
void vpmaxub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDE, false, -1); }
void vpmaxuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3E, false, -1); }
void vpmaxuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3E, false, -1); }
void vpmaxud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3F, false, -1); }
void vpmaxud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3F, false, -1); }
void vpminsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x38, false, -1); }
void vpminsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x38, false, -1); }
void vpminsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEA, false, -1); }
void vpminsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEA, false, -1); }
void vpminsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x39, false, -1); }
void vpminsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x39, false, -1); }
void vpminub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDA, false, -1); }
void vpminub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDA, false, -1); }
void vpminuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3A, false, -1); }
void vpminuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3A, false, -1); }
void vpminud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3B, false, -1); }
void vpminud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3B, false, -1); }
void vpmulhuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE4, false, -1); }
void vpmulhuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE4, false, -1); }
void vpmulhrsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0B, false, -1); }
void vpmulhrsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0B, false, -1); }
void vpmulhw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE5, false, -1); }
void vpmulhw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE5, false, -1); }
void vpmullw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD5, false, -1); }
void vpmullw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD5, false, -1); }
void vpmulld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x40, false, -1); }
void vpmulld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x40, false, -1); }
void vpacksswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x63, true, -1); }
void vpacksswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x63, true, -1); }
void vpackssdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6B, true, -1); }
void vpackssdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6B, true, -1); }
void vpackuswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x67, true, -1); }
void vpackuswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x67, true, -1); }
void vpackusdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x2B, true, -1); }
void vpackusdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x2B, true, -1); }
void vpaddb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFC, true, -1); }
void vpaddb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFC, true, -1); }
void vpaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFD, true, -1); }
void vpaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFD, true, -1); }
void vpaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFE, true, -1); }
void vpaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFE, true, -1); }
void vpaddq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD4, true, -1); }
void vpaddq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD4, true, -1); }
void vpaddsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEC, true, -1); }
void vpaddsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEC, true, -1); }
void vpaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xED, true, -1); }
void vpaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xED, true, -1); }
void vpaddusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDC, true, -1); }
void vpaddusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDC, true, -1); }
void vpaddusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDD, true, -1); }
void vpaddusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDD, true, -1); }
void vpalignr(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); }
void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); }
void vpand(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDB, true, -1); }
void vpand(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDB, true, -1); }
void vpandn(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDF, true, -1); }
void vpandn(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDF, true, -1); }
void vpavgb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE0, true, -1); }
void vpavgb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE0, true, -1); }
void vpavgw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE3, true, -1); }
void vpavgw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE3, true, -1); }
void vpcmpeqb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x74, true, -1); }
void vpcmpeqb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x74, true, -1); }
void vpcmpeqw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x75, true, -1); }
void vpcmpeqw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x75, true, -1); }
void vpcmpeqd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x76, true, -1); }
void vpcmpeqd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x76, true, -1); }
void vpcmpeqq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x29, true, -1); }
void vpcmpeqq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x29, true, -1); }
void vpcmpgtb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x64, true, -1); }
void vpcmpgtb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x64, true, -1); }
void vpcmpgtw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x65, true, -1); }
void vpcmpgtw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x65, true, -1); }
void vpcmpgtd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x66, true, -1); }
void vpcmpgtd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x66, true, -1); }
void vpcmpgtq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x37, true, -1); }
void vpcmpgtq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x37, true, -1); }
void vphaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x01, true, -1); }
void vphaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x01, true, -1); }
void vphaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x02, true, -1); }
void vphaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x02, true, -1); }
void vphaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x03, true, -1); }
void vphaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x03, true, -1); }
void vphsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x05, true, -1); }
void vphsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x05, true, -1); }
void vphsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x06, true, -1); }
void vphsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x06, true, -1); }
void vphsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x07, true, -1); }
void vphsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x07, true, -1); }
void vpmaddwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF5, true, -1); }
void vpmaddwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF5, true, -1); }
void vpmaddubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x04, true, -1); }
void vpmaddubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x04, true, -1); }
void vpmaxsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3C, true, -1); }
void vpmaxsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3C, true, -1); }
void vpmaxsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEE, true, -1); }
void vpmaxsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEE, true, -1); }
void vpmaxsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3D, true, -1); }
void vpmaxsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3D, true, -1); }
void vpmaxub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDE, true, -1); }
void vpmaxub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDE, true, -1); }
void vpmaxuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3E, true, -1); }
void vpmaxuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3E, true, -1); }
void vpmaxud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3F, true, -1); }
void vpmaxud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3F, true, -1); }
void vpminsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x38, true, -1); }
void vpminsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x38, true, -1); }
void vpminsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEA, true, -1); }
void vpminsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEA, true, -1); }
void vpminsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x39, true, -1); }
void vpminsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x39, true, -1); }
void vpminub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDA, true, -1); }
void vpminub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDA, true, -1); }
void vpminuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3A, true, -1); }
void vpminuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3A, true, -1); }
void vpminud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3B, true, -1); }
void vpminud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3B, true, -1); }
void vpmulhuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE4, true, -1); }
void vpmulhuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE4, true, -1); }
void vpmulhrsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0B, true, -1); }
void vpmulhrsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0B, true, -1); }
void vpmulhw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE5, true, -1); }
void vpmulhw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE5, true, -1); }
void vpmullw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD5, true, -1); }
void vpmullw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD5, true, -1); }
void vpmulld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x40, true, -1); }
void vpmulld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x40, true, -1); }
void vpmuludq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF4, false, -1); }
void vpmuludq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF4, false, -1); }
void vpmuldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x28, false, -1); }
void vpmuldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x28, false, -1); }
void vpor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEB, false, -1); }
void vpor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEB, false, -1); }
void vpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF6, false, -1); }
void vpsadbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF6, false, -1); }
void vpshufb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x00, false, -1); }
void vpsignb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x08, false, -1); }
void vpsignb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x08, false, -1); }
void vpsignw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x09, false, -1); }
void vpsignw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x09, false, -1); }
void vpsignd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0A, false, -1); }
void vpsignd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0A, false, -1); }
void vpmuldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x28, true, -1); }
void vpmuldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x28, true, -1); }
void vpor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEB, true, -1); }
void vpor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEB, true, -1); }
void vpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF6, true, -1); }
void vpsadbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF6, true, -1); }
void vpshufb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x00, true, -1); }
void vpsignb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x08, true, -1); }
void vpsignb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x08, true, -1); }
void vpsignw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x09, true, -1); }
void vpsignw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x09, true, -1); }
void vpsignd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0A, true, -1); }
void vpsignd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0A, true, -1); }
void vpsllw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF1, false, -1); }
void vpsllw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF1, false, -1); }
void vpslld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF2, false, -1); }
@ -738,40 +805,40 @@ void vpsrld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm
void vpsrld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD2, false, -1); }
void vpsrlq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD3, false, -1); }
void vpsrlq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD3, false, -1); }
void vpsubb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF8, false, -1); }
void vpsubb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF8, false, -1); }
void vpsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF9, false, -1); }
void vpsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF9, false, -1); }
void vpsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFA, false, -1); }
void vpsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFA, false, -1); }
void vpsubq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFB, false, -1); }
void vpsubq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFB, false, -1); }
void vpsubsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE8, false, -1); }
void vpsubsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE8, false, -1); }
void vpsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE9, false, -1); }
void vpsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE9, false, -1); }
void vpsubusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD8, false, -1); }
void vpsubusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD8, false, -1); }
void vpsubusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD9, false, -1); }
void vpsubusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD9, false, -1); }
void vpunpckhbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x68, false, -1); }
void vpunpckhbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x68, false, -1); }
void vpunpckhwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x69, false, -1); }
void vpunpckhwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x69, false, -1); }
void vpunpckhdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6A, false, -1); }
void vpunpckhdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6A, false, -1); }
void vpunpckhqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6D, false, -1); }
void vpunpckhqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6D, false, -1); }
void vpunpcklbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x60, false, -1); }
void vpunpcklbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x60, false, -1); }
void vpunpcklwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x61, false, -1); }
void vpunpcklwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x61, false, -1); }
void vpunpckldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x62, false, -1); }
void vpunpckldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x62, false, -1); }
void vpunpcklqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6C, false, -1); }
void vpunpcklqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6C, false, -1); }
void vpxor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEF, false, -1); }
void vpxor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEF, false, -1); }
void vpsubb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF8, true, -1); }
void vpsubb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF8, true, -1); }
void vpsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF9, true, -1); }
void vpsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF9, true, -1); }
void vpsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFA, true, -1); }
void vpsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFA, true, -1); }
void vpsubq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFB, true, -1); }
void vpsubq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFB, true, -1); }
void vpsubsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE8, true, -1); }
void vpsubsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE8, true, -1); }
void vpsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE9, true, -1); }
void vpsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE9, true, -1); }
void vpsubusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD8, true, -1); }
void vpsubusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD8, true, -1); }
void vpsubusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD9, true, -1); }
void vpsubusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD9, true, -1); }
void vpunpckhbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x68, true, -1); }
void vpunpckhbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x68, true, -1); }
void vpunpckhwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x69, true, -1); }
void vpunpckhwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x69, true, -1); }
void vpunpckhdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6A, true, -1); }
void vpunpckhdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6A, true, -1); }
void vpunpckhqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6D, true, -1); }
void vpunpckhqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6D, true, -1); }
void vpunpcklbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x60, true, -1); }
void vpunpcklbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x60, true, -1); }
void vpunpcklwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x61, true, -1); }
void vpunpcklwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x61, true, -1); }
void vpunpckldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x62, true, -1); }
void vpunpckldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x62, true, -1); }
void vpunpcklqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6C, true, -1); }
void vpunpcklqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6C, true, -1); }
void vpxor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEF, true, -1); }
void vpxor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEF, true, -1); }
void vrcpss(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x53, false, -1); }
void vrcpss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x53, false, -1); }
void vrsqrtss(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x52, false, -1); }
@ -817,25 +884,25 @@ void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); }
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); }
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); }
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, false, -1); }
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, false, -1); }
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, false, -1); }
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, true, -1); }
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, true, -1); }
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, true, -1); }
void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); }
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, false, -1); }
void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, false, -1); }
void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, false, -1); }
void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, false, -1); }
void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x24, false, -1); }
void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x25, false, -1); }
void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x30, false, -1); }
void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x31, false, -1); }
void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x32, false, -1); }
void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x33, false, -1); }
void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x34, false, -1); }
void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x35, false, -1); }
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, false, -1, imm); }
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, false, -1, imm); }
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, false, -1, imm); }
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, true, -1); }
void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, true, -1); }
void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, true, -1); }
void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, true, -1); }
void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x24, true, -1); }
void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x25, true, -1); }
void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x30, true, -1); }
void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x31, true, -1); }
void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x32, true, -1); }
void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x33, true, -1); }
void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x34, true, -1); }
void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x35, true, -1); }
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); }
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); }
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); }
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, false, -1); }
void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); }
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); }
@ -863,6 +930,302 @@ void vmaskmovps(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X
void vmaskmovps(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2E, true, 0); }
void vmaskmovpd(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x2D, true, 0); }
void vmaskmovpd(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2F, true, 0); }
void vpmaskmovd(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x8C, true, 0); }
void vpmaskmovd(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x8E, true, 0); }
void vpmaskmovq(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x8C, true, 1); }
void vpmaskmovq(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x8E, true, 1); }
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x36, true, 0); }
void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x16, true, 0); }
void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x00, true, 1, imm); }
void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x01, true, 1, imm); }
void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); }
void vcmpeqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 0); }
void vcmpeqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 0); }
void cmpltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 1); }
void vcmpltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 1); }
void vcmpltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 1); }
void cmplepd(const Xmm& x, const Operand& op) { cmppd(x, op, 2); }
void vcmplepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 2); }
void vcmplepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 2); }
void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
void vcmpunordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 3); }
void vcmpunordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 3); }
void cmpneqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 4); }
void vcmpneqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 4); }
void vcmpneqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 4); }
void cmpnltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 5); }
void vcmpnltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 5); }
void vcmpnltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 5); }
void cmpnlepd(const Xmm& x, const Operand& op) { cmppd(x, op, 6); }
void vcmpnlepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 6); }
void vcmpnlepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 6); }
void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); }
void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 7); }
void vcmpordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 7); }
void vcmpeq_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 8); }
void vcmpeq_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 8); }
void vcmpngepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 9); }
void vcmpngepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 9); }
void vcmpngtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 10); }
void vcmpngtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 10); }
void vcmpfalsepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 11); }
void vcmpfalsepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 11); }
void vcmpneq_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 12); }
void vcmpneq_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 12); }
void vcmpgepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 13); }
void vcmpgepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 13); }
void vcmpgtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 14); }
void vcmpgtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 14); }
void vcmptruepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 15); }
void vcmptruepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 15); }
void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); }
void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 16); }
void vcmplt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 17); }
void vcmplt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 17); }
void vcmple_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 18); }
void vcmple_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 18); }
void vcmpunord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 19); }
void vcmpunord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 19); }
void vcmpneq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 20); }
void vcmpneq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 20); }
void vcmpnlt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 21); }
void vcmpnlt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 21); }
void vcmpnle_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 22); }
void vcmpnle_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 22); }
void vcmpord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 23); }
void vcmpord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 23); }
void vcmpeq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 24); }
void vcmpeq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 24); }
void vcmpnge_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 25); }
void vcmpnge_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 25); }
void vcmpngt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 26); }
void vcmpngt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 26); }
void vcmpfalse_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 27); }
void vcmpfalse_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 27); }
void vcmpneq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 28); }
void vcmpneq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 28); }
void vcmpge_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 29); }
void vcmpge_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 29); }
void vcmpgt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 30); }
void vcmpgt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 30); }
void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); }
void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 31); }
void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); }
void vcmpeqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 0); }
void vcmpeqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 0); }
void cmpltps(const Xmm& x, const Operand& op) { cmpps(x, op, 1); }
void vcmpltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 1); }
void vcmpltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 1); }
void cmpleps(const Xmm& x, const Operand& op) { cmpps(x, op, 2); }
void vcmpleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 2); }
void vcmpleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 2); }
void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
void vcmpunordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 3); }
void vcmpunordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 3); }
void cmpneqps(const Xmm& x, const Operand& op) { cmpps(x, op, 4); }
void vcmpneqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 4); }
void vcmpneqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 4); }
void cmpnltps(const Xmm& x, const Operand& op) { cmpps(x, op, 5); }
void vcmpnltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 5); }
void vcmpnltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 5); }
void cmpnleps(const Xmm& x, const Operand& op) { cmpps(x, op, 6); }
void vcmpnleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 6); }
void vcmpnleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 6); }
void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); }
void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); }
void vcmpordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 7); }
void vcmpeq_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 8); }
void vcmpeq_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 8); }
void vcmpngeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 9); }
void vcmpngeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 9); }
void vcmpngtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 10); }
void vcmpngtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 10); }
void vcmpfalseps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 11); }
void vcmpfalseps(const Xmm& x, const Operand& op) { vcmpps(x, op, 11); }
void vcmpneq_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 12); }
void vcmpneq_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 12); }
void vcmpgeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 13); }
void vcmpgeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 13); }
void vcmpgtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 14); }
void vcmpgtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 14); }
void vcmptrueps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 15); }
void vcmptrueps(const Xmm& x, const Operand& op) { vcmpps(x, op, 15); }
void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); }
void vcmpeq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 16); }
void vcmplt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 17); }
void vcmplt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 17); }
void vcmple_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 18); }
void vcmple_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 18); }
void vcmpunord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 19); }
void vcmpunord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 19); }
void vcmpneq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 20); }
void vcmpneq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 20); }
void vcmpnlt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 21); }
void vcmpnlt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 21); }
void vcmpnle_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 22); }
void vcmpnle_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 22); }
void vcmpord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 23); }
void vcmpord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 23); }
void vcmpeq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 24); }
void vcmpeq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 24); }
void vcmpnge_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 25); }
void vcmpnge_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 25); }
void vcmpngt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 26); }
void vcmpngt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 26); }
void vcmpfalse_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 27); }
void vcmpfalse_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 27); }
void vcmpneq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 28); }
void vcmpneq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 28); }
void vcmpge_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 29); }
void vcmpge_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 29); }
void vcmpgt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 30); }
void vcmpgt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 30); }
void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); }
void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 31); }
void cmpeqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 0); }
void vcmpeqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 0); }
void vcmpeqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 0); }
void cmpltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 1); }
void vcmpltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 1); }
void vcmpltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 1); }
void cmplesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 2); }
void vcmplesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 2); }
void vcmplesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 2); }
void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
void vcmpunordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 3); }
void vcmpunordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 3); }
void cmpneqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 4); }
void vcmpneqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 4); }
void vcmpneqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 4); }
void cmpnltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 5); }
void vcmpnltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 5); }
void vcmpnltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 5); }
void cmpnlesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 6); }
void vcmpnlesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 6); }
void vcmpnlesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 6); }
void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); }
void vcmpordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 7); }
void vcmpeq_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 8); }
void vcmpeq_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 8); }
void vcmpngesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 9); }
void vcmpngesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 9); }
void vcmpngtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 10); }
void vcmpngtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 10); }
void vcmpfalsesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 11); }
void vcmpfalsesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 11); }
void vcmpneq_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 12); }
void vcmpneq_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 12); }
void vcmpgesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 13); }
void vcmpgesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 13); }
void vcmpgtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 14); }
void vcmpgtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 14); }
void vcmptruesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 15); }
void vcmptruesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 15); }
void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); }
void vcmpeq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 16); }
void vcmplt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 17); }
void vcmplt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 17); }
void vcmple_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 18); }
void vcmple_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 18); }
void vcmpunord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 19); }
void vcmpunord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 19); }
void vcmpneq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 20); }
void vcmpneq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 20); }
void vcmpnlt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 21); }
void vcmpnlt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 21); }
void vcmpnle_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 22); }
void vcmpnle_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 22); }
void vcmpord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 23); }
void vcmpord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 23); }
void vcmpeq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 24); }
void vcmpeq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 24); }
void vcmpnge_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 25); }
void vcmpnge_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 25); }
void vcmpngt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 26); }
void vcmpngt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 26); }
void vcmpfalse_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 27); }
void vcmpfalse_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 27); }
void vcmpneq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 28); }
void vcmpneq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 28); }
void vcmpge_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 29); }
void vcmpge_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 29); }
void vcmpgt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 30); }
void vcmpgt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 30); }
void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); }
void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 31); }
void cmpeqss(const Xmm& x, const Operand& op) { cmpss(x, op, 0); }
void vcmpeqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 0); }
void vcmpeqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 0); }
void cmpltss(const Xmm& x, const Operand& op) { cmpss(x, op, 1); }
void vcmpltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 1); }
void vcmpltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 1); }
void cmpless(const Xmm& x, const Operand& op) { cmpss(x, op, 2); }
void vcmpless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 2); }
void vcmpless(const Xmm& x, const Operand& op) { vcmpss(x, op, 2); }
void cmpunordss(const Xmm& x, const Operand& op) { cmpss(x, op, 3); }
void vcmpunordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 3); }
void vcmpunordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 3); }
void cmpneqss(const Xmm& x, const Operand& op) { cmpss(x, op, 4); }
void vcmpneqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 4); }
void vcmpneqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 4); }
void cmpnltss(const Xmm& x, const Operand& op) { cmpss(x, op, 5); }
void vcmpnltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 5); }
void vcmpnltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 5); }
void cmpnless(const Xmm& x, const Operand& op) { cmpss(x, op, 6); }
void vcmpnless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 6); }
void vcmpnless(const Xmm& x, const Operand& op) { vcmpss(x, op, 6); }
void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); }
void vcmpordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 7); }
void vcmpeq_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 8); }
void vcmpeq_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 8); }
void vcmpngess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 9); }
void vcmpngess(const Xmm& x, const Operand& op) { vcmpss(x, op, 9); }
void vcmpngtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 10); }
void vcmpngtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 10); }
void vcmpfalsess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 11); }
void vcmpfalsess(const Xmm& x, const Operand& op) { vcmpss(x, op, 11); }
void vcmpneq_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 12); }
void vcmpneq_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 12); }
void vcmpgess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 13); }
void vcmpgess(const Xmm& x, const Operand& op) { vcmpss(x, op, 13); }
void vcmpgtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 14); }
void vcmpgtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 14); }
void vcmptruess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 15); }
void vcmptruess(const Xmm& x, const Operand& op) { vcmpss(x, op, 15); }
void vcmpeq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 16); }
void vcmpeq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 16); }
void vcmplt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 17); }
void vcmplt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 17); }
void vcmple_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 18); }
void vcmple_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 18); }
void vcmpunord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 19); }
void vcmpunord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 19); }
void vcmpneq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 20); }
void vcmpneq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 20); }
void vcmpnlt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 21); }
void vcmpnlt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 21); }
void vcmpnle_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 22); }
void vcmpnle_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 22); }
void vcmpord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 23); }
void vcmpord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 23); }
void vcmpeq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 24); }
void vcmpeq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 24); }
void vcmpnge_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 25); }
void vcmpnge_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 25); }
void vcmpngt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 26); }
void vcmpngt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 26); }
void vcmpfalse_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 27); }
void vcmpfalse_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 27); }
void vcmpneq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 28); }
void vcmpneq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 28); }
void vcmpge_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 29); }
void vcmpge_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 29); }
void vcmpgt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 30); }
void vcmpgt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 30); }
void vcmptrue_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 31); }
void vcmptrue_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 31); }
void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x16, false); }
void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x17, false); }
void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F, 0x16, false); }
@ -933,46 +1296,51 @@ void vfnmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()
void vfnmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 0); }
void vaesimc(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0xDB, false, 0); }
void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x1A, true, 0); }
void vbroadcastsd(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x19, true, 0); }
void vbroadcastss(const Xmm& x, const Address& addr) { opAVX_X_XM_IMM(x, addr, MM_0F38 | PP_66, 0x18, true, 0); }
void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_XM_IMM(y, cvtReg(op, op.isXMM(), Operand::YMM), MM_0F3A | PP_66, 0x19, true, 0, imm); }
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, cvtReg(op, op.isXMM(), Operand::YMM), MM_0F3A | PP_66, 0x18, true, 0); db(imm); }
void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x5A, true, 0); }
void vbroadcastsd(const Ymm& y, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_XM_IMM(y, op, MM_0F38 | PP_66, 0x19, true, 0); }
void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x18, true, 0); }
void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x19, true, 0); db(imm); }
void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x39, true, 0); db(imm); }
void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!(op.isREG(32) || op.isMEM()) || x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, op.isREG(), Operand::XMM, MM_0F3A | PP_66, 0x17, false, 0); db(imm); }
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x18, true, 0); db(imm); }
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x38, true, 0); db(imm); }
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x06, true, 0); db(imm); }
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x46, true, 0); db(imm); }
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_F2, 0xF0, true, 0); }
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, false, -1); }
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x14, false); db(imm); }
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); }
void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); }
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
void vpmovmskb(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xD7, false); }
void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm7, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm7, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm3, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm3, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); }
void vpsllw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x71, false); db(imm); }
void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); }
void vpslld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x72, false); db(imm); }
void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
void vpsllq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm4, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); }
void vpsraw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm4, x, x, MM_0F | PP_66, 0x71, false); db(imm); }
void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm4, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); }
void vpsrad(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm4, x, x, MM_0F | PP_66, 0x72, false); db(imm); }
void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); }
void vpsrlw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x71, false); db(imm); }
void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); }
void vpsrld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x72, false); db(imm); }
void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
void vpsrlq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); }
void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); }
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); }
void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); }
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
void vpmovmskb(const Reg32e& r, const Xmm& x) { bool isYMM= x.isYMM(); opAVX_X_X_XM(isYMM ? Ymm(r.getIdx()) : Xmm(r.getIdx()), isYMM ? ym0 : xm0, x, MM_0F | PP_66, 0xD7, true); }
void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym7 : xm7, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); }
void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym7 : xm7, x, x, MM_0F | PP_66, 0x73, true); db(imm); }
void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym3 : xm3, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); }
void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym3 : xm3, x, x, MM_0F | PP_66, 0x73, true); db(imm); }
void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); }
void vpsllw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x71, true); db(imm); }
void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); }
void vpslld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x72, true); db(imm); }
void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); }
void vpsllq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x73, true); db(imm); }
void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); }
void vpsraw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x71, true); db(imm); }
void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); }
void vpsrad(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x72, true); db(imm); }
void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); }
void vpsrlw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x71, true); db(imm); }
void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); }
void vpsrld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x72, true); db(imm); }
void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); }
void vpsrlq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x73, true); db(imm); }
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); }
void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); }
void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); }
@ -983,6 +1351,9 @@ void vmovd(const Xmm& x, const Reg32& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx
void vmovd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x6E, false, 0); }
void vmovd(const Reg32& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 0); }
void vmovd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x7E, false, 0); }
void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); }
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); }
void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); }
void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F, 0x12, false); }
void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F, 0x16, false); }
void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, MM_0F | PP_66, 0x50, true, 0); }
@ -990,7 +1361,7 @@ void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw ERR_BAD_C
void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }
void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }
void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); }
void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); }
void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); }
void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); }
@ -1001,25 +1372,41 @@ void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx())
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 0); }
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 0); }
void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 0); }
void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, cvtReg(op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM), MM_0F, 0x5A, true); }
void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, cvtReg(op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM), MM_0F | PP_F3, 0xE6, true); }
void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F, 0x5A, true); }
void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F | PP_F3, 0xE6, true); }
void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); }
void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); }
void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); }
void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); }
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); }
#ifdef XBYAK64
void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); }
void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); }
void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); }
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 1); db(imm); }
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); }
void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); }
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 1); db(imm); }
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); }
void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); }
void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 1); }
void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 1); }
void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 1); }
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 1); }
void movsxd(const Reg64& reg, const Operand& op) { opMovsxd(reg, op); }
#endif
void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38, 0xf2, true); }
void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf6, true); }
void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf5, true); }
void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F3, 0xf5, true); }
void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf7, false); }
void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf5, false); }
void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F3, 0xf7, false); }
void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_66, 0xf7, false); }
void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F2, 0xf7, false); }
void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); }
void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); }
void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); }
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 1, 0); }
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 1, 1); }
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 0, 1); }
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 0, 2); }
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 0, 1); }
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 0, 2); }
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 1, 0); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 1, 1); }

View File

@ -3,6 +3,8 @@
/**
utility class and functions for Xbyak
Xbyak::util::Clock ; rdtsc timer
Xbyak::util::Cpu ; detect CPU
@note this header is UNDER CONSTRUCTION!
*/
#include "xbyak.h"
@ -31,15 +33,17 @@
#endif
#else
#ifndef __GNUC_PREREQ
#define __GNUC_PREREQ(major, minor) (((major) << 16) + (minor))
#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
#endif
#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
#include <cpuid.h>
#else
#if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
#else
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
#endif
#endif
#endif
@ -59,13 +63,53 @@ class Cpu {
{
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
}
unsigned int mask(int n) const
{
return (1U << n) - 1;
}
void setFamily()
{
unsigned int data[4];
getCpuid(1, data);
stepping = data[0] & mask(4);
model = (data[0] >> 4) & mask(4);
family = (data[0] >> 8) & mask(4);
// type = (data[0] >> 12) & mask(2);
extModel = (data[0] >> 16) & mask(4);
extFamily = (data[0] >> 20) & mask(8);
if (family == 0x0f) {
displayFamily = family + extFamily;
} else {
displayFamily = family;
}
if (family == 6 || family == 0x0f) {
displayModel = (extModel << 4) + model;
} else {
displayModel = model;
}
}
public:
int model;
int family;
int stepping;
int extModel;
int extFamily;
int displayFamily; // family + extFamily
int displayModel; // model + extModel
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
{
#ifdef _WIN32
__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
}
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
{
#ifdef _WIN32
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
#else
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
#endif
}
static inline uint64 getXfeature()
@ -74,7 +118,9 @@ public:
return __xgetbv(0);
#else
unsigned int eax, edx;
__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
// xgetvb is not support on gcc 4.2
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64)edx << 32) | eax;
#endif
}
@ -92,7 +138,7 @@ public:
tPOPCNT = 1 << 9,
tAESNI = 1 << 10,
tSSE5 = 1 << 11,
tOSXSACE = 1 << 12,
tOSXSAVE = 1 << 12,
tPCLMULQDQ = 1 << 13,
tAVX = 1 << 14,
tFMA = 1 << 15,
@ -101,6 +147,10 @@ public:
tE3DN = 1 << 17,
tSSE4a = 1 << 18,
tRDTSCP = 1 << 19,
tAVX2 = 1 << 20,
tGPR1 = 1 << 21, // andn, bextr, blsi, blsmk, blsr, tzcnt
tGPR2 = 1 << 22, // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
tLZCNT = 1 << 23,
tINTEL = 1 << 24,
tAMD = 1 << 25
@ -125,6 +175,7 @@ public:
type_ |= tINTEL;
getCpuid(0x80000001, data);
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
if (data[2] & (1U << 5)) type_ |= tLZCNT;
}
getCpuid(1, data);
if (data[2] & (1U << 0)) type_ |= tSSE3;
@ -134,9 +185,14 @@ public:
if (data[2] & (1U << 23)) type_ |= tPOPCNT;
if (data[2] & (1U << 25)) type_ |= tAESNI;
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
if (data[2] & (1U << 27)) type_ |= tOSXSAVE;
if (type_ & tOSXSACE) {
if (data[3] & (1U << 15)) type_ |= tCMOV;
if (data[3] & (1U << 23)) type_ |= tMMX;
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;
if (data[3] & (1U << 26)) type_ |= tSSE2;
if (type_ & tOSXSAVE) {
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
uint64 bv = getXfeature();
if ((bv & 6) == 6) {
@ -144,11 +200,17 @@ public:
if (data[2] & (1U << 12)) type_ |= tFMA;
}
}
if (data[3] & (1U << 15)) type_ |= tCMOV;
if (data[3] & (1U << 23)) type_ |= tMMX;
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;
if (data[3] & (1U << 26)) type_ |= tSSE2;
getCpuidEx(7, 0, data);
if (type_ & tAVX && data[1] & 0x20) type_ |= tAVX2;
if (data[1] & (1U << 3)) type_ |= tGPR1;
if (data[1] & (1U << 8)) type_ |= tGPR2;
setFamily();
}
void putFamily()
{
printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
family, model, stepping, extFamily, extModel);
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
}
bool has(Type type) const
{
@ -190,70 +252,251 @@ private:
int count_;
};
#ifdef XBYAK32
#ifdef XBYAK64
const int UseRCX = 1 << 6;
const int UseRDX = 1 << 7;
namespace local {
#ifdef _WIN32
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline __declspec(naked) void set_eip_to_ ## x() { \
__asm { mov x, dword ptr [esp] } __asm { ret } \
class Pack {
static const size_t maxTblNum = 10;
const Xbyak::Reg64 *tbl_[maxTblNum];
size_t n_;
public:
Pack() : n_(0) {}
Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); }
Pack(const Pack& rhs)
: n_(rhs.n_)
{
for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
}
Pack(const Xbyak::Reg64& t0)
{ n_ = 1; tbl_[0] = &t0; }
Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; }
Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; }
Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; }
Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; }
Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; }
Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; }
Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; }
Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; }
Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
{ n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; }
Pack& append(const Xbyak::Reg64& t)
{
if (n_ == 10) {
fprintf(stderr, "ERR Pack::can't append\n");
throw ERR_BAD_PARAMETER;
}
tbl_[n_++] = &t;
return *this;
}
void init(const Xbyak::Reg64 *tbl, size_t n)
{
if (n > maxTblNum) {
fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
throw ERR_BAD_PARAMETER;
}
n_ = n;
for (size_t i = 0; i < n; i++) {
tbl_[i] = &tbl[i];
}
}
const Xbyak::Reg64& operator[](size_t n) const
{
if (n >= n_) {
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
throw ERR_BAD_PARAMETER;
}
return *tbl_[n];
}
size_t size() const { return n_; }
/*
get tbl[pos, pos + num)
*/
Pack sub(size_t pos, size_t num = size_t(-1)) const
{
if (num == size_t(-1)) num = n_ - pos;
if (pos + num > n_) {
fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
throw ERR_BAD_PARAMETER;
}
Pack pack;
pack.n_ = num;
for (size_t i = 0; i < num; i++) {
pack.tbl_[i] = tbl_[pos + i];
}
return pack;
}
};
class StackFrame {
#ifdef XBYAK64_WIN
static const int noSaveNum = 6;
static const int rcxPos = 0;
static const int rdxPos = 1;
#else
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline void set_eip_to_ ## x() { \
__asm__ volatile("movl (%esp), %" #x); \
}
static const int noSaveNum = 8;
static const int rcxPos = 3;
static const int rdxPos = 2;
#endif
Xbyak::CodeGenerator *code_;
int pNum_;
int tNum_;
bool useRcx_;
bool useRdx_;
int saveNum_;
int P_;
bool makeEpilog_;
Xbyak::Reg64 pTbl_[4];
Xbyak::Reg64 tTbl_[10];
Pack p_;
Pack t_;
StackFrame(const StackFrame&);
void operator=(const StackFrame&);
public:
const Pack& p;
const Pack& t;
/*
make stack frame
@param sf [in] this
@param pNum [in] num of function parameter(0 <= pNum <= 4)
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
@param stackSizeByte [in] local stack size
@param makeEpilog [in] automatically call close() if true
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(eax)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ecx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(esi)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edi)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebp)
#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG
} // end of local
/**
get eip to out register
@note out is not esp
*/
template<class T>
void setEipTo(T *self, const Xbyak::Reg32& out)
{
#if 0
self->call("@f");
self->L("@@");
self->pop(out);
you can use
rax
gp0, ..., gp(pNum - 1)
gt0, ..., gt(tNum-1)
rcx if tNum & UseRCX
rdx if tNum & UseRDX
rsp[0..stackSizeByte - 1]
*/
StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true)
: code_(code)
, pNum_(pNum)
, tNum_(tNum & ~(UseRCX | UseRDX))
, useRcx_((tNum & UseRCX) != 0)
, useRdx_((tNum & UseRDX) != 0)
, saveNum_(0)
, P_(0)
, makeEpilog_(makeEpilog)
, p(p_)
, t(t_)
{
using namespace Xbyak;
if (pNum < 0 || pNum > 4) throw ERR_BAD_PNUM;
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
if (allRegNum < pNum || allRegNum > 14) throw ERR_BAD_TNUM;
const Reg64& rsp = code->rsp;
const AddressFrame& ptr = code->ptr;
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
const int *tbl = getOrderTbl() + noSaveNum;
P_ = saveNum_ + (stackSizeByte + 7) / 8;
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
P_ *= 8;
if (P_ > 0) code->sub(rsp, P_);
#ifdef XBYAK64_WIN
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
code->mov(ptr [rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
}
for (int i = 4; i < saveNum_; i++) {
code->mov(ptr [rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
}
#else
int idx = out.getIdx();
switch (idx) {
case Xbyak::Operand::EAX:
self->call((void*)local::set_eip_to_eax);
break;
case Xbyak::Operand::ECX:
self->call((void*)local::set_eip_to_ecx);
break;
case Xbyak::Operand::EDX:
self->call((void*)local::set_eip_to_edx);
break;
case Xbyak::Operand::EBX:
self->call((void*)local::set_eip_to_ebx);
break;
case Xbyak::Operand::ESI:
self->call((void*)local::set_eip_to_esi);
break;
case Xbyak::Operand::EDI:
self->call((void*)local::set_eip_to_edi);
break;
case Xbyak::Operand::EBP:
self->call((void*)local::set_eip_to_ebp);
break;
default:
assert(0);
}
for (int i = 0; i < saveNum_; i++) {
code->mov(ptr [rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
}
#endif
}
int pos = 0;
for (int i = 0; i < pNum; i++) {
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
}
for (int i = 0; i < tNum_; i++) {
tTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
}
if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx);
if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx);
p_.init(pTbl_, pNum);
t_.init(tTbl_, tNum_);
}
/*
make epilog manually
@param callRet [in] call ret() if true
*/
void close(bool callRet = true)
{
using namespace Xbyak;
const Reg64& rsp = code_->rsp;
const AddressFrame& ptr = code_->ptr;
const int *tbl = getOrderTbl() + noSaveNum;
#ifdef XBYAK64_WIN
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
code_->mov(Reg64(tbl[i]), ptr [rsp + P_ + (i + 1) * 8]);
}
for (int i = 4; i < saveNum_; i++) {
code_->mov(Reg64(tbl[i]), ptr [rsp + P_ - 8 * (saveNum_ - i)]);
}
#else
for (int i = 0; i < saveNum_; i++) {
code_->mov(Reg64(tbl[i]), ptr [rsp + P_ - 8 * (saveNum_ - i)]);
}
#endif
if (P_ > 0) code_->add(rsp, P_);
if (callRet) code_->ret();
}
~StackFrame()
{
if (!makeEpilog_) return;
try {
close();
} catch (Xbyak::Error e) {
printf("ERR:StackFrame %s\n", ConvertErrorToString(e));
exit(1);
} catch (...) {
printf("ERR:StackFrame otherwise\n");
exit(1);
}
}
private:
const int *getOrderTbl() const
{
using namespace Xbyak;
static const int tbl[] = {
#ifdef XBYAK64_WIN
Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI,
#else
Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11,
#endif
Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15
};
return &tbl[0];
}
int getRegIdx(int& pos) const
{
assert(pos < 14);
using namespace Xbyak;
const int *tbl = getOrderTbl();
int r = tbl[pos++];
if (useRcx_) {
if (r == Operand::RCX) { return Operand::R10; }
if (r == Operand::R10) { r = tbl[pos++]; }
}
if (useRdx_) {
if (r == Operand::RDX) { return Operand::R11; }
if (r == Operand::R11) { return tbl[pos++]; }
}
return r;
}
};
#endif
} } // end of util

View File

@ -121,6 +121,7 @@ int ds2mode = 0; // DS Mode at start
FILE *padLog = NULL;
pthread_spinlock_t mutex_KeyEvent;
bool mutex_WasInit = false;
KeyStatus* key_status = NULL;
queue<keyEvent> ev_fifo;
@ -277,6 +278,7 @@ EXPORT_C_(s32) PADopen(void *pDsp)
while (!ev_fifo.empty()) ev_fifo.pop();
pthread_spin_init(&mutex_KeyEvent, PTHREAD_PROCESS_PRIVATE);
mutex_WasInit = true;
#ifdef __LINUX__
JoystickInfo::EnumerateJoysticks(s_vjoysticks);
@ -303,6 +305,7 @@ EXPORT_C_(void) PADsetLogDir(const char* dir)
EXPORT_C_(void) PADclose()
{
while (!ev_fifo.empty()) ev_fifo.pop();
mutex_WasInit = false;
pthread_spin_destroy(&mutex_KeyEvent);
_PADclose();
}
@ -666,8 +669,12 @@ EXPORT_C_(keyEvent*) PADkeyEvent()
#ifdef __LINUX__
EXPORT_C_(void) PADWriteEvent(keyEvent &evt)
{
pthread_spin_lock(&mutex_KeyEvent);
ev_fifo.push(evt);
pthread_spin_unlock(&mutex_KeyEvent);
// This function call be called before PADopen. Therefore we cann't
// guarantee that the spin lock was initialized
if (mutex_WasInit) {
pthread_spin_lock(&mutex_KeyEvent);
ev_fifo.push(evt);
pthread_spin_unlock(&mutex_KeyEvent);
}
}
#endif